File size: 1,718 Bytes
7560d37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | {
"config": {
"epochs": 10,
"lr": 0.0003,
"warmup_steps": 300,
"grad_accum_steps": 8,
"grad_clip": 1.0
},
"train": [
{
"epoch": 1,
"avg_loss": 7.070409,
"lr": 0.00027,
"step": 270,
"time_s": 971.0
},
{
"epoch": 2,
"avg_loss": 6.138723,
"lr": 0.0002986039232296601,
"step": 540,
"time_s": 969.5
},
{
"epoch": 3,
"avg_loss": 5.511719,
"lr": 0.00029373201732051774,
"step": 810,
"time_s": 970.5
},
{
"epoch": 4,
"avg_loss": 5.033876,
"lr": 0.00028548319801432657,
"step": 1080,
"time_s": 966.4
},
{
"epoch": 5,
"avg_loss": 4.711938,
"lr": 0.00027406193668993577,
"step": 1350,
"time_s": 977.1
},
{
"epoch": 6,
"avg_loss": 4.464801,
"lr": 0.00027245653264858043,
"step": 1620,
"time_s": 973.7
},
{
"epoch": 7,
"avg_loss": 4.292098,
"lr": 0.0002676801152109725,
"step": 1890,
"time_s": 977.7
},
{
"epoch": 8,
"avg_loss": 4.150779,
"lr": 0.00025985108200750904,
"step": 2160,
"time_s": 975.7
}
],
"val": [
{
"epoch": 1,
"avg_loss": 6.459693
},
{
"epoch": 2,
"avg_loss": 5.804065
},
{
"epoch": 3,
"avg_loss": 5.179338
},
{
"epoch": 4,
"avg_loss": 4.774045
},
{
"epoch": 5,
"avg_loss": 4.511194
},
{
"epoch": 6,
"avg_loss": 4.305547
},
{
"epoch": 7,
"avg_loss": 4.154681
},
{
"epoch": 8,
"avg_loss": 4.037178
}
],
"sanity": []
} |