| { |
| "config": { |
| "epochs": 10, |
| "lr": 0.0003, |
| "warmup_steps": 300, |
| "grad_accum_steps": 8, |
| "grad_clip": 1.0 |
| }, |
| "train": [ |
| { |
| "epoch": 1, |
| "avg_loss": 7.070409, |
| "lr": 0.00027, |
| "step": 270, |
| "time_s": 971.0 |
| }, |
| { |
| "epoch": 2, |
| "avg_loss": 6.138723, |
| "lr": 0.0002986039232296601, |
| "step": 540, |
| "time_s": 969.5 |
| }, |
| { |
| "epoch": 3, |
| "avg_loss": 5.511719, |
| "lr": 0.00029373201732051774, |
| "step": 810, |
| "time_s": 970.5 |
| }, |
| { |
| "epoch": 4, |
| "avg_loss": 5.033876, |
| "lr": 0.00028548319801432657, |
| "step": 1080, |
| "time_s": 966.4 |
| }, |
| { |
| "epoch": 5, |
| "avg_loss": 4.711938, |
| "lr": 0.00027406193668993577, |
| "step": 1350, |
| "time_s": 977.1 |
| }, |
| { |
| "epoch": 6, |
| "avg_loss": 4.464801, |
| "lr": 0.00027245653264858043, |
| "step": 1620, |
| "time_s": 973.7 |
| }, |
| { |
| "epoch": 7, |
| "avg_loss": 4.292098, |
| "lr": 0.0002676801152109725, |
| "step": 1890, |
| "time_s": 977.7 |
| }, |
| { |
| "epoch": 8, |
| "avg_loss": 4.150779, |
| "lr": 0.00025985108200750904, |
| "step": 2160, |
| "time_s": 975.7 |
| }, |
| { |
| "epoch": 9, |
| "avg_loss": 3.803133, |
| "lr": 0.0002587006831839486, |
| "step": 2170, |
| "time_s": 34.3 |
| }, |
| { |
| "epoch": 10, |
| "avg_loss": 3.725935, |
| "lr": 0.00025557782178568903, |
| "step": 2177, |
| "time_s": 33.6 |
| }, |
| { |
| "epoch": 11, |
| "avg_loss": 3.637672, |
| "lr": 0.00025043245687112674, |
| "step": 2184, |
| "time_s": 34.3 |
| }, |
| { |
| "epoch": 12, |
| "avg_loss": 3.560756, |
| "lr": 0.00024335262706051453, |
| "step": 2191, |
| "time_s": 34.0 |
| }, |
| { |
| "epoch": 13, |
| "avg_loss": 3.493701, |
| "lr": 0.00023445947020481162, |
| "step": 2198, |
| "time_s": 34.2 |
| } |
| ], |
| "val": [ |
| { |
| "epoch": 1, |
| "avg_loss": 6.459693 |
| }, |
| { |
| "epoch": 2, |
| "avg_loss": 5.804065 |
| }, |
| { |
| "epoch": 3, |
| "avg_loss": 5.179338 |
| }, |
| { |
| "epoch": 4, |
| "avg_loss": 4.774045 |
| }, |
| { |
| "epoch": 5, |
| "avg_loss": 4.511194 |
| }, |
| { |
| "epoch": 6, |
| "avg_loss": 4.305547 |
| }, |
| { |
| "epoch": 7, |
| "avg_loss": 4.154681 |
| }, |
| { |
| "epoch": 8, |
| "avg_loss": 4.037178 |
| }, |
| { |
| "epoch": 9, |
| "avg_loss": 3.911244 |
| }, |
| { |
| "epoch": 10, |
| "avg_loss": 3.914251 |
| }, |
| { |
| "epoch": 11, |
| "avg_loss": 3.909345 |
| }, |
| { |
| "epoch": 12, |
| "avg_loss": 3.932855 |
| }, |
| { |
| "epoch": 13, |
| "avg_loss": 3.951809 |
| } |
| ], |
| "sanity": [] |
| } |