| { | |
| "config": { | |
| "epochs": 10, | |
| "lr": 0.0003, | |
| "warmup_steps": 300, | |
| "grad_accum_steps": 8, | |
| "grad_clip": 1.0 | |
| }, | |
| "train": [ | |
| { | |
| "epoch": 1, | |
| "avg_loss": 7.070409, | |
| "lr": 0.00027, | |
| "step": 270, | |
| "time_s": 971.0 | |
| }, | |
| { | |
| "epoch": 2, | |
| "avg_loss": 6.138723, | |
| "lr": 0.0002986039232296601, | |
| "step": 540, | |
| "time_s": 969.5 | |
| }, | |
| { | |
| "epoch": 3, | |
| "avg_loss": 5.511719, | |
| "lr": 0.00029373201732051774, | |
| "step": 810, | |
| "time_s": 970.5 | |
| }, | |
| { | |
| "epoch": 4, | |
| "avg_loss": 5.033876, | |
| "lr": 0.00028548319801432657, | |
| "step": 1080, | |
| "time_s": 966.4 | |
| }, | |
| { | |
| "epoch": 5, | |
| "avg_loss": 4.711938, | |
| "lr": 0.00027406193668993577, | |
| "step": 1350, | |
| "time_s": 977.1 | |
| }, | |
| { | |
| "epoch": 6, | |
| "avg_loss": 4.464801, | |
| "lr": 0.00027245653264858043, | |
| "step": 1620, | |
| "time_s": 973.7 | |
| }, | |
| { | |
| "epoch": 7, | |
| "avg_loss": 4.292098, | |
| "lr": 0.0002676801152109725, | |
| "step": 1890, | |
| "time_s": 977.7 | |
| }, | |
| { | |
| "epoch": 8, | |
| "avg_loss": 4.150779, | |
| "lr": 0.00025985108200750904, | |
| "step": 2160, | |
| "time_s": 975.7 | |
| } | |
| ], | |
| "val": [ | |
| { | |
| "epoch": 1, | |
| "avg_loss": 6.459693 | |
| }, | |
| { | |
| "epoch": 2, | |
| "avg_loss": 5.804065 | |
| }, | |
| { | |
| "epoch": 3, | |
| "avg_loss": 5.179338 | |
| }, | |
| { | |
| "epoch": 4, | |
| "avg_loss": 4.774045 | |
| }, | |
| { | |
| "epoch": 5, | |
| "avg_loss": 4.511194 | |
| }, | |
| { | |
| "epoch": 6, | |
| "avg_loss": 4.305547 | |
| }, | |
| { | |
| "epoch": 7, | |
| "avg_loss": 4.154681 | |
| }, | |
| { | |
| "epoch": 8, | |
| "avg_loss": 4.037178 | |
| } | |
| ], | |
| "sanity": [] | |
| } |