{ "config": { "epochs": 10, "lr": 0.0003, "warmup_steps": 300, "grad_accum_steps": 8, "grad_clip": 1.0 }, "train": [ { "epoch": 1, "avg_loss": 7.070409, "lr": 0.00027, "step": 270, "time_s": 971.0 }, { "epoch": 2, "avg_loss": 6.138723, "lr": 0.0002986039232296601, "step": 540, "time_s": 969.5 }, { "epoch": 3, "avg_loss": 5.511719, "lr": 0.00029373201732051774, "step": 810, "time_s": 970.5 }, { "epoch": 4, "avg_loss": 5.033876, "lr": 0.00028548319801432657, "step": 1080, "time_s": 966.4 }, { "epoch": 5, "avg_loss": 4.711938, "lr": 0.00027406193668993577, "step": 1350, "time_s": 977.1 }, { "epoch": 6, "avg_loss": 4.464801, "lr": 0.00027245653264858043, "step": 1620, "time_s": 973.7 }, { "epoch": 7, "avg_loss": 4.292098, "lr": 0.0002676801152109725, "step": 1890, "time_s": 977.7 }, { "epoch": 8, "avg_loss": 4.150779, "lr": 0.00025985108200750904, "step": 2160, "time_s": 975.7 } ], "val": [ { "epoch": 1, "avg_loss": 6.459693 }, { "epoch": 2, "avg_loss": 5.804065 }, { "epoch": 3, "avg_loss": 5.179338 }, { "epoch": 4, "avg_loss": 4.774045 }, { "epoch": 5, "avg_loss": 4.511194 }, { "epoch": 6, "avg_loss": 4.305547 }, { "epoch": 7, "avg_loss": 4.154681 }, { "epoch": 8, "avg_loss": 4.037178 } ], "sanity": [] }