| { |
| "config": { |
| "epochs": 10, |
| "lr": 0.0003, |
| "warmup_steps": 300, |
| "grad_accum_steps": 8, |
| "grad_clip": 1.0 |
| }, |
| "train": [ |
| { |
| "epoch": 1, |
| "avg_loss": 7.070409, |
| "lr": 0.00027, |
| "step": 270, |
| "time_s": 971.0 |
| }, |
| { |
| "epoch": 2, |
| "avg_loss": 6.138723, |
| "lr": 0.0002986039232296601, |
| "step": 540, |
| "time_s": 969.5 |
| }, |
| { |
| "epoch": 3, |
| "avg_loss": 5.511719, |
| "lr": 0.00029373201732051774, |
| "step": 810, |
| "time_s": 970.5 |
| }, |
| { |
| "epoch": 4, |
| "avg_loss": 5.033876, |
| "lr": 0.00028548319801432657, |
| "step": 1080, |
| "time_s": 966.4 |
| }, |
| { |
| "epoch": 5, |
| "avg_loss": 4.711938, |
| "lr": 0.00027406193668993577, |
| "step": 1350, |
| "time_s": 977.1 |
| }, |
| { |
| "epoch": 6, |
| "avg_loss": 4.464801, |
| "lr": 0.00027245653264858043, |
| "step": 1620, |
| "time_s": 973.7 |
| }, |
| { |
| "epoch": 7, |
| "avg_loss": 4.292098, |
| "lr": 0.0002676801152109725, |
| "step": 1890, |
| "time_s": 977.7 |
| }, |
| { |
| "epoch": 8, |
| "avg_loss": 4.150779, |
| "lr": 0.00025985108200750904, |
| "step": 2160, |
| "time_s": 975.7 |
| }, |
| { |
| "epoch": 9, |
| "avg_loss": 4.048912, |
| "lr": 0.0002595061312929626, |
| "step": 2486, |
| "time_s": 1655.5 |
| }, |
| { |
| "epoch": 10, |
| "avg_loss": 3.914366, |
| "lr": 0.00025878265404557577, |
| "step": 2809, |
| "time_s": 1653.9 |
| }, |
| { |
| "epoch": 11, |
| "avg_loss": 3.793121, |
| "lr": 0.00025758003153357115, |
| "step": 3132, |
| "time_s": 1677.3 |
| }, |
| { |
| "epoch": 12, |
| "avg_loss": 3.684527, |
| "lr": 0.00025590300995857436, |
| "step": 3455, |
| "time_s": 1674.5 |
| }, |
| { |
| "epoch": 13, |
| "avg_loss": 3.584779, |
| "lr": 0.0002557817233080656, |
| "step": 3778, |
| "time_s": 1717.0 |
| }, |
| { |
| "epoch": 14, |
| "avg_loss": 3.478163, |
| "lr": 0.0002554181076117055, |
| "step": 4101, |
| "time_s": 1719.1 |
| }, |
| { |
| "epoch": 15, |
| "avg_loss": 3.377975, |
| "lr": 0.0002548128951431141, |
| "step": 4424, |
| "time_s": 1718.5 |
| }, |
| { |
| "epoch": 16, |
| "avg_loss": 3.283389, |
| "lr": 0.00025396730471965995, |
| "step": 4747, |
| "time_s": 1719.1 |
| }, |
| { |
| "epoch": 17, |
| "avg_loss": 3.198379, |
| "lr": 0.0002528830392479232, |
| "step": 5070, |
| "time_s": 1718.4 |
| }, |
| { |
| "epoch": 18, |
| "avg_loss": 3.123276, |
| "lr": 0.0002515622822942667, |
| "step": 5393, |
| "time_s": 1716.5 |
| }, |
| { |
| "epoch": 19, |
| "avg_loss": 3.054019, |
| "lr": 0.00025000769368742505, |
| "step": 5716, |
| "time_s": 1714.5 |
| }, |
| { |
| "epoch": 20, |
| "avg_loss": 2.990954, |
| "lr": 0.00024822240416196397, |
| "step": 6039, |
| "time_s": 1717.5 |
| }, |
| { |
| "epoch": 21, |
| "avg_loss": 2.932507, |
| "lr": 0.0002462100090533974, |
| "step": 6362, |
| "time_s": 1720.0 |
| }, |
| { |
| "epoch": 22, |
| "avg_loss": 2.879704, |
| "lr": 0.0002439745610576592, |
| "step": 6685, |
| "time_s": 1717.7 |
| }, |
| { |
| "epoch": 23, |
| "avg_loss": 2.830638, |
| "lr": 0.00024152056206951165, |
| "step": 7008, |
| "time_s": 1716.3 |
| }, |
| { |
| "epoch": 24, |
| "avg_loss": 2.785015, |
| "lr": 0.00023885295411633396, |
| "step": 7331, |
| "time_s": 1718.6 |
| }, |
| { |
| "epoch": 25, |
| "avg_loss": 2.743108, |
| "lr": 0.00023597710940553054, |
| "step": 7654, |
| "time_s": 1719.6 |
| }, |
| { |
| "epoch": 26, |
| "avg_loss": 2.72249, |
| "lr": 0.00023586585478300336, |
| "step": 7977, |
| "time_s": 1699.7 |
| }, |
| { |
| "epoch": 27, |
| "avg_loss": 2.681254, |
| "lr": 0.00023553231496741537, |
| "step": 8300, |
| "time_s": 1697.8 |
| }, |
| { |
| "epoch": 28, |
| "avg_loss": 2.644416, |
| "lr": 0.0002349771616635675, |
| "step": 8623, |
| "time_s": 1702.3 |
| }, |
| { |
| "epoch": 29, |
| "avg_loss": 2.610499, |
| "lr": 0.00023420151287633192, |
| "step": 8946, |
| "time_s": 1700.0 |
| }, |
| { |
| "epoch": 30, |
| "avg_loss": 2.578076, |
| "lr": 0.00023320693065914135, |
| "step": 9269, |
| "time_s": 1706.3 |
| }, |
| { |
| "epoch": 31, |
| "avg_loss": 2.547269, |
| "lr": 0.00023199541796821904, |
| "step": 9592, |
| "time_s": 1707.6 |
| }, |
| { |
| "epoch": 32, |
| "avg_loss": 2.51851, |
| "lr": 0.00023056941462889428, |
| "step": 9915, |
| "time_s": 1709.3 |
| }, |
| { |
| "epoch": 33, |
| "avg_loss": 2.49159, |
| "lr": 0.00022893179242211494, |
| "step": 10238, |
| "time_s": 1707.1 |
| }, |
| { |
| "epoch": 34, |
| "avg_loss": 2.467141, |
| "lr": 0.00022708584930106635, |
| "step": 10561, |
| "time_s": 1703.1 |
| }, |
| { |
| "epoch": 35, |
| "avg_loss": 2.440859, |
| "lr": 0.00022503530274953248, |
| "step": 10884, |
| "time_s": 1703.3 |
| }, |
| { |
| "epoch": 36, |
| "avg_loss": 2.439114, |
| "lr": 0.00022498348287805102, |
| "step": 11207, |
| "time_s": 1661.0 |
| }, |
| { |
| "epoch": 37, |
| "avg_loss": 2.412668, |
| "lr": 0.00022482807440355346, |
| "step": 11530, |
| "time_s": 1677.7 |
| }, |
| { |
| "epoch": 38, |
| "avg_loss": 2.390983, |
| "lr": 0.0002245692306954427, |
| "step": 11853, |
| "time_s": 1671.4 |
| }, |
| { |
| "epoch": 39, |
| "avg_loss": 2.368829, |
| "lr": 0.00022420720720120794, |
| "step": 12176, |
| "time_s": 1673.6 |
| }, |
| { |
| "epoch": 40, |
| "avg_loss": 2.349286, |
| "lr": 0.00022374236119432995, |
| "step": 12499, |
| "time_s": 1675.2 |
| }, |
| { |
| "epoch": 41, |
| "avg_loss": 2.329778, |
| "lr": 0.00022317515142169614, |
| "step": 12822, |
| "time_s": 1669.3 |
| }, |
| { |
| "epoch": 42, |
| "avg_loss": 2.312137, |
| "lr": 0.00022250613765087169, |
| "step": 13145, |
| "time_s": 1665.3 |
| }, |
| { |
| "epoch": 43, |
| "avg_loss": 2.226545, |
| "lr": 0.00011089582625474084, |
| "step": 13468, |
| "time_s": 1661.9 |
| }, |
| { |
| "epoch": 44, |
| "avg_loss": 2.203883, |
| "lr": 0.0001104920200486558, |
| "step": 13791, |
| "time_s": 1676.9 |
| }, |
| { |
| "epoch": 45, |
| "avg_loss": 2.190213, |
| "lr": 0.00011004204871515414, |
| "step": 14114, |
| "time_s": 1663.6 |
| }, |
| { |
| "epoch": 46, |
| "avg_loss": 2.178578, |
| "lr": 0.00010954635632161652, |
| "step": 14437, |
| "time_s": 1673.2 |
| }, |
| { |
| "epoch": 47, |
| "avg_loss": 2.167334, |
| "lr": 0.00010900543205658946, |
| "step": 14760, |
| "time_s": 1661.2 |
| }, |
| { |
| "epoch": 48, |
| "avg_loss": 2.157297, |
| "lr": 0.00010841980974701614, |
| "step": 15083, |
| "time_s": 1661.9 |
| }, |
| { |
| "epoch": 49, |
| "avg_loss": 2.14751, |
| "lr": 0.00010779006733141285, |
| "step": 15406, |
| "time_s": 1662.9 |
| }, |
| { |
| "epoch": 50, |
| "avg_loss": 2.138343, |
| "lr": 0.00010711682628951417, |
| "step": 15729, |
| "time_s": 1661.5 |
| } |
| ], |
| "val": [ |
| { |
| "epoch": 1, |
| "avg_loss": 6.459693 |
| }, |
| { |
| "epoch": 2, |
| "avg_loss": 5.804065 |
| }, |
| { |
| "epoch": 3, |
| "avg_loss": 5.179338 |
| }, |
| { |
| "epoch": 4, |
| "avg_loss": 4.774045 |
| }, |
| { |
| "epoch": 5, |
| "avg_loss": 4.511194 |
| }, |
| { |
| "epoch": 6, |
| "avg_loss": 4.305547 |
| }, |
| { |
| "epoch": 7, |
| "avg_loss": 4.154681 |
| }, |
| { |
| "epoch": 8, |
| "avg_loss": 4.037178 |
| }, |
| { |
| "epoch": 9, |
| "avg_loss": 3.829285 |
| }, |
| { |
| "epoch": 10, |
| "avg_loss": 3.708552 |
| }, |
| { |
| "epoch": 11, |
| "avg_loss": 3.594035 |
| }, |
| { |
| "epoch": 12, |
| "avg_loss": 3.495301 |
| }, |
| { |
| "epoch": 13, |
| "avg_loss": 3.318829 |
| }, |
| { |
| "epoch": 14, |
| "avg_loss": 3.237618 |
| }, |
| { |
| "epoch": 15, |
| "avg_loss": 3.171485 |
| }, |
| { |
| "epoch": 16, |
| "avg_loss": 3.082833 |
| }, |
| { |
| "epoch": 17, |
| "avg_loss": 3.013669 |
| }, |
| { |
| "epoch": 18, |
| "avg_loss": 2.949275 |
| }, |
| { |
| "epoch": 19, |
| "avg_loss": 2.898899 |
| }, |
| { |
| "epoch": 20, |
| "avg_loss": 2.855805 |
| }, |
| { |
| "epoch": 21, |
| "avg_loss": 2.804574 |
| }, |
| { |
| "epoch": 22, |
| "avg_loss": 2.76583 |
| }, |
| { |
| "epoch": 23, |
| "avg_loss": 2.720584 |
| }, |
| { |
| "epoch": 24, |
| "avg_loss": 2.692678 |
| }, |
| { |
| "epoch": 25, |
| "avg_loss": 2.660146 |
| }, |
| { |
| "epoch": 26, |
| "avg_loss": 2.477178 |
| }, |
| { |
| "epoch": 27, |
| "avg_loss": 2.463243 |
| }, |
| { |
| "epoch": 28, |
| "avg_loss": 2.459879 |
| }, |
| { |
| "epoch": 29, |
| "avg_loss": 2.455403 |
| }, |
| { |
| "epoch": 30, |
| "avg_loss": 2.439061 |
| }, |
| { |
| "epoch": 31, |
| "avg_loss": 2.430927 |
| }, |
| { |
| "epoch": 32, |
| "avg_loss": 2.414427 |
| }, |
| { |
| "epoch": 33, |
| "avg_loss": 2.401476 |
| }, |
| { |
| "epoch": 34, |
| "avg_loss": 2.404088 |
| }, |
| { |
| "epoch": 35, |
| "avg_loss": 2.382748 |
| }, |
| { |
| "epoch": 36, |
| "avg_loss": 2.18284 |
| }, |
| { |
| "epoch": 37, |
| "avg_loss": 2.201308 |
| }, |
| { |
| "epoch": 38, |
| "avg_loss": 2.214867 |
| }, |
| { |
| "epoch": 39, |
| "avg_loss": 2.223911 |
| }, |
| { |
| "epoch": 40, |
| "avg_loss": 2.225972 |
| }, |
| { |
| "epoch": 41, |
| "avg_loss": 2.224468 |
| }, |
| { |
| "epoch": 42, |
| "avg_loss": 2.238013 |
| }, |
| { |
| "epoch": 43, |
| "avg_loss": 2.184722 |
| }, |
| { |
| "epoch": 44, |
| "avg_loss": 2.177716 |
| }, |
| { |
| "epoch": 45, |
| "avg_loss": 2.176141 |
| }, |
| { |
| "epoch": 46, |
| "avg_loss": 2.170592 |
| }, |
| { |
| "epoch": 47, |
| "avg_loss": 2.173157 |
| }, |
| { |
| "epoch": 48, |
| "avg_loss": 2.173247 |
| }, |
| { |
| "epoch": 49, |
| "avg_loss": 2.175019 |
| }, |
| { |
| "epoch": 50, |
| "avg_loss": 2.18001 |
| } |
| ], |
| "sanity": [] |
| } |