| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 24.716786817713697, |
| "eval_steps": 500, |
| "global_step": 24000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010298661174047374, |
| "grad_norm": 13.149330139160156, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0642, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02059732234809475, |
| "grad_norm": 10.384645462036133, |
| "learning_rate": 1.5833333333333336e-06, |
| "loss": 1.1435, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.030895983522142123, |
| "grad_norm": 4.765048027038574, |
| "learning_rate": 2.4166666666666667e-06, |
| "loss": 0.8064, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0411946446961895, |
| "grad_norm": 1.7030583620071411, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.4879, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05149330587023687, |
| "grad_norm": 1.4565768241882324, |
| "learning_rate": 4.083333333333334e-06, |
| "loss": 0.3388, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.061791967044284246, |
| "grad_norm": 1.2755788564682007, |
| "learning_rate": 4.9166666666666665e-06, |
| "loss": 0.2841, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07209062821833162, |
| "grad_norm": 1.058097004890442, |
| "learning_rate": 5.750000000000001e-06, |
| "loss": 0.2302, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.082389289392379, |
| "grad_norm": 1.4532016515731812, |
| "learning_rate": 6.583333333333333e-06, |
| "loss": 0.2028, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09268795056642637, |
| "grad_norm": 1.1775424480438232, |
| "learning_rate": 7.416666666666668e-06, |
| "loss": 0.1856, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10298661174047374, |
| "grad_norm": 1.3665395975112915, |
| "learning_rate": 8.25e-06, |
| "loss": 0.181, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11328527291452112, |
| "grad_norm": 1.4632707834243774, |
| "learning_rate": 9.083333333333333e-06, |
| "loss": 0.1686, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12358393408856849, |
| "grad_norm": 1.2164292335510254, |
| "learning_rate": 9.916666666666668e-06, |
| "loss": 0.1575, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13388259526261587, |
| "grad_norm": 0.6911335587501526, |
| "learning_rate": 1.075e-05, |
| "loss": 0.1477, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14418125643666324, |
| "grad_norm": 0.7088552117347717, |
| "learning_rate": 1.1583333333333333e-05, |
| "loss": 0.1382, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15447991761071062, |
| "grad_norm": 1.046133041381836, |
| "learning_rate": 1.2416666666666667e-05, |
| "loss": 0.1411, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.164778578784758, |
| "grad_norm": 1.1359339952468872, |
| "learning_rate": 1.3250000000000002e-05, |
| "loss": 0.1347, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17507723995880536, |
| "grad_norm": 0.8838634490966797, |
| "learning_rate": 1.4083333333333335e-05, |
| "loss": 0.1272, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18537590113285274, |
| "grad_norm": 1.1631752252578735, |
| "learning_rate": 1.4916666666666667e-05, |
| "loss": 0.1351, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1956745623069001, |
| "grad_norm": 1.1706782579421997, |
| "learning_rate": 1.575e-05, |
| "loss": 0.1273, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2059732234809475, |
| "grad_norm": 0.9792522192001343, |
| "learning_rate": 1.6583333333333334e-05, |
| "loss": 0.1253, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21627188465499486, |
| "grad_norm": 0.7125104069709778, |
| "learning_rate": 1.741666666666667e-05, |
| "loss": 0.1238, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.22657054582904224, |
| "grad_norm": 0.849624514579773, |
| "learning_rate": 1.825e-05, |
| "loss": 0.1094, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2368692070030896, |
| "grad_norm": 1.1960203647613525, |
| "learning_rate": 1.9083333333333334e-05, |
| "loss": 0.1211, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.24716786817713698, |
| "grad_norm": 0.7790117859840393, |
| "learning_rate": 1.9916666666666665e-05, |
| "loss": 0.1017, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.25746652935118436, |
| "grad_norm": 0.8855171799659729, |
| "learning_rate": 2.075e-05, |
| "loss": 0.099, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.26776519052523173, |
| "grad_norm": 0.6777231693267822, |
| "learning_rate": 2.1583333333333334e-05, |
| "loss": 0.1089, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2780638516992791, |
| "grad_norm": 0.9875190854072571, |
| "learning_rate": 2.2416666666666665e-05, |
| "loss": 0.1096, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2883625128733265, |
| "grad_norm": 1.0280433893203735, |
| "learning_rate": 2.3250000000000003e-05, |
| "loss": 0.0929, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.29866117404737386, |
| "grad_norm": 0.7355198860168457, |
| "learning_rate": 2.4083333333333337e-05, |
| "loss": 0.1095, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.30895983522142123, |
| "grad_norm": 0.8522012829780579, |
| "learning_rate": 2.4916666666666668e-05, |
| "loss": 0.0985, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3192584963954686, |
| "grad_norm": 0.547737717628479, |
| "learning_rate": 2.5750000000000002e-05, |
| "loss": 0.0926, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.329557157569516, |
| "grad_norm": 0.8814008235931396, |
| "learning_rate": 2.6583333333333333e-05, |
| "loss": 0.0878, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.33985581874356335, |
| "grad_norm": 1.0947939157485962, |
| "learning_rate": 2.7416666666666668e-05, |
| "loss": 0.0981, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35015447991761073, |
| "grad_norm": 0.799148678779602, |
| "learning_rate": 2.825e-05, |
| "loss": 0.0875, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3604531410916581, |
| "grad_norm": 0.6206310987472534, |
| "learning_rate": 2.9083333333333333e-05, |
| "loss": 0.0793, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3707518022657055, |
| "grad_norm": 0.6761384010314941, |
| "learning_rate": 2.991666666666667e-05, |
| "loss": 0.0765, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.38105046343975285, |
| "grad_norm": 0.49738550186157227, |
| "learning_rate": 3.075e-05, |
| "loss": 0.0909, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3913491246138002, |
| "grad_norm": 0.9449822306632996, |
| "learning_rate": 3.158333333333334e-05, |
| "loss": 0.0767, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4016477857878476, |
| "grad_norm": 0.8295601606369019, |
| "learning_rate": 3.2416666666666664e-05, |
| "loss": 0.0837, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.411946446961895, |
| "grad_norm": 0.9744588136672974, |
| "learning_rate": 3.325e-05, |
| "loss": 0.0778, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.42224510813594235, |
| "grad_norm": 1.1776330471038818, |
| "learning_rate": 3.408333333333333e-05, |
| "loss": 0.0796, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4325437693099897, |
| "grad_norm": 0.7325335144996643, |
| "learning_rate": 3.491666666666667e-05, |
| "loss": 0.082, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4428424304840371, |
| "grad_norm": 0.9740093350410461, |
| "learning_rate": 3.575e-05, |
| "loss": 0.0844, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.45314109165808447, |
| "grad_norm": 0.9867604970932007, |
| "learning_rate": 3.658333333333334e-05, |
| "loss": 0.074, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.46343975283213185, |
| "grad_norm": 0.8307005763053894, |
| "learning_rate": 3.7416666666666664e-05, |
| "loss": 0.0763, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4737384140061792, |
| "grad_norm": 0.8879880905151367, |
| "learning_rate": 3.825e-05, |
| "loss": 0.0766, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4840370751802266, |
| "grad_norm": 0.8298061490058899, |
| "learning_rate": 3.908333333333333e-05, |
| "loss": 0.0808, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.49433573635427397, |
| "grad_norm": 0.9233872294425964, |
| "learning_rate": 3.991666666666667e-05, |
| "loss": 0.0795, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5046343975283213, |
| "grad_norm": 0.9856144189834595, |
| "learning_rate": 4.075e-05, |
| "loss": 0.0764, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5149330587023687, |
| "grad_norm": 0.6428182721138, |
| "learning_rate": 4.158333333333333e-05, |
| "loss": 0.0804, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.525231719876416, |
| "grad_norm": 0.7709551453590393, |
| "learning_rate": 4.241666666666667e-05, |
| "loss": 0.071, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5355303810504635, |
| "grad_norm": 0.5600109100341797, |
| "learning_rate": 4.325e-05, |
| "loss": 0.0766, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5458290422245108, |
| "grad_norm": 0.6735143065452576, |
| "learning_rate": 4.408333333333334e-05, |
| "loss": 0.0796, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5561277033985582, |
| "grad_norm": 0.7762349843978882, |
| "learning_rate": 4.491666666666667e-05, |
| "loss": 0.0816, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5664263645726055, |
| "grad_norm": 0.613395631313324, |
| "learning_rate": 4.575e-05, |
| "loss": 0.0663, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.576725025746653, |
| "grad_norm": 0.5750753283500671, |
| "learning_rate": 4.658333333333333e-05, |
| "loss": 0.064, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5870236869207003, |
| "grad_norm": 0.7344352006912231, |
| "learning_rate": 4.741666666666667e-05, |
| "loss": 0.0708, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5973223480947477, |
| "grad_norm": 0.5173144340515137, |
| "learning_rate": 4.825e-05, |
| "loss": 0.0811, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.607621009268795, |
| "grad_norm": 0.6798299551010132, |
| "learning_rate": 4.908333333333334e-05, |
| "loss": 0.0694, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6179196704428425, |
| "grad_norm": 0.931536853313446, |
| "learning_rate": 4.991666666666667e-05, |
| "loss": 0.085, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6282183316168898, |
| "grad_norm": 0.9364974498748779, |
| "learning_rate": 5.075e-05, |
| "loss": 0.081, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6385169927909372, |
| "grad_norm": 0.8403638005256653, |
| "learning_rate": 5.158333333333334e-05, |
| "loss": 0.0695, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6488156539649845, |
| "grad_norm": 0.5034735798835754, |
| "learning_rate": 5.241666666666667e-05, |
| "loss": 0.0732, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.659114315139032, |
| "grad_norm": 0.7169724106788635, |
| "learning_rate": 5.325e-05, |
| "loss": 0.0735, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6694129763130793, |
| "grad_norm": 0.5860706567764282, |
| "learning_rate": 5.4083333333333345e-05, |
| "loss": 0.0678, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6797116374871267, |
| "grad_norm": 1.0557808876037598, |
| "learning_rate": 5.491666666666667e-05, |
| "loss": 0.073, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.690010298661174, |
| "grad_norm": 0.5684497952461243, |
| "learning_rate": 5.575e-05, |
| "loss": 0.0681, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7003089598352215, |
| "grad_norm": 0.9891376495361328, |
| "learning_rate": 5.658333333333333e-05, |
| "loss": 0.079, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7106076210092688, |
| "grad_norm": 0.6732739210128784, |
| "learning_rate": 5.7416666666666675e-05, |
| "loss": 0.0677, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7209062821833162, |
| "grad_norm": 0.6489456295967102, |
| "learning_rate": 5.8250000000000006e-05, |
| "loss": 0.0742, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7312049433573635, |
| "grad_norm": 0.6407278776168823, |
| "learning_rate": 5.908333333333334e-05, |
| "loss": 0.0772, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.741503604531411, |
| "grad_norm": 0.5639925599098206, |
| "learning_rate": 5.991666666666667e-05, |
| "loss": 0.0643, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7518022657054583, |
| "grad_norm": 0.9374951720237732, |
| "learning_rate": 6.0750000000000006e-05, |
| "loss": 0.0754, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7621009268795057, |
| "grad_norm": 0.6028985381126404, |
| "learning_rate": 6.158333333333334e-05, |
| "loss": 0.0659, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.772399588053553, |
| "grad_norm": 0.7816867232322693, |
| "learning_rate": 6.241666666666666e-05, |
| "loss": 0.0781, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7826982492276005, |
| "grad_norm": 0.5676434636116028, |
| "learning_rate": 6.324999999999999e-05, |
| "loss": 0.0632, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7929969104016478, |
| "grad_norm": 0.5514146685600281, |
| "learning_rate": 6.408333333333334e-05, |
| "loss": 0.0653, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8032955715756952, |
| "grad_norm": 0.4813036322593689, |
| "learning_rate": 6.491666666666667e-05, |
| "loss": 0.0718, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8135942327497425, |
| "grad_norm": 0.7320117354393005, |
| "learning_rate": 6.575e-05, |
| "loss": 0.0668, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.82389289392379, |
| "grad_norm": 0.8730434775352478, |
| "learning_rate": 6.658333333333334e-05, |
| "loss": 0.0732, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8341915550978373, |
| "grad_norm": 0.5224652886390686, |
| "learning_rate": 6.741666666666667e-05, |
| "loss": 0.0665, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8444902162718847, |
| "grad_norm": 0.7872908711433411, |
| "learning_rate": 6.825e-05, |
| "loss": 0.0675, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.854788877445932, |
| "grad_norm": 0.3085736930370331, |
| "learning_rate": 6.908333333333334e-05, |
| "loss": 0.069, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8650875386199794, |
| "grad_norm": 0.45653483271598816, |
| "learning_rate": 6.991666666666668e-05, |
| "loss": 0.0654, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8753861997940268, |
| "grad_norm": 0.7160809636116028, |
| "learning_rate": 7.075e-05, |
| "loss": 0.073, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8856848609680742, |
| "grad_norm": 0.7236250042915344, |
| "learning_rate": 7.158333333333333e-05, |
| "loss": 0.068, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8959835221421215, |
| "grad_norm": 0.696873664855957, |
| "learning_rate": 7.241666666666666e-05, |
| "loss": 0.0612, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9062821833161689, |
| "grad_norm": 0.6647109389305115, |
| "learning_rate": 7.325e-05, |
| "loss": 0.0627, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9165808444902163, |
| "grad_norm": 0.6832423806190491, |
| "learning_rate": 7.408333333333334e-05, |
| "loss": 0.0634, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9268795056642637, |
| "grad_norm": 0.5562313795089722, |
| "learning_rate": 7.491666666666667e-05, |
| "loss": 0.0679, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.937178166838311, |
| "grad_norm": 0.5476490259170532, |
| "learning_rate": 7.575e-05, |
| "loss": 0.0573, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9474768280123584, |
| "grad_norm": 0.5675793290138245, |
| "learning_rate": 7.658333333333334e-05, |
| "loss": 0.0643, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9577754891864058, |
| "grad_norm": 0.5144763588905334, |
| "learning_rate": 7.741666666666667e-05, |
| "loss": 0.0569, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9680741503604532, |
| "grad_norm": 0.6619398593902588, |
| "learning_rate": 7.825e-05, |
| "loss": 0.0649, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9783728115345005, |
| "grad_norm": 0.7253459692001343, |
| "learning_rate": 7.908333333333335e-05, |
| "loss": 0.069, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9886714727085479, |
| "grad_norm": 0.9297211766242981, |
| "learning_rate": 7.991666666666667e-05, |
| "loss": 0.0581, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9989701338825953, |
| "grad_norm": 0.3911210298538208, |
| "learning_rate": 8.075e-05, |
| "loss": 0.0621, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0092687950566426, |
| "grad_norm": 0.5856914520263672, |
| "learning_rate": 8.158333333333333e-05, |
| "loss": 0.0583, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.01956745623069, |
| "grad_norm": 0.5337209701538086, |
| "learning_rate": 8.241666666666667e-05, |
| "loss": 0.0652, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.0298661174047374, |
| "grad_norm": 0.6302834153175354, |
| "learning_rate": 8.325e-05, |
| "loss": 0.0639, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0401647785787849, |
| "grad_norm": 0.6350454092025757, |
| "learning_rate": 8.408333333333334e-05, |
| "loss": 0.0644, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.050463439752832, |
| "grad_norm": 0.4556646943092346, |
| "learning_rate": 8.491666666666667e-05, |
| "loss": 0.0652, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0607621009268795, |
| "grad_norm": 0.531972348690033, |
| "learning_rate": 8.575000000000001e-05, |
| "loss": 0.0638, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.071060762100927, |
| "grad_norm": 0.47181880474090576, |
| "learning_rate": 8.658333333333334e-05, |
| "loss": 0.0691, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0813594232749741, |
| "grad_norm": 0.5719209313392639, |
| "learning_rate": 8.741666666666667e-05, |
| "loss": 0.062, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0916580844490216, |
| "grad_norm": 0.5950759649276733, |
| "learning_rate": 8.825e-05, |
| "loss": 0.0751, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.101956745623069, |
| "grad_norm": 0.547822117805481, |
| "learning_rate": 8.908333333333333e-05, |
| "loss": 0.0611, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1122554067971164, |
| "grad_norm": 0.8676643967628479, |
| "learning_rate": 8.991666666666667e-05, |
| "loss": 0.0692, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1225540679711639, |
| "grad_norm": 0.6153643727302551, |
| "learning_rate": 9.075e-05, |
| "loss": 0.0622, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.132852729145211, |
| "grad_norm": 0.6054476499557495, |
| "learning_rate": 9.158333333333334e-05, |
| "loss": 0.0758, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1431513903192585, |
| "grad_norm": 0.7495082020759583, |
| "learning_rate": 9.241666666666667e-05, |
| "loss": 0.0762, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.153450051493306, |
| "grad_norm": 0.7987021207809448, |
| "learning_rate": 9.325e-05, |
| "loss": 0.0672, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.1637487126673531, |
| "grad_norm": 0.5528245568275452, |
| "learning_rate": 9.408333333333333e-05, |
| "loss": 0.0719, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1740473738414006, |
| "grad_norm": 0.6874306797981262, |
| "learning_rate": 9.491666666666668e-05, |
| "loss": 0.0743, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.184346035015448, |
| "grad_norm": 0.7587935924530029, |
| "learning_rate": 9.575000000000001e-05, |
| "loss": 0.0699, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1946446961894954, |
| "grad_norm": 0.6630555391311646, |
| "learning_rate": 9.658333333333334e-05, |
| "loss": 0.0705, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2049433573635429, |
| "grad_norm": 0.654225766658783, |
| "learning_rate": 9.741666666666667e-05, |
| "loss": 0.0755, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.21524201853759, |
| "grad_norm": 0.5464800000190735, |
| "learning_rate": 9.825e-05, |
| "loss": 0.0677, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2255406797116375, |
| "grad_norm": 0.6069100499153137, |
| "learning_rate": 9.908333333333333e-05, |
| "loss": 0.0642, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.235839340885685, |
| "grad_norm": 0.4604052007198334, |
| "learning_rate": 9.991666666666666e-05, |
| "loss": 0.0669, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2461380020597321, |
| "grad_norm": 0.6743689775466919, |
| "learning_rate": 9.999996155365815e-05, |
| "loss": 0.0642, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.2564366632337796, |
| "grad_norm": 0.7741529941558838, |
| "learning_rate": 9.999982865279924e-05, |
| "loss": 0.0634, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.266735324407827, |
| "grad_norm": 0.43897321820259094, |
| "learning_rate": 9.999960082302935e-05, |
| "loss": 0.0649, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.2770339855818744, |
| "grad_norm": 0.41431164741516113, |
| "learning_rate": 9.999927806478104e-05, |
| "loss": 0.0687, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.2873326467559219, |
| "grad_norm": 0.4003790020942688, |
| "learning_rate": 9.999886037866707e-05, |
| "loss": 0.072, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.297631307929969, |
| "grad_norm": 0.7988683581352234, |
| "learning_rate": 9.999834776548048e-05, |
| "loss": 0.0612, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.3079299691040165, |
| "grad_norm": 0.574458122253418, |
| "learning_rate": 9.99977402261945e-05, |
| "loss": 0.0676, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.318228630278064, |
| "grad_norm": 0.4979383945465088, |
| "learning_rate": 9.999703776196258e-05, |
| "loss": 0.0628, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.3285272914521111, |
| "grad_norm": 0.8276640772819519, |
| "learning_rate": 9.999624037411843e-05, |
| "loss": 0.0627, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3388259526261586, |
| "grad_norm": 0.35682398080825806, |
| "learning_rate": 9.999534806417596e-05, |
| "loss": 0.06, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.349124613800206, |
| "grad_norm": 0.683525562286377, |
| "learning_rate": 9.999436083382927e-05, |
| "loss": 0.0602, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.3594232749742534, |
| "grad_norm": 0.6512396335601807, |
| "learning_rate": 9.999327868495272e-05, |
| "loss": 0.0592, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.3697219361483008, |
| "grad_norm": 0.7462307214736938, |
| "learning_rate": 9.999210161960084e-05, |
| "loss": 0.0623, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.380020597322348, |
| "grad_norm": 0.5599775910377502, |
| "learning_rate": 9.999082964000844e-05, |
| "loss": 0.0643, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.3903192584963955, |
| "grad_norm": 0.6953631639480591, |
| "learning_rate": 9.998946274859042e-05, |
| "loss": 0.0604, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.400617919670443, |
| "grad_norm": 0.42685210704803467, |
| "learning_rate": 9.998800094794197e-05, |
| "loss": 0.0603, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.4109165808444901, |
| "grad_norm": 0.5579178929328918, |
| "learning_rate": 9.998644424083843e-05, |
| "loss": 0.0591, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.4212152420185376, |
| "grad_norm": 0.525526225566864, |
| "learning_rate": 9.998479263023536e-05, |
| "loss": 0.0521, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.431513903192585, |
| "grad_norm": 0.6368007659912109, |
| "learning_rate": 9.998304611926846e-05, |
| "loss": 0.0544, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4418125643666324, |
| "grad_norm": 0.7885141968727112, |
| "learning_rate": 9.998120471125365e-05, |
| "loss": 0.0606, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4521112255406798, |
| "grad_norm": 0.44973626732826233, |
| "learning_rate": 9.997926840968699e-05, |
| "loss": 0.0608, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.462409886714727, |
| "grad_norm": 0.5216629505157471, |
| "learning_rate": 9.99772372182447e-05, |
| "loss": 0.0651, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.4727085478887745, |
| "grad_norm": 0.2978615164756775, |
| "learning_rate": 9.99751111407832e-05, |
| "loss": 0.0552, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.483007209062822, |
| "grad_norm": 0.4659578502178192, |
| "learning_rate": 9.9972890181339e-05, |
| "loss": 0.0612, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.4933058702368691, |
| "grad_norm": 0.5563666224479675, |
| "learning_rate": 9.997057434412878e-05, |
| "loss": 0.0574, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5036045314109165, |
| "grad_norm": 0.6018580198287964, |
| "learning_rate": 9.996816363354937e-05, |
| "loss": 0.0599, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.513903192584964, |
| "grad_norm": 0.6983861327171326, |
| "learning_rate": 9.996565805417769e-05, |
| "loss": 0.0625, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.5242018537590112, |
| "grad_norm": 0.7207497358322144, |
| "learning_rate": 9.99630576107708e-05, |
| "loss": 0.0628, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.5345005149330588, |
| "grad_norm": 0.44567862153053284, |
| "learning_rate": 9.996036230826589e-05, |
| "loss": 0.057, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.544799176107106, |
| "grad_norm": 0.7830976843833923, |
| "learning_rate": 9.995757215178018e-05, |
| "loss": 0.0685, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5550978372811535, |
| "grad_norm": 0.5960754752159119, |
| "learning_rate": 9.995468714661105e-05, |
| "loss": 0.06, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.565396498455201, |
| "grad_norm": 0.40101906657218933, |
| "learning_rate": 9.995170729823588e-05, |
| "loss": 0.0577, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.575695159629248, |
| "grad_norm": 0.46922755241394043, |
| "learning_rate": 9.994863261231224e-05, |
| "loss": 0.0668, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.5859938208032955, |
| "grad_norm": 0.42105957865715027, |
| "learning_rate": 9.99454630946776e-05, |
| "loss": 0.0651, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.596292481977343, |
| "grad_norm": 0.4200706481933594, |
| "learning_rate": 9.994219875134962e-05, |
| "loss": 0.0586, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6065911431513902, |
| "grad_norm": 0.602203369140625, |
| "learning_rate": 9.99388395885259e-05, |
| "loss": 0.0523, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.6168898043254378, |
| "grad_norm": 0.5096521973609924, |
| "learning_rate": 9.993538561258411e-05, |
| "loss": 0.0578, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.627188465499485, |
| "grad_norm": 0.26851776242256165, |
| "learning_rate": 9.993183683008189e-05, |
| "loss": 0.0533, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.6374871266735325, |
| "grad_norm": 0.48737746477127075, |
| "learning_rate": 9.992819324775696e-05, |
| "loss": 0.0535, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.64778578784758, |
| "grad_norm": 0.7643722891807556, |
| "learning_rate": 9.992445487252691e-05, |
| "loss": 0.0606, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.658084449021627, |
| "grad_norm": 0.4624369740486145, |
| "learning_rate": 9.992062171148938e-05, |
| "loss": 0.0547, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.6683831101956745, |
| "grad_norm": 0.644801914691925, |
| "learning_rate": 9.991669377192196e-05, |
| "loss": 0.0516, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.678681771369722, |
| "grad_norm": 0.48216086626052856, |
| "learning_rate": 9.991267106128219e-05, |
| "loss": 0.051, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.6889804325437692, |
| "grad_norm": 0.5955299735069275, |
| "learning_rate": 9.99085535872075e-05, |
| "loss": 0.0576, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.6992790937178168, |
| "grad_norm": 0.650093674659729, |
| "learning_rate": 9.990434135751526e-05, |
| "loss": 0.0689, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.709577754891864, |
| "grad_norm": 0.4557111859321594, |
| "learning_rate": 9.990003438020276e-05, |
| "loss": 0.0499, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.7198764160659115, |
| "grad_norm": 0.49610280990600586, |
| "learning_rate": 9.989563266344718e-05, |
| "loss": 0.0575, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.730175077239959, |
| "grad_norm": 0.6158459782600403, |
| "learning_rate": 9.989113621560552e-05, |
| "loss": 0.0611, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.740473738414006, |
| "grad_norm": 0.6327665448188782, |
| "learning_rate": 9.988654504521469e-05, |
| "loss": 0.0689, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.7507723995880535, |
| "grad_norm": 0.5656455755233765, |
| "learning_rate": 9.988185916099139e-05, |
| "loss": 0.0596, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.761071060762101, |
| "grad_norm": 0.5649673938751221, |
| "learning_rate": 9.987707857183218e-05, |
| "loss": 0.0595, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.7713697219361482, |
| "grad_norm": 0.7367047071456909, |
| "learning_rate": 9.987220328681343e-05, |
| "loss": 0.0635, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.7816683831101958, |
| "grad_norm": 0.4531395435333252, |
| "learning_rate": 9.986723331519126e-05, |
| "loss": 0.0572, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.791967044284243, |
| "grad_norm": 0.7696741223335266, |
| "learning_rate": 9.986216866640159e-05, |
| "loss": 0.0477, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.8022657054582905, |
| "grad_norm": 0.8207795023918152, |
| "learning_rate": 9.985700935006009e-05, |
| "loss": 0.0602, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8125643666323379, |
| "grad_norm": 0.4749796390533447, |
| "learning_rate": 9.985175537596216e-05, |
| "loss": 0.0581, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.822863027806385, |
| "grad_norm": 0.48487281799316406, |
| "learning_rate": 9.98464067540829e-05, |
| "loss": 0.0579, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.8331616889804325, |
| "grad_norm": 0.58293217420578, |
| "learning_rate": 9.984096349457714e-05, |
| "loss": 0.056, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.84346035015448, |
| "grad_norm": 0.5481081008911133, |
| "learning_rate": 9.983542560777935e-05, |
| "loss": 0.061, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.8537590113285272, |
| "grad_norm": 0.49807319045066833, |
| "learning_rate": 9.982979310420369e-05, |
| "loss": 0.0557, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.8640576725025748, |
| "grad_norm": 0.5096905827522278, |
| "learning_rate": 9.982406599454395e-05, |
| "loss": 0.0604, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.874356333676622, |
| "grad_norm": 0.3464685082435608, |
| "learning_rate": 9.981824428967352e-05, |
| "loss": 0.0583, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.8846549948506695, |
| "grad_norm": 0.4973633885383606, |
| "learning_rate": 9.981232800064542e-05, |
| "loss": 0.0608, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.8949536560247169, |
| "grad_norm": 0.4537138044834137, |
| "learning_rate": 9.980631713869219e-05, |
| "loss": 0.0554, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.905252317198764, |
| "grad_norm": 0.5278320908546448, |
| "learning_rate": 9.9800211715226e-05, |
| "loss": 0.0587, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.9155509783728115, |
| "grad_norm": 0.5589116811752319, |
| "learning_rate": 9.979401174183849e-05, |
| "loss": 0.0719, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.925849639546859, |
| "grad_norm": 0.4185578525066376, |
| "learning_rate": 9.978771723030084e-05, |
| "loss": 0.0641, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.9361483007209062, |
| "grad_norm": 0.6495696306228638, |
| "learning_rate": 9.978132819256371e-05, |
| "loss": 0.0666, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.9464469618949538, |
| "grad_norm": 0.8356088995933533, |
| "learning_rate": 9.977484464075725e-05, |
| "loss": 0.0584, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.956745623069001, |
| "grad_norm": 0.397126704454422, |
| "learning_rate": 9.9768266587191e-05, |
| "loss": 0.0735, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.9670442842430484, |
| "grad_norm": 0.52510005235672, |
| "learning_rate": 9.976159404435397e-05, |
| "loss": 0.0709, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.9773429454170959, |
| "grad_norm": 0.44712215662002563, |
| "learning_rate": 9.975482702491457e-05, |
| "loss": 0.0606, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.987641606591143, |
| "grad_norm": 0.8051986694335938, |
| "learning_rate": 9.974796554172052e-05, |
| "loss": 0.0598, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.9979402677651905, |
| "grad_norm": 0.5369552969932556, |
| "learning_rate": 9.974100960779892e-05, |
| "loss": 0.0689, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.008238928939238, |
| "grad_norm": 0.6968551278114319, |
| "learning_rate": 9.973395923635627e-05, |
| "loss": 0.0539, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.018537590113285, |
| "grad_norm": 0.6285783052444458, |
| "learning_rate": 9.972681444077823e-05, |
| "loss": 0.0589, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.028836251287333, |
| "grad_norm": 0.48109880089759827, |
| "learning_rate": 9.97195752346298e-05, |
| "loss": 0.064, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.03913491246138, |
| "grad_norm": 0.3831437826156616, |
| "learning_rate": 9.971224163165527e-05, |
| "loss": 0.0656, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.049433573635427, |
| "grad_norm": 0.4746626913547516, |
| "learning_rate": 9.970481364577808e-05, |
| "loss": 0.054, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.059732234809475, |
| "grad_norm": 0.5078164339065552, |
| "learning_rate": 9.96972912911009e-05, |
| "loss": 0.0576, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.070030895983522, |
| "grad_norm": 0.39853551983833313, |
| "learning_rate": 9.968967458190554e-05, |
| "loss": 0.0497, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.0803295571575697, |
| "grad_norm": 0.32678648829460144, |
| "learning_rate": 9.9681963532653e-05, |
| "loss": 0.0524, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.090628218331617, |
| "grad_norm": 0.575631856918335, |
| "learning_rate": 9.967415815798331e-05, |
| "loss": 0.057, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.100926879505664, |
| "grad_norm": 0.3121809661388397, |
| "learning_rate": 9.966625847271569e-05, |
| "loss": 0.0563, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.111225540679712, |
| "grad_norm": 0.4975629150867462, |
| "learning_rate": 9.965826449184832e-05, |
| "loss": 0.0533, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.121524201853759, |
| "grad_norm": 0.4501200318336487, |
| "learning_rate": 9.965017623055848e-05, |
| "loss": 0.0485, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.131822863027806, |
| "grad_norm": 0.4712199568748474, |
| "learning_rate": 9.96419937042024e-05, |
| "loss": 0.0462, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.142121524201854, |
| "grad_norm": 0.3075672388076782, |
| "learning_rate": 9.963371692831528e-05, |
| "loss": 0.0533, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.152420185375901, |
| "grad_norm": 0.7105554342269897, |
| "learning_rate": 9.962534591861135e-05, |
| "loss": 0.0492, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.1627188465499483, |
| "grad_norm": 0.2947746515274048, |
| "learning_rate": 9.961688069098362e-05, |
| "loss": 0.0506, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.173017507723996, |
| "grad_norm": 0.4763787090778351, |
| "learning_rate": 9.960832126150405e-05, |
| "loss": 0.0529, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.183316168898043, |
| "grad_norm": 0.5647318363189697, |
| "learning_rate": 9.959966764642346e-05, |
| "loss": 0.0538, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.193614830072091, |
| "grad_norm": 0.4324939250946045, |
| "learning_rate": 9.959091986217146e-05, |
| "loss": 0.0626, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.203913491246138, |
| "grad_norm": 0.5161953568458557, |
| "learning_rate": 9.958207792535647e-05, |
| "loss": 0.0548, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.214212152420185, |
| "grad_norm": 0.3394765257835388, |
| "learning_rate": 9.957314185276564e-05, |
| "loss": 0.0653, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.224510813594233, |
| "grad_norm": 0.5128792524337769, |
| "learning_rate": 9.956411166136488e-05, |
| "loss": 0.0579, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.23480947476828, |
| "grad_norm": 0.3266693353652954, |
| "learning_rate": 9.955498736829875e-05, |
| "loss": 0.05, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.2451081359423277, |
| "grad_norm": 0.32357707619667053, |
| "learning_rate": 9.954576899089049e-05, |
| "loss": 0.0567, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.255406797116375, |
| "grad_norm": 0.2624098062515259, |
| "learning_rate": 9.953645654664202e-05, |
| "loss": 0.0566, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.265705458290422, |
| "grad_norm": 0.43851861357688904, |
| "learning_rate": 9.952705005323375e-05, |
| "loss": 0.0575, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.27600411946447, |
| "grad_norm": 0.4381640553474426, |
| "learning_rate": 9.951754952852474e-05, |
| "loss": 0.048, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.286302780638517, |
| "grad_norm": 0.41920751333236694, |
| "learning_rate": 9.950795499055252e-05, |
| "loss": 0.0613, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.296601441812564, |
| "grad_norm": 0.32066893577575684, |
| "learning_rate": 9.949826645753314e-05, |
| "loss": 0.0542, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.306900102986612, |
| "grad_norm": 0.40712329745292664, |
| "learning_rate": 9.94884839478611e-05, |
| "loss": 0.0533, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.317198764160659, |
| "grad_norm": 0.558935284614563, |
| "learning_rate": 9.947860748010933e-05, |
| "loss": 0.062, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.3274974253347063, |
| "grad_norm": 0.6141128540039062, |
| "learning_rate": 9.946863707302913e-05, |
| "loss": 0.0531, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.337796086508754, |
| "grad_norm": 0.5253733396530151, |
| "learning_rate": 9.945857274555017e-05, |
| "loss": 0.0516, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.348094747682801, |
| "grad_norm": 0.5007606744766235, |
| "learning_rate": 9.944841451678043e-05, |
| "loss": 0.0648, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.358393408856849, |
| "grad_norm": 0.4784156382083893, |
| "learning_rate": 9.943816240600614e-05, |
| "loss": 0.0678, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.368692070030896, |
| "grad_norm": 0.5175110101699829, |
| "learning_rate": 9.942781643269186e-05, |
| "loss": 0.0584, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.378990731204943, |
| "grad_norm": 0.41106414794921875, |
| "learning_rate": 9.941737661648024e-05, |
| "loss": 0.0617, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.389289392378991, |
| "grad_norm": 0.372574120759964, |
| "learning_rate": 9.940684297719218e-05, |
| "loss": 0.0587, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.399588053553038, |
| "grad_norm": 0.7082722187042236, |
| "learning_rate": 9.939621553482669e-05, |
| "loss": 0.066, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.4098867147270857, |
| "grad_norm": 0.28144243359565735, |
| "learning_rate": 9.938549430956085e-05, |
| "loss": 0.0549, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.420185375901133, |
| "grad_norm": 0.4946507513523102, |
| "learning_rate": 9.937467932174985e-05, |
| "loss": 0.0557, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.43048403707518, |
| "grad_norm": 0.4784681499004364, |
| "learning_rate": 9.936377059192683e-05, |
| "loss": 0.0575, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.4407826982492278, |
| "grad_norm": 0.47319531440734863, |
| "learning_rate": 9.935276814080294e-05, |
| "loss": 0.0612, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.451081359423275, |
| "grad_norm": 0.3896636664867401, |
| "learning_rate": 9.934167198926729e-05, |
| "loss": 0.0554, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.461380020597322, |
| "grad_norm": 0.4796706736087799, |
| "learning_rate": 9.933048215838682e-05, |
| "loss": 0.0588, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.47167868177137, |
| "grad_norm": 0.3464389145374298, |
| "learning_rate": 9.931919866940642e-05, |
| "loss": 0.0667, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.481977342945417, |
| "grad_norm": 0.3699023425579071, |
| "learning_rate": 9.930782154374874e-05, |
| "loss": 0.0498, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.4922760041194643, |
| "grad_norm": 0.3829363286495209, |
| "learning_rate": 9.929635080301418e-05, |
| "loss": 0.0539, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.502574665293512, |
| "grad_norm": 0.47303831577301025, |
| "learning_rate": 9.928478646898096e-05, |
| "loss": 0.0439, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.512873326467559, |
| "grad_norm": 0.5764483213424683, |
| "learning_rate": 9.92731285636049e-05, |
| "loss": 0.0547, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.5231719876416063, |
| "grad_norm": 0.3862007260322571, |
| "learning_rate": 9.926137710901956e-05, |
| "loss": 0.0485, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.533470648815654, |
| "grad_norm": 0.35379621386528015, |
| "learning_rate": 9.924953212753607e-05, |
| "loss": 0.0555, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.543769309989701, |
| "grad_norm": 0.32140642404556274, |
| "learning_rate": 9.923759364164311e-05, |
| "loss": 0.0495, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.554067971163749, |
| "grad_norm": 0.530728816986084, |
| "learning_rate": 9.922556167400692e-05, |
| "loss": 0.0601, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.564366632337796, |
| "grad_norm": 0.3118293285369873, |
| "learning_rate": 9.92134362474712e-05, |
| "loss": 0.0536, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.5746652935118437, |
| "grad_norm": 0.5819094777107239, |
| "learning_rate": 9.920121738505713e-05, |
| "loss": 0.0556, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.584963954685891, |
| "grad_norm": 0.3461971580982208, |
| "learning_rate": 9.91889051099632e-05, |
| "loss": 0.0572, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.595262615859938, |
| "grad_norm": 0.4987310767173767, |
| "learning_rate": 9.917649944556536e-05, |
| "loss": 0.0501, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.6055612770339858, |
| "grad_norm": 0.4364427626132965, |
| "learning_rate": 9.91640004154168e-05, |
| "loss": 0.0493, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.615859938208033, |
| "grad_norm": 0.5407469868659973, |
| "learning_rate": 9.915140804324797e-05, |
| "loss": 0.0515, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.62615859938208, |
| "grad_norm": 0.47441163659095764, |
| "learning_rate": 9.913872235296657e-05, |
| "loss": 0.0525, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.636457260556128, |
| "grad_norm": 0.6901610493659973, |
| "learning_rate": 9.912594336865745e-05, |
| "loss": 0.0522, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.646755921730175, |
| "grad_norm": 0.29979637265205383, |
| "learning_rate": 9.911307111458262e-05, |
| "loss": 0.0521, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.6570545829042223, |
| "grad_norm": 0.44594720005989075, |
| "learning_rate": 9.910010561518112e-05, |
| "loss": 0.0502, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.66735324407827, |
| "grad_norm": 0.36554035544395447, |
| "learning_rate": 9.908704689506904e-05, |
| "loss": 0.0545, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.677651905252317, |
| "grad_norm": 0.3384472727775574, |
| "learning_rate": 9.907389497903949e-05, |
| "loss": 0.0466, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.6879505664263643, |
| "grad_norm": 0.6370285749435425, |
| "learning_rate": 9.906064989206249e-05, |
| "loss": 0.0521, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.698249227600412, |
| "grad_norm": 0.4321992099285126, |
| "learning_rate": 9.904731165928497e-05, |
| "loss": 0.0486, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.708547888774459, |
| "grad_norm": 0.35261237621307373, |
| "learning_rate": 9.903388030603067e-05, |
| "loss": 0.0529, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.718846549948507, |
| "grad_norm": 0.2982620894908905, |
| "learning_rate": 9.902035585780016e-05, |
| "loss": 0.0457, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.729145211122554, |
| "grad_norm": 0.286214143037796, |
| "learning_rate": 9.900673834027077e-05, |
| "loss": 0.0449, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.7394438722966017, |
| "grad_norm": 0.41237062215805054, |
| "learning_rate": 9.899302777929649e-05, |
| "loss": 0.0552, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.749742533470649, |
| "grad_norm": 0.2523784935474396, |
| "learning_rate": 9.897922420090799e-05, |
| "loss": 0.0501, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.760041194644696, |
| "grad_norm": 0.5662415027618408, |
| "learning_rate": 9.896532763131253e-05, |
| "loss": 0.0521, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.7703398558187438, |
| "grad_norm": 0.45098549127578735, |
| "learning_rate": 9.895133809689391e-05, |
| "loss": 0.0504, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.780638516992791, |
| "grad_norm": 0.41485512256622314, |
| "learning_rate": 9.893725562421249e-05, |
| "loss": 0.0535, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.790937178166838, |
| "grad_norm": 0.42439576983451843, |
| "learning_rate": 9.8923080240005e-05, |
| "loss": 0.046, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.801235839340886, |
| "grad_norm": 0.5364951491355896, |
| "learning_rate": 9.890881197118462e-05, |
| "loss": 0.0492, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.811534500514933, |
| "grad_norm": 0.6617226600646973, |
| "learning_rate": 9.889445084484086e-05, |
| "loss": 0.0526, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.8218331616889802, |
| "grad_norm": 0.572847843170166, |
| "learning_rate": 9.887999688823955e-05, |
| "loss": 0.0551, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.832131822863028, |
| "grad_norm": 0.5418044328689575, |
| "learning_rate": 9.886545012882272e-05, |
| "loss": 0.052, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.842430484037075, |
| "grad_norm": 0.3684234321117401, |
| "learning_rate": 9.885081059420866e-05, |
| "loss": 0.0497, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.8527291452111223, |
| "grad_norm": 0.32835209369659424, |
| "learning_rate": 9.883607831219176e-05, |
| "loss": 0.0501, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.86302780638517, |
| "grad_norm": 0.4320078194141388, |
| "learning_rate": 9.882125331074251e-05, |
| "loss": 0.0557, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.873326467559217, |
| "grad_norm": 0.3763574957847595, |
| "learning_rate": 9.880633561800739e-05, |
| "loss": 0.0466, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.883625128733265, |
| "grad_norm": 0.267978310585022, |
| "learning_rate": 9.879132526230895e-05, |
| "loss": 0.0488, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.893923789907312, |
| "grad_norm": 0.35433250665664673, |
| "learning_rate": 9.87762222721456e-05, |
| "loss": 0.0499, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.9042224510813597, |
| "grad_norm": 0.5067030787467957, |
| "learning_rate": 9.876102667619166e-05, |
| "loss": 0.0443, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.914521112255407, |
| "grad_norm": 1.2673553228378296, |
| "learning_rate": 9.874573850329724e-05, |
| "loss": 0.0547, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.924819773429454, |
| "grad_norm": 0.53081876039505, |
| "learning_rate": 9.873035778248826e-05, |
| "loss": 0.05, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.9351184346035017, |
| "grad_norm": 0.343241810798645, |
| "learning_rate": 9.871488454296629e-05, |
| "loss": 0.0508, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.945417095777549, |
| "grad_norm": 0.2269151359796524, |
| "learning_rate": 9.86993188141086e-05, |
| "loss": 0.0485, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.955715756951596, |
| "grad_norm": 0.3384031355381012, |
| "learning_rate": 9.868366062546803e-05, |
| "loss": 0.0456, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.966014418125644, |
| "grad_norm": 0.3758116662502289, |
| "learning_rate": 9.866791000677302e-05, |
| "loss": 0.0543, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.976313079299691, |
| "grad_norm": 0.3721799850463867, |
| "learning_rate": 9.86520669879274e-05, |
| "loss": 0.0496, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.9866117404737382, |
| "grad_norm": 0.4035572409629822, |
| "learning_rate": 9.863613159901053e-05, |
| "loss": 0.0488, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.996910401647786, |
| "grad_norm": 0.36414894461631775, |
| "learning_rate": 9.862010387027706e-05, |
| "loss": 0.0599, |
| "step": 2910 |
| }, |
| { |
| "epoch": 3.007209062821833, |
| "grad_norm": 0.40540945529937744, |
| "learning_rate": 9.860398383215701e-05, |
| "loss": 0.05, |
| "step": 2920 |
| }, |
| { |
| "epoch": 3.0175077239958807, |
| "grad_norm": 0.39733192324638367, |
| "learning_rate": 9.858777151525564e-05, |
| "loss": 0.0456, |
| "step": 2930 |
| }, |
| { |
| "epoch": 3.027806385169928, |
| "grad_norm": 0.4590163230895996, |
| "learning_rate": 9.857146695035339e-05, |
| "loss": 0.0452, |
| "step": 2940 |
| }, |
| { |
| "epoch": 3.038105046343975, |
| "grad_norm": 0.3180115222930908, |
| "learning_rate": 9.855507016840587e-05, |
| "loss": 0.0463, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.048403707518023, |
| "grad_norm": 0.4976675510406494, |
| "learning_rate": 9.853858120054377e-05, |
| "loss": 0.061, |
| "step": 2960 |
| }, |
| { |
| "epoch": 3.05870236869207, |
| "grad_norm": 0.3708176612854004, |
| "learning_rate": 9.852200007807278e-05, |
| "loss": 0.0544, |
| "step": 2970 |
| }, |
| { |
| "epoch": 3.0690010298661172, |
| "grad_norm": 0.3110915422439575, |
| "learning_rate": 9.850532683247355e-05, |
| "loss": 0.0538, |
| "step": 2980 |
| }, |
| { |
| "epoch": 3.079299691040165, |
| "grad_norm": 0.38979801535606384, |
| "learning_rate": 9.848856149540168e-05, |
| "loss": 0.052, |
| "step": 2990 |
| }, |
| { |
| "epoch": 3.089598352214212, |
| "grad_norm": 0.558873176574707, |
| "learning_rate": 9.847170409868758e-05, |
| "loss": 0.05, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.0998970133882597, |
| "grad_norm": 0.5149732828140259, |
| "learning_rate": 9.845475467433643e-05, |
| "loss": 0.0582, |
| "step": 3010 |
| }, |
| { |
| "epoch": 3.110195674562307, |
| "grad_norm": 0.6636197566986084, |
| "learning_rate": 9.843771325452815e-05, |
| "loss": 0.061, |
| "step": 3020 |
| }, |
| { |
| "epoch": 3.120494335736354, |
| "grad_norm": 0.47130295634269714, |
| "learning_rate": 9.842057987161731e-05, |
| "loss": 0.0511, |
| "step": 3030 |
| }, |
| { |
| "epoch": 3.130792996910402, |
| "grad_norm": 0.4013822674751282, |
| "learning_rate": 9.840335455813312e-05, |
| "loss": 0.0513, |
| "step": 3040 |
| }, |
| { |
| "epoch": 3.141091658084449, |
| "grad_norm": 0.3371870219707489, |
| "learning_rate": 9.838603734677926e-05, |
| "loss": 0.0498, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.151390319258496, |
| "grad_norm": 0.3246757686138153, |
| "learning_rate": 9.836862827043396e-05, |
| "loss": 0.0525, |
| "step": 3060 |
| }, |
| { |
| "epoch": 3.161688980432544, |
| "grad_norm": 0.5342622995376587, |
| "learning_rate": 9.835112736214982e-05, |
| "loss": 0.059, |
| "step": 3070 |
| }, |
| { |
| "epoch": 3.171987641606591, |
| "grad_norm": 0.27634263038635254, |
| "learning_rate": 9.833353465515376e-05, |
| "loss": 0.0454, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.1822863027806383, |
| "grad_norm": 0.5118007659912109, |
| "learning_rate": 9.831585018284707e-05, |
| "loss": 0.0571, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.192584963954686, |
| "grad_norm": 0.42241352796554565, |
| "learning_rate": 9.829807397880519e-05, |
| "loss": 0.0517, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.202883625128733, |
| "grad_norm": 0.5368523001670837, |
| "learning_rate": 9.828020607677775e-05, |
| "loss": 0.0595, |
| "step": 3110 |
| }, |
| { |
| "epoch": 3.213182286302781, |
| "grad_norm": 0.5250363349914551, |
| "learning_rate": 9.826224651068852e-05, |
| "loss": 0.0544, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.223480947476828, |
| "grad_norm": 0.3914489448070526, |
| "learning_rate": 9.82441953146352e-05, |
| "loss": 0.0504, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.233779608650875, |
| "grad_norm": 0.42789822816848755, |
| "learning_rate": 9.822605252288953e-05, |
| "loss": 0.0539, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.244078269824923, |
| "grad_norm": 0.2896466553211212, |
| "learning_rate": 9.820781816989715e-05, |
| "loss": 0.044, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.25437693099897, |
| "grad_norm": 0.3793918788433075, |
| "learning_rate": 9.818949229027752e-05, |
| "loss": 0.0498, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.2646755921730177, |
| "grad_norm": 0.39543598890304565, |
| "learning_rate": 9.817107491882388e-05, |
| "loss": 0.0542, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.274974253347065, |
| "grad_norm": 0.38835805654525757, |
| "learning_rate": 9.815256609050316e-05, |
| "loss": 0.0474, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.285272914521112, |
| "grad_norm": 0.37964895367622375, |
| "learning_rate": 9.813396584045596e-05, |
| "loss": 0.0541, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.29557157569516, |
| "grad_norm": 0.4047639071941376, |
| "learning_rate": 9.811527420399639e-05, |
| "loss": 0.0564, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.305870236869207, |
| "grad_norm": 0.598591148853302, |
| "learning_rate": 9.809649121661214e-05, |
| "loss": 0.0579, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.316168898043254, |
| "grad_norm": 0.5548397302627563, |
| "learning_rate": 9.807761691396429e-05, |
| "loss": 0.049, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.326467559217302, |
| "grad_norm": 0.7035659551620483, |
| "learning_rate": 9.805865133188731e-05, |
| "loss": 0.0518, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.336766220391349, |
| "grad_norm": 0.7861883044242859, |
| "learning_rate": 9.803959450638896e-05, |
| "loss": 0.0547, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.3470648815653963, |
| "grad_norm": 0.4890913665294647, |
| "learning_rate": 9.802044647365023e-05, |
| "loss": 0.0473, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.357363542739444, |
| "grad_norm": 0.34270229935646057, |
| "learning_rate": 9.800120727002529e-05, |
| "loss": 0.0575, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.367662203913491, |
| "grad_norm": 0.4263719320297241, |
| "learning_rate": 9.798187693204141e-05, |
| "loss": 0.0516, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.377960865087539, |
| "grad_norm": 0.31996503472328186, |
| "learning_rate": 9.796245549639885e-05, |
| "loss": 0.0565, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.388259526261586, |
| "grad_norm": 0.3111179769039154, |
| "learning_rate": 9.794294299997088e-05, |
| "loss": 0.0518, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.398558187435633, |
| "grad_norm": 0.44092559814453125, |
| "learning_rate": 9.792333947980359e-05, |
| "loss": 0.0506, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.408856848609681, |
| "grad_norm": 0.27052825689315796, |
| "learning_rate": 9.790364497311597e-05, |
| "loss": 0.0526, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.419155509783728, |
| "grad_norm": 0.4862545132637024, |
| "learning_rate": 9.788385951729966e-05, |
| "loss": 0.043, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.4294541709577757, |
| "grad_norm": 0.3908790647983551, |
| "learning_rate": 9.786398314991907e-05, |
| "loss": 0.0536, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.439752832131823, |
| "grad_norm": 0.5570793151855469, |
| "learning_rate": 9.784401590871112e-05, |
| "loss": 0.0543, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.45005149330587, |
| "grad_norm": 0.34768927097320557, |
| "learning_rate": 9.782395783158537e-05, |
| "loss": 0.0464, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.460350154479918, |
| "grad_norm": 0.45262208580970764, |
| "learning_rate": 9.780380895662373e-05, |
| "loss": 0.0476, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.470648815653965, |
| "grad_norm": 0.3095530569553375, |
| "learning_rate": 9.778356932208055e-05, |
| "loss": 0.0582, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.480947476828012, |
| "grad_norm": 0.3530728220939636, |
| "learning_rate": 9.776323896638251e-05, |
| "loss": 0.0506, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.49124613800206, |
| "grad_norm": 0.34243884682655334, |
| "learning_rate": 9.774281792812852e-05, |
| "loss": 0.057, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.501544799176107, |
| "grad_norm": 0.4583176076412201, |
| "learning_rate": 9.772230624608961e-05, |
| "loss": 0.0559, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.5118434603501543, |
| "grad_norm": 0.3211122751235962, |
| "learning_rate": 9.770170395920899e-05, |
| "loss": 0.0497, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.522142121524202, |
| "grad_norm": 0.6675431132316589, |
| "learning_rate": 9.768101110660182e-05, |
| "loss": 0.0588, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.532440782698249, |
| "grad_norm": 0.47643566131591797, |
| "learning_rate": 9.766022772755525e-05, |
| "loss": 0.052, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.5427394438722963, |
| "grad_norm": 0.3988637626171112, |
| "learning_rate": 9.763935386152827e-05, |
| "loss": 0.0479, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.553038105046344, |
| "grad_norm": 0.3347717225551605, |
| "learning_rate": 9.76183895481517e-05, |
| "loss": 0.0517, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.563336766220391, |
| "grad_norm": 0.4554286599159241, |
| "learning_rate": 9.759733482722806e-05, |
| "loss": 0.0671, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.573635427394439, |
| "grad_norm": 0.39437830448150635, |
| "learning_rate": 9.757618973873152e-05, |
| "loss": 0.0456, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.583934088568486, |
| "grad_norm": 0.3920309245586395, |
| "learning_rate": 9.755495432280781e-05, |
| "loss": 0.0553, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.5942327497425337, |
| "grad_norm": 0.38523244857788086, |
| "learning_rate": 9.75336286197742e-05, |
| "loss": 0.0506, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.604531410916581, |
| "grad_norm": 0.3649066984653473, |
| "learning_rate": 9.751221267011929e-05, |
| "loss": 0.0544, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.614830072090628, |
| "grad_norm": 0.5154473185539246, |
| "learning_rate": 9.749070651450314e-05, |
| "loss": 0.0556, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.6251287332646758, |
| "grad_norm": 0.4133189618587494, |
| "learning_rate": 9.746911019375695e-05, |
| "loss": 0.0483, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.635427394438723, |
| "grad_norm": 0.39976581931114197, |
| "learning_rate": 9.744742374888321e-05, |
| "loss": 0.0545, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.64572605561277, |
| "grad_norm": 0.3134078085422516, |
| "learning_rate": 9.742564722105543e-05, |
| "loss": 0.05, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.656024716786818, |
| "grad_norm": 0.5159580707550049, |
| "learning_rate": 9.740378065161823e-05, |
| "loss": 0.0546, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.666323377960865, |
| "grad_norm": 0.3549450933933258, |
| "learning_rate": 9.738182408208714e-05, |
| "loss": 0.057, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.6766220391349123, |
| "grad_norm": 0.46382319927215576, |
| "learning_rate": 9.735977755414855e-05, |
| "loss": 0.0603, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.68692070030896, |
| "grad_norm": 0.43110236525535583, |
| "learning_rate": 9.733764110965966e-05, |
| "loss": 0.0524, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.697219361483007, |
| "grad_norm": 0.4503861367702484, |
| "learning_rate": 9.73154147906484e-05, |
| "loss": 0.0541, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.7075180226570543, |
| "grad_norm": 0.39958786964416504, |
| "learning_rate": 9.729309863931333e-05, |
| "loss": 0.053, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.717816683831102, |
| "grad_norm": 0.3972841203212738, |
| "learning_rate": 9.727069269802352e-05, |
| "loss": 0.0445, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.728115345005149, |
| "grad_norm": 0.38955458998680115, |
| "learning_rate": 9.724819700931858e-05, |
| "loss": 0.0477, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.738414006179197, |
| "grad_norm": 0.4105938673019409, |
| "learning_rate": 9.722561161590845e-05, |
| "loss": 0.0537, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.748712667353244, |
| "grad_norm": 0.5155870914459229, |
| "learning_rate": 9.720293656067345e-05, |
| "loss": 0.0484, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.7590113285272917, |
| "grad_norm": 0.6213602423667908, |
| "learning_rate": 9.718017188666406e-05, |
| "loss": 0.0568, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.769309989701339, |
| "grad_norm": 0.5646706223487854, |
| "learning_rate": 9.715731763710097e-05, |
| "loss": 0.0531, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.779608650875386, |
| "grad_norm": 0.3185526430606842, |
| "learning_rate": 9.713437385537489e-05, |
| "loss": 0.0481, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.7899073120494338, |
| "grad_norm": 0.34746411442756653, |
| "learning_rate": 9.711134058504653e-05, |
| "loss": 0.0469, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.800205973223481, |
| "grad_norm": 0.6697479486465454, |
| "learning_rate": 9.708821786984652e-05, |
| "loss": 0.05, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.810504634397528, |
| "grad_norm": 0.4407866597175598, |
| "learning_rate": 9.70650057536753e-05, |
| "loss": 0.049, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.820803295571576, |
| "grad_norm": 0.41101008653640747, |
| "learning_rate": 9.7041704280603e-05, |
| "loss": 0.0496, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.831101956745623, |
| "grad_norm": 0.4155142307281494, |
| "learning_rate": 9.70183134948695e-05, |
| "loss": 0.046, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.8414006179196702, |
| "grad_norm": 0.2584857940673828, |
| "learning_rate": 9.699483344088414e-05, |
| "loss": 0.0457, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.851699279093718, |
| "grad_norm": 0.348079115152359, |
| "learning_rate": 9.697126416322582e-05, |
| "loss": 0.0515, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.861997940267765, |
| "grad_norm": 0.34925252199172974, |
| "learning_rate": 9.694760570664278e-05, |
| "loss": 0.0503, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.8722966014418123, |
| "grad_norm": 0.39806774258613586, |
| "learning_rate": 9.692385811605263e-05, |
| "loss": 0.0534, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.88259526261586, |
| "grad_norm": 0.5874518156051636, |
| "learning_rate": 9.690002143654218e-05, |
| "loss": 0.0506, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.892893923789907, |
| "grad_norm": 0.4007057547569275, |
| "learning_rate": 9.687609571336739e-05, |
| "loss": 0.0576, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.903192584963955, |
| "grad_norm": 0.22046445310115814, |
| "learning_rate": 9.685208099195327e-05, |
| "loss": 0.0414, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.913491246138002, |
| "grad_norm": 0.28014832735061646, |
| "learning_rate": 9.682797731789378e-05, |
| "loss": 0.052, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.9237899073120497, |
| "grad_norm": 0.4397921562194824, |
| "learning_rate": 9.680378473695181e-05, |
| "loss": 0.0532, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.934088568486097, |
| "grad_norm": 0.3825766444206238, |
| "learning_rate": 9.677950329505902e-05, |
| "loss": 0.0567, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.944387229660144, |
| "grad_norm": 0.25868862867355347, |
| "learning_rate": 9.675513303831579e-05, |
| "loss": 0.0559, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.9546858908341918, |
| "grad_norm": 0.2990769147872925, |
| "learning_rate": 9.673067401299112e-05, |
| "loss": 0.05, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.964984552008239, |
| "grad_norm": 0.48668310046195984, |
| "learning_rate": 9.670612626552251e-05, |
| "loss": 0.0439, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.975283213182286, |
| "grad_norm": 0.27801236510276794, |
| "learning_rate": 9.668148984251597e-05, |
| "loss": 0.055, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.985581874356334, |
| "grad_norm": 0.4435347318649292, |
| "learning_rate": 9.665676479074583e-05, |
| "loss": 0.0514, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.995880535530381, |
| "grad_norm": 0.28677383065223694, |
| "learning_rate": 9.663195115715471e-05, |
| "loss": 0.0459, |
| "step": 3880 |
| }, |
| { |
| "epoch": 4.006179196704428, |
| "grad_norm": 0.3085887134075165, |
| "learning_rate": 9.660704898885337e-05, |
| "loss": 0.0475, |
| "step": 3890 |
| }, |
| { |
| "epoch": 4.016477857878476, |
| "grad_norm": 0.37418079376220703, |
| "learning_rate": 9.65820583331207e-05, |
| "loss": 0.05, |
| "step": 3900 |
| }, |
| { |
| "epoch": 4.0267765190525235, |
| "grad_norm": 0.3386285901069641, |
| "learning_rate": 9.655697923740357e-05, |
| "loss": 0.0491, |
| "step": 3910 |
| }, |
| { |
| "epoch": 4.03707518022657, |
| "grad_norm": 0.36807170510292053, |
| "learning_rate": 9.653181174931676e-05, |
| "loss": 0.0445, |
| "step": 3920 |
| }, |
| { |
| "epoch": 4.047373841400618, |
| "grad_norm": 0.39420345425605774, |
| "learning_rate": 9.65065559166429e-05, |
| "loss": 0.0487, |
| "step": 3930 |
| }, |
| { |
| "epoch": 4.057672502574666, |
| "grad_norm": 0.45534148812294006, |
| "learning_rate": 9.648121178733233e-05, |
| "loss": 0.0475, |
| "step": 3940 |
| }, |
| { |
| "epoch": 4.067971163748712, |
| "grad_norm": 0.552058756351471, |
| "learning_rate": 9.6455779409503e-05, |
| "loss": 0.0517, |
| "step": 3950 |
| }, |
| { |
| "epoch": 4.07826982492276, |
| "grad_norm": 0.5828810334205627, |
| "learning_rate": 9.643025883144046e-05, |
| "loss": 0.0489, |
| "step": 3960 |
| }, |
| { |
| "epoch": 4.088568486096808, |
| "grad_norm": 0.4259772002696991, |
| "learning_rate": 9.640465010159767e-05, |
| "loss": 0.0544, |
| "step": 3970 |
| }, |
| { |
| "epoch": 4.098867147270854, |
| "grad_norm": 0.6728827953338623, |
| "learning_rate": 9.637895326859498e-05, |
| "loss": 0.0531, |
| "step": 3980 |
| }, |
| { |
| "epoch": 4.109165808444902, |
| "grad_norm": 0.4142812490463257, |
| "learning_rate": 9.635316838122002e-05, |
| "loss": 0.0513, |
| "step": 3990 |
| }, |
| { |
| "epoch": 4.11946446961895, |
| "grad_norm": 0.4978756904602051, |
| "learning_rate": 9.632729548842756e-05, |
| "loss": 0.0464, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.1297631307929965, |
| "grad_norm": 0.4538825750350952, |
| "learning_rate": 9.630133463933948e-05, |
| "loss": 0.0512, |
| "step": 4010 |
| }, |
| { |
| "epoch": 4.140061791967044, |
| "grad_norm": 0.5200012922286987, |
| "learning_rate": 9.627528588324469e-05, |
| "loss": 0.0461, |
| "step": 4020 |
| }, |
| { |
| "epoch": 4.150360453141092, |
| "grad_norm": 0.5186310410499573, |
| "learning_rate": 9.62491492695989e-05, |
| "loss": 0.0529, |
| "step": 4030 |
| }, |
| { |
| "epoch": 4.1606591143151395, |
| "grad_norm": 0.34113621711730957, |
| "learning_rate": 9.622292484802473e-05, |
| "loss": 0.0631, |
| "step": 4040 |
| }, |
| { |
| "epoch": 4.170957775489186, |
| "grad_norm": 0.33333924412727356, |
| "learning_rate": 9.619661266831145e-05, |
| "loss": 0.0468, |
| "step": 4050 |
| }, |
| { |
| "epoch": 4.181256436663234, |
| "grad_norm": 0.41653305292129517, |
| "learning_rate": 9.617021278041499e-05, |
| "loss": 0.0536, |
| "step": 4060 |
| }, |
| { |
| "epoch": 4.1915550978372815, |
| "grad_norm": 0.2886696755886078, |
| "learning_rate": 9.614372523445771e-05, |
| "loss": 0.0496, |
| "step": 4070 |
| }, |
| { |
| "epoch": 4.201853759011328, |
| "grad_norm": 0.29733291268348694, |
| "learning_rate": 9.611715008072853e-05, |
| "loss": 0.0473, |
| "step": 4080 |
| }, |
| { |
| "epoch": 4.212152420185376, |
| "grad_norm": 0.332317590713501, |
| "learning_rate": 9.609048736968259e-05, |
| "loss": 0.0446, |
| "step": 4090 |
| }, |
| { |
| "epoch": 4.222451081359424, |
| "grad_norm": 0.30169472098350525, |
| "learning_rate": 9.606373715194133e-05, |
| "loss": 0.0531, |
| "step": 4100 |
| }, |
| { |
| "epoch": 4.23274974253347, |
| "grad_norm": 0.4030070900917053, |
| "learning_rate": 9.603689947829226e-05, |
| "loss": 0.0502, |
| "step": 4110 |
| }, |
| { |
| "epoch": 4.243048403707518, |
| "grad_norm": 0.6141675710678101, |
| "learning_rate": 9.600997439968904e-05, |
| "loss": 0.0459, |
| "step": 4120 |
| }, |
| { |
| "epoch": 4.253347064881566, |
| "grad_norm": 0.3945510685443878, |
| "learning_rate": 9.598296196725117e-05, |
| "loss": 0.0495, |
| "step": 4130 |
| }, |
| { |
| "epoch": 4.263645726055612, |
| "grad_norm": 0.3716789186000824, |
| "learning_rate": 9.595586223226405e-05, |
| "loss": 0.0483, |
| "step": 4140 |
| }, |
| { |
| "epoch": 4.27394438722966, |
| "grad_norm": 0.31221863627433777, |
| "learning_rate": 9.592867524617882e-05, |
| "loss": 0.0501, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.284243048403708, |
| "grad_norm": 0.37691348791122437, |
| "learning_rate": 9.59014010606123e-05, |
| "loss": 0.0469, |
| "step": 4160 |
| }, |
| { |
| "epoch": 4.2945417095777545, |
| "grad_norm": 0.3065001666545868, |
| "learning_rate": 9.58740397273468e-05, |
| "loss": 0.0546, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.304840370751802, |
| "grad_norm": 0.48879119753837585, |
| "learning_rate": 9.584659129833018e-05, |
| "loss": 0.049, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.31513903192585, |
| "grad_norm": 0.5020750164985657, |
| "learning_rate": 9.581905582567557e-05, |
| "loss": 0.0538, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.325437693099897, |
| "grad_norm": 0.29413041472435, |
| "learning_rate": 9.579143336166142e-05, |
| "loss": 0.0422, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.335736354273944, |
| "grad_norm": 0.23740436136722565, |
| "learning_rate": 9.57637239587313e-05, |
| "loss": 0.0445, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.346035015447992, |
| "grad_norm": 0.34985145926475525, |
| "learning_rate": 9.573592766949387e-05, |
| "loss": 0.0459, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.3563336766220395, |
| "grad_norm": 0.396068811416626, |
| "learning_rate": 9.570804454672274e-05, |
| "loss": 0.0417, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.366632337796086, |
| "grad_norm": 0.45746490359306335, |
| "learning_rate": 9.568007464335637e-05, |
| "loss": 0.0532, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.376930998970134, |
| "grad_norm": 0.36292997002601624, |
| "learning_rate": 9.565201801249801e-05, |
| "loss": 0.0524, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.387229660144182, |
| "grad_norm": 0.27282604575157166, |
| "learning_rate": 9.562387470741554e-05, |
| "loss": 0.0488, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.397528321318228, |
| "grad_norm": 0.29837465286254883, |
| "learning_rate": 9.559564478154141e-05, |
| "loss": 0.0547, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.407826982492276, |
| "grad_norm": 0.43137961626052856, |
| "learning_rate": 9.556732828847253e-05, |
| "loss": 0.0453, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.418125643666324, |
| "grad_norm": 0.4074147343635559, |
| "learning_rate": 9.553892528197015e-05, |
| "loss": 0.0463, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.42842430484037, |
| "grad_norm": 0.3655324876308441, |
| "learning_rate": 9.551043581595979e-05, |
| "loss": 0.0501, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.438722966014418, |
| "grad_norm": 0.2569686472415924, |
| "learning_rate": 9.548185994453111e-05, |
| "loss": 0.0477, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.449021627188466, |
| "grad_norm": 0.27901747822761536, |
| "learning_rate": 9.545319772193784e-05, |
| "loss": 0.0444, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.4593202883625125, |
| "grad_norm": 0.6347026228904724, |
| "learning_rate": 9.542444920259763e-05, |
| "loss": 0.0483, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.46961894953656, |
| "grad_norm": 0.24210530519485474, |
| "learning_rate": 9.539561444109199e-05, |
| "loss": 0.0474, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.479917610710608, |
| "grad_norm": 0.7740889191627502, |
| "learning_rate": 9.536669349216613e-05, |
| "loss": 0.0513, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.490216271884655, |
| "grad_norm": 0.29452022910118103, |
| "learning_rate": 9.533768641072893e-05, |
| "loss": 0.0422, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.500514933058702, |
| "grad_norm": 0.42809170484542847, |
| "learning_rate": 9.530859325185282e-05, |
| "loss": 0.0531, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.51081359423275, |
| "grad_norm": 0.4382365047931671, |
| "learning_rate": 9.52794140707736e-05, |
| "loss": 0.0507, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.521112255406797, |
| "grad_norm": 0.2706674635410309, |
| "learning_rate": 9.52501489228904e-05, |
| "loss": 0.0406, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.531410916580844, |
| "grad_norm": 0.32525891065597534, |
| "learning_rate": 9.522079786376563e-05, |
| "loss": 0.0566, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.541709577754892, |
| "grad_norm": 0.4712689220905304, |
| "learning_rate": 9.519136094912473e-05, |
| "loss": 0.0502, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.55200823892894, |
| "grad_norm": 0.4823038578033447, |
| "learning_rate": 9.516183823485618e-05, |
| "loss": 0.048, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.562306900102986, |
| "grad_norm": 0.5360227227210999, |
| "learning_rate": 9.513222977701137e-05, |
| "loss": 0.0482, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.572605561277034, |
| "grad_norm": 0.5071778893470764, |
| "learning_rate": 9.510253563180447e-05, |
| "loss": 0.0483, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.582904222451082, |
| "grad_norm": 0.306821346282959, |
| "learning_rate": 9.507275585561229e-05, |
| "loss": 0.0535, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.593202883625128, |
| "grad_norm": 0.3385528326034546, |
| "learning_rate": 9.504289050497429e-05, |
| "loss": 0.0495, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.603501544799176, |
| "grad_norm": 0.3122556209564209, |
| "learning_rate": 9.501293963659237e-05, |
| "loss": 0.0497, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.613800205973224, |
| "grad_norm": 0.4538201093673706, |
| "learning_rate": 9.498290330733078e-05, |
| "loss": 0.0497, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.6240988671472705, |
| "grad_norm": 0.3034268617630005, |
| "learning_rate": 9.495278157421604e-05, |
| "loss": 0.0492, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.634397528321318, |
| "grad_norm": 0.3226306140422821, |
| "learning_rate": 9.492257449443683e-05, |
| "loss": 0.0479, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.644696189495366, |
| "grad_norm": 0.30091768503189087, |
| "learning_rate": 9.48922821253438e-05, |
| "loss": 0.0523, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.6549948506694125, |
| "grad_norm": 0.28747081756591797, |
| "learning_rate": 9.486190452444963e-05, |
| "loss": 0.0529, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.66529351184346, |
| "grad_norm": 0.352568656206131, |
| "learning_rate": 9.483144174942874e-05, |
| "loss": 0.0406, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.675592173017508, |
| "grad_norm": 0.7357869148254395, |
| "learning_rate": 9.480089385811733e-05, |
| "loss": 0.0519, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.6858908341915555, |
| "grad_norm": 0.38945066928863525, |
| "learning_rate": 9.477026090851311e-05, |
| "loss": 0.0535, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.696189495365602, |
| "grad_norm": 0.2964130640029907, |
| "learning_rate": 9.473954295877535e-05, |
| "loss": 0.0485, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.70648815653965, |
| "grad_norm": 0.3560560643672943, |
| "learning_rate": 9.470874006722468e-05, |
| "loss": 0.0498, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.716786817713698, |
| "grad_norm": 0.3421489894390106, |
| "learning_rate": 9.4677852292343e-05, |
| "loss": 0.0526, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.727085478887744, |
| "grad_norm": 0.36280402541160583, |
| "learning_rate": 9.464687969277338e-05, |
| "loss": 0.0424, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.737384140061792, |
| "grad_norm": 0.2949029505252838, |
| "learning_rate": 9.46158223273199e-05, |
| "loss": 0.0471, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.74768280123584, |
| "grad_norm": 0.20883090794086456, |
| "learning_rate": 9.458468025494763e-05, |
| "loss": 0.0514, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.757981462409886, |
| "grad_norm": 0.3708082139492035, |
| "learning_rate": 9.45534535347824e-05, |
| "loss": 0.0538, |
| "step": 4620 |
| }, |
| { |
| "epoch": 4.768280123583934, |
| "grad_norm": 0.2781459093093872, |
| "learning_rate": 9.452214222611079e-05, |
| "loss": 0.0508, |
| "step": 4630 |
| }, |
| { |
| "epoch": 4.778578784757982, |
| "grad_norm": 0.3967878818511963, |
| "learning_rate": 9.449074638837999e-05, |
| "loss": 0.0427, |
| "step": 4640 |
| }, |
| { |
| "epoch": 4.7888774459320285, |
| "grad_norm": 0.4552743434906006, |
| "learning_rate": 9.445926608119765e-05, |
| "loss": 0.0534, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.799176107106076, |
| "grad_norm": 0.27807968854904175, |
| "learning_rate": 9.442770136433181e-05, |
| "loss": 0.0615, |
| "step": 4660 |
| }, |
| { |
| "epoch": 4.809474768280124, |
| "grad_norm": 0.46061357855796814, |
| "learning_rate": 9.439605229771074e-05, |
| "loss": 0.0507, |
| "step": 4670 |
| }, |
| { |
| "epoch": 4.819773429454171, |
| "grad_norm": 0.22432656586170197, |
| "learning_rate": 9.436431894142288e-05, |
| "loss": 0.0447, |
| "step": 4680 |
| }, |
| { |
| "epoch": 4.830072090628218, |
| "grad_norm": 0.2655046284198761, |
| "learning_rate": 9.43325013557167e-05, |
| "loss": 0.0522, |
| "step": 4690 |
| }, |
| { |
| "epoch": 4.840370751802266, |
| "grad_norm": 0.30245622992515564, |
| "learning_rate": 9.430059960100057e-05, |
| "loss": 0.0573, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.850669412976313, |
| "grad_norm": 0.40526264905929565, |
| "learning_rate": 9.42686137378427e-05, |
| "loss": 0.0509, |
| "step": 4710 |
| }, |
| { |
| "epoch": 4.86096807415036, |
| "grad_norm": 0.5215529203414917, |
| "learning_rate": 9.423654382697096e-05, |
| "loss": 0.0525, |
| "step": 4720 |
| }, |
| { |
| "epoch": 4.871266735324408, |
| "grad_norm": 0.25576338171958923, |
| "learning_rate": 9.420438992927276e-05, |
| "loss": 0.0456, |
| "step": 4730 |
| }, |
| { |
| "epoch": 4.8815653964984556, |
| "grad_norm": 0.4159855544567108, |
| "learning_rate": 9.417215210579502e-05, |
| "loss": 0.047, |
| "step": 4740 |
| }, |
| { |
| "epoch": 4.891864057672502, |
| "grad_norm": 0.3707447946071625, |
| "learning_rate": 9.4139830417744e-05, |
| "loss": 0.0512, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.90216271884655, |
| "grad_norm": 1.0935066938400269, |
| "learning_rate": 9.41074249264851e-05, |
| "loss": 0.0463, |
| "step": 4760 |
| }, |
| { |
| "epoch": 4.912461380020598, |
| "grad_norm": 0.1729433387517929, |
| "learning_rate": 9.407493569354296e-05, |
| "loss": 0.052, |
| "step": 4770 |
| }, |
| { |
| "epoch": 4.922760041194644, |
| "grad_norm": 0.5101364850997925, |
| "learning_rate": 9.404236278060111e-05, |
| "loss": 0.0452, |
| "step": 4780 |
| }, |
| { |
| "epoch": 4.933058702368692, |
| "grad_norm": 0.4424050748348236, |
| "learning_rate": 9.400970624950198e-05, |
| "loss": 0.0469, |
| "step": 4790 |
| }, |
| { |
| "epoch": 4.94335736354274, |
| "grad_norm": 0.4668126106262207, |
| "learning_rate": 9.397696616224679e-05, |
| "loss": 0.0475, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.9536560247167865, |
| "grad_norm": 0.39517247676849365, |
| "learning_rate": 9.394414258099533e-05, |
| "loss": 0.0532, |
| "step": 4810 |
| }, |
| { |
| "epoch": 4.963954685890834, |
| "grad_norm": 0.30740824341773987, |
| "learning_rate": 9.391123556806598e-05, |
| "loss": 0.046, |
| "step": 4820 |
| }, |
| { |
| "epoch": 4.974253347064882, |
| "grad_norm": 0.5773882865905762, |
| "learning_rate": 9.387824518593546e-05, |
| "loss": 0.0518, |
| "step": 4830 |
| }, |
| { |
| "epoch": 4.9845520082389285, |
| "grad_norm": 0.35598331689834595, |
| "learning_rate": 9.384517149723885e-05, |
| "loss": 0.0596, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.994850669412976, |
| "grad_norm": 0.37489575147628784, |
| "learning_rate": 9.381201456476933e-05, |
| "loss": 0.051, |
| "step": 4850 |
| }, |
| { |
| "epoch": 5.005149330587024, |
| "grad_norm": 0.539053201675415, |
| "learning_rate": 9.377877445147812e-05, |
| "loss": 0.0502, |
| "step": 4860 |
| }, |
| { |
| "epoch": 5.0154479917610715, |
| "grad_norm": 0.26694029569625854, |
| "learning_rate": 9.374545122047443e-05, |
| "loss": 0.0509, |
| "step": 4870 |
| }, |
| { |
| "epoch": 5.025746652935118, |
| "grad_norm": 0.3624720573425293, |
| "learning_rate": 9.371204493502522e-05, |
| "loss": 0.0482, |
| "step": 4880 |
| }, |
| { |
| "epoch": 5.036045314109166, |
| "grad_norm": 0.480744332075119, |
| "learning_rate": 9.367855565855514e-05, |
| "loss": 0.0422, |
| "step": 4890 |
| }, |
| { |
| "epoch": 5.0463439752832135, |
| "grad_norm": 0.3989076018333435, |
| "learning_rate": 9.364498345464647e-05, |
| "loss": 0.0477, |
| "step": 4900 |
| }, |
| { |
| "epoch": 5.05664263645726, |
| "grad_norm": 0.23205599188804626, |
| "learning_rate": 9.361132838703882e-05, |
| "loss": 0.0494, |
| "step": 4910 |
| }, |
| { |
| "epoch": 5.066941297631308, |
| "grad_norm": 0.39161673188209534, |
| "learning_rate": 9.357759051962921e-05, |
| "loss": 0.0459, |
| "step": 4920 |
| }, |
| { |
| "epoch": 5.077239958805356, |
| "grad_norm": 0.32930952310562134, |
| "learning_rate": 9.354376991647184e-05, |
| "loss": 0.0457, |
| "step": 4930 |
| }, |
| { |
| "epoch": 5.087538619979402, |
| "grad_norm": 0.3652319014072418, |
| "learning_rate": 9.350986664177802e-05, |
| "loss": 0.0395, |
| "step": 4940 |
| }, |
| { |
| "epoch": 5.09783728115345, |
| "grad_norm": 0.27907899022102356, |
| "learning_rate": 9.347588075991596e-05, |
| "loss": 0.0557, |
| "step": 4950 |
| }, |
| { |
| "epoch": 5.108135942327498, |
| "grad_norm": 0.366009384393692, |
| "learning_rate": 9.344181233541072e-05, |
| "loss": 0.054, |
| "step": 4960 |
| }, |
| { |
| "epoch": 5.1184346035015444, |
| "grad_norm": 0.506394624710083, |
| "learning_rate": 9.340766143294412e-05, |
| "loss": 0.0492, |
| "step": 4970 |
| }, |
| { |
| "epoch": 5.128733264675592, |
| "grad_norm": 0.39846888184547424, |
| "learning_rate": 9.337342811735454e-05, |
| "loss": 0.0535, |
| "step": 4980 |
| }, |
| { |
| "epoch": 5.13903192584964, |
| "grad_norm": 0.4480404257774353, |
| "learning_rate": 9.333911245363682e-05, |
| "loss": 0.0464, |
| "step": 4990 |
| }, |
| { |
| "epoch": 5.1493305870236865, |
| "grad_norm": 0.30809465050697327, |
| "learning_rate": 9.330471450694216e-05, |
| "loss": 0.0524, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.159629248197734, |
| "grad_norm": 0.4051932692527771, |
| "learning_rate": 9.327023434257796e-05, |
| "loss": 0.0503, |
| "step": 5010 |
| }, |
| { |
| "epoch": 5.169927909371782, |
| "grad_norm": 0.5069543123245239, |
| "learning_rate": 9.323567202600776e-05, |
| "loss": 0.0507, |
| "step": 5020 |
| }, |
| { |
| "epoch": 5.1802265705458295, |
| "grad_norm": 0.4128740131855011, |
| "learning_rate": 9.320102762285103e-05, |
| "loss": 0.0549, |
| "step": 5030 |
| }, |
| { |
| "epoch": 5.190525231719876, |
| "grad_norm": 0.4237695038318634, |
| "learning_rate": 9.31663011988831e-05, |
| "loss": 0.0455, |
| "step": 5040 |
| }, |
| { |
| "epoch": 5.200823892893924, |
| "grad_norm": 0.6371134519577026, |
| "learning_rate": 9.313149282003507e-05, |
| "loss": 0.0524, |
| "step": 5050 |
| }, |
| { |
| "epoch": 5.2111225540679715, |
| "grad_norm": 0.4311446249485016, |
| "learning_rate": 9.309660255239352e-05, |
| "loss": 0.0404, |
| "step": 5060 |
| }, |
| { |
| "epoch": 5.221421215242018, |
| "grad_norm": 0.3251447081565857, |
| "learning_rate": 9.306163046220064e-05, |
| "loss": 0.0518, |
| "step": 5070 |
| }, |
| { |
| "epoch": 5.231719876416066, |
| "grad_norm": 0.4141363799571991, |
| "learning_rate": 9.30265766158539e-05, |
| "loss": 0.0512, |
| "step": 5080 |
| }, |
| { |
| "epoch": 5.242018537590114, |
| "grad_norm": 0.3799976408481598, |
| "learning_rate": 9.299144107990596e-05, |
| "loss": 0.0438, |
| "step": 5090 |
| }, |
| { |
| "epoch": 5.25231719876416, |
| "grad_norm": 0.3431903123855591, |
| "learning_rate": 9.295622392106466e-05, |
| "loss": 0.0594, |
| "step": 5100 |
| }, |
| { |
| "epoch": 5.262615859938208, |
| "grad_norm": 0.360916405916214, |
| "learning_rate": 9.292092520619273e-05, |
| "loss": 0.0506, |
| "step": 5110 |
| }, |
| { |
| "epoch": 5.272914521112256, |
| "grad_norm": 0.559694766998291, |
| "learning_rate": 9.288554500230777e-05, |
| "loss": 0.0517, |
| "step": 5120 |
| }, |
| { |
| "epoch": 5.283213182286302, |
| "grad_norm": 0.6030452251434326, |
| "learning_rate": 9.285008337658212e-05, |
| "loss": 0.0469, |
| "step": 5130 |
| }, |
| { |
| "epoch": 5.29351184346035, |
| "grad_norm": 0.4279625713825226, |
| "learning_rate": 9.281454039634265e-05, |
| "loss": 0.0489, |
| "step": 5140 |
| }, |
| { |
| "epoch": 5.303810504634398, |
| "grad_norm": 0.525072455406189, |
| "learning_rate": 9.277891612907074e-05, |
| "loss": 0.0522, |
| "step": 5150 |
| }, |
| { |
| "epoch": 5.3141091658084445, |
| "grad_norm": 0.35859811305999756, |
| "learning_rate": 9.274321064240207e-05, |
| "loss": 0.055, |
| "step": 5160 |
| }, |
| { |
| "epoch": 5.324407826982492, |
| "grad_norm": 0.33742043375968933, |
| "learning_rate": 9.270742400412653e-05, |
| "loss": 0.0408, |
| "step": 5170 |
| }, |
| { |
| "epoch": 5.33470648815654, |
| "grad_norm": 0.5135817527770996, |
| "learning_rate": 9.26715562821881e-05, |
| "loss": 0.0452, |
| "step": 5180 |
| }, |
| { |
| "epoch": 5.3450051493305875, |
| "grad_norm": 0.4040980041027069, |
| "learning_rate": 9.263560754468468e-05, |
| "loss": 0.0486, |
| "step": 5190 |
| }, |
| { |
| "epoch": 5.355303810504634, |
| "grad_norm": 0.5518568158149719, |
| "learning_rate": 9.259957785986799e-05, |
| "loss": 0.0526, |
| "step": 5200 |
| }, |
| { |
| "epoch": 5.365602471678682, |
| "grad_norm": 0.31775131821632385, |
| "learning_rate": 9.256346729614342e-05, |
| "loss": 0.0462, |
| "step": 5210 |
| }, |
| { |
| "epoch": 5.3759011328527295, |
| "grad_norm": 0.34002628922462463, |
| "learning_rate": 9.252727592206997e-05, |
| "loss": 0.0521, |
| "step": 5220 |
| }, |
| { |
| "epoch": 5.386199794026776, |
| "grad_norm": 0.4139520227909088, |
| "learning_rate": 9.249100380636003e-05, |
| "loss": 0.0476, |
| "step": 5230 |
| }, |
| { |
| "epoch": 5.396498455200824, |
| "grad_norm": 0.268572062253952, |
| "learning_rate": 9.245465101787927e-05, |
| "loss": 0.0432, |
| "step": 5240 |
| }, |
| { |
| "epoch": 5.406797116374872, |
| "grad_norm": 0.32383212447166443, |
| "learning_rate": 9.241821762564653e-05, |
| "loss": 0.0408, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.417095777548918, |
| "grad_norm": 0.38563185930252075, |
| "learning_rate": 9.23817036988337e-05, |
| "loss": 0.0497, |
| "step": 5260 |
| }, |
| { |
| "epoch": 5.427394438722966, |
| "grad_norm": 0.27567192912101746, |
| "learning_rate": 9.234510930676558e-05, |
| "loss": 0.0486, |
| "step": 5270 |
| }, |
| { |
| "epoch": 5.437693099897014, |
| "grad_norm": 0.563594400882721, |
| "learning_rate": 9.23084345189197e-05, |
| "loss": 0.05, |
| "step": 5280 |
| }, |
| { |
| "epoch": 5.44799176107106, |
| "grad_norm": 0.4761587381362915, |
| "learning_rate": 9.227167940492626e-05, |
| "loss": 0.0479, |
| "step": 5290 |
| }, |
| { |
| "epoch": 5.458290422245108, |
| "grad_norm": 0.38452261686325073, |
| "learning_rate": 9.223484403456797e-05, |
| "loss": 0.045, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.468589083419156, |
| "grad_norm": 0.32944151759147644, |
| "learning_rate": 9.219792847777989e-05, |
| "loss": 0.0496, |
| "step": 5310 |
| }, |
| { |
| "epoch": 5.4788877445932025, |
| "grad_norm": 0.4086971580982208, |
| "learning_rate": 9.216093280464933e-05, |
| "loss": 0.0497, |
| "step": 5320 |
| }, |
| { |
| "epoch": 5.48918640576725, |
| "grad_norm": 0.20752595365047455, |
| "learning_rate": 9.212385708541571e-05, |
| "loss": 0.0471, |
| "step": 5330 |
| }, |
| { |
| "epoch": 5.499485066941298, |
| "grad_norm": 0.38301628828048706, |
| "learning_rate": 9.208670139047041e-05, |
| "loss": 0.0502, |
| "step": 5340 |
| }, |
| { |
| "epoch": 5.509783728115345, |
| "grad_norm": 0.2867702543735504, |
| "learning_rate": 9.204946579035671e-05, |
| "loss": 0.0465, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.520082389289392, |
| "grad_norm": 0.3082829415798187, |
| "learning_rate": 9.201215035576953e-05, |
| "loss": 0.0509, |
| "step": 5360 |
| }, |
| { |
| "epoch": 5.53038105046344, |
| "grad_norm": 0.36042046546936035, |
| "learning_rate": 9.197475515755535e-05, |
| "loss": 0.047, |
| "step": 5370 |
| }, |
| { |
| "epoch": 5.5406797116374875, |
| "grad_norm": 0.39044928550720215, |
| "learning_rate": 9.193728026671218e-05, |
| "loss": 0.0482, |
| "step": 5380 |
| }, |
| { |
| "epoch": 5.550978372811534, |
| "grad_norm": 0.4395255148410797, |
| "learning_rate": 9.189972575438923e-05, |
| "loss": 0.0488, |
| "step": 5390 |
| }, |
| { |
| "epoch": 5.561277033985582, |
| "grad_norm": 0.35821837186813354, |
| "learning_rate": 9.186209169188695e-05, |
| "loss": 0.0509, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.57157569515963, |
| "grad_norm": 0.6066220998764038, |
| "learning_rate": 9.182437815065679e-05, |
| "loss": 0.0516, |
| "step": 5410 |
| }, |
| { |
| "epoch": 5.581874356333676, |
| "grad_norm": 0.3900398910045624, |
| "learning_rate": 9.178658520230108e-05, |
| "loss": 0.0489, |
| "step": 5420 |
| }, |
| { |
| "epoch": 5.592173017507724, |
| "grad_norm": 0.4040411114692688, |
| "learning_rate": 9.174871291857296e-05, |
| "loss": 0.0402, |
| "step": 5430 |
| }, |
| { |
| "epoch": 5.602471678681772, |
| "grad_norm": 0.49822893738746643, |
| "learning_rate": 9.171076137137617e-05, |
| "loss": 0.0479, |
| "step": 5440 |
| }, |
| { |
| "epoch": 5.612770339855818, |
| "grad_norm": 0.4336659610271454, |
| "learning_rate": 9.167273063276493e-05, |
| "loss": 0.0484, |
| "step": 5450 |
| }, |
| { |
| "epoch": 5.623069001029866, |
| "grad_norm": 0.5972550511360168, |
| "learning_rate": 9.163462077494382e-05, |
| "loss": 0.0459, |
| "step": 5460 |
| }, |
| { |
| "epoch": 5.633367662203914, |
| "grad_norm": 0.28320547938346863, |
| "learning_rate": 9.159643187026762e-05, |
| "loss": 0.0478, |
| "step": 5470 |
| }, |
| { |
| "epoch": 5.6436663233779605, |
| "grad_norm": 0.4079047739505768, |
| "learning_rate": 9.155816399124125e-05, |
| "loss": 0.0477, |
| "step": 5480 |
| }, |
| { |
| "epoch": 5.653964984552008, |
| "grad_norm": 0.6271510124206543, |
| "learning_rate": 9.151981721051946e-05, |
| "loss": 0.0388, |
| "step": 5490 |
| }, |
| { |
| "epoch": 5.664263645726056, |
| "grad_norm": 0.3322283625602722, |
| "learning_rate": 9.148139160090688e-05, |
| "loss": 0.0411, |
| "step": 5500 |
| }, |
| { |
| "epoch": 5.674562306900103, |
| "grad_norm": 0.4952861964702606, |
| "learning_rate": 9.144288723535781e-05, |
| "loss": 0.0494, |
| "step": 5510 |
| }, |
| { |
| "epoch": 5.68486096807415, |
| "grad_norm": 0.40626558661460876, |
| "learning_rate": 9.140430418697603e-05, |
| "loss": 0.0416, |
| "step": 5520 |
| }, |
| { |
| "epoch": 5.695159629248198, |
| "grad_norm": 0.2986306846141815, |
| "learning_rate": 9.136564252901474e-05, |
| "loss": 0.0453, |
| "step": 5530 |
| }, |
| { |
| "epoch": 5.705458290422245, |
| "grad_norm": 0.3145788013935089, |
| "learning_rate": 9.132690233487635e-05, |
| "loss": 0.0495, |
| "step": 5540 |
| }, |
| { |
| "epoch": 5.715756951596292, |
| "grad_norm": 0.4465279281139374, |
| "learning_rate": 9.128808367811241e-05, |
| "loss": 0.0465, |
| "step": 5550 |
| }, |
| { |
| "epoch": 5.72605561277034, |
| "grad_norm": 0.3666897714138031, |
| "learning_rate": 9.124918663242346e-05, |
| "loss": 0.0515, |
| "step": 5560 |
| }, |
| { |
| "epoch": 5.736354273944388, |
| "grad_norm": 0.5630766749382019, |
| "learning_rate": 9.121021127165878e-05, |
| "loss": 0.044, |
| "step": 5570 |
| }, |
| { |
| "epoch": 5.746652935118434, |
| "grad_norm": 0.3722570836544037, |
| "learning_rate": 9.117115766981644e-05, |
| "loss": 0.0508, |
| "step": 5580 |
| }, |
| { |
| "epoch": 5.756951596292482, |
| "grad_norm": 0.3674274682998657, |
| "learning_rate": 9.1132025901043e-05, |
| "loss": 0.0588, |
| "step": 5590 |
| }, |
| { |
| "epoch": 5.76725025746653, |
| "grad_norm": 0.535066545009613, |
| "learning_rate": 9.109281603963342e-05, |
| "loss": 0.0496, |
| "step": 5600 |
| }, |
| { |
| "epoch": 5.777548918640576, |
| "grad_norm": 0.44427838921546936, |
| "learning_rate": 9.105352816003098e-05, |
| "loss": 0.055, |
| "step": 5610 |
| }, |
| { |
| "epoch": 5.787847579814624, |
| "grad_norm": 0.3270188570022583, |
| "learning_rate": 9.101416233682701e-05, |
| "loss": 0.0543, |
| "step": 5620 |
| }, |
| { |
| "epoch": 5.798146240988672, |
| "grad_norm": 0.43694767355918884, |
| "learning_rate": 9.097471864476089e-05, |
| "loss": 0.0464, |
| "step": 5630 |
| }, |
| { |
| "epoch": 5.8084449021627185, |
| "grad_norm": 0.3801634609699249, |
| "learning_rate": 9.093519715871979e-05, |
| "loss": 0.0536, |
| "step": 5640 |
| }, |
| { |
| "epoch": 5.818743563336766, |
| "grad_norm": 0.3844929337501526, |
| "learning_rate": 9.089559795373862e-05, |
| "loss": 0.0543, |
| "step": 5650 |
| }, |
| { |
| "epoch": 5.829042224510814, |
| "grad_norm": 0.4263947904109955, |
| "learning_rate": 9.08559211049998e-05, |
| "loss": 0.0475, |
| "step": 5660 |
| }, |
| { |
| "epoch": 5.8393408856848605, |
| "grad_norm": 0.39681196212768555, |
| "learning_rate": 9.081616668783322e-05, |
| "loss": 0.0551, |
| "step": 5670 |
| }, |
| { |
| "epoch": 5.849639546858908, |
| "grad_norm": 0.33459895849227905, |
| "learning_rate": 9.077633477771599e-05, |
| "loss": 0.0448, |
| "step": 5680 |
| }, |
| { |
| "epoch": 5.859938208032956, |
| "grad_norm": 0.30157431960105896, |
| "learning_rate": 9.073642545027236e-05, |
| "loss": 0.0519, |
| "step": 5690 |
| }, |
| { |
| "epoch": 5.8702368692070035, |
| "grad_norm": 0.2790994644165039, |
| "learning_rate": 9.069643878127359e-05, |
| "loss": 0.0517, |
| "step": 5700 |
| }, |
| { |
| "epoch": 5.88053553038105, |
| "grad_norm": 0.4210405945777893, |
| "learning_rate": 9.065637484663773e-05, |
| "loss": 0.0444, |
| "step": 5710 |
| }, |
| { |
| "epoch": 5.890834191555098, |
| "grad_norm": 0.3264312148094177, |
| "learning_rate": 9.06162337224296e-05, |
| "loss": 0.0481, |
| "step": 5720 |
| }, |
| { |
| "epoch": 5.901132852729146, |
| "grad_norm": 0.32913386821746826, |
| "learning_rate": 9.057601548486047e-05, |
| "loss": 0.0446, |
| "step": 5730 |
| }, |
| { |
| "epoch": 5.911431513903192, |
| "grad_norm": 0.25155073404312134, |
| "learning_rate": 9.053572021028812e-05, |
| "loss": 0.0436, |
| "step": 5740 |
| }, |
| { |
| "epoch": 5.92173017507724, |
| "grad_norm": 0.25006964802742004, |
| "learning_rate": 9.04953479752165e-05, |
| "loss": 0.0479, |
| "step": 5750 |
| }, |
| { |
| "epoch": 5.932028836251288, |
| "grad_norm": 0.3676224648952484, |
| "learning_rate": 9.045489885629576e-05, |
| "loss": 0.0441, |
| "step": 5760 |
| }, |
| { |
| "epoch": 5.942327497425334, |
| "grad_norm": 0.22950799763202667, |
| "learning_rate": 9.041437293032195e-05, |
| "loss": 0.0459, |
| "step": 5770 |
| }, |
| { |
| "epoch": 5.952626158599382, |
| "grad_norm": 0.26980340480804443, |
| "learning_rate": 9.0373770274237e-05, |
| "loss": 0.0494, |
| "step": 5780 |
| }, |
| { |
| "epoch": 5.96292481977343, |
| "grad_norm": 0.4180351495742798, |
| "learning_rate": 9.033309096512846e-05, |
| "loss": 0.0513, |
| "step": 5790 |
| }, |
| { |
| "epoch": 5.9732234809474765, |
| "grad_norm": 0.39388883113861084, |
| "learning_rate": 9.029233508022947e-05, |
| "loss": 0.0522, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.983522142121524, |
| "grad_norm": 0.33429384231567383, |
| "learning_rate": 9.025150269691852e-05, |
| "loss": 0.0438, |
| "step": 5810 |
| }, |
| { |
| "epoch": 5.993820803295572, |
| "grad_norm": 0.2864847183227539, |
| "learning_rate": 9.021059389271935e-05, |
| "loss": 0.0488, |
| "step": 5820 |
| }, |
| { |
| "epoch": 6.0041194644696185, |
| "grad_norm": 0.2295219898223877, |
| "learning_rate": 9.01696087453008e-05, |
| "loss": 0.0477, |
| "step": 5830 |
| }, |
| { |
| "epoch": 6.014418125643666, |
| "grad_norm": 0.2532294690608978, |
| "learning_rate": 9.012854733247663e-05, |
| "loss": 0.0456, |
| "step": 5840 |
| }, |
| { |
| "epoch": 6.024716786817714, |
| "grad_norm": 0.37234383821487427, |
| "learning_rate": 9.008740973220542e-05, |
| "loss": 0.0444, |
| "step": 5850 |
| }, |
| { |
| "epoch": 6.0350154479917615, |
| "grad_norm": 0.26388946175575256, |
| "learning_rate": 9.004619602259038e-05, |
| "loss": 0.0498, |
| "step": 5860 |
| }, |
| { |
| "epoch": 6.045314109165808, |
| "grad_norm": 0.2828536629676819, |
| "learning_rate": 9.000490628187926e-05, |
| "loss": 0.044, |
| "step": 5870 |
| }, |
| { |
| "epoch": 6.055612770339856, |
| "grad_norm": 0.25269097089767456, |
| "learning_rate": 8.996354058846408e-05, |
| "loss": 0.0491, |
| "step": 5880 |
| }, |
| { |
| "epoch": 6.0659114315139036, |
| "grad_norm": 0.37286481261253357, |
| "learning_rate": 8.992209902088116e-05, |
| "loss": 0.0548, |
| "step": 5890 |
| }, |
| { |
| "epoch": 6.07621009268795, |
| "grad_norm": 0.34140411019325256, |
| "learning_rate": 8.988058165781081e-05, |
| "loss": 0.0444, |
| "step": 5900 |
| }, |
| { |
| "epoch": 6.086508753861998, |
| "grad_norm": 0.40900543332099915, |
| "learning_rate": 8.983898857807726e-05, |
| "loss": 0.0488, |
| "step": 5910 |
| }, |
| { |
| "epoch": 6.096807415036046, |
| "grad_norm": 0.27773845195770264, |
| "learning_rate": 8.979731986064849e-05, |
| "loss": 0.0424, |
| "step": 5920 |
| }, |
| { |
| "epoch": 6.107106076210092, |
| "grad_norm": 0.35663488507270813, |
| "learning_rate": 8.97555755846361e-05, |
| "loss": 0.0436, |
| "step": 5930 |
| }, |
| { |
| "epoch": 6.11740473738414, |
| "grad_norm": 0.5180099010467529, |
| "learning_rate": 8.971375582929513e-05, |
| "loss": 0.0517, |
| "step": 5940 |
| }, |
| { |
| "epoch": 6.127703398558188, |
| "grad_norm": 0.41418635845184326, |
| "learning_rate": 8.967186067402393e-05, |
| "loss": 0.0422, |
| "step": 5950 |
| }, |
| { |
| "epoch": 6.1380020597322344, |
| "grad_norm": 0.38915103673934937, |
| "learning_rate": 8.9629890198364e-05, |
| "loss": 0.042, |
| "step": 5960 |
| }, |
| { |
| "epoch": 6.148300720906282, |
| "grad_norm": 0.7760641574859619, |
| "learning_rate": 8.958784448199987e-05, |
| "loss": 0.0465, |
| "step": 5970 |
| }, |
| { |
| "epoch": 6.15859938208033, |
| "grad_norm": 0.31317341327667236, |
| "learning_rate": 8.954572360475886e-05, |
| "loss": 0.0508, |
| "step": 5980 |
| }, |
| { |
| "epoch": 6.1688980432543765, |
| "grad_norm": 0.5295110940933228, |
| "learning_rate": 8.950352764661103e-05, |
| "loss": 0.0459, |
| "step": 5990 |
| }, |
| { |
| "epoch": 6.179196704428424, |
| "grad_norm": 0.3669707477092743, |
| "learning_rate": 8.946125668766898e-05, |
| "loss": 0.0495, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.189495365602472, |
| "grad_norm": 0.3288549780845642, |
| "learning_rate": 8.941891080818773e-05, |
| "loss": 0.0463, |
| "step": 6010 |
| }, |
| { |
| "epoch": 6.1997940267765195, |
| "grad_norm": 0.37521734833717346, |
| "learning_rate": 8.93764900885645e-05, |
| "loss": 0.0432, |
| "step": 6020 |
| }, |
| { |
| "epoch": 6.210092687950566, |
| "grad_norm": 0.40569764375686646, |
| "learning_rate": 8.933399460933862e-05, |
| "loss": 0.0442, |
| "step": 6030 |
| }, |
| { |
| "epoch": 6.220391349124614, |
| "grad_norm": 0.43062832951545715, |
| "learning_rate": 8.929142445119137e-05, |
| "loss": 0.0424, |
| "step": 6040 |
| }, |
| { |
| "epoch": 6.2306900102986615, |
| "grad_norm": 0.25243639945983887, |
| "learning_rate": 8.924877969494578e-05, |
| "loss": 0.0431, |
| "step": 6050 |
| }, |
| { |
| "epoch": 6.240988671472708, |
| "grad_norm": 0.33310917019844055, |
| "learning_rate": 8.920606042156657e-05, |
| "loss": 0.0436, |
| "step": 6060 |
| }, |
| { |
| "epoch": 6.251287332646756, |
| "grad_norm": 0.3819611668586731, |
| "learning_rate": 8.916326671215987e-05, |
| "loss": 0.0416, |
| "step": 6070 |
| }, |
| { |
| "epoch": 6.261585993820804, |
| "grad_norm": 0.38392260670661926, |
| "learning_rate": 8.912039864797317e-05, |
| "loss": 0.0491, |
| "step": 6080 |
| }, |
| { |
| "epoch": 6.27188465499485, |
| "grad_norm": 0.29428112506866455, |
| "learning_rate": 8.907745631039511e-05, |
| "loss": 0.045, |
| "step": 6090 |
| }, |
| { |
| "epoch": 6.282183316168898, |
| "grad_norm": 0.301840215921402, |
| "learning_rate": 8.903443978095537e-05, |
| "loss": 0.0506, |
| "step": 6100 |
| }, |
| { |
| "epoch": 6.292481977342946, |
| "grad_norm": 0.33956289291381836, |
| "learning_rate": 8.89913491413245e-05, |
| "loss": 0.0423, |
| "step": 6110 |
| }, |
| { |
| "epoch": 6.302780638516992, |
| "grad_norm": 0.2992474138736725, |
| "learning_rate": 8.894818447331368e-05, |
| "loss": 0.0503, |
| "step": 6120 |
| }, |
| { |
| "epoch": 6.31307929969104, |
| "grad_norm": 0.2867885231971741, |
| "learning_rate": 8.890494585887472e-05, |
| "loss": 0.05, |
| "step": 6130 |
| }, |
| { |
| "epoch": 6.323377960865088, |
| "grad_norm": 0.40204593539237976, |
| "learning_rate": 8.886163338009978e-05, |
| "loss": 0.048, |
| "step": 6140 |
| }, |
| { |
| "epoch": 6.3336766220391345, |
| "grad_norm": 0.3581812083721161, |
| "learning_rate": 8.881824711922129e-05, |
| "loss": 0.0488, |
| "step": 6150 |
| }, |
| { |
| "epoch": 6.343975283213182, |
| "grad_norm": 0.30317041277885437, |
| "learning_rate": 8.877478715861173e-05, |
| "loss": 0.0467, |
| "step": 6160 |
| }, |
| { |
| "epoch": 6.35427394438723, |
| "grad_norm": 0.4209807515144348, |
| "learning_rate": 8.873125358078352e-05, |
| "loss": 0.0527, |
| "step": 6170 |
| }, |
| { |
| "epoch": 6.364572605561277, |
| "grad_norm": 0.2900068461894989, |
| "learning_rate": 8.868764646838883e-05, |
| "loss": 0.0461, |
| "step": 6180 |
| }, |
| { |
| "epoch": 6.374871266735324, |
| "grad_norm": 0.30829277634620667, |
| "learning_rate": 8.864396590421947e-05, |
| "loss": 0.0449, |
| "step": 6190 |
| }, |
| { |
| "epoch": 6.385169927909372, |
| "grad_norm": 0.20672224462032318, |
| "learning_rate": 8.86002119712067e-05, |
| "loss": 0.0412, |
| "step": 6200 |
| }, |
| { |
| "epoch": 6.3954685890834195, |
| "grad_norm": 0.2930947542190552, |
| "learning_rate": 8.855638475242107e-05, |
| "loss": 0.0424, |
| "step": 6210 |
| }, |
| { |
| "epoch": 6.405767250257466, |
| "grad_norm": 0.3625617027282715, |
| "learning_rate": 8.851248433107225e-05, |
| "loss": 0.0427, |
| "step": 6220 |
| }, |
| { |
| "epoch": 6.416065911431514, |
| "grad_norm": 0.31254348158836365, |
| "learning_rate": 8.846851079050892e-05, |
| "loss": 0.044, |
| "step": 6230 |
| }, |
| { |
| "epoch": 6.426364572605562, |
| "grad_norm": 0.388361394405365, |
| "learning_rate": 8.842446421421857e-05, |
| "loss": 0.0386, |
| "step": 6240 |
| }, |
| { |
| "epoch": 6.436663233779608, |
| "grad_norm": 0.37418097257614136, |
| "learning_rate": 8.838034468582737e-05, |
| "loss": 0.0519, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.446961894953656, |
| "grad_norm": 0.42668136954307556, |
| "learning_rate": 8.833615228909995e-05, |
| "loss": 0.0533, |
| "step": 6260 |
| }, |
| { |
| "epoch": 6.457260556127704, |
| "grad_norm": 0.4811553657054901, |
| "learning_rate": 8.829188710793937e-05, |
| "loss": 0.0478, |
| "step": 6270 |
| }, |
| { |
| "epoch": 6.46755921730175, |
| "grad_norm": 0.30195683240890503, |
| "learning_rate": 8.824754922638677e-05, |
| "loss": 0.046, |
| "step": 6280 |
| }, |
| { |
| "epoch": 6.477857878475798, |
| "grad_norm": 0.3815639913082123, |
| "learning_rate": 8.82031387286214e-05, |
| "loss": 0.0511, |
| "step": 6290 |
| }, |
| { |
| "epoch": 6.488156539649846, |
| "grad_norm": 0.4513855278491974, |
| "learning_rate": 8.815865569896038e-05, |
| "loss": 0.0487, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.4984552008238925, |
| "grad_norm": 0.3365342915058136, |
| "learning_rate": 8.811410022185847e-05, |
| "loss": 0.0444, |
| "step": 6310 |
| }, |
| { |
| "epoch": 6.50875386199794, |
| "grad_norm": 0.3500465154647827, |
| "learning_rate": 8.806947238190803e-05, |
| "loss": 0.0432, |
| "step": 6320 |
| }, |
| { |
| "epoch": 6.519052523171988, |
| "grad_norm": 0.4398089349269867, |
| "learning_rate": 8.802477226383881e-05, |
| "loss": 0.042, |
| "step": 6330 |
| }, |
| { |
| "epoch": 6.5293511843460355, |
| "grad_norm": 0.318387895822525, |
| "learning_rate": 8.797999995251777e-05, |
| "loss": 0.0483, |
| "step": 6340 |
| }, |
| { |
| "epoch": 6.539649845520082, |
| "grad_norm": 0.32100003957748413, |
| "learning_rate": 8.793515553294891e-05, |
| "loss": 0.0468, |
| "step": 6350 |
| }, |
| { |
| "epoch": 6.54994850669413, |
| "grad_norm": 0.292423814535141, |
| "learning_rate": 8.789023909027319e-05, |
| "loss": 0.0489, |
| "step": 6360 |
| }, |
| { |
| "epoch": 6.5602471678681775, |
| "grad_norm": 0.31183555722236633, |
| "learning_rate": 8.784525070976825e-05, |
| "loss": 0.0456, |
| "step": 6370 |
| }, |
| { |
| "epoch": 6.570545829042224, |
| "grad_norm": 0.25083911418914795, |
| "learning_rate": 8.780019047684837e-05, |
| "loss": 0.0524, |
| "step": 6380 |
| }, |
| { |
| "epoch": 6.580844490216272, |
| "grad_norm": 0.4049484133720398, |
| "learning_rate": 8.775505847706422e-05, |
| "loss": 0.0484, |
| "step": 6390 |
| }, |
| { |
| "epoch": 6.59114315139032, |
| "grad_norm": 0.46989813446998596, |
| "learning_rate": 8.770985479610273e-05, |
| "loss": 0.0409, |
| "step": 6400 |
| }, |
| { |
| "epoch": 6.601441812564366, |
| "grad_norm": 0.301536500453949, |
| "learning_rate": 8.766457951978687e-05, |
| "loss": 0.0435, |
| "step": 6410 |
| }, |
| { |
| "epoch": 6.611740473738414, |
| "grad_norm": 0.3885561227798462, |
| "learning_rate": 8.761923273407564e-05, |
| "loss": 0.0417, |
| "step": 6420 |
| }, |
| { |
| "epoch": 6.622039134912462, |
| "grad_norm": 0.2474498450756073, |
| "learning_rate": 8.757381452506374e-05, |
| "loss": 0.0439, |
| "step": 6430 |
| }, |
| { |
| "epoch": 6.632337796086508, |
| "grad_norm": 0.6527761816978455, |
| "learning_rate": 8.752832497898148e-05, |
| "loss": 0.0491, |
| "step": 6440 |
| }, |
| { |
| "epoch": 6.642636457260556, |
| "grad_norm": 0.4527409076690674, |
| "learning_rate": 8.748276418219463e-05, |
| "loss": 0.0491, |
| "step": 6450 |
| }, |
| { |
| "epoch": 6.652935118434604, |
| "grad_norm": 0.433944433927536, |
| "learning_rate": 8.743713222120421e-05, |
| "loss": 0.043, |
| "step": 6460 |
| }, |
| { |
| "epoch": 6.663233779608651, |
| "grad_norm": 0.5092076063156128, |
| "learning_rate": 8.739142918264638e-05, |
| "loss": 0.0487, |
| "step": 6470 |
| }, |
| { |
| "epoch": 6.673532440782698, |
| "grad_norm": 0.3467596471309662, |
| "learning_rate": 8.734565515329221e-05, |
| "loss": 0.0404, |
| "step": 6480 |
| }, |
| { |
| "epoch": 6.683831101956746, |
| "grad_norm": 0.23407290875911713, |
| "learning_rate": 8.729981022004758e-05, |
| "loss": 0.0446, |
| "step": 6490 |
| }, |
| { |
| "epoch": 6.6941297631307926, |
| "grad_norm": 0.33283254504203796, |
| "learning_rate": 8.7253894469953e-05, |
| "loss": 0.0441, |
| "step": 6500 |
| }, |
| { |
| "epoch": 6.70442842430484, |
| "grad_norm": 0.30493679642677307, |
| "learning_rate": 8.720790799018337e-05, |
| "loss": 0.0457, |
| "step": 6510 |
| }, |
| { |
| "epoch": 6.714727085478888, |
| "grad_norm": 0.4004342257976532, |
| "learning_rate": 8.716185086804798e-05, |
| "loss": 0.0448, |
| "step": 6520 |
| }, |
| { |
| "epoch": 6.7250257466529355, |
| "grad_norm": 0.5191412568092346, |
| "learning_rate": 8.71157231909901e-05, |
| "loss": 0.0528, |
| "step": 6530 |
| }, |
| { |
| "epoch": 6.735324407826982, |
| "grad_norm": 0.6779634356498718, |
| "learning_rate": 8.706952504658712e-05, |
| "loss": 0.0496, |
| "step": 6540 |
| }, |
| { |
| "epoch": 6.74562306900103, |
| "grad_norm": 0.3031466007232666, |
| "learning_rate": 8.702325652255005e-05, |
| "loss": 0.0473, |
| "step": 6550 |
| }, |
| { |
| "epoch": 6.755921730175078, |
| "grad_norm": 0.41129881143569946, |
| "learning_rate": 8.697691770672366e-05, |
| "loss": 0.0468, |
| "step": 6560 |
| }, |
| { |
| "epoch": 6.766220391349124, |
| "grad_norm": 0.278126984834671, |
| "learning_rate": 8.69305086870861e-05, |
| "loss": 0.0439, |
| "step": 6570 |
| }, |
| { |
| "epoch": 6.776519052523172, |
| "grad_norm": 0.3106745779514313, |
| "learning_rate": 8.688402955174881e-05, |
| "loss": 0.0487, |
| "step": 6580 |
| }, |
| { |
| "epoch": 6.78681771369722, |
| "grad_norm": 0.4737553596496582, |
| "learning_rate": 8.683748038895639e-05, |
| "loss": 0.0512, |
| "step": 6590 |
| }, |
| { |
| "epoch": 6.797116374871266, |
| "grad_norm": 0.3413580358028412, |
| "learning_rate": 8.679086128708636e-05, |
| "loss": 0.0485, |
| "step": 6600 |
| }, |
| { |
| "epoch": 6.807415036045314, |
| "grad_norm": 0.2869110703468323, |
| "learning_rate": 8.674417233464903e-05, |
| "loss": 0.0397, |
| "step": 6610 |
| }, |
| { |
| "epoch": 6.817713697219362, |
| "grad_norm": 0.295481413602829, |
| "learning_rate": 8.669741362028734e-05, |
| "loss": 0.0422, |
| "step": 6620 |
| }, |
| { |
| "epoch": 6.8280123583934085, |
| "grad_norm": 0.453259140253067, |
| "learning_rate": 8.665058523277667e-05, |
| "loss": 0.0456, |
| "step": 6630 |
| }, |
| { |
| "epoch": 6.838311019567456, |
| "grad_norm": 0.3729632794857025, |
| "learning_rate": 8.660368726102469e-05, |
| "loss": 0.0483, |
| "step": 6640 |
| }, |
| { |
| "epoch": 6.848609680741504, |
| "grad_norm": 0.40101921558380127, |
| "learning_rate": 8.655671979407112e-05, |
| "loss": 0.0462, |
| "step": 6650 |
| }, |
| { |
| "epoch": 6.858908341915551, |
| "grad_norm": 0.27768710255622864, |
| "learning_rate": 8.650968292108774e-05, |
| "loss": 0.0425, |
| "step": 6660 |
| }, |
| { |
| "epoch": 6.869207003089598, |
| "grad_norm": 0.24457606673240662, |
| "learning_rate": 8.646257673137803e-05, |
| "loss": 0.044, |
| "step": 6670 |
| }, |
| { |
| "epoch": 6.879505664263646, |
| "grad_norm": 0.49996355175971985, |
| "learning_rate": 8.641540131437705e-05, |
| "loss": 0.0506, |
| "step": 6680 |
| }, |
| { |
| "epoch": 6.889804325437693, |
| "grad_norm": 0.46634727716445923, |
| "learning_rate": 8.636815675965136e-05, |
| "loss": 0.0492, |
| "step": 6690 |
| }, |
| { |
| "epoch": 6.90010298661174, |
| "grad_norm": 0.3688158690929413, |
| "learning_rate": 8.632084315689872e-05, |
| "loss": 0.045, |
| "step": 6700 |
| }, |
| { |
| "epoch": 6.910401647785788, |
| "grad_norm": 0.5695337653160095, |
| "learning_rate": 8.627346059594802e-05, |
| "loss": 0.0427, |
| "step": 6710 |
| }, |
| { |
| "epoch": 6.920700308959836, |
| "grad_norm": 0.2298751175403595, |
| "learning_rate": 8.622600916675909e-05, |
| "loss": 0.0524, |
| "step": 6720 |
| }, |
| { |
| "epoch": 6.930998970133882, |
| "grad_norm": 0.33885377645492554, |
| "learning_rate": 8.617848895942247e-05, |
| "loss": 0.0436, |
| "step": 6730 |
| }, |
| { |
| "epoch": 6.94129763130793, |
| "grad_norm": 0.3807486295700073, |
| "learning_rate": 8.61309000641593e-05, |
| "loss": 0.0446, |
| "step": 6740 |
| }, |
| { |
| "epoch": 6.951596292481978, |
| "grad_norm": 0.37897372245788574, |
| "learning_rate": 8.608324257132114e-05, |
| "loss": 0.0411, |
| "step": 6750 |
| }, |
| { |
| "epoch": 6.961894953656024, |
| "grad_norm": 0.4584294855594635, |
| "learning_rate": 8.603551657138975e-05, |
| "loss": 0.0417, |
| "step": 6760 |
| }, |
| { |
| "epoch": 6.972193614830072, |
| "grad_norm": 0.31342461705207825, |
| "learning_rate": 8.598772215497703e-05, |
| "loss": 0.0411, |
| "step": 6770 |
| }, |
| { |
| "epoch": 6.98249227600412, |
| "grad_norm": 0.3062428832054138, |
| "learning_rate": 8.593985941282468e-05, |
| "loss": 0.0405, |
| "step": 6780 |
| }, |
| { |
| "epoch": 6.9927909371781665, |
| "grad_norm": 0.40509548783302307, |
| "learning_rate": 8.58919284358042e-05, |
| "loss": 0.0424, |
| "step": 6790 |
| }, |
| { |
| "epoch": 7.003089598352214, |
| "grad_norm": 0.4782925248146057, |
| "learning_rate": 8.584392931491662e-05, |
| "loss": 0.0513, |
| "step": 6800 |
| }, |
| { |
| "epoch": 7.013388259526262, |
| "grad_norm": 0.5316755771636963, |
| "learning_rate": 8.579586214129232e-05, |
| "loss": 0.0432, |
| "step": 6810 |
| }, |
| { |
| "epoch": 7.0236869207003085, |
| "grad_norm": 0.3078876733779907, |
| "learning_rate": 8.574772700619089e-05, |
| "loss": 0.0467, |
| "step": 6820 |
| }, |
| { |
| "epoch": 7.033985581874356, |
| "grad_norm": 0.3792472183704376, |
| "learning_rate": 8.569952400100099e-05, |
| "loss": 0.0396, |
| "step": 6830 |
| }, |
| { |
| "epoch": 7.044284243048404, |
| "grad_norm": 0.3639551103115082, |
| "learning_rate": 8.565125321724009e-05, |
| "loss": 0.0455, |
| "step": 6840 |
| }, |
| { |
| "epoch": 7.0545829042224515, |
| "grad_norm": 0.27368006110191345, |
| "learning_rate": 8.560291474655438e-05, |
| "loss": 0.0409, |
| "step": 6850 |
| }, |
| { |
| "epoch": 7.064881565396498, |
| "grad_norm": 0.28575336933135986, |
| "learning_rate": 8.555450868071852e-05, |
| "loss": 0.0432, |
| "step": 6860 |
| }, |
| { |
| "epoch": 7.075180226570546, |
| "grad_norm": 0.29869362711906433, |
| "learning_rate": 8.550603511163554e-05, |
| "loss": 0.0488, |
| "step": 6870 |
| }, |
| { |
| "epoch": 7.085478887744594, |
| "grad_norm": 0.4075066149234772, |
| "learning_rate": 8.545749413133662e-05, |
| "loss": 0.0529, |
| "step": 6880 |
| }, |
| { |
| "epoch": 7.09577754891864, |
| "grad_norm": 1.2341939210891724, |
| "learning_rate": 8.540888583198092e-05, |
| "loss": 0.0413, |
| "step": 6890 |
| }, |
| { |
| "epoch": 7.106076210092688, |
| "grad_norm": 0.4124568998813629, |
| "learning_rate": 8.536021030585541e-05, |
| "loss": 0.0421, |
| "step": 6900 |
| }, |
| { |
| "epoch": 7.116374871266736, |
| "grad_norm": 0.30242303013801575, |
| "learning_rate": 8.53114676453747e-05, |
| "loss": 0.0472, |
| "step": 6910 |
| }, |
| { |
| "epoch": 7.126673532440782, |
| "grad_norm": 0.48593899607658386, |
| "learning_rate": 8.526265794308089e-05, |
| "loss": 0.0512, |
| "step": 6920 |
| }, |
| { |
| "epoch": 7.13697219361483, |
| "grad_norm": 0.3725290894508362, |
| "learning_rate": 8.52137812916433e-05, |
| "loss": 0.0485, |
| "step": 6930 |
| }, |
| { |
| "epoch": 7.147270854788878, |
| "grad_norm": 0.38606998324394226, |
| "learning_rate": 8.51648377838584e-05, |
| "loss": 0.043, |
| "step": 6940 |
| }, |
| { |
| "epoch": 7.1575695159629245, |
| "grad_norm": 0.49902254343032837, |
| "learning_rate": 8.511582751264959e-05, |
| "loss": 0.0405, |
| "step": 6950 |
| }, |
| { |
| "epoch": 7.167868177136972, |
| "grad_norm": 0.46539393067359924, |
| "learning_rate": 8.506675057106704e-05, |
| "loss": 0.0458, |
| "step": 6960 |
| }, |
| { |
| "epoch": 7.17816683831102, |
| "grad_norm": 0.2839367687702179, |
| "learning_rate": 8.501760705228746e-05, |
| "loss": 0.0444, |
| "step": 6970 |
| }, |
| { |
| "epoch": 7.1884654994850665, |
| "grad_norm": 0.3735373914241791, |
| "learning_rate": 8.496839704961398e-05, |
| "loss": 0.0549, |
| "step": 6980 |
| }, |
| { |
| "epoch": 7.198764160659114, |
| "grad_norm": 0.28953662514686584, |
| "learning_rate": 8.491912065647599e-05, |
| "loss": 0.041, |
| "step": 6990 |
| }, |
| { |
| "epoch": 7.209062821833162, |
| "grad_norm": 0.3638463318347931, |
| "learning_rate": 8.486977796642886e-05, |
| "loss": 0.0443, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.2193614830072095, |
| "grad_norm": 0.35548821091651917, |
| "learning_rate": 8.482036907315388e-05, |
| "loss": 0.0399, |
| "step": 7010 |
| }, |
| { |
| "epoch": 7.229660144181256, |
| "grad_norm": 0.3023133873939514, |
| "learning_rate": 8.477089407045803e-05, |
| "loss": 0.0456, |
| "step": 7020 |
| }, |
| { |
| "epoch": 7.239958805355304, |
| "grad_norm": 0.32346412539482117, |
| "learning_rate": 8.47213530522738e-05, |
| "loss": 0.0456, |
| "step": 7030 |
| }, |
| { |
| "epoch": 7.2502574665293515, |
| "grad_norm": 0.30193185806274414, |
| "learning_rate": 8.467174611265898e-05, |
| "loss": 0.0436, |
| "step": 7040 |
| }, |
| { |
| "epoch": 7.260556127703398, |
| "grad_norm": 0.3131241798400879, |
| "learning_rate": 8.462207334579658e-05, |
| "loss": 0.0493, |
| "step": 7050 |
| }, |
| { |
| "epoch": 7.270854788877446, |
| "grad_norm": 0.38617610931396484, |
| "learning_rate": 8.457233484599454e-05, |
| "loss": 0.0389, |
| "step": 7060 |
| }, |
| { |
| "epoch": 7.281153450051494, |
| "grad_norm": 0.34710705280303955, |
| "learning_rate": 8.452253070768562e-05, |
| "loss": 0.0548, |
| "step": 7070 |
| }, |
| { |
| "epoch": 7.29145211122554, |
| "grad_norm": 0.4385312795639038, |
| "learning_rate": 8.447266102542719e-05, |
| "loss": 0.0414, |
| "step": 7080 |
| }, |
| { |
| "epoch": 7.301750772399588, |
| "grad_norm": 0.33167046308517456, |
| "learning_rate": 8.442272589390107e-05, |
| "loss": 0.0436, |
| "step": 7090 |
| }, |
| { |
| "epoch": 7.312049433573636, |
| "grad_norm": 0.3587557375431061, |
| "learning_rate": 8.437272540791337e-05, |
| "loss": 0.0491, |
| "step": 7100 |
| }, |
| { |
| "epoch": 7.3223480947476824, |
| "grad_norm": 0.4869999587535858, |
| "learning_rate": 8.432265966239419e-05, |
| "loss": 0.0412, |
| "step": 7110 |
| }, |
| { |
| "epoch": 7.33264675592173, |
| "grad_norm": 0.3566068410873413, |
| "learning_rate": 8.427252875239764e-05, |
| "loss": 0.0395, |
| "step": 7120 |
| }, |
| { |
| "epoch": 7.342945417095778, |
| "grad_norm": 0.27183136343955994, |
| "learning_rate": 8.422233277310145e-05, |
| "loss": 0.0458, |
| "step": 7130 |
| }, |
| { |
| "epoch": 7.3532440782698245, |
| "grad_norm": 0.6837592124938965, |
| "learning_rate": 8.4172071819807e-05, |
| "loss": 0.0486, |
| "step": 7140 |
| }, |
| { |
| "epoch": 7.363542739443872, |
| "grad_norm": 0.3837999403476715, |
| "learning_rate": 8.412174598793893e-05, |
| "loss": 0.0517, |
| "step": 7150 |
| }, |
| { |
| "epoch": 7.37384140061792, |
| "grad_norm": 0.3554346561431885, |
| "learning_rate": 8.40713553730451e-05, |
| "loss": 0.0458, |
| "step": 7160 |
| }, |
| { |
| "epoch": 7.3841400617919675, |
| "grad_norm": 0.43407344818115234, |
| "learning_rate": 8.402090007079636e-05, |
| "loss": 0.0513, |
| "step": 7170 |
| }, |
| { |
| "epoch": 7.394438722966014, |
| "grad_norm": 0.3473135232925415, |
| "learning_rate": 8.397038017698638e-05, |
| "loss": 0.0459, |
| "step": 7180 |
| }, |
| { |
| "epoch": 7.404737384140062, |
| "grad_norm": 0.3611428439617157, |
| "learning_rate": 8.391979578753146e-05, |
| "loss": 0.0493, |
| "step": 7190 |
| }, |
| { |
| "epoch": 7.4150360453141095, |
| "grad_norm": 0.43605300784111023, |
| "learning_rate": 8.386914699847037e-05, |
| "loss": 0.0441, |
| "step": 7200 |
| }, |
| { |
| "epoch": 7.425334706488156, |
| "grad_norm": 0.4645574688911438, |
| "learning_rate": 8.381843390596409e-05, |
| "loss": 0.0411, |
| "step": 7210 |
| }, |
| { |
| "epoch": 7.435633367662204, |
| "grad_norm": 0.40116310119628906, |
| "learning_rate": 8.376765660629574e-05, |
| "loss": 0.0424, |
| "step": 7220 |
| }, |
| { |
| "epoch": 7.445932028836252, |
| "grad_norm": 0.31686821579933167, |
| "learning_rate": 8.371681519587033e-05, |
| "loss": 0.0438, |
| "step": 7230 |
| }, |
| { |
| "epoch": 7.456230690010298, |
| "grad_norm": 0.34404608607292175, |
| "learning_rate": 8.366590977121457e-05, |
| "loss": 0.0476, |
| "step": 7240 |
| }, |
| { |
| "epoch": 7.466529351184346, |
| "grad_norm": 0.4584087133407593, |
| "learning_rate": 8.361494042897675e-05, |
| "loss": 0.0403, |
| "step": 7250 |
| }, |
| { |
| "epoch": 7.476828012358394, |
| "grad_norm": 0.4275796413421631, |
| "learning_rate": 8.356390726592645e-05, |
| "loss": 0.0431, |
| "step": 7260 |
| }, |
| { |
| "epoch": 7.48712667353244, |
| "grad_norm": 0.5235409140586853, |
| "learning_rate": 8.351281037895448e-05, |
| "loss": 0.0486, |
| "step": 7270 |
| }, |
| { |
| "epoch": 7.497425334706488, |
| "grad_norm": 0.4118786156177521, |
| "learning_rate": 8.346164986507258e-05, |
| "loss": 0.0438, |
| "step": 7280 |
| }, |
| { |
| "epoch": 7.507723995880536, |
| "grad_norm": 0.3873061239719391, |
| "learning_rate": 8.341042582141336e-05, |
| "loss": 0.0501, |
| "step": 7290 |
| }, |
| { |
| "epoch": 7.518022657054583, |
| "grad_norm": 0.2781210243701935, |
| "learning_rate": 8.335913834522999e-05, |
| "loss": 0.0496, |
| "step": 7300 |
| }, |
| { |
| "epoch": 7.52832131822863, |
| "grad_norm": 0.39076125621795654, |
| "learning_rate": 8.330778753389608e-05, |
| "loss": 0.0518, |
| "step": 7310 |
| }, |
| { |
| "epoch": 7.538619979402678, |
| "grad_norm": 0.24123363196849823, |
| "learning_rate": 8.32563734849055e-05, |
| "loss": 0.0419, |
| "step": 7320 |
| }, |
| { |
| "epoch": 7.548918640576725, |
| "grad_norm": 0.510017991065979, |
| "learning_rate": 8.320489629587218e-05, |
| "loss": 0.049, |
| "step": 7330 |
| }, |
| { |
| "epoch": 7.559217301750772, |
| "grad_norm": 0.5126045942306519, |
| "learning_rate": 8.315335606452992e-05, |
| "loss": 0.0433, |
| "step": 7340 |
| }, |
| { |
| "epoch": 7.56951596292482, |
| "grad_norm": 0.26801052689552307, |
| "learning_rate": 8.310175288873222e-05, |
| "loss": 0.043, |
| "step": 7350 |
| }, |
| { |
| "epoch": 7.5798146240988675, |
| "grad_norm": 0.27340441942214966, |
| "learning_rate": 8.305008686645209e-05, |
| "loss": 0.0453, |
| "step": 7360 |
| }, |
| { |
| "epoch": 7.590113285272914, |
| "grad_norm": 0.33311620354652405, |
| "learning_rate": 8.299835809578183e-05, |
| "loss": 0.046, |
| "step": 7370 |
| }, |
| { |
| "epoch": 7.600411946446962, |
| "grad_norm": 0.3186795115470886, |
| "learning_rate": 8.294656667493292e-05, |
| "loss": 0.0466, |
| "step": 7380 |
| }, |
| { |
| "epoch": 7.61071060762101, |
| "grad_norm": 0.418103963136673, |
| "learning_rate": 8.289471270223575e-05, |
| "loss": 0.0496, |
| "step": 7390 |
| }, |
| { |
| "epoch": 7.621009268795056, |
| "grad_norm": 0.36763474345207214, |
| "learning_rate": 8.284279627613948e-05, |
| "loss": 0.0466, |
| "step": 7400 |
| }, |
| { |
| "epoch": 7.631307929969104, |
| "grad_norm": 0.555157482624054, |
| "learning_rate": 8.279081749521182e-05, |
| "loss": 0.0438, |
| "step": 7410 |
| }, |
| { |
| "epoch": 7.641606591143152, |
| "grad_norm": 0.3913489282131195, |
| "learning_rate": 8.273877645813893e-05, |
| "loss": 0.0419, |
| "step": 7420 |
| }, |
| { |
| "epoch": 7.651905252317198, |
| "grad_norm": 0.22710680961608887, |
| "learning_rate": 8.26866732637251e-05, |
| "loss": 0.0399, |
| "step": 7430 |
| }, |
| { |
| "epoch": 7.662203913491246, |
| "grad_norm": 0.6678986549377441, |
| "learning_rate": 8.263450801089268e-05, |
| "loss": 0.0454, |
| "step": 7440 |
| }, |
| { |
| "epoch": 7.672502574665294, |
| "grad_norm": 0.3249244689941406, |
| "learning_rate": 8.25822807986818e-05, |
| "loss": 0.0439, |
| "step": 7450 |
| }, |
| { |
| "epoch": 7.6828012358393405, |
| "grad_norm": 0.4379661977291107, |
| "learning_rate": 8.252999172625026e-05, |
| "loss": 0.0489, |
| "step": 7460 |
| }, |
| { |
| "epoch": 7.693099897013388, |
| "grad_norm": 0.3780250549316406, |
| "learning_rate": 8.24776408928733e-05, |
| "loss": 0.0498, |
| "step": 7470 |
| }, |
| { |
| "epoch": 7.703398558187436, |
| "grad_norm": 0.3127906620502472, |
| "learning_rate": 8.242522839794338e-05, |
| "loss": 0.0444, |
| "step": 7480 |
| }, |
| { |
| "epoch": 7.7136972193614834, |
| "grad_norm": 0.47873997688293457, |
| "learning_rate": 8.237275434097012e-05, |
| "loss": 0.0451, |
| "step": 7490 |
| }, |
| { |
| "epoch": 7.72399588053553, |
| "grad_norm": 0.3613678216934204, |
| "learning_rate": 8.23202188215799e-05, |
| "loss": 0.0453, |
| "step": 7500 |
| }, |
| { |
| "epoch": 7.734294541709578, |
| "grad_norm": 0.43183133006095886, |
| "learning_rate": 8.22676219395159e-05, |
| "loss": 0.0415, |
| "step": 7510 |
| }, |
| { |
| "epoch": 7.7445932028836255, |
| "grad_norm": 0.5181182622909546, |
| "learning_rate": 8.221496379463774e-05, |
| "loss": 0.0451, |
| "step": 7520 |
| }, |
| { |
| "epoch": 7.754891864057672, |
| "grad_norm": 0.3400101661682129, |
| "learning_rate": 8.216224448692138e-05, |
| "loss": 0.0508, |
| "step": 7530 |
| }, |
| { |
| "epoch": 7.76519052523172, |
| "grad_norm": 0.3759165108203888, |
| "learning_rate": 8.210946411645887e-05, |
| "loss": 0.046, |
| "step": 7540 |
| }, |
| { |
| "epoch": 7.775489186405768, |
| "grad_norm": 0.46893036365509033, |
| "learning_rate": 8.205662278345823e-05, |
| "loss": 0.0464, |
| "step": 7550 |
| }, |
| { |
| "epoch": 7.785787847579814, |
| "grad_norm": 0.3439854383468628, |
| "learning_rate": 8.200372058824322e-05, |
| "loss": 0.0432, |
| "step": 7560 |
| }, |
| { |
| "epoch": 7.796086508753862, |
| "grad_norm": 0.28945380449295044, |
| "learning_rate": 8.19507576312531e-05, |
| "loss": 0.043, |
| "step": 7570 |
| }, |
| { |
| "epoch": 7.80638516992791, |
| "grad_norm": 0.39605122804641724, |
| "learning_rate": 8.189773401304259e-05, |
| "loss": 0.041, |
| "step": 7580 |
| }, |
| { |
| "epoch": 7.816683831101956, |
| "grad_norm": 0.312443345785141, |
| "learning_rate": 8.184464983428146e-05, |
| "loss": 0.0495, |
| "step": 7590 |
| }, |
| { |
| "epoch": 7.826982492276004, |
| "grad_norm": 0.2684251368045807, |
| "learning_rate": 8.179150519575456e-05, |
| "loss": 0.0455, |
| "step": 7600 |
| }, |
| { |
| "epoch": 7.837281153450052, |
| "grad_norm": 0.3516882658004761, |
| "learning_rate": 8.17383001983615e-05, |
| "loss": 0.0421, |
| "step": 7610 |
| }, |
| { |
| "epoch": 7.8475798146240985, |
| "grad_norm": 0.2721065580844879, |
| "learning_rate": 8.168503494311644e-05, |
| "loss": 0.0465, |
| "step": 7620 |
| }, |
| { |
| "epoch": 7.857878475798146, |
| "grad_norm": 0.36698025465011597, |
| "learning_rate": 8.163170953114798e-05, |
| "loss": 0.0436, |
| "step": 7630 |
| }, |
| { |
| "epoch": 7.868177136972194, |
| "grad_norm": 0.25991520285606384, |
| "learning_rate": 8.157832406369897e-05, |
| "loss": 0.0485, |
| "step": 7640 |
| }, |
| { |
| "epoch": 7.8784757981462405, |
| "grad_norm": 0.42714834213256836, |
| "learning_rate": 8.15248786421262e-05, |
| "loss": 0.0505, |
| "step": 7650 |
| }, |
| { |
| "epoch": 7.888774459320288, |
| "grad_norm": 0.29556575417518616, |
| "learning_rate": 8.147137336790036e-05, |
| "loss": 0.045, |
| "step": 7660 |
| }, |
| { |
| "epoch": 7.899073120494336, |
| "grad_norm": 0.7319002747535706, |
| "learning_rate": 8.141780834260575e-05, |
| "loss": 0.0392, |
| "step": 7670 |
| }, |
| { |
| "epoch": 7.9093717816683835, |
| "grad_norm": 0.3782273530960083, |
| "learning_rate": 8.136418366794008e-05, |
| "loss": 0.041, |
| "step": 7680 |
| }, |
| { |
| "epoch": 7.91967044284243, |
| "grad_norm": 0.8856772184371948, |
| "learning_rate": 8.131049944571437e-05, |
| "loss": 0.0406, |
| "step": 7690 |
| }, |
| { |
| "epoch": 7.929969104016478, |
| "grad_norm": 0.3390977084636688, |
| "learning_rate": 8.125675577785264e-05, |
| "loss": 0.0381, |
| "step": 7700 |
| }, |
| { |
| "epoch": 7.940267765190526, |
| "grad_norm": 0.29981207847595215, |
| "learning_rate": 8.120295276639181e-05, |
| "loss": 0.0434, |
| "step": 7710 |
| }, |
| { |
| "epoch": 7.950566426364572, |
| "grad_norm": 0.3433596193790436, |
| "learning_rate": 8.114909051348144e-05, |
| "loss": 0.0504, |
| "step": 7720 |
| }, |
| { |
| "epoch": 7.96086508753862, |
| "grad_norm": 0.41486749053001404, |
| "learning_rate": 8.109516912138361e-05, |
| "loss": 0.0415, |
| "step": 7730 |
| }, |
| { |
| "epoch": 7.971163748712668, |
| "grad_norm": 0.36171993613243103, |
| "learning_rate": 8.104118869247263e-05, |
| "loss": 0.0452, |
| "step": 7740 |
| }, |
| { |
| "epoch": 7.981462409886714, |
| "grad_norm": 0.32786816358566284, |
| "learning_rate": 8.09871493292349e-05, |
| "loss": 0.0412, |
| "step": 7750 |
| }, |
| { |
| "epoch": 7.991761071060762, |
| "grad_norm": 0.3479085862636566, |
| "learning_rate": 8.09330511342688e-05, |
| "loss": 0.0453, |
| "step": 7760 |
| }, |
| { |
| "epoch": 8.002059732234809, |
| "grad_norm": 0.34164348244667053, |
| "learning_rate": 8.087889421028424e-05, |
| "loss": 0.0388, |
| "step": 7770 |
| }, |
| { |
| "epoch": 8.012358393408856, |
| "grad_norm": 0.29838696122169495, |
| "learning_rate": 8.082467866010279e-05, |
| "loss": 0.0484, |
| "step": 7780 |
| }, |
| { |
| "epoch": 8.022657054582904, |
| "grad_norm": 0.2911168932914734, |
| "learning_rate": 8.077040458665725e-05, |
| "loss": 0.0419, |
| "step": 7790 |
| }, |
| { |
| "epoch": 8.032955715756952, |
| "grad_norm": 0.4936712980270386, |
| "learning_rate": 8.071607209299157e-05, |
| "loss": 0.0439, |
| "step": 7800 |
| }, |
| { |
| "epoch": 8.043254376931, |
| "grad_norm": 0.3870621919631958, |
| "learning_rate": 8.066168128226057e-05, |
| "loss": 0.0479, |
| "step": 7810 |
| }, |
| { |
| "epoch": 8.053553038105047, |
| "grad_norm": 0.46932512521743774, |
| "learning_rate": 8.06072322577298e-05, |
| "loss": 0.0475, |
| "step": 7820 |
| }, |
| { |
| "epoch": 8.063851699279093, |
| "grad_norm": 0.3050336241722107, |
| "learning_rate": 8.055272512277537e-05, |
| "loss": 0.0484, |
| "step": 7830 |
| }, |
| { |
| "epoch": 8.07415036045314, |
| "grad_norm": 0.4158398509025574, |
| "learning_rate": 8.049815998088368e-05, |
| "loss": 0.0498, |
| "step": 7840 |
| }, |
| { |
| "epoch": 8.084449021627188, |
| "grad_norm": 0.25001806020736694, |
| "learning_rate": 8.044353693565127e-05, |
| "loss": 0.043, |
| "step": 7850 |
| }, |
| { |
| "epoch": 8.094747682801236, |
| "grad_norm": 0.35990580916404724, |
| "learning_rate": 8.038885609078464e-05, |
| "loss": 0.0402, |
| "step": 7860 |
| }, |
| { |
| "epoch": 8.105046343975284, |
| "grad_norm": 0.32985004782676697, |
| "learning_rate": 8.033411755009999e-05, |
| "loss": 0.0359, |
| "step": 7870 |
| }, |
| { |
| "epoch": 8.115345005149331, |
| "grad_norm": 0.3465685546398163, |
| "learning_rate": 8.027932141752309e-05, |
| "loss": 0.0466, |
| "step": 7880 |
| }, |
| { |
| "epoch": 8.125643666323377, |
| "grad_norm": 0.2139962762594223, |
| "learning_rate": 8.022446779708903e-05, |
| "loss": 0.0391, |
| "step": 7890 |
| }, |
| { |
| "epoch": 8.135942327497425, |
| "grad_norm": 0.3868210017681122, |
| "learning_rate": 8.016955679294206e-05, |
| "loss": 0.0445, |
| "step": 7900 |
| }, |
| { |
| "epoch": 8.146240988671472, |
| "grad_norm": 0.3728034496307373, |
| "learning_rate": 8.011458850933538e-05, |
| "loss": 0.0467, |
| "step": 7910 |
| }, |
| { |
| "epoch": 8.15653964984552, |
| "grad_norm": 0.31305524706840515, |
| "learning_rate": 8.005956305063091e-05, |
| "loss": 0.0422, |
| "step": 7920 |
| }, |
| { |
| "epoch": 8.166838311019568, |
| "grad_norm": 0.4065026044845581, |
| "learning_rate": 8.000448052129917e-05, |
| "loss": 0.0421, |
| "step": 7930 |
| }, |
| { |
| "epoch": 8.177136972193615, |
| "grad_norm": 0.29192665219306946, |
| "learning_rate": 7.9949341025919e-05, |
| "loss": 0.0509, |
| "step": 7940 |
| }, |
| { |
| "epoch": 8.187435633367663, |
| "grad_norm": 0.3657580316066742, |
| "learning_rate": 7.989414466917741e-05, |
| "loss": 0.0437, |
| "step": 7950 |
| }, |
| { |
| "epoch": 8.197734294541709, |
| "grad_norm": 0.36567407846450806, |
| "learning_rate": 7.983889155586935e-05, |
| "loss": 0.0511, |
| "step": 7960 |
| }, |
| { |
| "epoch": 8.208032955715757, |
| "grad_norm": 0.45859163999557495, |
| "learning_rate": 7.978358179089755e-05, |
| "loss": 0.0518, |
| "step": 7970 |
| }, |
| { |
| "epoch": 8.218331616889804, |
| "grad_norm": 0.4686458706855774, |
| "learning_rate": 7.972821547927228e-05, |
| "loss": 0.0512, |
| "step": 7980 |
| }, |
| { |
| "epoch": 8.228630278063852, |
| "grad_norm": 0.530068039894104, |
| "learning_rate": 7.967279272611115e-05, |
| "loss": 0.0504, |
| "step": 7990 |
| }, |
| { |
| "epoch": 8.2389289392379, |
| "grad_norm": 0.7595759034156799, |
| "learning_rate": 7.961731363663898e-05, |
| "loss": 0.0422, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.249227600411947, |
| "grad_norm": 0.317537397146225, |
| "learning_rate": 7.956177831618752e-05, |
| "loss": 0.0464, |
| "step": 8010 |
| }, |
| { |
| "epoch": 8.259526261585993, |
| "grad_norm": 0.3107547163963318, |
| "learning_rate": 7.950618687019527e-05, |
| "loss": 0.0427, |
| "step": 8020 |
| }, |
| { |
| "epoch": 8.26982492276004, |
| "grad_norm": 0.29626405239105225, |
| "learning_rate": 7.945053940420732e-05, |
| "loss": 0.0374, |
| "step": 8030 |
| }, |
| { |
| "epoch": 8.280123583934088, |
| "grad_norm": 0.30061104893684387, |
| "learning_rate": 7.939483602387508e-05, |
| "loss": 0.0391, |
| "step": 8040 |
| }, |
| { |
| "epoch": 8.290422245108136, |
| "grad_norm": 0.45963454246520996, |
| "learning_rate": 7.933907683495614e-05, |
| "loss": 0.0451, |
| "step": 8050 |
| }, |
| { |
| "epoch": 8.300720906282184, |
| "grad_norm": 0.36147454380989075, |
| "learning_rate": 7.928326194331404e-05, |
| "loss": 0.0428, |
| "step": 8060 |
| }, |
| { |
| "epoch": 8.311019567456231, |
| "grad_norm": 0.3963719606399536, |
| "learning_rate": 7.922739145491809e-05, |
| "loss": 0.0439, |
| "step": 8070 |
| }, |
| { |
| "epoch": 8.321318228630279, |
| "grad_norm": 0.3437096178531647, |
| "learning_rate": 7.917146547584314e-05, |
| "loss": 0.0475, |
| "step": 8080 |
| }, |
| { |
| "epoch": 8.331616889804325, |
| "grad_norm": 0.36308997869491577, |
| "learning_rate": 7.911548411226941e-05, |
| "loss": 0.0476, |
| "step": 8090 |
| }, |
| { |
| "epoch": 8.341915550978372, |
| "grad_norm": 0.37580838799476624, |
| "learning_rate": 7.905944747048225e-05, |
| "loss": 0.0409, |
| "step": 8100 |
| }, |
| { |
| "epoch": 8.35221421215242, |
| "grad_norm": 0.3824920058250427, |
| "learning_rate": 7.900335565687198e-05, |
| "loss": 0.042, |
| "step": 8110 |
| }, |
| { |
| "epoch": 8.362512873326468, |
| "grad_norm": 0.2326115220785141, |
| "learning_rate": 7.894720877793365e-05, |
| "loss": 0.0427, |
| "step": 8120 |
| }, |
| { |
| "epoch": 8.372811534500515, |
| "grad_norm": 0.4421122074127197, |
| "learning_rate": 7.88910069402669e-05, |
| "loss": 0.0482, |
| "step": 8130 |
| }, |
| { |
| "epoch": 8.383110195674563, |
| "grad_norm": 0.3101021647453308, |
| "learning_rate": 7.883475025057563e-05, |
| "loss": 0.043, |
| "step": 8140 |
| }, |
| { |
| "epoch": 8.393408856848609, |
| "grad_norm": 0.32335329055786133, |
| "learning_rate": 7.877843881566795e-05, |
| "loss": 0.0403, |
| "step": 8150 |
| }, |
| { |
| "epoch": 8.403707518022657, |
| "grad_norm": 0.20502477884292603, |
| "learning_rate": 7.872207274245592e-05, |
| "loss": 0.0467, |
| "step": 8160 |
| }, |
| { |
| "epoch": 8.414006179196704, |
| "grad_norm": 0.4081539809703827, |
| "learning_rate": 7.866565213795527e-05, |
| "loss": 0.0451, |
| "step": 8170 |
| }, |
| { |
| "epoch": 8.424304840370752, |
| "grad_norm": 0.24273471534252167, |
| "learning_rate": 7.860917710928532e-05, |
| "loss": 0.0453, |
| "step": 8180 |
| }, |
| { |
| "epoch": 8.4346035015448, |
| "grad_norm": 0.6472697854042053, |
| "learning_rate": 7.855264776366868e-05, |
| "loss": 0.0442, |
| "step": 8190 |
| }, |
| { |
| "epoch": 8.444902162718847, |
| "grad_norm": 0.38166165351867676, |
| "learning_rate": 7.849606420843111e-05, |
| "loss": 0.0446, |
| "step": 8200 |
| }, |
| { |
| "epoch": 8.455200823892893, |
| "grad_norm": 0.2978123128414154, |
| "learning_rate": 7.843942655100129e-05, |
| "loss": 0.0418, |
| "step": 8210 |
| }, |
| { |
| "epoch": 8.46549948506694, |
| "grad_norm": 0.731920063495636, |
| "learning_rate": 7.838273489891059e-05, |
| "loss": 0.0449, |
| "step": 8220 |
| }, |
| { |
| "epoch": 8.475798146240988, |
| "grad_norm": 0.30535033345222473, |
| "learning_rate": 7.832598935979294e-05, |
| "loss": 0.0427, |
| "step": 8230 |
| }, |
| { |
| "epoch": 8.486096807415036, |
| "grad_norm": 0.2867630422115326, |
| "learning_rate": 7.826919004138453e-05, |
| "loss": 0.043, |
| "step": 8240 |
| }, |
| { |
| "epoch": 8.496395468589084, |
| "grad_norm": 0.28375399112701416, |
| "learning_rate": 7.821233705152371e-05, |
| "loss": 0.0359, |
| "step": 8250 |
| }, |
| { |
| "epoch": 8.506694129763131, |
| "grad_norm": 0.272128164768219, |
| "learning_rate": 7.815543049815066e-05, |
| "loss": 0.0386, |
| "step": 8260 |
| }, |
| { |
| "epoch": 8.516992790937179, |
| "grad_norm": 0.28272774815559387, |
| "learning_rate": 7.809847048930734e-05, |
| "loss": 0.0537, |
| "step": 8270 |
| }, |
| { |
| "epoch": 8.527291452111225, |
| "grad_norm": 0.3891587257385254, |
| "learning_rate": 7.80414571331371e-05, |
| "loss": 0.0445, |
| "step": 8280 |
| }, |
| { |
| "epoch": 8.537590113285273, |
| "grad_norm": 0.28232893347740173, |
| "learning_rate": 7.798439053788467e-05, |
| "loss": 0.0431, |
| "step": 8290 |
| }, |
| { |
| "epoch": 8.54788877445932, |
| "grad_norm": 0.3467990756034851, |
| "learning_rate": 7.792727081189579e-05, |
| "loss": 0.0393, |
| "step": 8300 |
| }, |
| { |
| "epoch": 8.558187435633368, |
| "grad_norm": 0.2620207965373993, |
| "learning_rate": 7.78700980636171e-05, |
| "loss": 0.0424, |
| "step": 8310 |
| }, |
| { |
| "epoch": 8.568486096807415, |
| "grad_norm": 0.35534194111824036, |
| "learning_rate": 7.781287240159592e-05, |
| "loss": 0.0478, |
| "step": 8320 |
| }, |
| { |
| "epoch": 8.578784757981463, |
| "grad_norm": 0.3371882140636444, |
| "learning_rate": 7.775559393448002e-05, |
| "loss": 0.0441, |
| "step": 8330 |
| }, |
| { |
| "epoch": 8.589083419155509, |
| "grad_norm": 0.2300422340631485, |
| "learning_rate": 7.76982627710174e-05, |
| "loss": 0.0445, |
| "step": 8340 |
| }, |
| { |
| "epoch": 8.599382080329557, |
| "grad_norm": 0.3695335388183594, |
| "learning_rate": 7.764087902005616e-05, |
| "loss": 0.0461, |
| "step": 8350 |
| }, |
| { |
| "epoch": 8.609680741503604, |
| "grad_norm": 0.35191041231155396, |
| "learning_rate": 7.75834427905442e-05, |
| "loss": 0.0417, |
| "step": 8360 |
| }, |
| { |
| "epoch": 8.619979402677652, |
| "grad_norm": 0.354792982339859, |
| "learning_rate": 7.752595419152905e-05, |
| "loss": 0.0519, |
| "step": 8370 |
| }, |
| { |
| "epoch": 8.6302780638517, |
| "grad_norm": 0.306573748588562, |
| "learning_rate": 7.746841333215772e-05, |
| "loss": 0.0403, |
| "step": 8380 |
| }, |
| { |
| "epoch": 8.640576725025747, |
| "grad_norm": 0.5979099869728088, |
| "learning_rate": 7.741082032167641e-05, |
| "loss": 0.0372, |
| "step": 8390 |
| }, |
| { |
| "epoch": 8.650875386199793, |
| "grad_norm": 0.3066156208515167, |
| "learning_rate": 7.735317526943029e-05, |
| "loss": 0.0452, |
| "step": 8400 |
| }, |
| { |
| "epoch": 8.66117404737384, |
| "grad_norm": 0.5244849324226379, |
| "learning_rate": 7.729547828486339e-05, |
| "loss": 0.0397, |
| "step": 8410 |
| }, |
| { |
| "epoch": 8.671472708547888, |
| "grad_norm": 0.4708474576473236, |
| "learning_rate": 7.723772947751834e-05, |
| "loss": 0.043, |
| "step": 8420 |
| }, |
| { |
| "epoch": 8.681771369721936, |
| "grad_norm": 0.26460787653923035, |
| "learning_rate": 7.717992895703612e-05, |
| "loss": 0.0381, |
| "step": 8430 |
| }, |
| { |
| "epoch": 8.692070030895984, |
| "grad_norm": 0.35859358310699463, |
| "learning_rate": 7.712207683315594e-05, |
| "loss": 0.0394, |
| "step": 8440 |
| }, |
| { |
| "epoch": 8.702368692070031, |
| "grad_norm": 0.6054845452308655, |
| "learning_rate": 7.706417321571496e-05, |
| "loss": 0.0452, |
| "step": 8450 |
| }, |
| { |
| "epoch": 8.712667353244079, |
| "grad_norm": 0.34088876843452454, |
| "learning_rate": 7.700621821464807e-05, |
| "loss": 0.0392, |
| "step": 8460 |
| }, |
| { |
| "epoch": 8.722966014418125, |
| "grad_norm": 0.3852469027042389, |
| "learning_rate": 7.694821193998779e-05, |
| "loss": 0.0334, |
| "step": 8470 |
| }, |
| { |
| "epoch": 8.733264675592173, |
| "grad_norm": 0.5442830324172974, |
| "learning_rate": 7.68901545018639e-05, |
| "loss": 0.0399, |
| "step": 8480 |
| }, |
| { |
| "epoch": 8.74356333676622, |
| "grad_norm": 0.33397114276885986, |
| "learning_rate": 7.68320460105034e-05, |
| "loss": 0.0453, |
| "step": 8490 |
| }, |
| { |
| "epoch": 8.753861997940268, |
| "grad_norm": 0.47606992721557617, |
| "learning_rate": 7.677388657623019e-05, |
| "loss": 0.0435, |
| "step": 8500 |
| }, |
| { |
| "epoch": 8.764160659114316, |
| "grad_norm": 0.2601523697376251, |
| "learning_rate": 7.671567630946488e-05, |
| "loss": 0.0367, |
| "step": 8510 |
| }, |
| { |
| "epoch": 8.774459320288363, |
| "grad_norm": 0.3622424304485321, |
| "learning_rate": 7.665741532072457e-05, |
| "loss": 0.0506, |
| "step": 8520 |
| }, |
| { |
| "epoch": 8.784757981462409, |
| "grad_norm": 0.34801945090293884, |
| "learning_rate": 7.659910372062268e-05, |
| "loss": 0.0404, |
| "step": 8530 |
| }, |
| { |
| "epoch": 8.795056642636457, |
| "grad_norm": 0.8768806457519531, |
| "learning_rate": 7.654074161986877e-05, |
| "loss": 0.0353, |
| "step": 8540 |
| }, |
| { |
| "epoch": 8.805355303810504, |
| "grad_norm": 0.5520436763763428, |
| "learning_rate": 7.648232912926821e-05, |
| "loss": 0.0422, |
| "step": 8550 |
| }, |
| { |
| "epoch": 8.815653964984552, |
| "grad_norm": 0.24350865185260773, |
| "learning_rate": 7.642386635972202e-05, |
| "loss": 0.0447, |
| "step": 8560 |
| }, |
| { |
| "epoch": 8.8259526261586, |
| "grad_norm": 0.3099444806575775, |
| "learning_rate": 7.636535342222679e-05, |
| "loss": 0.0417, |
| "step": 8570 |
| }, |
| { |
| "epoch": 8.836251287332647, |
| "grad_norm": 0.36553552746772766, |
| "learning_rate": 7.630679042787425e-05, |
| "loss": 0.0393, |
| "step": 8580 |
| }, |
| { |
| "epoch": 8.846549948506695, |
| "grad_norm": 0.24580466747283936, |
| "learning_rate": 7.624817748785122e-05, |
| "loss": 0.0415, |
| "step": 8590 |
| }, |
| { |
| "epoch": 8.85684860968074, |
| "grad_norm": 0.30042266845703125, |
| "learning_rate": 7.618951471343931e-05, |
| "loss": 0.0473, |
| "step": 8600 |
| }, |
| { |
| "epoch": 8.867147270854788, |
| "grad_norm": 0.30034321546554565, |
| "learning_rate": 7.613080221601479e-05, |
| "loss": 0.0436, |
| "step": 8610 |
| }, |
| { |
| "epoch": 8.877445932028836, |
| "grad_norm": 0.3541191518306732, |
| "learning_rate": 7.607204010704831e-05, |
| "loss": 0.047, |
| "step": 8620 |
| }, |
| { |
| "epoch": 8.887744593202884, |
| "grad_norm": 0.20642833411693573, |
| "learning_rate": 7.60132284981047e-05, |
| "loss": 0.0438, |
| "step": 8630 |
| }, |
| { |
| "epoch": 8.898043254376931, |
| "grad_norm": 0.44705307483673096, |
| "learning_rate": 7.59543675008428e-05, |
| "loss": 0.0469, |
| "step": 8640 |
| }, |
| { |
| "epoch": 8.908341915550979, |
| "grad_norm": 0.3301944434642792, |
| "learning_rate": 7.589545722701519e-05, |
| "loss": 0.0402, |
| "step": 8650 |
| }, |
| { |
| "epoch": 8.918640576725025, |
| "grad_norm": 0.2832626402378082, |
| "learning_rate": 7.583649778846801e-05, |
| "loss": 0.0389, |
| "step": 8660 |
| }, |
| { |
| "epoch": 8.928939237899073, |
| "grad_norm": 0.22809769213199615, |
| "learning_rate": 7.577748929714074e-05, |
| "loss": 0.0421, |
| "step": 8670 |
| }, |
| { |
| "epoch": 8.93923789907312, |
| "grad_norm": 0.5481482148170471, |
| "learning_rate": 7.571843186506604e-05, |
| "loss": 0.0433, |
| "step": 8680 |
| }, |
| { |
| "epoch": 8.949536560247168, |
| "grad_norm": 0.28562408685684204, |
| "learning_rate": 7.565932560436942e-05, |
| "loss": 0.0424, |
| "step": 8690 |
| }, |
| { |
| "epoch": 8.959835221421216, |
| "grad_norm": 0.44328007102012634, |
| "learning_rate": 7.560017062726912e-05, |
| "loss": 0.0494, |
| "step": 8700 |
| }, |
| { |
| "epoch": 8.970133882595263, |
| "grad_norm": 0.2741154730319977, |
| "learning_rate": 7.554096704607589e-05, |
| "loss": 0.0439, |
| "step": 8710 |
| }, |
| { |
| "epoch": 8.98043254376931, |
| "grad_norm": 0.2939404547214508, |
| "learning_rate": 7.548171497319274e-05, |
| "loss": 0.0397, |
| "step": 8720 |
| }, |
| { |
| "epoch": 8.990731204943357, |
| "grad_norm": 0.2603434920310974, |
| "learning_rate": 7.542241452111476e-05, |
| "loss": 0.0429, |
| "step": 8730 |
| }, |
| { |
| "epoch": 9.001029866117404, |
| "grad_norm": 0.24690425395965576, |
| "learning_rate": 7.536306580242886e-05, |
| "loss": 0.0416, |
| "step": 8740 |
| }, |
| { |
| "epoch": 9.011328527291452, |
| "grad_norm": 0.3746093213558197, |
| "learning_rate": 7.530366892981362e-05, |
| "loss": 0.0436, |
| "step": 8750 |
| }, |
| { |
| "epoch": 9.0216271884655, |
| "grad_norm": 0.4268042743206024, |
| "learning_rate": 7.524422401603905e-05, |
| "loss": 0.0431, |
| "step": 8760 |
| }, |
| { |
| "epoch": 9.031925849639547, |
| "grad_norm": 0.35786107182502747, |
| "learning_rate": 7.518473117396633e-05, |
| "loss": 0.0404, |
| "step": 8770 |
| }, |
| { |
| "epoch": 9.042224510813595, |
| "grad_norm": 0.31200069189071655, |
| "learning_rate": 7.51251905165477e-05, |
| "loss": 0.0406, |
| "step": 8780 |
| }, |
| { |
| "epoch": 9.052523171987641, |
| "grad_norm": 0.33136019110679626, |
| "learning_rate": 7.506560215682608e-05, |
| "loss": 0.0484, |
| "step": 8790 |
| }, |
| { |
| "epoch": 9.062821833161689, |
| "grad_norm": 0.6744624972343445, |
| "learning_rate": 7.500596620793508e-05, |
| "loss": 0.0424, |
| "step": 8800 |
| }, |
| { |
| "epoch": 9.073120494335736, |
| "grad_norm": 0.5903081893920898, |
| "learning_rate": 7.494628278309858e-05, |
| "loss": 0.0497, |
| "step": 8810 |
| }, |
| { |
| "epoch": 9.083419155509784, |
| "grad_norm": 0.4158842861652374, |
| "learning_rate": 7.488655199563062e-05, |
| "loss": 0.0386, |
| "step": 8820 |
| }, |
| { |
| "epoch": 9.093717816683832, |
| "grad_norm": 0.2558102011680603, |
| "learning_rate": 7.482677395893515e-05, |
| "loss": 0.0477, |
| "step": 8830 |
| }, |
| { |
| "epoch": 9.10401647785788, |
| "grad_norm": 0.27110162377357483, |
| "learning_rate": 7.476694878650586e-05, |
| "loss": 0.0392, |
| "step": 8840 |
| }, |
| { |
| "epoch": 9.114315139031925, |
| "grad_norm": 0.3769000768661499, |
| "learning_rate": 7.470707659192588e-05, |
| "loss": 0.0456, |
| "step": 8850 |
| }, |
| { |
| "epoch": 9.124613800205973, |
| "grad_norm": 0.327959269285202, |
| "learning_rate": 7.464715748886766e-05, |
| "loss": 0.0428, |
| "step": 8860 |
| }, |
| { |
| "epoch": 9.13491246138002, |
| "grad_norm": 0.26664164662361145, |
| "learning_rate": 7.458719159109269e-05, |
| "loss": 0.0397, |
| "step": 8870 |
| }, |
| { |
| "epoch": 9.145211122554068, |
| "grad_norm": 0.4833793044090271, |
| "learning_rate": 7.452717901245132e-05, |
| "loss": 0.0406, |
| "step": 8880 |
| }, |
| { |
| "epoch": 9.155509783728116, |
| "grad_norm": 0.5883243680000305, |
| "learning_rate": 7.44671198668825e-05, |
| "loss": 0.0454, |
| "step": 8890 |
| }, |
| { |
| "epoch": 9.165808444902163, |
| "grad_norm": 0.46850427985191345, |
| "learning_rate": 7.440701426841361e-05, |
| "loss": 0.0405, |
| "step": 8900 |
| }, |
| { |
| "epoch": 9.176107106076211, |
| "grad_norm": 0.2926773726940155, |
| "learning_rate": 7.434686233116022e-05, |
| "loss": 0.0485, |
| "step": 8910 |
| }, |
| { |
| "epoch": 9.186405767250257, |
| "grad_norm": 0.46925362944602966, |
| "learning_rate": 7.428666416932589e-05, |
| "loss": 0.0385, |
| "step": 8920 |
| }, |
| { |
| "epoch": 9.196704428424304, |
| "grad_norm": 0.4723007380962372, |
| "learning_rate": 7.422641989720193e-05, |
| "loss": 0.0378, |
| "step": 8930 |
| }, |
| { |
| "epoch": 9.207003089598352, |
| "grad_norm": 0.3344404995441437, |
| "learning_rate": 7.416612962916718e-05, |
| "loss": 0.0417, |
| "step": 8940 |
| }, |
| { |
| "epoch": 9.2173017507724, |
| "grad_norm": 0.43562179803848267, |
| "learning_rate": 7.410579347968782e-05, |
| "loss": 0.0451, |
| "step": 8950 |
| }, |
| { |
| "epoch": 9.227600411946447, |
| "grad_norm": 0.4855707287788391, |
| "learning_rate": 7.404541156331717e-05, |
| "loss": 0.037, |
| "step": 8960 |
| }, |
| { |
| "epoch": 9.237899073120495, |
| "grad_norm": 0.30603402853012085, |
| "learning_rate": 7.398498399469539e-05, |
| "loss": 0.0436, |
| "step": 8970 |
| }, |
| { |
| "epoch": 9.248197734294541, |
| "grad_norm": 0.2129911333322525, |
| "learning_rate": 7.392451088854934e-05, |
| "loss": 0.0522, |
| "step": 8980 |
| }, |
| { |
| "epoch": 9.258496395468589, |
| "grad_norm": 0.361104279756546, |
| "learning_rate": 7.386399235969235e-05, |
| "loss": 0.0476, |
| "step": 8990 |
| }, |
| { |
| "epoch": 9.268795056642636, |
| "grad_norm": 0.2940675616264343, |
| "learning_rate": 7.380342852302395e-05, |
| "loss": 0.041, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.279093717816684, |
| "grad_norm": 0.18984778225421906, |
| "learning_rate": 7.374281949352973e-05, |
| "loss": 0.0368, |
| "step": 9010 |
| }, |
| { |
| "epoch": 9.289392378990732, |
| "grad_norm": 0.31379929184913635, |
| "learning_rate": 7.368216538628108e-05, |
| "loss": 0.0383, |
| "step": 9020 |
| }, |
| { |
| "epoch": 9.29969104016478, |
| "grad_norm": 0.26660239696502686, |
| "learning_rate": 7.362146631643495e-05, |
| "loss": 0.0414, |
| "step": 9030 |
| }, |
| { |
| "epoch": 9.309989701338825, |
| "grad_norm": 0.5518432259559631, |
| "learning_rate": 7.356072239923366e-05, |
| "loss": 0.0414, |
| "step": 9040 |
| }, |
| { |
| "epoch": 9.320288362512873, |
| "grad_norm": 0.3368736207485199, |
| "learning_rate": 7.349993375000468e-05, |
| "loss": 0.0383, |
| "step": 9050 |
| }, |
| { |
| "epoch": 9.33058702368692, |
| "grad_norm": 1.1266578435897827, |
| "learning_rate": 7.343910048416043e-05, |
| "loss": 0.045, |
| "step": 9060 |
| }, |
| { |
| "epoch": 9.340885684860968, |
| "grad_norm": 0.39378392696380615, |
| "learning_rate": 7.337822271719802e-05, |
| "loss": 0.035, |
| "step": 9070 |
| }, |
| { |
| "epoch": 9.351184346035016, |
| "grad_norm": 0.30862686038017273, |
| "learning_rate": 7.331730056469901e-05, |
| "loss": 0.0405, |
| "step": 9080 |
| }, |
| { |
| "epoch": 9.361483007209063, |
| "grad_norm": 0.3060796856880188, |
| "learning_rate": 7.325633414232933e-05, |
| "loss": 0.0336, |
| "step": 9090 |
| }, |
| { |
| "epoch": 9.371781668383111, |
| "grad_norm": 0.33509689569473267, |
| "learning_rate": 7.319532356583884e-05, |
| "loss": 0.0371, |
| "step": 9100 |
| }, |
| { |
| "epoch": 9.382080329557157, |
| "grad_norm": 0.320268452167511, |
| "learning_rate": 7.313426895106133e-05, |
| "loss": 0.0397, |
| "step": 9110 |
| }, |
| { |
| "epoch": 9.392378990731205, |
| "grad_norm": 0.7082319259643555, |
| "learning_rate": 7.307317041391415e-05, |
| "loss": 0.0515, |
| "step": 9120 |
| }, |
| { |
| "epoch": 9.402677651905252, |
| "grad_norm": 0.29491856694221497, |
| "learning_rate": 7.301202807039801e-05, |
| "loss": 0.033, |
| "step": 9130 |
| }, |
| { |
| "epoch": 9.4129763130793, |
| "grad_norm": 0.5242739319801331, |
| "learning_rate": 7.295084203659689e-05, |
| "loss": 0.0426, |
| "step": 9140 |
| }, |
| { |
| "epoch": 9.423274974253347, |
| "grad_norm": 0.38009077310562134, |
| "learning_rate": 7.288961242867762e-05, |
| "loss": 0.0401, |
| "step": 9150 |
| }, |
| { |
| "epoch": 9.433573635427395, |
| "grad_norm": 0.3632734417915344, |
| "learning_rate": 7.282833936288981e-05, |
| "loss": 0.0409, |
| "step": 9160 |
| }, |
| { |
| "epoch": 9.443872296601441, |
| "grad_norm": 0.23923484981060028, |
| "learning_rate": 7.276702295556557e-05, |
| "loss": 0.0415, |
| "step": 9170 |
| }, |
| { |
| "epoch": 9.454170957775489, |
| "grad_norm": 0.2659699320793152, |
| "learning_rate": 7.27056633231193e-05, |
| "loss": 0.0472, |
| "step": 9180 |
| }, |
| { |
| "epoch": 9.464469618949536, |
| "grad_norm": 0.4790288805961609, |
| "learning_rate": 7.264426058204741e-05, |
| "loss": 0.0423, |
| "step": 9190 |
| }, |
| { |
| "epoch": 9.474768280123584, |
| "grad_norm": 0.6561058163642883, |
| "learning_rate": 7.258281484892829e-05, |
| "loss": 0.0388, |
| "step": 9200 |
| }, |
| { |
| "epoch": 9.485066941297632, |
| "grad_norm": 0.28043946623802185, |
| "learning_rate": 7.252132624042182e-05, |
| "loss": 0.0405, |
| "step": 9210 |
| }, |
| { |
| "epoch": 9.49536560247168, |
| "grad_norm": 0.2707328200340271, |
| "learning_rate": 7.245979487326933e-05, |
| "loss": 0.0387, |
| "step": 9220 |
| }, |
| { |
| "epoch": 9.505664263645727, |
| "grad_norm": 0.32571718096733093, |
| "learning_rate": 7.239822086429335e-05, |
| "loss": 0.0463, |
| "step": 9230 |
| }, |
| { |
| "epoch": 9.515962924819773, |
| "grad_norm": 0.25263553857803345, |
| "learning_rate": 7.233660433039734e-05, |
| "loss": 0.0379, |
| "step": 9240 |
| }, |
| { |
| "epoch": 9.52626158599382, |
| "grad_norm": 0.2631954848766327, |
| "learning_rate": 7.227494538856552e-05, |
| "loss": 0.0424, |
| "step": 9250 |
| }, |
| { |
| "epoch": 9.536560247167868, |
| "grad_norm": 0.40027284622192383, |
| "learning_rate": 7.221324415586261e-05, |
| "loss": 0.0386, |
| "step": 9260 |
| }, |
| { |
| "epoch": 9.546858908341916, |
| "grad_norm": 0.6129460334777832, |
| "learning_rate": 7.215150074943365e-05, |
| "loss": 0.0364, |
| "step": 9270 |
| }, |
| { |
| "epoch": 9.557157569515963, |
| "grad_norm": 0.21345844864845276, |
| "learning_rate": 7.20897152865037e-05, |
| "loss": 0.0417, |
| "step": 9280 |
| }, |
| { |
| "epoch": 9.567456230690011, |
| "grad_norm": 2.892314910888672, |
| "learning_rate": 7.20278878843777e-05, |
| "loss": 0.0385, |
| "step": 9290 |
| }, |
| { |
| "epoch": 9.577754891864057, |
| "grad_norm": 0.3310607969760895, |
| "learning_rate": 7.196601866044023e-05, |
| "loss": 0.0384, |
| "step": 9300 |
| }, |
| { |
| "epoch": 9.588053553038105, |
| "grad_norm": 0.20281566679477692, |
| "learning_rate": 7.190410773215524e-05, |
| "loss": 0.0521, |
| "step": 9310 |
| }, |
| { |
| "epoch": 9.598352214212152, |
| "grad_norm": 0.5886447429656982, |
| "learning_rate": 7.184215521706585e-05, |
| "loss": 0.0434, |
| "step": 9320 |
| }, |
| { |
| "epoch": 9.6086508753862, |
| "grad_norm": 0.3617643713951111, |
| "learning_rate": 7.178016123279421e-05, |
| "loss": 0.0376, |
| "step": 9330 |
| }, |
| { |
| "epoch": 9.618949536560248, |
| "grad_norm": 0.7849340438842773, |
| "learning_rate": 7.17181258970411e-05, |
| "loss": 0.0443, |
| "step": 9340 |
| }, |
| { |
| "epoch": 9.629248197734295, |
| "grad_norm": 0.23935212194919586, |
| "learning_rate": 7.16560493275859e-05, |
| "loss": 0.0358, |
| "step": 9350 |
| }, |
| { |
| "epoch": 9.639546858908343, |
| "grad_norm": 0.2504394054412842, |
| "learning_rate": 7.159393164228622e-05, |
| "loss": 0.0398, |
| "step": 9360 |
| }, |
| { |
| "epoch": 9.649845520082389, |
| "grad_norm": 0.27913835644721985, |
| "learning_rate": 7.153177295907774e-05, |
| "loss": 0.0478, |
| "step": 9370 |
| }, |
| { |
| "epoch": 9.660144181256436, |
| "grad_norm": 0.36944735050201416, |
| "learning_rate": 7.1469573395974e-05, |
| "loss": 0.0447, |
| "step": 9380 |
| }, |
| { |
| "epoch": 9.670442842430484, |
| "grad_norm": 0.3498212695121765, |
| "learning_rate": 7.140733307106615e-05, |
| "loss": 0.045, |
| "step": 9390 |
| }, |
| { |
| "epoch": 9.680741503604532, |
| "grad_norm": 0.5487031936645508, |
| "learning_rate": 7.13450521025227e-05, |
| "loss": 0.0436, |
| "step": 9400 |
| }, |
| { |
| "epoch": 9.69104016477858, |
| "grad_norm": 0.2848561108112335, |
| "learning_rate": 7.128273060858935e-05, |
| "loss": 0.043, |
| "step": 9410 |
| }, |
| { |
| "epoch": 9.701338825952627, |
| "grad_norm": 0.6054211258888245, |
| "learning_rate": 7.122036870758875e-05, |
| "loss": 0.0417, |
| "step": 9420 |
| }, |
| { |
| "epoch": 9.711637487126673, |
| "grad_norm": 0.40893781185150146, |
| "learning_rate": 7.115796651792023e-05, |
| "loss": 0.0449, |
| "step": 9430 |
| }, |
| { |
| "epoch": 9.72193614830072, |
| "grad_norm": 0.252646803855896, |
| "learning_rate": 7.109552415805964e-05, |
| "loss": 0.0411, |
| "step": 9440 |
| }, |
| { |
| "epoch": 9.732234809474768, |
| "grad_norm": 0.24804732203483582, |
| "learning_rate": 7.10330417465591e-05, |
| "loss": 0.0453, |
| "step": 9450 |
| }, |
| { |
| "epoch": 9.742533470648816, |
| "grad_norm": 0.315452516078949, |
| "learning_rate": 7.097051940204677e-05, |
| "loss": 0.0407, |
| "step": 9460 |
| }, |
| { |
| "epoch": 9.752832131822863, |
| "grad_norm": 0.2799887955188751, |
| "learning_rate": 7.090795724322661e-05, |
| "loss": 0.04, |
| "step": 9470 |
| }, |
| { |
| "epoch": 9.763130792996911, |
| "grad_norm": 0.38197386264801025, |
| "learning_rate": 7.084535538887816e-05, |
| "loss": 0.0421, |
| "step": 9480 |
| }, |
| { |
| "epoch": 9.773429454170957, |
| "grad_norm": 0.23834991455078125, |
| "learning_rate": 7.078271395785638e-05, |
| "loss": 0.0337, |
| "step": 9490 |
| }, |
| { |
| "epoch": 9.783728115345005, |
| "grad_norm": 0.3643531799316406, |
| "learning_rate": 7.07200330690913e-05, |
| "loss": 0.0503, |
| "step": 9500 |
| }, |
| { |
| "epoch": 9.794026776519052, |
| "grad_norm": 0.6071295142173767, |
| "learning_rate": 7.06573128415879e-05, |
| "loss": 0.0492, |
| "step": 9510 |
| }, |
| { |
| "epoch": 9.8043254376931, |
| "grad_norm": 0.4726833701133728, |
| "learning_rate": 7.059455339442589e-05, |
| "loss": 0.0437, |
| "step": 9520 |
| }, |
| { |
| "epoch": 9.814624098867148, |
| "grad_norm": 0.43971115350723267, |
| "learning_rate": 7.053175484675935e-05, |
| "loss": 0.0372, |
| "step": 9530 |
| }, |
| { |
| "epoch": 9.824922760041195, |
| "grad_norm": 0.372178316116333, |
| "learning_rate": 7.046891731781667e-05, |
| "loss": 0.0419, |
| "step": 9540 |
| }, |
| { |
| "epoch": 9.835221421215241, |
| "grad_norm": 0.36207082867622375, |
| "learning_rate": 7.04060409269002e-05, |
| "loss": 0.0414, |
| "step": 9550 |
| }, |
| { |
| "epoch": 9.845520082389289, |
| "grad_norm": 0.34154579043388367, |
| "learning_rate": 7.034312579338611e-05, |
| "loss": 0.044, |
| "step": 9560 |
| }, |
| { |
| "epoch": 9.855818743563336, |
| "grad_norm": 0.22793729603290558, |
| "learning_rate": 7.028017203672412e-05, |
| "loss": 0.0351, |
| "step": 9570 |
| }, |
| { |
| "epoch": 9.866117404737384, |
| "grad_norm": 0.24308715760707855, |
| "learning_rate": 7.021717977643726e-05, |
| "loss": 0.0386, |
| "step": 9580 |
| }, |
| { |
| "epoch": 9.876416065911432, |
| "grad_norm": 0.28684303164482117, |
| "learning_rate": 7.015414913212166e-05, |
| "loss": 0.0463, |
| "step": 9590 |
| }, |
| { |
| "epoch": 9.88671472708548, |
| "grad_norm": 0.38573402166366577, |
| "learning_rate": 7.009108022344637e-05, |
| "loss": 0.0395, |
| "step": 9600 |
| }, |
| { |
| "epoch": 9.897013388259527, |
| "grad_norm": 0.5661159157752991, |
| "learning_rate": 7.002797317015302e-05, |
| "loss": 0.0369, |
| "step": 9610 |
| }, |
| { |
| "epoch": 9.907312049433573, |
| "grad_norm": 0.4411841332912445, |
| "learning_rate": 6.996482809205574e-05, |
| "loss": 0.0464, |
| "step": 9620 |
| }, |
| { |
| "epoch": 9.91761071060762, |
| "grad_norm": 0.30994483828544617, |
| "learning_rate": 6.990164510904077e-05, |
| "loss": 0.0437, |
| "step": 9630 |
| }, |
| { |
| "epoch": 9.927909371781668, |
| "grad_norm": 0.24947911500930786, |
| "learning_rate": 6.983842434106637e-05, |
| "loss": 0.0385, |
| "step": 9640 |
| }, |
| { |
| "epoch": 9.938208032955716, |
| "grad_norm": 0.23983316123485565, |
| "learning_rate": 6.977516590816255e-05, |
| "loss": 0.0428, |
| "step": 9650 |
| }, |
| { |
| "epoch": 9.948506694129764, |
| "grad_norm": 0.35743358731269836, |
| "learning_rate": 6.971186993043076e-05, |
| "loss": 0.0391, |
| "step": 9660 |
| }, |
| { |
| "epoch": 9.958805355303811, |
| "grad_norm": 0.23140031099319458, |
| "learning_rate": 6.964853652804382e-05, |
| "loss": 0.0366, |
| "step": 9670 |
| }, |
| { |
| "epoch": 9.969104016477857, |
| "grad_norm": 0.2773256003856659, |
| "learning_rate": 6.958516582124552e-05, |
| "loss": 0.0414, |
| "step": 9680 |
| }, |
| { |
| "epoch": 9.979402677651905, |
| "grad_norm": 0.28996285796165466, |
| "learning_rate": 6.952175793035053e-05, |
| "loss": 0.0406, |
| "step": 9690 |
| }, |
| { |
| "epoch": 9.989701338825952, |
| "grad_norm": 0.4465389549732208, |
| "learning_rate": 6.945831297574414e-05, |
| "loss": 0.0388, |
| "step": 9700 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.44371479749679565, |
| "learning_rate": 6.939483107788192e-05, |
| "loss": 0.0434, |
| "step": 9710 |
| }, |
| { |
| "epoch": 10.010298661174048, |
| "grad_norm": 0.26840487122535706, |
| "learning_rate": 6.933131235728967e-05, |
| "loss": 0.0426, |
| "step": 9720 |
| }, |
| { |
| "epoch": 10.020597322348095, |
| "grad_norm": 0.46961385011672974, |
| "learning_rate": 6.926775693456303e-05, |
| "loss": 0.0457, |
| "step": 9730 |
| }, |
| { |
| "epoch": 10.030895983522143, |
| "grad_norm": 0.2932722866535187, |
| "learning_rate": 6.920416493036737e-05, |
| "loss": 0.0444, |
| "step": 9740 |
| }, |
| { |
| "epoch": 10.041194644696189, |
| "grad_norm": 0.4758000671863556, |
| "learning_rate": 6.91405364654375e-05, |
| "loss": 0.0433, |
| "step": 9750 |
| }, |
| { |
| "epoch": 10.051493305870236, |
| "grad_norm": 0.960474967956543, |
| "learning_rate": 6.907687166057746e-05, |
| "loss": 0.0475, |
| "step": 9760 |
| }, |
| { |
| "epoch": 10.061791967044284, |
| "grad_norm": 0.3127216398715973, |
| "learning_rate": 6.901317063666025e-05, |
| "loss": 0.0428, |
| "step": 9770 |
| }, |
| { |
| "epoch": 10.072090628218332, |
| "grad_norm": 0.3544008433818817, |
| "learning_rate": 6.894943351462769e-05, |
| "loss": 0.0432, |
| "step": 9780 |
| }, |
| { |
| "epoch": 10.08238928939238, |
| "grad_norm": 0.3885089159011841, |
| "learning_rate": 6.888566041549008e-05, |
| "loss": 0.043, |
| "step": 9790 |
| }, |
| { |
| "epoch": 10.092687950566427, |
| "grad_norm": 0.4694571793079376, |
| "learning_rate": 6.882185146032607e-05, |
| "loss": 0.0426, |
| "step": 9800 |
| }, |
| { |
| "epoch": 10.102986611740473, |
| "grad_norm": 0.43533068895339966, |
| "learning_rate": 6.875800677028235e-05, |
| "loss": 0.0425, |
| "step": 9810 |
| }, |
| { |
| "epoch": 10.11328527291452, |
| "grad_norm": 0.2700258493423462, |
| "learning_rate": 6.869412646657352e-05, |
| "loss": 0.0404, |
| "step": 9820 |
| }, |
| { |
| "epoch": 10.123583934088568, |
| "grad_norm": 0.5093254446983337, |
| "learning_rate": 6.863021067048168e-05, |
| "loss": 0.0486, |
| "step": 9830 |
| }, |
| { |
| "epoch": 10.133882595262616, |
| "grad_norm": 0.20484302937984467, |
| "learning_rate": 6.856625950335645e-05, |
| "loss": 0.0425, |
| "step": 9840 |
| }, |
| { |
| "epoch": 10.144181256436664, |
| "grad_norm": 0.2024274617433548, |
| "learning_rate": 6.850227308661448e-05, |
| "loss": 0.0496, |
| "step": 9850 |
| }, |
| { |
| "epoch": 10.154479917610711, |
| "grad_norm": 0.33070307970046997, |
| "learning_rate": 6.843825154173945e-05, |
| "loss": 0.0469, |
| "step": 9860 |
| }, |
| { |
| "epoch": 10.164778578784759, |
| "grad_norm": 0.27297335863113403, |
| "learning_rate": 6.837419499028166e-05, |
| "loss": 0.0433, |
| "step": 9870 |
| }, |
| { |
| "epoch": 10.175077239958805, |
| "grad_norm": 0.3384619951248169, |
| "learning_rate": 6.831010355385791e-05, |
| "loss": 0.0493, |
| "step": 9880 |
| }, |
| { |
| "epoch": 10.185375901132852, |
| "grad_norm": 0.3151882588863373, |
| "learning_rate": 6.824597735415123e-05, |
| "loss": 0.0387, |
| "step": 9890 |
| }, |
| { |
| "epoch": 10.1956745623069, |
| "grad_norm": 0.36280104517936707, |
| "learning_rate": 6.818181651291062e-05, |
| "loss": 0.0366, |
| "step": 9900 |
| }, |
| { |
| "epoch": 10.205973223480948, |
| "grad_norm": 0.3145824670791626, |
| "learning_rate": 6.811762115195089e-05, |
| "loss": 0.0349, |
| "step": 9910 |
| }, |
| { |
| "epoch": 10.216271884654995, |
| "grad_norm": 0.5121240615844727, |
| "learning_rate": 6.805339139315239e-05, |
| "loss": 0.0444, |
| "step": 9920 |
| }, |
| { |
| "epoch": 10.226570545829043, |
| "grad_norm": 0.3912114202976227, |
| "learning_rate": 6.798912735846072e-05, |
| "loss": 0.0355, |
| "step": 9930 |
| }, |
| { |
| "epoch": 10.236869207003089, |
| "grad_norm": 0.3818724751472473, |
| "learning_rate": 6.792482916988661e-05, |
| "loss": 0.0355, |
| "step": 9940 |
| }, |
| { |
| "epoch": 10.247167868177137, |
| "grad_norm": 0.4238334894180298, |
| "learning_rate": 6.786049694950564e-05, |
| "loss": 0.0407, |
| "step": 9950 |
| }, |
| { |
| "epoch": 10.257466529351184, |
| "grad_norm": 0.34777218103408813, |
| "learning_rate": 6.779613081945795e-05, |
| "loss": 0.0414, |
| "step": 9960 |
| }, |
| { |
| "epoch": 10.267765190525232, |
| "grad_norm": 0.21516083180904388, |
| "learning_rate": 6.77317309019481e-05, |
| "loss": 0.0435, |
| "step": 9970 |
| }, |
| { |
| "epoch": 10.27806385169928, |
| "grad_norm": 0.5008741617202759, |
| "learning_rate": 6.766729731924481e-05, |
| "loss": 0.0428, |
| "step": 9980 |
| }, |
| { |
| "epoch": 10.288362512873327, |
| "grad_norm": 0.3099767863750458, |
| "learning_rate": 6.760283019368067e-05, |
| "loss": 0.041, |
| "step": 9990 |
| }, |
| { |
| "epoch": 10.298661174047373, |
| "grad_norm": 0.3440527319908142, |
| "learning_rate": 6.753832964765199e-05, |
| "loss": 0.043, |
| "step": 10000 |
| }, |
| { |
| "epoch": 10.30895983522142, |
| "grad_norm": 0.22595007717609406, |
| "learning_rate": 6.747379580361853e-05, |
| "loss": 0.0416, |
| "step": 10010 |
| }, |
| { |
| "epoch": 10.319258496395468, |
| "grad_norm": 0.41214779019355774, |
| "learning_rate": 6.740922878410324e-05, |
| "loss": 0.0429, |
| "step": 10020 |
| }, |
| { |
| "epoch": 10.329557157569516, |
| "grad_norm": 0.2798968553543091, |
| "learning_rate": 6.73446287116921e-05, |
| "loss": 0.0348, |
| "step": 10030 |
| }, |
| { |
| "epoch": 10.339855818743564, |
| "grad_norm": 0.4669380784034729, |
| "learning_rate": 6.727999570903381e-05, |
| "loss": 0.0401, |
| "step": 10040 |
| }, |
| { |
| "epoch": 10.350154479917611, |
| "grad_norm": 0.6160303950309753, |
| "learning_rate": 6.721532989883958e-05, |
| "loss": 0.0419, |
| "step": 10050 |
| }, |
| { |
| "epoch": 10.360453141091659, |
| "grad_norm": 0.45790204405784607, |
| "learning_rate": 6.715063140388297e-05, |
| "loss": 0.0407, |
| "step": 10060 |
| }, |
| { |
| "epoch": 10.370751802265705, |
| "grad_norm": 0.3356086015701294, |
| "learning_rate": 6.708590034699954e-05, |
| "loss": 0.0371, |
| "step": 10070 |
| }, |
| { |
| "epoch": 10.381050463439752, |
| "grad_norm": 0.2664395272731781, |
| "learning_rate": 6.702113685108668e-05, |
| "loss": 0.042, |
| "step": 10080 |
| }, |
| { |
| "epoch": 10.3913491246138, |
| "grad_norm": 0.452416330575943, |
| "learning_rate": 6.695634103910336e-05, |
| "loss": 0.0343, |
| "step": 10090 |
| }, |
| { |
| "epoch": 10.401647785787848, |
| "grad_norm": 0.25324398279190063, |
| "learning_rate": 6.689151303406995e-05, |
| "loss": 0.0387, |
| "step": 10100 |
| }, |
| { |
| "epoch": 10.411946446961895, |
| "grad_norm": 0.49538522958755493, |
| "learning_rate": 6.682665295906789e-05, |
| "loss": 0.0435, |
| "step": 10110 |
| }, |
| { |
| "epoch": 10.422245108135943, |
| "grad_norm": 0.4202151596546173, |
| "learning_rate": 6.676176093723952e-05, |
| "loss": 0.0379, |
| "step": 10120 |
| }, |
| { |
| "epoch": 10.432543769309989, |
| "grad_norm": 0.37350866198539734, |
| "learning_rate": 6.669683709178788e-05, |
| "loss": 0.0344, |
| "step": 10130 |
| }, |
| { |
| "epoch": 10.442842430484037, |
| "grad_norm": 0.33116838335990906, |
| "learning_rate": 6.663188154597635e-05, |
| "loss": 0.0402, |
| "step": 10140 |
| }, |
| { |
| "epoch": 10.453141091658084, |
| "grad_norm": 0.2758043706417084, |
| "learning_rate": 6.656689442312855e-05, |
| "loss": 0.0369, |
| "step": 10150 |
| }, |
| { |
| "epoch": 10.463439752832132, |
| "grad_norm": 0.7657129168510437, |
| "learning_rate": 6.650187584662804e-05, |
| "loss": 0.0384, |
| "step": 10160 |
| }, |
| { |
| "epoch": 10.47373841400618, |
| "grad_norm": 0.5935283899307251, |
| "learning_rate": 6.643682593991812e-05, |
| "loss": 0.042, |
| "step": 10170 |
| }, |
| { |
| "epoch": 10.484037075180227, |
| "grad_norm": 0.30886420607566833, |
| "learning_rate": 6.637174482650149e-05, |
| "loss": 0.0418, |
| "step": 10180 |
| }, |
| { |
| "epoch": 10.494335736354273, |
| "grad_norm": 0.3029436767101288, |
| "learning_rate": 6.630663262994023e-05, |
| "loss": 0.0347, |
| "step": 10190 |
| }, |
| { |
| "epoch": 10.50463439752832, |
| "grad_norm": 1.5523313283920288, |
| "learning_rate": 6.62414894738553e-05, |
| "loss": 0.0425, |
| "step": 10200 |
| }, |
| { |
| "epoch": 10.514933058702368, |
| "grad_norm": 0.42397624254226685, |
| "learning_rate": 6.617631548192654e-05, |
| "loss": 0.037, |
| "step": 10210 |
| }, |
| { |
| "epoch": 10.525231719876416, |
| "grad_norm": 0.22810333967208862, |
| "learning_rate": 6.611111077789229e-05, |
| "loss": 0.0363, |
| "step": 10220 |
| }, |
| { |
| "epoch": 10.535530381050464, |
| "grad_norm": 2.0810601711273193, |
| "learning_rate": 6.604587548554918e-05, |
| "loss": 0.0354, |
| "step": 10230 |
| }, |
| { |
| "epoch": 10.545829042224511, |
| "grad_norm": 0.3718070089817047, |
| "learning_rate": 6.598060972875197e-05, |
| "loss": 0.045, |
| "step": 10240 |
| }, |
| { |
| "epoch": 10.556127703398559, |
| "grad_norm": 0.4691748321056366, |
| "learning_rate": 6.591531363141322e-05, |
| "loss": 0.0437, |
| "step": 10250 |
| }, |
| { |
| "epoch": 10.566426364572605, |
| "grad_norm": 0.3207489252090454, |
| "learning_rate": 6.58499873175031e-05, |
| "loss": 0.0415, |
| "step": 10260 |
| }, |
| { |
| "epoch": 10.576725025746653, |
| "grad_norm": 0.3843994140625, |
| "learning_rate": 6.578463091104915e-05, |
| "loss": 0.0402, |
| "step": 10270 |
| }, |
| { |
| "epoch": 10.5870236869207, |
| "grad_norm": 0.3183903992176056, |
| "learning_rate": 6.571924453613604e-05, |
| "loss": 0.0408, |
| "step": 10280 |
| }, |
| { |
| "epoch": 10.597322348094748, |
| "grad_norm": 0.38627657294273376, |
| "learning_rate": 6.565382831690536e-05, |
| "loss": 0.0465, |
| "step": 10290 |
| }, |
| { |
| "epoch": 10.607621009268795, |
| "grad_norm": 0.402624249458313, |
| "learning_rate": 6.558838237755533e-05, |
| "loss": 0.0405, |
| "step": 10300 |
| }, |
| { |
| "epoch": 10.617919670442843, |
| "grad_norm": 0.43303969502449036, |
| "learning_rate": 6.55229068423406e-05, |
| "loss": 0.0425, |
| "step": 10310 |
| }, |
| { |
| "epoch": 10.628218331616889, |
| "grad_norm": 0.23877955973148346, |
| "learning_rate": 6.545740183557205e-05, |
| "loss": 0.0398, |
| "step": 10320 |
| }, |
| { |
| "epoch": 10.638516992790937, |
| "grad_norm": 0.39281123876571655, |
| "learning_rate": 6.539186748161647e-05, |
| "loss": 0.0333, |
| "step": 10330 |
| }, |
| { |
| "epoch": 10.648815653964984, |
| "grad_norm": 0.2784782350063324, |
| "learning_rate": 6.53263039048964e-05, |
| "loss": 0.0309, |
| "step": 10340 |
| }, |
| { |
| "epoch": 10.659114315139032, |
| "grad_norm": 0.30646565556526184, |
| "learning_rate": 6.526071122988981e-05, |
| "loss": 0.0377, |
| "step": 10350 |
| }, |
| { |
| "epoch": 10.66941297631308, |
| "grad_norm": 0.6558123230934143, |
| "learning_rate": 6.519508958112998e-05, |
| "loss": 0.038, |
| "step": 10360 |
| }, |
| { |
| "epoch": 10.679711637487127, |
| "grad_norm": 0.30704358220100403, |
| "learning_rate": 6.512943908320514e-05, |
| "loss": 0.0347, |
| "step": 10370 |
| }, |
| { |
| "epoch": 10.690010298661175, |
| "grad_norm": 0.3183159828186035, |
| "learning_rate": 6.506375986075838e-05, |
| "loss": 0.0351, |
| "step": 10380 |
| }, |
| { |
| "epoch": 10.70030895983522, |
| "grad_norm": 0.4113023579120636, |
| "learning_rate": 6.499805203848721e-05, |
| "loss": 0.042, |
| "step": 10390 |
| }, |
| { |
| "epoch": 10.710607621009268, |
| "grad_norm": 0.3853531777858734, |
| "learning_rate": 6.493231574114352e-05, |
| "loss": 0.0379, |
| "step": 10400 |
| }, |
| { |
| "epoch": 10.720906282183316, |
| "grad_norm": 0.2897813618183136, |
| "learning_rate": 6.486655109353326e-05, |
| "loss": 0.0401, |
| "step": 10410 |
| }, |
| { |
| "epoch": 10.731204943357364, |
| "grad_norm": 0.3457646369934082, |
| "learning_rate": 6.480075822051615e-05, |
| "loss": 0.0424, |
| "step": 10420 |
| }, |
| { |
| "epoch": 10.741503604531411, |
| "grad_norm": 0.25340861082077026, |
| "learning_rate": 6.473493724700554e-05, |
| "loss": 0.0452, |
| "step": 10430 |
| }, |
| { |
| "epoch": 10.751802265705459, |
| "grad_norm": 0.34103667736053467, |
| "learning_rate": 6.466908829796817e-05, |
| "loss": 0.0385, |
| "step": 10440 |
| }, |
| { |
| "epoch": 10.762100926879505, |
| "grad_norm": 0.4537379741668701, |
| "learning_rate": 6.460321149842376e-05, |
| "loss": 0.0374, |
| "step": 10450 |
| }, |
| { |
| "epoch": 10.772399588053553, |
| "grad_norm": 0.31676602363586426, |
| "learning_rate": 6.453730697344509e-05, |
| "loss": 0.0424, |
| "step": 10460 |
| }, |
| { |
| "epoch": 10.7826982492276, |
| "grad_norm": 0.2844392955303192, |
| "learning_rate": 6.447137484815742e-05, |
| "loss": 0.0386, |
| "step": 10470 |
| }, |
| { |
| "epoch": 10.792996910401648, |
| "grad_norm": 0.7473909258842468, |
| "learning_rate": 6.440541524773852e-05, |
| "loss": 0.0487, |
| "step": 10480 |
| }, |
| { |
| "epoch": 10.803295571575696, |
| "grad_norm": 0.2831460237503052, |
| "learning_rate": 6.433942829741825e-05, |
| "loss": 0.0422, |
| "step": 10490 |
| }, |
| { |
| "epoch": 10.813594232749743, |
| "grad_norm": 0.17480167746543884, |
| "learning_rate": 6.427341412247844e-05, |
| "loss": 0.0346, |
| "step": 10500 |
| }, |
| { |
| "epoch": 10.82389289392379, |
| "grad_norm": 0.2777688503265381, |
| "learning_rate": 6.420737284825257e-05, |
| "loss": 0.0364, |
| "step": 10510 |
| }, |
| { |
| "epoch": 10.834191555097837, |
| "grad_norm": 0.42611163854599, |
| "learning_rate": 6.414130460012563e-05, |
| "loss": 0.0419, |
| "step": 10520 |
| }, |
| { |
| "epoch": 10.844490216271884, |
| "grad_norm": 0.28869888186454773, |
| "learning_rate": 6.407520950353377e-05, |
| "loss": 0.039, |
| "step": 10530 |
| }, |
| { |
| "epoch": 10.854788877445932, |
| "grad_norm": 0.25100454688072205, |
| "learning_rate": 6.400908768396414e-05, |
| "loss": 0.041, |
| "step": 10540 |
| }, |
| { |
| "epoch": 10.86508753861998, |
| "grad_norm": 0.3695141673088074, |
| "learning_rate": 6.394293926695458e-05, |
| "loss": 0.0358, |
| "step": 10550 |
| }, |
| { |
| "epoch": 10.875386199794027, |
| "grad_norm": 0.3969493508338928, |
| "learning_rate": 6.387676437809352e-05, |
| "loss": 0.0425, |
| "step": 10560 |
| }, |
| { |
| "epoch": 10.885684860968075, |
| "grad_norm": 0.4601006805896759, |
| "learning_rate": 6.381056314301955e-05, |
| "loss": 0.0424, |
| "step": 10570 |
| }, |
| { |
| "epoch": 10.89598352214212, |
| "grad_norm": 0.2443699687719345, |
| "learning_rate": 6.374433568742135e-05, |
| "loss": 0.0356, |
| "step": 10580 |
| }, |
| { |
| "epoch": 10.906282183316168, |
| "grad_norm": 0.3879341781139374, |
| "learning_rate": 6.367808213703735e-05, |
| "loss": 0.0377, |
| "step": 10590 |
| }, |
| { |
| "epoch": 10.916580844490216, |
| "grad_norm": 0.47516438364982605, |
| "learning_rate": 6.361180261765551e-05, |
| "loss": 0.0403, |
| "step": 10600 |
| }, |
| { |
| "epoch": 10.926879505664264, |
| "grad_norm": 0.8033366799354553, |
| "learning_rate": 6.354549725511312e-05, |
| "loss": 0.0379, |
| "step": 10610 |
| }, |
| { |
| "epoch": 10.937178166838311, |
| "grad_norm": 0.31796321272850037, |
| "learning_rate": 6.347916617529655e-05, |
| "loss": 0.0344, |
| "step": 10620 |
| }, |
| { |
| "epoch": 10.947476828012359, |
| "grad_norm": 0.22474992275238037, |
| "learning_rate": 6.341280950414096e-05, |
| "loss": 0.0362, |
| "step": 10630 |
| }, |
| { |
| "epoch": 10.957775489186405, |
| "grad_norm": 0.3195785880088806, |
| "learning_rate": 6.334642736763011e-05, |
| "loss": 0.0433, |
| "step": 10640 |
| }, |
| { |
| "epoch": 10.968074150360453, |
| "grad_norm": 0.2575366199016571, |
| "learning_rate": 6.328001989179613e-05, |
| "loss": 0.0392, |
| "step": 10650 |
| }, |
| { |
| "epoch": 10.9783728115345, |
| "grad_norm": 0.2524723708629608, |
| "learning_rate": 6.321358720271921e-05, |
| "loss": 0.0361, |
| "step": 10660 |
| }, |
| { |
| "epoch": 10.988671472708548, |
| "grad_norm": 0.3837646245956421, |
| "learning_rate": 6.314712942652744e-05, |
| "loss": 0.0323, |
| "step": 10670 |
| }, |
| { |
| "epoch": 10.998970133882596, |
| "grad_norm": 0.28651994466781616, |
| "learning_rate": 6.308064668939656e-05, |
| "loss": 0.0338, |
| "step": 10680 |
| }, |
| { |
| "epoch": 11.009268795056643, |
| "grad_norm": 0.2467758059501648, |
| "learning_rate": 6.301413911754966e-05, |
| "loss": 0.041, |
| "step": 10690 |
| }, |
| { |
| "epoch": 11.019567456230691, |
| "grad_norm": 0.47968339920043945, |
| "learning_rate": 6.294760683725702e-05, |
| "loss": 0.0393, |
| "step": 10700 |
| }, |
| { |
| "epoch": 11.029866117404737, |
| "grad_norm": 0.43202805519104004, |
| "learning_rate": 6.28810499748358e-05, |
| "loss": 0.04, |
| "step": 10710 |
| }, |
| { |
| "epoch": 11.040164778578784, |
| "grad_norm": 0.27403122186660767, |
| "learning_rate": 6.281446865664984e-05, |
| "loss": 0.0445, |
| "step": 10720 |
| }, |
| { |
| "epoch": 11.050463439752832, |
| "grad_norm": 0.28393465280532837, |
| "learning_rate": 6.274786300910942e-05, |
| "loss": 0.0344, |
| "step": 10730 |
| }, |
| { |
| "epoch": 11.06076210092688, |
| "grad_norm": 0.2949467599391937, |
| "learning_rate": 6.2681233158671e-05, |
| "loss": 0.0396, |
| "step": 10740 |
| }, |
| { |
| "epoch": 11.071060762100927, |
| "grad_norm": 0.30573686957359314, |
| "learning_rate": 6.2614579231837e-05, |
| "loss": 0.0389, |
| "step": 10750 |
| }, |
| { |
| "epoch": 11.081359423274975, |
| "grad_norm": 0.32069993019104004, |
| "learning_rate": 6.254790135515554e-05, |
| "loss": 0.0417, |
| "step": 10760 |
| }, |
| { |
| "epoch": 11.091658084449021, |
| "grad_norm": 0.42498165369033813, |
| "learning_rate": 6.248119965522024e-05, |
| "loss": 0.0435, |
| "step": 10770 |
| }, |
| { |
| "epoch": 11.101956745623069, |
| "grad_norm": 0.2858854830265045, |
| "learning_rate": 6.241447425866988e-05, |
| "loss": 0.0437, |
| "step": 10780 |
| }, |
| { |
| "epoch": 11.112255406797116, |
| "grad_norm": 0.38175907731056213, |
| "learning_rate": 6.234772529218833e-05, |
| "loss": 0.0412, |
| "step": 10790 |
| }, |
| { |
| "epoch": 11.122554067971164, |
| "grad_norm": 0.7922874689102173, |
| "learning_rate": 6.228095288250415e-05, |
| "loss": 0.046, |
| "step": 10800 |
| }, |
| { |
| "epoch": 11.132852729145212, |
| "grad_norm": 0.25292420387268066, |
| "learning_rate": 6.22141571563904e-05, |
| "loss": 0.0372, |
| "step": 10810 |
| }, |
| { |
| "epoch": 11.14315139031926, |
| "grad_norm": 0.49811357259750366, |
| "learning_rate": 6.214733824066443e-05, |
| "loss": 0.0402, |
| "step": 10820 |
| }, |
| { |
| "epoch": 11.153450051493305, |
| "grad_norm": 0.33916985988616943, |
| "learning_rate": 6.208049626218761e-05, |
| "loss": 0.0413, |
| "step": 10830 |
| }, |
| { |
| "epoch": 11.163748712667353, |
| "grad_norm": 0.3031541109085083, |
| "learning_rate": 6.20136313478651e-05, |
| "loss": 0.0409, |
| "step": 10840 |
| }, |
| { |
| "epoch": 11.1740473738414, |
| "grad_norm": 0.34187036752700806, |
| "learning_rate": 6.194674362464563e-05, |
| "loss": 0.0387, |
| "step": 10850 |
| }, |
| { |
| "epoch": 11.184346035015448, |
| "grad_norm": 0.41674673557281494, |
| "learning_rate": 6.187983321952117e-05, |
| "loss": 0.0331, |
| "step": 10860 |
| }, |
| { |
| "epoch": 11.194644696189496, |
| "grad_norm": 0.39333340525627136, |
| "learning_rate": 6.181290025952684e-05, |
| "loss": 0.0355, |
| "step": 10870 |
| }, |
| { |
| "epoch": 11.204943357363543, |
| "grad_norm": 0.23343288898468018, |
| "learning_rate": 6.174594487174047e-05, |
| "loss": 0.037, |
| "step": 10880 |
| }, |
| { |
| "epoch": 11.215242018537591, |
| "grad_norm": 0.26009804010391235, |
| "learning_rate": 6.167896718328259e-05, |
| "loss": 0.036, |
| "step": 10890 |
| }, |
| { |
| "epoch": 11.225540679711637, |
| "grad_norm": 0.3702475428581238, |
| "learning_rate": 6.161196732131601e-05, |
| "loss": 0.038, |
| "step": 10900 |
| }, |
| { |
| "epoch": 11.235839340885684, |
| "grad_norm": 0.22221307456493378, |
| "learning_rate": 6.154494541304561e-05, |
| "loss": 0.033, |
| "step": 10910 |
| }, |
| { |
| "epoch": 11.246138002059732, |
| "grad_norm": 0.27125799655914307, |
| "learning_rate": 6.147790158571821e-05, |
| "loss": 0.0362, |
| "step": 10920 |
| }, |
| { |
| "epoch": 11.25643666323378, |
| "grad_norm": 0.32463979721069336, |
| "learning_rate": 6.141083596662218e-05, |
| "loss": 0.0412, |
| "step": 10930 |
| }, |
| { |
| "epoch": 11.266735324407827, |
| "grad_norm": 0.28501760959625244, |
| "learning_rate": 6.134374868308726e-05, |
| "loss": 0.0414, |
| "step": 10940 |
| }, |
| { |
| "epoch": 11.277033985581875, |
| "grad_norm": 0.39163145422935486, |
| "learning_rate": 6.127663986248434e-05, |
| "loss": 0.0323, |
| "step": 10950 |
| }, |
| { |
| "epoch": 11.287332646755921, |
| "grad_norm": 0.3159801959991455, |
| "learning_rate": 6.120950963222523e-05, |
| "loss": 0.0388, |
| "step": 10960 |
| }, |
| { |
| "epoch": 11.297631307929969, |
| "grad_norm": 0.25974059104919434, |
| "learning_rate": 6.114235811976235e-05, |
| "loss": 0.041, |
| "step": 10970 |
| }, |
| { |
| "epoch": 11.307929969104016, |
| "grad_norm": 0.38364288210868835, |
| "learning_rate": 6.107518545258853e-05, |
| "loss": 0.035, |
| "step": 10980 |
| }, |
| { |
| "epoch": 11.318228630278064, |
| "grad_norm": 0.4129471778869629, |
| "learning_rate": 6.100799175823678e-05, |
| "loss": 0.0455, |
| "step": 10990 |
| }, |
| { |
| "epoch": 11.328527291452112, |
| "grad_norm": 0.3018711805343628, |
| "learning_rate": 6.094077716428e-05, |
| "loss": 0.0431, |
| "step": 11000 |
| }, |
| { |
| "epoch": 11.33882595262616, |
| "grad_norm": 0.5106986165046692, |
| "learning_rate": 6.0873541798330814e-05, |
| "loss": 0.037, |
| "step": 11010 |
| }, |
| { |
| "epoch": 11.349124613800207, |
| "grad_norm": 0.3932558298110962, |
| "learning_rate": 6.080628578804125e-05, |
| "loss": 0.0344, |
| "step": 11020 |
| }, |
| { |
| "epoch": 11.359423274974253, |
| "grad_norm": 0.32844093441963196, |
| "learning_rate": 6.073900926110254e-05, |
| "loss": 0.0386, |
| "step": 11030 |
| }, |
| { |
| "epoch": 11.3697219361483, |
| "grad_norm": 0.22941184043884277, |
| "learning_rate": 6.067171234524488e-05, |
| "loss": 0.0295, |
| "step": 11040 |
| }, |
| { |
| "epoch": 11.380020597322348, |
| "grad_norm": 0.23273223638534546, |
| "learning_rate": 6.0604395168237174e-05, |
| "loss": 0.0334, |
| "step": 11050 |
| }, |
| { |
| "epoch": 11.390319258496396, |
| "grad_norm": 0.26568588614463806, |
| "learning_rate": 6.0537057857886755e-05, |
| "loss": 0.0378, |
| "step": 11060 |
| }, |
| { |
| "epoch": 11.400617919670443, |
| "grad_norm": 0.40793830156326294, |
| "learning_rate": 6.0469700542039234e-05, |
| "loss": 0.033, |
| "step": 11070 |
| }, |
| { |
| "epoch": 11.410916580844491, |
| "grad_norm": 0.308687299489975, |
| "learning_rate": 6.040232334857818e-05, |
| "loss": 0.0411, |
| "step": 11080 |
| }, |
| { |
| "epoch": 11.421215242018537, |
| "grad_norm": 0.359393835067749, |
| "learning_rate": 6.033492640542491e-05, |
| "loss": 0.0394, |
| "step": 11090 |
| }, |
| { |
| "epoch": 11.431513903192585, |
| "grad_norm": 0.37227872014045715, |
| "learning_rate": 6.026750984053821e-05, |
| "loss": 0.0394, |
| "step": 11100 |
| }, |
| { |
| "epoch": 11.441812564366632, |
| "grad_norm": 0.5043579339981079, |
| "learning_rate": 6.020007378191416e-05, |
| "loss": 0.0425, |
| "step": 11110 |
| }, |
| { |
| "epoch": 11.45211122554068, |
| "grad_norm": 0.33296504616737366, |
| "learning_rate": 6.013261835758581e-05, |
| "loss": 0.0358, |
| "step": 11120 |
| }, |
| { |
| "epoch": 11.462409886714727, |
| "grad_norm": 0.19659306108951569, |
| "learning_rate": 6.0065143695623016e-05, |
| "loss": 0.0352, |
| "step": 11130 |
| }, |
| { |
| "epoch": 11.472708547888775, |
| "grad_norm": 0.3573089838027954, |
| "learning_rate": 5.9997649924132146e-05, |
| "loss": 0.0417, |
| "step": 11140 |
| }, |
| { |
| "epoch": 11.483007209062821, |
| "grad_norm": 0.38887205719947815, |
| "learning_rate": 5.993013717125583e-05, |
| "loss": 0.0584, |
| "step": 11150 |
| }, |
| { |
| "epoch": 11.493305870236869, |
| "grad_norm": 0.2696877121925354, |
| "learning_rate": 5.986260556517276e-05, |
| "loss": 0.0335, |
| "step": 11160 |
| }, |
| { |
| "epoch": 11.503604531410916, |
| "grad_norm": 0.2923082411289215, |
| "learning_rate": 5.97950552340974e-05, |
| "loss": 0.0345, |
| "step": 11170 |
| }, |
| { |
| "epoch": 11.513903192584964, |
| "grad_norm": 0.4937894940376282, |
| "learning_rate": 5.972748630627978e-05, |
| "loss": 0.0393, |
| "step": 11180 |
| }, |
| { |
| "epoch": 11.524201853759012, |
| "grad_norm": 0.4129011034965515, |
| "learning_rate": 5.965989891000523e-05, |
| "loss": 0.0392, |
| "step": 11190 |
| }, |
| { |
| "epoch": 11.53450051493306, |
| "grad_norm": 1.712835669517517, |
| "learning_rate": 5.9592293173594174e-05, |
| "loss": 0.0431, |
| "step": 11200 |
| }, |
| { |
| "epoch": 11.544799176107105, |
| "grad_norm": 0.6095489859580994, |
| "learning_rate": 5.9524669225401794e-05, |
| "loss": 0.0481, |
| "step": 11210 |
| }, |
| { |
| "epoch": 11.555097837281153, |
| "grad_norm": 0.26651468873023987, |
| "learning_rate": 5.945702719381791e-05, |
| "loss": 0.0399, |
| "step": 11220 |
| }, |
| { |
| "epoch": 11.5653964984552, |
| "grad_norm": 0.7687448859214783, |
| "learning_rate": 5.9389367207266645e-05, |
| "loss": 0.0422, |
| "step": 11230 |
| }, |
| { |
| "epoch": 11.575695159629248, |
| "grad_norm": 0.3595399558544159, |
| "learning_rate": 5.9321689394206215e-05, |
| "loss": 0.0408, |
| "step": 11240 |
| }, |
| { |
| "epoch": 11.585993820803296, |
| "grad_norm": 0.2710624933242798, |
| "learning_rate": 5.9253993883128666e-05, |
| "loss": 0.0396, |
| "step": 11250 |
| }, |
| { |
| "epoch": 11.596292481977343, |
| "grad_norm": 0.22831162810325623, |
| "learning_rate": 5.918628080255969e-05, |
| "loss": 0.038, |
| "step": 11260 |
| }, |
| { |
| "epoch": 11.606591143151391, |
| "grad_norm": 0.43246155977249146, |
| "learning_rate": 5.9118550281058295e-05, |
| "loss": 0.0334, |
| "step": 11270 |
| }, |
| { |
| "epoch": 11.616889804325437, |
| "grad_norm": 0.6237396597862244, |
| "learning_rate": 5.9050802447216604e-05, |
| "loss": 0.0354, |
| "step": 11280 |
| }, |
| { |
| "epoch": 11.627188465499485, |
| "grad_norm": 0.4510927200317383, |
| "learning_rate": 5.898303742965964e-05, |
| "loss": 0.0408, |
| "step": 11290 |
| }, |
| { |
| "epoch": 11.637487126673532, |
| "grad_norm": 0.2852180004119873, |
| "learning_rate": 5.8915255357045006e-05, |
| "loss": 0.0422, |
| "step": 11300 |
| }, |
| { |
| "epoch": 11.64778578784758, |
| "grad_norm": 0.27295804023742676, |
| "learning_rate": 5.884745635806272e-05, |
| "loss": 0.0441, |
| "step": 11310 |
| }, |
| { |
| "epoch": 11.658084449021628, |
| "grad_norm": 0.3172348737716675, |
| "learning_rate": 5.8779640561434943e-05, |
| "loss": 0.0404, |
| "step": 11320 |
| }, |
| { |
| "epoch": 11.668383110195675, |
| "grad_norm": 0.30553555488586426, |
| "learning_rate": 5.87118080959157e-05, |
| "loss": 0.0391, |
| "step": 11330 |
| }, |
| { |
| "epoch": 11.678681771369721, |
| "grad_norm": 0.3673776388168335, |
| "learning_rate": 5.8643959090290653e-05, |
| "loss": 0.0362, |
| "step": 11340 |
| }, |
| { |
| "epoch": 11.688980432543769, |
| "grad_norm": 0.3422437012195587, |
| "learning_rate": 5.857609367337692e-05, |
| "loss": 0.0384, |
| "step": 11350 |
| }, |
| { |
| "epoch": 11.699279093717816, |
| "grad_norm": 0.2885243892669678, |
| "learning_rate": 5.850821197402272e-05, |
| "loss": 0.035, |
| "step": 11360 |
| }, |
| { |
| "epoch": 11.709577754891864, |
| "grad_norm": 0.30880603194236755, |
| "learning_rate": 5.844031412110722e-05, |
| "loss": 0.0395, |
| "step": 11370 |
| }, |
| { |
| "epoch": 11.719876416065912, |
| "grad_norm": 0.24245630204677582, |
| "learning_rate": 5.837240024354026e-05, |
| "loss": 0.0411, |
| "step": 11380 |
| }, |
| { |
| "epoch": 11.73017507723996, |
| "grad_norm": 0.29702940583229065, |
| "learning_rate": 5.830447047026206e-05, |
| "loss": 0.0362, |
| "step": 11390 |
| }, |
| { |
| "epoch": 11.740473738414007, |
| "grad_norm": 0.23466959595680237, |
| "learning_rate": 5.8236524930243075e-05, |
| "loss": 0.0318, |
| "step": 11400 |
| }, |
| { |
| "epoch": 11.750772399588053, |
| "grad_norm": 0.2884041666984558, |
| "learning_rate": 5.816856375248368e-05, |
| "loss": 0.035, |
| "step": 11410 |
| }, |
| { |
| "epoch": 11.7610710607621, |
| "grad_norm": 0.9963494539260864, |
| "learning_rate": 5.810058706601389e-05, |
| "loss": 0.0377, |
| "step": 11420 |
| }, |
| { |
| "epoch": 11.771369721936148, |
| "grad_norm": 0.6087297201156616, |
| "learning_rate": 5.803259499989323e-05, |
| "loss": 0.037, |
| "step": 11430 |
| }, |
| { |
| "epoch": 11.781668383110196, |
| "grad_norm": 0.41283634305000305, |
| "learning_rate": 5.79645876832104e-05, |
| "loss": 0.0373, |
| "step": 11440 |
| }, |
| { |
| "epoch": 11.791967044284243, |
| "grad_norm": 0.3417651355266571, |
| "learning_rate": 5.7896565245083035e-05, |
| "loss": 0.044, |
| "step": 11450 |
| }, |
| { |
| "epoch": 11.802265705458291, |
| "grad_norm": 0.42142239212989807, |
| "learning_rate": 5.782852781465751e-05, |
| "loss": 0.0419, |
| "step": 11460 |
| }, |
| { |
| "epoch": 11.812564366632337, |
| "grad_norm": 0.34466907382011414, |
| "learning_rate": 5.776047552110866e-05, |
| "loss": 0.042, |
| "step": 11470 |
| }, |
| { |
| "epoch": 11.822863027806385, |
| "grad_norm": 0.29393690824508667, |
| "learning_rate": 5.769240849363952e-05, |
| "loss": 0.0435, |
| "step": 11480 |
| }, |
| { |
| "epoch": 11.833161688980432, |
| "grad_norm": 0.45664796233177185, |
| "learning_rate": 5.7624326861481094e-05, |
| "loss": 0.0296, |
| "step": 11490 |
| }, |
| { |
| "epoch": 11.84346035015448, |
| "grad_norm": 0.276324063539505, |
| "learning_rate": 5.755623075389214e-05, |
| "loss": 0.0397, |
| "step": 11500 |
| }, |
| { |
| "epoch": 11.853759011328528, |
| "grad_norm": 0.35282203555107117, |
| "learning_rate": 5.748812030015891e-05, |
| "loss": 0.042, |
| "step": 11510 |
| }, |
| { |
| "epoch": 11.864057672502575, |
| "grad_norm": 0.8150110840797424, |
| "learning_rate": 5.7419995629594835e-05, |
| "loss": 0.0347, |
| "step": 11520 |
| }, |
| { |
| "epoch": 11.874356333676623, |
| "grad_norm": 0.7844158411026001, |
| "learning_rate": 5.735185687154039e-05, |
| "loss": 0.0458, |
| "step": 11530 |
| }, |
| { |
| "epoch": 11.884654994850669, |
| "grad_norm": 0.2974760830402374, |
| "learning_rate": 5.7283704155362796e-05, |
| "loss": 0.036, |
| "step": 11540 |
| }, |
| { |
| "epoch": 11.894953656024716, |
| "grad_norm": 0.25210148096084595, |
| "learning_rate": 5.7215537610455726e-05, |
| "loss": 0.0366, |
| "step": 11550 |
| }, |
| { |
| "epoch": 11.905252317198764, |
| "grad_norm": 0.30548202991485596, |
| "learning_rate": 5.7147357366239174e-05, |
| "loss": 0.0416, |
| "step": 11560 |
| }, |
| { |
| "epoch": 11.915550978372812, |
| "grad_norm": 0.3365536332130432, |
| "learning_rate": 5.70791635521591e-05, |
| "loss": 0.0371, |
| "step": 11570 |
| }, |
| { |
| "epoch": 11.92584963954686, |
| "grad_norm": 0.2530035674571991, |
| "learning_rate": 5.7010956297687215e-05, |
| "loss": 0.0331, |
| "step": 11580 |
| }, |
| { |
| "epoch": 11.936148300720907, |
| "grad_norm": 0.2957620918750763, |
| "learning_rate": 5.694273573232078e-05, |
| "loss": 0.0395, |
| "step": 11590 |
| }, |
| { |
| "epoch": 11.946446961894953, |
| "grad_norm": 0.2622826099395752, |
| "learning_rate": 5.6874501985582365e-05, |
| "loss": 0.0339, |
| "step": 11600 |
| }, |
| { |
| "epoch": 11.956745623069, |
| "grad_norm": 0.25079742074012756, |
| "learning_rate": 5.6806255187019456e-05, |
| "loss": 0.034, |
| "step": 11610 |
| }, |
| { |
| "epoch": 11.967044284243048, |
| "grad_norm": 0.28562131524086, |
| "learning_rate": 5.67379954662044e-05, |
| "loss": 0.0374, |
| "step": 11620 |
| }, |
| { |
| "epoch": 11.977342945417096, |
| "grad_norm": 0.2564544081687927, |
| "learning_rate": 5.666972295273409e-05, |
| "loss": 0.0362, |
| "step": 11630 |
| }, |
| { |
| "epoch": 11.987641606591144, |
| "grad_norm": 0.3891771733760834, |
| "learning_rate": 5.660143777622964e-05, |
| "loss": 0.0396, |
| "step": 11640 |
| }, |
| { |
| "epoch": 11.997940267765191, |
| "grad_norm": 0.27068471908569336, |
| "learning_rate": 5.653314006633625e-05, |
| "loss": 0.0332, |
| "step": 11650 |
| }, |
| { |
| "epoch": 12.008238928939237, |
| "grad_norm": 0.22479818761348724, |
| "learning_rate": 5.6464829952722955e-05, |
| "loss": 0.0401, |
| "step": 11660 |
| }, |
| { |
| "epoch": 12.018537590113285, |
| "grad_norm": 0.2226206511259079, |
| "learning_rate": 5.639650756508222e-05, |
| "loss": 0.0333, |
| "step": 11670 |
| }, |
| { |
| "epoch": 12.028836251287332, |
| "grad_norm": 0.3488923907279968, |
| "learning_rate": 5.6328173033129925e-05, |
| "loss": 0.0425, |
| "step": 11680 |
| }, |
| { |
| "epoch": 12.03913491246138, |
| "grad_norm": 0.21748347580432892, |
| "learning_rate": 5.6259826486604996e-05, |
| "loss": 0.0292, |
| "step": 11690 |
| }, |
| { |
| "epoch": 12.049433573635428, |
| "grad_norm": 0.3232283294200897, |
| "learning_rate": 5.619146805526908e-05, |
| "loss": 0.0368, |
| "step": 11700 |
| }, |
| { |
| "epoch": 12.059732234809475, |
| "grad_norm": 0.38965263962745667, |
| "learning_rate": 5.612309786890649e-05, |
| "loss": 0.041, |
| "step": 11710 |
| }, |
| { |
| "epoch": 12.070030895983523, |
| "grad_norm": 0.6188856959342957, |
| "learning_rate": 5.6054716057323816e-05, |
| "loss": 0.0379, |
| "step": 11720 |
| }, |
| { |
| "epoch": 12.080329557157569, |
| "grad_norm": 0.2538016736507416, |
| "learning_rate": 5.5986322750349716e-05, |
| "loss": 0.0313, |
| "step": 11730 |
| }, |
| { |
| "epoch": 12.090628218331616, |
| "grad_norm": 0.26488491892814636, |
| "learning_rate": 5.591791807783466e-05, |
| "loss": 0.0459, |
| "step": 11740 |
| }, |
| { |
| "epoch": 12.100926879505664, |
| "grad_norm": 0.3006492555141449, |
| "learning_rate": 5.584950216965076e-05, |
| "loss": 0.0324, |
| "step": 11750 |
| }, |
| { |
| "epoch": 12.111225540679712, |
| "grad_norm": 0.3942396342754364, |
| "learning_rate": 5.5781075155691376e-05, |
| "loss": 0.03, |
| "step": 11760 |
| }, |
| { |
| "epoch": 12.12152420185376, |
| "grad_norm": 0.27575209736824036, |
| "learning_rate": 5.571263716587099e-05, |
| "loss": 0.0304, |
| "step": 11770 |
| }, |
| { |
| "epoch": 12.131822863027807, |
| "grad_norm": 0.30952727794647217, |
| "learning_rate": 5.5644188330124944e-05, |
| "loss": 0.0354, |
| "step": 11780 |
| }, |
| { |
| "epoch": 12.142121524201853, |
| "grad_norm": 0.3153734505176544, |
| "learning_rate": 5.557572877840915e-05, |
| "loss": 0.0381, |
| "step": 11790 |
| }, |
| { |
| "epoch": 12.1524201853759, |
| "grad_norm": 0.522873044013977, |
| "learning_rate": 5.5507258640699856e-05, |
| "loss": 0.0402, |
| "step": 11800 |
| }, |
| { |
| "epoch": 12.162718846549948, |
| "grad_norm": 0.294101357460022, |
| "learning_rate": 5.5438778046993424e-05, |
| "loss": 0.0341, |
| "step": 11810 |
| }, |
| { |
| "epoch": 12.173017507723996, |
| "grad_norm": 0.28906193375587463, |
| "learning_rate": 5.537028712730606e-05, |
| "loss": 0.0434, |
| "step": 11820 |
| }, |
| { |
| "epoch": 12.183316168898044, |
| "grad_norm": 0.2495001256465912, |
| "learning_rate": 5.5301786011673586e-05, |
| "loss": 0.0379, |
| "step": 11830 |
| }, |
| { |
| "epoch": 12.193614830072091, |
| "grad_norm": 0.3273758292198181, |
| "learning_rate": 5.5233274830151175e-05, |
| "loss": 0.0387, |
| "step": 11840 |
| }, |
| { |
| "epoch": 12.203913491246137, |
| "grad_norm": 0.27889516949653625, |
| "learning_rate": 5.516475371281309e-05, |
| "loss": 0.0381, |
| "step": 11850 |
| }, |
| { |
| "epoch": 12.214212152420185, |
| "grad_norm": 0.314471960067749, |
| "learning_rate": 5.50962227897525e-05, |
| "loss": 0.0355, |
| "step": 11860 |
| }, |
| { |
| "epoch": 12.224510813594232, |
| "grad_norm": 0.29174938797950745, |
| "learning_rate": 5.502768219108118e-05, |
| "loss": 0.043, |
| "step": 11870 |
| }, |
| { |
| "epoch": 12.23480947476828, |
| "grad_norm": 0.2509532570838928, |
| "learning_rate": 5.495913204692923e-05, |
| "loss": 0.0349, |
| "step": 11880 |
| }, |
| { |
| "epoch": 12.245108135942328, |
| "grad_norm": 0.331087589263916, |
| "learning_rate": 5.489057248744491e-05, |
| "loss": 0.0358, |
| "step": 11890 |
| }, |
| { |
| "epoch": 12.255406797116375, |
| "grad_norm": 0.27980178594589233, |
| "learning_rate": 5.482200364279437e-05, |
| "loss": 0.0363, |
| "step": 11900 |
| }, |
| { |
| "epoch": 12.265705458290423, |
| "grad_norm": 0.24944637715816498, |
| "learning_rate": 5.475342564316137e-05, |
| "loss": 0.0359, |
| "step": 11910 |
| }, |
| { |
| "epoch": 12.276004119464469, |
| "grad_norm": 0.21886838972568512, |
| "learning_rate": 5.468483861874705e-05, |
| "loss": 0.0363, |
| "step": 11920 |
| }, |
| { |
| "epoch": 12.286302780638517, |
| "grad_norm": 1.8008060455322266, |
| "learning_rate": 5.461624269976967e-05, |
| "loss": 0.0328, |
| "step": 11930 |
| }, |
| { |
| "epoch": 12.296601441812564, |
| "grad_norm": 0.19407658278942108, |
| "learning_rate": 5.454763801646443e-05, |
| "loss": 0.0316, |
| "step": 11940 |
| }, |
| { |
| "epoch": 12.306900102986612, |
| "grad_norm": 0.28356480598449707, |
| "learning_rate": 5.44790246990831e-05, |
| "loss": 0.0386, |
| "step": 11950 |
| }, |
| { |
| "epoch": 12.31719876416066, |
| "grad_norm": 0.2647826075553894, |
| "learning_rate": 5.441040287789388e-05, |
| "loss": 0.0383, |
| "step": 11960 |
| }, |
| { |
| "epoch": 12.327497425334707, |
| "grad_norm": 0.30886611342430115, |
| "learning_rate": 5.4341772683181144e-05, |
| "loss": 0.0411, |
| "step": 11970 |
| }, |
| { |
| "epoch": 12.337796086508753, |
| "grad_norm": 0.2655414342880249, |
| "learning_rate": 5.4273134245245095e-05, |
| "loss": 0.0332, |
| "step": 11980 |
| }, |
| { |
| "epoch": 12.3480947476828, |
| "grad_norm": 0.2595519423484802, |
| "learning_rate": 5.420448769440163e-05, |
| "loss": 0.035, |
| "step": 11990 |
| }, |
| { |
| "epoch": 12.358393408856848, |
| "grad_norm": 0.6750108003616333, |
| "learning_rate": 5.413583316098206e-05, |
| "loss": 0.0362, |
| "step": 12000 |
| }, |
| { |
| "epoch": 12.368692070030896, |
| "grad_norm": 0.37275227904319763, |
| "learning_rate": 5.406717077533281e-05, |
| "loss": 0.0392, |
| "step": 12010 |
| }, |
| { |
| "epoch": 12.378990731204944, |
| "grad_norm": 0.4077666699886322, |
| "learning_rate": 5.399850066781526e-05, |
| "loss": 0.0344, |
| "step": 12020 |
| }, |
| { |
| "epoch": 12.389289392378991, |
| "grad_norm": 0.27722200751304626, |
| "learning_rate": 5.392982296880541e-05, |
| "loss": 0.0335, |
| "step": 12030 |
| }, |
| { |
| "epoch": 12.399588053553039, |
| "grad_norm": 0.47021421790122986, |
| "learning_rate": 5.3861137808693695e-05, |
| "loss": 0.034, |
| "step": 12040 |
| }, |
| { |
| "epoch": 12.409886714727085, |
| "grad_norm": 2.5477991104125977, |
| "learning_rate": 5.3792445317884696e-05, |
| "loss": 0.039, |
| "step": 12050 |
| }, |
| { |
| "epoch": 12.420185375901132, |
| "grad_norm": 0.2117021083831787, |
| "learning_rate": 5.372374562679697e-05, |
| "loss": 0.0347, |
| "step": 12060 |
| }, |
| { |
| "epoch": 12.43048403707518, |
| "grad_norm": 0.2672605514526367, |
| "learning_rate": 5.3655038865862664e-05, |
| "loss": 0.0404, |
| "step": 12070 |
| }, |
| { |
| "epoch": 12.440782698249228, |
| "grad_norm": 0.37322327494621277, |
| "learning_rate": 5.358632516552738e-05, |
| "loss": 0.0357, |
| "step": 12080 |
| }, |
| { |
| "epoch": 12.451081359423275, |
| "grad_norm": 0.22624556720256805, |
| "learning_rate": 5.351760465624993e-05, |
| "loss": 0.0368, |
| "step": 12090 |
| }, |
| { |
| "epoch": 12.461380020597323, |
| "grad_norm": 0.2701815366744995, |
| "learning_rate": 5.3448877468502e-05, |
| "loss": 0.0436, |
| "step": 12100 |
| }, |
| { |
| "epoch": 12.471678681771369, |
| "grad_norm": 0.20336881279945374, |
| "learning_rate": 5.3380143732768e-05, |
| "loss": 0.04, |
| "step": 12110 |
| }, |
| { |
| "epoch": 12.481977342945417, |
| "grad_norm": 0.258292555809021, |
| "learning_rate": 5.331140357954473e-05, |
| "loss": 0.0376, |
| "step": 12120 |
| }, |
| { |
| "epoch": 12.492276004119464, |
| "grad_norm": 0.30692732334136963, |
| "learning_rate": 5.324265713934119e-05, |
| "loss": 0.0409, |
| "step": 12130 |
| }, |
| { |
| "epoch": 12.502574665293512, |
| "grad_norm": 0.3250420391559601, |
| "learning_rate": 5.317390454267834e-05, |
| "loss": 0.0359, |
| "step": 12140 |
| }, |
| { |
| "epoch": 12.51287332646756, |
| "grad_norm": 1.7624558210372925, |
| "learning_rate": 5.310514592008882e-05, |
| "loss": 0.0358, |
| "step": 12150 |
| }, |
| { |
| "epoch": 12.523171987641607, |
| "grad_norm": 0.45035508275032043, |
| "learning_rate": 5.303638140211666e-05, |
| "loss": 0.0465, |
| "step": 12160 |
| }, |
| { |
| "epoch": 12.533470648815655, |
| "grad_norm": 0.30285102128982544, |
| "learning_rate": 5.296761111931715e-05, |
| "loss": 0.031, |
| "step": 12170 |
| }, |
| { |
| "epoch": 12.5437693099897, |
| "grad_norm": 0.3329470157623291, |
| "learning_rate": 5.289883520225651e-05, |
| "loss": 0.0367, |
| "step": 12180 |
| }, |
| { |
| "epoch": 12.554067971163748, |
| "grad_norm": 0.2198537141084671, |
| "learning_rate": 5.283005378151162e-05, |
| "loss": 0.044, |
| "step": 12190 |
| }, |
| { |
| "epoch": 12.564366632337796, |
| "grad_norm": 0.7126184105873108, |
| "learning_rate": 5.276126698766985e-05, |
| "loss": 0.0373, |
| "step": 12200 |
| }, |
| { |
| "epoch": 12.574665293511844, |
| "grad_norm": 0.25698626041412354, |
| "learning_rate": 5.269247495132877e-05, |
| "loss": 0.0336, |
| "step": 12210 |
| }, |
| { |
| "epoch": 12.584963954685891, |
| "grad_norm": 0.3503556549549103, |
| "learning_rate": 5.2623677803095864e-05, |
| "loss": 0.0389, |
| "step": 12220 |
| }, |
| { |
| "epoch": 12.595262615859939, |
| "grad_norm": 0.24767592549324036, |
| "learning_rate": 5.2554875673588334e-05, |
| "loss": 0.0337, |
| "step": 12230 |
| }, |
| { |
| "epoch": 12.605561277033985, |
| "grad_norm": 0.21992820501327515, |
| "learning_rate": 5.24860686934329e-05, |
| "loss": 0.0423, |
| "step": 12240 |
| }, |
| { |
| "epoch": 12.615859938208033, |
| "grad_norm": 0.8321926593780518, |
| "learning_rate": 5.2417256993265396e-05, |
| "loss": 0.0327, |
| "step": 12250 |
| }, |
| { |
| "epoch": 12.62615859938208, |
| "grad_norm": 0.7005571722984314, |
| "learning_rate": 5.234844070373069e-05, |
| "loss": 0.0346, |
| "step": 12260 |
| }, |
| { |
| "epoch": 12.636457260556128, |
| "grad_norm": 0.27472203969955444, |
| "learning_rate": 5.227961995548235e-05, |
| "loss": 0.0299, |
| "step": 12270 |
| }, |
| { |
| "epoch": 12.646755921730175, |
| "grad_norm": 0.3291379511356354, |
| "learning_rate": 5.2210794879182376e-05, |
| "loss": 0.0354, |
| "step": 12280 |
| }, |
| { |
| "epoch": 12.657054582904223, |
| "grad_norm": 0.21298809349536896, |
| "learning_rate": 5.214196560550101e-05, |
| "loss": 0.034, |
| "step": 12290 |
| }, |
| { |
| "epoch": 12.667353244078269, |
| "grad_norm": 0.34874945878982544, |
| "learning_rate": 5.2073132265116456e-05, |
| "loss": 0.0326, |
| "step": 12300 |
| }, |
| { |
| "epoch": 12.677651905252317, |
| "grad_norm": 0.3092985153198242, |
| "learning_rate": 5.2004294988714654e-05, |
| "loss": 0.0366, |
| "step": 12310 |
| }, |
| { |
| "epoch": 12.687950566426364, |
| "grad_norm": 0.23273812234401703, |
| "learning_rate": 5.1935453906989e-05, |
| "loss": 0.0344, |
| "step": 12320 |
| }, |
| { |
| "epoch": 12.698249227600412, |
| "grad_norm": 0.3094405233860016, |
| "learning_rate": 5.1866609150640114e-05, |
| "loss": 0.0343, |
| "step": 12330 |
| }, |
| { |
| "epoch": 12.70854788877446, |
| "grad_norm": 0.3472210764884949, |
| "learning_rate": 5.179776085037561e-05, |
| "loss": 0.0348, |
| "step": 12340 |
| }, |
| { |
| "epoch": 12.718846549948507, |
| "grad_norm": 0.39559873938560486, |
| "learning_rate": 5.172890913690981e-05, |
| "loss": 0.035, |
| "step": 12350 |
| }, |
| { |
| "epoch": 12.729145211122553, |
| "grad_norm": 0.34745925664901733, |
| "learning_rate": 5.166005414096353e-05, |
| "loss": 0.0364, |
| "step": 12360 |
| }, |
| { |
| "epoch": 12.7394438722966, |
| "grad_norm": 0.3649449646472931, |
| "learning_rate": 5.159119599326383e-05, |
| "loss": 0.0312, |
| "step": 12370 |
| }, |
| { |
| "epoch": 12.749742533470648, |
| "grad_norm": 0.26928043365478516, |
| "learning_rate": 5.152233482454369e-05, |
| "loss": 0.0303, |
| "step": 12380 |
| }, |
| { |
| "epoch": 12.760041194644696, |
| "grad_norm": 0.278518944978714, |
| "learning_rate": 5.145347076554192e-05, |
| "loss": 0.0391, |
| "step": 12390 |
| }, |
| { |
| "epoch": 12.770339855818744, |
| "grad_norm": 0.366487979888916, |
| "learning_rate": 5.1384603947002775e-05, |
| "loss": 0.0337, |
| "step": 12400 |
| }, |
| { |
| "epoch": 12.780638516992791, |
| "grad_norm": 0.47460800409317017, |
| "learning_rate": 5.131573449967571e-05, |
| "loss": 0.0435, |
| "step": 12410 |
| }, |
| { |
| "epoch": 12.790937178166839, |
| "grad_norm": 0.22989706695079803, |
| "learning_rate": 5.12468625543152e-05, |
| "loss": 0.0371, |
| "step": 12420 |
| }, |
| { |
| "epoch": 12.801235839340885, |
| "grad_norm": 0.3515235185623169, |
| "learning_rate": 5.117798824168052e-05, |
| "loss": 0.0378, |
| "step": 12430 |
| }, |
| { |
| "epoch": 12.811534500514933, |
| "grad_norm": 0.3192023038864136, |
| "learning_rate": 5.1109111692535335e-05, |
| "loss": 0.0335, |
| "step": 12440 |
| }, |
| { |
| "epoch": 12.82183316168898, |
| "grad_norm": 0.22734206914901733, |
| "learning_rate": 5.1040233037647636e-05, |
| "loss": 0.0357, |
| "step": 12450 |
| }, |
| { |
| "epoch": 12.832131822863028, |
| "grad_norm": 0.2719617784023285, |
| "learning_rate": 5.0971352407789396e-05, |
| "loss": 0.0306, |
| "step": 12460 |
| }, |
| { |
| "epoch": 12.842430484037076, |
| "grad_norm": 0.28744617104530334, |
| "learning_rate": 5.0902469933736295e-05, |
| "loss": 0.0351, |
| "step": 12470 |
| }, |
| { |
| "epoch": 12.852729145211123, |
| "grad_norm": 0.273926317691803, |
| "learning_rate": 5.0833585746267556e-05, |
| "loss": 0.032, |
| "step": 12480 |
| }, |
| { |
| "epoch": 12.863027806385169, |
| "grad_norm": 1.2121611833572388, |
| "learning_rate": 5.076469997616568e-05, |
| "loss": 0.035, |
| "step": 12490 |
| }, |
| { |
| "epoch": 12.873326467559217, |
| "grad_norm": 0.22267426550388336, |
| "learning_rate": 5.0695812754216076e-05, |
| "loss": 0.0339, |
| "step": 12500 |
| }, |
| { |
| "epoch": 12.883625128733264, |
| "grad_norm": 0.204677551984787, |
| "learning_rate": 5.0626924211207015e-05, |
| "loss": 0.0371, |
| "step": 12510 |
| }, |
| { |
| "epoch": 12.893923789907312, |
| "grad_norm": 0.25673428177833557, |
| "learning_rate": 5.055803447792924e-05, |
| "loss": 0.0355, |
| "step": 12520 |
| }, |
| { |
| "epoch": 12.90422245108136, |
| "grad_norm": 0.1868886500597, |
| "learning_rate": 5.0489143685175714e-05, |
| "loss": 0.0363, |
| "step": 12530 |
| }, |
| { |
| "epoch": 12.914521112255407, |
| "grad_norm": 0.27115195989608765, |
| "learning_rate": 5.042025196374145e-05, |
| "loss": 0.0303, |
| "step": 12540 |
| }, |
| { |
| "epoch": 12.924819773429455, |
| "grad_norm": 0.2909318804740906, |
| "learning_rate": 5.035135944442324e-05, |
| "loss": 0.0329, |
| "step": 12550 |
| }, |
| { |
| "epoch": 12.9351184346035, |
| "grad_norm": 0.2762157618999481, |
| "learning_rate": 5.028246625801935e-05, |
| "loss": 0.041, |
| "step": 12560 |
| }, |
| { |
| "epoch": 12.945417095777549, |
| "grad_norm": 0.3040686547756195, |
| "learning_rate": 5.0213572535329336e-05, |
| "loss": 0.0353, |
| "step": 12570 |
| }, |
| { |
| "epoch": 12.955715756951596, |
| "grad_norm": 0.1944282352924347, |
| "learning_rate": 5.014467840715378e-05, |
| "loss": 0.0383, |
| "step": 12580 |
| }, |
| { |
| "epoch": 12.966014418125644, |
| "grad_norm": 0.29223209619522095, |
| "learning_rate": 5.007578400429399e-05, |
| "loss": 0.0321, |
| "step": 12590 |
| }, |
| { |
| "epoch": 12.976313079299691, |
| "grad_norm": 0.27292895317077637, |
| "learning_rate": 5.0006889457551864e-05, |
| "loss": 0.0304, |
| "step": 12600 |
| }, |
| { |
| "epoch": 12.98661174047374, |
| "grad_norm": 0.20329241454601288, |
| "learning_rate": 4.9937994897729515e-05, |
| "loss": 0.039, |
| "step": 12610 |
| }, |
| { |
| "epoch": 12.996910401647785, |
| "grad_norm": 0.3806949257850647, |
| "learning_rate": 4.9869100455629105e-05, |
| "loss": 0.0359, |
| "step": 12620 |
| }, |
| { |
| "epoch": 13.007209062821833, |
| "grad_norm": 0.3294031322002411, |
| "learning_rate": 4.9800206262052574e-05, |
| "loss": 0.0339, |
| "step": 12630 |
| }, |
| { |
| "epoch": 13.01750772399588, |
| "grad_norm": 0.2774278223514557, |
| "learning_rate": 4.973131244780138e-05, |
| "loss": 0.0344, |
| "step": 12640 |
| }, |
| { |
| "epoch": 13.027806385169928, |
| "grad_norm": 4.895512104034424, |
| "learning_rate": 4.966241914367627e-05, |
| "loss": 0.0407, |
| "step": 12650 |
| }, |
| { |
| "epoch": 13.038105046343976, |
| "grad_norm": 0.21008244156837463, |
| "learning_rate": 4.9593526480476996e-05, |
| "loss": 0.0342, |
| "step": 12660 |
| }, |
| { |
| "epoch": 13.048403707518023, |
| "grad_norm": 0.1632891595363617, |
| "learning_rate": 4.9524634589002164e-05, |
| "loss": 0.0364, |
| "step": 12670 |
| }, |
| { |
| "epoch": 13.058702368692071, |
| "grad_norm": 0.8435172438621521, |
| "learning_rate": 4.945574360004883e-05, |
| "loss": 0.0402, |
| "step": 12680 |
| }, |
| { |
| "epoch": 13.069001029866117, |
| "grad_norm": 0.16290047764778137, |
| "learning_rate": 4.93868536444124e-05, |
| "loss": 0.0432, |
| "step": 12690 |
| }, |
| { |
| "epoch": 13.079299691040164, |
| "grad_norm": 0.22083142399787903, |
| "learning_rate": 4.9317964852886256e-05, |
| "loss": 0.0427, |
| "step": 12700 |
| }, |
| { |
| "epoch": 13.089598352214212, |
| "grad_norm": 0.24757379293441772, |
| "learning_rate": 4.924907735626164e-05, |
| "loss": 0.0364, |
| "step": 12710 |
| }, |
| { |
| "epoch": 13.09989701338826, |
| "grad_norm": 0.4580037593841553, |
| "learning_rate": 4.918019128532726e-05, |
| "loss": 0.0354, |
| "step": 12720 |
| }, |
| { |
| "epoch": 13.110195674562307, |
| "grad_norm": 0.6730195879936218, |
| "learning_rate": 4.911130677086921e-05, |
| "loss": 0.0408, |
| "step": 12730 |
| }, |
| { |
| "epoch": 13.120494335736355, |
| "grad_norm": 0.27834802865982056, |
| "learning_rate": 4.9042423943670536e-05, |
| "loss": 0.0281, |
| "step": 12740 |
| }, |
| { |
| "epoch": 13.130792996910401, |
| "grad_norm": 0.25800591707229614, |
| "learning_rate": 4.8973542934511145e-05, |
| "loss": 0.0328, |
| "step": 12750 |
| }, |
| { |
| "epoch": 13.141091658084449, |
| "grad_norm": 0.27142634987831116, |
| "learning_rate": 4.8904663874167456e-05, |
| "loss": 0.0322, |
| "step": 12760 |
| }, |
| { |
| "epoch": 13.151390319258496, |
| "grad_norm": 0.16801717877388, |
| "learning_rate": 4.8835786893412215e-05, |
| "loss": 0.0289, |
| "step": 12770 |
| }, |
| { |
| "epoch": 13.161688980432544, |
| "grad_norm": 0.25099271535873413, |
| "learning_rate": 4.8766912123014177e-05, |
| "loss": 0.0337, |
| "step": 12780 |
| }, |
| { |
| "epoch": 13.171987641606592, |
| "grad_norm": 0.26906880736351013, |
| "learning_rate": 4.869803969373796e-05, |
| "loss": 0.0406, |
| "step": 12790 |
| }, |
| { |
| "epoch": 13.18228630278064, |
| "grad_norm": 0.5079290866851807, |
| "learning_rate": 4.862916973634369e-05, |
| "loss": 0.0363, |
| "step": 12800 |
| }, |
| { |
| "epoch": 13.192584963954685, |
| "grad_norm": 0.30340585112571716, |
| "learning_rate": 4.8560302381586834e-05, |
| "loss": 0.0303, |
| "step": 12810 |
| }, |
| { |
| "epoch": 13.202883625128733, |
| "grad_norm": 0.2514529228210449, |
| "learning_rate": 4.849143776021787e-05, |
| "loss": 0.0334, |
| "step": 12820 |
| }, |
| { |
| "epoch": 13.21318228630278, |
| "grad_norm": 0.19497543573379517, |
| "learning_rate": 4.8422576002982146e-05, |
| "loss": 0.0303, |
| "step": 12830 |
| }, |
| { |
| "epoch": 13.223480947476828, |
| "grad_norm": 0.4469880759716034, |
| "learning_rate": 4.8353717240619506e-05, |
| "loss": 0.0398, |
| "step": 12840 |
| }, |
| { |
| "epoch": 13.233779608650876, |
| "grad_norm": 0.3474951684474945, |
| "learning_rate": 4.82848616038642e-05, |
| "loss": 0.0349, |
| "step": 12850 |
| }, |
| { |
| "epoch": 13.244078269824923, |
| "grad_norm": 0.26160264015197754, |
| "learning_rate": 4.821600922344443e-05, |
| "loss": 0.0304, |
| "step": 12860 |
| }, |
| { |
| "epoch": 13.254376930998971, |
| "grad_norm": 0.3258974552154541, |
| "learning_rate": 4.814716023008231e-05, |
| "loss": 0.0311, |
| "step": 12870 |
| }, |
| { |
| "epoch": 13.264675592173017, |
| "grad_norm": 0.3310175836086273, |
| "learning_rate": 4.8078314754493475e-05, |
| "loss": 0.0345, |
| "step": 12880 |
| }, |
| { |
| "epoch": 13.274974253347064, |
| "grad_norm": 0.26981380581855774, |
| "learning_rate": 4.800947292738691e-05, |
| "loss": 0.0341, |
| "step": 12890 |
| }, |
| { |
| "epoch": 13.285272914521112, |
| "grad_norm": 0.1864253133535385, |
| "learning_rate": 4.794063487946463e-05, |
| "loss": 0.0353, |
| "step": 12900 |
| }, |
| { |
| "epoch": 13.29557157569516, |
| "grad_norm": 0.26357418298721313, |
| "learning_rate": 4.7871800741421496e-05, |
| "loss": 0.0314, |
| "step": 12910 |
| }, |
| { |
| "epoch": 13.305870236869207, |
| "grad_norm": 0.30822688341140747, |
| "learning_rate": 4.7802970643945e-05, |
| "loss": 0.0345, |
| "step": 12920 |
| }, |
| { |
| "epoch": 13.316168898043255, |
| "grad_norm": 0.45302730798721313, |
| "learning_rate": 4.773414471771485e-05, |
| "loss": 0.0327, |
| "step": 12930 |
| }, |
| { |
| "epoch": 13.326467559217301, |
| "grad_norm": 0.3582782447338104, |
| "learning_rate": 4.7665323093402955e-05, |
| "loss": 0.0379, |
| "step": 12940 |
| }, |
| { |
| "epoch": 13.336766220391349, |
| "grad_norm": 0.19117462635040283, |
| "learning_rate": 4.759650590167296e-05, |
| "loss": 0.0319, |
| "step": 12950 |
| }, |
| { |
| "epoch": 13.347064881565396, |
| "grad_norm": 0.4505470097064972, |
| "learning_rate": 4.752769327318016e-05, |
| "loss": 0.036, |
| "step": 12960 |
| }, |
| { |
| "epoch": 13.357363542739444, |
| "grad_norm": 0.18662497401237488, |
| "learning_rate": 4.745888533857114e-05, |
| "loss": 0.0379, |
| "step": 12970 |
| }, |
| { |
| "epoch": 13.367662203913492, |
| "grad_norm": 0.6353393793106079, |
| "learning_rate": 4.739008222848362e-05, |
| "loss": 0.0364, |
| "step": 12980 |
| }, |
| { |
| "epoch": 13.37796086508754, |
| "grad_norm": 0.2027062177658081, |
| "learning_rate": 4.732128407354609e-05, |
| "loss": 0.0343, |
| "step": 12990 |
| }, |
| { |
| "epoch": 13.388259526261585, |
| "grad_norm": 0.3728000521659851, |
| "learning_rate": 4.725249100437773e-05, |
| "loss": 0.0394, |
| "step": 13000 |
| }, |
| { |
| "epoch": 13.398558187435633, |
| "grad_norm": 0.3568314015865326, |
| "learning_rate": 4.718370315158796e-05, |
| "loss": 0.041, |
| "step": 13010 |
| }, |
| { |
| "epoch": 13.40885684860968, |
| "grad_norm": 0.2146635800600052, |
| "learning_rate": 4.711492064577639e-05, |
| "loss": 0.0381, |
| "step": 13020 |
| }, |
| { |
| "epoch": 13.419155509783728, |
| "grad_norm": 2.543966770172119, |
| "learning_rate": 4.704614361753239e-05, |
| "loss": 0.0431, |
| "step": 13030 |
| }, |
| { |
| "epoch": 13.429454170957776, |
| "grad_norm": 0.30011627078056335, |
| "learning_rate": 4.6977372197435023e-05, |
| "loss": 0.0348, |
| "step": 13040 |
| }, |
| { |
| "epoch": 13.439752832131823, |
| "grad_norm": 0.36090970039367676, |
| "learning_rate": 4.690860651605263e-05, |
| "loss": 0.0307, |
| "step": 13050 |
| }, |
| { |
| "epoch": 13.450051493305871, |
| "grad_norm": 0.5852978229522705, |
| "learning_rate": 4.683984670394269e-05, |
| "loss": 0.0391, |
| "step": 13060 |
| }, |
| { |
| "epoch": 13.460350154479917, |
| "grad_norm": 0.18308031558990479, |
| "learning_rate": 4.677109289165152e-05, |
| "loss": 0.0301, |
| "step": 13070 |
| }, |
| { |
| "epoch": 13.470648815653965, |
| "grad_norm": 0.2781670391559601, |
| "learning_rate": 4.670234520971408e-05, |
| "loss": 0.0338, |
| "step": 13080 |
| }, |
| { |
| "epoch": 13.480947476828012, |
| "grad_norm": 0.32513490319252014, |
| "learning_rate": 4.6633603788653636e-05, |
| "loss": 0.0429, |
| "step": 13090 |
| }, |
| { |
| "epoch": 13.49124613800206, |
| "grad_norm": 0.2742452025413513, |
| "learning_rate": 4.656486875898164e-05, |
| "loss": 0.0373, |
| "step": 13100 |
| }, |
| { |
| "epoch": 13.501544799176108, |
| "grad_norm": 0.3207267224788666, |
| "learning_rate": 4.649614025119734e-05, |
| "loss": 0.0353, |
| "step": 13110 |
| }, |
| { |
| "epoch": 13.511843460350155, |
| "grad_norm": 0.3440451920032501, |
| "learning_rate": 4.6427418395787655e-05, |
| "loss": 0.0341, |
| "step": 13120 |
| }, |
| { |
| "epoch": 13.522142121524201, |
| "grad_norm": 0.2777320444583893, |
| "learning_rate": 4.635870332322682e-05, |
| "loss": 0.039, |
| "step": 13130 |
| }, |
| { |
| "epoch": 13.532440782698249, |
| "grad_norm": 0.41627272963523865, |
| "learning_rate": 4.628999516397625e-05, |
| "loss": 0.0329, |
| "step": 13140 |
| }, |
| { |
| "epoch": 13.542739443872296, |
| "grad_norm": 0.2555646598339081, |
| "learning_rate": 4.6221294048484174e-05, |
| "loss": 0.04, |
| "step": 13150 |
| }, |
| { |
| "epoch": 13.553038105046344, |
| "grad_norm": 0.2983425557613373, |
| "learning_rate": 4.615260010718553e-05, |
| "loss": 0.0374, |
| "step": 13160 |
| }, |
| { |
| "epoch": 13.563336766220392, |
| "grad_norm": 0.32177847623825073, |
| "learning_rate": 4.608391347050154e-05, |
| "loss": 0.0336, |
| "step": 13170 |
| }, |
| { |
| "epoch": 13.57363542739444, |
| "grad_norm": 0.27015167474746704, |
| "learning_rate": 4.601523426883963e-05, |
| "loss": 0.0285, |
| "step": 13180 |
| }, |
| { |
| "epoch": 13.583934088568487, |
| "grad_norm": 0.2573201358318329, |
| "learning_rate": 4.5946562632593066e-05, |
| "loss": 0.031, |
| "step": 13190 |
| }, |
| { |
| "epoch": 13.594232749742533, |
| "grad_norm": 0.32997414469718933, |
| "learning_rate": 4.587789869214079e-05, |
| "loss": 0.0336, |
| "step": 13200 |
| }, |
| { |
| "epoch": 13.60453141091658, |
| "grad_norm": 0.2865029275417328, |
| "learning_rate": 4.580924257784706e-05, |
| "loss": 0.0327, |
| "step": 13210 |
| }, |
| { |
| "epoch": 13.614830072090628, |
| "grad_norm": 0.2227432280778885, |
| "learning_rate": 4.5740594420061396e-05, |
| "loss": 0.0306, |
| "step": 13220 |
| }, |
| { |
| "epoch": 13.625128733264676, |
| "grad_norm": 0.2854247987270355, |
| "learning_rate": 4.5671954349118094e-05, |
| "loss": 0.0298, |
| "step": 13230 |
| }, |
| { |
| "epoch": 13.635427394438723, |
| "grad_norm": 0.46989086270332336, |
| "learning_rate": 4.560332249533617e-05, |
| "loss": 0.0386, |
| "step": 13240 |
| }, |
| { |
| "epoch": 13.645726055612771, |
| "grad_norm": 0.36545002460479736, |
| "learning_rate": 4.5534698989018984e-05, |
| "loss": 0.0411, |
| "step": 13250 |
| }, |
| { |
| "epoch": 13.656024716786817, |
| "grad_norm": 0.3201320469379425, |
| "learning_rate": 4.5466083960454117e-05, |
| "loss": 0.0367, |
| "step": 13260 |
| }, |
| { |
| "epoch": 13.666323377960865, |
| "grad_norm": 1.3402334451675415, |
| "learning_rate": 4.539747753991297e-05, |
| "loss": 0.0464, |
| "step": 13270 |
| }, |
| { |
| "epoch": 13.676622039134912, |
| "grad_norm": 0.17494896054267883, |
| "learning_rate": 4.5328879857650676e-05, |
| "loss": 0.0368, |
| "step": 13280 |
| }, |
| { |
| "epoch": 13.68692070030896, |
| "grad_norm": 0.27458542585372925, |
| "learning_rate": 4.5260291043905736e-05, |
| "loss": 0.037, |
| "step": 13290 |
| }, |
| { |
| "epoch": 13.697219361483008, |
| "grad_norm": 0.3501793444156647, |
| "learning_rate": 4.519171122889983e-05, |
| "loss": 0.0434, |
| "step": 13300 |
| }, |
| { |
| "epoch": 13.707518022657055, |
| "grad_norm": 0.33499833941459656, |
| "learning_rate": 4.512314054283755e-05, |
| "loss": 0.0377, |
| "step": 13310 |
| }, |
| { |
| "epoch": 13.717816683831103, |
| "grad_norm": 0.30087441205978394, |
| "learning_rate": 4.505457911590613e-05, |
| "loss": 0.0372, |
| "step": 13320 |
| }, |
| { |
| "epoch": 13.728115345005149, |
| "grad_norm": 0.22873124480247498, |
| "learning_rate": 4.498602707827528e-05, |
| "loss": 0.0283, |
| "step": 13330 |
| }, |
| { |
| "epoch": 13.738414006179196, |
| "grad_norm": 0.24429480731487274, |
| "learning_rate": 4.4917484560096804e-05, |
| "loss": 0.0333, |
| "step": 13340 |
| }, |
| { |
| "epoch": 13.748712667353244, |
| "grad_norm": 0.23670226335525513, |
| "learning_rate": 4.4848951691504555e-05, |
| "loss": 0.0331, |
| "step": 13350 |
| }, |
| { |
| "epoch": 13.759011328527292, |
| "grad_norm": 0.24842509627342224, |
| "learning_rate": 4.478042860261391e-05, |
| "loss": 0.037, |
| "step": 13360 |
| }, |
| { |
| "epoch": 13.76930998970134, |
| "grad_norm": 0.24097035825252533, |
| "learning_rate": 4.4711915423521816e-05, |
| "loss": 0.0276, |
| "step": 13370 |
| }, |
| { |
| "epoch": 13.779608650875387, |
| "grad_norm": 0.1889146864414215, |
| "learning_rate": 4.4643412284306324e-05, |
| "loss": 0.0333, |
| "step": 13380 |
| }, |
| { |
| "epoch": 13.789907312049433, |
| "grad_norm": 0.22404436767101288, |
| "learning_rate": 4.457491931502646e-05, |
| "loss": 0.0313, |
| "step": 13390 |
| }, |
| { |
| "epoch": 13.80020597322348, |
| "grad_norm": 0.20593509078025818, |
| "learning_rate": 4.45064366457219e-05, |
| "loss": 0.034, |
| "step": 13400 |
| }, |
| { |
| "epoch": 13.810504634397528, |
| "grad_norm": 0.2181590348482132, |
| "learning_rate": 4.4437964406412844e-05, |
| "loss": 0.0339, |
| "step": 13410 |
| }, |
| { |
| "epoch": 13.820803295571576, |
| "grad_norm": 0.23468917608261108, |
| "learning_rate": 4.436950272709959e-05, |
| "loss": 0.0323, |
| "step": 13420 |
| }, |
| { |
| "epoch": 13.831101956745623, |
| "grad_norm": 0.2851261794567108, |
| "learning_rate": 4.4301051737762466e-05, |
| "loss": 0.0341, |
| "step": 13430 |
| }, |
| { |
| "epoch": 13.841400617919671, |
| "grad_norm": 0.23932397365570068, |
| "learning_rate": 4.423261156836146e-05, |
| "loss": 0.0356, |
| "step": 13440 |
| }, |
| { |
| "epoch": 13.851699279093717, |
| "grad_norm": 0.37307506799697876, |
| "learning_rate": 4.4164182348836056e-05, |
| "loss": 0.0345, |
| "step": 13450 |
| }, |
| { |
| "epoch": 13.861997940267765, |
| "grad_norm": 0.29198941588401794, |
| "learning_rate": 4.409576420910488e-05, |
| "loss": 0.039, |
| "step": 13460 |
| }, |
| { |
| "epoch": 13.872296601441812, |
| "grad_norm": 0.3117142617702484, |
| "learning_rate": 4.402735727906564e-05, |
| "loss": 0.0402, |
| "step": 13470 |
| }, |
| { |
| "epoch": 13.88259526261586, |
| "grad_norm": 0.6684085726737976, |
| "learning_rate": 4.39589616885946e-05, |
| "loss": 0.0387, |
| "step": 13480 |
| }, |
| { |
| "epoch": 13.892893923789908, |
| "grad_norm": 0.48037102818489075, |
| "learning_rate": 4.389057756754665e-05, |
| "loss": 0.0378, |
| "step": 13490 |
| }, |
| { |
| "epoch": 13.903192584963955, |
| "grad_norm": 0.25196754932403564, |
| "learning_rate": 4.3822205045754804e-05, |
| "loss": 0.0422, |
| "step": 13500 |
| }, |
| { |
| "epoch": 13.913491246138001, |
| "grad_norm": 0.22298026084899902, |
| "learning_rate": 4.3753844253030115e-05, |
| "loss": 0.0333, |
| "step": 13510 |
| }, |
| { |
| "epoch": 13.923789907312049, |
| "grad_norm": 1.1777735948562622, |
| "learning_rate": 4.368549531916129e-05, |
| "loss": 0.0426, |
| "step": 13520 |
| }, |
| { |
| "epoch": 13.934088568486096, |
| "grad_norm": 1.4645673036575317, |
| "learning_rate": 4.361715837391465e-05, |
| "loss": 0.0312, |
| "step": 13530 |
| }, |
| { |
| "epoch": 13.944387229660144, |
| "grad_norm": 0.34733325242996216, |
| "learning_rate": 4.3548833547033585e-05, |
| "loss": 0.0314, |
| "step": 13540 |
| }, |
| { |
| "epoch": 13.954685890834192, |
| "grad_norm": 0.31025397777557373, |
| "learning_rate": 4.348052096823864e-05, |
| "loss": 0.0326, |
| "step": 13550 |
| }, |
| { |
| "epoch": 13.96498455200824, |
| "grad_norm": 0.29611408710479736, |
| "learning_rate": 4.3412220767227e-05, |
| "loss": 0.0386, |
| "step": 13560 |
| }, |
| { |
| "epoch": 13.975283213182287, |
| "grad_norm": 0.24702376127243042, |
| "learning_rate": 4.334393307367239e-05, |
| "loss": 0.0329, |
| "step": 13570 |
| }, |
| { |
| "epoch": 13.985581874356333, |
| "grad_norm": 0.24566444754600525, |
| "learning_rate": 4.327565801722477e-05, |
| "loss": 0.0353, |
| "step": 13580 |
| }, |
| { |
| "epoch": 13.99588053553038, |
| "grad_norm": 0.3815259039402008, |
| "learning_rate": 4.3207395727510156e-05, |
| "loss": 0.0337, |
| "step": 13590 |
| }, |
| { |
| "epoch": 14.006179196704428, |
| "grad_norm": 0.30025404691696167, |
| "learning_rate": 4.313914633413023e-05, |
| "loss": 0.027, |
| "step": 13600 |
| }, |
| { |
| "epoch": 14.016477857878476, |
| "grad_norm": 0.25736743211746216, |
| "learning_rate": 4.307090996666231e-05, |
| "loss": 0.0337, |
| "step": 13610 |
| }, |
| { |
| "epoch": 14.026776519052524, |
| "grad_norm": 0.39078590273857117, |
| "learning_rate": 4.300268675465888e-05, |
| "loss": 0.0331, |
| "step": 13620 |
| }, |
| { |
| "epoch": 14.037075180226571, |
| "grad_norm": 0.4594716727733612, |
| "learning_rate": 4.293447682764751e-05, |
| "loss": 0.033, |
| "step": 13630 |
| }, |
| { |
| "epoch": 14.047373841400617, |
| "grad_norm": 2.626016855239868, |
| "learning_rate": 4.286628031513049e-05, |
| "loss": 0.0307, |
| "step": 13640 |
| }, |
| { |
| "epoch": 14.057672502574665, |
| "grad_norm": 0.36267414689064026, |
| "learning_rate": 4.2798097346584745e-05, |
| "loss": 0.0359, |
| "step": 13650 |
| }, |
| { |
| "epoch": 14.067971163748712, |
| "grad_norm": 0.9793422818183899, |
| "learning_rate": 4.272992805146133e-05, |
| "loss": 0.0286, |
| "step": 13660 |
| }, |
| { |
| "epoch": 14.07826982492276, |
| "grad_norm": 0.3450843095779419, |
| "learning_rate": 4.2661772559185506e-05, |
| "loss": 0.0363, |
| "step": 13670 |
| }, |
| { |
| "epoch": 14.088568486096808, |
| "grad_norm": 0.24814942479133606, |
| "learning_rate": 4.2593630999156196e-05, |
| "loss": 0.0307, |
| "step": 13680 |
| }, |
| { |
| "epoch": 14.098867147270855, |
| "grad_norm": 0.34020665287971497, |
| "learning_rate": 4.252550350074597e-05, |
| "loss": 0.0333, |
| "step": 13690 |
| }, |
| { |
| "epoch": 14.109165808444903, |
| "grad_norm": 0.3881569504737854, |
| "learning_rate": 4.24573901933006e-05, |
| "loss": 0.0337, |
| "step": 13700 |
| }, |
| { |
| "epoch": 14.119464469618949, |
| "grad_norm": 0.1805485635995865, |
| "learning_rate": 4.238929120613903e-05, |
| "loss": 0.0377, |
| "step": 13710 |
| }, |
| { |
| "epoch": 14.129763130792997, |
| "grad_norm": 0.39343783259391785, |
| "learning_rate": 4.2321206668552934e-05, |
| "loss": 0.0363, |
| "step": 13720 |
| }, |
| { |
| "epoch": 14.140061791967044, |
| "grad_norm": 0.30404913425445557, |
| "learning_rate": 4.225313670980655e-05, |
| "loss": 0.032, |
| "step": 13730 |
| }, |
| { |
| "epoch": 14.150360453141092, |
| "grad_norm": 0.2011529803276062, |
| "learning_rate": 4.21850814591365e-05, |
| "loss": 0.026, |
| "step": 13740 |
| }, |
| { |
| "epoch": 14.16065911431514, |
| "grad_norm": 0.32636958360671997, |
| "learning_rate": 4.2117041045751416e-05, |
| "loss": 0.0391, |
| "step": 13750 |
| }, |
| { |
| "epoch": 14.170957775489187, |
| "grad_norm": 0.19938711822032928, |
| "learning_rate": 4.204901559883181e-05, |
| "loss": 0.0306, |
| "step": 13760 |
| }, |
| { |
| "epoch": 14.181256436663233, |
| "grad_norm": 0.3431154489517212, |
| "learning_rate": 4.1981005247529716e-05, |
| "loss": 0.0388, |
| "step": 13770 |
| }, |
| { |
| "epoch": 14.19155509783728, |
| "grad_norm": 0.28631865978240967, |
| "learning_rate": 4.191301012096861e-05, |
| "loss": 0.0304, |
| "step": 13780 |
| }, |
| { |
| "epoch": 14.201853759011328, |
| "grad_norm": 0.3022019863128662, |
| "learning_rate": 4.1845030348242945e-05, |
| "loss": 0.0347, |
| "step": 13790 |
| }, |
| { |
| "epoch": 14.212152420185376, |
| "grad_norm": 0.2202078104019165, |
| "learning_rate": 4.177706605841811e-05, |
| "loss": 0.0281, |
| "step": 13800 |
| }, |
| { |
| "epoch": 14.222451081359424, |
| "grad_norm": 0.2681058347225189, |
| "learning_rate": 4.170911738053006e-05, |
| "loss": 0.0368, |
| "step": 13810 |
| }, |
| { |
| "epoch": 14.232749742533471, |
| "grad_norm": 0.23820021748542786, |
| "learning_rate": 4.164118444358512e-05, |
| "loss": 0.0297, |
| "step": 13820 |
| }, |
| { |
| "epoch": 14.243048403707519, |
| "grad_norm": 0.3738692104816437, |
| "learning_rate": 4.1573267376559705e-05, |
| "loss": 0.032, |
| "step": 13830 |
| }, |
| { |
| "epoch": 14.253347064881565, |
| "grad_norm": 0.18206800520420074, |
| "learning_rate": 4.150536630840017e-05, |
| "loss": 0.0351, |
| "step": 13840 |
| }, |
| { |
| "epoch": 14.263645726055612, |
| "grad_norm": 0.3507537245750427, |
| "learning_rate": 4.143748136802238e-05, |
| "loss": 0.0325, |
| "step": 13850 |
| }, |
| { |
| "epoch": 14.27394438722966, |
| "grad_norm": 0.2590770125389099, |
| "learning_rate": 4.13696126843117e-05, |
| "loss": 0.0315, |
| "step": 13860 |
| }, |
| { |
| "epoch": 14.284243048403708, |
| "grad_norm": 0.4932098984718323, |
| "learning_rate": 4.130176038612256e-05, |
| "loss": 0.0367, |
| "step": 13870 |
| }, |
| { |
| "epoch": 14.294541709577755, |
| "grad_norm": 0.22558774054050446, |
| "learning_rate": 4.123392460227829e-05, |
| "loss": 0.03, |
| "step": 13880 |
| }, |
| { |
| "epoch": 14.304840370751803, |
| "grad_norm": 0.3039613962173462, |
| "learning_rate": 4.116610546157086e-05, |
| "loss": 0.0294, |
| "step": 13890 |
| }, |
| { |
| "epoch": 14.315139031925849, |
| "grad_norm": 0.26412516832351685, |
| "learning_rate": 4.1098303092760707e-05, |
| "loss": 0.0278, |
| "step": 13900 |
| }, |
| { |
| "epoch": 14.325437693099897, |
| "grad_norm": 0.24321669340133667, |
| "learning_rate": 4.1030517624576304e-05, |
| "loss": 0.0316, |
| "step": 13910 |
| }, |
| { |
| "epoch": 14.335736354273944, |
| "grad_norm": 0.1724211871623993, |
| "learning_rate": 4.0962749185714156e-05, |
| "loss": 0.0299, |
| "step": 13920 |
| }, |
| { |
| "epoch": 14.346035015447992, |
| "grad_norm": 0.37654179334640503, |
| "learning_rate": 4.0894997904838364e-05, |
| "loss": 0.0376, |
| "step": 13930 |
| }, |
| { |
| "epoch": 14.35633367662204, |
| "grad_norm": 0.24133779108524323, |
| "learning_rate": 4.082726391058048e-05, |
| "loss": 0.0361, |
| "step": 13940 |
| }, |
| { |
| "epoch": 14.366632337796087, |
| "grad_norm": 0.24687901139259338, |
| "learning_rate": 4.075954733153922e-05, |
| "loss": 0.034, |
| "step": 13950 |
| }, |
| { |
| "epoch": 14.376930998970133, |
| "grad_norm": 0.23451952636241913, |
| "learning_rate": 4.069184829628029e-05, |
| "loss": 0.0305, |
| "step": 13960 |
| }, |
| { |
| "epoch": 14.38722966014418, |
| "grad_norm": 0.4871678650379181, |
| "learning_rate": 4.062416693333598e-05, |
| "loss": 0.0333, |
| "step": 13970 |
| }, |
| { |
| "epoch": 14.397528321318228, |
| "grad_norm": 0.30757251381874084, |
| "learning_rate": 4.055650337120514e-05, |
| "loss": 0.0299, |
| "step": 13980 |
| }, |
| { |
| "epoch": 14.407826982492276, |
| "grad_norm": 0.22331511974334717, |
| "learning_rate": 4.0488857738352745e-05, |
| "loss": 0.0289, |
| "step": 13990 |
| }, |
| { |
| "epoch": 14.418125643666324, |
| "grad_norm": 0.24289913475513458, |
| "learning_rate": 4.042123016320979e-05, |
| "loss": 0.0352, |
| "step": 14000 |
| }, |
| { |
| "epoch": 14.428424304840371, |
| "grad_norm": 0.18522806465625763, |
| "learning_rate": 4.035362077417292e-05, |
| "loss": 0.0302, |
| "step": 14010 |
| }, |
| { |
| "epoch": 14.438722966014419, |
| "grad_norm": 0.23417025804519653, |
| "learning_rate": 4.028602969960434e-05, |
| "loss": 0.028, |
| "step": 14020 |
| }, |
| { |
| "epoch": 14.449021627188465, |
| "grad_norm": 0.27148130536079407, |
| "learning_rate": 4.021845706783138e-05, |
| "loss": 0.0303, |
| "step": 14030 |
| }, |
| { |
| "epoch": 14.459320288362512, |
| "grad_norm": 0.36462321877479553, |
| "learning_rate": 4.0150903007146434e-05, |
| "loss": 0.0305, |
| "step": 14040 |
| }, |
| { |
| "epoch": 14.46961894953656, |
| "grad_norm": 0.4786697328090668, |
| "learning_rate": 4.00833676458066e-05, |
| "loss": 0.0349, |
| "step": 14050 |
| }, |
| { |
| "epoch": 14.479917610710608, |
| "grad_norm": 0.4755783677101135, |
| "learning_rate": 4.00158511120335e-05, |
| "loss": 0.0335, |
| "step": 14060 |
| }, |
| { |
| "epoch": 14.490216271884655, |
| "grad_norm": 0.25529634952545166, |
| "learning_rate": 3.994835353401295e-05, |
| "loss": 0.0347, |
| "step": 14070 |
| }, |
| { |
| "epoch": 14.500514933058703, |
| "grad_norm": 0.4141525328159332, |
| "learning_rate": 3.988087503989489e-05, |
| "loss": 0.0328, |
| "step": 14080 |
| }, |
| { |
| "epoch": 14.510813594232749, |
| "grad_norm": 0.30694714188575745, |
| "learning_rate": 3.9813415757792885e-05, |
| "loss": 0.0306, |
| "step": 14090 |
| }, |
| { |
| "epoch": 14.521112255406797, |
| "grad_norm": 0.18365953862667084, |
| "learning_rate": 3.974597581578416e-05, |
| "loss": 0.029, |
| "step": 14100 |
| }, |
| { |
| "epoch": 14.531410916580844, |
| "grad_norm": 0.3182389438152313, |
| "learning_rate": 3.9678555341909125e-05, |
| "loss": 0.0355, |
| "step": 14110 |
| }, |
| { |
| "epoch": 14.541709577754892, |
| "grad_norm": 0.2846277952194214, |
| "learning_rate": 3.9611154464171255e-05, |
| "loss": 0.0349, |
| "step": 14120 |
| }, |
| { |
| "epoch": 14.55200823892894, |
| "grad_norm": 0.2074788361787796, |
| "learning_rate": 3.954377331053686e-05, |
| "loss": 0.036, |
| "step": 14130 |
| }, |
| { |
| "epoch": 14.562306900102987, |
| "grad_norm": 0.3968390226364136, |
| "learning_rate": 3.947641200893473e-05, |
| "loss": 0.0441, |
| "step": 14140 |
| }, |
| { |
| "epoch": 14.572605561277033, |
| "grad_norm": 0.3291980028152466, |
| "learning_rate": 3.940907068725603e-05, |
| "loss": 0.0367, |
| "step": 14150 |
| }, |
| { |
| "epoch": 14.58290422245108, |
| "grad_norm": 0.270810067653656, |
| "learning_rate": 3.934174947335394e-05, |
| "loss": 0.0361, |
| "step": 14160 |
| }, |
| { |
| "epoch": 14.593202883625128, |
| "grad_norm": 0.1831371933221817, |
| "learning_rate": 3.927444849504353e-05, |
| "loss": 0.0313, |
| "step": 14170 |
| }, |
| { |
| "epoch": 14.603501544799176, |
| "grad_norm": 0.30703842639923096, |
| "learning_rate": 3.920716788010137e-05, |
| "loss": 0.0361, |
| "step": 14180 |
| }, |
| { |
| "epoch": 14.613800205973224, |
| "grad_norm": 0.23841963708400726, |
| "learning_rate": 3.913990775626544e-05, |
| "loss": 0.027, |
| "step": 14190 |
| }, |
| { |
| "epoch": 14.624098867147271, |
| "grad_norm": 0.36122044920921326, |
| "learning_rate": 3.907266825123475e-05, |
| "loss": 0.0297, |
| "step": 14200 |
| }, |
| { |
| "epoch": 14.634397528321319, |
| "grad_norm": 0.4103507995605469, |
| "learning_rate": 3.9005449492669224e-05, |
| "loss": 0.0303, |
| "step": 14210 |
| }, |
| { |
| "epoch": 14.644696189495365, |
| "grad_norm": 0.4827253222465515, |
| "learning_rate": 3.893825160818935e-05, |
| "loss": 0.032, |
| "step": 14220 |
| }, |
| { |
| "epoch": 14.654994850669413, |
| "grad_norm": 0.3118212819099426, |
| "learning_rate": 3.887107472537602e-05, |
| "loss": 0.0319, |
| "step": 14230 |
| }, |
| { |
| "epoch": 14.66529351184346, |
| "grad_norm": 0.27819785475730896, |
| "learning_rate": 3.880391897177024e-05, |
| "loss": 0.0305, |
| "step": 14240 |
| }, |
| { |
| "epoch": 14.675592173017508, |
| "grad_norm": 0.24802176654338837, |
| "learning_rate": 3.873678447487289e-05, |
| "loss": 0.0376, |
| "step": 14250 |
| }, |
| { |
| "epoch": 14.685890834191555, |
| "grad_norm": 0.41126886010169983, |
| "learning_rate": 3.8669671362144485e-05, |
| "loss": 0.0435, |
| "step": 14260 |
| }, |
| { |
| "epoch": 14.696189495365603, |
| "grad_norm": 0.3224561810493469, |
| "learning_rate": 3.8602579761005e-05, |
| "loss": 0.034, |
| "step": 14270 |
| }, |
| { |
| "epoch": 14.706488156539649, |
| "grad_norm": 0.263100802898407, |
| "learning_rate": 3.853550979883346e-05, |
| "loss": 0.0287, |
| "step": 14280 |
| }, |
| { |
| "epoch": 14.716786817713697, |
| "grad_norm": 0.2607341706752777, |
| "learning_rate": 3.846846160296794e-05, |
| "loss": 0.0251, |
| "step": 14290 |
| }, |
| { |
| "epoch": 14.727085478887744, |
| "grad_norm": 0.2823236584663391, |
| "learning_rate": 3.840143530070507e-05, |
| "loss": 0.0328, |
| "step": 14300 |
| }, |
| { |
| "epoch": 14.737384140061792, |
| "grad_norm": 0.4001230001449585, |
| "learning_rate": 3.833443101929999e-05, |
| "loss": 0.0317, |
| "step": 14310 |
| }, |
| { |
| "epoch": 14.74768280123584, |
| "grad_norm": 0.29263654351234436, |
| "learning_rate": 3.8267448885965994e-05, |
| "loss": 0.0349, |
| "step": 14320 |
| }, |
| { |
| "epoch": 14.757981462409887, |
| "grad_norm": 0.3369698226451874, |
| "learning_rate": 3.820048902787435e-05, |
| "loss": 0.0409, |
| "step": 14330 |
| }, |
| { |
| "epoch": 14.768280123583935, |
| "grad_norm": 0.42798182368278503, |
| "learning_rate": 3.813355157215398e-05, |
| "loss": 0.0305, |
| "step": 14340 |
| }, |
| { |
| "epoch": 14.77857878475798, |
| "grad_norm": 0.26598209142684937, |
| "learning_rate": 3.806663664589138e-05, |
| "loss": 0.0312, |
| "step": 14350 |
| }, |
| { |
| "epoch": 14.788877445932028, |
| "grad_norm": 0.17724353075027466, |
| "learning_rate": 3.799974437613016e-05, |
| "loss": 0.0309, |
| "step": 14360 |
| }, |
| { |
| "epoch": 14.799176107106076, |
| "grad_norm": 0.24102918803691864, |
| "learning_rate": 3.793287488987098e-05, |
| "loss": 0.0353, |
| "step": 14370 |
| }, |
| { |
| "epoch": 14.809474768280124, |
| "grad_norm": 0.29433581233024597, |
| "learning_rate": 3.786602831407121e-05, |
| "loss": 0.03, |
| "step": 14380 |
| }, |
| { |
| "epoch": 14.819773429454171, |
| "grad_norm": 0.3096264898777008, |
| "learning_rate": 3.779920477564477e-05, |
| "loss": 0.032, |
| "step": 14390 |
| }, |
| { |
| "epoch": 14.830072090628219, |
| "grad_norm": 0.2747699022293091, |
| "learning_rate": 3.7732404401461764e-05, |
| "loss": 0.0329, |
| "step": 14400 |
| }, |
| { |
| "epoch": 14.840370751802265, |
| "grad_norm": 0.21906539797782898, |
| "learning_rate": 3.76656273183484e-05, |
| "loss": 0.0351, |
| "step": 14410 |
| }, |
| { |
| "epoch": 14.850669412976313, |
| "grad_norm": 0.32619962096214294, |
| "learning_rate": 3.759887365308661e-05, |
| "loss": 0.0354, |
| "step": 14420 |
| }, |
| { |
| "epoch": 14.86096807415036, |
| "grad_norm": 0.35620927810668945, |
| "learning_rate": 3.7532143532413886e-05, |
| "loss": 0.0307, |
| "step": 14430 |
| }, |
| { |
| "epoch": 14.871266735324408, |
| "grad_norm": 0.3073720335960388, |
| "learning_rate": 3.746543708302301e-05, |
| "loss": 0.0315, |
| "step": 14440 |
| }, |
| { |
| "epoch": 14.881565396498456, |
| "grad_norm": 0.28775277733802795, |
| "learning_rate": 3.739875443156186e-05, |
| "loss": 0.0286, |
| "step": 14450 |
| }, |
| { |
| "epoch": 14.891864057672503, |
| "grad_norm": 0.29448071122169495, |
| "learning_rate": 3.733209570463304e-05, |
| "loss": 0.0361, |
| "step": 14460 |
| }, |
| { |
| "epoch": 14.90216271884655, |
| "grad_norm": 0.2410167008638382, |
| "learning_rate": 3.726546102879386e-05, |
| "loss": 0.0279, |
| "step": 14470 |
| }, |
| { |
| "epoch": 14.912461380020597, |
| "grad_norm": 0.21059367060661316, |
| "learning_rate": 3.719885053055584e-05, |
| "loss": 0.0336, |
| "step": 14480 |
| }, |
| { |
| "epoch": 14.922760041194644, |
| "grad_norm": 1.691361665725708, |
| "learning_rate": 3.713226433638469e-05, |
| "loss": 0.0357, |
| "step": 14490 |
| }, |
| { |
| "epoch": 14.933058702368692, |
| "grad_norm": 0.2861790955066681, |
| "learning_rate": 3.706570257269991e-05, |
| "loss": 0.0308, |
| "step": 14500 |
| }, |
| { |
| "epoch": 14.94335736354274, |
| "grad_norm": 0.23796629905700684, |
| "learning_rate": 3.6999165365874696e-05, |
| "loss": 0.0294, |
| "step": 14510 |
| }, |
| { |
| "epoch": 14.953656024716787, |
| "grad_norm": 0.29010501503944397, |
| "learning_rate": 3.693265284223554e-05, |
| "loss": 0.0315, |
| "step": 14520 |
| }, |
| { |
| "epoch": 14.963954685890835, |
| "grad_norm": 0.4312698245048523, |
| "learning_rate": 3.686616512806208e-05, |
| "loss": 0.0318, |
| "step": 14530 |
| }, |
| { |
| "epoch": 14.97425334706488, |
| "grad_norm": 0.1826692372560501, |
| "learning_rate": 3.6799702349586914e-05, |
| "loss": 0.0299, |
| "step": 14540 |
| }, |
| { |
| "epoch": 14.984552008238929, |
| "grad_norm": 0.3241519033908844, |
| "learning_rate": 3.6733264632995254e-05, |
| "loss": 0.0308, |
| "step": 14550 |
| }, |
| { |
| "epoch": 14.994850669412976, |
| "grad_norm": 0.23148488998413086, |
| "learning_rate": 3.666685210442472e-05, |
| "loss": 0.0312, |
| "step": 14560 |
| }, |
| { |
| "epoch": 15.005149330587024, |
| "grad_norm": 0.20060890913009644, |
| "learning_rate": 3.660046488996513e-05, |
| "loss": 0.039, |
| "step": 14570 |
| }, |
| { |
| "epoch": 15.015447991761071, |
| "grad_norm": 0.25309333205223083, |
| "learning_rate": 3.6534103115658244e-05, |
| "loss": 0.0331, |
| "step": 14580 |
| }, |
| { |
| "epoch": 15.02574665293512, |
| "grad_norm": 0.32379934191703796, |
| "learning_rate": 3.646776690749748e-05, |
| "loss": 0.0319, |
| "step": 14590 |
| }, |
| { |
| "epoch": 15.036045314109165, |
| "grad_norm": 0.2242245078086853, |
| "learning_rate": 3.640145639142779e-05, |
| "loss": 0.0293, |
| "step": 14600 |
| }, |
| { |
| "epoch": 15.046343975283213, |
| "grad_norm": 0.37435922026634216, |
| "learning_rate": 3.6335171693345295e-05, |
| "loss": 0.0317, |
| "step": 14610 |
| }, |
| { |
| "epoch": 15.05664263645726, |
| "grad_norm": 0.4914971888065338, |
| "learning_rate": 3.62689129390971e-05, |
| "loss": 0.0297, |
| "step": 14620 |
| }, |
| { |
| "epoch": 15.066941297631308, |
| "grad_norm": 0.2628423571586609, |
| "learning_rate": 3.620268025448107e-05, |
| "loss": 0.0295, |
| "step": 14630 |
| }, |
| { |
| "epoch": 15.077239958805356, |
| "grad_norm": 0.22137194871902466, |
| "learning_rate": 3.6136473765245575e-05, |
| "loss": 0.0382, |
| "step": 14640 |
| }, |
| { |
| "epoch": 15.087538619979403, |
| "grad_norm": 0.33135750889778137, |
| "learning_rate": 3.6070293597089225e-05, |
| "loss": 0.0346, |
| "step": 14650 |
| }, |
| { |
| "epoch": 15.097837281153451, |
| "grad_norm": 0.2422097623348236, |
| "learning_rate": 3.60041398756607e-05, |
| "loss": 0.028, |
| "step": 14660 |
| }, |
| { |
| "epoch": 15.108135942327497, |
| "grad_norm": 0.25506383180618286, |
| "learning_rate": 3.593801272655842e-05, |
| "loss": 0.0292, |
| "step": 14670 |
| }, |
| { |
| "epoch": 15.118434603501544, |
| "grad_norm": 0.20777393877506256, |
| "learning_rate": 3.5871912275330415e-05, |
| "loss": 0.0256, |
| "step": 14680 |
| }, |
| { |
| "epoch": 15.128733264675592, |
| "grad_norm": 0.3608769476413727, |
| "learning_rate": 3.5805838647473956e-05, |
| "loss": 0.034, |
| "step": 14690 |
| }, |
| { |
| "epoch": 15.13903192584964, |
| "grad_norm": 0.24145326018333435, |
| "learning_rate": 3.573979196843546e-05, |
| "loss": 0.0293, |
| "step": 14700 |
| }, |
| { |
| "epoch": 15.149330587023687, |
| "grad_norm": 0.2989254891872406, |
| "learning_rate": 3.567377236361008e-05, |
| "loss": 0.0281, |
| "step": 14710 |
| }, |
| { |
| "epoch": 15.159629248197735, |
| "grad_norm": 0.19318531453609467, |
| "learning_rate": 3.560777995834171e-05, |
| "loss": 0.0426, |
| "step": 14720 |
| }, |
| { |
| "epoch": 15.169927909371781, |
| "grad_norm": 0.2559758722782135, |
| "learning_rate": 3.554181487792246e-05, |
| "loss": 0.0308, |
| "step": 14730 |
| }, |
| { |
| "epoch": 15.180226570545829, |
| "grad_norm": 0.20600715279579163, |
| "learning_rate": 3.547587724759267e-05, |
| "loss": 0.0312, |
| "step": 14740 |
| }, |
| { |
| "epoch": 15.190525231719876, |
| "grad_norm": 0.25616544485092163, |
| "learning_rate": 3.540996719254048e-05, |
| "loss": 0.0296, |
| "step": 14750 |
| }, |
| { |
| "epoch": 15.200823892893924, |
| "grad_norm": 0.1702621430158615, |
| "learning_rate": 3.5344084837901745e-05, |
| "loss": 0.0297, |
| "step": 14760 |
| }, |
| { |
| "epoch": 15.211122554067972, |
| "grad_norm": 0.26881083846092224, |
| "learning_rate": 3.527823030875966e-05, |
| "loss": 0.0338, |
| "step": 14770 |
| }, |
| { |
| "epoch": 15.22142121524202, |
| "grad_norm": 0.2374623417854309, |
| "learning_rate": 3.5212403730144674e-05, |
| "loss": 0.0322, |
| "step": 14780 |
| }, |
| { |
| "epoch": 15.231719876416065, |
| "grad_norm": 0.3395121991634369, |
| "learning_rate": 3.5146605227034076e-05, |
| "loss": 0.0352, |
| "step": 14790 |
| }, |
| { |
| "epoch": 15.242018537590113, |
| "grad_norm": 0.2763458490371704, |
| "learning_rate": 3.508083492435195e-05, |
| "loss": 0.0339, |
| "step": 14800 |
| }, |
| { |
| "epoch": 15.25231719876416, |
| "grad_norm": 0.24392758309841156, |
| "learning_rate": 3.501509294696874e-05, |
| "loss": 0.0298, |
| "step": 14810 |
| }, |
| { |
| "epoch": 15.262615859938208, |
| "grad_norm": 0.3314224183559418, |
| "learning_rate": 3.49493794197012e-05, |
| "loss": 0.0357, |
| "step": 14820 |
| }, |
| { |
| "epoch": 15.272914521112256, |
| "grad_norm": 0.40287965536117554, |
| "learning_rate": 3.488369446731198e-05, |
| "loss": 0.0301, |
| "step": 14830 |
| }, |
| { |
| "epoch": 15.283213182286303, |
| "grad_norm": 0.3372296392917633, |
| "learning_rate": 3.4818038214509584e-05, |
| "loss": 0.0336, |
| "step": 14840 |
| }, |
| { |
| "epoch": 15.293511843460351, |
| "grad_norm": 0.2584548890590668, |
| "learning_rate": 3.4752410785947937e-05, |
| "loss": 0.029, |
| "step": 14850 |
| }, |
| { |
| "epoch": 15.303810504634397, |
| "grad_norm": 0.34848877787590027, |
| "learning_rate": 3.468681230622629e-05, |
| "loss": 0.0318, |
| "step": 14860 |
| }, |
| { |
| "epoch": 15.314109165808445, |
| "grad_norm": 0.4821033477783203, |
| "learning_rate": 3.462124289988889e-05, |
| "loss": 0.0331, |
| "step": 14870 |
| }, |
| { |
| "epoch": 15.324407826982492, |
| "grad_norm": 0.3023509085178375, |
| "learning_rate": 3.4555702691424834e-05, |
| "loss": 0.0354, |
| "step": 14880 |
| }, |
| { |
| "epoch": 15.33470648815654, |
| "grad_norm": 0.23537398874759674, |
| "learning_rate": 3.449019180526774e-05, |
| "loss": 0.0282, |
| "step": 14890 |
| }, |
| { |
| "epoch": 15.345005149330587, |
| "grad_norm": 0.2854698598384857, |
| "learning_rate": 3.44247103657956e-05, |
| "loss": 0.0307, |
| "step": 14900 |
| }, |
| { |
| "epoch": 15.355303810504635, |
| "grad_norm": 0.23819385468959808, |
| "learning_rate": 3.435925849733045e-05, |
| "loss": 0.0312, |
| "step": 14910 |
| }, |
| { |
| "epoch": 15.365602471678681, |
| "grad_norm": 0.241379514336586, |
| "learning_rate": 3.429383632413823e-05, |
| "loss": 0.0301, |
| "step": 14920 |
| }, |
| { |
| "epoch": 15.375901132852729, |
| "grad_norm": 0.2703462839126587, |
| "learning_rate": 3.422844397042847e-05, |
| "loss": 0.0387, |
| "step": 14930 |
| }, |
| { |
| "epoch": 15.386199794026776, |
| "grad_norm": 0.2962131202220917, |
| "learning_rate": 3.416308156035409e-05, |
| "loss": 0.0278, |
| "step": 14940 |
| }, |
| { |
| "epoch": 15.396498455200824, |
| "grad_norm": 0.2346954643726349, |
| "learning_rate": 3.4097749218011174e-05, |
| "loss": 0.0285, |
| "step": 14950 |
| }, |
| { |
| "epoch": 15.406797116374872, |
| "grad_norm": 0.4202100336551666, |
| "learning_rate": 3.4032447067438674e-05, |
| "loss": 0.0279, |
| "step": 14960 |
| }, |
| { |
| "epoch": 15.41709577754892, |
| "grad_norm": 0.25344449281692505, |
| "learning_rate": 3.396717523261831e-05, |
| "loss": 0.029, |
| "step": 14970 |
| }, |
| { |
| "epoch": 15.427394438722967, |
| "grad_norm": 0.23738066852092743, |
| "learning_rate": 3.390193383747415e-05, |
| "loss": 0.0309, |
| "step": 14980 |
| }, |
| { |
| "epoch": 15.437693099897013, |
| "grad_norm": 2.6296916007995605, |
| "learning_rate": 3.383672300587254e-05, |
| "loss": 0.0366, |
| "step": 14990 |
| }, |
| { |
| "epoch": 15.44799176107106, |
| "grad_norm": 0.3654041290283203, |
| "learning_rate": 3.3771542861621736e-05, |
| "loss": 0.0387, |
| "step": 15000 |
| }, |
| { |
| "epoch": 15.458290422245108, |
| "grad_norm": 0.26136070489883423, |
| "learning_rate": 3.370639352847179e-05, |
| "loss": 0.0332, |
| "step": 15010 |
| }, |
| { |
| "epoch": 15.468589083419156, |
| "grad_norm": 0.26935380697250366, |
| "learning_rate": 3.3641275130114206e-05, |
| "loss": 0.0307, |
| "step": 15020 |
| }, |
| { |
| "epoch": 15.478887744593203, |
| "grad_norm": 0.2979789972305298, |
| "learning_rate": 3.357618779018182e-05, |
| "loss": 0.0255, |
| "step": 15030 |
| }, |
| { |
| "epoch": 15.489186405767251, |
| "grad_norm": 0.2572219967842102, |
| "learning_rate": 3.351113163224843e-05, |
| "loss": 0.0358, |
| "step": 15040 |
| }, |
| { |
| "epoch": 15.499485066941297, |
| "grad_norm": 0.2063932716846466, |
| "learning_rate": 3.344610677982867e-05, |
| "loss": 0.0322, |
| "step": 15050 |
| }, |
| { |
| "epoch": 15.509783728115345, |
| "grad_norm": 0.44023096561431885, |
| "learning_rate": 3.338111335637773e-05, |
| "loss": 0.0323, |
| "step": 15060 |
| }, |
| { |
| "epoch": 15.520082389289392, |
| "grad_norm": 0.3511848449707031, |
| "learning_rate": 3.3316151485291146e-05, |
| "loss": 0.0284, |
| "step": 15070 |
| }, |
| { |
| "epoch": 15.53038105046344, |
| "grad_norm": 0.3060377538204193, |
| "learning_rate": 3.32512212899045e-05, |
| "loss": 0.0299, |
| "step": 15080 |
| }, |
| { |
| "epoch": 15.540679711637488, |
| "grad_norm": 0.2702105939388275, |
| "learning_rate": 3.318632289349332e-05, |
| "loss": 0.0296, |
| "step": 15090 |
| }, |
| { |
| "epoch": 15.550978372811535, |
| "grad_norm": 0.13126692175865173, |
| "learning_rate": 3.312145641927265e-05, |
| "loss": 0.0283, |
| "step": 15100 |
| }, |
| { |
| "epoch": 15.561277033985581, |
| "grad_norm": 0.17313429713249207, |
| "learning_rate": 3.305662199039705e-05, |
| "loss": 0.032, |
| "step": 15110 |
| }, |
| { |
| "epoch": 15.571575695159629, |
| "grad_norm": 0.28625550866127014, |
| "learning_rate": 3.2991819729960136e-05, |
| "loss": 0.0339, |
| "step": 15120 |
| }, |
| { |
| "epoch": 15.581874356333676, |
| "grad_norm": 0.308564692735672, |
| "learning_rate": 3.29270497609945e-05, |
| "loss": 0.0357, |
| "step": 15130 |
| }, |
| { |
| "epoch": 15.592173017507724, |
| "grad_norm": 0.2243090718984604, |
| "learning_rate": 3.28623122064714e-05, |
| "loss": 0.03, |
| "step": 15140 |
| }, |
| { |
| "epoch": 15.602471678681772, |
| "grad_norm": 0.19758236408233643, |
| "learning_rate": 3.27976071893006e-05, |
| "loss": 0.0284, |
| "step": 15150 |
| }, |
| { |
| "epoch": 15.61277033985582, |
| "grad_norm": 0.2614839971065521, |
| "learning_rate": 3.2732934832330033e-05, |
| "loss": 0.0318, |
| "step": 15160 |
| }, |
| { |
| "epoch": 15.623069001029865, |
| "grad_norm": 0.22418425977230072, |
| "learning_rate": 3.2668295258345665e-05, |
| "loss": 0.0302, |
| "step": 15170 |
| }, |
| { |
| "epoch": 15.633367662203913, |
| "grad_norm": 0.2627883851528168, |
| "learning_rate": 3.260368859007119e-05, |
| "loss": 0.0257, |
| "step": 15180 |
| }, |
| { |
| "epoch": 15.64366632337796, |
| "grad_norm": 0.23288536071777344, |
| "learning_rate": 3.253911495016785e-05, |
| "loss": 0.0313, |
| "step": 15190 |
| }, |
| { |
| "epoch": 15.653964984552008, |
| "grad_norm": 0.3750770390033722, |
| "learning_rate": 3.247457446123415e-05, |
| "loss": 0.034, |
| "step": 15200 |
| }, |
| { |
| "epoch": 15.664263645726056, |
| "grad_norm": 0.7027446627616882, |
| "learning_rate": 3.2410067245805715e-05, |
| "loss": 0.0315, |
| "step": 15210 |
| }, |
| { |
| "epoch": 15.674562306900103, |
| "grad_norm": 0.2821480929851532, |
| "learning_rate": 3.234559342635493e-05, |
| "loss": 0.0259, |
| "step": 15220 |
| }, |
| { |
| "epoch": 15.684860968074151, |
| "grad_norm": 0.18873628973960876, |
| "learning_rate": 3.228115312529082e-05, |
| "loss": 0.0238, |
| "step": 15230 |
| }, |
| { |
| "epoch": 15.695159629248197, |
| "grad_norm": 0.25101834535598755, |
| "learning_rate": 3.221674646495874e-05, |
| "loss": 0.0293, |
| "step": 15240 |
| }, |
| { |
| "epoch": 15.705458290422245, |
| "grad_norm": 0.4093859791755676, |
| "learning_rate": 3.215237356764021e-05, |
| "loss": 0.0295, |
| "step": 15250 |
| }, |
| { |
| "epoch": 15.715756951596292, |
| "grad_norm": 0.26483431458473206, |
| "learning_rate": 3.208803455555259e-05, |
| "loss": 0.0349, |
| "step": 15260 |
| }, |
| { |
| "epoch": 15.72605561277034, |
| "grad_norm": 0.19289034605026245, |
| "learning_rate": 3.2023729550849e-05, |
| "loss": 0.0304, |
| "step": 15270 |
| }, |
| { |
| "epoch": 15.736354273944388, |
| "grad_norm": 0.2590263783931732, |
| "learning_rate": 3.195945867561791e-05, |
| "loss": 0.032, |
| "step": 15280 |
| }, |
| { |
| "epoch": 15.746652935118435, |
| "grad_norm": 0.29010581970214844, |
| "learning_rate": 3.189522205188303e-05, |
| "loss": 0.0296, |
| "step": 15290 |
| }, |
| { |
| "epoch": 15.756951596292481, |
| "grad_norm": 0.24612699449062347, |
| "learning_rate": 3.183101980160303e-05, |
| "loss": 0.0414, |
| "step": 15300 |
| }, |
| { |
| "epoch": 15.767250257466529, |
| "grad_norm": 0.281768798828125, |
| "learning_rate": 3.176685204667132e-05, |
| "loss": 0.0306, |
| "step": 15310 |
| }, |
| { |
| "epoch": 15.777548918640576, |
| "grad_norm": 0.16171668469905853, |
| "learning_rate": 3.1702718908915805e-05, |
| "loss": 0.0347, |
| "step": 15320 |
| }, |
| { |
| "epoch": 15.787847579814624, |
| "grad_norm": 0.34655749797821045, |
| "learning_rate": 3.1638620510098725e-05, |
| "loss": 0.0342, |
| "step": 15330 |
| }, |
| { |
| "epoch": 15.798146240988672, |
| "grad_norm": 0.30499929189682007, |
| "learning_rate": 3.157455697191629e-05, |
| "loss": 0.0298, |
| "step": 15340 |
| }, |
| { |
| "epoch": 15.80844490216272, |
| "grad_norm": 0.3088139593601227, |
| "learning_rate": 3.151052841599854e-05, |
| "loss": 0.0325, |
| "step": 15350 |
| }, |
| { |
| "epoch": 15.818743563336767, |
| "grad_norm": 0.24579790234565735, |
| "learning_rate": 3.1446534963909146e-05, |
| "loss": 0.0318, |
| "step": 15360 |
| }, |
| { |
| "epoch": 15.829042224510813, |
| "grad_norm": 0.1964549720287323, |
| "learning_rate": 3.138257673714507e-05, |
| "loss": 0.0283, |
| "step": 15370 |
| }, |
| { |
| "epoch": 15.83934088568486, |
| "grad_norm": 0.257536381483078, |
| "learning_rate": 3.131865385713645e-05, |
| "loss": 0.0314, |
| "step": 15380 |
| }, |
| { |
| "epoch": 15.849639546858908, |
| "grad_norm": 0.25062111020088196, |
| "learning_rate": 3.1254766445246255e-05, |
| "loss": 0.0323, |
| "step": 15390 |
| }, |
| { |
| "epoch": 15.859938208032956, |
| "grad_norm": 0.24054642021656036, |
| "learning_rate": 3.11909146227702e-05, |
| "loss": 0.0342, |
| "step": 15400 |
| }, |
| { |
| "epoch": 15.870236869207003, |
| "grad_norm": 0.2715880870819092, |
| "learning_rate": 3.1127098510936335e-05, |
| "loss": 0.0349, |
| "step": 15410 |
| }, |
| { |
| "epoch": 15.880535530381051, |
| "grad_norm": 0.39196503162384033, |
| "learning_rate": 3.106331823090498e-05, |
| "loss": 0.0313, |
| "step": 15420 |
| }, |
| { |
| "epoch": 15.890834191555097, |
| "grad_norm": 0.2922776937484741, |
| "learning_rate": 3.0999573903768386e-05, |
| "loss": 0.0355, |
| "step": 15430 |
| }, |
| { |
| "epoch": 15.901132852729145, |
| "grad_norm": 0.4503494203090668, |
| "learning_rate": 3.093586565055058e-05, |
| "loss": 0.0357, |
| "step": 15440 |
| }, |
| { |
| "epoch": 15.911431513903192, |
| "grad_norm": 0.2735413908958435, |
| "learning_rate": 3.0872193592207035e-05, |
| "loss": 0.0322, |
| "step": 15450 |
| }, |
| { |
| "epoch": 15.92173017507724, |
| "grad_norm": 0.2650955021381378, |
| "learning_rate": 3.080855784962461e-05, |
| "loss": 0.031, |
| "step": 15460 |
| }, |
| { |
| "epoch": 15.932028836251288, |
| "grad_norm": 0.3438953161239624, |
| "learning_rate": 3.07449585436211e-05, |
| "loss": 0.0309, |
| "step": 15470 |
| }, |
| { |
| "epoch": 15.942327497425335, |
| "grad_norm": 0.26952746510505676, |
| "learning_rate": 3.068139579494521e-05, |
| "loss": 0.0365, |
| "step": 15480 |
| }, |
| { |
| "epoch": 15.952626158599383, |
| "grad_norm": 0.33866754174232483, |
| "learning_rate": 3.061786972427618e-05, |
| "loss": 0.0296, |
| "step": 15490 |
| }, |
| { |
| "epoch": 15.962924819773429, |
| "grad_norm": 0.30630460381507874, |
| "learning_rate": 3.0554380452223666e-05, |
| "loss": 0.031, |
| "step": 15500 |
| }, |
| { |
| "epoch": 15.973223480947476, |
| "grad_norm": 0.27467259764671326, |
| "learning_rate": 3.0490928099327386e-05, |
| "loss": 0.0317, |
| "step": 15510 |
| }, |
| { |
| "epoch": 15.983522142121524, |
| "grad_norm": 0.23503737151622772, |
| "learning_rate": 3.0427512786057054e-05, |
| "loss": 0.0325, |
| "step": 15520 |
| }, |
| { |
| "epoch": 15.993820803295572, |
| "grad_norm": 0.28616487979888916, |
| "learning_rate": 3.0364134632811992e-05, |
| "loss": 0.0314, |
| "step": 15530 |
| }, |
| { |
| "epoch": 16.004119464469618, |
| "grad_norm": 0.20699192583560944, |
| "learning_rate": 3.0300793759921003e-05, |
| "loss": 0.041, |
| "step": 15540 |
| }, |
| { |
| "epoch": 16.014418125643665, |
| "grad_norm": 0.2688738703727722, |
| "learning_rate": 3.0237490287642088e-05, |
| "loss": 0.0329, |
| "step": 15550 |
| }, |
| { |
| "epoch": 16.024716786817713, |
| "grad_norm": 0.49202895164489746, |
| "learning_rate": 3.017422433616227e-05, |
| "loss": 0.0327, |
| "step": 15560 |
| }, |
| { |
| "epoch": 16.03501544799176, |
| "grad_norm": 0.2781137228012085, |
| "learning_rate": 3.011099602559728e-05, |
| "loss": 0.0343, |
| "step": 15570 |
| }, |
| { |
| "epoch": 16.04531410916581, |
| "grad_norm": 0.28886276483535767, |
| "learning_rate": 3.0047805475991487e-05, |
| "loss": 0.0298, |
| "step": 15580 |
| }, |
| { |
| "epoch": 16.055612770339856, |
| "grad_norm": 0.2826370596885681, |
| "learning_rate": 2.9984652807317442e-05, |
| "loss": 0.0308, |
| "step": 15590 |
| }, |
| { |
| "epoch": 16.065911431513904, |
| "grad_norm": 0.22490771114826202, |
| "learning_rate": 2.992153813947588e-05, |
| "loss": 0.0267, |
| "step": 15600 |
| }, |
| { |
| "epoch": 16.07621009268795, |
| "grad_norm": 0.17981351912021637, |
| "learning_rate": 2.9858461592295316e-05, |
| "loss": 0.0249, |
| "step": 15610 |
| }, |
| { |
| "epoch": 16.086508753862, |
| "grad_norm": 0.2858598828315735, |
| "learning_rate": 2.9795423285531942e-05, |
| "loss": 0.0287, |
| "step": 15620 |
| }, |
| { |
| "epoch": 16.096807415036047, |
| "grad_norm": 0.2903124988079071, |
| "learning_rate": 2.9732423338869286e-05, |
| "loss": 0.0339, |
| "step": 15630 |
| }, |
| { |
| "epoch": 16.107106076210094, |
| "grad_norm": 0.2045518010854721, |
| "learning_rate": 2.9669461871918143e-05, |
| "loss": 0.0262, |
| "step": 15640 |
| }, |
| { |
| "epoch": 16.117404737384142, |
| "grad_norm": 0.18494410812854767, |
| "learning_rate": 2.9606539004216142e-05, |
| "loss": 0.037, |
| "step": 15650 |
| }, |
| { |
| "epoch": 16.127703398558186, |
| "grad_norm": 0.2084999680519104, |
| "learning_rate": 2.954365485522771e-05, |
| "loss": 0.0321, |
| "step": 15660 |
| }, |
| { |
| "epoch": 16.138002059732234, |
| "grad_norm": 0.39265531301498413, |
| "learning_rate": 2.9480809544343713e-05, |
| "loss": 0.0293, |
| "step": 15670 |
| }, |
| { |
| "epoch": 16.14830072090628, |
| "grad_norm": 0.20841005444526672, |
| "learning_rate": 2.9418003190881314e-05, |
| "loss": 0.0303, |
| "step": 15680 |
| }, |
| { |
| "epoch": 16.15859938208033, |
| "grad_norm": 0.30363988876342773, |
| "learning_rate": 2.935523591408366e-05, |
| "loss": 0.0292, |
| "step": 15690 |
| }, |
| { |
| "epoch": 16.168898043254377, |
| "grad_norm": 0.24384894967079163, |
| "learning_rate": 2.9292507833119798e-05, |
| "loss": 0.0272, |
| "step": 15700 |
| }, |
| { |
| "epoch": 16.179196704428424, |
| "grad_norm": 0.2245059460401535, |
| "learning_rate": 2.9229819067084262e-05, |
| "loss": 0.0256, |
| "step": 15710 |
| }, |
| { |
| "epoch": 16.189495365602472, |
| "grad_norm": 0.2277413308620453, |
| "learning_rate": 2.9167169734997024e-05, |
| "loss": 0.0269, |
| "step": 15720 |
| }, |
| { |
| "epoch": 16.19979402677652, |
| "grad_norm": 0.2198115885257721, |
| "learning_rate": 2.91045599558031e-05, |
| "loss": 0.0348, |
| "step": 15730 |
| }, |
| { |
| "epoch": 16.210092687950567, |
| "grad_norm": 0.21734359860420227, |
| "learning_rate": 2.9041989848372497e-05, |
| "loss": 0.0241, |
| "step": 15740 |
| }, |
| { |
| "epoch": 16.220391349124615, |
| "grad_norm": 0.23658619821071625, |
| "learning_rate": 2.8979459531499876e-05, |
| "loss": 0.032, |
| "step": 15750 |
| }, |
| { |
| "epoch": 16.230690010298662, |
| "grad_norm": 0.33513155579566956, |
| "learning_rate": 2.891696912390429e-05, |
| "loss": 0.0301, |
| "step": 15760 |
| }, |
| { |
| "epoch": 16.24098867147271, |
| "grad_norm": 0.26320722699165344, |
| "learning_rate": 2.885451874422911e-05, |
| "loss": 0.0305, |
| "step": 15770 |
| }, |
| { |
| "epoch": 16.251287332646754, |
| "grad_norm": 0.26464515924453735, |
| "learning_rate": 2.8792108511041666e-05, |
| "loss": 0.0332, |
| "step": 15780 |
| }, |
| { |
| "epoch": 16.261585993820802, |
| "grad_norm": 0.30329862236976624, |
| "learning_rate": 2.8729738542833097e-05, |
| "loss": 0.0284, |
| "step": 15790 |
| }, |
| { |
| "epoch": 16.27188465499485, |
| "grad_norm": 0.237071231007576, |
| "learning_rate": 2.8667408958018e-05, |
| "loss": 0.0323, |
| "step": 15800 |
| }, |
| { |
| "epoch": 16.282183316168897, |
| "grad_norm": 0.22881901264190674, |
| "learning_rate": 2.860511987493446e-05, |
| "loss": 0.0342, |
| "step": 15810 |
| }, |
| { |
| "epoch": 16.292481977342945, |
| "grad_norm": 0.32153141498565674, |
| "learning_rate": 2.854287141184353e-05, |
| "loss": 0.0266, |
| "step": 15820 |
| }, |
| { |
| "epoch": 16.302780638516992, |
| "grad_norm": 0.27103251218795776, |
| "learning_rate": 2.8480663686929194e-05, |
| "loss": 0.0333, |
| "step": 15830 |
| }, |
| { |
| "epoch": 16.31307929969104, |
| "grad_norm": 0.3440805971622467, |
| "learning_rate": 2.8418496818298095e-05, |
| "loss": 0.0308, |
| "step": 15840 |
| }, |
| { |
| "epoch": 16.323377960865088, |
| "grad_norm": 0.3047653138637543, |
| "learning_rate": 2.8356370923979324e-05, |
| "loss": 0.0269, |
| "step": 15850 |
| }, |
| { |
| "epoch": 16.333676622039135, |
| "grad_norm": 0.19806207716464996, |
| "learning_rate": 2.8294286121924084e-05, |
| "loss": 0.0317, |
| "step": 15860 |
| }, |
| { |
| "epoch": 16.343975283213183, |
| "grad_norm": 0.1738138198852539, |
| "learning_rate": 2.8232242530005726e-05, |
| "loss": 0.0326, |
| "step": 15870 |
| }, |
| { |
| "epoch": 16.35427394438723, |
| "grad_norm": 0.3517332077026367, |
| "learning_rate": 2.8170240266019197e-05, |
| "loss": 0.0343, |
| "step": 15880 |
| }, |
| { |
| "epoch": 16.36457260556128, |
| "grad_norm": 0.28405335545539856, |
| "learning_rate": 2.8108279447681072e-05, |
| "loss": 0.0301, |
| "step": 15890 |
| }, |
| { |
| "epoch": 16.374871266735326, |
| "grad_norm": 0.34772422909736633, |
| "learning_rate": 2.8046360192629218e-05, |
| "loss": 0.0266, |
| "step": 15900 |
| }, |
| { |
| "epoch": 16.38516992790937, |
| "grad_norm": 0.26349806785583496, |
| "learning_rate": 2.7984482618422604e-05, |
| "loss": 0.0338, |
| "step": 15910 |
| }, |
| { |
| "epoch": 16.395468589083418, |
| "grad_norm": 0.4074728190898895, |
| "learning_rate": 2.7922646842540977e-05, |
| "loss": 0.0302, |
| "step": 15920 |
| }, |
| { |
| "epoch": 16.405767250257465, |
| "grad_norm": 0.3678358793258667, |
| "learning_rate": 2.7860852982384887e-05, |
| "loss": 0.0389, |
| "step": 15930 |
| }, |
| { |
| "epoch": 16.416065911431513, |
| "grad_norm": 0.22004730999469757, |
| "learning_rate": 2.7799101155275155e-05, |
| "loss": 0.0335, |
| "step": 15940 |
| }, |
| { |
| "epoch": 16.42636457260556, |
| "grad_norm": 0.41160476207733154, |
| "learning_rate": 2.773739147845286e-05, |
| "loss": 0.0335, |
| "step": 15950 |
| }, |
| { |
| "epoch": 16.43666323377961, |
| "grad_norm": 0.2567139267921448, |
| "learning_rate": 2.767572406907908e-05, |
| "loss": 0.0275, |
| "step": 15960 |
| }, |
| { |
| "epoch": 16.446961894953656, |
| "grad_norm": 0.3399168848991394, |
| "learning_rate": 2.761409904423461e-05, |
| "loss": 0.0326, |
| "step": 15970 |
| }, |
| { |
| "epoch": 16.457260556127704, |
| "grad_norm": 0.35717037320137024, |
| "learning_rate": 2.7552516520919734e-05, |
| "loss": 0.0346, |
| "step": 15980 |
| }, |
| { |
| "epoch": 16.46755921730175, |
| "grad_norm": 0.3658212721347809, |
| "learning_rate": 2.7490976616054177e-05, |
| "loss": 0.0353, |
| "step": 15990 |
| }, |
| { |
| "epoch": 16.4778578784758, |
| "grad_norm": 0.22349724173545837, |
| "learning_rate": 2.7429479446476587e-05, |
| "loss": 0.0285, |
| "step": 16000 |
| }, |
| { |
| "epoch": 16.488156539649847, |
| "grad_norm": 0.23875796794891357, |
| "learning_rate": 2.7368025128944576e-05, |
| "loss": 0.0276, |
| "step": 16010 |
| }, |
| { |
| "epoch": 16.498455200823894, |
| "grad_norm": 0.2834377586841583, |
| "learning_rate": 2.7306613780134376e-05, |
| "loss": 0.0325, |
| "step": 16020 |
| }, |
| { |
| "epoch": 16.508753861997942, |
| "grad_norm": 0.2421753704547882, |
| "learning_rate": 2.7245245516640656e-05, |
| "loss": 0.0275, |
| "step": 16030 |
| }, |
| { |
| "epoch": 16.519052523171986, |
| "grad_norm": 0.2751816213130951, |
| "learning_rate": 2.7183920454976196e-05, |
| "loss": 0.0263, |
| "step": 16040 |
| }, |
| { |
| "epoch": 16.529351184346034, |
| "grad_norm": 0.2655097544193268, |
| "learning_rate": 2.7122638711571912e-05, |
| "loss": 0.0266, |
| "step": 16050 |
| }, |
| { |
| "epoch": 16.53964984552008, |
| "grad_norm": 0.24982544779777527, |
| "learning_rate": 2.7061400402776328e-05, |
| "loss": 0.0293, |
| "step": 16060 |
| }, |
| { |
| "epoch": 16.54994850669413, |
| "grad_norm": 0.2736775577068329, |
| "learning_rate": 2.7000205644855557e-05, |
| "loss": 0.0326, |
| "step": 16070 |
| }, |
| { |
| "epoch": 16.560247167868177, |
| "grad_norm": 0.2747161388397217, |
| "learning_rate": 2.6939054553993065e-05, |
| "loss": 0.0253, |
| "step": 16080 |
| }, |
| { |
| "epoch": 16.570545829042224, |
| "grad_norm": 0.22185395658016205, |
| "learning_rate": 2.6877947246289376e-05, |
| "loss": 0.0312, |
| "step": 16090 |
| }, |
| { |
| "epoch": 16.580844490216272, |
| "grad_norm": 0.2517991364002228, |
| "learning_rate": 2.681688383776184e-05, |
| "loss": 0.027, |
| "step": 16100 |
| }, |
| { |
| "epoch": 16.59114315139032, |
| "grad_norm": 0.2567277252674103, |
| "learning_rate": 2.675586444434459e-05, |
| "loss": 0.03, |
| "step": 16110 |
| }, |
| { |
| "epoch": 16.601441812564367, |
| "grad_norm": 0.2631073296070099, |
| "learning_rate": 2.669488918188806e-05, |
| "loss": 0.0322, |
| "step": 16120 |
| }, |
| { |
| "epoch": 16.611740473738415, |
| "grad_norm": 0.46920910477638245, |
| "learning_rate": 2.6633958166158958e-05, |
| "loss": 0.0273, |
| "step": 16130 |
| }, |
| { |
| "epoch": 16.622039134912463, |
| "grad_norm": 0.1975933313369751, |
| "learning_rate": 2.6573071512839996e-05, |
| "loss": 0.0302, |
| "step": 16140 |
| }, |
| { |
| "epoch": 16.63233779608651, |
| "grad_norm": 0.34002429246902466, |
| "learning_rate": 2.651222933752965e-05, |
| "loss": 0.0284, |
| "step": 16150 |
| }, |
| { |
| "epoch": 16.642636457260558, |
| "grad_norm": 0.18480847775936127, |
| "learning_rate": 2.645143175574192e-05, |
| "loss": 0.0284, |
| "step": 16160 |
| }, |
| { |
| "epoch": 16.652935118434602, |
| "grad_norm": 0.1662713885307312, |
| "learning_rate": 2.6390678882906173e-05, |
| "loss": 0.0311, |
| "step": 16170 |
| }, |
| { |
| "epoch": 16.66323377960865, |
| "grad_norm": 0.26757383346557617, |
| "learning_rate": 2.6329970834366886e-05, |
| "loss": 0.0303, |
| "step": 16180 |
| }, |
| { |
| "epoch": 16.673532440782697, |
| "grad_norm": 0.2224883884191513, |
| "learning_rate": 2.626930772538344e-05, |
| "loss": 0.0331, |
| "step": 16190 |
| }, |
| { |
| "epoch": 16.683831101956745, |
| "grad_norm": 0.5324035882949829, |
| "learning_rate": 2.6208689671129898e-05, |
| "loss": 0.0277, |
| "step": 16200 |
| }, |
| { |
| "epoch": 16.694129763130793, |
| "grad_norm": 0.24052481353282928, |
| "learning_rate": 2.6148116786694743e-05, |
| "loss": 0.0362, |
| "step": 16210 |
| }, |
| { |
| "epoch": 16.70442842430484, |
| "grad_norm": 0.16978083550930023, |
| "learning_rate": 2.6087589187080742e-05, |
| "loss": 0.0249, |
| "step": 16220 |
| }, |
| { |
| "epoch": 16.714727085478888, |
| "grad_norm": 0.2954985499382019, |
| "learning_rate": 2.6027106987204676e-05, |
| "loss": 0.0304, |
| "step": 16230 |
| }, |
| { |
| "epoch": 16.725025746652936, |
| "grad_norm": 0.31481167674064636, |
| "learning_rate": 2.5966670301897116e-05, |
| "loss": 0.0301, |
| "step": 16240 |
| }, |
| { |
| "epoch": 16.735324407826983, |
| "grad_norm": 0.4357336461544037, |
| "learning_rate": 2.590627924590224e-05, |
| "loss": 0.0276, |
| "step": 16250 |
| }, |
| { |
| "epoch": 16.74562306900103, |
| "grad_norm": 0.22642803192138672, |
| "learning_rate": 2.58459339338776e-05, |
| "loss": 0.0307, |
| "step": 16260 |
| }, |
| { |
| "epoch": 16.75592173017508, |
| "grad_norm": 0.2144901007413864, |
| "learning_rate": 2.578563448039384e-05, |
| "loss": 0.0353, |
| "step": 16270 |
| }, |
| { |
| "epoch": 16.766220391349126, |
| "grad_norm": 0.19773432612419128, |
| "learning_rate": 2.57253809999346e-05, |
| "loss": 0.0325, |
| "step": 16280 |
| }, |
| { |
| "epoch": 16.77651905252317, |
| "grad_norm": 0.2523026168346405, |
| "learning_rate": 2.5665173606896233e-05, |
| "loss": 0.0312, |
| "step": 16290 |
| }, |
| { |
| "epoch": 16.786817713697218, |
| "grad_norm": 0.3775824010372162, |
| "learning_rate": 2.560501241558756e-05, |
| "loss": 0.0344, |
| "step": 16300 |
| }, |
| { |
| "epoch": 16.797116374871266, |
| "grad_norm": 0.24595195055007935, |
| "learning_rate": 2.5544897540229708e-05, |
| "loss": 0.032, |
| "step": 16310 |
| }, |
| { |
| "epoch": 16.807415036045313, |
| "grad_norm": 0.3129733204841614, |
| "learning_rate": 2.5484829094955876e-05, |
| "loss": 0.0317, |
| "step": 16320 |
| }, |
| { |
| "epoch": 16.81771369721936, |
| "grad_norm": 0.3071824610233307, |
| "learning_rate": 2.5424807193811055e-05, |
| "loss": 0.0322, |
| "step": 16330 |
| }, |
| { |
| "epoch": 16.82801235839341, |
| "grad_norm": 0.2712470591068268, |
| "learning_rate": 2.5364831950751945e-05, |
| "loss": 0.0298, |
| "step": 16340 |
| }, |
| { |
| "epoch": 16.838311019567456, |
| "grad_norm": 0.31381168961524963, |
| "learning_rate": 2.5304903479646603e-05, |
| "loss": 0.0305, |
| "step": 16350 |
| }, |
| { |
| "epoch": 16.848609680741504, |
| "grad_norm": 0.24024145305156708, |
| "learning_rate": 2.5245021894274333e-05, |
| "loss": 0.0309, |
| "step": 16360 |
| }, |
| { |
| "epoch": 16.85890834191555, |
| "grad_norm": 0.2446487545967102, |
| "learning_rate": 2.5185187308325375e-05, |
| "loss": 0.0336, |
| "step": 16370 |
| }, |
| { |
| "epoch": 16.8692070030896, |
| "grad_norm": 0.2759553790092468, |
| "learning_rate": 2.51253998354008e-05, |
| "loss": 0.0308, |
| "step": 16380 |
| }, |
| { |
| "epoch": 16.879505664263647, |
| "grad_norm": 0.23006874322891235, |
| "learning_rate": 2.506565958901214e-05, |
| "loss": 0.0261, |
| "step": 16390 |
| }, |
| { |
| "epoch": 16.889804325437694, |
| "grad_norm": 0.25269433856010437, |
| "learning_rate": 2.500596668258134e-05, |
| "loss": 0.0317, |
| "step": 16400 |
| }, |
| { |
| "epoch": 16.900102986611742, |
| "grad_norm": 0.33330032229423523, |
| "learning_rate": 2.4946321229440435e-05, |
| "loss": 0.0372, |
| "step": 16410 |
| }, |
| { |
| "epoch": 16.910401647785786, |
| "grad_norm": 0.38393720984458923, |
| "learning_rate": 2.4886723342831374e-05, |
| "loss": 0.0368, |
| "step": 16420 |
| }, |
| { |
| "epoch": 16.920700308959834, |
| "grad_norm": 0.254517525434494, |
| "learning_rate": 2.482717313590579e-05, |
| "loss": 0.0261, |
| "step": 16430 |
| }, |
| { |
| "epoch": 16.93099897013388, |
| "grad_norm": 0.21054412424564362, |
| "learning_rate": 2.4767670721724822e-05, |
| "loss": 0.0226, |
| "step": 16440 |
| }, |
| { |
| "epoch": 16.94129763130793, |
| "grad_norm": 0.2419450879096985, |
| "learning_rate": 2.4708216213258805e-05, |
| "loss": 0.0259, |
| "step": 16450 |
| }, |
| { |
| "epoch": 16.951596292481977, |
| "grad_norm": 0.220100998878479, |
| "learning_rate": 2.464880972338718e-05, |
| "loss": 0.0296, |
| "step": 16460 |
| }, |
| { |
| "epoch": 16.961894953656024, |
| "grad_norm": 0.2163473665714264, |
| "learning_rate": 2.4589451364898197e-05, |
| "loss": 0.0276, |
| "step": 16470 |
| }, |
| { |
| "epoch": 16.972193614830072, |
| "grad_norm": 0.30859941244125366, |
| "learning_rate": 2.453014125048873e-05, |
| "loss": 0.0332, |
| "step": 16480 |
| }, |
| { |
| "epoch": 16.98249227600412, |
| "grad_norm": 0.16488564014434814, |
| "learning_rate": 2.447087949276406e-05, |
| "loss": 0.0287, |
| "step": 16490 |
| }, |
| { |
| "epoch": 16.992790937178167, |
| "grad_norm": 1.141352891921997, |
| "learning_rate": 2.441166620423767e-05, |
| "loss": 0.0282, |
| "step": 16500 |
| }, |
| { |
| "epoch": 17.003089598352215, |
| "grad_norm": 0.6053497791290283, |
| "learning_rate": 2.435250149733097e-05, |
| "loss": 0.027, |
| "step": 16510 |
| }, |
| { |
| "epoch": 17.013388259526263, |
| "grad_norm": 0.4089854955673218, |
| "learning_rate": 2.4293385484373188e-05, |
| "loss": 0.03, |
| "step": 16520 |
| }, |
| { |
| "epoch": 17.02368692070031, |
| "grad_norm": 0.38011640310287476, |
| "learning_rate": 2.423431827760108e-05, |
| "loss": 0.0242, |
| "step": 16530 |
| }, |
| { |
| "epoch": 17.033985581874358, |
| "grad_norm": 0.15878790616989136, |
| "learning_rate": 2.417529998915875e-05, |
| "loss": 0.0295, |
| "step": 16540 |
| }, |
| { |
| "epoch": 17.044284243048402, |
| "grad_norm": 0.3160199820995331, |
| "learning_rate": 2.411633073109741e-05, |
| "loss": 0.0317, |
| "step": 16550 |
| }, |
| { |
| "epoch": 17.05458290422245, |
| "grad_norm": 0.4487075209617615, |
| "learning_rate": 2.4057410615375215e-05, |
| "loss": 0.0322, |
| "step": 16560 |
| }, |
| { |
| "epoch": 17.064881565396497, |
| "grad_norm": 0.14873281121253967, |
| "learning_rate": 2.3998539753856962e-05, |
| "loss": 0.0275, |
| "step": 16570 |
| }, |
| { |
| "epoch": 17.075180226570545, |
| "grad_norm": 0.26425817608833313, |
| "learning_rate": 2.393971825831398e-05, |
| "loss": 0.0332, |
| "step": 16580 |
| }, |
| { |
| "epoch": 17.085478887744593, |
| "grad_norm": 0.28254973888397217, |
| "learning_rate": 2.388094624042389e-05, |
| "loss": 0.0272, |
| "step": 16590 |
| }, |
| { |
| "epoch": 17.09577754891864, |
| "grad_norm": 0.2275743931531906, |
| "learning_rate": 2.3822223811770288e-05, |
| "loss": 0.0274, |
| "step": 16600 |
| }, |
| { |
| "epoch": 17.106076210092688, |
| "grad_norm": 0.2673308253288269, |
| "learning_rate": 2.3763551083842757e-05, |
| "loss": 0.0275, |
| "step": 16610 |
| }, |
| { |
| "epoch": 17.116374871266736, |
| "grad_norm": 0.25909948348999023, |
| "learning_rate": 2.370492816803638e-05, |
| "loss": 0.0327, |
| "step": 16620 |
| }, |
| { |
| "epoch": 17.126673532440783, |
| "grad_norm": 0.2639828026294708, |
| "learning_rate": 2.364635517565175e-05, |
| "loss": 0.0302, |
| "step": 16630 |
| }, |
| { |
| "epoch": 17.13697219361483, |
| "grad_norm": 0.2498447746038437, |
| "learning_rate": 2.358783221789466e-05, |
| "loss": 0.0332, |
| "step": 16640 |
| }, |
| { |
| "epoch": 17.14727085478888, |
| "grad_norm": 0.4615592658519745, |
| "learning_rate": 2.352935940587592e-05, |
| "loss": 0.0324, |
| "step": 16650 |
| }, |
| { |
| "epoch": 17.157569515962926, |
| "grad_norm": 0.3771480619907379, |
| "learning_rate": 2.3470936850611063e-05, |
| "loss": 0.0319, |
| "step": 16660 |
| }, |
| { |
| "epoch": 17.167868177136974, |
| "grad_norm": 0.15599535405635834, |
| "learning_rate": 2.3412564663020337e-05, |
| "loss": 0.0361, |
| "step": 16670 |
| }, |
| { |
| "epoch": 17.178166838311018, |
| "grad_norm": 0.26683464646339417, |
| "learning_rate": 2.335424295392822e-05, |
| "loss": 0.0297, |
| "step": 16680 |
| }, |
| { |
| "epoch": 17.188465499485066, |
| "grad_norm": 0.2283865511417389, |
| "learning_rate": 2.3295971834063446e-05, |
| "loss": 0.0285, |
| "step": 16690 |
| }, |
| { |
| "epoch": 17.198764160659113, |
| "grad_norm": 0.2537069022655487, |
| "learning_rate": 2.323775141405867e-05, |
| "loss": 0.0248, |
| "step": 16700 |
| }, |
| { |
| "epoch": 17.20906282183316, |
| "grad_norm": 0.40300917625427246, |
| "learning_rate": 2.3179581804450306e-05, |
| "loss": 0.0285, |
| "step": 16710 |
| }, |
| { |
| "epoch": 17.21936148300721, |
| "grad_norm": 0.32292699813842773, |
| "learning_rate": 2.3121463115678237e-05, |
| "loss": 0.0277, |
| "step": 16720 |
| }, |
| { |
| "epoch": 17.229660144181256, |
| "grad_norm": 0.2189875841140747, |
| "learning_rate": 2.3063395458085795e-05, |
| "loss": 0.0349, |
| "step": 16730 |
| }, |
| { |
| "epoch": 17.239958805355304, |
| "grad_norm": 0.24812109768390656, |
| "learning_rate": 2.3005378941919287e-05, |
| "loss": 0.0293, |
| "step": 16740 |
| }, |
| { |
| "epoch": 17.25025746652935, |
| "grad_norm": 0.46981245279312134, |
| "learning_rate": 2.2947413677328e-05, |
| "loss": 0.0297, |
| "step": 16750 |
| }, |
| { |
| "epoch": 17.2605561277034, |
| "grad_norm": 0.293948233127594, |
| "learning_rate": 2.2889499774363903e-05, |
| "loss": 0.0303, |
| "step": 16760 |
| }, |
| { |
| "epoch": 17.270854788877447, |
| "grad_norm": 1.4435584545135498, |
| "learning_rate": 2.283163734298147e-05, |
| "loss": 0.0281, |
| "step": 16770 |
| }, |
| { |
| "epoch": 17.281153450051495, |
| "grad_norm": 2.3254072666168213, |
| "learning_rate": 2.2773826493037377e-05, |
| "loss": 0.0307, |
| "step": 16780 |
| }, |
| { |
| "epoch": 17.291452111225542, |
| "grad_norm": 0.45493340492248535, |
| "learning_rate": 2.271606733429048e-05, |
| "loss": 0.0315, |
| "step": 16790 |
| }, |
| { |
| "epoch": 17.301750772399586, |
| "grad_norm": 0.3371964991092682, |
| "learning_rate": 2.2658359976401388e-05, |
| "loss": 0.0301, |
| "step": 16800 |
| }, |
| { |
| "epoch": 17.312049433573634, |
| "grad_norm": 0.2556536793708801, |
| "learning_rate": 2.260070452893243e-05, |
| "loss": 0.0324, |
| "step": 16810 |
| }, |
| { |
| "epoch": 17.32234809474768, |
| "grad_norm": 0.24934424459934235, |
| "learning_rate": 2.2543101101347357e-05, |
| "loss": 0.0289, |
| "step": 16820 |
| }, |
| { |
| "epoch": 17.33264675592173, |
| "grad_norm": 0.2703939974308014, |
| "learning_rate": 2.2485549803011175e-05, |
| "loss": 0.029, |
| "step": 16830 |
| }, |
| { |
| "epoch": 17.342945417095777, |
| "grad_norm": 0.24408842623233795, |
| "learning_rate": 2.2428050743189845e-05, |
| "loss": 0.0267, |
| "step": 16840 |
| }, |
| { |
| "epoch": 17.353244078269825, |
| "grad_norm": 0.3018171787261963, |
| "learning_rate": 2.237060403105027e-05, |
| "loss": 0.0286, |
| "step": 16850 |
| }, |
| { |
| "epoch": 17.363542739443872, |
| "grad_norm": 0.3476426899433136, |
| "learning_rate": 2.2313209775659854e-05, |
| "loss": 0.0299, |
| "step": 16860 |
| }, |
| { |
| "epoch": 17.37384140061792, |
| "grad_norm": 0.25924161076545715, |
| "learning_rate": 2.2255868085986476e-05, |
| "loss": 0.03, |
| "step": 16870 |
| }, |
| { |
| "epoch": 17.384140061791967, |
| "grad_norm": 0.3101584017276764, |
| "learning_rate": 2.219857907089818e-05, |
| "loss": 0.0256, |
| "step": 16880 |
| }, |
| { |
| "epoch": 17.394438722966015, |
| "grad_norm": 0.21111738681793213, |
| "learning_rate": 2.2141342839163038e-05, |
| "loss": 0.0262, |
| "step": 16890 |
| }, |
| { |
| "epoch": 17.404737384140063, |
| "grad_norm": 0.2484743893146515, |
| "learning_rate": 2.2084159499448833e-05, |
| "loss": 0.0295, |
| "step": 16900 |
| }, |
| { |
| "epoch": 17.41503604531411, |
| "grad_norm": 0.30068278312683105, |
| "learning_rate": 2.2027029160323053e-05, |
| "loss": 0.0309, |
| "step": 16910 |
| }, |
| { |
| "epoch": 17.425334706488158, |
| "grad_norm": 0.20330175757408142, |
| "learning_rate": 2.196995193025243e-05, |
| "loss": 0.0236, |
| "step": 16920 |
| }, |
| { |
| "epoch": 17.435633367662202, |
| "grad_norm": 0.31278255581855774, |
| "learning_rate": 2.1912927917602944e-05, |
| "loss": 0.0357, |
| "step": 16930 |
| }, |
| { |
| "epoch": 17.44593202883625, |
| "grad_norm": 0.5232197642326355, |
| "learning_rate": 2.1855957230639507e-05, |
| "loss": 0.0324, |
| "step": 16940 |
| }, |
| { |
| "epoch": 17.456230690010297, |
| "grad_norm": 0.2283019870519638, |
| "learning_rate": 2.179903997752582e-05, |
| "loss": 0.0271, |
| "step": 16950 |
| }, |
| { |
| "epoch": 17.466529351184345, |
| "grad_norm": 0.2504780888557434, |
| "learning_rate": 2.174217626632407e-05, |
| "loss": 0.0245, |
| "step": 16960 |
| }, |
| { |
| "epoch": 17.476828012358393, |
| "grad_norm": 0.40278464555740356, |
| "learning_rate": 2.168536620499485e-05, |
| "loss": 0.0371, |
| "step": 16970 |
| }, |
| { |
| "epoch": 17.48712667353244, |
| "grad_norm": 0.21591483056545258, |
| "learning_rate": 2.162860990139688e-05, |
| "loss": 0.0312, |
| "step": 16980 |
| }, |
| { |
| "epoch": 17.497425334706488, |
| "grad_norm": 0.33034050464630127, |
| "learning_rate": 2.15719074632868e-05, |
| "loss": 0.0281, |
| "step": 16990 |
| }, |
| { |
| "epoch": 17.507723995880536, |
| "grad_norm": 0.3057808578014374, |
| "learning_rate": 2.151525899831902e-05, |
| "loss": 0.0343, |
| "step": 17000 |
| }, |
| { |
| "epoch": 17.518022657054583, |
| "grad_norm": 0.344502717256546, |
| "learning_rate": 2.1458664614045415e-05, |
| "loss": 0.0341, |
| "step": 17010 |
| }, |
| { |
| "epoch": 17.52832131822863, |
| "grad_norm": 0.2241314798593521, |
| "learning_rate": 2.1402124417915226e-05, |
| "loss": 0.0326, |
| "step": 17020 |
| }, |
| { |
| "epoch": 17.53861997940268, |
| "grad_norm": 0.30178990960121155, |
| "learning_rate": 2.134563851727482e-05, |
| "loss": 0.0337, |
| "step": 17030 |
| }, |
| { |
| "epoch": 17.548918640576726, |
| "grad_norm": 0.39564597606658936, |
| "learning_rate": 2.128920701936745e-05, |
| "loss": 0.0309, |
| "step": 17040 |
| }, |
| { |
| "epoch": 17.559217301750774, |
| "grad_norm": 0.20545372366905212, |
| "learning_rate": 2.123283003133311e-05, |
| "loss": 0.031, |
| "step": 17050 |
| }, |
| { |
| "epoch": 17.569515962924818, |
| "grad_norm": 0.43441474437713623, |
| "learning_rate": 2.11765076602083e-05, |
| "loss": 0.0321, |
| "step": 17060 |
| }, |
| { |
| "epoch": 17.579814624098866, |
| "grad_norm": 0.4398522675037384, |
| "learning_rate": 2.1120240012925775e-05, |
| "loss": 0.0301, |
| "step": 17070 |
| }, |
| { |
| "epoch": 17.590113285272913, |
| "grad_norm": 0.3297673761844635, |
| "learning_rate": 2.1064027196314452e-05, |
| "loss": 0.0215, |
| "step": 17080 |
| }, |
| { |
| "epoch": 17.60041194644696, |
| "grad_norm": 0.3874305486679077, |
| "learning_rate": 2.1007869317099128e-05, |
| "loss": 0.029, |
| "step": 17090 |
| }, |
| { |
| "epoch": 17.61071060762101, |
| "grad_norm": 0.5168983340263367, |
| "learning_rate": 2.095176648190029e-05, |
| "loss": 0.0265, |
| "step": 17100 |
| }, |
| { |
| "epoch": 17.621009268795056, |
| "grad_norm": 0.29372426867485046, |
| "learning_rate": 2.0895718797233925e-05, |
| "loss": 0.0325, |
| "step": 17110 |
| }, |
| { |
| "epoch": 17.631307929969104, |
| "grad_norm": 0.33520805835723877, |
| "learning_rate": 2.0839726369511338e-05, |
| "loss": 0.0342, |
| "step": 17120 |
| }, |
| { |
| "epoch": 17.64160659114315, |
| "grad_norm": 0.2704266607761383, |
| "learning_rate": 2.0783789305038847e-05, |
| "loss": 0.0273, |
| "step": 17130 |
| }, |
| { |
| "epoch": 17.6519052523172, |
| "grad_norm": 0.29930296540260315, |
| "learning_rate": 2.0727907710017736e-05, |
| "loss": 0.0342, |
| "step": 17140 |
| }, |
| { |
| "epoch": 17.662203913491247, |
| "grad_norm": 0.2962852716445923, |
| "learning_rate": 2.067208169054394e-05, |
| "loss": 0.0322, |
| "step": 17150 |
| }, |
| { |
| "epoch": 17.672502574665295, |
| "grad_norm": 0.31568607687950134, |
| "learning_rate": 2.061631135260789e-05, |
| "loss": 0.0249, |
| "step": 17160 |
| }, |
| { |
| "epoch": 17.682801235839342, |
| "grad_norm": 0.2064095288515091, |
| "learning_rate": 2.0560596802094305e-05, |
| "loss": 0.0246, |
| "step": 17170 |
| }, |
| { |
| "epoch": 17.69309989701339, |
| "grad_norm": 0.3407458961009979, |
| "learning_rate": 2.0504938144781988e-05, |
| "loss": 0.0262, |
| "step": 17180 |
| }, |
| { |
| "epoch": 17.703398558187434, |
| "grad_norm": 0.273579478263855, |
| "learning_rate": 2.0449335486343584e-05, |
| "loss": 0.0273, |
| "step": 17190 |
| }, |
| { |
| "epoch": 17.71369721936148, |
| "grad_norm": 0.23050899803638458, |
| "learning_rate": 2.039378893234547e-05, |
| "loss": 0.0259, |
| "step": 17200 |
| }, |
| { |
| "epoch": 17.72399588053553, |
| "grad_norm": 0.287034809589386, |
| "learning_rate": 2.0338298588247485e-05, |
| "loss": 0.0339, |
| "step": 17210 |
| }, |
| { |
| "epoch": 17.734294541709577, |
| "grad_norm": 0.2856465280056, |
| "learning_rate": 2.028286455940274e-05, |
| "loss": 0.0331, |
| "step": 17220 |
| }, |
| { |
| "epoch": 17.744593202883625, |
| "grad_norm": 0.18497076630592346, |
| "learning_rate": 2.022748695105745e-05, |
| "loss": 0.0274, |
| "step": 17230 |
| }, |
| { |
| "epoch": 17.754891864057672, |
| "grad_norm": 0.27101799845695496, |
| "learning_rate": 2.0172165868350707e-05, |
| "loss": 0.031, |
| "step": 17240 |
| }, |
| { |
| "epoch": 17.76519052523172, |
| "grad_norm": 0.27160143852233887, |
| "learning_rate": 2.0116901416314234e-05, |
| "loss": 0.029, |
| "step": 17250 |
| }, |
| { |
| "epoch": 17.775489186405768, |
| "grad_norm": 0.24040678143501282, |
| "learning_rate": 2.0061693699872298e-05, |
| "loss": 0.0336, |
| "step": 17260 |
| }, |
| { |
| "epoch": 17.785787847579815, |
| "grad_norm": 0.2809303402900696, |
| "learning_rate": 2.0006542823841423e-05, |
| "loss": 0.0245, |
| "step": 17270 |
| }, |
| { |
| "epoch": 17.796086508753863, |
| "grad_norm": 0.33203157782554626, |
| "learning_rate": 1.9951448892930225e-05, |
| "loss": 0.032, |
| "step": 17280 |
| }, |
| { |
| "epoch": 17.80638516992791, |
| "grad_norm": 0.3012252151966095, |
| "learning_rate": 1.9896412011739197e-05, |
| "loss": 0.0295, |
| "step": 17290 |
| }, |
| { |
| "epoch": 17.816683831101958, |
| "grad_norm": 0.22999230027198792, |
| "learning_rate": 1.9841432284760537e-05, |
| "loss": 0.0294, |
| "step": 17300 |
| }, |
| { |
| "epoch": 17.826982492276002, |
| "grad_norm": 0.42275768518447876, |
| "learning_rate": 1.978650981637788e-05, |
| "loss": 0.0343, |
| "step": 17310 |
| }, |
| { |
| "epoch": 17.83728115345005, |
| "grad_norm": 0.29662439227104187, |
| "learning_rate": 1.9731644710866204e-05, |
| "loss": 0.0288, |
| "step": 17320 |
| }, |
| { |
| "epoch": 17.847579814624098, |
| "grad_norm": 0.19465483725070953, |
| "learning_rate": 1.967683707239156e-05, |
| "loss": 0.0263, |
| "step": 17330 |
| }, |
| { |
| "epoch": 17.857878475798145, |
| "grad_norm": 0.22264879941940308, |
| "learning_rate": 1.9622087005010902e-05, |
| "loss": 0.0263, |
| "step": 17340 |
| }, |
| { |
| "epoch": 17.868177136972193, |
| "grad_norm": 0.24109327793121338, |
| "learning_rate": 1.956739461267186e-05, |
| "loss": 0.0302, |
| "step": 17350 |
| }, |
| { |
| "epoch": 17.87847579814624, |
| "grad_norm": 0.2714434266090393, |
| "learning_rate": 1.9512759999212593e-05, |
| "loss": 0.0328, |
| "step": 17360 |
| }, |
| { |
| "epoch": 17.888774459320288, |
| "grad_norm": 0.18760699033737183, |
| "learning_rate": 1.945818326836151e-05, |
| "loss": 0.03, |
| "step": 17370 |
| }, |
| { |
| "epoch": 17.899073120494336, |
| "grad_norm": 0.19363541901111603, |
| "learning_rate": 1.940366452373718e-05, |
| "loss": 0.0256, |
| "step": 17380 |
| }, |
| { |
| "epoch": 17.909371781668384, |
| "grad_norm": 0.30826708674430847, |
| "learning_rate": 1.9349203868848077e-05, |
| "loss": 0.0314, |
| "step": 17390 |
| }, |
| { |
| "epoch": 17.91967044284243, |
| "grad_norm": 0.24862709641456604, |
| "learning_rate": 1.929480140709231e-05, |
| "loss": 0.0242, |
| "step": 17400 |
| }, |
| { |
| "epoch": 17.92996910401648, |
| "grad_norm": 0.2579440176486969, |
| "learning_rate": 1.9240457241757635e-05, |
| "loss": 0.0247, |
| "step": 17410 |
| }, |
| { |
| "epoch": 17.940267765190526, |
| "grad_norm": 0.2497265785932541, |
| "learning_rate": 1.9186171476021004e-05, |
| "loss": 0.0276, |
| "step": 17420 |
| }, |
| { |
| "epoch": 17.950566426364574, |
| "grad_norm": 0.2420155256986618, |
| "learning_rate": 1.9131944212948555e-05, |
| "loss": 0.029, |
| "step": 17430 |
| }, |
| { |
| "epoch": 17.96086508753862, |
| "grad_norm": 0.18265056610107422, |
| "learning_rate": 1.907777555549534e-05, |
| "loss": 0.0382, |
| "step": 17440 |
| }, |
| { |
| "epoch": 17.971163748712666, |
| "grad_norm": 0.2574191093444824, |
| "learning_rate": 1.9023665606505175e-05, |
| "loss": 0.0305, |
| "step": 17450 |
| }, |
| { |
| "epoch": 17.981462409886714, |
| "grad_norm": 0.3031885325908661, |
| "learning_rate": 1.8969614468710317e-05, |
| "loss": 0.022, |
| "step": 17460 |
| }, |
| { |
| "epoch": 17.99176107106076, |
| "grad_norm": 0.2563394010066986, |
| "learning_rate": 1.89156222447315e-05, |
| "loss": 0.0261, |
| "step": 17470 |
| }, |
| { |
| "epoch": 18.00205973223481, |
| "grad_norm": 0.23329629004001617, |
| "learning_rate": 1.8861689037077494e-05, |
| "loss": 0.0264, |
| "step": 17480 |
| }, |
| { |
| "epoch": 18.012358393408856, |
| "grad_norm": 0.24654603004455566, |
| "learning_rate": 1.880781494814508e-05, |
| "loss": 0.0289, |
| "step": 17490 |
| }, |
| { |
| "epoch": 18.022657054582904, |
| "grad_norm": 0.33546507358551025, |
| "learning_rate": 1.875400008021877e-05, |
| "loss": 0.0279, |
| "step": 17500 |
| }, |
| { |
| "epoch": 18.03295571575695, |
| "grad_norm": 0.28472423553466797, |
| "learning_rate": 1.8700244535470673e-05, |
| "loss": 0.0246, |
| "step": 17510 |
| }, |
| { |
| "epoch": 18.043254376931, |
| "grad_norm": 0.22415411472320557, |
| "learning_rate": 1.8646548415960197e-05, |
| "loss": 0.0262, |
| "step": 17520 |
| }, |
| { |
| "epoch": 18.053553038105047, |
| "grad_norm": 0.31146541237831116, |
| "learning_rate": 1.8592911823634034e-05, |
| "loss": 0.0316, |
| "step": 17530 |
| }, |
| { |
| "epoch": 18.063851699279095, |
| "grad_norm": 0.480960875749588, |
| "learning_rate": 1.8539334860325757e-05, |
| "loss": 0.0257, |
| "step": 17540 |
| }, |
| { |
| "epoch": 18.074150360453142, |
| "grad_norm": 0.23705746233463287, |
| "learning_rate": 1.8485817627755787e-05, |
| "loss": 0.0361, |
| "step": 17550 |
| }, |
| { |
| "epoch": 18.08444902162719, |
| "grad_norm": 0.2233453243970871, |
| "learning_rate": 1.8432360227531116e-05, |
| "loss": 0.0332, |
| "step": 17560 |
| }, |
| { |
| "epoch": 18.094747682801234, |
| "grad_norm": 0.19901172816753387, |
| "learning_rate": 1.837896276114517e-05, |
| "loss": 0.0271, |
| "step": 17570 |
| }, |
| { |
| "epoch": 18.105046343975282, |
| "grad_norm": 0.22876538336277008, |
| "learning_rate": 1.832562532997751e-05, |
| "loss": 0.0255, |
| "step": 17580 |
| }, |
| { |
| "epoch": 18.11534500514933, |
| "grad_norm": 0.33671191334724426, |
| "learning_rate": 1.827234803529384e-05, |
| "loss": 0.0312, |
| "step": 17590 |
| }, |
| { |
| "epoch": 18.125643666323377, |
| "grad_norm": 0.24380284547805786, |
| "learning_rate": 1.8219130978245563e-05, |
| "loss": 0.023, |
| "step": 17600 |
| }, |
| { |
| "epoch": 18.135942327497425, |
| "grad_norm": 0.26702800393104553, |
| "learning_rate": 1.816597425986979e-05, |
| "loss": 0.0239, |
| "step": 17610 |
| }, |
| { |
| "epoch": 18.146240988671472, |
| "grad_norm": 0.18801453709602356, |
| "learning_rate": 1.8112877981089055e-05, |
| "loss": 0.0291, |
| "step": 17620 |
| }, |
| { |
| "epoch": 18.15653964984552, |
| "grad_norm": 0.23468522727489471, |
| "learning_rate": 1.8059842242711156e-05, |
| "loss": 0.023, |
| "step": 17630 |
| }, |
| { |
| "epoch": 18.166838311019568, |
| "grad_norm": 0.23238340020179749, |
| "learning_rate": 1.800686714542889e-05, |
| "loss": 0.0267, |
| "step": 17640 |
| }, |
| { |
| "epoch": 18.177136972193615, |
| "grad_norm": 0.24705295264720917, |
| "learning_rate": 1.795395278982003e-05, |
| "loss": 0.0257, |
| "step": 17650 |
| }, |
| { |
| "epoch": 18.187435633367663, |
| "grad_norm": 0.21146944165229797, |
| "learning_rate": 1.7901099276346912e-05, |
| "loss": 0.0276, |
| "step": 17660 |
| }, |
| { |
| "epoch": 18.19773429454171, |
| "grad_norm": 0.28207865357398987, |
| "learning_rate": 1.7848306705356434e-05, |
| "loss": 0.0291, |
| "step": 17670 |
| }, |
| { |
| "epoch": 18.20803295571576, |
| "grad_norm": 0.2663029432296753, |
| "learning_rate": 1.7795575177079754e-05, |
| "loss": 0.0303, |
| "step": 17680 |
| }, |
| { |
| "epoch": 18.218331616889806, |
| "grad_norm": 0.2542501389980316, |
| "learning_rate": 1.7742904791632175e-05, |
| "loss": 0.0247, |
| "step": 17690 |
| }, |
| { |
| "epoch": 18.22863027806385, |
| "grad_norm": 0.2575390338897705, |
| "learning_rate": 1.769029564901282e-05, |
| "loss": 0.0256, |
| "step": 17700 |
| }, |
| { |
| "epoch": 18.238928939237898, |
| "grad_norm": 0.25764167308807373, |
| "learning_rate": 1.7637747849104692e-05, |
| "loss": 0.0308, |
| "step": 17710 |
| }, |
| { |
| "epoch": 18.249227600411945, |
| "grad_norm": 0.24213287234306335, |
| "learning_rate": 1.7585261491674175e-05, |
| "loss": 0.0277, |
| "step": 17720 |
| }, |
| { |
| "epoch": 18.259526261585993, |
| "grad_norm": 0.2765040099620819, |
| "learning_rate": 1.7532836676371083e-05, |
| "loss": 0.0284, |
| "step": 17730 |
| }, |
| { |
| "epoch": 18.26982492276004, |
| "grad_norm": 0.4227076768875122, |
| "learning_rate": 1.748047350272838e-05, |
| "loss": 0.0269, |
| "step": 17740 |
| }, |
| { |
| "epoch": 18.28012358393409, |
| "grad_norm": 0.30982914566993713, |
| "learning_rate": 1.7428172070161992e-05, |
| "loss": 0.027, |
| "step": 17750 |
| }, |
| { |
| "epoch": 18.290422245108136, |
| "grad_norm": 0.24797886610031128, |
| "learning_rate": 1.737593247797058e-05, |
| "loss": 0.0268, |
| "step": 17760 |
| }, |
| { |
| "epoch": 18.300720906282184, |
| "grad_norm": 0.25233253836631775, |
| "learning_rate": 1.7323754825335493e-05, |
| "loss": 0.0259, |
| "step": 17770 |
| }, |
| { |
| "epoch": 18.31101956745623, |
| "grad_norm": 0.18777655065059662, |
| "learning_rate": 1.72716392113204e-05, |
| "loss": 0.0306, |
| "step": 17780 |
| }, |
| { |
| "epoch": 18.32131822863028, |
| "grad_norm": 0.3482552766799927, |
| "learning_rate": 1.7219585734871175e-05, |
| "loss": 0.0283, |
| "step": 17790 |
| }, |
| { |
| "epoch": 18.331616889804327, |
| "grad_norm": 0.19487865269184113, |
| "learning_rate": 1.716759449481582e-05, |
| "loss": 0.0311, |
| "step": 17800 |
| }, |
| { |
| "epoch": 18.341915550978374, |
| "grad_norm": 0.5890926718711853, |
| "learning_rate": 1.7115665589864055e-05, |
| "loss": 0.0369, |
| "step": 17810 |
| }, |
| { |
| "epoch": 18.352214212152422, |
| "grad_norm": 0.27247244119644165, |
| "learning_rate": 1.706379911860732e-05, |
| "loss": 0.0257, |
| "step": 17820 |
| }, |
| { |
| "epoch": 18.362512873326466, |
| "grad_norm": 0.19774708151817322, |
| "learning_rate": 1.701199517951852e-05, |
| "loss": 0.0277, |
| "step": 17830 |
| }, |
| { |
| "epoch": 18.372811534500514, |
| "grad_norm": 0.21970653533935547, |
| "learning_rate": 1.6960253870951825e-05, |
| "loss": 0.0269, |
| "step": 17840 |
| }, |
| { |
| "epoch": 18.38311019567456, |
| "grad_norm": 0.25846680998802185, |
| "learning_rate": 1.6908575291142447e-05, |
| "loss": 0.0275, |
| "step": 17850 |
| }, |
| { |
| "epoch": 18.39340885684861, |
| "grad_norm": 0.16934412717819214, |
| "learning_rate": 1.6856959538206618e-05, |
| "loss": 0.0325, |
| "step": 17860 |
| }, |
| { |
| "epoch": 18.403707518022657, |
| "grad_norm": 0.34561848640441895, |
| "learning_rate": 1.6805406710141164e-05, |
| "loss": 0.0248, |
| "step": 17870 |
| }, |
| { |
| "epoch": 18.414006179196704, |
| "grad_norm": 0.21684630215168, |
| "learning_rate": 1.6753916904823518e-05, |
| "loss": 0.0273, |
| "step": 17880 |
| }, |
| { |
| "epoch": 18.424304840370752, |
| "grad_norm": 0.27160486578941345, |
| "learning_rate": 1.670249022001143e-05, |
| "loss": 0.0222, |
| "step": 17890 |
| }, |
| { |
| "epoch": 18.4346035015448, |
| "grad_norm": 0.24969789385795593, |
| "learning_rate": 1.6651126753342845e-05, |
| "loss": 0.0259, |
| "step": 17900 |
| }, |
| { |
| "epoch": 18.444902162718847, |
| "grad_norm": 0.28738731145858765, |
| "learning_rate": 1.65998266023356e-05, |
| "loss": 0.0292, |
| "step": 17910 |
| }, |
| { |
| "epoch": 18.455200823892895, |
| "grad_norm": 0.424589067697525, |
| "learning_rate": 1.654858986438745e-05, |
| "loss": 0.0318, |
| "step": 17920 |
| }, |
| { |
| "epoch": 18.465499485066942, |
| "grad_norm": 0.3065814971923828, |
| "learning_rate": 1.6497416636775625e-05, |
| "loss": 0.0357, |
| "step": 17930 |
| }, |
| { |
| "epoch": 18.47579814624099, |
| "grad_norm": 0.19018980860710144, |
| "learning_rate": 1.644630701665686e-05, |
| "loss": 0.0299, |
| "step": 17940 |
| }, |
| { |
| "epoch": 18.486096807415038, |
| "grad_norm": 0.29618680477142334, |
| "learning_rate": 1.6395261101067082e-05, |
| "loss": 0.0288, |
| "step": 17950 |
| }, |
| { |
| "epoch": 18.496395468589082, |
| "grad_norm": 0.18861742317676544, |
| "learning_rate": 1.6344278986921325e-05, |
| "loss": 0.0222, |
| "step": 17960 |
| }, |
| { |
| "epoch": 18.50669412976313, |
| "grad_norm": 0.25318944454193115, |
| "learning_rate": 1.629336077101339e-05, |
| "loss": 0.0256, |
| "step": 17970 |
| }, |
| { |
| "epoch": 18.516992790937177, |
| "grad_norm": 0.1487075686454773, |
| "learning_rate": 1.6242506550015896e-05, |
| "loss": 0.032, |
| "step": 17980 |
| }, |
| { |
| "epoch": 18.527291452111225, |
| "grad_norm": 0.1627105176448822, |
| "learning_rate": 1.619171642047983e-05, |
| "loss": 0.027, |
| "step": 17990 |
| }, |
| { |
| "epoch": 18.537590113285273, |
| "grad_norm": 0.21512849628925323, |
| "learning_rate": 1.6140990478834582e-05, |
| "loss": 0.0288, |
| "step": 18000 |
| }, |
| { |
| "epoch": 18.54788877445932, |
| "grad_norm": 0.2389346957206726, |
| "learning_rate": 1.609032882138765e-05, |
| "loss": 0.0242, |
| "step": 18010 |
| }, |
| { |
| "epoch": 18.558187435633368, |
| "grad_norm": 0.23594819009304047, |
| "learning_rate": 1.60397315443245e-05, |
| "loss": 0.0335, |
| "step": 18020 |
| }, |
| { |
| "epoch": 18.568486096807415, |
| "grad_norm": 0.31918737292289734, |
| "learning_rate": 1.5989198743708294e-05, |
| "loss": 0.0272, |
| "step": 18030 |
| }, |
| { |
| "epoch": 18.578784757981463, |
| "grad_norm": 0.40922242403030396, |
| "learning_rate": 1.5938730515479904e-05, |
| "loss": 0.0317, |
| "step": 18040 |
| }, |
| { |
| "epoch": 18.58908341915551, |
| "grad_norm": 0.506949782371521, |
| "learning_rate": 1.5888326955457487e-05, |
| "loss": 0.0348, |
| "step": 18050 |
| }, |
| { |
| "epoch": 18.59938208032956, |
| "grad_norm": 1.7606223821640015, |
| "learning_rate": 1.5837988159336493e-05, |
| "loss": 0.0329, |
| "step": 18060 |
| }, |
| { |
| "epoch": 18.609680741503606, |
| "grad_norm": 0.25751596689224243, |
| "learning_rate": 1.5787714222689386e-05, |
| "loss": 0.032, |
| "step": 18070 |
| }, |
| { |
| "epoch": 18.61997940267765, |
| "grad_norm": 0.22682945430278778, |
| "learning_rate": 1.5737505240965515e-05, |
| "loss": 0.0267, |
| "step": 18080 |
| }, |
| { |
| "epoch": 18.630278063851698, |
| "grad_norm": 0.32027962803840637, |
| "learning_rate": 1.5687361309490838e-05, |
| "loss": 0.0284, |
| "step": 18090 |
| }, |
| { |
| "epoch": 18.640576725025745, |
| "grad_norm": 0.18152746558189392, |
| "learning_rate": 1.5637282523467918e-05, |
| "loss": 0.0235, |
| "step": 18100 |
| }, |
| { |
| "epoch": 18.650875386199793, |
| "grad_norm": 0.30700448155403137, |
| "learning_rate": 1.5587268977975528e-05, |
| "loss": 0.0272, |
| "step": 18110 |
| }, |
| { |
| "epoch": 18.66117404737384, |
| "grad_norm": 0.2509545385837555, |
| "learning_rate": 1.553732076796863e-05, |
| "loss": 0.0256, |
| "step": 18120 |
| }, |
| { |
| "epoch": 18.67147270854789, |
| "grad_norm": 0.5025020241737366, |
| "learning_rate": 1.5487437988278142e-05, |
| "loss": 0.0252, |
| "step": 18130 |
| }, |
| { |
| "epoch": 18.681771369721936, |
| "grad_norm": 0.5062695145606995, |
| "learning_rate": 1.5437620733610757e-05, |
| "loss": 0.0241, |
| "step": 18140 |
| }, |
| { |
| "epoch": 18.692070030895984, |
| "grad_norm": 0.22402432560920715, |
| "learning_rate": 1.5387869098548713e-05, |
| "loss": 0.0276, |
| "step": 18150 |
| }, |
| { |
| "epoch": 18.70236869207003, |
| "grad_norm": 0.2398539036512375, |
| "learning_rate": 1.5338183177549763e-05, |
| "loss": 0.0277, |
| "step": 18160 |
| }, |
| { |
| "epoch": 18.71266735324408, |
| "grad_norm": 0.7876859903335571, |
| "learning_rate": 1.5288563064946793e-05, |
| "loss": 0.0251, |
| "step": 18170 |
| }, |
| { |
| "epoch": 18.722966014418127, |
| "grad_norm": 0.35239315032958984, |
| "learning_rate": 1.52390088549478e-05, |
| "loss": 0.0242, |
| "step": 18180 |
| }, |
| { |
| "epoch": 18.733264675592174, |
| "grad_norm": 0.2633828818798065, |
| "learning_rate": 1.5189520641635674e-05, |
| "loss": 0.0258, |
| "step": 18190 |
| }, |
| { |
| "epoch": 18.743563336766222, |
| "grad_norm": 0.28488120436668396, |
| "learning_rate": 1.514009851896795e-05, |
| "loss": 0.0273, |
| "step": 18200 |
| }, |
| { |
| "epoch": 18.753861997940266, |
| "grad_norm": 0.1860319823026657, |
| "learning_rate": 1.5090742580776723e-05, |
| "loss": 0.0227, |
| "step": 18210 |
| }, |
| { |
| "epoch": 18.764160659114314, |
| "grad_norm": 0.2818928062915802, |
| "learning_rate": 1.5041452920768423e-05, |
| "loss": 0.026, |
| "step": 18220 |
| }, |
| { |
| "epoch": 18.77445932028836, |
| "grad_norm": 0.9211439490318298, |
| "learning_rate": 1.4992229632523657e-05, |
| "loss": 0.0315, |
| "step": 18230 |
| }, |
| { |
| "epoch": 18.78475798146241, |
| "grad_norm": 0.29785358905792236, |
| "learning_rate": 1.4943072809497e-05, |
| "loss": 0.0287, |
| "step": 18240 |
| }, |
| { |
| "epoch": 18.795056642636457, |
| "grad_norm": 0.4559323489665985, |
| "learning_rate": 1.4893982545016866e-05, |
| "loss": 0.0257, |
| "step": 18250 |
| }, |
| { |
| "epoch": 18.805355303810504, |
| "grad_norm": 0.31199562549591064, |
| "learning_rate": 1.484495893228524e-05, |
| "loss": 0.0273, |
| "step": 18260 |
| }, |
| { |
| "epoch": 18.815653964984552, |
| "grad_norm": 0.23612628877162933, |
| "learning_rate": 1.4796002064377629e-05, |
| "loss": 0.0284, |
| "step": 18270 |
| }, |
| { |
| "epoch": 18.8259526261586, |
| "grad_norm": 0.3867475688457489, |
| "learning_rate": 1.4747112034242794e-05, |
| "loss": 0.0295, |
| "step": 18280 |
| }, |
| { |
| "epoch": 18.836251287332647, |
| "grad_norm": 0.2292865663766861, |
| "learning_rate": 1.4698288934702597e-05, |
| "loss": 0.0262, |
| "step": 18290 |
| }, |
| { |
| "epoch": 18.846549948506695, |
| "grad_norm": 0.2770872414112091, |
| "learning_rate": 1.4649532858451826e-05, |
| "loss": 0.0294, |
| "step": 18300 |
| }, |
| { |
| "epoch": 18.856848609680743, |
| "grad_norm": 0.2201216071844101, |
| "learning_rate": 1.4600843898058048e-05, |
| "loss": 0.0302, |
| "step": 18310 |
| }, |
| { |
| "epoch": 18.86714727085479, |
| "grad_norm": 0.26552289724349976, |
| "learning_rate": 1.4552222145961325e-05, |
| "loss": 0.0265, |
| "step": 18320 |
| }, |
| { |
| "epoch": 18.877445932028838, |
| "grad_norm": 0.22724245488643646, |
| "learning_rate": 1.4503667694474232e-05, |
| "loss": 0.0187, |
| "step": 18330 |
| }, |
| { |
| "epoch": 18.887744593202882, |
| "grad_norm": 0.18195529282093048, |
| "learning_rate": 1.4455180635781474e-05, |
| "loss": 0.0234, |
| "step": 18340 |
| }, |
| { |
| "epoch": 18.89804325437693, |
| "grad_norm": 0.2183389961719513, |
| "learning_rate": 1.4406761061939844e-05, |
| "loss": 0.0276, |
| "step": 18350 |
| }, |
| { |
| "epoch": 18.908341915550977, |
| "grad_norm": 0.3725976347923279, |
| "learning_rate": 1.4358409064878015e-05, |
| "loss": 0.0257, |
| "step": 18360 |
| }, |
| { |
| "epoch": 18.918640576725025, |
| "grad_norm": 0.30146896839141846, |
| "learning_rate": 1.4310124736396358e-05, |
| "loss": 0.0301, |
| "step": 18370 |
| }, |
| { |
| "epoch": 18.928939237899073, |
| "grad_norm": 0.21054169535636902, |
| "learning_rate": 1.4261908168166716e-05, |
| "loss": 0.0256, |
| "step": 18380 |
| }, |
| { |
| "epoch": 18.93923789907312, |
| "grad_norm": 0.22048494219779968, |
| "learning_rate": 1.4213759451732395e-05, |
| "loss": 0.0275, |
| "step": 18390 |
| }, |
| { |
| "epoch": 18.949536560247168, |
| "grad_norm": 0.15067796409130096, |
| "learning_rate": 1.416567867850776e-05, |
| "loss": 0.0282, |
| "step": 18400 |
| }, |
| { |
| "epoch": 18.959835221421216, |
| "grad_norm": 0.21609516441822052, |
| "learning_rate": 1.4117665939778257e-05, |
| "loss": 0.0299, |
| "step": 18410 |
| }, |
| { |
| "epoch": 18.970133882595263, |
| "grad_norm": 0.2782236933708191, |
| "learning_rate": 1.4069721326700131e-05, |
| "loss": 0.0291, |
| "step": 18420 |
| }, |
| { |
| "epoch": 18.98043254376931, |
| "grad_norm": 0.22176700830459595, |
| "learning_rate": 1.4021844930300315e-05, |
| "loss": 0.0305, |
| "step": 18430 |
| }, |
| { |
| "epoch": 18.99073120494336, |
| "grad_norm": 0.2350529432296753, |
| "learning_rate": 1.3974036841476146e-05, |
| "loss": 0.0236, |
| "step": 18440 |
| }, |
| { |
| "epoch": 19.001029866117406, |
| "grad_norm": 0.23732732236385345, |
| "learning_rate": 1.3926297150995404e-05, |
| "loss": 0.0271, |
| "step": 18450 |
| }, |
| { |
| "epoch": 19.011328527291454, |
| "grad_norm": 0.21267026662826538, |
| "learning_rate": 1.3878625949495883e-05, |
| "loss": 0.0247, |
| "step": 18460 |
| }, |
| { |
| "epoch": 19.021627188465498, |
| "grad_norm": 0.21228396892547607, |
| "learning_rate": 1.3831023327485416e-05, |
| "loss": 0.027, |
| "step": 18470 |
| }, |
| { |
| "epoch": 19.031925849639546, |
| "grad_norm": 0.3779662847518921, |
| "learning_rate": 1.3783489375341613e-05, |
| "loss": 0.0225, |
| "step": 18480 |
| }, |
| { |
| "epoch": 19.042224510813593, |
| "grad_norm": 0.19540618360042572, |
| "learning_rate": 1.373602418331173e-05, |
| "loss": 0.0201, |
| "step": 18490 |
| }, |
| { |
| "epoch": 19.05252317198764, |
| "grad_norm": 0.1852959394454956, |
| "learning_rate": 1.3688627841512402e-05, |
| "loss": 0.0285, |
| "step": 18500 |
| }, |
| { |
| "epoch": 19.06282183316169, |
| "grad_norm": 0.12700384855270386, |
| "learning_rate": 1.3641300439929666e-05, |
| "loss": 0.0256, |
| "step": 18510 |
| }, |
| { |
| "epoch": 19.073120494335736, |
| "grad_norm": 0.2065911442041397, |
| "learning_rate": 1.3594042068418555e-05, |
| "loss": 0.0238, |
| "step": 18520 |
| }, |
| { |
| "epoch": 19.083419155509784, |
| "grad_norm": 0.3038773238658905, |
| "learning_rate": 1.3546852816703109e-05, |
| "loss": 0.0303, |
| "step": 18530 |
| }, |
| { |
| "epoch": 19.09371781668383, |
| "grad_norm": 0.21155600249767303, |
| "learning_rate": 1.3499732774376118e-05, |
| "loss": 0.0243, |
| "step": 18540 |
| }, |
| { |
| "epoch": 19.10401647785788, |
| "grad_norm": 0.15863338112831116, |
| "learning_rate": 1.345268203089899e-05, |
| "loss": 0.0316, |
| "step": 18550 |
| }, |
| { |
| "epoch": 19.114315139031927, |
| "grad_norm": 0.20425492525100708, |
| "learning_rate": 1.3405700675601506e-05, |
| "loss": 0.0298, |
| "step": 18560 |
| }, |
| { |
| "epoch": 19.124613800205974, |
| "grad_norm": 0.3093377351760864, |
| "learning_rate": 1.3358788797681805e-05, |
| "loss": 0.0296, |
| "step": 18570 |
| }, |
| { |
| "epoch": 19.134912461380022, |
| "grad_norm": 0.3627874255180359, |
| "learning_rate": 1.3311946486206022e-05, |
| "loss": 0.0285, |
| "step": 18580 |
| }, |
| { |
| "epoch": 19.145211122554066, |
| "grad_norm": 0.5787222385406494, |
| "learning_rate": 1.326517383010827e-05, |
| "loss": 0.0274, |
| "step": 18590 |
| }, |
| { |
| "epoch": 19.155509783728114, |
| "grad_norm": 0.16981856524944305, |
| "learning_rate": 1.3218470918190401e-05, |
| "loss": 0.025, |
| "step": 18600 |
| }, |
| { |
| "epoch": 19.16580844490216, |
| "grad_norm": 0.1558324694633484, |
| "learning_rate": 1.3171837839121837e-05, |
| "loss": 0.0308, |
| "step": 18610 |
| }, |
| { |
| "epoch": 19.17610710607621, |
| "grad_norm": 0.19524988532066345, |
| "learning_rate": 1.3125274681439436e-05, |
| "loss": 0.0236, |
| "step": 18620 |
| }, |
| { |
| "epoch": 19.186405767250257, |
| "grad_norm": 0.4640062749385834, |
| "learning_rate": 1.3078781533547303e-05, |
| "loss": 0.0299, |
| "step": 18630 |
| }, |
| { |
| "epoch": 19.196704428424304, |
| "grad_norm": 0.26007869839668274, |
| "learning_rate": 1.3032358483716622e-05, |
| "loss": 0.032, |
| "step": 18640 |
| }, |
| { |
| "epoch": 19.207003089598352, |
| "grad_norm": 0.679685115814209, |
| "learning_rate": 1.2986005620085456e-05, |
| "loss": 0.0223, |
| "step": 18650 |
| }, |
| { |
| "epoch": 19.2173017507724, |
| "grad_norm": 0.1843012571334839, |
| "learning_rate": 1.2939723030658695e-05, |
| "loss": 0.0266, |
| "step": 18660 |
| }, |
| { |
| "epoch": 19.227600411946447, |
| "grad_norm": 0.18425314128398895, |
| "learning_rate": 1.2893510803307718e-05, |
| "loss": 0.0246, |
| "step": 18670 |
| }, |
| { |
| "epoch": 19.237899073120495, |
| "grad_norm": 0.26258182525634766, |
| "learning_rate": 1.2847369025770361e-05, |
| "loss": 0.032, |
| "step": 18680 |
| }, |
| { |
| "epoch": 19.248197734294543, |
| "grad_norm": 0.22664831578731537, |
| "learning_rate": 1.2801297785650706e-05, |
| "loss": 0.0211, |
| "step": 18690 |
| }, |
| { |
| "epoch": 19.25849639546859, |
| "grad_norm": 0.24430438876152039, |
| "learning_rate": 1.2755297170418912e-05, |
| "loss": 0.0284, |
| "step": 18700 |
| }, |
| { |
| "epoch": 19.268795056642638, |
| "grad_norm": 0.24674183130264282, |
| "learning_rate": 1.2709367267411004e-05, |
| "loss": 0.0299, |
| "step": 18710 |
| }, |
| { |
| "epoch": 19.279093717816682, |
| "grad_norm": 0.24077735841274261, |
| "learning_rate": 1.2663508163828857e-05, |
| "loss": 0.0316, |
| "step": 18720 |
| }, |
| { |
| "epoch": 19.28939237899073, |
| "grad_norm": 0.2905300259590149, |
| "learning_rate": 1.2617719946739814e-05, |
| "loss": 0.0245, |
| "step": 18730 |
| }, |
| { |
| "epoch": 19.299691040164777, |
| "grad_norm": 0.2111150026321411, |
| "learning_rate": 1.2572002703076708e-05, |
| "loss": 0.0232, |
| "step": 18740 |
| }, |
| { |
| "epoch": 19.309989701338825, |
| "grad_norm": 0.28864920139312744, |
| "learning_rate": 1.2526356519637588e-05, |
| "loss": 0.024, |
| "step": 18750 |
| }, |
| { |
| "epoch": 19.320288362512873, |
| "grad_norm": 0.1956787258386612, |
| "learning_rate": 1.248078148308563e-05, |
| "loss": 0.0257, |
| "step": 18760 |
| }, |
| { |
| "epoch": 19.33058702368692, |
| "grad_norm": 0.44175270199775696, |
| "learning_rate": 1.2435277679948842e-05, |
| "loss": 0.0242, |
| "step": 18770 |
| }, |
| { |
| "epoch": 19.340885684860968, |
| "grad_norm": 0.18450774252414703, |
| "learning_rate": 1.2389845196620121e-05, |
| "loss": 0.0257, |
| "step": 18780 |
| }, |
| { |
| "epoch": 19.351184346035016, |
| "grad_norm": 0.17851541936397552, |
| "learning_rate": 1.234448411935683e-05, |
| "loss": 0.0289, |
| "step": 18790 |
| }, |
| { |
| "epoch": 19.361483007209063, |
| "grad_norm": 0.3439672589302063, |
| "learning_rate": 1.2299194534280844e-05, |
| "loss": 0.0221, |
| "step": 18800 |
| }, |
| { |
| "epoch": 19.37178166838311, |
| "grad_norm": 0.2443213164806366, |
| "learning_rate": 1.2253976527378274e-05, |
| "loss": 0.0242, |
| "step": 18810 |
| }, |
| { |
| "epoch": 19.38208032955716, |
| "grad_norm": 0.23600997030735016, |
| "learning_rate": 1.2208830184499347e-05, |
| "loss": 0.0305, |
| "step": 18820 |
| }, |
| { |
| "epoch": 19.392378990731206, |
| "grad_norm": 0.3016259968280792, |
| "learning_rate": 1.2163755591358184e-05, |
| "loss": 0.0273, |
| "step": 18830 |
| }, |
| { |
| "epoch": 19.402677651905254, |
| "grad_norm": 0.306363970041275, |
| "learning_rate": 1.211875283353277e-05, |
| "loss": 0.0259, |
| "step": 18840 |
| }, |
| { |
| "epoch": 19.412976313079298, |
| "grad_norm": 0.23701894283294678, |
| "learning_rate": 1.2073821996464613e-05, |
| "loss": 0.0271, |
| "step": 18850 |
| }, |
| { |
| "epoch": 19.423274974253346, |
| "grad_norm": 0.3945840895175934, |
| "learning_rate": 1.2028963165458728e-05, |
| "loss": 0.022, |
| "step": 18860 |
| }, |
| { |
| "epoch": 19.433573635427393, |
| "grad_norm": 0.2171708345413208, |
| "learning_rate": 1.1984176425683408e-05, |
| "loss": 0.0234, |
| "step": 18870 |
| }, |
| { |
| "epoch": 19.44387229660144, |
| "grad_norm": 0.252113938331604, |
| "learning_rate": 1.1939461862170086e-05, |
| "loss": 0.0245, |
| "step": 18880 |
| }, |
| { |
| "epoch": 19.45417095777549, |
| "grad_norm": 0.5461235046386719, |
| "learning_rate": 1.1894819559813108e-05, |
| "loss": 0.0247, |
| "step": 18890 |
| }, |
| { |
| "epoch": 19.464469618949536, |
| "grad_norm": 0.23864491283893585, |
| "learning_rate": 1.1850249603369723e-05, |
| "loss": 0.024, |
| "step": 18900 |
| }, |
| { |
| "epoch": 19.474768280123584, |
| "grad_norm": 0.21564757823944092, |
| "learning_rate": 1.1805752077459725e-05, |
| "loss": 0.0308, |
| "step": 18910 |
| }, |
| { |
| "epoch": 19.48506694129763, |
| "grad_norm": 0.31438037753105164, |
| "learning_rate": 1.1761327066565452e-05, |
| "loss": 0.0282, |
| "step": 18920 |
| }, |
| { |
| "epoch": 19.49536560247168, |
| "grad_norm": 0.20203201472759247, |
| "learning_rate": 1.1716974655031554e-05, |
| "loss": 0.0246, |
| "step": 18930 |
| }, |
| { |
| "epoch": 19.505664263645727, |
| "grad_norm": 0.19842277467250824, |
| "learning_rate": 1.1672694927064858e-05, |
| "loss": 0.026, |
| "step": 18940 |
| }, |
| { |
| "epoch": 19.515962924819775, |
| "grad_norm": 0.23037093877792358, |
| "learning_rate": 1.162848796673413e-05, |
| "loss": 0.0327, |
| "step": 18950 |
| }, |
| { |
| "epoch": 19.526261585993822, |
| "grad_norm": 0.366129070520401, |
| "learning_rate": 1.1584353857970088e-05, |
| "loss": 0.0264, |
| "step": 18960 |
| }, |
| { |
| "epoch": 19.53656024716787, |
| "grad_norm": 0.31818097829818726, |
| "learning_rate": 1.154029268456504e-05, |
| "loss": 0.0243, |
| "step": 18970 |
| }, |
| { |
| "epoch": 19.546858908341914, |
| "grad_norm": 0.5746592879295349, |
| "learning_rate": 1.1496304530172863e-05, |
| "loss": 0.0235, |
| "step": 18980 |
| }, |
| { |
| "epoch": 19.55715756951596, |
| "grad_norm": 0.26987478137016296, |
| "learning_rate": 1.1452389478308806e-05, |
| "loss": 0.0215, |
| "step": 18990 |
| }, |
| { |
| "epoch": 19.56745623069001, |
| "grad_norm": 0.2493455708026886, |
| "learning_rate": 1.1408547612349318e-05, |
| "loss": 0.0239, |
| "step": 19000 |
| }, |
| { |
| "epoch": 19.577754891864057, |
| "grad_norm": 0.21429450809955597, |
| "learning_rate": 1.1364779015531873e-05, |
| "loss": 0.0206, |
| "step": 19010 |
| }, |
| { |
| "epoch": 19.588053553038105, |
| "grad_norm": 0.26744788885116577, |
| "learning_rate": 1.1321083770954871e-05, |
| "loss": 0.0274, |
| "step": 19020 |
| }, |
| { |
| "epoch": 19.598352214212152, |
| "grad_norm": 0.28107255697250366, |
| "learning_rate": 1.1277461961577446e-05, |
| "loss": 0.0308, |
| "step": 19030 |
| }, |
| { |
| "epoch": 19.6086508753862, |
| "grad_norm": 0.22350192070007324, |
| "learning_rate": 1.1233913670219287e-05, |
| "loss": 0.029, |
| "step": 19040 |
| }, |
| { |
| "epoch": 19.618949536560248, |
| "grad_norm": 0.39531412720680237, |
| "learning_rate": 1.1190438979560536e-05, |
| "loss": 0.0351, |
| "step": 19050 |
| }, |
| { |
| "epoch": 19.629248197734295, |
| "grad_norm": 0.19649939239025116, |
| "learning_rate": 1.1147037972141545e-05, |
| "loss": 0.0242, |
| "step": 19060 |
| }, |
| { |
| "epoch": 19.639546858908343, |
| "grad_norm": 0.1779136210680008, |
| "learning_rate": 1.1103710730362821e-05, |
| "loss": 0.0259, |
| "step": 19070 |
| }, |
| { |
| "epoch": 19.64984552008239, |
| "grad_norm": 0.2700199782848358, |
| "learning_rate": 1.1060457336484803e-05, |
| "loss": 0.0237, |
| "step": 19080 |
| }, |
| { |
| "epoch": 19.660144181256438, |
| "grad_norm": 0.19958068430423737, |
| "learning_rate": 1.1017277872627719e-05, |
| "loss": 0.0278, |
| "step": 19090 |
| }, |
| { |
| "epoch": 19.670442842430482, |
| "grad_norm": 0.24603743851184845, |
| "learning_rate": 1.0974172420771444e-05, |
| "loss": 0.0255, |
| "step": 19100 |
| }, |
| { |
| "epoch": 19.68074150360453, |
| "grad_norm": 0.25125035643577576, |
| "learning_rate": 1.0931141062755346e-05, |
| "loss": 0.0225, |
| "step": 19110 |
| }, |
| { |
| "epoch": 19.691040164778578, |
| "grad_norm": 0.24162299931049347, |
| "learning_rate": 1.0888183880278074e-05, |
| "loss": 0.0235, |
| "step": 19120 |
| }, |
| { |
| "epoch": 19.701338825952625, |
| "grad_norm": 0.23117221891880035, |
| "learning_rate": 1.0845300954897492e-05, |
| "loss": 0.0251, |
| "step": 19130 |
| }, |
| { |
| "epoch": 19.711637487126673, |
| "grad_norm": 0.4146316945552826, |
| "learning_rate": 1.0802492368030471e-05, |
| "loss": 0.0257, |
| "step": 19140 |
| }, |
| { |
| "epoch": 19.72193614830072, |
| "grad_norm": 0.3388553261756897, |
| "learning_rate": 1.0759758200952729e-05, |
| "loss": 0.0265, |
| "step": 19150 |
| }, |
| { |
| "epoch": 19.732234809474768, |
| "grad_norm": 0.3048183023929596, |
| "learning_rate": 1.0717098534798714e-05, |
| "loss": 0.0277, |
| "step": 19160 |
| }, |
| { |
| "epoch": 19.742533470648816, |
| "grad_norm": 0.7091302871704102, |
| "learning_rate": 1.0674513450561429e-05, |
| "loss": 0.0245, |
| "step": 19170 |
| }, |
| { |
| "epoch": 19.752832131822863, |
| "grad_norm": 0.2630675435066223, |
| "learning_rate": 1.0632003029092235e-05, |
| "loss": 0.032, |
| "step": 19180 |
| }, |
| { |
| "epoch": 19.76313079299691, |
| "grad_norm": 0.2844875752925873, |
| "learning_rate": 1.0589567351100782e-05, |
| "loss": 0.0303, |
| "step": 19190 |
| }, |
| { |
| "epoch": 19.77342945417096, |
| "grad_norm": 0.24576683342456818, |
| "learning_rate": 1.0547206497154798e-05, |
| "loss": 0.0244, |
| "step": 19200 |
| }, |
| { |
| "epoch": 19.783728115345006, |
| "grad_norm": 0.19090470671653748, |
| "learning_rate": 1.0504920547679958e-05, |
| "loss": 0.0283, |
| "step": 19210 |
| }, |
| { |
| "epoch": 19.794026776519054, |
| "grad_norm": 0.35600224137306213, |
| "learning_rate": 1.0462709582959718e-05, |
| "loss": 0.0212, |
| "step": 19220 |
| }, |
| { |
| "epoch": 19.8043254376931, |
| "grad_norm": 0.31343191862106323, |
| "learning_rate": 1.0420573683135187e-05, |
| "loss": 0.0216, |
| "step": 19230 |
| }, |
| { |
| "epoch": 19.814624098867146, |
| "grad_norm": 0.2817479074001312, |
| "learning_rate": 1.037851292820491e-05, |
| "loss": 0.0294, |
| "step": 19240 |
| }, |
| { |
| "epoch": 19.824922760041193, |
| "grad_norm": 0.24477481842041016, |
| "learning_rate": 1.0336527398024804e-05, |
| "loss": 0.0187, |
| "step": 19250 |
| }, |
| { |
| "epoch": 19.83522142121524, |
| "grad_norm": 0.2589147388935089, |
| "learning_rate": 1.0294617172307963e-05, |
| "loss": 0.0262, |
| "step": 19260 |
| }, |
| { |
| "epoch": 19.84552008238929, |
| "grad_norm": 0.5249900817871094, |
| "learning_rate": 1.02527823306245e-05, |
| "loss": 0.0271, |
| "step": 19270 |
| }, |
| { |
| "epoch": 19.855818743563336, |
| "grad_norm": 0.24145112931728363, |
| "learning_rate": 1.02110229524014e-05, |
| "loss": 0.0231, |
| "step": 19280 |
| }, |
| { |
| "epoch": 19.866117404737384, |
| "grad_norm": 0.28619223833084106, |
| "learning_rate": 1.01693391169224e-05, |
| "loss": 0.0278, |
| "step": 19290 |
| }, |
| { |
| "epoch": 19.87641606591143, |
| "grad_norm": 0.23199136555194855, |
| "learning_rate": 1.0127730903327765e-05, |
| "loss": 0.0277, |
| "step": 19300 |
| }, |
| { |
| "epoch": 19.88671472708548, |
| "grad_norm": 0.3432420492172241, |
| "learning_rate": 1.0086198390614227e-05, |
| "loss": 0.0266, |
| "step": 19310 |
| }, |
| { |
| "epoch": 19.897013388259527, |
| "grad_norm": 0.21095818281173706, |
| "learning_rate": 1.0044741657634782e-05, |
| "loss": 0.024, |
| "step": 19320 |
| }, |
| { |
| "epoch": 19.907312049433575, |
| "grad_norm": 0.1991989016532898, |
| "learning_rate": 1.0003360783098548e-05, |
| "loss": 0.0224, |
| "step": 19330 |
| }, |
| { |
| "epoch": 19.917610710607622, |
| "grad_norm": 0.2295091152191162, |
| "learning_rate": 9.962055845570622e-06, |
| "loss": 0.0277, |
| "step": 19340 |
| }, |
| { |
| "epoch": 19.92790937178167, |
| "grad_norm": 0.39027097821235657, |
| "learning_rate": 9.920826923471943e-06, |
| "loss": 0.0215, |
| "step": 19350 |
| }, |
| { |
| "epoch": 19.938208032955714, |
| "grad_norm": 0.2114678919315338, |
| "learning_rate": 9.879674095079083e-06, |
| "loss": 0.0292, |
| "step": 19360 |
| }, |
| { |
| "epoch": 19.94850669412976, |
| "grad_norm": 0.22323083877563477, |
| "learning_rate": 9.838597438524182e-06, |
| "loss": 0.0236, |
| "step": 19370 |
| }, |
| { |
| "epoch": 19.95880535530381, |
| "grad_norm": 0.5763716101646423, |
| "learning_rate": 9.797597031794763e-06, |
| "loss": 0.0275, |
| "step": 19380 |
| }, |
| { |
| "epoch": 19.969104016477857, |
| "grad_norm": 0.2158919721841812, |
| "learning_rate": 9.75667295273357e-06, |
| "loss": 0.0299, |
| "step": 19390 |
| }, |
| { |
| "epoch": 19.979402677651905, |
| "grad_norm": 0.25896155834198, |
| "learning_rate": 9.715825279038433e-06, |
| "loss": 0.0226, |
| "step": 19400 |
| }, |
| { |
| "epoch": 19.989701338825952, |
| "grad_norm": 0.19433017075061798, |
| "learning_rate": 9.675054088262125e-06, |
| "loss": 0.0188, |
| "step": 19410 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.4307090640068054, |
| "learning_rate": 9.634359457812192e-06, |
| "loss": 0.0287, |
| "step": 19420 |
| }, |
| { |
| "epoch": 20.010298661174048, |
| "grad_norm": 0.2535577714443207, |
| "learning_rate": 9.59374146495085e-06, |
| "loss": 0.0283, |
| "step": 19430 |
| }, |
| { |
| "epoch": 20.020597322348095, |
| "grad_norm": 0.2701593041419983, |
| "learning_rate": 9.553200186794809e-06, |
| "loss": 0.0249, |
| "step": 19440 |
| }, |
| { |
| "epoch": 20.030895983522143, |
| "grad_norm": 0.42833298444747925, |
| "learning_rate": 9.51273570031508e-06, |
| "loss": 0.0305, |
| "step": 19450 |
| }, |
| { |
| "epoch": 20.04119464469619, |
| "grad_norm": 0.26018840074539185, |
| "learning_rate": 9.472348082336973e-06, |
| "loss": 0.0229, |
| "step": 19460 |
| }, |
| { |
| "epoch": 20.05149330587024, |
| "grad_norm": 0.1944461166858673, |
| "learning_rate": 9.432037409539768e-06, |
| "loss": 0.0288, |
| "step": 19470 |
| }, |
| { |
| "epoch": 20.061791967044286, |
| "grad_norm": 0.2379932403564453, |
| "learning_rate": 9.391803758456696e-06, |
| "loss": 0.0283, |
| "step": 19480 |
| }, |
| { |
| "epoch": 20.07209062821833, |
| "grad_norm": 0.24124853312969208, |
| "learning_rate": 9.351647205474762e-06, |
| "loss": 0.0273, |
| "step": 19490 |
| }, |
| { |
| "epoch": 20.082389289392378, |
| "grad_norm": 0.25228720903396606, |
| "learning_rate": 9.311567826834593e-06, |
| "loss": 0.0257, |
| "step": 19500 |
| }, |
| { |
| "epoch": 20.092687950566425, |
| "grad_norm": 0.16082525253295898, |
| "learning_rate": 9.271565698630246e-06, |
| "loss": 0.024, |
| "step": 19510 |
| }, |
| { |
| "epoch": 20.102986611740473, |
| "grad_norm": 0.132386714220047, |
| "learning_rate": 9.231640896809202e-06, |
| "loss": 0.0237, |
| "step": 19520 |
| }, |
| { |
| "epoch": 20.11328527291452, |
| "grad_norm": 0.2096797674894333, |
| "learning_rate": 9.191793497172041e-06, |
| "loss": 0.0261, |
| "step": 19530 |
| }, |
| { |
| "epoch": 20.12358393408857, |
| "grad_norm": 0.3422824740409851, |
| "learning_rate": 9.152023575372443e-06, |
| "loss": 0.0252, |
| "step": 19540 |
| }, |
| { |
| "epoch": 20.133882595262616, |
| "grad_norm": 0.2562166750431061, |
| "learning_rate": 9.112331206916968e-06, |
| "loss": 0.0271, |
| "step": 19550 |
| }, |
| { |
| "epoch": 20.144181256436664, |
| "grad_norm": 0.40402811765670776, |
| "learning_rate": 9.072716467164965e-06, |
| "loss": 0.0274, |
| "step": 19560 |
| }, |
| { |
| "epoch": 20.15447991761071, |
| "grad_norm": 0.16052035987377167, |
| "learning_rate": 9.033179431328326e-06, |
| "loss": 0.0283, |
| "step": 19570 |
| }, |
| { |
| "epoch": 20.16477857878476, |
| "grad_norm": 0.24665406346321106, |
| "learning_rate": 8.993720174471509e-06, |
| "loss": 0.0272, |
| "step": 19580 |
| }, |
| { |
| "epoch": 20.175077239958807, |
| "grad_norm": 0.27554023265838623, |
| "learning_rate": 8.954338771511234e-06, |
| "loss": 0.0253, |
| "step": 19590 |
| }, |
| { |
| "epoch": 20.185375901132854, |
| "grad_norm": 0.40893182158470154, |
| "learning_rate": 8.915035297216434e-06, |
| "loss": 0.0292, |
| "step": 19600 |
| }, |
| { |
| "epoch": 20.195674562306902, |
| "grad_norm": 0.4667159914970398, |
| "learning_rate": 8.875809826208082e-06, |
| "loss": 0.027, |
| "step": 19610 |
| }, |
| { |
| "epoch": 20.205973223480946, |
| "grad_norm": 0.14994700253009796, |
| "learning_rate": 8.83666243295908e-06, |
| "loss": 0.023, |
| "step": 19620 |
| }, |
| { |
| "epoch": 20.216271884654994, |
| "grad_norm": 0.28246450424194336, |
| "learning_rate": 8.797593191794024e-06, |
| "loss": 0.0266, |
| "step": 19630 |
| }, |
| { |
| "epoch": 20.22657054582904, |
| "grad_norm": 0.2889540195465088, |
| "learning_rate": 8.758602176889236e-06, |
| "loss": 0.0227, |
| "step": 19640 |
| }, |
| { |
| "epoch": 20.23686920700309, |
| "grad_norm": 0.3575446307659149, |
| "learning_rate": 8.719689462272417e-06, |
| "loss": 0.0267, |
| "step": 19650 |
| }, |
| { |
| "epoch": 20.247167868177137, |
| "grad_norm": 1.8250188827514648, |
| "learning_rate": 8.680855121822673e-06, |
| "loss": 0.0244, |
| "step": 19660 |
| }, |
| { |
| "epoch": 20.257466529351184, |
| "grad_norm": 0.3060987889766693, |
| "learning_rate": 8.642099229270284e-06, |
| "loss": 0.027, |
| "step": 19670 |
| }, |
| { |
| "epoch": 20.267765190525232, |
| "grad_norm": 0.3836202919483185, |
| "learning_rate": 8.603421858196615e-06, |
| "loss": 0.0265, |
| "step": 19680 |
| }, |
| { |
| "epoch": 20.27806385169928, |
| "grad_norm": 0.22838439047336578, |
| "learning_rate": 8.56482308203388e-06, |
| "loss": 0.0226, |
| "step": 19690 |
| }, |
| { |
| "epoch": 20.288362512873327, |
| "grad_norm": 0.33937060832977295, |
| "learning_rate": 8.526302974065193e-06, |
| "loss": 0.0242, |
| "step": 19700 |
| }, |
| { |
| "epoch": 20.298661174047375, |
| "grad_norm": 0.17305344343185425, |
| "learning_rate": 8.487861607424191e-06, |
| "loss": 0.032, |
| "step": 19710 |
| }, |
| { |
| "epoch": 20.308959835221422, |
| "grad_norm": 0.2143699824810028, |
| "learning_rate": 8.449499055095089e-06, |
| "loss": 0.0319, |
| "step": 19720 |
| }, |
| { |
| "epoch": 20.31925849639547, |
| "grad_norm": 0.20306874811649323, |
| "learning_rate": 8.41121538991243e-06, |
| "loss": 0.024, |
| "step": 19730 |
| }, |
| { |
| "epoch": 20.329557157569518, |
| "grad_norm": 0.2488834708929062, |
| "learning_rate": 8.373010684561022e-06, |
| "loss": 0.0234, |
| "step": 19740 |
| }, |
| { |
| "epoch": 20.339855818743562, |
| "grad_norm": 0.20350010693073273, |
| "learning_rate": 8.334885011575694e-06, |
| "loss": 0.0261, |
| "step": 19750 |
| }, |
| { |
| "epoch": 20.35015447991761, |
| "grad_norm": 0.3989735543727875, |
| "learning_rate": 8.296838443341314e-06, |
| "loss": 0.0261, |
| "step": 19760 |
| }, |
| { |
| "epoch": 20.360453141091657, |
| "grad_norm": 0.2657710313796997, |
| "learning_rate": 8.258871052092476e-06, |
| "loss": 0.024, |
| "step": 19770 |
| }, |
| { |
| "epoch": 20.370751802265705, |
| "grad_norm": 0.9721512794494629, |
| "learning_rate": 8.220982909913504e-06, |
| "loss": 0.0268, |
| "step": 19780 |
| }, |
| { |
| "epoch": 20.381050463439752, |
| "grad_norm": 0.17601211369037628, |
| "learning_rate": 8.183174088738248e-06, |
| "loss": 0.0286, |
| "step": 19790 |
| }, |
| { |
| "epoch": 20.3913491246138, |
| "grad_norm": 0.1754244714975357, |
| "learning_rate": 8.145444660349966e-06, |
| "loss": 0.0255, |
| "step": 19800 |
| }, |
| { |
| "epoch": 20.401647785787848, |
| "grad_norm": 0.3604333996772766, |
| "learning_rate": 8.107794696381155e-06, |
| "loss": 0.0288, |
| "step": 19810 |
| }, |
| { |
| "epoch": 20.411946446961895, |
| "grad_norm": 0.23770973086357117, |
| "learning_rate": 8.07022426831347e-06, |
| "loss": 0.0262, |
| "step": 19820 |
| }, |
| { |
| "epoch": 20.422245108135943, |
| "grad_norm": 0.2746269404888153, |
| "learning_rate": 8.032733447477552e-06, |
| "loss": 0.0225, |
| "step": 19830 |
| }, |
| { |
| "epoch": 20.43254376930999, |
| "grad_norm": 0.22414709627628326, |
| "learning_rate": 7.995322305052905e-06, |
| "loss": 0.0344, |
| "step": 19840 |
| }, |
| { |
| "epoch": 20.44284243048404, |
| "grad_norm": 0.2225753217935562, |
| "learning_rate": 7.95799091206776e-06, |
| "loss": 0.0309, |
| "step": 19850 |
| }, |
| { |
| "epoch": 20.453141091658086, |
| "grad_norm": 0.22550413012504578, |
| "learning_rate": 7.920739339398908e-06, |
| "loss": 0.0249, |
| "step": 19860 |
| }, |
| { |
| "epoch": 20.46343975283213, |
| "grad_norm": 0.1763533353805542, |
| "learning_rate": 7.883567657771623e-06, |
| "loss": 0.0294, |
| "step": 19870 |
| }, |
| { |
| "epoch": 20.473738414006178, |
| "grad_norm": 0.22323361039161682, |
| "learning_rate": 7.8464759377595e-06, |
| "loss": 0.031, |
| "step": 19880 |
| }, |
| { |
| "epoch": 20.484037075180225, |
| "grad_norm": 0.3230327069759369, |
| "learning_rate": 7.809464249784309e-06, |
| "loss": 0.0227, |
| "step": 19890 |
| }, |
| { |
| "epoch": 20.494335736354273, |
| "grad_norm": 0.17077143490314484, |
| "learning_rate": 7.772532664115872e-06, |
| "loss": 0.0203, |
| "step": 19900 |
| }, |
| { |
| "epoch": 20.50463439752832, |
| "grad_norm": 0.2937758266925812, |
| "learning_rate": 7.73568125087195e-06, |
| "loss": 0.0274, |
| "step": 19910 |
| }, |
| { |
| "epoch": 20.51493305870237, |
| "grad_norm": 0.2762782871723175, |
| "learning_rate": 7.698910080018046e-06, |
| "loss": 0.0182, |
| "step": 19920 |
| }, |
| { |
| "epoch": 20.525231719876416, |
| "grad_norm": 0.37982815504074097, |
| "learning_rate": 7.662219221367356e-06, |
| "loss": 0.0207, |
| "step": 19930 |
| }, |
| { |
| "epoch": 20.535530381050464, |
| "grad_norm": 0.18541328608989716, |
| "learning_rate": 7.625608744580587e-06, |
| "loss": 0.0187, |
| "step": 19940 |
| }, |
| { |
| "epoch": 20.54582904222451, |
| "grad_norm": 0.24123801290988922, |
| "learning_rate": 7.5890787191658265e-06, |
| "loss": 0.0232, |
| "step": 19950 |
| }, |
| { |
| "epoch": 20.55612770339856, |
| "grad_norm": 0.2651348114013672, |
| "learning_rate": 7.5526292144784235e-06, |
| "loss": 0.0267, |
| "step": 19960 |
| }, |
| { |
| "epoch": 20.566426364572607, |
| "grad_norm": 0.24495820701122284, |
| "learning_rate": 7.516260299720862e-06, |
| "loss": 0.0249, |
| "step": 19970 |
| }, |
| { |
| "epoch": 20.576725025746654, |
| "grad_norm": 0.3241025507450104, |
| "learning_rate": 7.47997204394259e-06, |
| "loss": 0.0233, |
| "step": 19980 |
| }, |
| { |
| "epoch": 20.587023686920702, |
| "grad_norm": 0.18341611325740814, |
| "learning_rate": 7.443764516039947e-06, |
| "loss": 0.024, |
| "step": 19990 |
| }, |
| { |
| "epoch": 20.597322348094746, |
| "grad_norm": 0.2776510715484619, |
| "learning_rate": 7.407637784755983e-06, |
| "loss": 0.024, |
| "step": 20000 |
| }, |
| { |
| "epoch": 20.607621009268794, |
| "grad_norm": 0.2483317106962204, |
| "learning_rate": 7.37159191868037e-06, |
| "loss": 0.0259, |
| "step": 20010 |
| }, |
| { |
| "epoch": 20.61791967044284, |
| "grad_norm": 0.35903045535087585, |
| "learning_rate": 7.3356269862492276e-06, |
| "loss": 0.0298, |
| "step": 20020 |
| }, |
| { |
| "epoch": 20.62821833161689, |
| "grad_norm": 0.1740478128194809, |
| "learning_rate": 7.299743055745051e-06, |
| "loss": 0.0205, |
| "step": 20030 |
| }, |
| { |
| "epoch": 20.638516992790937, |
| "grad_norm": 0.289324551820755, |
| "learning_rate": 7.263940195296487e-06, |
| "loss": 0.0261, |
| "step": 20040 |
| }, |
| { |
| "epoch": 20.648815653964984, |
| "grad_norm": 0.1669989377260208, |
| "learning_rate": 7.228218472878323e-06, |
| "loss": 0.0246, |
| "step": 20050 |
| }, |
| { |
| "epoch": 20.659114315139032, |
| "grad_norm": 0.21035151183605194, |
| "learning_rate": 7.192577956311264e-06, |
| "loss": 0.0252, |
| "step": 20060 |
| }, |
| { |
| "epoch": 20.66941297631308, |
| "grad_norm": 0.3138667941093445, |
| "learning_rate": 7.157018713261859e-06, |
| "loss": 0.0257, |
| "step": 20070 |
| }, |
| { |
| "epoch": 20.679711637487127, |
| "grad_norm": 0.26812276244163513, |
| "learning_rate": 7.121540811242339e-06, |
| "loss": 0.0265, |
| "step": 20080 |
| }, |
| { |
| "epoch": 20.690010298661175, |
| "grad_norm": 0.17188164591789246, |
| "learning_rate": 7.086144317610521e-06, |
| "loss": 0.0216, |
| "step": 20090 |
| }, |
| { |
| "epoch": 20.700308959835223, |
| "grad_norm": 0.2384611815214157, |
| "learning_rate": 7.050829299569622e-06, |
| "loss": 0.0211, |
| "step": 20100 |
| }, |
| { |
| "epoch": 20.71060762100927, |
| "grad_norm": 0.23628003895282745, |
| "learning_rate": 7.015595824168214e-06, |
| "loss": 0.0229, |
| "step": 20110 |
| }, |
| { |
| "epoch": 20.720906282183318, |
| "grad_norm": 0.41519197821617126, |
| "learning_rate": 6.9804439583000255e-06, |
| "loss": 0.0325, |
| "step": 20120 |
| }, |
| { |
| "epoch": 20.731204943357362, |
| "grad_norm": 0.29381370544433594, |
| "learning_rate": 6.945373768703856e-06, |
| "loss": 0.0253, |
| "step": 20130 |
| }, |
| { |
| "epoch": 20.74150360453141, |
| "grad_norm": 0.3895536959171295, |
| "learning_rate": 6.910385321963431e-06, |
| "loss": 0.0247, |
| "step": 20140 |
| }, |
| { |
| "epoch": 20.751802265705457, |
| "grad_norm": 0.26379773020744324, |
| "learning_rate": 6.875478684507297e-06, |
| "loss": 0.0231, |
| "step": 20150 |
| }, |
| { |
| "epoch": 20.762100926879505, |
| "grad_norm": 0.20624184608459473, |
| "learning_rate": 6.840653922608636e-06, |
| "loss": 0.0327, |
| "step": 20160 |
| }, |
| { |
| "epoch": 20.772399588053553, |
| "grad_norm": 0.3396904170513153, |
| "learning_rate": 6.805911102385221e-06, |
| "loss": 0.0271, |
| "step": 20170 |
| }, |
| { |
| "epoch": 20.7826982492276, |
| "grad_norm": 0.2959730625152588, |
| "learning_rate": 6.771250289799236e-06, |
| "loss": 0.0206, |
| "step": 20180 |
| }, |
| { |
| "epoch": 20.792996910401648, |
| "grad_norm": 0.25411221385002136, |
| "learning_rate": 6.736671550657181e-06, |
| "loss": 0.0267, |
| "step": 20190 |
| }, |
| { |
| "epoch": 20.803295571575696, |
| "grad_norm": 0.5632199645042419, |
| "learning_rate": 6.702174950609708e-06, |
| "loss": 0.0269, |
| "step": 20200 |
| }, |
| { |
| "epoch": 20.813594232749743, |
| "grad_norm": 0.23322194814682007, |
| "learning_rate": 6.667760555151559e-06, |
| "loss": 0.0272, |
| "step": 20210 |
| }, |
| { |
| "epoch": 20.82389289392379, |
| "grad_norm": 0.8834056854248047, |
| "learning_rate": 6.6334284296213524e-06, |
| "loss": 0.0268, |
| "step": 20220 |
| }, |
| { |
| "epoch": 20.83419155509784, |
| "grad_norm": 0.4093743860721588, |
| "learning_rate": 6.599178639201542e-06, |
| "loss": 0.0177, |
| "step": 20230 |
| }, |
| { |
| "epoch": 20.844490216271886, |
| "grad_norm": 0.3726256489753723, |
| "learning_rate": 6.565011248918279e-06, |
| "loss": 0.0283, |
| "step": 20240 |
| }, |
| { |
| "epoch": 20.854788877445934, |
| "grad_norm": 0.24083060026168823, |
| "learning_rate": 6.530926323641207e-06, |
| "loss": 0.0226, |
| "step": 20250 |
| }, |
| { |
| "epoch": 20.865087538619978, |
| "grad_norm": 0.2924061119556427, |
| "learning_rate": 6.496923928083493e-06, |
| "loss": 0.0201, |
| "step": 20260 |
| }, |
| { |
| "epoch": 20.875386199794026, |
| "grad_norm": 0.26381853222846985, |
| "learning_rate": 6.463004126801531e-06, |
| "loss": 0.0262, |
| "step": 20270 |
| }, |
| { |
| "epoch": 20.885684860968073, |
| "grad_norm": 0.29354435205459595, |
| "learning_rate": 6.429166984194945e-06, |
| "loss": 0.0251, |
| "step": 20280 |
| }, |
| { |
| "epoch": 20.89598352214212, |
| "grad_norm": 0.267516553401947, |
| "learning_rate": 6.395412564506426e-06, |
| "loss": 0.0178, |
| "step": 20290 |
| }, |
| { |
| "epoch": 20.90628218331617, |
| "grad_norm": 0.6406127214431763, |
| "learning_rate": 6.361740931821608e-06, |
| "loss": 0.0238, |
| "step": 20300 |
| }, |
| { |
| "epoch": 20.916580844490216, |
| "grad_norm": 0.38734951615333557, |
| "learning_rate": 6.3281521500689e-06, |
| "loss": 0.0268, |
| "step": 20310 |
| }, |
| { |
| "epoch": 20.926879505664264, |
| "grad_norm": 0.148049458861351, |
| "learning_rate": 6.2946462830195005e-06, |
| "loss": 0.0213, |
| "step": 20320 |
| }, |
| { |
| "epoch": 20.93717816683831, |
| "grad_norm": 0.15365584194660187, |
| "learning_rate": 6.261223394287097e-06, |
| "loss": 0.0263, |
| "step": 20330 |
| }, |
| { |
| "epoch": 20.94747682801236, |
| "grad_norm": 0.45060428977012634, |
| "learning_rate": 6.22788354732789e-06, |
| "loss": 0.0254, |
| "step": 20340 |
| }, |
| { |
| "epoch": 20.957775489186407, |
| "grad_norm": 0.35524290800094604, |
| "learning_rate": 6.194626805440407e-06, |
| "loss": 0.0288, |
| "step": 20350 |
| }, |
| { |
| "epoch": 20.968074150360454, |
| "grad_norm": 0.20348112285137177, |
| "learning_rate": 6.1614532317654015e-06, |
| "loss": 0.0291, |
| "step": 20360 |
| }, |
| { |
| "epoch": 20.978372811534502, |
| "grad_norm": 0.4701661467552185, |
| "learning_rate": 6.128362889285671e-06, |
| "loss": 0.0292, |
| "step": 20370 |
| }, |
| { |
| "epoch": 20.988671472708546, |
| "grad_norm": 0.34608376026153564, |
| "learning_rate": 6.095355840826089e-06, |
| "loss": 0.0262, |
| "step": 20380 |
| }, |
| { |
| "epoch": 20.998970133882594, |
| "grad_norm": 0.15542836487293243, |
| "learning_rate": 6.062432149053293e-06, |
| "loss": 0.0266, |
| "step": 20390 |
| }, |
| { |
| "epoch": 21.00926879505664, |
| "grad_norm": 0.15955375134944916, |
| "learning_rate": 6.029591876475721e-06, |
| "loss": 0.0277, |
| "step": 20400 |
| }, |
| { |
| "epoch": 21.01956745623069, |
| "grad_norm": 0.3163027763366699, |
| "learning_rate": 5.996835085443403e-06, |
| "loss": 0.0233, |
| "step": 20410 |
| }, |
| { |
| "epoch": 21.029866117404737, |
| "grad_norm": 0.33183568716049194, |
| "learning_rate": 5.964161838147897e-06, |
| "loss": 0.025, |
| "step": 20420 |
| }, |
| { |
| "epoch": 21.040164778578784, |
| "grad_norm": 0.192501500248909, |
| "learning_rate": 5.931572196622104e-06, |
| "loss": 0.0239, |
| "step": 20430 |
| }, |
| { |
| "epoch": 21.050463439752832, |
| "grad_norm": 0.2504754066467285, |
| "learning_rate": 5.899066222740257e-06, |
| "loss": 0.0202, |
| "step": 20440 |
| }, |
| { |
| "epoch": 21.06076210092688, |
| "grad_norm": 0.1719776839017868, |
| "learning_rate": 5.866643978217667e-06, |
| "loss": 0.0266, |
| "step": 20450 |
| }, |
| { |
| "epoch": 21.071060762100927, |
| "grad_norm": 0.2612786293029785, |
| "learning_rate": 5.834305524610728e-06, |
| "loss": 0.0232, |
| "step": 20460 |
| }, |
| { |
| "epoch": 21.081359423274975, |
| "grad_norm": 0.2384280562400818, |
| "learning_rate": 5.802050923316738e-06, |
| "loss": 0.0252, |
| "step": 20470 |
| }, |
| { |
| "epoch": 21.091658084449023, |
| "grad_norm": 0.36142706871032715, |
| "learning_rate": 5.769880235573788e-06, |
| "loss": 0.0235, |
| "step": 20480 |
| }, |
| { |
| "epoch": 21.10195674562307, |
| "grad_norm": 0.19037047028541565, |
| "learning_rate": 5.737793522460633e-06, |
| "loss": 0.0234, |
| "step": 20490 |
| }, |
| { |
| "epoch": 21.112255406797118, |
| "grad_norm": 0.30957984924316406, |
| "learning_rate": 5.705790844896658e-06, |
| "loss": 0.0232, |
| "step": 20500 |
| }, |
| { |
| "epoch": 21.122554067971162, |
| "grad_norm": 1.0624467134475708, |
| "learning_rate": 5.673872263641622e-06, |
| "loss": 0.0283, |
| "step": 20510 |
| }, |
| { |
| "epoch": 21.13285272914521, |
| "grad_norm": 0.43074896931648254, |
| "learning_rate": 5.642037839295666e-06, |
| "loss": 0.0267, |
| "step": 20520 |
| }, |
| { |
| "epoch": 21.143151390319257, |
| "grad_norm": 0.27735230326652527, |
| "learning_rate": 5.6102876322991495e-06, |
| "loss": 0.0251, |
| "step": 20530 |
| }, |
| { |
| "epoch": 21.153450051493305, |
| "grad_norm": 0.21708090603351593, |
| "learning_rate": 5.5786217029325295e-06, |
| "loss": 0.0201, |
| "step": 20540 |
| }, |
| { |
| "epoch": 21.163748712667353, |
| "grad_norm": 0.2916223406791687, |
| "learning_rate": 5.547040111316232e-06, |
| "loss": 0.0262, |
| "step": 20550 |
| }, |
| { |
| "epoch": 21.1740473738414, |
| "grad_norm": 0.2554883360862732, |
| "learning_rate": 5.515542917410627e-06, |
| "loss": 0.0267, |
| "step": 20560 |
| }, |
| { |
| "epoch": 21.184346035015448, |
| "grad_norm": 0.26386693120002747, |
| "learning_rate": 5.484130181015773e-06, |
| "loss": 0.0227, |
| "step": 20570 |
| }, |
| { |
| "epoch": 21.194644696189496, |
| "grad_norm": 0.3534632623195648, |
| "learning_rate": 5.4528019617714195e-06, |
| "loss": 0.0261, |
| "step": 20580 |
| }, |
| { |
| "epoch": 21.204943357363543, |
| "grad_norm": 0.3145328462123871, |
| "learning_rate": 5.42155831915685e-06, |
| "loss": 0.0234, |
| "step": 20590 |
| }, |
| { |
| "epoch": 21.21524201853759, |
| "grad_norm": 0.2311139851808548, |
| "learning_rate": 5.3903993124907736e-06, |
| "loss": 0.0253, |
| "step": 20600 |
| }, |
| { |
| "epoch": 21.22554067971164, |
| "grad_norm": 0.2479424774646759, |
| "learning_rate": 5.35932500093117e-06, |
| "loss": 0.0212, |
| "step": 20610 |
| }, |
| { |
| "epoch": 21.235839340885686, |
| "grad_norm": 0.20154082775115967, |
| "learning_rate": 5.328335443475302e-06, |
| "loss": 0.0282, |
| "step": 20620 |
| }, |
| { |
| "epoch": 21.246138002059734, |
| "grad_norm": 0.19295744597911835, |
| "learning_rate": 5.297430698959443e-06, |
| "loss": 0.0242, |
| "step": 20630 |
| }, |
| { |
| "epoch": 21.256436663233778, |
| "grad_norm": 0.2379661649465561, |
| "learning_rate": 5.266610826058854e-06, |
| "loss": 0.0228, |
| "step": 20640 |
| }, |
| { |
| "epoch": 21.266735324407826, |
| "grad_norm": 0.3034205436706543, |
| "learning_rate": 5.235875883287705e-06, |
| "loss": 0.0263, |
| "step": 20650 |
| }, |
| { |
| "epoch": 21.277033985581873, |
| "grad_norm": 0.3351058065891266, |
| "learning_rate": 5.205225928998874e-06, |
| "loss": 0.026, |
| "step": 20660 |
| }, |
| { |
| "epoch": 21.28733264675592, |
| "grad_norm": 0.17250697314739227, |
| "learning_rate": 5.174661021383898e-06, |
| "loss": 0.0225, |
| "step": 20670 |
| }, |
| { |
| "epoch": 21.29763130792997, |
| "grad_norm": 0.16241209208965302, |
| "learning_rate": 5.144181218472838e-06, |
| "loss": 0.0286, |
| "step": 20680 |
| }, |
| { |
| "epoch": 21.307929969104016, |
| "grad_norm": 0.189495250582695, |
| "learning_rate": 5.113786578134205e-06, |
| "loss": 0.0267, |
| "step": 20690 |
| }, |
| { |
| "epoch": 21.318228630278064, |
| "grad_norm": 0.3670301139354706, |
| "learning_rate": 5.083477158074757e-06, |
| "loss": 0.03, |
| "step": 20700 |
| }, |
| { |
| "epoch": 21.32852729145211, |
| "grad_norm": 0.545432448387146, |
| "learning_rate": 5.053253015839543e-06, |
| "loss": 0.0245, |
| "step": 20710 |
| }, |
| { |
| "epoch": 21.33882595262616, |
| "grad_norm": 0.281653493642807, |
| "learning_rate": 5.0231142088116245e-06, |
| "loss": 0.0321, |
| "step": 20720 |
| }, |
| { |
| "epoch": 21.349124613800207, |
| "grad_norm": 0.37861448526382446, |
| "learning_rate": 4.993060794212096e-06, |
| "loss": 0.0234, |
| "step": 20730 |
| }, |
| { |
| "epoch": 21.359423274974255, |
| "grad_norm": 0.9391881823539734, |
| "learning_rate": 4.9630928290999026e-06, |
| "loss": 0.0257, |
| "step": 20740 |
| }, |
| { |
| "epoch": 21.369721936148302, |
| "grad_norm": 0.20733687281608582, |
| "learning_rate": 4.933210370371783e-06, |
| "loss": 0.0194, |
| "step": 20750 |
| }, |
| { |
| "epoch": 21.38002059732235, |
| "grad_norm": 0.20302939414978027, |
| "learning_rate": 4.9034134747620805e-06, |
| "loss": 0.0223, |
| "step": 20760 |
| }, |
| { |
| "epoch": 21.390319258496394, |
| "grad_norm": 0.3713189363479614, |
| "learning_rate": 4.873702198842767e-06, |
| "loss": 0.0212, |
| "step": 20770 |
| }, |
| { |
| "epoch": 21.40061791967044, |
| "grad_norm": 0.6167316436767578, |
| "learning_rate": 4.844076599023195e-06, |
| "loss": 0.0234, |
| "step": 20780 |
| }, |
| { |
| "epoch": 21.41091658084449, |
| "grad_norm": 0.3855701982975006, |
| "learning_rate": 4.814536731550073e-06, |
| "loss": 0.0276, |
| "step": 20790 |
| }, |
| { |
| "epoch": 21.421215242018537, |
| "grad_norm": 0.24730083346366882, |
| "learning_rate": 4.785082652507355e-06, |
| "loss": 0.0252, |
| "step": 20800 |
| }, |
| { |
| "epoch": 21.431513903192585, |
| "grad_norm": 0.19756212830543518, |
| "learning_rate": 4.755714417816104e-06, |
| "loss": 0.0273, |
| "step": 20810 |
| }, |
| { |
| "epoch": 21.441812564366632, |
| "grad_norm": 0.3603731095790863, |
| "learning_rate": 4.726432083234383e-06, |
| "loss": 0.0204, |
| "step": 20820 |
| }, |
| { |
| "epoch": 21.45211122554068, |
| "grad_norm": 0.7706936597824097, |
| "learning_rate": 4.697235704357217e-06, |
| "loss": 0.0252, |
| "step": 20830 |
| }, |
| { |
| "epoch": 21.462409886714727, |
| "grad_norm": 0.23673802614212036, |
| "learning_rate": 4.66812533661638e-06, |
| "loss": 0.0229, |
| "step": 20840 |
| }, |
| { |
| "epoch": 21.472708547888775, |
| "grad_norm": 0.184868723154068, |
| "learning_rate": 4.6391010352803745e-06, |
| "loss": 0.0272, |
| "step": 20850 |
| }, |
| { |
| "epoch": 21.483007209062823, |
| "grad_norm": 0.2611936926841736, |
| "learning_rate": 4.610162855454303e-06, |
| "loss": 0.0295, |
| "step": 20860 |
| }, |
| { |
| "epoch": 21.49330587023687, |
| "grad_norm": 0.24588996171951294, |
| "learning_rate": 4.581310852079762e-06, |
| "loss": 0.0303, |
| "step": 20870 |
| }, |
| { |
| "epoch": 21.503604531410918, |
| "grad_norm": 0.31468459963798523, |
| "learning_rate": 4.552545079934689e-06, |
| "loss": 0.0237, |
| "step": 20880 |
| }, |
| { |
| "epoch": 21.513903192584962, |
| "grad_norm": 0.26411235332489014, |
| "learning_rate": 4.523865593633381e-06, |
| "loss": 0.025, |
| "step": 20890 |
| }, |
| { |
| "epoch": 21.52420185375901, |
| "grad_norm": 0.31742873787879944, |
| "learning_rate": 4.4952724476262475e-06, |
| "loss": 0.0194, |
| "step": 20900 |
| }, |
| { |
| "epoch": 21.534500514933058, |
| "grad_norm": 0.22004817426204681, |
| "learning_rate": 4.466765696199798e-06, |
| "loss": 0.0265, |
| "step": 20910 |
| }, |
| { |
| "epoch": 21.544799176107105, |
| "grad_norm": 0.247009739279747, |
| "learning_rate": 4.438345393476528e-06, |
| "loss": 0.0242, |
| "step": 20920 |
| }, |
| { |
| "epoch": 21.555097837281153, |
| "grad_norm": 0.34784770011901855, |
| "learning_rate": 4.410011593414792e-06, |
| "loss": 0.0204, |
| "step": 20930 |
| }, |
| { |
| "epoch": 21.5653964984552, |
| "grad_norm": 0.5714616775512695, |
| "learning_rate": 4.381764349808687e-06, |
| "loss": 0.0264, |
| "step": 20940 |
| }, |
| { |
| "epoch": 21.575695159629248, |
| "grad_norm": 0.24919095635414124, |
| "learning_rate": 4.35360371628803e-06, |
| "loss": 0.0202, |
| "step": 20950 |
| }, |
| { |
| "epoch": 21.585993820803296, |
| "grad_norm": 0.1831541508436203, |
| "learning_rate": 4.325529746318147e-06, |
| "loss": 0.0234, |
| "step": 20960 |
| }, |
| { |
| "epoch": 21.596292481977343, |
| "grad_norm": 0.28681278228759766, |
| "learning_rate": 4.297542493199852e-06, |
| "loss": 0.0265, |
| "step": 20970 |
| }, |
| { |
| "epoch": 21.60659114315139, |
| "grad_norm": 0.1981225460767746, |
| "learning_rate": 4.269642010069319e-06, |
| "loss": 0.029, |
| "step": 20980 |
| }, |
| { |
| "epoch": 21.61688980432544, |
| "grad_norm": 0.3072325587272644, |
| "learning_rate": 4.241828349897991e-06, |
| "loss": 0.0216, |
| "step": 20990 |
| }, |
| { |
| "epoch": 21.627188465499486, |
| "grad_norm": 0.24011924862861633, |
| "learning_rate": 4.214101565492429e-06, |
| "loss": 0.0215, |
| "step": 21000 |
| }, |
| { |
| "epoch": 21.637487126673534, |
| "grad_norm": 0.24705877900123596, |
| "learning_rate": 4.186461709494316e-06, |
| "loss": 0.0279, |
| "step": 21010 |
| }, |
| { |
| "epoch": 21.647785787847578, |
| "grad_norm": 0.33800575137138367, |
| "learning_rate": 4.158908834380237e-06, |
| "loss": 0.0233, |
| "step": 21020 |
| }, |
| { |
| "epoch": 21.658084449021626, |
| "grad_norm": 0.13398931920528412, |
| "learning_rate": 4.13144299246167e-06, |
| "loss": 0.0181, |
| "step": 21030 |
| }, |
| { |
| "epoch": 21.668383110195673, |
| "grad_norm": 0.39062628149986267, |
| "learning_rate": 4.104064235884847e-06, |
| "loss": 0.023, |
| "step": 21040 |
| }, |
| { |
| "epoch": 21.67868177136972, |
| "grad_norm": 0.23508042097091675, |
| "learning_rate": 4.076772616630642e-06, |
| "loss": 0.0227, |
| "step": 21050 |
| }, |
| { |
| "epoch": 21.68898043254377, |
| "grad_norm": 0.2864128649234772, |
| "learning_rate": 4.049568186514513e-06, |
| "loss": 0.0219, |
| "step": 21060 |
| }, |
| { |
| "epoch": 21.699279093717816, |
| "grad_norm": 0.30533504486083984, |
| "learning_rate": 4.022450997186378e-06, |
| "loss": 0.0262, |
| "step": 21070 |
| }, |
| { |
| "epoch": 21.709577754891864, |
| "grad_norm": 0.30080464482307434, |
| "learning_rate": 3.99542110013052e-06, |
| "loss": 0.0222, |
| "step": 21080 |
| }, |
| { |
| "epoch": 21.71987641606591, |
| "grad_norm": 0.263285368680954, |
| "learning_rate": 3.9684785466654885e-06, |
| "loss": 0.0258, |
| "step": 21090 |
| }, |
| { |
| "epoch": 21.73017507723996, |
| "grad_norm": 0.2877969741821289, |
| "learning_rate": 3.9416233879440046e-06, |
| "loss": 0.0269, |
| "step": 21100 |
| }, |
| { |
| "epoch": 21.740473738414007, |
| "grad_norm": 0.6060110330581665, |
| "learning_rate": 3.914855674952856e-06, |
| "loss": 0.0253, |
| "step": 21110 |
| }, |
| { |
| "epoch": 21.750772399588055, |
| "grad_norm": 0.27484217286109924, |
| "learning_rate": 3.888175458512816e-06, |
| "loss": 0.0243, |
| "step": 21120 |
| }, |
| { |
| "epoch": 21.761071060762102, |
| "grad_norm": 0.2235790491104126, |
| "learning_rate": 3.86158278927854e-06, |
| "loss": 0.0214, |
| "step": 21130 |
| }, |
| { |
| "epoch": 21.77136972193615, |
| "grad_norm": 0.18296927213668823, |
| "learning_rate": 3.835077717738461e-06, |
| "loss": 0.0247, |
| "step": 21140 |
| }, |
| { |
| "epoch": 21.781668383110194, |
| "grad_norm": 0.22553135454654694, |
| "learning_rate": 3.8086602942147053e-06, |
| "loss": 0.0219, |
| "step": 21150 |
| }, |
| { |
| "epoch": 21.79196704428424, |
| "grad_norm": 0.2685544788837433, |
| "learning_rate": 3.7823305688629907e-06, |
| "loss": 0.0244, |
| "step": 21160 |
| }, |
| { |
| "epoch": 21.80226570545829, |
| "grad_norm": 0.22941188514232635, |
| "learning_rate": 3.756088591672513e-06, |
| "loss": 0.0253, |
| "step": 21170 |
| }, |
| { |
| "epoch": 21.812564366632337, |
| "grad_norm": 0.24472026526927948, |
| "learning_rate": 3.729934412465924e-06, |
| "loss": 0.0276, |
| "step": 21180 |
| }, |
| { |
| "epoch": 21.822863027806385, |
| "grad_norm": 0.3415147364139557, |
| "learning_rate": 3.7038680808991255e-06, |
| "loss": 0.0253, |
| "step": 21190 |
| }, |
| { |
| "epoch": 21.833161688980432, |
| "grad_norm": 0.27968692779541016, |
| "learning_rate": 3.677889646461252e-06, |
| "loss": 0.0266, |
| "step": 21200 |
| }, |
| { |
| "epoch": 21.84346035015448, |
| "grad_norm": 0.3046143352985382, |
| "learning_rate": 3.6519991584745782e-06, |
| "loss": 0.0308, |
| "step": 21210 |
| }, |
| { |
| "epoch": 21.853759011328528, |
| "grad_norm": 0.20563410222530365, |
| "learning_rate": 3.626196666094389e-06, |
| "loss": 0.0225, |
| "step": 21220 |
| }, |
| { |
| "epoch": 21.864057672502575, |
| "grad_norm": 0.22085511684417725, |
| "learning_rate": 3.600482218308876e-06, |
| "loss": 0.0241, |
| "step": 21230 |
| }, |
| { |
| "epoch": 21.874356333676623, |
| "grad_norm": 0.2871539890766144, |
| "learning_rate": 3.574855863939136e-06, |
| "loss": 0.0307, |
| "step": 21240 |
| }, |
| { |
| "epoch": 21.88465499485067, |
| "grad_norm": 0.16943010687828064, |
| "learning_rate": 3.5493176516389447e-06, |
| "loss": 0.02, |
| "step": 21250 |
| }, |
| { |
| "epoch": 21.894953656024718, |
| "grad_norm": 0.30576252937316895, |
| "learning_rate": 3.5238676298947726e-06, |
| "loss": 0.026, |
| "step": 21260 |
| }, |
| { |
| "epoch": 21.905252317198766, |
| "grad_norm": 0.369899719953537, |
| "learning_rate": 3.4985058470256403e-06, |
| "loss": 0.0242, |
| "step": 21270 |
| }, |
| { |
| "epoch": 21.91555097837281, |
| "grad_norm": 0.17503631114959717, |
| "learning_rate": 3.473232351183048e-06, |
| "loss": 0.0273, |
| "step": 21280 |
| }, |
| { |
| "epoch": 21.925849639546858, |
| "grad_norm": 0.1711639016866684, |
| "learning_rate": 3.4480471903508505e-06, |
| "loss": 0.028, |
| "step": 21290 |
| }, |
| { |
| "epoch": 21.936148300720905, |
| "grad_norm": 0.7285293340682983, |
| "learning_rate": 3.422950412345238e-06, |
| "loss": 0.0254, |
| "step": 21300 |
| }, |
| { |
| "epoch": 21.946446961894953, |
| "grad_norm": 0.35878413915634155, |
| "learning_rate": 3.3979420648145465e-06, |
| "loss": 0.0307, |
| "step": 21310 |
| }, |
| { |
| "epoch": 21.956745623069, |
| "grad_norm": 0.20208248496055603, |
| "learning_rate": 3.3730221952392503e-06, |
| "loss": 0.0182, |
| "step": 21320 |
| }, |
| { |
| "epoch": 21.96704428424305, |
| "grad_norm": 0.1312950998544693, |
| "learning_rate": 3.3481908509318316e-06, |
| "loss": 0.0199, |
| "step": 21330 |
| }, |
| { |
| "epoch": 21.977342945417096, |
| "grad_norm": 0.24594810605049133, |
| "learning_rate": 3.323448079036712e-06, |
| "loss": 0.0274, |
| "step": 21340 |
| }, |
| { |
| "epoch": 21.987641606591144, |
| "grad_norm": 0.15972022712230682, |
| "learning_rate": 3.2987939265301137e-06, |
| "loss": 0.0204, |
| "step": 21350 |
| }, |
| { |
| "epoch": 21.99794026776519, |
| "grad_norm": 0.2519496977329254, |
| "learning_rate": 3.2742284402200674e-06, |
| "loss": 0.023, |
| "step": 21360 |
| }, |
| { |
| "epoch": 22.00823892893924, |
| "grad_norm": 0.4822291135787964, |
| "learning_rate": 3.249751666746209e-06, |
| "loss": 0.0269, |
| "step": 21370 |
| }, |
| { |
| "epoch": 22.018537590113286, |
| "grad_norm": 0.1985165923833847, |
| "learning_rate": 3.2253636525797715e-06, |
| "loss": 0.024, |
| "step": 21380 |
| }, |
| { |
| "epoch": 22.028836251287334, |
| "grad_norm": 0.2741225063800812, |
| "learning_rate": 3.201064444023466e-06, |
| "loss": 0.0238, |
| "step": 21390 |
| }, |
| { |
| "epoch": 22.039134912461382, |
| "grad_norm": 0.3621535301208496, |
| "learning_rate": 3.176854087211406e-06, |
| "loss": 0.0217, |
| "step": 21400 |
| }, |
| { |
| "epoch": 22.049433573635426, |
| "grad_norm": 0.15000490844249725, |
| "learning_rate": 3.1527326281089895e-06, |
| "loss": 0.0277, |
| "step": 21410 |
| }, |
| { |
| "epoch": 22.059732234809474, |
| "grad_norm": 0.2596382200717926, |
| "learning_rate": 3.128700112512867e-06, |
| "loss": 0.0171, |
| "step": 21420 |
| }, |
| { |
| "epoch": 22.07003089598352, |
| "grad_norm": 0.15466873347759247, |
| "learning_rate": 3.104756586050794e-06, |
| "loss": 0.0283, |
| "step": 21430 |
| }, |
| { |
| "epoch": 22.08032955715757, |
| "grad_norm": 0.15102769434452057, |
| "learning_rate": 3.080902094181587e-06, |
| "loss": 0.0219, |
| "step": 21440 |
| }, |
| { |
| "epoch": 22.090628218331616, |
| "grad_norm": 0.21250209212303162, |
| "learning_rate": 3.0571366821950274e-06, |
| "loss": 0.0203, |
| "step": 21450 |
| }, |
| { |
| "epoch": 22.100926879505664, |
| "grad_norm": 0.24926158785820007, |
| "learning_rate": 3.0334603952117513e-06, |
| "loss": 0.0298, |
| "step": 21460 |
| }, |
| { |
| "epoch": 22.111225540679712, |
| "grad_norm": 0.22714541852474213, |
| "learning_rate": 3.0098732781832005e-06, |
| "loss": 0.0253, |
| "step": 21470 |
| }, |
| { |
| "epoch": 22.12152420185376, |
| "grad_norm": 0.16168661415576935, |
| "learning_rate": 2.9863753758915204e-06, |
| "loss": 0.0216, |
| "step": 21480 |
| }, |
| { |
| "epoch": 22.131822863027807, |
| "grad_norm": 0.30570539832115173, |
| "learning_rate": 2.9629667329494683e-06, |
| "loss": 0.0231, |
| "step": 21490 |
| }, |
| { |
| "epoch": 22.142121524201855, |
| "grad_norm": 0.269822359085083, |
| "learning_rate": 2.9396473938003153e-06, |
| "loss": 0.0232, |
| "step": 21500 |
| }, |
| { |
| "epoch": 22.152420185375902, |
| "grad_norm": 0.2935786247253418, |
| "learning_rate": 2.9164174027178413e-06, |
| "loss": 0.024, |
| "step": 21510 |
| }, |
| { |
| "epoch": 22.16271884654995, |
| "grad_norm": 0.8317728638648987, |
| "learning_rate": 2.8932768038061163e-06, |
| "loss": 0.0204, |
| "step": 21520 |
| }, |
| { |
| "epoch": 22.173017507723994, |
| "grad_norm": 0.2997764050960541, |
| "learning_rate": 2.8702256409995466e-06, |
| "loss": 0.0279, |
| "step": 21530 |
| }, |
| { |
| "epoch": 22.183316168898042, |
| "grad_norm": 0.25958797335624695, |
| "learning_rate": 2.847263958062718e-06, |
| "loss": 0.0213, |
| "step": 21540 |
| }, |
| { |
| "epoch": 22.19361483007209, |
| "grad_norm": 0.27575650811195374, |
| "learning_rate": 2.8243917985903258e-06, |
| "loss": 0.0218, |
| "step": 21550 |
| }, |
| { |
| "epoch": 22.203913491246137, |
| "grad_norm": 0.28547245264053345, |
| "learning_rate": 2.801609206007094e-06, |
| "loss": 0.0244, |
| "step": 21560 |
| }, |
| { |
| "epoch": 22.214212152420185, |
| "grad_norm": 0.2561958134174347, |
| "learning_rate": 2.778916223567729e-06, |
| "loss": 0.0225, |
| "step": 21570 |
| }, |
| { |
| "epoch": 22.224510813594232, |
| "grad_norm": 0.3616337478160858, |
| "learning_rate": 2.7563128943567607e-06, |
| "loss": 0.0243, |
| "step": 21580 |
| }, |
| { |
| "epoch": 22.23480947476828, |
| "grad_norm": 0.9687033295631409, |
| "learning_rate": 2.7337992612885275e-06, |
| "loss": 0.0262, |
| "step": 21590 |
| }, |
| { |
| "epoch": 22.245108135942328, |
| "grad_norm": 0.5252371430397034, |
| "learning_rate": 2.7113753671070774e-06, |
| "loss": 0.0256, |
| "step": 21600 |
| }, |
| { |
| "epoch": 22.255406797116375, |
| "grad_norm": 0.19724826514720917, |
| "learning_rate": 2.689041254386071e-06, |
| "loss": 0.0265, |
| "step": 21610 |
| }, |
| { |
| "epoch": 22.265705458290423, |
| "grad_norm": 0.27360984683036804, |
| "learning_rate": 2.666796965528695e-06, |
| "loss": 0.0256, |
| "step": 21620 |
| }, |
| { |
| "epoch": 22.27600411946447, |
| "grad_norm": 0.17367063462734222, |
| "learning_rate": 2.6446425427676503e-06, |
| "loss": 0.0254, |
| "step": 21630 |
| }, |
| { |
| "epoch": 22.28630278063852, |
| "grad_norm": 0.2514844536781311, |
| "learning_rate": 2.6225780281649626e-06, |
| "loss": 0.0325, |
| "step": 21640 |
| }, |
| { |
| "epoch": 22.296601441812566, |
| "grad_norm": 0.37126439809799194, |
| "learning_rate": 2.6006034636119835e-06, |
| "loss": 0.0188, |
| "step": 21650 |
| }, |
| { |
| "epoch": 22.30690010298661, |
| "grad_norm": 0.21366749703884125, |
| "learning_rate": 2.5787188908292847e-06, |
| "loss": 0.0232, |
| "step": 21660 |
| }, |
| { |
| "epoch": 22.317198764160658, |
| "grad_norm": 0.18360739946365356, |
| "learning_rate": 2.5569243513666017e-06, |
| "loss": 0.0275, |
| "step": 21670 |
| }, |
| { |
| "epoch": 22.327497425334705, |
| "grad_norm": 0.4810870289802551, |
| "learning_rate": 2.53521988660268e-06, |
| "loss": 0.0218, |
| "step": 21680 |
| }, |
| { |
| "epoch": 22.337796086508753, |
| "grad_norm": 0.24920041859149933, |
| "learning_rate": 2.513605537745317e-06, |
| "loss": 0.0213, |
| "step": 21690 |
| }, |
| { |
| "epoch": 22.3480947476828, |
| "grad_norm": 0.24797523021697998, |
| "learning_rate": 2.492081345831171e-06, |
| "loss": 0.0257, |
| "step": 21700 |
| }, |
| { |
| "epoch": 22.35839340885685, |
| "grad_norm": 0.3930380642414093, |
| "learning_rate": 2.4706473517257413e-06, |
| "loss": 0.0313, |
| "step": 21710 |
| }, |
| { |
| "epoch": 22.368692070030896, |
| "grad_norm": 0.26522117853164673, |
| "learning_rate": 2.449303596123287e-06, |
| "loss": 0.0242, |
| "step": 21720 |
| }, |
| { |
| "epoch": 22.378990731204944, |
| "grad_norm": 0.27054446935653687, |
| "learning_rate": 2.4280501195467374e-06, |
| "loss": 0.0248, |
| "step": 21730 |
| }, |
| { |
| "epoch": 22.38928939237899, |
| "grad_norm": 0.3780190348625183, |
| "learning_rate": 2.4068869623476097e-06, |
| "loss": 0.0228, |
| "step": 21740 |
| }, |
| { |
| "epoch": 22.39958805355304, |
| "grad_norm": 0.2384350597858429, |
| "learning_rate": 2.3858141647059683e-06, |
| "loss": 0.022, |
| "step": 21750 |
| }, |
| { |
| "epoch": 22.409886714727087, |
| "grad_norm": 0.21788683533668518, |
| "learning_rate": 2.3648317666302823e-06, |
| "loss": 0.023, |
| "step": 21760 |
| }, |
| { |
| "epoch": 22.420185375901134, |
| "grad_norm": 0.3061414659023285, |
| "learning_rate": 2.343939807957429e-06, |
| "loss": 0.0216, |
| "step": 21770 |
| }, |
| { |
| "epoch": 22.430484037075182, |
| "grad_norm": 0.186418816447258, |
| "learning_rate": 2.3231383283525588e-06, |
| "loss": 0.0275, |
| "step": 21780 |
| }, |
| { |
| "epoch": 22.440782698249226, |
| "grad_norm": 0.25363844633102417, |
| "learning_rate": 2.302427367309046e-06, |
| "loss": 0.0204, |
| "step": 21790 |
| }, |
| { |
| "epoch": 22.451081359423274, |
| "grad_norm": 0.20051760971546173, |
| "learning_rate": 2.2818069641483864e-06, |
| "loss": 0.026, |
| "step": 21800 |
| }, |
| { |
| "epoch": 22.46138002059732, |
| "grad_norm": 0.29427051544189453, |
| "learning_rate": 2.2612771580201863e-06, |
| "loss": 0.0216, |
| "step": 21810 |
| }, |
| { |
| "epoch": 22.47167868177137, |
| "grad_norm": 0.14624521136283875, |
| "learning_rate": 2.2408379879020114e-06, |
| "loss": 0.0217, |
| "step": 21820 |
| }, |
| { |
| "epoch": 22.481977342945417, |
| "grad_norm": 0.21760720014572144, |
| "learning_rate": 2.2204894925993535e-06, |
| "loss": 0.0265, |
| "step": 21830 |
| }, |
| { |
| "epoch": 22.492276004119464, |
| "grad_norm": 0.23411710560321808, |
| "learning_rate": 2.200231710745565e-06, |
| "loss": 0.0258, |
| "step": 21840 |
| }, |
| { |
| "epoch": 22.502574665293512, |
| "grad_norm": 0.38780733942985535, |
| "learning_rate": 2.1800646808017576e-06, |
| "loss": 0.0243, |
| "step": 21850 |
| }, |
| { |
| "epoch": 22.51287332646756, |
| "grad_norm": 1.6846517324447632, |
| "learning_rate": 2.1599884410567427e-06, |
| "loss": 0.0293, |
| "step": 21860 |
| }, |
| { |
| "epoch": 22.523171987641607, |
| "grad_norm": 0.3452723026275635, |
| "learning_rate": 2.1400030296269633e-06, |
| "loss": 0.0248, |
| "step": 21870 |
| }, |
| { |
| "epoch": 22.533470648815655, |
| "grad_norm": 0.30434274673461914, |
| "learning_rate": 2.1201084844564124e-06, |
| "loss": 0.0267, |
| "step": 21880 |
| }, |
| { |
| "epoch": 22.543769309989703, |
| "grad_norm": 0.28344815969467163, |
| "learning_rate": 2.1003048433165806e-06, |
| "loss": 0.0226, |
| "step": 21890 |
| }, |
| { |
| "epoch": 22.55406797116375, |
| "grad_norm": 0.3896522521972656, |
| "learning_rate": 2.0805921438063593e-06, |
| "loss": 0.0248, |
| "step": 21900 |
| }, |
| { |
| "epoch": 22.564366632337794, |
| "grad_norm": 0.2550482749938965, |
| "learning_rate": 2.0609704233519657e-06, |
| "loss": 0.0226, |
| "step": 21910 |
| }, |
| { |
| "epoch": 22.574665293511842, |
| "grad_norm": 0.21991167962551117, |
| "learning_rate": 2.0414397192069003e-06, |
| "loss": 0.0258, |
| "step": 21920 |
| }, |
| { |
| "epoch": 22.58496395468589, |
| "grad_norm": 0.19379939138889313, |
| "learning_rate": 2.022000068451868e-06, |
| "loss": 0.0199, |
| "step": 21930 |
| }, |
| { |
| "epoch": 22.595262615859937, |
| "grad_norm": 0.21848715841770172, |
| "learning_rate": 2.0026515079946906e-06, |
| "loss": 0.0205, |
| "step": 21940 |
| }, |
| { |
| "epoch": 22.605561277033985, |
| "grad_norm": 0.22498224675655365, |
| "learning_rate": 1.983394074570244e-06, |
| "loss": 0.0223, |
| "step": 21950 |
| }, |
| { |
| "epoch": 22.615859938208033, |
| "grad_norm": 0.279851496219635, |
| "learning_rate": 1.9642278047404095e-06, |
| "loss": 0.0278, |
| "step": 21960 |
| }, |
| { |
| "epoch": 22.62615859938208, |
| "grad_norm": 0.22556227445602417, |
| "learning_rate": 1.9451527348939568e-06, |
| "loss": 0.0228, |
| "step": 21970 |
| }, |
| { |
| "epoch": 22.636457260556128, |
| "grad_norm": 0.13972289860248566, |
| "learning_rate": 1.926168901246539e-06, |
| "loss": 0.0228, |
| "step": 21980 |
| }, |
| { |
| "epoch": 22.646755921730175, |
| "grad_norm": 0.4148993194103241, |
| "learning_rate": 1.907276339840558e-06, |
| "loss": 0.0259, |
| "step": 21990 |
| }, |
| { |
| "epoch": 22.657054582904223, |
| "grad_norm": 0.33898934721946716, |
| "learning_rate": 1.8884750865451494e-06, |
| "loss": 0.0302, |
| "step": 22000 |
| }, |
| { |
| "epoch": 22.66735324407827, |
| "grad_norm": 0.2901424169540405, |
| "learning_rate": 1.8697651770560876e-06, |
| "loss": 0.021, |
| "step": 22010 |
| }, |
| { |
| "epoch": 22.67765190525232, |
| "grad_norm": 0.26389849185943604, |
| "learning_rate": 1.851146646895724e-06, |
| "loss": 0.0259, |
| "step": 22020 |
| }, |
| { |
| "epoch": 22.687950566426366, |
| "grad_norm": 0.43549320101737976, |
| "learning_rate": 1.8326195314129047e-06, |
| "loss": 0.0262, |
| "step": 22030 |
| }, |
| { |
| "epoch": 22.698249227600414, |
| "grad_norm": 0.17495097219944, |
| "learning_rate": 1.8141838657829313e-06, |
| "loss": 0.0253, |
| "step": 22040 |
| }, |
| { |
| "epoch": 22.708547888774458, |
| "grad_norm": 0.3065433204174042, |
| "learning_rate": 1.7958396850074832e-06, |
| "loss": 0.0218, |
| "step": 22050 |
| }, |
| { |
| "epoch": 22.718846549948505, |
| "grad_norm": 0.2452508509159088, |
| "learning_rate": 1.7775870239145398e-06, |
| "loss": 0.0183, |
| "step": 22060 |
| }, |
| { |
| "epoch": 22.729145211122553, |
| "grad_norm": 0.2773599326610565, |
| "learning_rate": 1.7594259171583195e-06, |
| "loss": 0.0234, |
| "step": 22070 |
| }, |
| { |
| "epoch": 22.7394438722966, |
| "grad_norm": 0.37604328989982605, |
| "learning_rate": 1.7413563992192294e-06, |
| "loss": 0.0187, |
| "step": 22080 |
| }, |
| { |
| "epoch": 22.74974253347065, |
| "grad_norm": 0.2879926264286041, |
| "learning_rate": 1.723378504403772e-06, |
| "loss": 0.0253, |
| "step": 22090 |
| }, |
| { |
| "epoch": 22.760041194644696, |
| "grad_norm": 0.6065763831138611, |
| "learning_rate": 1.7054922668445106e-06, |
| "loss": 0.0284, |
| "step": 22100 |
| }, |
| { |
| "epoch": 22.770339855818744, |
| "grad_norm": 0.2422868013381958, |
| "learning_rate": 1.687697720499981e-06, |
| "loss": 0.0286, |
| "step": 22110 |
| }, |
| { |
| "epoch": 22.78063851699279, |
| "grad_norm": 0.3824164569377899, |
| "learning_rate": 1.6699948991546366e-06, |
| "loss": 0.0218, |
| "step": 22120 |
| }, |
| { |
| "epoch": 22.79093717816684, |
| "grad_norm": 0.27847930788993835, |
| "learning_rate": 1.6523838364187806e-06, |
| "loss": 0.0289, |
| "step": 22130 |
| }, |
| { |
| "epoch": 22.801235839340887, |
| "grad_norm": 0.1401498168706894, |
| "learning_rate": 1.6348645657285166e-06, |
| "loss": 0.0207, |
| "step": 22140 |
| }, |
| { |
| "epoch": 22.811534500514934, |
| "grad_norm": 0.3438400328159332, |
| "learning_rate": 1.617437120345655e-06, |
| "loss": 0.0216, |
| "step": 22150 |
| }, |
| { |
| "epoch": 22.821833161688982, |
| "grad_norm": 0.18453820049762726, |
| "learning_rate": 1.6001015333576786e-06, |
| "loss": 0.0213, |
| "step": 22160 |
| }, |
| { |
| "epoch": 22.832131822863026, |
| "grad_norm": 0.38582921028137207, |
| "learning_rate": 1.5828578376776704e-06, |
| "loss": 0.0236, |
| "step": 22170 |
| }, |
| { |
| "epoch": 22.842430484037074, |
| "grad_norm": 0.2904150187969208, |
| "learning_rate": 1.565706066044248e-06, |
| "loss": 0.0225, |
| "step": 22180 |
| }, |
| { |
| "epoch": 22.85272914521112, |
| "grad_norm": 0.21802499890327454, |
| "learning_rate": 1.5486462510215016e-06, |
| "loss": 0.0253, |
| "step": 22190 |
| }, |
| { |
| "epoch": 22.86302780638517, |
| "grad_norm": 0.14389820396900177, |
| "learning_rate": 1.5316784249989447e-06, |
| "loss": 0.0253, |
| "step": 22200 |
| }, |
| { |
| "epoch": 22.873326467559217, |
| "grad_norm": 0.8343854546546936, |
| "learning_rate": 1.5148026201914134e-06, |
| "loss": 0.0231, |
| "step": 22210 |
| }, |
| { |
| "epoch": 22.883625128733264, |
| "grad_norm": 0.20952999591827393, |
| "learning_rate": 1.4980188686390672e-06, |
| "loss": 0.0233, |
| "step": 22220 |
| }, |
| { |
| "epoch": 22.893923789907312, |
| "grad_norm": 0.2580125331878662, |
| "learning_rate": 1.4813272022072778e-06, |
| "loss": 0.0239, |
| "step": 22230 |
| }, |
| { |
| "epoch": 22.90422245108136, |
| "grad_norm": 0.31164586544036865, |
| "learning_rate": 1.4647276525865894e-06, |
| "loss": 0.0282, |
| "step": 22240 |
| }, |
| { |
| "epoch": 22.914521112255407, |
| "grad_norm": 0.1792629510164261, |
| "learning_rate": 1.448220251292648e-06, |
| "loss": 0.0195, |
| "step": 22250 |
| }, |
| { |
| "epoch": 22.924819773429455, |
| "grad_norm": 0.37863343954086304, |
| "learning_rate": 1.431805029666161e-06, |
| "loss": 0.0265, |
| "step": 22260 |
| }, |
| { |
| "epoch": 22.935118434603503, |
| "grad_norm": 0.2690068781375885, |
| "learning_rate": 1.41548201887281e-06, |
| "loss": 0.024, |
| "step": 22270 |
| }, |
| { |
| "epoch": 22.94541709577755, |
| "grad_norm": 0.236667662858963, |
| "learning_rate": 1.3992512499032217e-06, |
| "loss": 0.0244, |
| "step": 22280 |
| }, |
| { |
| "epoch": 22.955715756951598, |
| "grad_norm": 0.2539116144180298, |
| "learning_rate": 1.3831127535728794e-06, |
| "loss": 0.023, |
| "step": 22290 |
| }, |
| { |
| "epoch": 22.966014418125642, |
| "grad_norm": 0.2247859090566635, |
| "learning_rate": 1.3670665605220845e-06, |
| "loss": 0.0203, |
| "step": 22300 |
| }, |
| { |
| "epoch": 22.97631307929969, |
| "grad_norm": 0.292784184217453, |
| "learning_rate": 1.3511127012159007e-06, |
| "loss": 0.0196, |
| "step": 22310 |
| }, |
| { |
| "epoch": 22.986611740473737, |
| "grad_norm": 0.22877749800682068, |
| "learning_rate": 1.3352512059440825e-06, |
| "loss": 0.0234, |
| "step": 22320 |
| }, |
| { |
| "epoch": 22.996910401647785, |
| "grad_norm": 0.3564964830875397, |
| "learning_rate": 1.3194821048210126e-06, |
| "loss": 0.0208, |
| "step": 22330 |
| }, |
| { |
| "epoch": 23.007209062821833, |
| "grad_norm": 0.2306360900402069, |
| "learning_rate": 1.3038054277856703e-06, |
| "loss": 0.0247, |
| "step": 22340 |
| }, |
| { |
| "epoch": 23.01750772399588, |
| "grad_norm": 0.37728771567344666, |
| "learning_rate": 1.2882212046015641e-06, |
| "loss": 0.0268, |
| "step": 22350 |
| }, |
| { |
| "epoch": 23.027806385169928, |
| "grad_norm": 0.21463976800441742, |
| "learning_rate": 1.2727294648566424e-06, |
| "loss": 0.0236, |
| "step": 22360 |
| }, |
| { |
| "epoch": 23.038105046343976, |
| "grad_norm": 0.24805015325546265, |
| "learning_rate": 1.2573302379633112e-06, |
| "loss": 0.0204, |
| "step": 22370 |
| }, |
| { |
| "epoch": 23.048403707518023, |
| "grad_norm": 0.20930248498916626, |
| "learning_rate": 1.2420235531582892e-06, |
| "loss": 0.0234, |
| "step": 22380 |
| }, |
| { |
| "epoch": 23.05870236869207, |
| "grad_norm": 0.32791492342948914, |
| "learning_rate": 1.2268094395026186e-06, |
| "loss": 0.0264, |
| "step": 22390 |
| }, |
| { |
| "epoch": 23.06900102986612, |
| "grad_norm": 0.1751231551170349, |
| "learning_rate": 1.2116879258815772e-06, |
| "loss": 0.0192, |
| "step": 22400 |
| }, |
| { |
| "epoch": 23.079299691040166, |
| "grad_norm": 0.12545523047447205, |
| "learning_rate": 1.1966590410046607e-06, |
| "loss": 0.022, |
| "step": 22410 |
| }, |
| { |
| "epoch": 23.089598352214214, |
| "grad_norm": 0.29402562975883484, |
| "learning_rate": 1.1817228134054502e-06, |
| "loss": 0.0277, |
| "step": 22420 |
| }, |
| { |
| "epoch": 23.099897013388258, |
| "grad_norm": 0.17338956892490387, |
| "learning_rate": 1.1668792714416676e-06, |
| "loss": 0.0271, |
| "step": 22430 |
| }, |
| { |
| "epoch": 23.110195674562306, |
| "grad_norm": 0.16912265121936798, |
| "learning_rate": 1.152128443295014e-06, |
| "loss": 0.0259, |
| "step": 22440 |
| }, |
| { |
| "epoch": 23.120494335736353, |
| "grad_norm": 0.2655620276927948, |
| "learning_rate": 1.1374703569711986e-06, |
| "loss": 0.0267, |
| "step": 22450 |
| }, |
| { |
| "epoch": 23.1307929969104, |
| "grad_norm": 0.23414306342601776, |
| "learning_rate": 1.1229050402998375e-06, |
| "loss": 0.0298, |
| "step": 22460 |
| }, |
| { |
| "epoch": 23.14109165808445, |
| "grad_norm": 0.28117451071739197, |
| "learning_rate": 1.1084325209344216e-06, |
| "loss": 0.0311, |
| "step": 22470 |
| }, |
| { |
| "epoch": 23.151390319258496, |
| "grad_norm": 0.2047118991613388, |
| "learning_rate": 1.0940528263522376e-06, |
| "loss": 0.0237, |
| "step": 22480 |
| }, |
| { |
| "epoch": 23.161688980432544, |
| "grad_norm": 0.2524803578853607, |
| "learning_rate": 1.0797659838543805e-06, |
| "loss": 0.0246, |
| "step": 22490 |
| }, |
| { |
| "epoch": 23.17198764160659, |
| "grad_norm": 0.16907493770122528, |
| "learning_rate": 1.0655720205656083e-06, |
| "loss": 0.0229, |
| "step": 22500 |
| }, |
| { |
| "epoch": 23.18228630278064, |
| "grad_norm": 0.2680252492427826, |
| "learning_rate": 1.0514709634343812e-06, |
| "loss": 0.0263, |
| "step": 22510 |
| }, |
| { |
| "epoch": 23.192584963954687, |
| "grad_norm": 0.24415595829486847, |
| "learning_rate": 1.0374628392327335e-06, |
| "loss": 0.0223, |
| "step": 22520 |
| }, |
| { |
| "epoch": 23.202883625128734, |
| "grad_norm": 0.2939612865447998, |
| "learning_rate": 1.0235476745562967e-06, |
| "loss": 0.023, |
| "step": 22530 |
| }, |
| { |
| "epoch": 23.213182286302782, |
| "grad_norm": 0.2137501835823059, |
| "learning_rate": 1.0097254958241653e-06, |
| "loss": 0.0198, |
| "step": 22540 |
| }, |
| { |
| "epoch": 23.22348094747683, |
| "grad_norm": 0.1515851467847824, |
| "learning_rate": 9.959963292789364e-07, |
| "loss": 0.022, |
| "step": 22550 |
| }, |
| { |
| "epoch": 23.233779608650874, |
| "grad_norm": 0.24298404157161713, |
| "learning_rate": 9.823602009865873e-07, |
| "loss": 0.0231, |
| "step": 22560 |
| }, |
| { |
| "epoch": 23.24407826982492, |
| "grad_norm": 0.3320057690143585, |
| "learning_rate": 9.68817136836464e-07, |
| "loss": 0.0219, |
| "step": 22570 |
| }, |
| { |
| "epoch": 23.25437693099897, |
| "grad_norm": 0.4774312674999237, |
| "learning_rate": 9.553671625412264e-07, |
| "loss": 0.0286, |
| "step": 22580 |
| }, |
| { |
| "epoch": 23.264675592173017, |
| "grad_norm": 0.3456204831600189, |
| "learning_rate": 9.420103036367811e-07, |
| "loss": 0.018, |
| "step": 22590 |
| }, |
| { |
| "epoch": 23.274974253347064, |
| "grad_norm": 0.1354740411043167, |
| "learning_rate": 9.287465854822597e-07, |
| "loss": 0.0247, |
| "step": 22600 |
| }, |
| { |
| "epoch": 23.285272914521112, |
| "grad_norm": 0.27582287788391113, |
| "learning_rate": 9.155760332599627e-07, |
| "loss": 0.0237, |
| "step": 22610 |
| }, |
| { |
| "epoch": 23.29557157569516, |
| "grad_norm": 0.23232264816761017, |
| "learning_rate": 9.024986719752881e-07, |
| "loss": 0.0229, |
| "step": 22620 |
| }, |
| { |
| "epoch": 23.305870236869207, |
| "grad_norm": 0.33807799220085144, |
| "learning_rate": 8.89514526456714e-07, |
| "loss": 0.0255, |
| "step": 22630 |
| }, |
| { |
| "epoch": 23.316168898043255, |
| "grad_norm": 0.2717418670654297, |
| "learning_rate": 8.766236213557544e-07, |
| "loss": 0.0157, |
| "step": 22640 |
| }, |
| { |
| "epoch": 23.326467559217303, |
| "grad_norm": 0.233852356672287, |
| "learning_rate": 8.638259811468708e-07, |
| "loss": 0.0235, |
| "step": 22650 |
| }, |
| { |
| "epoch": 23.33676622039135, |
| "grad_norm": 0.3999418318271637, |
| "learning_rate": 8.511216301274772e-07, |
| "loss": 0.0235, |
| "step": 22660 |
| }, |
| { |
| "epoch": 23.347064881565398, |
| "grad_norm": 0.258497029542923, |
| "learning_rate": 8.385105924178516e-07, |
| "loss": 0.0277, |
| "step": 22670 |
| }, |
| { |
| "epoch": 23.357363542739442, |
| "grad_norm": 0.27405309677124023, |
| "learning_rate": 8.259928919611248e-07, |
| "loss": 0.0222, |
| "step": 22680 |
| }, |
| { |
| "epoch": 23.36766220391349, |
| "grad_norm": 0.13552913069725037, |
| "learning_rate": 8.135685525232028e-07, |
| "loss": 0.0205, |
| "step": 22690 |
| }, |
| { |
| "epoch": 23.377960865087537, |
| "grad_norm": 0.4466046094894409, |
| "learning_rate": 8.012375976927611e-07, |
| "loss": 0.0293, |
| "step": 22700 |
| }, |
| { |
| "epoch": 23.388259526261585, |
| "grad_norm": 0.15895040333271027, |
| "learning_rate": 7.890000508811501e-07, |
| "loss": 0.0293, |
| "step": 22710 |
| }, |
| { |
| "epoch": 23.398558187435633, |
| "grad_norm": 0.27999603748321533, |
| "learning_rate": 7.768559353223958e-07, |
| "loss": 0.0244, |
| "step": 22720 |
| }, |
| { |
| "epoch": 23.40885684860968, |
| "grad_norm": 0.30583688616752625, |
| "learning_rate": 7.648052740731215e-07, |
| "loss": 0.0237, |
| "step": 22730 |
| }, |
| { |
| "epoch": 23.419155509783728, |
| "grad_norm": 0.16827332973480225, |
| "learning_rate": 7.528480900125368e-07, |
| "loss": 0.024, |
| "step": 22740 |
| }, |
| { |
| "epoch": 23.429454170957776, |
| "grad_norm": 0.24422600865364075, |
| "learning_rate": 7.409844058423709e-07, |
| "loss": 0.0298, |
| "step": 22750 |
| }, |
| { |
| "epoch": 23.439752832131823, |
| "grad_norm": 0.17749178409576416, |
| "learning_rate": 7.292142440868289e-07, |
| "loss": 0.0235, |
| "step": 22760 |
| }, |
| { |
| "epoch": 23.45005149330587, |
| "grad_norm": 0.3712615370750427, |
| "learning_rate": 7.175376270925571e-07, |
| "loss": 0.028, |
| "step": 22770 |
| }, |
| { |
| "epoch": 23.46035015447992, |
| "grad_norm": 0.3284103572368622, |
| "learning_rate": 7.059545770286058e-07, |
| "loss": 0.0234, |
| "step": 22780 |
| }, |
| { |
| "epoch": 23.470648815653966, |
| "grad_norm": 0.3628927767276764, |
| "learning_rate": 6.94465115886378e-07, |
| "loss": 0.0279, |
| "step": 22790 |
| }, |
| { |
| "epoch": 23.480947476828014, |
| "grad_norm": 0.24092569947242737, |
| "learning_rate": 6.830692654795856e-07, |
| "loss": 0.0295, |
| "step": 22800 |
| }, |
| { |
| "epoch": 23.491246138002058, |
| "grad_norm": 0.25664201378822327, |
| "learning_rate": 6.717670474442217e-07, |
| "loss": 0.0237, |
| "step": 22810 |
| }, |
| { |
| "epoch": 23.501544799176106, |
| "grad_norm": 0.31676584482192993, |
| "learning_rate": 6.605584832384992e-07, |
| "loss": 0.0247, |
| "step": 22820 |
| }, |
| { |
| "epoch": 23.511843460350153, |
| "grad_norm": 0.2529754936695099, |
| "learning_rate": 6.49443594142829e-07, |
| "loss": 0.0272, |
| "step": 22830 |
| }, |
| { |
| "epoch": 23.5221421215242, |
| "grad_norm": 0.224832683801651, |
| "learning_rate": 6.384224012597695e-07, |
| "loss": 0.0262, |
| "step": 22840 |
| }, |
| { |
| "epoch": 23.53244078269825, |
| "grad_norm": 0.35248956084251404, |
| "learning_rate": 6.274949255139883e-07, |
| "loss": 0.0233, |
| "step": 22850 |
| }, |
| { |
| "epoch": 23.542739443872296, |
| "grad_norm": 0.146409809589386, |
| "learning_rate": 6.166611876522288e-07, |
| "loss": 0.0214, |
| "step": 22860 |
| }, |
| { |
| "epoch": 23.553038105046344, |
| "grad_norm": 0.25351008772850037, |
| "learning_rate": 6.059212082432542e-07, |
| "loss": 0.0204, |
| "step": 22870 |
| }, |
| { |
| "epoch": 23.56333676622039, |
| "grad_norm": 0.22615984082221985, |
| "learning_rate": 5.952750076778312e-07, |
| "loss": 0.0197, |
| "step": 22880 |
| }, |
| { |
| "epoch": 23.57363542739444, |
| "grad_norm": 0.4564981162548065, |
| "learning_rate": 5.847226061686695e-07, |
| "loss": 0.0263, |
| "step": 22890 |
| }, |
| { |
| "epoch": 23.583934088568487, |
| "grad_norm": 0.2238938808441162, |
| "learning_rate": 5.742640237503927e-07, |
| "loss": 0.0202, |
| "step": 22900 |
| }, |
| { |
| "epoch": 23.594232749742535, |
| "grad_norm": 0.3683464825153351, |
| "learning_rate": 5.638992802795173e-07, |
| "loss": 0.0288, |
| "step": 22910 |
| }, |
| { |
| "epoch": 23.604531410916582, |
| "grad_norm": 0.21569469571113586, |
| "learning_rate": 5.536283954343747e-07, |
| "loss": 0.025, |
| "step": 22920 |
| }, |
| { |
| "epoch": 23.61483007209063, |
| "grad_norm": 0.24484778940677643, |
| "learning_rate": 5.434513887151216e-07, |
| "loss": 0.0241, |
| "step": 22930 |
| }, |
| { |
| "epoch": 23.625128733264674, |
| "grad_norm": 0.2003822773694992, |
| "learning_rate": 5.333682794436578e-07, |
| "loss": 0.0256, |
| "step": 22940 |
| }, |
| { |
| "epoch": 23.63542739443872, |
| "grad_norm": 0.20311187207698822, |
| "learning_rate": 5.233790867636257e-07, |
| "loss": 0.0253, |
| "step": 22950 |
| }, |
| { |
| "epoch": 23.64572605561277, |
| "grad_norm": 0.2250942438840866, |
| "learning_rate": 5.134838296403544e-07, |
| "loss": 0.0211, |
| "step": 22960 |
| }, |
| { |
| "epoch": 23.656024716786817, |
| "grad_norm": 0.21214169263839722, |
| "learning_rate": 5.03682526860827e-07, |
| "loss": 0.026, |
| "step": 22970 |
| }, |
| { |
| "epoch": 23.666323377960865, |
| "grad_norm": 0.23194220662117004, |
| "learning_rate": 4.939751970336415e-07, |
| "loss": 0.0224, |
| "step": 22980 |
| }, |
| { |
| "epoch": 23.676622039134912, |
| "grad_norm": 0.19989456236362457, |
| "learning_rate": 4.843618585889942e-07, |
| "loss": 0.0252, |
| "step": 22990 |
| }, |
| { |
| "epoch": 23.68692070030896, |
| "grad_norm": 0.23081596195697784, |
| "learning_rate": 4.748425297786241e-07, |
| "loss": 0.0309, |
| "step": 23000 |
| }, |
| { |
| "epoch": 23.697219361483008, |
| "grad_norm": 0.35934457182884216, |
| "learning_rate": 4.654172286757741e-07, |
| "loss": 0.0237, |
| "step": 23010 |
| }, |
| { |
| "epoch": 23.707518022657055, |
| "grad_norm": 0.3558262586593628, |
| "learning_rate": 4.5608597317517987e-07, |
| "loss": 0.0199, |
| "step": 23020 |
| }, |
| { |
| "epoch": 23.717816683831103, |
| "grad_norm": 0.22818619012832642, |
| "learning_rate": 4.468487809930255e-07, |
| "loss": 0.0208, |
| "step": 23030 |
| }, |
| { |
| "epoch": 23.72811534500515, |
| "grad_norm": 0.23627056181430817, |
| "learning_rate": 4.377056696668991e-07, |
| "loss": 0.0229, |
| "step": 23040 |
| }, |
| { |
| "epoch": 23.738414006179198, |
| "grad_norm": 0.4432663023471832, |
| "learning_rate": 4.286566565557759e-07, |
| "loss": 0.0208, |
| "step": 23050 |
| }, |
| { |
| "epoch": 23.748712667353246, |
| "grad_norm": 0.19427362084388733, |
| "learning_rate": 4.197017588399743e-07, |
| "loss": 0.0191, |
| "step": 23060 |
| }, |
| { |
| "epoch": 23.75901132852729, |
| "grad_norm": 0.3566558361053467, |
| "learning_rate": 4.108409935211166e-07, |
| "loss": 0.0251, |
| "step": 23070 |
| }, |
| { |
| "epoch": 23.769309989701338, |
| "grad_norm": 0.19104306399822235, |
| "learning_rate": 4.0207437742212363e-07, |
| "loss": 0.0285, |
| "step": 23080 |
| }, |
| { |
| "epoch": 23.779608650875385, |
| "grad_norm": 0.37885013222694397, |
| "learning_rate": 3.934019271871592e-07, |
| "loss": 0.0256, |
| "step": 23090 |
| }, |
| { |
| "epoch": 23.789907312049433, |
| "grad_norm": 0.20795543491840363, |
| "learning_rate": 3.8482365928160236e-07, |
| "loss": 0.0215, |
| "step": 23100 |
| }, |
| { |
| "epoch": 23.80020597322348, |
| "grad_norm": 0.14693936705589294, |
| "learning_rate": 3.7633958999202523e-07, |
| "loss": 0.0255, |
| "step": 23110 |
| }, |
| { |
| "epoch": 23.810504634397528, |
| "grad_norm": 0.19041191041469574, |
| "learning_rate": 3.679497354261485e-07, |
| "loss": 0.0257, |
| "step": 23120 |
| }, |
| { |
| "epoch": 23.820803295571576, |
| "grad_norm": 0.3607507050037384, |
| "learning_rate": 3.5965411151282493e-07, |
| "loss": 0.0211, |
| "step": 23130 |
| }, |
| { |
| "epoch": 23.831101956745623, |
| "grad_norm": 0.138556107878685, |
| "learning_rate": 3.5145273400200017e-07, |
| "loss": 0.019, |
| "step": 23140 |
| }, |
| { |
| "epoch": 23.84140061791967, |
| "grad_norm": 0.14976058900356293, |
| "learning_rate": 3.4334561846467995e-07, |
| "loss": 0.0217, |
| "step": 23150 |
| }, |
| { |
| "epoch": 23.85169927909372, |
| "grad_norm": 0.7945707440376282, |
| "learning_rate": 3.353327802929074e-07, |
| "loss": 0.027, |
| "step": 23160 |
| }, |
| { |
| "epoch": 23.861997940267766, |
| "grad_norm": 0.7725450992584229, |
| "learning_rate": 3.274142346997466e-07, |
| "loss": 0.0255, |
| "step": 23170 |
| }, |
| { |
| "epoch": 23.872296601441814, |
| "grad_norm": 0.25931257009506226, |
| "learning_rate": 3.195899967192162e-07, |
| "loss": 0.0243, |
| "step": 23180 |
| }, |
| { |
| "epoch": 23.882595262615858, |
| "grad_norm": 0.5646887421607971, |
| "learning_rate": 3.118600812063e-07, |
| "loss": 0.0265, |
| "step": 23190 |
| }, |
| { |
| "epoch": 23.892893923789906, |
| "grad_norm": 0.21446570754051208, |
| "learning_rate": 3.042245028368973e-07, |
| "loss": 0.024, |
| "step": 23200 |
| }, |
| { |
| "epoch": 23.903192584963953, |
| "grad_norm": 0.5765520930290222, |
| "learning_rate": 2.966832761077953e-07, |
| "loss": 0.0249, |
| "step": 23210 |
| }, |
| { |
| "epoch": 23.913491246138, |
| "grad_norm": 0.7060703039169312, |
| "learning_rate": 2.892364153366578e-07, |
| "loss": 0.0312, |
| "step": 23220 |
| }, |
| { |
| "epoch": 23.92378990731205, |
| "grad_norm": 0.3687019646167755, |
| "learning_rate": 2.818839346619806e-07, |
| "loss": 0.0231, |
| "step": 23230 |
| }, |
| { |
| "epoch": 23.934088568486096, |
| "grad_norm": 0.28896424174308777, |
| "learning_rate": 2.7462584804306966e-07, |
| "loss": 0.0291, |
| "step": 23240 |
| }, |
| { |
| "epoch": 23.944387229660144, |
| "grad_norm": 0.7275195121765137, |
| "learning_rate": 2.6746216926001875e-07, |
| "loss": 0.028, |
| "step": 23250 |
| }, |
| { |
| "epoch": 23.95468589083419, |
| "grad_norm": 0.3027587831020355, |
| "learning_rate": 2.603929119136761e-07, |
| "loss": 0.0276, |
| "step": 23260 |
| }, |
| { |
| "epoch": 23.96498455200824, |
| "grad_norm": 0.14704370498657227, |
| "learning_rate": 2.534180894256277e-07, |
| "loss": 0.0263, |
| "step": 23270 |
| }, |
| { |
| "epoch": 23.975283213182287, |
| "grad_norm": 0.1789170503616333, |
| "learning_rate": 2.4653771503816424e-07, |
| "loss": 0.0263, |
| "step": 23280 |
| }, |
| { |
| "epoch": 23.985581874356335, |
| "grad_norm": 0.20929020643234253, |
| "learning_rate": 2.3975180181426414e-07, |
| "loss": 0.0221, |
| "step": 23290 |
| }, |
| { |
| "epoch": 23.995880535530382, |
| "grad_norm": 0.18785648047924042, |
| "learning_rate": 2.3306036263754938e-07, |
| "loss": 0.0255, |
| "step": 23300 |
| }, |
| { |
| "epoch": 24.00617919670443, |
| "grad_norm": 0.8915624022483826, |
| "learning_rate": 2.264634102122909e-07, |
| "loss": 0.0232, |
| "step": 23310 |
| }, |
| { |
| "epoch": 24.016477857878474, |
| "grad_norm": 0.24266694486141205, |
| "learning_rate": 2.1996095706335872e-07, |
| "loss": 0.0242, |
| "step": 23320 |
| }, |
| { |
| "epoch": 24.02677651905252, |
| "grad_norm": 0.1327933520078659, |
| "learning_rate": 2.1355301553621644e-07, |
| "loss": 0.0304, |
| "step": 23330 |
| }, |
| { |
| "epoch": 24.03707518022657, |
| "grad_norm": 0.3175448179244995, |
| "learning_rate": 2.072395977968711e-07, |
| "loss": 0.0211, |
| "step": 23340 |
| }, |
| { |
| "epoch": 24.047373841400617, |
| "grad_norm": 0.19741173088550568, |
| "learning_rate": 2.0102071583190108e-07, |
| "loss": 0.0179, |
| "step": 23350 |
| }, |
| { |
| "epoch": 24.057672502574665, |
| "grad_norm": 0.363275408744812, |
| "learning_rate": 1.9489638144836176e-07, |
| "loss": 0.0255, |
| "step": 23360 |
| }, |
| { |
| "epoch": 24.067971163748712, |
| "grad_norm": 0.2827618420124054, |
| "learning_rate": 1.8886660627383534e-07, |
| "loss": 0.0216, |
| "step": 23370 |
| }, |
| { |
| "epoch": 24.07826982492276, |
| "grad_norm": 0.13472236692905426, |
| "learning_rate": 1.829314017563477e-07, |
| "loss": 0.0196, |
| "step": 23380 |
| }, |
| { |
| "epoch": 24.088568486096808, |
| "grad_norm": 0.25898340344429016, |
| "learning_rate": 1.7709077916440163e-07, |
| "loss": 0.0229, |
| "step": 23390 |
| }, |
| { |
| "epoch": 24.098867147270855, |
| "grad_norm": 0.5560616850852966, |
| "learning_rate": 1.7134474958689917e-07, |
| "loss": 0.0216, |
| "step": 23400 |
| }, |
| { |
| "epoch": 24.109165808444903, |
| "grad_norm": 0.32972094416618347, |
| "learning_rate": 1.6569332393317483e-07, |
| "loss": 0.0259, |
| "step": 23410 |
| }, |
| { |
| "epoch": 24.11946446961895, |
| "grad_norm": 0.2403629869222641, |
| "learning_rate": 1.6013651293293464e-07, |
| "loss": 0.0252, |
| "step": 23420 |
| }, |
| { |
| "epoch": 24.129763130793, |
| "grad_norm": 0.237924724817276, |
| "learning_rate": 1.5467432713625607e-07, |
| "loss": 0.0235, |
| "step": 23430 |
| }, |
| { |
| "epoch": 24.140061791967046, |
| "grad_norm": 0.21502567827701569, |
| "learning_rate": 1.4930677691356033e-07, |
| "loss": 0.0205, |
| "step": 23440 |
| }, |
| { |
| "epoch": 24.15036045314109, |
| "grad_norm": 0.27757346630096436, |
| "learning_rate": 1.4403387245560117e-07, |
| "loss": 0.0322, |
| "step": 23450 |
| }, |
| { |
| "epoch": 24.160659114315138, |
| "grad_norm": 0.2888646125793457, |
| "learning_rate": 1.3885562377343176e-07, |
| "loss": 0.0205, |
| "step": 23460 |
| }, |
| { |
| "epoch": 24.170957775489185, |
| "grad_norm": 0.25868770480155945, |
| "learning_rate": 1.3377204069839333e-07, |
| "loss": 0.0224, |
| "step": 23470 |
| }, |
| { |
| "epoch": 24.181256436663233, |
| "grad_norm": 0.25292280316352844, |
| "learning_rate": 1.2878313288209876e-07, |
| "loss": 0.0216, |
| "step": 23480 |
| }, |
| { |
| "epoch": 24.19155509783728, |
| "grad_norm": 0.8537740707397461, |
| "learning_rate": 1.2388890979641576e-07, |
| "loss": 0.031, |
| "step": 23490 |
| }, |
| { |
| "epoch": 24.20185375901133, |
| "grad_norm": 0.19652284681797028, |
| "learning_rate": 1.1908938073344477e-07, |
| "loss": 0.022, |
| "step": 23500 |
| }, |
| { |
| "epoch": 24.212152420185376, |
| "grad_norm": 0.2277028113603592, |
| "learning_rate": 1.1438455480549115e-07, |
| "loss": 0.0212, |
| "step": 23510 |
| }, |
| { |
| "epoch": 24.222451081359424, |
| "grad_norm": 0.28218087553977966, |
| "learning_rate": 1.0977444094506517e-07, |
| "loss": 0.0226, |
| "step": 23520 |
| }, |
| { |
| "epoch": 24.23274974253347, |
| "grad_norm": 0.3701043426990509, |
| "learning_rate": 1.0525904790485985e-07, |
| "loss": 0.0252, |
| "step": 23530 |
| }, |
| { |
| "epoch": 24.24304840370752, |
| "grad_norm": 0.19152191281318665, |
| "learning_rate": 1.0083838425773984e-07, |
| "loss": 0.0219, |
| "step": 23540 |
| }, |
| { |
| "epoch": 24.253347064881567, |
| "grad_norm": 0.31448763608932495, |
| "learning_rate": 9.651245839669698e-08, |
| "loss": 0.0294, |
| "step": 23550 |
| }, |
| { |
| "epoch": 24.263645726055614, |
| "grad_norm": 0.32265207171440125, |
| "learning_rate": 9.228127853487811e-08, |
| "loss": 0.0185, |
| "step": 23560 |
| }, |
| { |
| "epoch": 24.273944387229662, |
| "grad_norm": 0.2978125214576721, |
| "learning_rate": 8.814485270553508e-08, |
| "loss": 0.0245, |
| "step": 23570 |
| }, |
| { |
| "epoch": 24.284243048403706, |
| "grad_norm": 0.3098287582397461, |
| "learning_rate": 8.410318876201362e-08, |
| "loss": 0.0235, |
| "step": 23580 |
| }, |
| { |
| "epoch": 24.294541709577754, |
| "grad_norm": 0.2803812026977539, |
| "learning_rate": 8.01562943777645e-08, |
| "loss": 0.0249, |
| "step": 23590 |
| }, |
| { |
| "epoch": 24.3048403707518, |
| "grad_norm": 0.23657964169979095, |
| "learning_rate": 7.630417704630466e-08, |
| "loss": 0.0217, |
| "step": 23600 |
| }, |
| { |
| "epoch": 24.31513903192585, |
| "grad_norm": 0.14953146874904633, |
| "learning_rate": 7.254684408118939e-08, |
| "loss": 0.0242, |
| "step": 23610 |
| }, |
| { |
| "epoch": 24.325437693099897, |
| "grad_norm": 0.28647905588150024, |
| "learning_rate": 6.888430261605128e-08, |
| "loss": 0.0206, |
| "step": 23620 |
| }, |
| { |
| "epoch": 24.335736354273944, |
| "grad_norm": 0.18508291244506836, |
| "learning_rate": 6.531655960452243e-08, |
| "loss": 0.0239, |
| "step": 23630 |
| }, |
| { |
| "epoch": 24.346035015447992, |
| "grad_norm": 0.25578784942626953, |
| "learning_rate": 6.184362182026781e-08, |
| "loss": 0.0255, |
| "step": 23640 |
| }, |
| { |
| "epoch": 24.35633367662204, |
| "grad_norm": 0.30153825879096985, |
| "learning_rate": 5.8465495856963035e-08, |
| "loss": 0.0199, |
| "step": 23650 |
| }, |
| { |
| "epoch": 24.366632337796087, |
| "grad_norm": 0.21878878772258759, |
| "learning_rate": 5.5182188128261035e-08, |
| "loss": 0.0277, |
| "step": 23660 |
| }, |
| { |
| "epoch": 24.376930998970135, |
| "grad_norm": 0.25511273741722107, |
| "learning_rate": 5.199370486779209e-08, |
| "loss": 0.0293, |
| "step": 23670 |
| }, |
| { |
| "epoch": 24.387229660144182, |
| "grad_norm": 0.42204421758651733, |
| "learning_rate": 4.8900052129174924e-08, |
| "loss": 0.0244, |
| "step": 23680 |
| }, |
| { |
| "epoch": 24.39752832131823, |
| "grad_norm": 0.3273943066596985, |
| "learning_rate": 4.590123578596117e-08, |
| "loss": 0.024, |
| "step": 23690 |
| }, |
| { |
| "epoch": 24.407826982492274, |
| "grad_norm": 0.4405585825443268, |
| "learning_rate": 4.299726153166317e-08, |
| "loss": 0.0224, |
| "step": 23700 |
| }, |
| { |
| "epoch": 24.418125643666322, |
| "grad_norm": 0.3542063236236572, |
| "learning_rate": 4.0188134879715064e-08, |
| "loss": 0.0194, |
| "step": 23710 |
| }, |
| { |
| "epoch": 24.42842430484037, |
| "grad_norm": 0.48409783840179443, |
| "learning_rate": 3.747386116349505e-08, |
| "loss": 0.0234, |
| "step": 23720 |
| }, |
| { |
| "epoch": 24.438722966014417, |
| "grad_norm": 0.22048763930797577, |
| "learning_rate": 3.485444553626982e-08, |
| "loss": 0.0212, |
| "step": 23730 |
| }, |
| { |
| "epoch": 24.449021627188465, |
| "grad_norm": 0.2321694940328598, |
| "learning_rate": 3.232989297122791e-08, |
| "loss": 0.0286, |
| "step": 23740 |
| }, |
| { |
| "epoch": 24.459320288362512, |
| "grad_norm": 0.23533855378627777, |
| "learning_rate": 2.99002082614408e-08, |
| "loss": 0.0242, |
| "step": 23750 |
| }, |
| { |
| "epoch": 24.46961894953656, |
| "grad_norm": 0.2119673490524292, |
| "learning_rate": 2.7565396019879618e-08, |
| "loss": 0.0214, |
| "step": 23760 |
| }, |
| { |
| "epoch": 24.479917610710608, |
| "grad_norm": 0.2214546799659729, |
| "learning_rate": 2.5325460679376246e-08, |
| "loss": 0.0292, |
| "step": 23770 |
| }, |
| { |
| "epoch": 24.490216271884655, |
| "grad_norm": 0.751918375492096, |
| "learning_rate": 2.3180406492634422e-08, |
| "loss": 0.0278, |
| "step": 23780 |
| }, |
| { |
| "epoch": 24.500514933058703, |
| "grad_norm": 0.4512670934200287, |
| "learning_rate": 2.113023753222976e-08, |
| "loss": 0.02, |
| "step": 23790 |
| }, |
| { |
| "epoch": 24.51081359423275, |
| "grad_norm": 0.29734495282173157, |
| "learning_rate": 1.9174957690581972e-08, |
| "loss": 0.0282, |
| "step": 23800 |
| }, |
| { |
| "epoch": 24.5211122554068, |
| "grad_norm": 0.1899419128894806, |
| "learning_rate": 1.7314570679949347e-08, |
| "loss": 0.0237, |
| "step": 23810 |
| }, |
| { |
| "epoch": 24.531410916580846, |
| "grad_norm": 0.8968344330787659, |
| "learning_rate": 1.5549080032434273e-08, |
| "loss": 0.0189, |
| "step": 23820 |
| }, |
| { |
| "epoch": 24.54170957775489, |
| "grad_norm": 0.16808949410915375, |
| "learning_rate": 1.3878489099972136e-08, |
| "loss": 0.0253, |
| "step": 23830 |
| }, |
| { |
| "epoch": 24.552008238928938, |
| "grad_norm": 0.3582736849784851, |
| "learning_rate": 1.2302801054325797e-08, |
| "loss": 0.0213, |
| "step": 23840 |
| }, |
| { |
| "epoch": 24.562306900102985, |
| "grad_norm": 0.3200194239616394, |
| "learning_rate": 1.0822018887063357e-08, |
| "loss": 0.0253, |
| "step": 23850 |
| }, |
| { |
| "epoch": 24.572605561277033, |
| "grad_norm": 0.2794504463672638, |
| "learning_rate": 9.436145409585927e-09, |
| "loss": 0.0283, |
| "step": 23860 |
| }, |
| { |
| "epoch": 24.58290422245108, |
| "grad_norm": 0.2662137746810913, |
| "learning_rate": 8.145183253083222e-09, |
| "loss": 0.023, |
| "step": 23870 |
| }, |
| { |
| "epoch": 24.59320288362513, |
| "grad_norm": 0.4419270157814026, |
| "learning_rate": 6.9491348685613025e-09, |
| "loss": 0.0287, |
| "step": 23880 |
| }, |
| { |
| "epoch": 24.603501544799176, |
| "grad_norm": 0.24515603482723236, |
| "learning_rate": 5.848002526814833e-09, |
| "loss": 0.0226, |
| "step": 23890 |
| }, |
| { |
| "epoch": 24.613800205973224, |
| "grad_norm": 0.24852906167507172, |
| "learning_rate": 4.8417883184381784e-09, |
| "loss": 0.0265, |
| "step": 23900 |
| }, |
| { |
| "epoch": 24.62409886714727, |
| "grad_norm": 0.24117045104503632, |
| "learning_rate": 3.930494153819853e-09, |
| "loss": 0.0214, |
| "step": 23910 |
| }, |
| { |
| "epoch": 24.63439752832132, |
| "grad_norm": 0.41323116421699524, |
| "learning_rate": 3.1141217631203147e-09, |
| "loss": 0.0279, |
| "step": 23920 |
| }, |
| { |
| "epoch": 24.644696189495367, |
| "grad_norm": 0.2782849669456482, |
| "learning_rate": 2.3926726962997248e-09, |
| "loss": 0.0261, |
| "step": 23930 |
| }, |
| { |
| "epoch": 24.654994850669414, |
| "grad_norm": 0.2185438722372055, |
| "learning_rate": 1.7661483230846377e-09, |
| "loss": 0.0271, |
| "step": 23940 |
| }, |
| { |
| "epoch": 24.665293511843462, |
| "grad_norm": 0.4621998369693756, |
| "learning_rate": 1.234549832984655e-09, |
| "loss": 0.0255, |
| "step": 23950 |
| }, |
| { |
| "epoch": 24.675592173017506, |
| "grad_norm": 0.18405234813690186, |
| "learning_rate": 7.978782352924264e-10, |
| "loss": 0.0248, |
| "step": 23960 |
| }, |
| { |
| "epoch": 24.685890834191554, |
| "grad_norm": 0.18703693151474, |
| "learning_rate": 4.5613435905589305e-10, |
| "loss": 0.0235, |
| "step": 23970 |
| }, |
| { |
| "epoch": 24.6961894953656, |
| "grad_norm": 0.3629539906978607, |
| "learning_rate": 2.0931885311159526e-10, |
| "loss": 0.0247, |
| "step": 23980 |
| }, |
| { |
| "epoch": 24.70648815653965, |
| "grad_norm": 0.3040322959423065, |
| "learning_rate": 5.743218605136491e-11, |
| "loss": 0.0226, |
| "step": 23990 |
| }, |
| { |
| "epoch": 24.716786817713697, |
| "grad_norm": 0.2292143851518631, |
| "learning_rate": 4.746462556326492e-13, |
| "loss": 0.0208, |
| "step": 24000 |
| }, |
| { |
| "epoch": 24.716786817713697, |
| "step": 24000, |
| "total_flos": 0.0, |
| "train_loss": 0.04103807942320903, |
| "train_runtime": 10175.0236, |
| "train_samples_per_second": 75.479, |
| "train_steps_per_second": 2.359 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 24000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 25, |
| "save_steps": 20000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|