| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 18.537590113285273, |
| "eval_steps": 500, |
| "global_step": 18000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010298661174047374, |
| "grad_norm": 28.08726692199707, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.0981, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02059732234809475, |
| "grad_norm": 16.46196746826172, |
| "learning_rate": 2.1111111111111114e-06, |
| "loss": 1.9619, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.030895983522142123, |
| "grad_norm": 13.453218460083008, |
| "learning_rate": 3.2222222222222222e-06, |
| "loss": 1.5883, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0411946446961895, |
| "grad_norm": 3.0111494064331055, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.8443, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05149330587023687, |
| "grad_norm": 1.8473039865493774, |
| "learning_rate": 5.444444444444445e-06, |
| "loss": 0.4851, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.061791967044284246, |
| "grad_norm": 1.983799695968628, |
| "learning_rate": 6.555555555555556e-06, |
| "loss": 0.4895, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07209062821833162, |
| "grad_norm": 1.359467625617981, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 0.3476, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.082389289392379, |
| "grad_norm": 1.6559157371520996, |
| "learning_rate": 8.777777777777778e-06, |
| "loss": 0.3161, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09268795056642637, |
| "grad_norm": 1.4577065706253052, |
| "learning_rate": 9.888888888888889e-06, |
| "loss": 0.2894, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10298661174047374, |
| "grad_norm": 1.9685674905776978, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.2669, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11328527291452112, |
| "grad_norm": 1.0735788345336914, |
| "learning_rate": 1.2111111111111112e-05, |
| "loss": 0.2384, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12358393408856849, |
| "grad_norm": 2.065934419631958, |
| "learning_rate": 1.3222222222222221e-05, |
| "loss": 0.2315, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13388259526261587, |
| "grad_norm": 1.3160645961761475, |
| "learning_rate": 1.4333333333333334e-05, |
| "loss": 0.2212, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14418125643666324, |
| "grad_norm": 1.132812738418579, |
| "learning_rate": 1.5444444444444446e-05, |
| "loss": 0.2107, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15447991761071062, |
| "grad_norm": 0.8556684851646423, |
| "learning_rate": 1.655555555555556e-05, |
| "loss": 0.1983, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.164778578784758, |
| "grad_norm": 1.1401009559631348, |
| "learning_rate": 1.7666666666666668e-05, |
| "loss": 0.1821, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17507723995880536, |
| "grad_norm": 0.9898369312286377, |
| "learning_rate": 1.8777777777777777e-05, |
| "loss": 0.1725, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18537590113285274, |
| "grad_norm": 1.2845979928970337, |
| "learning_rate": 1.988888888888889e-05, |
| "loss": 0.1798, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1956745623069001, |
| "grad_norm": 0.7349956631660461, |
| "learning_rate": 2.1e-05, |
| "loss": 0.1553, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2059732234809475, |
| "grad_norm": 1.0893903970718384, |
| "learning_rate": 2.211111111111111e-05, |
| "loss": 0.161, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21627188465499486, |
| "grad_norm": 1.4773167371749878, |
| "learning_rate": 2.3222222222222224e-05, |
| "loss": 0.1687, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.22657054582904224, |
| "grad_norm": 0.7343375086784363, |
| "learning_rate": 2.4333333333333336e-05, |
| "loss": 0.1541, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2368692070030896, |
| "grad_norm": 1.459641456604004, |
| "learning_rate": 2.5444444444444442e-05, |
| "loss": 0.1546, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.24716786817713698, |
| "grad_norm": 1.007576823234558, |
| "learning_rate": 2.6555555555555555e-05, |
| "loss": 0.1397, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.25746652935118436, |
| "grad_norm": 0.7707590460777283, |
| "learning_rate": 2.7666666666666667e-05, |
| "loss": 0.1395, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.26776519052523173, |
| "grad_norm": 0.8418192863464355, |
| "learning_rate": 2.877777777777778e-05, |
| "loss": 0.1367, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2780638516992791, |
| "grad_norm": 1.433361291885376, |
| "learning_rate": 2.988888888888889e-05, |
| "loss": 0.1443, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2883625128733265, |
| "grad_norm": 1.6851385831832886, |
| "learning_rate": 3.1e-05, |
| "loss": 0.1412, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.29866117404737386, |
| "grad_norm": 1.0967495441436768, |
| "learning_rate": 3.2111111111111114e-05, |
| "loss": 0.1465, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.30895983522142123, |
| "grad_norm": 0.9680765867233276, |
| "learning_rate": 3.322222222222222e-05, |
| "loss": 0.1409, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3192584963954686, |
| "grad_norm": 0.8024266362190247, |
| "learning_rate": 3.433333333333333e-05, |
| "loss": 0.151, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.329557157569516, |
| "grad_norm": 1.2099324464797974, |
| "learning_rate": 3.5444444444444445e-05, |
| "loss": 0.1276, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.33985581874356335, |
| "grad_norm": 1.553401231765747, |
| "learning_rate": 3.655555555555556e-05, |
| "loss": 0.1407, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35015447991761073, |
| "grad_norm": 0.9965718388557434, |
| "learning_rate": 3.766666666666667e-05, |
| "loss": 0.1193, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3604531410916581, |
| "grad_norm": 1.0881636142730713, |
| "learning_rate": 3.877777777777778e-05, |
| "loss": 0.1161, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3707518022657055, |
| "grad_norm": 0.7971917986869812, |
| "learning_rate": 3.9888888888888895e-05, |
| "loss": 0.1153, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.38105046343975285, |
| "grad_norm": 0.6419103741645813, |
| "learning_rate": 4.1e-05, |
| "loss": 0.1268, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3913491246138002, |
| "grad_norm": 0.8467381596565247, |
| "learning_rate": 4.211111111111111e-05, |
| "loss": 0.1089, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4016477857878476, |
| "grad_norm": 0.7437835335731506, |
| "learning_rate": 4.3222222222222226e-05, |
| "loss": 0.1196, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.411946446961895, |
| "grad_norm": 1.1879000663757324, |
| "learning_rate": 4.433333333333334e-05, |
| "loss": 0.1104, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.42224510813594235, |
| "grad_norm": 1.103964924812317, |
| "learning_rate": 4.5444444444444444e-05, |
| "loss": 0.1154, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4325437693099897, |
| "grad_norm": 1.20859956741333, |
| "learning_rate": 4.6555555555555556e-05, |
| "loss": 0.1151, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4428424304840371, |
| "grad_norm": 1.3592861890792847, |
| "learning_rate": 4.766666666666667e-05, |
| "loss": 0.1221, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.45314109165808447, |
| "grad_norm": 0.7694193720817566, |
| "learning_rate": 4.8777777777777775e-05, |
| "loss": 0.1081, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.46343975283213185, |
| "grad_norm": 0.8526501655578613, |
| "learning_rate": 4.9888888888888894e-05, |
| "loss": 0.1071, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4737384140061792, |
| "grad_norm": 0.8666425943374634, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 0.1125, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4840370751802266, |
| "grad_norm": 1.0404722690582275, |
| "learning_rate": 5.211111111111111e-05, |
| "loss": 0.1235, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.49433573635427397, |
| "grad_norm": 0.8314346671104431, |
| "learning_rate": 5.322222222222223e-05, |
| "loss": 0.1156, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5046343975283213, |
| "grad_norm": 0.8053165674209595, |
| "learning_rate": 5.433333333333334e-05, |
| "loss": 0.0963, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5149330587023687, |
| "grad_norm": 0.9703218340873718, |
| "learning_rate": 5.544444444444444e-05, |
| "loss": 0.1081, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.525231719876416, |
| "grad_norm": 1.0357967615127563, |
| "learning_rate": 5.655555555555556e-05, |
| "loss": 0.1053, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5355303810504635, |
| "grad_norm": 0.6202366948127747, |
| "learning_rate": 5.766666666666667e-05, |
| "loss": 0.1161, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5458290422245108, |
| "grad_norm": 0.9413891434669495, |
| "learning_rate": 5.8777777777777774e-05, |
| "loss": 0.1109, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5561277033985582, |
| "grad_norm": 0.9725326299667358, |
| "learning_rate": 5.988888888888889e-05, |
| "loss": 0.1087, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5664263645726055, |
| "grad_norm": 1.1372697353363037, |
| "learning_rate": 6.1e-05, |
| "loss": 0.0934, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.576725025746653, |
| "grad_norm": 0.9730582237243652, |
| "learning_rate": 6.21111111111111e-05, |
| "loss": 0.089, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5870236869207003, |
| "grad_norm": 1.031986117362976, |
| "learning_rate": 6.322222222222223e-05, |
| "loss": 0.0921, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5973223480947477, |
| "grad_norm": 0.9803087115287781, |
| "learning_rate": 6.433333333333333e-05, |
| "loss": 0.109, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.607621009268795, |
| "grad_norm": 1.2565224170684814, |
| "learning_rate": 6.544444444444446e-05, |
| "loss": 0.1075, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6179196704428425, |
| "grad_norm": 0.6035177707672119, |
| "learning_rate": 6.655555555555555e-05, |
| "loss": 0.1069, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6282183316168898, |
| "grad_norm": 0.6485044360160828, |
| "learning_rate": 6.766666666666667e-05, |
| "loss": 0.1041, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6385169927909372, |
| "grad_norm": 0.9063082337379456, |
| "learning_rate": 6.877777777777778e-05, |
| "loss": 0.087, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6488156539649845, |
| "grad_norm": 0.7508301734924316, |
| "learning_rate": 6.988888888888889e-05, |
| "loss": 0.0993, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.659114315139032, |
| "grad_norm": 0.7371131777763367, |
| "learning_rate": 7.1e-05, |
| "loss": 0.0965, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6694129763130793, |
| "grad_norm": 0.9033893942832947, |
| "learning_rate": 7.211111111111112e-05, |
| "loss": 0.0927, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6797116374871267, |
| "grad_norm": 1.0828319787979126, |
| "learning_rate": 7.322222222222223e-05, |
| "loss": 0.1039, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.690010298661174, |
| "grad_norm": 0.7973754405975342, |
| "learning_rate": 7.433333333333333e-05, |
| "loss": 0.0942, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7003089598352215, |
| "grad_norm": 0.9999275803565979, |
| "learning_rate": 7.544444444444445e-05, |
| "loss": 0.0938, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7106076210092688, |
| "grad_norm": 0.7432506680488586, |
| "learning_rate": 7.655555555555555e-05, |
| "loss": 0.0822, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7209062821833162, |
| "grad_norm": 0.7960357069969177, |
| "learning_rate": 7.766666666666667e-05, |
| "loss": 0.0885, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7312049433573635, |
| "grad_norm": 0.6295223236083984, |
| "learning_rate": 7.877777777777778e-05, |
| "loss": 0.0984, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.741503604531411, |
| "grad_norm": 0.6425987482070923, |
| "learning_rate": 7.988888888888889e-05, |
| "loss": 0.0851, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7518022657054583, |
| "grad_norm": 0.7241719961166382, |
| "learning_rate": 8.1e-05, |
| "loss": 0.0818, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7621009268795057, |
| "grad_norm": 0.6875414252281189, |
| "learning_rate": 8.211111111111112e-05, |
| "loss": 0.0776, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.772399588053553, |
| "grad_norm": 0.7593461275100708, |
| "learning_rate": 8.322222222222223e-05, |
| "loss": 0.0862, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7826982492276005, |
| "grad_norm": 1.1254090070724487, |
| "learning_rate": 8.433333333333334e-05, |
| "loss": 0.0831, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7929969104016478, |
| "grad_norm": 0.6563543677330017, |
| "learning_rate": 8.544444444444445e-05, |
| "loss": 0.0756, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8032955715756952, |
| "grad_norm": 0.500499963760376, |
| "learning_rate": 8.655555555555555e-05, |
| "loss": 0.09, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8135942327497425, |
| "grad_norm": 0.6962169408798218, |
| "learning_rate": 8.766666666666668e-05, |
| "loss": 0.0913, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.82389289392379, |
| "grad_norm": 0.8879425525665283, |
| "learning_rate": 8.877777777777778e-05, |
| "loss": 0.094, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8341915550978373, |
| "grad_norm": 0.7109111547470093, |
| "learning_rate": 8.988888888888889e-05, |
| "loss": 0.0899, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8444902162718847, |
| "grad_norm": 0.6895614266395569, |
| "learning_rate": 9.1e-05, |
| "loss": 0.0899, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.854788877445932, |
| "grad_norm": 0.5885145664215088, |
| "learning_rate": 9.211111111111112e-05, |
| "loss": 0.0894, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8650875386199794, |
| "grad_norm": 0.6228615641593933, |
| "learning_rate": 9.322222222222223e-05, |
| "loss": 0.0826, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8753861997940268, |
| "grad_norm": 0.6920461654663086, |
| "learning_rate": 9.433333333333334e-05, |
| "loss": 0.0926, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8856848609680742, |
| "grad_norm": 0.8142651319503784, |
| "learning_rate": 9.544444444444445e-05, |
| "loss": 0.0769, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8959835221421215, |
| "grad_norm": 0.8525772094726562, |
| "learning_rate": 9.655555555555555e-05, |
| "loss": 0.0775, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9062821833161689, |
| "grad_norm": 0.6274034976959229, |
| "learning_rate": 9.766666666666668e-05, |
| "loss": 0.0793, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9165808444902163, |
| "grad_norm": 0.7031662464141846, |
| "learning_rate": 9.877777777777778e-05, |
| "loss": 0.081, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9268795056642637, |
| "grad_norm": 0.542312741279602, |
| "learning_rate": 9.98888888888889e-05, |
| "loss": 0.0878, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.937178166838311, |
| "grad_norm": 0.5504183173179626, |
| "learning_rate": 9.999993165095463e-05, |
| "loss": 0.0711, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9474768280123584, |
| "grad_norm": 0.6083622574806213, |
| "learning_rate": 9.999969538288952e-05, |
| "loss": 0.0774, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9577754891864058, |
| "grad_norm": 0.7640944123268127, |
| "learning_rate": 9.999929035278659e-05, |
| "loss": 0.0711, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9680741503604532, |
| "grad_norm": 0.34581655263900757, |
| "learning_rate": 9.999871656201292e-05, |
| "loss": 0.0716, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9783728115345005, |
| "grad_norm": 0.6435947418212891, |
| "learning_rate": 9.999797401250521e-05, |
| "loss": 0.0833, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9886714727085479, |
| "grad_norm": 0.6153683662414551, |
| "learning_rate": 9.999706270676973e-05, |
| "loss": 0.0683, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9989701338825953, |
| "grad_norm": 0.5145250558853149, |
| "learning_rate": 9.999598264788241e-05, |
| "loss": 0.0679, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0092687950566426, |
| "grad_norm": 0.5474639534950256, |
| "learning_rate": 9.999473383948872e-05, |
| "loss": 0.0652, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.01956745623069, |
| "grad_norm": 0.4673866331577301, |
| "learning_rate": 9.99933162858037e-05, |
| "loss": 0.0806, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.0298661174047374, |
| "grad_norm": 0.500733494758606, |
| "learning_rate": 9.999172999161198e-05, |
| "loss": 0.0746, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0401647785787849, |
| "grad_norm": 0.6277178525924683, |
| "learning_rate": 9.998997496226772e-05, |
| "loss": 0.0691, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.050463439752832, |
| "grad_norm": 0.34232184290885925, |
| "learning_rate": 9.998805120369458e-05, |
| "loss": 0.069, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0607621009268795, |
| "grad_norm": 0.6583809852600098, |
| "learning_rate": 9.998595872238577e-05, |
| "loss": 0.0646, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.071060762100927, |
| "grad_norm": 0.5400450825691223, |
| "learning_rate": 9.998369752540395e-05, |
| "loss": 0.0709, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0813594232749741, |
| "grad_norm": 0.716460645198822, |
| "learning_rate": 9.998126762038126e-05, |
| "loss": 0.0659, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0916580844490216, |
| "grad_norm": 0.7969040274620056, |
| "learning_rate": 9.997866901551926e-05, |
| "loss": 0.0834, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.101956745623069, |
| "grad_norm": 0.6805360317230225, |
| "learning_rate": 9.997590171958892e-05, |
| "loss": 0.0661, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1122554067971164, |
| "grad_norm": 0.6645709872245789, |
| "learning_rate": 9.997296574193058e-05, |
| "loss": 0.0719, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1225540679711639, |
| "grad_norm": 0.9983972311019897, |
| "learning_rate": 9.996986109245395e-05, |
| "loss": 0.063, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.132852729145211, |
| "grad_norm": 0.47811999917030334, |
| "learning_rate": 9.996658778163802e-05, |
| "loss": 0.0812, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1431513903192585, |
| "grad_norm": 0.9598459601402283, |
| "learning_rate": 9.996314582053106e-05, |
| "loss": 0.0797, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.153450051493306, |
| "grad_norm": 0.8147891759872437, |
| "learning_rate": 9.995953522075061e-05, |
| "loss": 0.076, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.1637487126673531, |
| "grad_norm": 0.36551281809806824, |
| "learning_rate": 9.995575599448336e-05, |
| "loss": 0.0689, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1740473738414006, |
| "grad_norm": 0.41024380922317505, |
| "learning_rate": 9.995180815448523e-05, |
| "loss": 0.091, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.184346035015448, |
| "grad_norm": 0.5559478998184204, |
| "learning_rate": 9.994769171408118e-05, |
| "loss": 0.0783, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1946446961894954, |
| "grad_norm": 0.39498281478881836, |
| "learning_rate": 9.994340668716527e-05, |
| "loss": 0.0655, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2049433573635429, |
| "grad_norm": 0.7332147359848022, |
| "learning_rate": 9.993895308820058e-05, |
| "loss": 0.0739, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.21524201853759, |
| "grad_norm": 0.5935864448547363, |
| "learning_rate": 9.99343309322192e-05, |
| "loss": 0.0651, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2255406797116375, |
| "grad_norm": 0.5222606658935547, |
| "learning_rate": 9.99295402348221e-05, |
| "loss": 0.0676, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.235839340885685, |
| "grad_norm": 0.5474528670310974, |
| "learning_rate": 9.992458101217912e-05, |
| "loss": 0.0775, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2461380020597321, |
| "grad_norm": 0.7393515110015869, |
| "learning_rate": 9.991945328102897e-05, |
| "loss": 0.0679, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.2564366632337796, |
| "grad_norm": 0.48135286569595337, |
| "learning_rate": 9.991415705867903e-05, |
| "loss": 0.0627, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.266735324407827, |
| "grad_norm": 0.40880492329597473, |
| "learning_rate": 9.990869236300546e-05, |
| "loss": 0.0621, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.2770339855818744, |
| "grad_norm": 0.4522377550601959, |
| "learning_rate": 9.990305921245306e-05, |
| "loss": 0.0629, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.2873326467559219, |
| "grad_norm": 0.5431732535362244, |
| "learning_rate": 9.989725762603515e-05, |
| "loss": 0.0711, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.297631307929969, |
| "grad_norm": 0.4390816390514374, |
| "learning_rate": 9.989128762333362e-05, |
| "loss": 0.058, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.3079299691040165, |
| "grad_norm": 0.5823209881782532, |
| "learning_rate": 9.988514922449879e-05, |
| "loss": 0.0742, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.318228630278064, |
| "grad_norm": 0.6167677044868469, |
| "learning_rate": 9.987884245024934e-05, |
| "loss": 0.0698, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.3285272914521111, |
| "grad_norm": 0.470501184463501, |
| "learning_rate": 9.98723673218723e-05, |
| "loss": 0.0669, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3388259526261586, |
| "grad_norm": 0.3435496985912323, |
| "learning_rate": 9.986572386122291e-05, |
| "loss": 0.0655, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.349124613800206, |
| "grad_norm": 0.5990545749664307, |
| "learning_rate": 9.98589120907246e-05, |
| "loss": 0.0653, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.3594232749742534, |
| "grad_norm": 0.7209518551826477, |
| "learning_rate": 9.985193203336886e-05, |
| "loss": 0.0654, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.3697219361483008, |
| "grad_norm": 0.6588581800460815, |
| "learning_rate": 9.984478371271521e-05, |
| "loss": 0.066, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.380020597322348, |
| "grad_norm": 0.5437431931495667, |
| "learning_rate": 9.98374671528911e-05, |
| "loss": 0.0685, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.3903192584963955, |
| "grad_norm": 0.4081268012523651, |
| "learning_rate": 9.982998237859184e-05, |
| "loss": 0.0649, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.400617919670443, |
| "grad_norm": 0.5363196134567261, |
| "learning_rate": 9.98223294150805e-05, |
| "loss": 0.0614, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.4109165808444901, |
| "grad_norm": 0.5327999591827393, |
| "learning_rate": 9.981450828818783e-05, |
| "loss": 0.058, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.4212152420185376, |
| "grad_norm": 0.39524152874946594, |
| "learning_rate": 9.980651902431216e-05, |
| "loss": 0.0606, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.431513903192585, |
| "grad_norm": 0.5942156910896301, |
| "learning_rate": 9.979836165041936e-05, |
| "loss": 0.0589, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4418125643666324, |
| "grad_norm": 0.6506125330924988, |
| "learning_rate": 9.97900361940427e-05, |
| "loss": 0.0618, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4521112255406798, |
| "grad_norm": 0.43637052178382874, |
| "learning_rate": 9.978154268328276e-05, |
| "loss": 0.0728, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.462409886714727, |
| "grad_norm": 0.5816675424575806, |
| "learning_rate": 9.977288114680737e-05, |
| "loss": 0.0738, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.4727085478887745, |
| "grad_norm": 0.3983500301837921, |
| "learning_rate": 9.976405161385147e-05, |
| "loss": 0.0674, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.483007209062822, |
| "grad_norm": 0.41254571080207825, |
| "learning_rate": 9.975505411421704e-05, |
| "loss": 0.066, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.4933058702368691, |
| "grad_norm": 0.4647277593612671, |
| "learning_rate": 9.974588867827301e-05, |
| "loss": 0.0646, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5036045314109165, |
| "grad_norm": 0.4378807544708252, |
| "learning_rate": 9.97365553369551e-05, |
| "loss": 0.0589, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.513903192584964, |
| "grad_norm": 0.6178969144821167, |
| "learning_rate": 9.972705412176577e-05, |
| "loss": 0.0621, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.5242018537590112, |
| "grad_norm": 0.5825141072273254, |
| "learning_rate": 9.971738506477414e-05, |
| "loss": 0.0644, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.5345005149330588, |
| "grad_norm": 0.5849868655204773, |
| "learning_rate": 9.970754819861577e-05, |
| "loss": 0.0669, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.544799176107106, |
| "grad_norm": 0.5067623853683472, |
| "learning_rate": 9.969754355649268e-05, |
| "loss": 0.071, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5550978372811535, |
| "grad_norm": 0.5842755436897278, |
| "learning_rate": 9.968737117217313e-05, |
| "loss": 0.0713, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.565396498455201, |
| "grad_norm": 0.3868110179901123, |
| "learning_rate": 9.967703107999158e-05, |
| "loss": 0.0635, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.575695159629248, |
| "grad_norm": 0.4535583257675171, |
| "learning_rate": 9.966652331484853e-05, |
| "loss": 0.0587, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.5859938208032955, |
| "grad_norm": 0.38644909858703613, |
| "learning_rate": 9.965584791221048e-05, |
| "loss": 0.0708, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.596292481977343, |
| "grad_norm": 0.460753858089447, |
| "learning_rate": 9.964500490810966e-05, |
| "loss": 0.0645, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6065911431513902, |
| "grad_norm": 0.5585173964500427, |
| "learning_rate": 9.963399433914405e-05, |
| "loss": 0.0587, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.6168898043254378, |
| "grad_norm": 0.6196934580802917, |
| "learning_rate": 9.962281624247722e-05, |
| "loss": 0.0663, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.627188465499485, |
| "grad_norm": 0.440153568983078, |
| "learning_rate": 9.961147065583813e-05, |
| "loss": 0.0568, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.6374871266735325, |
| "grad_norm": 0.49740493297576904, |
| "learning_rate": 9.959995761752112e-05, |
| "loss": 0.0616, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.64778578784758, |
| "grad_norm": 0.7940653562545776, |
| "learning_rate": 9.958827716638572e-05, |
| "loss": 0.0656, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.658084449021627, |
| "grad_norm": 0.39363256096839905, |
| "learning_rate": 9.957642934185648e-05, |
| "loss": 0.059, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.6683831101956745, |
| "grad_norm": 0.5798192620277405, |
| "learning_rate": 9.95644141839229e-05, |
| "loss": 0.057, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.678681771369722, |
| "grad_norm": 0.43519875407218933, |
| "learning_rate": 9.955223173313931e-05, |
| "loss": 0.0547, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.6889804325437692, |
| "grad_norm": 0.5713900327682495, |
| "learning_rate": 9.953988203062463e-05, |
| "loss": 0.0655, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.6992790937178168, |
| "grad_norm": 0.8694477677345276, |
| "learning_rate": 9.952736511806236e-05, |
| "loss": 0.0793, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.709577754891864, |
| "grad_norm": 0.344855397939682, |
| "learning_rate": 9.951468103770032e-05, |
| "loss": 0.0654, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.7198764160659115, |
| "grad_norm": 0.747203528881073, |
| "learning_rate": 9.950182983235063e-05, |
| "loss": 0.0694, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.730175077239959, |
| "grad_norm": 0.44555550813674927, |
| "learning_rate": 9.948881154538945e-05, |
| "loss": 0.0729, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.740473738414006, |
| "grad_norm": 0.4354792535305023, |
| "learning_rate": 9.94756262207569e-05, |
| "loss": 0.0739, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.7507723995880535, |
| "grad_norm": 0.4117138683795929, |
| "learning_rate": 9.946227390295689e-05, |
| "loss": 0.0648, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.761071060762101, |
| "grad_norm": 0.5352147221565247, |
| "learning_rate": 9.9448754637057e-05, |
| "loss": 0.0614, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.7713697219361482, |
| "grad_norm": 0.3937685787677765, |
| "learning_rate": 9.943506846868826e-05, |
| "loss": 0.0668, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.7816683831101958, |
| "grad_norm": 0.510313868522644, |
| "learning_rate": 9.942121544404509e-05, |
| "loss": 0.0564, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.791967044284243, |
| "grad_norm": 0.43196746706962585, |
| "learning_rate": 9.940719560988505e-05, |
| "loss": 0.0515, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.8022657054582905, |
| "grad_norm": 0.4649578928947449, |
| "learning_rate": 9.939300901352876e-05, |
| "loss": 0.0681, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8125643666323379, |
| "grad_norm": 0.6281247735023499, |
| "learning_rate": 9.937865570285967e-05, |
| "loss": 0.0721, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.822863027806385, |
| "grad_norm": 0.6799906492233276, |
| "learning_rate": 9.936413572632397e-05, |
| "loss": 0.0565, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.8331616889804325, |
| "grad_norm": 0.4169757068157196, |
| "learning_rate": 9.934944913293038e-05, |
| "loss": 0.0626, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.84346035015448, |
| "grad_norm": 0.42282024025917053, |
| "learning_rate": 9.933459597224997e-05, |
| "loss": 0.0654, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.8537590113285272, |
| "grad_norm": 0.34127193689346313, |
| "learning_rate": 9.931957629441607e-05, |
| "loss": 0.0572, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.8640576725025748, |
| "grad_norm": 0.3683079183101654, |
| "learning_rate": 9.930439015012396e-05, |
| "loss": 0.0621, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.874356333676622, |
| "grad_norm": 0.5137266516685486, |
| "learning_rate": 9.92890375906309e-05, |
| "loss": 0.0554, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.8846549948506695, |
| "grad_norm": 0.4121856391429901, |
| "learning_rate": 9.927351866775578e-05, |
| "loss": 0.0631, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.8949536560247169, |
| "grad_norm": 0.5225406289100647, |
| "learning_rate": 9.925783343387903e-05, |
| "loss": 0.0557, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.905252317198764, |
| "grad_norm": 0.3983275294303894, |
| "learning_rate": 9.924198194194237e-05, |
| "loss": 0.0631, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.9155509783728115, |
| "grad_norm": 0.49256351590156555, |
| "learning_rate": 9.922596424544876e-05, |
| "loss": 0.0661, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.925849639546859, |
| "grad_norm": 0.5363610982894897, |
| "learning_rate": 9.92097803984621e-05, |
| "loss": 0.0706, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.9361483007209062, |
| "grad_norm": 0.4455360472202301, |
| "learning_rate": 9.919343045560712e-05, |
| "loss": 0.0698, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.9464469618949538, |
| "grad_norm": 0.5394087433815002, |
| "learning_rate": 9.917691447206913e-05, |
| "loss": 0.0616, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.956745623069001, |
| "grad_norm": 0.3595924377441406, |
| "learning_rate": 9.91602325035939e-05, |
| "loss": 0.067, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.9670442842430484, |
| "grad_norm": 0.2918682396411896, |
| "learning_rate": 9.914338460648743e-05, |
| "loss": 0.0732, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.9773429454170959, |
| "grad_norm": 0.41418296098709106, |
| "learning_rate": 9.912637083761578e-05, |
| "loss": 0.0635, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.987641606591143, |
| "grad_norm": 0.5165850520133972, |
| "learning_rate": 9.910919125440485e-05, |
| "loss": 0.069, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.9979402677651905, |
| "grad_norm": 0.3793902099132538, |
| "learning_rate": 9.909184591484027e-05, |
| "loss": 0.0717, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.008238928939238, |
| "grad_norm": 0.6616620421409607, |
| "learning_rate": 9.907433487746702e-05, |
| "loss": 0.0586, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.018537590113285, |
| "grad_norm": 0.5687305331230164, |
| "learning_rate": 9.905665820138949e-05, |
| "loss": 0.0569, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.028836251287333, |
| "grad_norm": 0.49890944361686707, |
| "learning_rate": 9.903881594627105e-05, |
| "loss": 0.0668, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.03913491246138, |
| "grad_norm": 0.5814046859741211, |
| "learning_rate": 9.902080817233398e-05, |
| "loss": 0.0644, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.049433573635427, |
| "grad_norm": 0.32920873165130615, |
| "learning_rate": 9.900263494035921e-05, |
| "loss": 0.0611, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.059732234809475, |
| "grad_norm": 0.5075499415397644, |
| "learning_rate": 9.898429631168619e-05, |
| "loss": 0.0586, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.070030895983522, |
| "grad_norm": 0.4823492169380188, |
| "learning_rate": 9.896579234821253e-05, |
| "loss": 0.0468, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.0803295571575697, |
| "grad_norm": 0.5481283068656921, |
| "learning_rate": 9.894712311239398e-05, |
| "loss": 0.0611, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.090628218331617, |
| "grad_norm": 0.4776170551776886, |
| "learning_rate": 9.892828866724406e-05, |
| "loss": 0.0657, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.100926879505664, |
| "grad_norm": 0.5601367354393005, |
| "learning_rate": 9.8909289076334e-05, |
| "loss": 0.0665, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.111225540679712, |
| "grad_norm": 0.3499130308628082, |
| "learning_rate": 9.88901244037923e-05, |
| "loss": 0.0563, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.121524201853759, |
| "grad_norm": 0.4545436501502991, |
| "learning_rate": 9.88707947143048e-05, |
| "loss": 0.0557, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.131822863027806, |
| "grad_norm": 0.46852630376815796, |
| "learning_rate": 9.885130007311423e-05, |
| "loss": 0.0522, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.142121524201854, |
| "grad_norm": 0.308856338262558, |
| "learning_rate": 9.883164054602012e-05, |
| "loss": 0.058, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.152420185375901, |
| "grad_norm": 0.7965716123580933, |
| "learning_rate": 9.881181619937848e-05, |
| "loss": 0.0535, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.1627188465499483, |
| "grad_norm": 0.3949962556362152, |
| "learning_rate": 9.879182710010169e-05, |
| "loss": 0.0536, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.173017507723996, |
| "grad_norm": 0.40669289231300354, |
| "learning_rate": 9.877167331565816e-05, |
| "loss": 0.0598, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.183316168898043, |
| "grad_norm": 0.6267198324203491, |
| "learning_rate": 9.875135491407217e-05, |
| "loss": 0.0647, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.193614830072091, |
| "grad_norm": 0.3919011950492859, |
| "learning_rate": 9.873087196392368e-05, |
| "loss": 0.063, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.203913491246138, |
| "grad_norm": 0.3769017457962036, |
| "learning_rate": 9.871022453434798e-05, |
| "loss": 0.0558, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.214212152420185, |
| "grad_norm": 0.382344126701355, |
| "learning_rate": 9.868941269503551e-05, |
| "loss": 0.0615, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.224510813594233, |
| "grad_norm": 0.7266145348548889, |
| "learning_rate": 9.86684365162317e-05, |
| "loss": 0.0611, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.23480947476828, |
| "grad_norm": 0.5791377425193787, |
| "learning_rate": 9.864729606873663e-05, |
| "loss": 0.0575, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.2451081359423277, |
| "grad_norm": 0.40031886100769043, |
| "learning_rate": 9.862599142390482e-05, |
| "loss": 0.0559, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.255406797116375, |
| "grad_norm": 0.34372609853744507, |
| "learning_rate": 9.860452265364502e-05, |
| "loss": 0.0623, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.265705458290422, |
| "grad_norm": 0.5310713052749634, |
| "learning_rate": 9.858288983041996e-05, |
| "loss": 0.0628, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.27600411946447, |
| "grad_norm": 0.4002261459827423, |
| "learning_rate": 9.856109302724603e-05, |
| "loss": 0.0528, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.286302780638517, |
| "grad_norm": 0.3995415270328522, |
| "learning_rate": 9.853913231769318e-05, |
| "loss": 0.0603, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.296601441812564, |
| "grad_norm": 0.5082608461380005, |
| "learning_rate": 9.851700777588453e-05, |
| "loss": 0.0555, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.306900102986612, |
| "grad_norm": 0.3878387212753296, |
| "learning_rate": 9.849471947649617e-05, |
| "loss": 0.054, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.317198764160659, |
| "grad_norm": 0.44272416830062866, |
| "learning_rate": 9.847226749475695e-05, |
| "loss": 0.067, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.3274974253347063, |
| "grad_norm": 0.38929831981658936, |
| "learning_rate": 9.844965190644817e-05, |
| "loss": 0.0518, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.337796086508754, |
| "grad_norm": 0.3083374798297882, |
| "learning_rate": 9.842687278790337e-05, |
| "loss": 0.0484, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.348094747682801, |
| "grad_norm": 0.41075581312179565, |
| "learning_rate": 9.8403930216008e-05, |
| "loss": 0.0635, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.358393408856849, |
| "grad_norm": 0.2911306917667389, |
| "learning_rate": 9.838082426819926e-05, |
| "loss": 0.0599, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.368692070030896, |
| "grad_norm": 0.524851381778717, |
| "learning_rate": 9.835755502246575e-05, |
| "loss": 0.0542, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.378990731204943, |
| "grad_norm": 0.45933887362480164, |
| "learning_rate": 9.833412255734724e-05, |
| "loss": 0.0671, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.389289392378991, |
| "grad_norm": 0.38324400782585144, |
| "learning_rate": 9.831052695193445e-05, |
| "loss": 0.0596, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.399588053553038, |
| "grad_norm": 0.7916087508201599, |
| "learning_rate": 9.828676828586871e-05, |
| "loss": 0.0722, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.4098867147270857, |
| "grad_norm": 0.4739670157432556, |
| "learning_rate": 9.826284663934171e-05, |
| "loss": 0.0596, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.420185375901133, |
| "grad_norm": 0.37064895033836365, |
| "learning_rate": 9.823876209309527e-05, |
| "loss": 0.062, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.43048403707518, |
| "grad_norm": 0.6001970171928406, |
| "learning_rate": 9.821451472842102e-05, |
| "loss": 0.0623, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.4407826982492278, |
| "grad_norm": 0.40998250246047974, |
| "learning_rate": 9.819010462716016e-05, |
| "loss": 0.0586, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.451081359423275, |
| "grad_norm": 0.4756927490234375, |
| "learning_rate": 9.816553187170317e-05, |
| "loss": 0.0522, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.461380020597322, |
| "grad_norm": 0.47659242153167725, |
| "learning_rate": 9.814079654498949e-05, |
| "loss": 0.0573, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.47167868177137, |
| "grad_norm": 0.4043289124965668, |
| "learning_rate": 9.811589873050735e-05, |
| "loss": 0.0654, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.481977342945417, |
| "grad_norm": 0.7355890870094299, |
| "learning_rate": 9.809083851229335e-05, |
| "loss": 0.0523, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.4922760041194643, |
| "grad_norm": 0.4957990348339081, |
| "learning_rate": 9.806561597493228e-05, |
| "loss": 0.0566, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.502574665293512, |
| "grad_norm": 0.3758098781108856, |
| "learning_rate": 9.80402312035568e-05, |
| "loss": 0.0509, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.512873326467559, |
| "grad_norm": 0.4361479878425598, |
| "learning_rate": 9.801468428384716e-05, |
| "loss": 0.0566, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.5231719876416063, |
| "grad_norm": 0.4788246750831604, |
| "learning_rate": 9.798897530203087e-05, |
| "loss": 0.0577, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.533470648815654, |
| "grad_norm": 0.3828676640987396, |
| "learning_rate": 9.796310434488248e-05, |
| "loss": 0.0552, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.543769309989701, |
| "grad_norm": 0.34888461232185364, |
| "learning_rate": 9.79370714997232e-05, |
| "loss": 0.0562, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.554067971163749, |
| "grad_norm": 0.5660400986671448, |
| "learning_rate": 9.791087685442071e-05, |
| "loss": 0.0593, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.564366632337796, |
| "grad_norm": 0.3883237838745117, |
| "learning_rate": 9.788452049738879e-05, |
| "loss": 0.0567, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.5746652935118437, |
| "grad_norm": 0.34366926550865173, |
| "learning_rate": 9.785800251758701e-05, |
| "loss": 0.055, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.584963954685891, |
| "grad_norm": 0.2992055416107178, |
| "learning_rate": 9.783132300452049e-05, |
| "loss": 0.053, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.595262615859938, |
| "grad_norm": 0.3543379306793213, |
| "learning_rate": 9.780448204823958e-05, |
| "loss": 0.0587, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.6055612770339858, |
| "grad_norm": 0.32997754216194153, |
| "learning_rate": 9.777747973933948e-05, |
| "loss": 0.0483, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.615859938208033, |
| "grad_norm": 0.4290192425251007, |
| "learning_rate": 9.775031616896008e-05, |
| "loss": 0.0565, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.62615859938208, |
| "grad_norm": 0.39540722966194153, |
| "learning_rate": 9.772299142878549e-05, |
| "loss": 0.0567, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.636457260556128, |
| "grad_norm": 0.46537721157073975, |
| "learning_rate": 9.769550561104388e-05, |
| "loss": 0.0511, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.646755921730175, |
| "grad_norm": 0.4019800126552582, |
| "learning_rate": 9.766785880850707e-05, |
| "loss": 0.0576, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.6570545829042223, |
| "grad_norm": 0.3543599545955658, |
| "learning_rate": 9.764005111449021e-05, |
| "loss": 0.0561, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.66735324407827, |
| "grad_norm": 0.459049791097641, |
| "learning_rate": 9.761208262285155e-05, |
| "loss": 0.0626, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.677651905252317, |
| "grad_norm": 0.4867796003818512, |
| "learning_rate": 9.758395342799206e-05, |
| "loss": 0.0504, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.6879505664263643, |
| "grad_norm": 0.42788106203079224, |
| "learning_rate": 9.755566362485512e-05, |
| "loss": 0.0578, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.698249227600412, |
| "grad_norm": 0.3226776719093323, |
| "learning_rate": 9.752721330892624e-05, |
| "loss": 0.0552, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.708547888774459, |
| "grad_norm": 0.4271225333213806, |
| "learning_rate": 9.749860257623263e-05, |
| "loss": 0.0549, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.718846549948507, |
| "grad_norm": 0.39057081937789917, |
| "learning_rate": 9.7469831523343e-05, |
| "loss": 0.0558, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.729145211122554, |
| "grad_norm": 0.4585021436214447, |
| "learning_rate": 9.744090024736719e-05, |
| "loss": 0.0481, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.7394438722966017, |
| "grad_norm": 0.4004554748535156, |
| "learning_rate": 9.741180884595578e-05, |
| "loss": 0.0671, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.749742533470649, |
| "grad_norm": 0.3565993010997772, |
| "learning_rate": 9.738255741729987e-05, |
| "loss": 0.0623, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.760041194644696, |
| "grad_norm": 0.30855366587638855, |
| "learning_rate": 9.735314606013068e-05, |
| "loss": 0.0588, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.7703398558187438, |
| "grad_norm": 0.4170495271682739, |
| "learning_rate": 9.732357487371924e-05, |
| "loss": 0.056, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.780638516992791, |
| "grad_norm": 0.5667279362678528, |
| "learning_rate": 9.729384395787602e-05, |
| "loss": 0.0612, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.790937178166838, |
| "grad_norm": 0.27353501319885254, |
| "learning_rate": 9.726395341295062e-05, |
| "loss": 0.0493, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.801235839340886, |
| "grad_norm": 0.5288174152374268, |
| "learning_rate": 9.723390333983144e-05, |
| "loss": 0.0629, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.811534500514933, |
| "grad_norm": 0.4831124544143677, |
| "learning_rate": 9.720369383994535e-05, |
| "loss": 0.0549, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.8218331616889802, |
| "grad_norm": 0.3807002902030945, |
| "learning_rate": 9.717332501525729e-05, |
| "loss": 0.0561, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.832131822863028, |
| "grad_norm": 0.6944444179534912, |
| "learning_rate": 9.714279696826998e-05, |
| "loss": 0.0564, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.842430484037075, |
| "grad_norm": 0.3146667778491974, |
| "learning_rate": 9.711210980202354e-05, |
| "loss": 0.0544, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.8527291452111223, |
| "grad_norm": 0.4342884421348572, |
| "learning_rate": 9.708126362009522e-05, |
| "loss": 0.0541, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.86302780638517, |
| "grad_norm": 0.4473581612110138, |
| "learning_rate": 9.70502585265989e-05, |
| "loss": 0.0567, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.873326467559217, |
| "grad_norm": 0.34954315423965454, |
| "learning_rate": 9.70190946261849e-05, |
| "loss": 0.0508, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.883625128733265, |
| "grad_norm": 0.37677961587905884, |
| "learning_rate": 9.698777202403953e-05, |
| "loss": 0.0555, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.893923789907312, |
| "grad_norm": 0.3924347460269928, |
| "learning_rate": 9.695629082588473e-05, |
| "loss": 0.0607, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.9042224510813597, |
| "grad_norm": 0.34362998604774475, |
| "learning_rate": 9.69246511379778e-05, |
| "loss": 0.0479, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.914521112255407, |
| "grad_norm": 0.48478758335113525, |
| "learning_rate": 9.689285306711094e-05, |
| "loss": 0.0564, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.924819773429454, |
| "grad_norm": 0.39429691433906555, |
| "learning_rate": 9.686089672061094e-05, |
| "loss": 0.0552, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.9351184346035017, |
| "grad_norm": 0.27760738134384155, |
| "learning_rate": 9.682878220633885e-05, |
| "loss": 0.0507, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.945417095777549, |
| "grad_norm": 0.3564143180847168, |
| "learning_rate": 9.679650963268951e-05, |
| "loss": 0.0529, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.955715756951596, |
| "grad_norm": 0.3425343930721283, |
| "learning_rate": 9.676407910859131e-05, |
| "loss": 0.05, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.966014418125644, |
| "grad_norm": 0.3504887819290161, |
| "learning_rate": 9.673149074350573e-05, |
| "loss": 0.0529, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.976313079299691, |
| "grad_norm": 0.432216078042984, |
| "learning_rate": 9.669874464742705e-05, |
| "loss": 0.0582, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.9866117404737382, |
| "grad_norm": 0.4117823541164398, |
| "learning_rate": 9.666584093088189e-05, |
| "loss": 0.0516, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.996910401647786, |
| "grad_norm": 0.4118179380893707, |
| "learning_rate": 9.663277970492886e-05, |
| "loss": 0.0664, |
| "step": 2910 |
| }, |
| { |
| "epoch": 3.007209062821833, |
| "grad_norm": 0.31822094321250916, |
| "learning_rate": 9.659956108115827e-05, |
| "loss": 0.0607, |
| "step": 2920 |
| }, |
| { |
| "epoch": 3.0175077239958807, |
| "grad_norm": 0.34220412373542786, |
| "learning_rate": 9.656618517169164e-05, |
| "loss": 0.0523, |
| "step": 2930 |
| }, |
| { |
| "epoch": 3.027806385169928, |
| "grad_norm": 0.33871203660964966, |
| "learning_rate": 9.65326520891814e-05, |
| "loss": 0.0486, |
| "step": 2940 |
| }, |
| { |
| "epoch": 3.038105046343975, |
| "grad_norm": 0.4035494327545166, |
| "learning_rate": 9.649896194681045e-05, |
| "loss": 0.0497, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.048403707518023, |
| "grad_norm": 0.36851248145103455, |
| "learning_rate": 9.646511485829186e-05, |
| "loss": 0.062, |
| "step": 2960 |
| }, |
| { |
| "epoch": 3.05870236869207, |
| "grad_norm": 0.3193969428539276, |
| "learning_rate": 9.643111093786835e-05, |
| "loss": 0.0514, |
| "step": 2970 |
| }, |
| { |
| "epoch": 3.0690010298661172, |
| "grad_norm": 0.331909716129303, |
| "learning_rate": 9.639695030031204e-05, |
| "loss": 0.0488, |
| "step": 2980 |
| }, |
| { |
| "epoch": 3.079299691040165, |
| "grad_norm": 0.35757410526275635, |
| "learning_rate": 9.636263306092406e-05, |
| "loss": 0.0576, |
| "step": 2990 |
| }, |
| { |
| "epoch": 3.089598352214212, |
| "grad_norm": 0.4217674434185028, |
| "learning_rate": 9.6328159335534e-05, |
| "loss": 0.0554, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.0998970133882597, |
| "grad_norm": 0.3531946539878845, |
| "learning_rate": 9.629352924049975e-05, |
| "loss": 0.059, |
| "step": 3010 |
| }, |
| { |
| "epoch": 3.110195674562307, |
| "grad_norm": 0.39479324221611023, |
| "learning_rate": 9.625874289270688e-05, |
| "loss": 0.0621, |
| "step": 3020 |
| }, |
| { |
| "epoch": 3.120494335736354, |
| "grad_norm": 0.29987436532974243, |
| "learning_rate": 9.622380040956842e-05, |
| "loss": 0.0511, |
| "step": 3030 |
| }, |
| { |
| "epoch": 3.130792996910402, |
| "grad_norm": 0.5292258262634277, |
| "learning_rate": 9.61887019090244e-05, |
| "loss": 0.0564, |
| "step": 3040 |
| }, |
| { |
| "epoch": 3.141091658084449, |
| "grad_norm": 0.33128613233566284, |
| "learning_rate": 9.615344750954141e-05, |
| "loss": 0.0548, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.151390319258496, |
| "grad_norm": 0.43356847763061523, |
| "learning_rate": 9.611803733011229e-05, |
| "loss": 0.0557, |
| "step": 3060 |
| }, |
| { |
| "epoch": 3.161688980432544, |
| "grad_norm": 0.4408741295337677, |
| "learning_rate": 9.60824714902556e-05, |
| "loss": 0.0582, |
| "step": 3070 |
| }, |
| { |
| "epoch": 3.171987641606591, |
| "grad_norm": 0.307669460773468, |
| "learning_rate": 9.604675011001538e-05, |
| "loss": 0.0442, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.1822863027806383, |
| "grad_norm": 0.49202683568000793, |
| "learning_rate": 9.601087330996061e-05, |
| "loss": 0.0599, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.192584963954686, |
| "grad_norm": 0.3430628180503845, |
| "learning_rate": 9.597484121118487e-05, |
| "loss": 0.0501, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.202883625128733, |
| "grad_norm": 0.45715686678886414, |
| "learning_rate": 9.593865393530592e-05, |
| "loss": 0.0533, |
| "step": 3110 |
| }, |
| { |
| "epoch": 3.213182286302781, |
| "grad_norm": 0.29405537247657776, |
| "learning_rate": 9.590231160446526e-05, |
| "loss": 0.0579, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.223480947476828, |
| "grad_norm": 0.4138418436050415, |
| "learning_rate": 9.586581434132775e-05, |
| "loss": 0.0553, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.233779608650875, |
| "grad_norm": 0.2747637927532196, |
| "learning_rate": 9.582916226908118e-05, |
| "loss": 0.0534, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.244078269824923, |
| "grad_norm": 0.3608400821685791, |
| "learning_rate": 9.57923555114359e-05, |
| "loss": 0.0512, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.25437693099897, |
| "grad_norm": 0.4042729437351227, |
| "learning_rate": 9.575539419262434e-05, |
| "loss": 0.0445, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.2646755921730177, |
| "grad_norm": 0.35471370816230774, |
| "learning_rate": 9.571827843740057e-05, |
| "loss": 0.0542, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.274974253347065, |
| "grad_norm": 0.2936842441558838, |
| "learning_rate": 9.568100837104e-05, |
| "loss": 0.0505, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.285272914521112, |
| "grad_norm": 0.2880595028400421, |
| "learning_rate": 9.56435841193388e-05, |
| "loss": 0.0458, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.29557157569516, |
| "grad_norm": 0.33003637194633484, |
| "learning_rate": 9.560600580861365e-05, |
| "loss": 0.0576, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.305870236869207, |
| "grad_norm": 0.4025996923446655, |
| "learning_rate": 9.556827356570116e-05, |
| "loss": 0.0598, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.316168898043254, |
| "grad_norm": 0.5448514819145203, |
| "learning_rate": 9.553038751795746e-05, |
| "loss": 0.0503, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.326467559217302, |
| "grad_norm": 0.39959079027175903, |
| "learning_rate": 9.549234779325792e-05, |
| "loss": 0.0581, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.336766220391349, |
| "grad_norm": 0.31689804792404175, |
| "learning_rate": 9.545415451999653e-05, |
| "loss": 0.054, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.3470648815653963, |
| "grad_norm": 0.5861422419548035, |
| "learning_rate": 9.541580782708557e-05, |
| "loss": 0.0498, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.357363542739444, |
| "grad_norm": 0.36639899015426636, |
| "learning_rate": 9.537730784395514e-05, |
| "loss": 0.0625, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.367662203913491, |
| "grad_norm": 0.3032686710357666, |
| "learning_rate": 9.533865470055275e-05, |
| "loss": 0.0543, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.377960865087539, |
| "grad_norm": 0.4109341502189636, |
| "learning_rate": 9.529984852734285e-05, |
| "loss": 0.0582, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.388259526261586, |
| "grad_norm": 0.38670700788497925, |
| "learning_rate": 9.526088945530645e-05, |
| "loss": 0.0547, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.398558187435633, |
| "grad_norm": 0.30283281207084656, |
| "learning_rate": 9.522177761594057e-05, |
| "loss": 0.0434, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.408856848609681, |
| "grad_norm": 0.3940243721008301, |
| "learning_rate": 9.518251314125788e-05, |
| "loss": 0.0548, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.419155509783728, |
| "grad_norm": 0.6107800006866455, |
| "learning_rate": 9.514309616378626e-05, |
| "loss": 0.0453, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.4294541709577757, |
| "grad_norm": 0.3535449802875519, |
| "learning_rate": 9.510352681656832e-05, |
| "loss": 0.0509, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.439752832131823, |
| "grad_norm": 0.4279785454273224, |
| "learning_rate": 9.50638052331609e-05, |
| "loss": 0.0511, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.45005149330587, |
| "grad_norm": 0.5184943675994873, |
| "learning_rate": 9.502393154763478e-05, |
| "loss": 0.0553, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.460350154479918, |
| "grad_norm": 0.6247850656509399, |
| "learning_rate": 9.498390589457404e-05, |
| "loss": 0.0485, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.470648815653965, |
| "grad_norm": 0.4810273349285126, |
| "learning_rate": 9.494372840907572e-05, |
| "loss": 0.0646, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.480947476828012, |
| "grad_norm": 0.31024450063705444, |
| "learning_rate": 9.490339922674934e-05, |
| "loss": 0.0506, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.49124613800206, |
| "grad_norm": 0.3408045172691345, |
| "learning_rate": 9.486291848371643e-05, |
| "loss": 0.0598, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.501544799176107, |
| "grad_norm": 0.3190326988697052, |
| "learning_rate": 9.482228631661005e-05, |
| "loss": 0.0569, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.5118434603501543, |
| "grad_norm": 0.3894359767436981, |
| "learning_rate": 9.478150286257443e-05, |
| "loss": 0.048, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.522142121524202, |
| "grad_norm": 0.33339062333106995, |
| "learning_rate": 9.474056825926434e-05, |
| "loss": 0.0533, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.532440782698249, |
| "grad_norm": 0.4688987731933594, |
| "learning_rate": 9.46994826448448e-05, |
| "loss": 0.0495, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.5427394438722963, |
| "grad_norm": 0.24669192731380463, |
| "learning_rate": 9.465824615799046e-05, |
| "loss": 0.0487, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.553038105046344, |
| "grad_norm": 0.43672746419906616, |
| "learning_rate": 9.461685893788526e-05, |
| "loss": 0.0529, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.563336766220391, |
| "grad_norm": 0.3806833028793335, |
| "learning_rate": 9.457532112422187e-05, |
| "loss": 0.0644, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.573635427394439, |
| "grad_norm": 0.43160000443458557, |
| "learning_rate": 9.453363285720129e-05, |
| "loss": 0.046, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.583934088568486, |
| "grad_norm": 0.3873897194862366, |
| "learning_rate": 9.44917942775323e-05, |
| "loss": 0.0561, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.5942327497425337, |
| "grad_norm": 0.420244425535202, |
| "learning_rate": 9.444980552643103e-05, |
| "loss": 0.0544, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.604531410916581, |
| "grad_norm": 0.2572662830352783, |
| "learning_rate": 9.44076667456205e-05, |
| "loss": 0.0609, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.614830072090628, |
| "grad_norm": 0.5829557776451111, |
| "learning_rate": 9.43653780773301e-05, |
| "loss": 0.0683, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.6251287332646758, |
| "grad_norm": 0.5830304622650146, |
| "learning_rate": 9.432293966429514e-05, |
| "loss": 0.067, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.635427394438723, |
| "grad_norm": 0.38021519780158997, |
| "learning_rate": 9.428035164975636e-05, |
| "loss": 0.0498, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.64572605561277, |
| "grad_norm": 0.4201594591140747, |
| "learning_rate": 9.423761417745942e-05, |
| "loss": 0.0569, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.656024716786818, |
| "grad_norm": 0.5576770305633545, |
| "learning_rate": 9.419472739165449e-05, |
| "loss": 0.0667, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.666323377960865, |
| "grad_norm": 0.34150251746177673, |
| "learning_rate": 9.415169143709565e-05, |
| "loss": 0.0539, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.6766220391349123, |
| "grad_norm": 0.5191327333450317, |
| "learning_rate": 9.410850645904049e-05, |
| "loss": 0.0609, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.68692070030896, |
| "grad_norm": 0.3418954610824585, |
| "learning_rate": 9.40651726032496e-05, |
| "loss": 0.0485, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.697219361483007, |
| "grad_norm": 0.44254234433174133, |
| "learning_rate": 9.402169001598611e-05, |
| "loss": 0.0552, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.7075180226570543, |
| "grad_norm": 0.549349308013916, |
| "learning_rate": 9.397805884401504e-05, |
| "loss": 0.0601, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.717816683831102, |
| "grad_norm": 0.4500453472137451, |
| "learning_rate": 9.393427923460308e-05, |
| "loss": 0.0496, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.728115345005149, |
| "grad_norm": 0.5540750622749329, |
| "learning_rate": 9.389035133551778e-05, |
| "loss": 0.0563, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.738414006179197, |
| "grad_norm": 0.28786641359329224, |
| "learning_rate": 9.38462752950273e-05, |
| "loss": 0.0532, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.748712667353244, |
| "grad_norm": 0.3725302219390869, |
| "learning_rate": 9.380205126189983e-05, |
| "loss": 0.0558, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.7590113285272917, |
| "grad_norm": 0.47449609637260437, |
| "learning_rate": 9.375767938540299e-05, |
| "loss": 0.0559, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.769309989701339, |
| "grad_norm": 0.5294702649116516, |
| "learning_rate": 9.371315981530349e-05, |
| "loss": 0.0534, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.779608650875386, |
| "grad_norm": 0.29216107726097107, |
| "learning_rate": 9.366849270186649e-05, |
| "loss": 0.0519, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.7899073120494338, |
| "grad_norm": 0.28166675567626953, |
| "learning_rate": 9.362367819585518e-05, |
| "loss": 0.0532, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.800205973223481, |
| "grad_norm": 0.5699660778045654, |
| "learning_rate": 9.357871644853024e-05, |
| "loss": 0.0533, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.810504634397528, |
| "grad_norm": 0.44877076148986816, |
| "learning_rate": 9.353360761164931e-05, |
| "loss": 0.0569, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.820803295571576, |
| "grad_norm": 0.4341685175895691, |
| "learning_rate": 9.348835183746649e-05, |
| "loss": 0.0579, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.831101956745623, |
| "grad_norm": 0.37804657220840454, |
| "learning_rate": 9.344294927873188e-05, |
| "loss": 0.0535, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.8414006179196702, |
| "grad_norm": 0.47172001004219055, |
| "learning_rate": 9.339740008869092e-05, |
| "loss": 0.049, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.851699279093718, |
| "grad_norm": 0.29430967569351196, |
| "learning_rate": 9.335170442108408e-05, |
| "loss": 0.0547, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.861997940267765, |
| "grad_norm": 0.40547069907188416, |
| "learning_rate": 9.330586243014617e-05, |
| "loss": 0.0486, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.8722966014418123, |
| "grad_norm": 0.3896206319332123, |
| "learning_rate": 9.325987427060586e-05, |
| "loss": 0.0585, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.88259526261586, |
| "grad_norm": 0.29565155506134033, |
| "learning_rate": 9.321374009768525e-05, |
| "loss": 0.0508, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.892893923789907, |
| "grad_norm": 0.5239169597625732, |
| "learning_rate": 9.316746006709919e-05, |
| "loss": 0.0608, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.903192584963955, |
| "grad_norm": 0.2817414402961731, |
| "learning_rate": 9.31210343350549e-05, |
| "loss": 0.0465, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.913491246138002, |
| "grad_norm": 0.4744998514652252, |
| "learning_rate": 9.307446305825135e-05, |
| "loss": 0.0616, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.9237899073120497, |
| "grad_norm": 0.4715334475040436, |
| "learning_rate": 9.302774639387877e-05, |
| "loss": 0.0557, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.934088568486097, |
| "grad_norm": 0.5753309726715088, |
| "learning_rate": 9.298088449961813e-05, |
| "loss": 0.0592, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.944387229660144, |
| "grad_norm": 0.318158358335495, |
| "learning_rate": 9.293387753364052e-05, |
| "loss": 0.0604, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.9546858908341918, |
| "grad_norm": 0.4752749800682068, |
| "learning_rate": 9.288672565460679e-05, |
| "loss": 0.049, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.964984552008239, |
| "grad_norm": 0.284682035446167, |
| "learning_rate": 9.283942902166681e-05, |
| "loss": 0.0491, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.975283213182286, |
| "grad_norm": 0.4126709997653961, |
| "learning_rate": 9.27919877944591e-05, |
| "loss": 0.0508, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.985581874356334, |
| "grad_norm": 0.34126409888267517, |
| "learning_rate": 9.27444021331102e-05, |
| "loss": 0.0545, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.995880535530381, |
| "grad_norm": 0.5670478343963623, |
| "learning_rate": 9.269667219823412e-05, |
| "loss": 0.0483, |
| "step": 3880 |
| }, |
| { |
| "epoch": 4.006179196704428, |
| "grad_norm": 0.3084736466407776, |
| "learning_rate": 9.264879815093191e-05, |
| "loss": 0.0499, |
| "step": 3890 |
| }, |
| { |
| "epoch": 4.016477857878476, |
| "grad_norm": 0.4823373258113861, |
| "learning_rate": 9.260078015279096e-05, |
| "loss": 0.0558, |
| "step": 3900 |
| }, |
| { |
| "epoch": 4.0267765190525235, |
| "grad_norm": 0.2889825105667114, |
| "learning_rate": 9.255261836588458e-05, |
| "loss": 0.0561, |
| "step": 3910 |
| }, |
| { |
| "epoch": 4.03707518022657, |
| "grad_norm": 0.28834298253059387, |
| "learning_rate": 9.250431295277137e-05, |
| "loss": 0.0498, |
| "step": 3920 |
| }, |
| { |
| "epoch": 4.047373841400618, |
| "grad_norm": 0.40643489360809326, |
| "learning_rate": 9.245586407649473e-05, |
| "loss": 0.0479, |
| "step": 3930 |
| }, |
| { |
| "epoch": 4.057672502574666, |
| "grad_norm": 0.3214862644672394, |
| "learning_rate": 9.240727190058227e-05, |
| "loss": 0.0498, |
| "step": 3940 |
| }, |
| { |
| "epoch": 4.067971163748712, |
| "grad_norm": 0.40402647852897644, |
| "learning_rate": 9.235853658904529e-05, |
| "loss": 0.0522, |
| "step": 3950 |
| }, |
| { |
| "epoch": 4.07826982492276, |
| "grad_norm": 0.3338010311126709, |
| "learning_rate": 9.230965830637821e-05, |
| "loss": 0.0506, |
| "step": 3960 |
| }, |
| { |
| "epoch": 4.088568486096808, |
| "grad_norm": 0.42742258310317993, |
| "learning_rate": 9.226063721755799e-05, |
| "loss": 0.053, |
| "step": 3970 |
| }, |
| { |
| "epoch": 4.098867147270854, |
| "grad_norm": 0.3947793245315552, |
| "learning_rate": 9.221147348804362e-05, |
| "loss": 0.0541, |
| "step": 3980 |
| }, |
| { |
| "epoch": 4.109165808444902, |
| "grad_norm": 0.4395465552806854, |
| "learning_rate": 9.216216728377554e-05, |
| "loss": 0.0509, |
| "step": 3990 |
| }, |
| { |
| "epoch": 4.11946446961895, |
| "grad_norm": 0.28796476125717163, |
| "learning_rate": 9.211271877117507e-05, |
| "loss": 0.0501, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.1297631307929965, |
| "grad_norm": 0.31560418009757996, |
| "learning_rate": 9.206312811714386e-05, |
| "loss": 0.0502, |
| "step": 4010 |
| }, |
| { |
| "epoch": 4.140061791967044, |
| "grad_norm": 0.45714765787124634, |
| "learning_rate": 9.201339548906332e-05, |
| "loss": 0.0579, |
| "step": 4020 |
| }, |
| { |
| "epoch": 4.150360453141092, |
| "grad_norm": 0.3373541831970215, |
| "learning_rate": 9.196352105479409e-05, |
| "loss": 0.0504, |
| "step": 4030 |
| }, |
| { |
| "epoch": 4.1606591143151395, |
| "grad_norm": 0.5105737447738647, |
| "learning_rate": 9.19135049826754e-05, |
| "loss": 0.0619, |
| "step": 4040 |
| }, |
| { |
| "epoch": 4.170957775489186, |
| "grad_norm": 0.3023523688316345, |
| "learning_rate": 9.186334744152458e-05, |
| "loss": 0.0499, |
| "step": 4050 |
| }, |
| { |
| "epoch": 4.181256436663234, |
| "grad_norm": 0.3311084508895874, |
| "learning_rate": 9.18130486006364e-05, |
| "loss": 0.0484, |
| "step": 4060 |
| }, |
| { |
| "epoch": 4.1915550978372815, |
| "grad_norm": 0.3167574405670166, |
| "learning_rate": 9.176260862978263e-05, |
| "loss": 0.0605, |
| "step": 4070 |
| }, |
| { |
| "epoch": 4.201853759011328, |
| "grad_norm": 0.3764163553714752, |
| "learning_rate": 9.171202769921134e-05, |
| "loss": 0.0521, |
| "step": 4080 |
| }, |
| { |
| "epoch": 4.212152420185376, |
| "grad_norm": 0.325210303068161, |
| "learning_rate": 9.16613059796464e-05, |
| "loss": 0.0471, |
| "step": 4090 |
| }, |
| { |
| "epoch": 4.222451081359424, |
| "grad_norm": 0.3970625102519989, |
| "learning_rate": 9.161044364228683e-05, |
| "loss": 0.0545, |
| "step": 4100 |
| }, |
| { |
| "epoch": 4.23274974253347, |
| "grad_norm": 0.306384414434433, |
| "learning_rate": 9.155944085880637e-05, |
| "loss": 0.0539, |
| "step": 4110 |
| }, |
| { |
| "epoch": 4.243048403707518, |
| "grad_norm": 0.4230334162712097, |
| "learning_rate": 9.150829780135269e-05, |
| "loss": 0.0456, |
| "step": 4120 |
| }, |
| { |
| "epoch": 4.253347064881566, |
| "grad_norm": 0.29097849130630493, |
| "learning_rate": 9.145701464254698e-05, |
| "loss": 0.0511, |
| "step": 4130 |
| }, |
| { |
| "epoch": 4.263645726055612, |
| "grad_norm": 0.390979140996933, |
| "learning_rate": 9.140559155548333e-05, |
| "loss": 0.0461, |
| "step": 4140 |
| }, |
| { |
| "epoch": 4.27394438722966, |
| "grad_norm": 0.2566828429698944, |
| "learning_rate": 9.135402871372808e-05, |
| "loss": 0.0508, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.284243048403708, |
| "grad_norm": 0.4710136651992798, |
| "learning_rate": 9.130232629131932e-05, |
| "loss": 0.0503, |
| "step": 4160 |
| }, |
| { |
| "epoch": 4.2945417095777545, |
| "grad_norm": 0.4374995827674866, |
| "learning_rate": 9.125048446276618e-05, |
| "loss": 0.0599, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.304840370751802, |
| "grad_norm": 0.43765076994895935, |
| "learning_rate": 9.119850340304843e-05, |
| "loss": 0.0531, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.31513903192585, |
| "grad_norm": 0.45118576288223267, |
| "learning_rate": 9.114638328761571e-05, |
| "loss": 0.0527, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.325437693099897, |
| "grad_norm": 0.3243924379348755, |
| "learning_rate": 9.109412429238704e-05, |
| "loss": 0.0431, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.335736354273944, |
| "grad_norm": 0.33518919348716736, |
| "learning_rate": 9.104172659375017e-05, |
| "loss": 0.0491, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.346035015447992, |
| "grad_norm": 0.6875081062316895, |
| "learning_rate": 9.098919036856102e-05, |
| "loss": 0.0488, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.3563336766220395, |
| "grad_norm": 0.5093826055526733, |
| "learning_rate": 9.093651579414311e-05, |
| "loss": 0.0487, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.366632337796086, |
| "grad_norm": 0.37270835041999817, |
| "learning_rate": 9.088370304828685e-05, |
| "loss": 0.0559, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.376930998970134, |
| "grad_norm": 0.4596996307373047, |
| "learning_rate": 9.083075230924913e-05, |
| "loss": 0.0578, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.387229660144182, |
| "grad_norm": 0.3775595426559448, |
| "learning_rate": 9.077766375575246e-05, |
| "loss": 0.0562, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.397528321318228, |
| "grad_norm": 0.3252449333667755, |
| "learning_rate": 9.072443756698459e-05, |
| "loss": 0.0558, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.407826982492276, |
| "grad_norm": 0.42610299587249756, |
| "learning_rate": 9.067107392259783e-05, |
| "loss": 0.0455, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.418125643666324, |
| "grad_norm": 0.36227330565452576, |
| "learning_rate": 9.061757300270845e-05, |
| "loss": 0.0498, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.42842430484037, |
| "grad_norm": 0.4343869686126709, |
| "learning_rate": 9.056393498789602e-05, |
| "loss": 0.0504, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.438722966014418, |
| "grad_norm": 0.4492502808570862, |
| "learning_rate": 9.051016005920282e-05, |
| "loss": 0.0526, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.449021627188466, |
| "grad_norm": 0.2649560868740082, |
| "learning_rate": 9.045624839813334e-05, |
| "loss": 0.0488, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.4593202883625125, |
| "grad_norm": 0.2290182262659073, |
| "learning_rate": 9.040220018665347e-05, |
| "loss": 0.0427, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.46961894953656, |
| "grad_norm": 0.37687376141548157, |
| "learning_rate": 9.034801560719011e-05, |
| "loss": 0.0437, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.479917610710608, |
| "grad_norm": 0.21943651139736176, |
| "learning_rate": 9.029369484263033e-05, |
| "loss": 0.047, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.490216271884655, |
| "grad_norm": 0.32304951548576355, |
| "learning_rate": 9.02392380763209e-05, |
| "loss": 0.0461, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.500514933058702, |
| "grad_norm": 0.21305856108665466, |
| "learning_rate": 9.018464549206769e-05, |
| "loss": 0.0461, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.51081359423275, |
| "grad_norm": 0.6847507953643799, |
| "learning_rate": 9.012991727413487e-05, |
| "loss": 0.0475, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.521112255406797, |
| "grad_norm": 0.3444644808769226, |
| "learning_rate": 9.007505360724453e-05, |
| "loss": 0.0423, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.531410916580844, |
| "grad_norm": 0.3524458110332489, |
| "learning_rate": 9.002005467657586e-05, |
| "loss": 0.058, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.541709577754892, |
| "grad_norm": 0.4131333529949188, |
| "learning_rate": 8.996492066776464e-05, |
| "loss": 0.0462, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.55200823892894, |
| "grad_norm": 0.35865673422813416, |
| "learning_rate": 8.990965176690252e-05, |
| "loss": 0.0493, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.562306900102986, |
| "grad_norm": 0.3511912524700165, |
| "learning_rate": 8.985424816053651e-05, |
| "loss": 0.0561, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.572605561277034, |
| "grad_norm": 0.2704029083251953, |
| "learning_rate": 8.979871003566826e-05, |
| "loss": 0.0526, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.582904222451082, |
| "grad_norm": 0.3202318847179413, |
| "learning_rate": 8.974303757975345e-05, |
| "loss": 0.0532, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.593202883625128, |
| "grad_norm": 0.31483763456344604, |
| "learning_rate": 8.968723098070117e-05, |
| "loss": 0.051, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.603501544799176, |
| "grad_norm": 0.3457460403442383, |
| "learning_rate": 8.963129042687329e-05, |
| "loss": 0.0507, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.613800205973224, |
| "grad_norm": 0.31409910321235657, |
| "learning_rate": 8.957521610708375e-05, |
| "loss": 0.0503, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.6240988671472705, |
| "grad_norm": 0.2827114164829254, |
| "learning_rate": 8.951900821059809e-05, |
| "loss": 0.0494, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.634397528321318, |
| "grad_norm": 0.31604471802711487, |
| "learning_rate": 8.946266692713261e-05, |
| "loss": 0.0483, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.644696189495366, |
| "grad_norm": 0.3118681311607361, |
| "learning_rate": 8.940619244685388e-05, |
| "loss": 0.0553, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.6549948506694125, |
| "grad_norm": 0.2974856197834015, |
| "learning_rate": 8.934958496037802e-05, |
| "loss": 0.051, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.66529351184346, |
| "grad_norm": 0.3584068715572357, |
| "learning_rate": 8.92928446587701e-05, |
| "loss": 0.0459, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.675592173017508, |
| "grad_norm": 0.36687174439430237, |
| "learning_rate": 8.923597173354345e-05, |
| "loss": 0.0483, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.6858908341915555, |
| "grad_norm": 0.35569944977760315, |
| "learning_rate": 8.917896637665908e-05, |
| "loss": 0.05, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.696189495365602, |
| "grad_norm": 0.38467368483543396, |
| "learning_rate": 8.912182878052495e-05, |
| "loss": 0.0421, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.70648815653965, |
| "grad_norm": 0.36783739924430847, |
| "learning_rate": 8.906455913799538e-05, |
| "loss": 0.0509, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.716786817713698, |
| "grad_norm": 0.2462991178035736, |
| "learning_rate": 8.900715764237037e-05, |
| "loss": 0.0469, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.727085478887744, |
| "grad_norm": 0.3449934720993042, |
| "learning_rate": 8.894962448739499e-05, |
| "loss": 0.0467, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.737384140061792, |
| "grad_norm": 0.38251376152038574, |
| "learning_rate": 8.889195986725865e-05, |
| "loss": 0.049, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.74768280123584, |
| "grad_norm": 0.30399325489997864, |
| "learning_rate": 8.883416397659452e-05, |
| "loss": 0.0532, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.757981462409886, |
| "grad_norm": 0.4609906077384949, |
| "learning_rate": 8.877623701047885e-05, |
| "loss": 0.0511, |
| "step": 4620 |
| }, |
| { |
| "epoch": 4.768280123583934, |
| "grad_norm": 0.40049266815185547, |
| "learning_rate": 8.871817916443025e-05, |
| "loss": 0.0567, |
| "step": 4630 |
| }, |
| { |
| "epoch": 4.778578784757982, |
| "grad_norm": 0.5834691524505615, |
| "learning_rate": 8.865999063440916e-05, |
| "loss": 0.0491, |
| "step": 4640 |
| }, |
| { |
| "epoch": 4.7888774459320285, |
| "grad_norm": 0.4367988705635071, |
| "learning_rate": 8.860167161681707e-05, |
| "loss": 0.0573, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.799176107106076, |
| "grad_norm": 0.33364230394363403, |
| "learning_rate": 8.854322230849588e-05, |
| "loss": 0.0604, |
| "step": 4660 |
| }, |
| { |
| "epoch": 4.809474768280124, |
| "grad_norm": 0.42235320806503296, |
| "learning_rate": 8.848464290672729e-05, |
| "loss": 0.0518, |
| "step": 4670 |
| }, |
| { |
| "epoch": 4.819773429454171, |
| "grad_norm": 0.32555538415908813, |
| "learning_rate": 8.84259336092321e-05, |
| "loss": 0.0457, |
| "step": 4680 |
| }, |
| { |
| "epoch": 4.830072090628218, |
| "grad_norm": 0.34331732988357544, |
| "learning_rate": 8.836709461416952e-05, |
| "loss": 0.0558, |
| "step": 4690 |
| }, |
| { |
| "epoch": 4.840370751802266, |
| "grad_norm": 0.6019324064254761, |
| "learning_rate": 8.830812612013655e-05, |
| "loss": 0.0573, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.850669412976313, |
| "grad_norm": 0.2844030261039734, |
| "learning_rate": 8.824902832616723e-05, |
| "loss": 0.0571, |
| "step": 4710 |
| }, |
| { |
| "epoch": 4.86096807415036, |
| "grad_norm": 0.47788453102111816, |
| "learning_rate": 8.818980143173213e-05, |
| "loss": 0.0565, |
| "step": 4720 |
| }, |
| { |
| "epoch": 4.871266735324408, |
| "grad_norm": 0.24314385652542114, |
| "learning_rate": 8.81304456367374e-05, |
| "loss": 0.046, |
| "step": 4730 |
| }, |
| { |
| "epoch": 4.8815653964984556, |
| "grad_norm": 0.3316558301448822, |
| "learning_rate": 8.807096114152442e-05, |
| "loss": 0.0519, |
| "step": 4740 |
| }, |
| { |
| "epoch": 4.891864057672502, |
| "grad_norm": 0.4027853012084961, |
| "learning_rate": 8.801134814686891e-05, |
| "loss": 0.0495, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.90216271884655, |
| "grad_norm": 0.3290289342403412, |
| "learning_rate": 8.795160685398027e-05, |
| "loss": 0.0449, |
| "step": 4760 |
| }, |
| { |
| "epoch": 4.912461380020598, |
| "grad_norm": 0.3217390775680542, |
| "learning_rate": 8.789173746450101e-05, |
| "loss": 0.0578, |
| "step": 4770 |
| }, |
| { |
| "epoch": 4.922760041194644, |
| "grad_norm": 0.43397730588912964, |
| "learning_rate": 8.783174018050594e-05, |
| "loss": 0.0483, |
| "step": 4780 |
| }, |
| { |
| "epoch": 4.933058702368692, |
| "grad_norm": 0.38298988342285156, |
| "learning_rate": 8.777161520450158e-05, |
| "loss": 0.0479, |
| "step": 4790 |
| }, |
| { |
| "epoch": 4.94335736354274, |
| "grad_norm": 0.36208289861679077, |
| "learning_rate": 8.771136273942544e-05, |
| "loss": 0.0525, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.9536560247167865, |
| "grad_norm": 0.3291323482990265, |
| "learning_rate": 8.765098298864533e-05, |
| "loss": 0.0469, |
| "step": 4810 |
| }, |
| { |
| "epoch": 4.963954685890834, |
| "grad_norm": 0.23334382474422455, |
| "learning_rate": 8.759047615595869e-05, |
| "loss": 0.0478, |
| "step": 4820 |
| }, |
| { |
| "epoch": 4.974253347064882, |
| "grad_norm": 0.3632581830024719, |
| "learning_rate": 8.752984244559188e-05, |
| "loss": 0.0558, |
| "step": 4830 |
| }, |
| { |
| "epoch": 4.9845520082389285, |
| "grad_norm": 0.3983827531337738, |
| "learning_rate": 8.746908206219955e-05, |
| "loss": 0.0584, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.994850669412976, |
| "grad_norm": 0.5021440982818604, |
| "learning_rate": 8.740819521086383e-05, |
| "loss": 0.0522, |
| "step": 4850 |
| }, |
| { |
| "epoch": 5.005149330587024, |
| "grad_norm": 0.4782863259315491, |
| "learning_rate": 8.734718209709377e-05, |
| "loss": 0.0503, |
| "step": 4860 |
| }, |
| { |
| "epoch": 5.0154479917610715, |
| "grad_norm": 0.3124346435070038, |
| "learning_rate": 8.728604292682459e-05, |
| "loss": 0.0523, |
| "step": 4870 |
| }, |
| { |
| "epoch": 5.025746652935118, |
| "grad_norm": 0.46991485357284546, |
| "learning_rate": 8.722477790641694e-05, |
| "loss": 0.0507, |
| "step": 4880 |
| }, |
| { |
| "epoch": 5.036045314109166, |
| "grad_norm": 0.381569504737854, |
| "learning_rate": 8.71633872426563e-05, |
| "loss": 0.0473, |
| "step": 4890 |
| }, |
| { |
| "epoch": 5.0463439752832135, |
| "grad_norm": 0.4210774004459381, |
| "learning_rate": 8.710187114275219e-05, |
| "loss": 0.0521, |
| "step": 4900 |
| }, |
| { |
| "epoch": 5.05664263645726, |
| "grad_norm": 0.3999352753162384, |
| "learning_rate": 8.70402298143375e-05, |
| "loss": 0.0548, |
| "step": 4910 |
| }, |
| { |
| "epoch": 5.066941297631308, |
| "grad_norm": 0.32023027539253235, |
| "learning_rate": 8.697846346546787e-05, |
| "loss": 0.0508, |
| "step": 4920 |
| }, |
| { |
| "epoch": 5.077239958805356, |
| "grad_norm": 0.38814589381217957, |
| "learning_rate": 8.691657230462083e-05, |
| "loss": 0.0484, |
| "step": 4930 |
| }, |
| { |
| "epoch": 5.087538619979402, |
| "grad_norm": 0.3033084571361542, |
| "learning_rate": 8.685455654069523e-05, |
| "loss": 0.0432, |
| "step": 4940 |
| }, |
| { |
| "epoch": 5.09783728115345, |
| "grad_norm": 0.39010483026504517, |
| "learning_rate": 8.679241638301049e-05, |
| "loss": 0.0506, |
| "step": 4950 |
| }, |
| { |
| "epoch": 5.108135942327498, |
| "grad_norm": 0.28835776448249817, |
| "learning_rate": 8.673015204130586e-05, |
| "loss": 0.0543, |
| "step": 4960 |
| }, |
| { |
| "epoch": 5.1184346035015444, |
| "grad_norm": 0.5217164754867554, |
| "learning_rate": 8.66677637257398e-05, |
| "loss": 0.0501, |
| "step": 4970 |
| }, |
| { |
| "epoch": 5.128733264675592, |
| "grad_norm": 0.4083517789840698, |
| "learning_rate": 8.660525164688913e-05, |
| "loss": 0.0572, |
| "step": 4980 |
| }, |
| { |
| "epoch": 5.13903192584964, |
| "grad_norm": 0.5034805536270142, |
| "learning_rate": 8.654261601574849e-05, |
| "loss": 0.0541, |
| "step": 4990 |
| }, |
| { |
| "epoch": 5.1493305870236865, |
| "grad_norm": 0.3255571126937866, |
| "learning_rate": 8.647985704372948e-05, |
| "loss": 0.0539, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.159629248197734, |
| "grad_norm": 0.589500367641449, |
| "learning_rate": 8.641697494266006e-05, |
| "loss": 0.0497, |
| "step": 5010 |
| }, |
| { |
| "epoch": 5.169927909371782, |
| "grad_norm": 0.3600839674472809, |
| "learning_rate": 8.635396992478371e-05, |
| "loss": 0.0564, |
| "step": 5020 |
| }, |
| { |
| "epoch": 5.1802265705458295, |
| "grad_norm": 0.3535096049308777, |
| "learning_rate": 8.629084220275887e-05, |
| "loss": 0.0528, |
| "step": 5030 |
| }, |
| { |
| "epoch": 5.190525231719876, |
| "grad_norm": 0.3266212046146393, |
| "learning_rate": 8.622759198965809e-05, |
| "loss": 0.0476, |
| "step": 5040 |
| }, |
| { |
| "epoch": 5.200823892893924, |
| "grad_norm": 0.4038067162036896, |
| "learning_rate": 8.616421949896734e-05, |
| "loss": 0.0517, |
| "step": 5050 |
| }, |
| { |
| "epoch": 5.2111225540679715, |
| "grad_norm": 0.3460542857646942, |
| "learning_rate": 8.610072494458535e-05, |
| "loss": 0.0474, |
| "step": 5060 |
| }, |
| { |
| "epoch": 5.221421215242018, |
| "grad_norm": 0.41362518072128296, |
| "learning_rate": 8.603710854082286e-05, |
| "loss": 0.0515, |
| "step": 5070 |
| }, |
| { |
| "epoch": 5.231719876416066, |
| "grad_norm": 0.2805697023868561, |
| "learning_rate": 8.597337050240184e-05, |
| "loss": 0.0519, |
| "step": 5080 |
| }, |
| { |
| "epoch": 5.242018537590114, |
| "grad_norm": 0.4825451374053955, |
| "learning_rate": 8.590951104445482e-05, |
| "loss": 0.0504, |
| "step": 5090 |
| }, |
| { |
| "epoch": 5.25231719876416, |
| "grad_norm": 0.3441821038722992, |
| "learning_rate": 8.584553038252414e-05, |
| "loss": 0.0581, |
| "step": 5100 |
| }, |
| { |
| "epoch": 5.262615859938208, |
| "grad_norm": 0.39510828256607056, |
| "learning_rate": 8.578142873256129e-05, |
| "loss": 0.0532, |
| "step": 5110 |
| }, |
| { |
| "epoch": 5.272914521112256, |
| "grad_norm": 0.3733309805393219, |
| "learning_rate": 8.571720631092609e-05, |
| "loss": 0.057, |
| "step": 5120 |
| }, |
| { |
| "epoch": 5.283213182286302, |
| "grad_norm": 0.3860830068588257, |
| "learning_rate": 8.565286333438594e-05, |
| "loss": 0.049, |
| "step": 5130 |
| }, |
| { |
| "epoch": 5.29351184346035, |
| "grad_norm": 0.3507029414176941, |
| "learning_rate": 8.558840002011528e-05, |
| "loss": 0.0542, |
| "step": 5140 |
| }, |
| { |
| "epoch": 5.303810504634398, |
| "grad_norm": 0.30535757541656494, |
| "learning_rate": 8.552381658569457e-05, |
| "loss": 0.0584, |
| "step": 5150 |
| }, |
| { |
| "epoch": 5.3141091658084445, |
| "grad_norm": 0.3580070734024048, |
| "learning_rate": 8.545911324910982e-05, |
| "loss": 0.0509, |
| "step": 5160 |
| }, |
| { |
| "epoch": 5.324407826982492, |
| "grad_norm": 0.21992090344429016, |
| "learning_rate": 8.539429022875169e-05, |
| "loss": 0.0412, |
| "step": 5170 |
| }, |
| { |
| "epoch": 5.33470648815654, |
| "grad_norm": 0.6406000852584839, |
| "learning_rate": 8.532934774341483e-05, |
| "loss": 0.0518, |
| "step": 5180 |
| }, |
| { |
| "epoch": 5.3450051493305875, |
| "grad_norm": 0.43300265073776245, |
| "learning_rate": 8.526428601229706e-05, |
| "loss": 0.0539, |
| "step": 5190 |
| }, |
| { |
| "epoch": 5.355303810504634, |
| "grad_norm": 0.5168215036392212, |
| "learning_rate": 8.519910525499874e-05, |
| "loss": 0.0552, |
| "step": 5200 |
| }, |
| { |
| "epoch": 5.365602471678682, |
| "grad_norm": 0.2501913905143738, |
| "learning_rate": 8.513380569152196e-05, |
| "loss": 0.0506, |
| "step": 5210 |
| }, |
| { |
| "epoch": 5.3759011328527295, |
| "grad_norm": 0.2757486402988434, |
| "learning_rate": 8.506838754226982e-05, |
| "loss": 0.0565, |
| "step": 5220 |
| }, |
| { |
| "epoch": 5.386199794026776, |
| "grad_norm": 0.47264114022254944, |
| "learning_rate": 8.500285102804568e-05, |
| "loss": 0.0519, |
| "step": 5230 |
| }, |
| { |
| "epoch": 5.396498455200824, |
| "grad_norm": 0.30214348435401917, |
| "learning_rate": 8.493719637005237e-05, |
| "loss": 0.0424, |
| "step": 5240 |
| }, |
| { |
| "epoch": 5.406797116374872, |
| "grad_norm": 0.4345119893550873, |
| "learning_rate": 8.487142378989152e-05, |
| "loss": 0.0412, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.417095777548918, |
| "grad_norm": 0.33627235889434814, |
| "learning_rate": 8.480553350956282e-05, |
| "loss": 0.0481, |
| "step": 5260 |
| }, |
| { |
| "epoch": 5.427394438722966, |
| "grad_norm": 0.3047385811805725, |
| "learning_rate": 8.473952575146312e-05, |
| "loss": 0.0481, |
| "step": 5270 |
| }, |
| { |
| "epoch": 5.437693099897014, |
| "grad_norm": 0.4447433352470398, |
| "learning_rate": 8.46734007383859e-05, |
| "loss": 0.046, |
| "step": 5280 |
| }, |
| { |
| "epoch": 5.44799176107106, |
| "grad_norm": 0.4087453782558441, |
| "learning_rate": 8.460715869352035e-05, |
| "loss": 0.0487, |
| "step": 5290 |
| }, |
| { |
| "epoch": 5.458290422245108, |
| "grad_norm": 0.3321467339992523, |
| "learning_rate": 8.454079984045065e-05, |
| "loss": 0.0413, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.468589083419156, |
| "grad_norm": 0.356514036655426, |
| "learning_rate": 8.447432440315533e-05, |
| "loss": 0.049, |
| "step": 5310 |
| }, |
| { |
| "epoch": 5.4788877445932025, |
| "grad_norm": 0.37567445635795593, |
| "learning_rate": 8.44077326060063e-05, |
| "loss": 0.0461, |
| "step": 5320 |
| }, |
| { |
| "epoch": 5.48918640576725, |
| "grad_norm": 0.3040042519569397, |
| "learning_rate": 8.434102467376832e-05, |
| "loss": 0.0401, |
| "step": 5330 |
| }, |
| { |
| "epoch": 5.499485066941298, |
| "grad_norm": 0.39934873580932617, |
| "learning_rate": 8.427420083159807e-05, |
| "loss": 0.0493, |
| "step": 5340 |
| }, |
| { |
| "epoch": 5.509783728115345, |
| "grad_norm": 0.4000271260738373, |
| "learning_rate": 8.420726130504351e-05, |
| "loss": 0.0541, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.520082389289392, |
| "grad_norm": 0.2750590443611145, |
| "learning_rate": 8.414020632004299e-05, |
| "loss": 0.0481, |
| "step": 5360 |
| }, |
| { |
| "epoch": 5.53038105046344, |
| "grad_norm": 0.4174776077270508, |
| "learning_rate": 8.407303610292462e-05, |
| "loss": 0.0501, |
| "step": 5370 |
| }, |
| { |
| "epoch": 5.5406797116374875, |
| "grad_norm": 0.2651192247867584, |
| "learning_rate": 8.400575088040548e-05, |
| "loss": 0.0491, |
| "step": 5380 |
| }, |
| { |
| "epoch": 5.550978372811534, |
| "grad_norm": 0.49490901827812195, |
| "learning_rate": 8.393835087959072e-05, |
| "loss": 0.0488, |
| "step": 5390 |
| }, |
| { |
| "epoch": 5.561277033985582, |
| "grad_norm": 0.6012644171714783, |
| "learning_rate": 8.387083632797299e-05, |
| "loss": 0.05, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.57157569515963, |
| "grad_norm": 0.4538785219192505, |
| "learning_rate": 8.380320745343153e-05, |
| "loss": 0.0479, |
| "step": 5410 |
| }, |
| { |
| "epoch": 5.581874356333676, |
| "grad_norm": 0.358992338180542, |
| "learning_rate": 8.373546448423147e-05, |
| "loss": 0.05, |
| "step": 5420 |
| }, |
| { |
| "epoch": 5.592173017507724, |
| "grad_norm": 0.3814113736152649, |
| "learning_rate": 8.366760764902304e-05, |
| "loss": 0.0415, |
| "step": 5430 |
| }, |
| { |
| "epoch": 5.602471678681772, |
| "grad_norm": 0.6442550420761108, |
| "learning_rate": 8.359963717684077e-05, |
| "loss": 0.0495, |
| "step": 5440 |
| }, |
| { |
| "epoch": 5.612770339855818, |
| "grad_norm": 0.34561294317245483, |
| "learning_rate": 8.353155329710279e-05, |
| "loss": 0.0507, |
| "step": 5450 |
| }, |
| { |
| "epoch": 5.623069001029866, |
| "grad_norm": 0.333892822265625, |
| "learning_rate": 8.346335623960998e-05, |
| "loss": 0.0406, |
| "step": 5460 |
| }, |
| { |
| "epoch": 5.633367662203914, |
| "grad_norm": 0.21642594039440155, |
| "learning_rate": 8.339504623454521e-05, |
| "loss": 0.05, |
| "step": 5470 |
| }, |
| { |
| "epoch": 5.6436663233779605, |
| "grad_norm": 0.21974137425422668, |
| "learning_rate": 8.332662351247262e-05, |
| "loss": 0.0497, |
| "step": 5480 |
| }, |
| { |
| "epoch": 5.653964984552008, |
| "grad_norm": 0.35917189717292786, |
| "learning_rate": 8.325808830433679e-05, |
| "loss": 0.041, |
| "step": 5490 |
| }, |
| { |
| "epoch": 5.664263645726056, |
| "grad_norm": 0.2640712857246399, |
| "learning_rate": 8.318944084146192e-05, |
| "loss": 0.047, |
| "step": 5500 |
| }, |
| { |
| "epoch": 5.674562306900103, |
| "grad_norm": 6.280691623687744, |
| "learning_rate": 8.312068135555115e-05, |
| "loss": 0.0481, |
| "step": 5510 |
| }, |
| { |
| "epoch": 5.68486096807415, |
| "grad_norm": 0.269490122795105, |
| "learning_rate": 8.305181007868572e-05, |
| "loss": 0.0416, |
| "step": 5520 |
| }, |
| { |
| "epoch": 5.695159629248198, |
| "grad_norm": 0.408123254776001, |
| "learning_rate": 8.298282724332419e-05, |
| "loss": 0.049, |
| "step": 5530 |
| }, |
| { |
| "epoch": 5.705458290422245, |
| "grad_norm": 0.2983226478099823, |
| "learning_rate": 8.291373308230165e-05, |
| "loss": 0.0497, |
| "step": 5540 |
| }, |
| { |
| "epoch": 5.715756951596292, |
| "grad_norm": 0.35842761397361755, |
| "learning_rate": 8.284452782882894e-05, |
| "loss": 0.0477, |
| "step": 5550 |
| }, |
| { |
| "epoch": 5.72605561277034, |
| "grad_norm": 0.2742210328578949, |
| "learning_rate": 8.277521171649189e-05, |
| "loss": 0.052, |
| "step": 5560 |
| }, |
| { |
| "epoch": 5.736354273944388, |
| "grad_norm": 0.2822439968585968, |
| "learning_rate": 8.27057849792505e-05, |
| "loss": 0.0491, |
| "step": 5570 |
| }, |
| { |
| "epoch": 5.746652935118434, |
| "grad_norm": 0.3104664385318756, |
| "learning_rate": 8.263624785143812e-05, |
| "loss": 0.0493, |
| "step": 5580 |
| }, |
| { |
| "epoch": 5.756951596292482, |
| "grad_norm": 0.32532253861427307, |
| "learning_rate": 8.256660056776076e-05, |
| "loss": 0.0581, |
| "step": 5590 |
| }, |
| { |
| "epoch": 5.76725025746653, |
| "grad_norm": 0.3366002142429352, |
| "learning_rate": 8.249684336329617e-05, |
| "loss": 0.043, |
| "step": 5600 |
| }, |
| { |
| "epoch": 5.777548918640576, |
| "grad_norm": 0.25842759013175964, |
| "learning_rate": 8.242697647349317e-05, |
| "loss": 0.0485, |
| "step": 5610 |
| }, |
| { |
| "epoch": 5.787847579814624, |
| "grad_norm": 0.302432656288147, |
| "learning_rate": 8.235700013417076e-05, |
| "loss": 0.0521, |
| "step": 5620 |
| }, |
| { |
| "epoch": 5.798146240988672, |
| "grad_norm": 0.3358532190322876, |
| "learning_rate": 8.228691458151738e-05, |
| "loss": 0.0441, |
| "step": 5630 |
| }, |
| { |
| "epoch": 5.8084449021627185, |
| "grad_norm": 0.4343230724334717, |
| "learning_rate": 8.221672005209008e-05, |
| "loss": 0.0521, |
| "step": 5640 |
| }, |
| { |
| "epoch": 5.818743563336766, |
| "grad_norm": 0.30650976300239563, |
| "learning_rate": 8.214641678281374e-05, |
| "loss": 0.0538, |
| "step": 5650 |
| }, |
| { |
| "epoch": 5.829042224510814, |
| "grad_norm": 0.3401453197002411, |
| "learning_rate": 8.207600501098026e-05, |
| "loss": 0.0428, |
| "step": 5660 |
| }, |
| { |
| "epoch": 5.8393408856848605, |
| "grad_norm": 0.45636221766471863, |
| "learning_rate": 8.200548497424778e-05, |
| "loss": 0.0582, |
| "step": 5670 |
| }, |
| { |
| "epoch": 5.849639546858908, |
| "grad_norm": 0.2774709165096283, |
| "learning_rate": 8.193485691063985e-05, |
| "loss": 0.048, |
| "step": 5680 |
| }, |
| { |
| "epoch": 5.859938208032956, |
| "grad_norm": 0.29194507002830505, |
| "learning_rate": 8.186412105854463e-05, |
| "loss": 0.0534, |
| "step": 5690 |
| }, |
| { |
| "epoch": 5.8702368692070035, |
| "grad_norm": 0.36549675464630127, |
| "learning_rate": 8.17932776567141e-05, |
| "loss": 0.0571, |
| "step": 5700 |
| }, |
| { |
| "epoch": 5.88053553038105, |
| "grad_norm": 0.302418977022171, |
| "learning_rate": 8.172232694426329e-05, |
| "loss": 0.0423, |
| "step": 5710 |
| }, |
| { |
| "epoch": 5.890834191555098, |
| "grad_norm": 0.27770909667015076, |
| "learning_rate": 8.165126916066936e-05, |
| "loss": 0.0487, |
| "step": 5720 |
| }, |
| { |
| "epoch": 5.901132852729146, |
| "grad_norm": 0.3784064054489136, |
| "learning_rate": 8.158010454577093e-05, |
| "loss": 0.0504, |
| "step": 5730 |
| }, |
| { |
| "epoch": 5.911431513903192, |
| "grad_norm": 0.29943570494651794, |
| "learning_rate": 8.150883333976713e-05, |
| "loss": 0.0458, |
| "step": 5740 |
| }, |
| { |
| "epoch": 5.92173017507724, |
| "grad_norm": 0.26842376589775085, |
| "learning_rate": 8.143745578321695e-05, |
| "loss": 0.0523, |
| "step": 5750 |
| }, |
| { |
| "epoch": 5.932028836251288, |
| "grad_norm": 0.19866850972175598, |
| "learning_rate": 8.136597211703827e-05, |
| "loss": 0.0429, |
| "step": 5760 |
| }, |
| { |
| "epoch": 5.942327497425334, |
| "grad_norm": 0.30413612723350525, |
| "learning_rate": 8.129438258250712e-05, |
| "loss": 0.0441, |
| "step": 5770 |
| }, |
| { |
| "epoch": 5.952626158599382, |
| "grad_norm": 0.2791491746902466, |
| "learning_rate": 8.122268742125695e-05, |
| "loss": 0.047, |
| "step": 5780 |
| }, |
| { |
| "epoch": 5.96292481977343, |
| "grad_norm": 0.34201282262802124, |
| "learning_rate": 8.115088687527761e-05, |
| "loss": 0.0501, |
| "step": 5790 |
| }, |
| { |
| "epoch": 5.9732234809474765, |
| "grad_norm": 0.39383724331855774, |
| "learning_rate": 8.107898118691473e-05, |
| "loss": 0.0497, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.983522142121524, |
| "grad_norm": 0.3670088052749634, |
| "learning_rate": 8.100697059886879e-05, |
| "loss": 0.0428, |
| "step": 5810 |
| }, |
| { |
| "epoch": 5.993820803295572, |
| "grad_norm": 0.3595752716064453, |
| "learning_rate": 8.093485535419434e-05, |
| "loss": 0.0467, |
| "step": 5820 |
| }, |
| { |
| "epoch": 6.0041194644696185, |
| "grad_norm": 0.403352290391922, |
| "learning_rate": 8.086263569629919e-05, |
| "loss": 0.0441, |
| "step": 5830 |
| }, |
| { |
| "epoch": 6.014418125643666, |
| "grad_norm": 0.18506278097629547, |
| "learning_rate": 8.079031186894354e-05, |
| "loss": 0.0508, |
| "step": 5840 |
| }, |
| { |
| "epoch": 6.024716786817714, |
| "grad_norm": 0.5713401436805725, |
| "learning_rate": 8.071788411623922e-05, |
| "loss": 0.0491, |
| "step": 5850 |
| }, |
| { |
| "epoch": 6.0350154479917615, |
| "grad_norm": 0.20415346324443817, |
| "learning_rate": 8.064535268264883e-05, |
| "loss": 0.0502, |
| "step": 5860 |
| }, |
| { |
| "epoch": 6.045314109165808, |
| "grad_norm": 0.28075137734413147, |
| "learning_rate": 8.057271781298489e-05, |
| "loss": 0.0512, |
| "step": 5870 |
| }, |
| { |
| "epoch": 6.055612770339856, |
| "grad_norm": 0.3114660680294037, |
| "learning_rate": 8.049997975240909e-05, |
| "loss": 0.0508, |
| "step": 5880 |
| }, |
| { |
| "epoch": 6.0659114315139036, |
| "grad_norm": 0.3134065866470337, |
| "learning_rate": 8.042713874643136e-05, |
| "loss": 0.0531, |
| "step": 5890 |
| }, |
| { |
| "epoch": 6.07621009268795, |
| "grad_norm": 0.24600578844547272, |
| "learning_rate": 8.035419504090915e-05, |
| "loss": 0.0478, |
| "step": 5900 |
| }, |
| { |
| "epoch": 6.086508753861998, |
| "grad_norm": 0.34766799211502075, |
| "learning_rate": 8.028114888204653e-05, |
| "loss": 0.0486, |
| "step": 5910 |
| }, |
| { |
| "epoch": 6.096807415036046, |
| "grad_norm": 0.3067956268787384, |
| "learning_rate": 8.020800051639337e-05, |
| "loss": 0.0452, |
| "step": 5920 |
| }, |
| { |
| "epoch": 6.107106076210092, |
| "grad_norm": 0.3019874691963196, |
| "learning_rate": 8.013475019084453e-05, |
| "loss": 0.0458, |
| "step": 5930 |
| }, |
| { |
| "epoch": 6.11740473738414, |
| "grad_norm": 0.3271634578704834, |
| "learning_rate": 8.006139815263898e-05, |
| "loss": 0.0561, |
| "step": 5940 |
| }, |
| { |
| "epoch": 6.127703398558188, |
| "grad_norm": 0.2930561304092407, |
| "learning_rate": 7.998794464935904e-05, |
| "loss": 0.0407, |
| "step": 5950 |
| }, |
| { |
| "epoch": 6.1380020597322344, |
| "grad_norm": 0.37962770462036133, |
| "learning_rate": 7.991438992892946e-05, |
| "loss": 0.048, |
| "step": 5960 |
| }, |
| { |
| "epoch": 6.148300720906282, |
| "grad_norm": 0.36476749181747437, |
| "learning_rate": 7.984073423961664e-05, |
| "loss": 0.0439, |
| "step": 5970 |
| }, |
| { |
| "epoch": 6.15859938208033, |
| "grad_norm": 0.31208914518356323, |
| "learning_rate": 7.97669778300278e-05, |
| "loss": 0.0431, |
| "step": 5980 |
| }, |
| { |
| "epoch": 6.1688980432543765, |
| "grad_norm": 0.758002758026123, |
| "learning_rate": 7.969312094911007e-05, |
| "loss": 0.0481, |
| "step": 5990 |
| }, |
| { |
| "epoch": 6.179196704428424, |
| "grad_norm": 1.8981136083602905, |
| "learning_rate": 7.961916384614975e-05, |
| "loss": 0.0621, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.189495365602472, |
| "grad_norm": 0.277136892080307, |
| "learning_rate": 7.954510677077138e-05, |
| "loss": 0.0586, |
| "step": 6010 |
| }, |
| { |
| "epoch": 6.1997940267765195, |
| "grad_norm": 0.27095285058021545, |
| "learning_rate": 7.947094997293695e-05, |
| "loss": 0.0484, |
| "step": 6020 |
| }, |
| { |
| "epoch": 6.210092687950566, |
| "grad_norm": 0.2608092427253723, |
| "learning_rate": 7.9396693702945e-05, |
| "loss": 0.0457, |
| "step": 6030 |
| }, |
| { |
| "epoch": 6.220391349124614, |
| "grad_norm": 0.5210095643997192, |
| "learning_rate": 7.932233821142987e-05, |
| "loss": 0.0473, |
| "step": 6040 |
| }, |
| { |
| "epoch": 6.2306900102986615, |
| "grad_norm": 0.254302978515625, |
| "learning_rate": 7.924788374936078e-05, |
| "loss": 0.045, |
| "step": 6050 |
| }, |
| { |
| "epoch": 6.240988671472708, |
| "grad_norm": 0.343322217464447, |
| "learning_rate": 7.917333056804097e-05, |
| "loss": 0.054, |
| "step": 6060 |
| }, |
| { |
| "epoch": 6.251287332646756, |
| "grad_norm": 0.4098043143749237, |
| "learning_rate": 7.909867891910694e-05, |
| "loss": 0.0435, |
| "step": 6070 |
| }, |
| { |
| "epoch": 6.261585993820804, |
| "grad_norm": 0.34776240587234497, |
| "learning_rate": 7.902392905452749e-05, |
| "loss": 0.0538, |
| "step": 6080 |
| }, |
| { |
| "epoch": 6.27188465499485, |
| "grad_norm": 0.5250643491744995, |
| "learning_rate": 7.894908122660296e-05, |
| "loss": 0.0431, |
| "step": 6090 |
| }, |
| { |
| "epoch": 6.282183316168898, |
| "grad_norm": 0.37657663226127625, |
| "learning_rate": 7.887413568796433e-05, |
| "loss": 0.0532, |
| "step": 6100 |
| }, |
| { |
| "epoch": 6.292481977342946, |
| "grad_norm": 0.28036069869995117, |
| "learning_rate": 7.879909269157236e-05, |
| "loss": 0.0382, |
| "step": 6110 |
| }, |
| { |
| "epoch": 6.302780638516992, |
| "grad_norm": 0.4012965261936188, |
| "learning_rate": 7.87239524907168e-05, |
| "loss": 0.0472, |
| "step": 6120 |
| }, |
| { |
| "epoch": 6.31307929969104, |
| "grad_norm": 0.4002419412136078, |
| "learning_rate": 7.864871533901544e-05, |
| "loss": 0.051, |
| "step": 6130 |
| }, |
| { |
| "epoch": 6.323377960865088, |
| "grad_norm": 0.3897566795349121, |
| "learning_rate": 7.857338149041332e-05, |
| "loss": 0.0487, |
| "step": 6140 |
| }, |
| { |
| "epoch": 6.3336766220391345, |
| "grad_norm": 0.4365810751914978, |
| "learning_rate": 7.849795119918191e-05, |
| "loss": 0.0486, |
| "step": 6150 |
| }, |
| { |
| "epoch": 6.343975283213182, |
| "grad_norm": 0.38556814193725586, |
| "learning_rate": 7.842242471991809e-05, |
| "loss": 0.0509, |
| "step": 6160 |
| }, |
| { |
| "epoch": 6.35427394438723, |
| "grad_norm": 0.3570299744606018, |
| "learning_rate": 7.834680230754353e-05, |
| "loss": 0.0485, |
| "step": 6170 |
| }, |
| { |
| "epoch": 6.364572605561277, |
| "grad_norm": 0.25796523690223694, |
| "learning_rate": 7.82710842173036e-05, |
| "loss": 0.0474, |
| "step": 6180 |
| }, |
| { |
| "epoch": 6.374871266735324, |
| "grad_norm": 0.4013979732990265, |
| "learning_rate": 7.819527070476665e-05, |
| "loss": 0.0453, |
| "step": 6190 |
| }, |
| { |
| "epoch": 6.385169927909372, |
| "grad_norm": 0.2755083739757538, |
| "learning_rate": 7.811936202582306e-05, |
| "loss": 0.0407, |
| "step": 6200 |
| }, |
| { |
| "epoch": 6.3954685890834195, |
| "grad_norm": 0.8050864338874817, |
| "learning_rate": 7.80433584366845e-05, |
| "loss": 0.0468, |
| "step": 6210 |
| }, |
| { |
| "epoch": 6.405767250257466, |
| "grad_norm": 0.5987268686294556, |
| "learning_rate": 7.796726019388295e-05, |
| "loss": 0.0445, |
| "step": 6220 |
| }, |
| { |
| "epoch": 6.416065911431514, |
| "grad_norm": 0.31688612699508667, |
| "learning_rate": 7.789106755426985e-05, |
| "loss": 0.0414, |
| "step": 6230 |
| }, |
| { |
| "epoch": 6.426364572605562, |
| "grad_norm": 0.2687252163887024, |
| "learning_rate": 7.781478077501525e-05, |
| "loss": 0.0381, |
| "step": 6240 |
| }, |
| { |
| "epoch": 6.436663233779608, |
| "grad_norm": 0.31859585642814636, |
| "learning_rate": 7.773840011360698e-05, |
| "loss": 0.0486, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.446961894953656, |
| "grad_norm": 0.39176130294799805, |
| "learning_rate": 7.766192582784974e-05, |
| "loss": 0.0492, |
| "step": 6260 |
| }, |
| { |
| "epoch": 6.457260556127704, |
| "grad_norm": 0.4192884862422943, |
| "learning_rate": 7.758535817586424e-05, |
| "loss": 0.0524, |
| "step": 6270 |
| }, |
| { |
| "epoch": 6.46755921730175, |
| "grad_norm": 0.41165101528167725, |
| "learning_rate": 7.750869741608628e-05, |
| "loss": 0.0459, |
| "step": 6280 |
| }, |
| { |
| "epoch": 6.477857878475798, |
| "grad_norm": 0.37704214453697205, |
| "learning_rate": 7.7431943807266e-05, |
| "loss": 0.0555, |
| "step": 6290 |
| }, |
| { |
| "epoch": 6.488156539649846, |
| "grad_norm": 0.4949089586734772, |
| "learning_rate": 7.735509760846682e-05, |
| "loss": 0.0493, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.4984552008238925, |
| "grad_norm": 0.27363213896751404, |
| "learning_rate": 7.727815907906481e-05, |
| "loss": 0.0498, |
| "step": 6310 |
| }, |
| { |
| "epoch": 6.50875386199794, |
| "grad_norm": 0.32286787033081055, |
| "learning_rate": 7.720112847874759e-05, |
| "loss": 0.0445, |
| "step": 6320 |
| }, |
| { |
| "epoch": 6.519052523171988, |
| "grad_norm": 0.2211546152830124, |
| "learning_rate": 7.712400606751356e-05, |
| "loss": 0.0475, |
| "step": 6330 |
| }, |
| { |
| "epoch": 6.5293511843460355, |
| "grad_norm": 0.2400301843881607, |
| "learning_rate": 7.7046792105671e-05, |
| "loss": 0.0459, |
| "step": 6340 |
| }, |
| { |
| "epoch": 6.539649845520082, |
| "grad_norm": 0.3111647069454193, |
| "learning_rate": 7.696948685383725e-05, |
| "loss": 0.0492, |
| "step": 6350 |
| }, |
| { |
| "epoch": 6.54994850669413, |
| "grad_norm": 0.3468630313873291, |
| "learning_rate": 7.68920905729377e-05, |
| "loss": 0.0422, |
| "step": 6360 |
| }, |
| { |
| "epoch": 6.5602471678681775, |
| "grad_norm": 0.4992178678512573, |
| "learning_rate": 7.6814603524205e-05, |
| "loss": 0.0489, |
| "step": 6370 |
| }, |
| { |
| "epoch": 6.570545829042224, |
| "grad_norm": 0.33954063057899475, |
| "learning_rate": 7.673702596917824e-05, |
| "loss": 0.0483, |
| "step": 6380 |
| }, |
| { |
| "epoch": 6.580844490216272, |
| "grad_norm": 0.3721350133419037, |
| "learning_rate": 7.665935816970193e-05, |
| "loss": 0.0415, |
| "step": 6390 |
| }, |
| { |
| "epoch": 6.59114315139032, |
| "grad_norm": 0.30230167508125305, |
| "learning_rate": 7.658160038792518e-05, |
| "loss": 0.0431, |
| "step": 6400 |
| }, |
| { |
| "epoch": 6.601441812564366, |
| "grad_norm": 0.2966795861721039, |
| "learning_rate": 7.650375288630083e-05, |
| "loss": 0.0431, |
| "step": 6410 |
| }, |
| { |
| "epoch": 6.611740473738414, |
| "grad_norm": 0.28090888261795044, |
| "learning_rate": 7.642581592758453e-05, |
| "loss": 0.0413, |
| "step": 6420 |
| }, |
| { |
| "epoch": 6.622039134912462, |
| "grad_norm": 0.3371041715145111, |
| "learning_rate": 7.634778977483389e-05, |
| "loss": 0.0469, |
| "step": 6430 |
| }, |
| { |
| "epoch": 6.632337796086508, |
| "grad_norm": 0.28260523080825806, |
| "learning_rate": 7.626967469140754e-05, |
| "loss": 0.0437, |
| "step": 6440 |
| }, |
| { |
| "epoch": 6.642636457260556, |
| "grad_norm": 0.2734527289867401, |
| "learning_rate": 7.619147094096434e-05, |
| "loss": 0.043, |
| "step": 6450 |
| }, |
| { |
| "epoch": 6.652935118434604, |
| "grad_norm": 0.3294004797935486, |
| "learning_rate": 7.611317878746238e-05, |
| "loss": 0.0414, |
| "step": 6460 |
| }, |
| { |
| "epoch": 6.663233779608651, |
| "grad_norm": 0.45815443992614746, |
| "learning_rate": 7.60347984951581e-05, |
| "loss": 0.0496, |
| "step": 6470 |
| }, |
| { |
| "epoch": 6.673532440782698, |
| "grad_norm": 0.24537749588489532, |
| "learning_rate": 7.59563303286055e-05, |
| "loss": 0.0425, |
| "step": 6480 |
| }, |
| { |
| "epoch": 6.683831101956746, |
| "grad_norm": 0.32262513041496277, |
| "learning_rate": 7.587777455265515e-05, |
| "loss": 0.042, |
| "step": 6490 |
| }, |
| { |
| "epoch": 6.6941297631307926, |
| "grad_norm": 0.19561485946178436, |
| "learning_rate": 7.579913143245328e-05, |
| "loss": 0.0424, |
| "step": 6500 |
| }, |
| { |
| "epoch": 6.70442842430484, |
| "grad_norm": 0.29754048585891724, |
| "learning_rate": 7.572040123344103e-05, |
| "loss": 0.0466, |
| "step": 6510 |
| }, |
| { |
| "epoch": 6.714727085478888, |
| "grad_norm": 0.33084553480148315, |
| "learning_rate": 7.564158422135337e-05, |
| "loss": 0.0496, |
| "step": 6520 |
| }, |
| { |
| "epoch": 6.7250257466529355, |
| "grad_norm": 0.40858951210975647, |
| "learning_rate": 7.55626806622183e-05, |
| "loss": 0.0481, |
| "step": 6530 |
| }, |
| { |
| "epoch": 6.735324407826982, |
| "grad_norm": 0.9231746792793274, |
| "learning_rate": 7.548369082235595e-05, |
| "loss": 0.0512, |
| "step": 6540 |
| }, |
| { |
| "epoch": 6.74562306900103, |
| "grad_norm": 0.4263251721858978, |
| "learning_rate": 7.54046149683777e-05, |
| "loss": 0.0429, |
| "step": 6550 |
| }, |
| { |
| "epoch": 6.755921730175078, |
| "grad_norm": 0.2868654131889343, |
| "learning_rate": 7.532545336718521e-05, |
| "loss": 0.048, |
| "step": 6560 |
| }, |
| { |
| "epoch": 6.766220391349124, |
| "grad_norm": 0.250887930393219, |
| "learning_rate": 7.524620628596954e-05, |
| "loss": 0.0477, |
| "step": 6570 |
| }, |
| { |
| "epoch": 6.776519052523172, |
| "grad_norm": 0.3410227298736572, |
| "learning_rate": 7.516687399221037e-05, |
| "loss": 0.0474, |
| "step": 6580 |
| }, |
| { |
| "epoch": 6.78681771369722, |
| "grad_norm": 0.42289555072784424, |
| "learning_rate": 7.508745675367483e-05, |
| "loss": 0.0445, |
| "step": 6590 |
| }, |
| { |
| "epoch": 6.797116374871266, |
| "grad_norm": 0.3723140358924866, |
| "learning_rate": 7.500795483841692e-05, |
| "loss": 0.0473, |
| "step": 6600 |
| }, |
| { |
| "epoch": 6.807415036045314, |
| "grad_norm": 0.5165073275566101, |
| "learning_rate": 7.492836851477636e-05, |
| "loss": 0.0502, |
| "step": 6610 |
| }, |
| { |
| "epoch": 6.817713697219362, |
| "grad_norm": 0.3081056773662567, |
| "learning_rate": 7.484869805137778e-05, |
| "loss": 0.0478, |
| "step": 6620 |
| }, |
| { |
| "epoch": 6.8280123583934085, |
| "grad_norm": 0.39798182249069214, |
| "learning_rate": 7.476894371712982e-05, |
| "loss": 0.0516, |
| "step": 6630 |
| }, |
| { |
| "epoch": 6.838311019567456, |
| "grad_norm": 0.3031449615955353, |
| "learning_rate": 7.468910578122418e-05, |
| "loss": 0.0458, |
| "step": 6640 |
| }, |
| { |
| "epoch": 6.848609680741504, |
| "grad_norm": 0.40421777963638306, |
| "learning_rate": 7.460918451313481e-05, |
| "loss": 0.0464, |
| "step": 6650 |
| }, |
| { |
| "epoch": 6.858908341915551, |
| "grad_norm": 0.3347015976905823, |
| "learning_rate": 7.452918018261684e-05, |
| "loss": 0.0427, |
| "step": 6660 |
| }, |
| { |
| "epoch": 6.869207003089598, |
| "grad_norm": 0.46592167019844055, |
| "learning_rate": 7.444909305970578e-05, |
| "loss": 0.0395, |
| "step": 6670 |
| }, |
| { |
| "epoch": 6.879505664263646, |
| "grad_norm": 0.31017211079597473, |
| "learning_rate": 7.436892341471663e-05, |
| "loss": 0.052, |
| "step": 6680 |
| }, |
| { |
| "epoch": 6.889804325437693, |
| "grad_norm": 0.575901210308075, |
| "learning_rate": 7.428867151824287e-05, |
| "loss": 0.0489, |
| "step": 6690 |
| }, |
| { |
| "epoch": 6.90010298661174, |
| "grad_norm": 0.372746080160141, |
| "learning_rate": 7.420833764115561e-05, |
| "loss": 0.0428, |
| "step": 6700 |
| }, |
| { |
| "epoch": 6.910401647785788, |
| "grad_norm": 0.37451857328414917, |
| "learning_rate": 7.41279220546027e-05, |
| "loss": 0.0432, |
| "step": 6710 |
| }, |
| { |
| "epoch": 6.920700308959836, |
| "grad_norm": 0.3189006447792053, |
| "learning_rate": 7.404742503000776e-05, |
| "loss": 0.0519, |
| "step": 6720 |
| }, |
| { |
| "epoch": 6.930998970133882, |
| "grad_norm": 0.22485186159610748, |
| "learning_rate": 7.396684683906928e-05, |
| "loss": 0.0507, |
| "step": 6730 |
| }, |
| { |
| "epoch": 6.94129763130793, |
| "grad_norm": 0.3649514615535736, |
| "learning_rate": 7.38861877537597e-05, |
| "loss": 0.0485, |
| "step": 6740 |
| }, |
| { |
| "epoch": 6.951596292481978, |
| "grad_norm": 0.37899455428123474, |
| "learning_rate": 7.380544804632453e-05, |
| "loss": 0.0454, |
| "step": 6750 |
| }, |
| { |
| "epoch": 6.961894953656024, |
| "grad_norm": 0.4623110294342041, |
| "learning_rate": 7.372462798928137e-05, |
| "loss": 0.0446, |
| "step": 6760 |
| }, |
| { |
| "epoch": 6.972193614830072, |
| "grad_norm": 0.41896483302116394, |
| "learning_rate": 7.364372785541902e-05, |
| "loss": 0.0432, |
| "step": 6770 |
| }, |
| { |
| "epoch": 6.98249227600412, |
| "grad_norm": 0.28001904487609863, |
| "learning_rate": 7.356274791779661e-05, |
| "loss": 0.0447, |
| "step": 6780 |
| }, |
| { |
| "epoch": 6.9927909371781665, |
| "grad_norm": 0.35105225443840027, |
| "learning_rate": 7.348168844974254e-05, |
| "loss": 0.0445, |
| "step": 6790 |
| }, |
| { |
| "epoch": 7.003089598352214, |
| "grad_norm": 0.41556599736213684, |
| "learning_rate": 7.340054972485371e-05, |
| "loss": 0.0512, |
| "step": 6800 |
| }, |
| { |
| "epoch": 7.013388259526262, |
| "grad_norm": 0.4035722017288208, |
| "learning_rate": 7.331933201699457e-05, |
| "loss": 0.0423, |
| "step": 6810 |
| }, |
| { |
| "epoch": 7.0236869207003085, |
| "grad_norm": 0.4090428352355957, |
| "learning_rate": 7.323803560029605e-05, |
| "loss": 0.0514, |
| "step": 6820 |
| }, |
| { |
| "epoch": 7.033985581874356, |
| "grad_norm": 0.3787795901298523, |
| "learning_rate": 7.315666074915481e-05, |
| "loss": 0.0402, |
| "step": 6830 |
| }, |
| { |
| "epoch": 7.044284243048404, |
| "grad_norm": 0.32284408807754517, |
| "learning_rate": 7.307520773823227e-05, |
| "loss": 0.0466, |
| "step": 6840 |
| }, |
| { |
| "epoch": 7.0545829042224515, |
| "grad_norm": 0.35008612275123596, |
| "learning_rate": 7.299367684245362e-05, |
| "loss": 0.0451, |
| "step": 6850 |
| }, |
| { |
| "epoch": 7.064881565396498, |
| "grad_norm": 0.38151565194129944, |
| "learning_rate": 7.29120683370069e-05, |
| "loss": 0.0364, |
| "step": 6860 |
| }, |
| { |
| "epoch": 7.075180226570546, |
| "grad_norm": 0.21700677275657654, |
| "learning_rate": 7.283038249734217e-05, |
| "loss": 0.0504, |
| "step": 6870 |
| }, |
| { |
| "epoch": 7.085478887744594, |
| "grad_norm": 0.3018152415752411, |
| "learning_rate": 7.27486195991705e-05, |
| "loss": 0.0519, |
| "step": 6880 |
| }, |
| { |
| "epoch": 7.09577754891864, |
| "grad_norm": 0.2052696943283081, |
| "learning_rate": 7.266677991846301e-05, |
| "loss": 0.042, |
| "step": 6890 |
| }, |
| { |
| "epoch": 7.106076210092688, |
| "grad_norm": 0.39970454573631287, |
| "learning_rate": 7.258486373144999e-05, |
| "loss": 0.0409, |
| "step": 6900 |
| }, |
| { |
| "epoch": 7.116374871266736, |
| "grad_norm": 0.22980281710624695, |
| "learning_rate": 7.250287131462004e-05, |
| "loss": 0.0445, |
| "step": 6910 |
| }, |
| { |
| "epoch": 7.126673532440782, |
| "grad_norm": 0.3403468430042267, |
| "learning_rate": 7.242080294471895e-05, |
| "loss": 0.0565, |
| "step": 6920 |
| }, |
| { |
| "epoch": 7.13697219361483, |
| "grad_norm": 0.25713488459587097, |
| "learning_rate": 7.233865889874891e-05, |
| "loss": 0.0456, |
| "step": 6930 |
| }, |
| { |
| "epoch": 7.147270854788878, |
| "grad_norm": 0.3376232981681824, |
| "learning_rate": 7.225643945396757e-05, |
| "loss": 0.0378, |
| "step": 6940 |
| }, |
| { |
| "epoch": 7.1575695159629245, |
| "grad_norm": 0.255604088306427, |
| "learning_rate": 7.217414488788702e-05, |
| "loss": 0.041, |
| "step": 6950 |
| }, |
| { |
| "epoch": 7.167868177136972, |
| "grad_norm": 0.2713391184806824, |
| "learning_rate": 7.209177547827294e-05, |
| "loss": 0.0527, |
| "step": 6960 |
| }, |
| { |
| "epoch": 7.17816683831102, |
| "grad_norm": 0.2645740509033203, |
| "learning_rate": 7.20093315031436e-05, |
| "loss": 0.0432, |
| "step": 6970 |
| }, |
| { |
| "epoch": 7.1884654994850665, |
| "grad_norm": 0.3499581515789032, |
| "learning_rate": 7.192681324076896e-05, |
| "loss": 0.0516, |
| "step": 6980 |
| }, |
| { |
| "epoch": 7.198764160659114, |
| "grad_norm": 0.24416272342205048, |
| "learning_rate": 7.184422096966971e-05, |
| "loss": 0.0435, |
| "step": 6990 |
| }, |
| { |
| "epoch": 7.209062821833162, |
| "grad_norm": 0.3371264338493347, |
| "learning_rate": 7.176155496861638e-05, |
| "loss": 0.0463, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.2193614830072095, |
| "grad_norm": 0.3851630687713623, |
| "learning_rate": 7.167881551662831e-05, |
| "loss": 0.0407, |
| "step": 7010 |
| }, |
| { |
| "epoch": 7.229660144181256, |
| "grad_norm": 0.2070106714963913, |
| "learning_rate": 7.159600289297276e-05, |
| "loss": 0.0386, |
| "step": 7020 |
| }, |
| { |
| "epoch": 7.239958805355304, |
| "grad_norm": 0.3137363791465759, |
| "learning_rate": 7.151311737716397e-05, |
| "loss": 0.0411, |
| "step": 7030 |
| }, |
| { |
| "epoch": 7.2502574665293515, |
| "grad_norm": 0.3703240752220154, |
| "learning_rate": 7.143015924896226e-05, |
| "loss": 0.0426, |
| "step": 7040 |
| }, |
| { |
| "epoch": 7.260556127703398, |
| "grad_norm": 0.3365670144557953, |
| "learning_rate": 7.134712878837294e-05, |
| "loss": 0.0506, |
| "step": 7050 |
| }, |
| { |
| "epoch": 7.270854788877446, |
| "grad_norm": 0.2538038194179535, |
| "learning_rate": 7.126402627564555e-05, |
| "loss": 0.0466, |
| "step": 7060 |
| }, |
| { |
| "epoch": 7.281153450051494, |
| "grad_norm": 0.43290919065475464, |
| "learning_rate": 7.118085199127276e-05, |
| "loss": 0.0463, |
| "step": 7070 |
| }, |
| { |
| "epoch": 7.29145211122554, |
| "grad_norm": 0.2167598456144333, |
| "learning_rate": 7.109760621598952e-05, |
| "loss": 0.0421, |
| "step": 7080 |
| }, |
| { |
| "epoch": 7.301750772399588, |
| "grad_norm": 0.24321898818016052, |
| "learning_rate": 7.101428923077209e-05, |
| "loss": 0.0382, |
| "step": 7090 |
| }, |
| { |
| "epoch": 7.312049433573636, |
| "grad_norm": 0.31298938393592834, |
| "learning_rate": 7.093090131683704e-05, |
| "loss": 0.0401, |
| "step": 7100 |
| }, |
| { |
| "epoch": 7.3223480947476824, |
| "grad_norm": 0.38020390272140503, |
| "learning_rate": 7.08474427556404e-05, |
| "loss": 0.0454, |
| "step": 7110 |
| }, |
| { |
| "epoch": 7.33264675592173, |
| "grad_norm": 0.37544867396354675, |
| "learning_rate": 7.076391382887661e-05, |
| "loss": 0.0408, |
| "step": 7120 |
| }, |
| { |
| "epoch": 7.342945417095778, |
| "grad_norm": 0.2992228865623474, |
| "learning_rate": 7.068031481847762e-05, |
| "loss": 0.0454, |
| "step": 7130 |
| }, |
| { |
| "epoch": 7.3532440782698245, |
| "grad_norm": 0.48509418964385986, |
| "learning_rate": 7.059664600661196e-05, |
| "loss": 0.044, |
| "step": 7140 |
| }, |
| { |
| "epoch": 7.363542739443872, |
| "grad_norm": 0.4964796304702759, |
| "learning_rate": 7.051290767568371e-05, |
| "loss": 0.0526, |
| "step": 7150 |
| }, |
| { |
| "epoch": 7.37384140061792, |
| "grad_norm": 0.22935813665390015, |
| "learning_rate": 7.042910010833163e-05, |
| "loss": 0.0416, |
| "step": 7160 |
| }, |
| { |
| "epoch": 7.3841400617919675, |
| "grad_norm": 0.2570447325706482, |
| "learning_rate": 7.034522358742816e-05, |
| "loss": 0.0488, |
| "step": 7170 |
| }, |
| { |
| "epoch": 7.394438722966014, |
| "grad_norm": 0.23174193501472473, |
| "learning_rate": 7.026127839607847e-05, |
| "loss": 0.0423, |
| "step": 7180 |
| }, |
| { |
| "epoch": 7.404737384140062, |
| "grad_norm": 0.33260369300842285, |
| "learning_rate": 7.017726481761951e-05, |
| "loss": 0.0464, |
| "step": 7190 |
| }, |
| { |
| "epoch": 7.4150360453141095, |
| "grad_norm": 0.4475546181201935, |
| "learning_rate": 7.009318313561908e-05, |
| "loss": 0.0475, |
| "step": 7200 |
| }, |
| { |
| "epoch": 7.425334706488156, |
| "grad_norm": 0.2761160731315613, |
| "learning_rate": 7.000903363387482e-05, |
| "loss": 0.0448, |
| "step": 7210 |
| }, |
| { |
| "epoch": 7.435633367662204, |
| "grad_norm": 0.39867162704467773, |
| "learning_rate": 6.99248165964133e-05, |
| "loss": 0.0455, |
| "step": 7220 |
| }, |
| { |
| "epoch": 7.445932028836252, |
| "grad_norm": 0.3500315546989441, |
| "learning_rate": 6.9840532307489e-05, |
| "loss": 0.0452, |
| "step": 7230 |
| }, |
| { |
| "epoch": 7.456230690010298, |
| "grad_norm": 0.30247119069099426, |
| "learning_rate": 6.975618105158346e-05, |
| "loss": 0.0458, |
| "step": 7240 |
| }, |
| { |
| "epoch": 7.466529351184346, |
| "grad_norm": 0.357147753238678, |
| "learning_rate": 6.967176311340418e-05, |
| "loss": 0.0401, |
| "step": 7250 |
| }, |
| { |
| "epoch": 7.476828012358394, |
| "grad_norm": 0.36390820145606995, |
| "learning_rate": 6.958727877788378e-05, |
| "loss": 0.0432, |
| "step": 7260 |
| }, |
| { |
| "epoch": 7.48712667353244, |
| "grad_norm": 0.3110693395137787, |
| "learning_rate": 6.950272833017896e-05, |
| "loss": 0.0413, |
| "step": 7270 |
| }, |
| { |
| "epoch": 7.497425334706488, |
| "grad_norm": 0.26132798194885254, |
| "learning_rate": 6.941811205566957e-05, |
| "loss": 0.0448, |
| "step": 7280 |
| }, |
| { |
| "epoch": 7.507723995880536, |
| "grad_norm": 0.2721041142940521, |
| "learning_rate": 6.933343023995767e-05, |
| "loss": 0.0358, |
| "step": 7290 |
| }, |
| { |
| "epoch": 7.518022657054583, |
| "grad_norm": 0.26367267966270447, |
| "learning_rate": 6.924868316886649e-05, |
| "loss": 0.0515, |
| "step": 7300 |
| }, |
| { |
| "epoch": 7.52832131822863, |
| "grad_norm": 0.4417518377304077, |
| "learning_rate": 6.916387112843957e-05, |
| "loss": 0.054, |
| "step": 7310 |
| }, |
| { |
| "epoch": 7.538619979402678, |
| "grad_norm": 0.3166719079017639, |
| "learning_rate": 6.907899440493968e-05, |
| "loss": 0.0485, |
| "step": 7320 |
| }, |
| { |
| "epoch": 7.548918640576725, |
| "grad_norm": 0.330705463886261, |
| "learning_rate": 6.899405328484794e-05, |
| "loss": 0.0444, |
| "step": 7330 |
| }, |
| { |
| "epoch": 7.559217301750772, |
| "grad_norm": 0.22663088142871857, |
| "learning_rate": 6.890904805486286e-05, |
| "loss": 0.0424, |
| "step": 7340 |
| }, |
| { |
| "epoch": 7.56951596292482, |
| "grad_norm": 0.3720453083515167, |
| "learning_rate": 6.88239790018993e-05, |
| "loss": 0.043, |
| "step": 7350 |
| }, |
| { |
| "epoch": 7.5798146240988675, |
| "grad_norm": 0.2161106914281845, |
| "learning_rate": 6.873884641308752e-05, |
| "loss": 0.042, |
| "step": 7360 |
| }, |
| { |
| "epoch": 7.590113285272914, |
| "grad_norm": 0.3371187448501587, |
| "learning_rate": 6.865365057577227e-05, |
| "loss": 0.0463, |
| "step": 7370 |
| }, |
| { |
| "epoch": 7.600411946446962, |
| "grad_norm": 0.3055129945278168, |
| "learning_rate": 6.856839177751176e-05, |
| "loss": 0.0474, |
| "step": 7380 |
| }, |
| { |
| "epoch": 7.61071060762101, |
| "grad_norm": 0.3375736474990845, |
| "learning_rate": 6.84830703060767e-05, |
| "loss": 0.0439, |
| "step": 7390 |
| }, |
| { |
| "epoch": 7.621009268795056, |
| "grad_norm": 0.3460111916065216, |
| "learning_rate": 6.839768644944937e-05, |
| "loss": 0.0464, |
| "step": 7400 |
| }, |
| { |
| "epoch": 7.631307929969104, |
| "grad_norm": 0.3610309660434723, |
| "learning_rate": 6.83122404958226e-05, |
| "loss": 0.0441, |
| "step": 7410 |
| }, |
| { |
| "epoch": 7.641606591143152, |
| "grad_norm": 0.32009249925613403, |
| "learning_rate": 6.82267327335988e-05, |
| "loss": 0.0405, |
| "step": 7420 |
| }, |
| { |
| "epoch": 7.651905252317198, |
| "grad_norm": 0.532019853591919, |
| "learning_rate": 6.814116345138902e-05, |
| "loss": 0.0401, |
| "step": 7430 |
| }, |
| { |
| "epoch": 7.662203913491246, |
| "grad_norm": 0.25246256589889526, |
| "learning_rate": 6.805553293801196e-05, |
| "loss": 0.0476, |
| "step": 7440 |
| }, |
| { |
| "epoch": 7.672502574665294, |
| "grad_norm": 0.2576782703399658, |
| "learning_rate": 6.796984148249295e-05, |
| "loss": 0.0456, |
| "step": 7450 |
| }, |
| { |
| "epoch": 7.6828012358393405, |
| "grad_norm": 0.4437432885169983, |
| "learning_rate": 6.788408937406307e-05, |
| "loss": 0.0434, |
| "step": 7460 |
| }, |
| { |
| "epoch": 7.693099897013388, |
| "grad_norm": 0.3884623050689697, |
| "learning_rate": 6.77982769021581e-05, |
| "loss": 0.0433, |
| "step": 7470 |
| }, |
| { |
| "epoch": 7.703398558187436, |
| "grad_norm": 0.30564385652542114, |
| "learning_rate": 6.771240435641754e-05, |
| "loss": 0.0419, |
| "step": 7480 |
| }, |
| { |
| "epoch": 7.7136972193614834, |
| "grad_norm": 0.29946035146713257, |
| "learning_rate": 6.762647202668366e-05, |
| "loss": 0.0481, |
| "step": 7490 |
| }, |
| { |
| "epoch": 7.72399588053553, |
| "grad_norm": 0.270355761051178, |
| "learning_rate": 6.754048020300054e-05, |
| "loss": 0.0432, |
| "step": 7500 |
| }, |
| { |
| "epoch": 7.734294541709578, |
| "grad_norm": 0.3664805293083191, |
| "learning_rate": 6.745442917561309e-05, |
| "loss": 0.0379, |
| "step": 7510 |
| }, |
| { |
| "epoch": 7.7445932028836255, |
| "grad_norm": 0.788110077381134, |
| "learning_rate": 6.736831923496596e-05, |
| "loss": 0.0521, |
| "step": 7520 |
| }, |
| { |
| "epoch": 7.754891864057672, |
| "grad_norm": 0.46117472648620605, |
| "learning_rate": 6.728215067170273e-05, |
| "loss": 0.0487, |
| "step": 7530 |
| }, |
| { |
| "epoch": 7.76519052523172, |
| "grad_norm": 0.18957702815532684, |
| "learning_rate": 6.719592377666483e-05, |
| "loss": 0.0479, |
| "step": 7540 |
| }, |
| { |
| "epoch": 7.775489186405768, |
| "grad_norm": 0.4086840748786926, |
| "learning_rate": 6.710963884089054e-05, |
| "loss": 0.0426, |
| "step": 7550 |
| }, |
| { |
| "epoch": 7.785787847579814, |
| "grad_norm": 0.21845366060733795, |
| "learning_rate": 6.70232961556141e-05, |
| "loss": 0.0402, |
| "step": 7560 |
| }, |
| { |
| "epoch": 7.796086508753862, |
| "grad_norm": 0.18775074183940887, |
| "learning_rate": 6.693689601226458e-05, |
| "loss": 0.04, |
| "step": 7570 |
| }, |
| { |
| "epoch": 7.80638516992791, |
| "grad_norm": 0.30147698521614075, |
| "learning_rate": 6.685043870246507e-05, |
| "loss": 0.0434, |
| "step": 7580 |
| }, |
| { |
| "epoch": 7.816683831101956, |
| "grad_norm": 0.366470068693161, |
| "learning_rate": 6.676392451803161e-05, |
| "loss": 0.0463, |
| "step": 7590 |
| }, |
| { |
| "epoch": 7.826982492276004, |
| "grad_norm": 0.3885975778102875, |
| "learning_rate": 6.667735375097214e-05, |
| "loss": 0.0453, |
| "step": 7600 |
| }, |
| { |
| "epoch": 7.837281153450052, |
| "grad_norm": 0.29683852195739746, |
| "learning_rate": 6.659072669348564e-05, |
| "loss": 0.0419, |
| "step": 7610 |
| }, |
| { |
| "epoch": 7.8475798146240985, |
| "grad_norm": 0.29188981652259827, |
| "learning_rate": 6.650404363796108e-05, |
| "loss": 0.0371, |
| "step": 7620 |
| }, |
| { |
| "epoch": 7.857878475798146, |
| "grad_norm": 0.40961870551109314, |
| "learning_rate": 6.641730487697639e-05, |
| "loss": 0.0435, |
| "step": 7630 |
| }, |
| { |
| "epoch": 7.868177136972194, |
| "grad_norm": 0.33139774203300476, |
| "learning_rate": 6.633051070329759e-05, |
| "loss": 0.0413, |
| "step": 7640 |
| }, |
| { |
| "epoch": 7.8784757981462405, |
| "grad_norm": 0.28173500299453735, |
| "learning_rate": 6.624366140987768e-05, |
| "loss": 0.0452, |
| "step": 7650 |
| }, |
| { |
| "epoch": 7.888774459320288, |
| "grad_norm": 0.2889021039009094, |
| "learning_rate": 6.615675728985572e-05, |
| "loss": 0.0423, |
| "step": 7660 |
| }, |
| { |
| "epoch": 7.899073120494336, |
| "grad_norm": 0.6384182572364807, |
| "learning_rate": 6.606979863655583e-05, |
| "loss": 0.0379, |
| "step": 7670 |
| }, |
| { |
| "epoch": 7.9093717816683835, |
| "grad_norm": 0.4132192134857178, |
| "learning_rate": 6.598278574348619e-05, |
| "loss": 0.0391, |
| "step": 7680 |
| }, |
| { |
| "epoch": 7.91967044284243, |
| "grad_norm": 0.3432478606700897, |
| "learning_rate": 6.589571890433803e-05, |
| "loss": 0.0473, |
| "step": 7690 |
| }, |
| { |
| "epoch": 7.929969104016478, |
| "grad_norm": 0.3030139207839966, |
| "learning_rate": 6.580859841298471e-05, |
| "loss": 0.0374, |
| "step": 7700 |
| }, |
| { |
| "epoch": 7.940267765190526, |
| "grad_norm": 0.27307939529418945, |
| "learning_rate": 6.572142456348065e-05, |
| "loss": 0.0402, |
| "step": 7710 |
| }, |
| { |
| "epoch": 7.950566426364572, |
| "grad_norm": 0.2667880952358246, |
| "learning_rate": 6.563419765006038e-05, |
| "loss": 0.0463, |
| "step": 7720 |
| }, |
| { |
| "epoch": 7.96086508753862, |
| "grad_norm": 0.37028032541275024, |
| "learning_rate": 6.55469179671375e-05, |
| "loss": 0.038, |
| "step": 7730 |
| }, |
| { |
| "epoch": 7.971163748712668, |
| "grad_norm": 0.3381376266479492, |
| "learning_rate": 6.545958580930377e-05, |
| "loss": 0.0455, |
| "step": 7740 |
| }, |
| { |
| "epoch": 7.981462409886714, |
| "grad_norm": 0.28161460161209106, |
| "learning_rate": 6.537220147132805e-05, |
| "loss": 0.0396, |
| "step": 7750 |
| }, |
| { |
| "epoch": 7.991761071060762, |
| "grad_norm": 0.26298263669013977, |
| "learning_rate": 6.528476524815528e-05, |
| "loss": 0.0424, |
| "step": 7760 |
| }, |
| { |
| "epoch": 8.002059732234809, |
| "grad_norm": 0.2671511769294739, |
| "learning_rate": 6.519727743490561e-05, |
| "loss": 0.0384, |
| "step": 7770 |
| }, |
| { |
| "epoch": 8.012358393408856, |
| "grad_norm": 0.3101862967014313, |
| "learning_rate": 6.510973832687323e-05, |
| "loss": 0.0465, |
| "step": 7780 |
| }, |
| { |
| "epoch": 8.022657054582904, |
| "grad_norm": 0.3037969768047333, |
| "learning_rate": 6.502214821952555e-05, |
| "loss": 0.0473, |
| "step": 7790 |
| }, |
| { |
| "epoch": 8.032955715756952, |
| "grad_norm": 0.45323264598846436, |
| "learning_rate": 6.493450740850203e-05, |
| "loss": 0.0432, |
| "step": 7800 |
| }, |
| { |
| "epoch": 8.043254376931, |
| "grad_norm": 0.41797924041748047, |
| "learning_rate": 6.484681618961331e-05, |
| "loss": 0.048, |
| "step": 7810 |
| }, |
| { |
| "epoch": 8.053553038105047, |
| "grad_norm": 0.4865727424621582, |
| "learning_rate": 6.47590748588402e-05, |
| "loss": 0.0512, |
| "step": 7820 |
| }, |
| { |
| "epoch": 8.063851699279093, |
| "grad_norm": 0.3105076849460602, |
| "learning_rate": 6.46712837123326e-05, |
| "loss": 0.0448, |
| "step": 7830 |
| }, |
| { |
| "epoch": 8.07415036045314, |
| "grad_norm": 0.25625815987586975, |
| "learning_rate": 6.458344304640858e-05, |
| "loss": 0.0416, |
| "step": 7840 |
| }, |
| { |
| "epoch": 8.084449021627188, |
| "grad_norm": 0.31119033694267273, |
| "learning_rate": 6.449555315755333e-05, |
| "loss": 0.041, |
| "step": 7850 |
| }, |
| { |
| "epoch": 8.094747682801236, |
| "grad_norm": 0.39366838335990906, |
| "learning_rate": 6.440761434241821e-05, |
| "loss": 0.0404, |
| "step": 7860 |
| }, |
| { |
| "epoch": 8.105046343975284, |
| "grad_norm": 0.31691083312034607, |
| "learning_rate": 6.431962689781969e-05, |
| "loss": 0.0392, |
| "step": 7870 |
| }, |
| { |
| "epoch": 8.115345005149331, |
| "grad_norm": 0.23836584389209747, |
| "learning_rate": 6.423159112073838e-05, |
| "loss": 0.0455, |
| "step": 7880 |
| }, |
| { |
| "epoch": 8.125643666323377, |
| "grad_norm": 0.2766348719596863, |
| "learning_rate": 6.414350730831805e-05, |
| "loss": 0.0405, |
| "step": 7890 |
| }, |
| { |
| "epoch": 8.135942327497425, |
| "grad_norm": 0.3610820174217224, |
| "learning_rate": 6.405537575786456e-05, |
| "loss": 0.0459, |
| "step": 7900 |
| }, |
| { |
| "epoch": 8.146240988671472, |
| "grad_norm": 0.4069831669330597, |
| "learning_rate": 6.396719676684494e-05, |
| "loss": 0.0449, |
| "step": 7910 |
| }, |
| { |
| "epoch": 8.15653964984552, |
| "grad_norm": 0.38294172286987305, |
| "learning_rate": 6.387897063288635e-05, |
| "loss": 0.0495, |
| "step": 7920 |
| }, |
| { |
| "epoch": 8.166838311019568, |
| "grad_norm": 0.3302978575229645, |
| "learning_rate": 6.3790697653775e-05, |
| "loss": 0.0453, |
| "step": 7930 |
| }, |
| { |
| "epoch": 8.177136972193615, |
| "grad_norm": 0.26982101798057556, |
| "learning_rate": 6.37023781274553e-05, |
| "loss": 0.0463, |
| "step": 7940 |
| }, |
| { |
| "epoch": 8.187435633367663, |
| "grad_norm": 0.23370954394340515, |
| "learning_rate": 6.361401235202872e-05, |
| "loss": 0.0465, |
| "step": 7950 |
| }, |
| { |
| "epoch": 8.197734294541709, |
| "grad_norm": 0.3092534840106964, |
| "learning_rate": 6.352560062575284e-05, |
| "loss": 0.055, |
| "step": 7960 |
| }, |
| { |
| "epoch": 8.208032955715757, |
| "grad_norm": 0.36051103472709656, |
| "learning_rate": 6.343714324704034e-05, |
| "loss": 0.0551, |
| "step": 7970 |
| }, |
| { |
| "epoch": 8.218331616889804, |
| "grad_norm": 0.33508798480033875, |
| "learning_rate": 6.3348640514458e-05, |
| "loss": 0.0462, |
| "step": 7980 |
| }, |
| { |
| "epoch": 8.228630278063852, |
| "grad_norm": 0.9673136472702026, |
| "learning_rate": 6.326009272672564e-05, |
| "loss": 0.0442, |
| "step": 7990 |
| }, |
| { |
| "epoch": 8.2389289392379, |
| "grad_norm": 1.469125509262085, |
| "learning_rate": 6.317150018271522e-05, |
| "loss": 0.0465, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.249227600411947, |
| "grad_norm": 0.3022879660129547, |
| "learning_rate": 6.308286318144971e-05, |
| "loss": 0.052, |
| "step": 8010 |
| }, |
| { |
| "epoch": 8.259526261585993, |
| "grad_norm": 0.240738183259964, |
| "learning_rate": 6.299418202210214e-05, |
| "loss": 0.044, |
| "step": 8020 |
| }, |
| { |
| "epoch": 8.26982492276004, |
| "grad_norm": 0.3125, |
| "learning_rate": 6.290545700399462e-05, |
| "loss": 0.0413, |
| "step": 8030 |
| }, |
| { |
| "epoch": 8.280123583934088, |
| "grad_norm": 0.3256394565105438, |
| "learning_rate": 6.281668842659725e-05, |
| "loss": 0.0381, |
| "step": 8040 |
| }, |
| { |
| "epoch": 8.290422245108136, |
| "grad_norm": 0.3764393925666809, |
| "learning_rate": 6.27278765895272e-05, |
| "loss": 0.0412, |
| "step": 8050 |
| }, |
| { |
| "epoch": 8.300720906282184, |
| "grad_norm": 0.28021517395973206, |
| "learning_rate": 6.263902179254762e-05, |
| "loss": 0.0392, |
| "step": 8060 |
| }, |
| { |
| "epoch": 8.311019567456231, |
| "grad_norm": 0.3545322120189667, |
| "learning_rate": 6.255012433556665e-05, |
| "loss": 0.039, |
| "step": 8070 |
| }, |
| { |
| "epoch": 8.321318228630279, |
| "grad_norm": 0.33872804045677185, |
| "learning_rate": 6.246118451863646e-05, |
| "loss": 0.0417, |
| "step": 8080 |
| }, |
| { |
| "epoch": 8.331616889804325, |
| "grad_norm": 0.9136466383934021, |
| "learning_rate": 6.237220264195216e-05, |
| "loss": 0.0429, |
| "step": 8090 |
| }, |
| { |
| "epoch": 8.341915550978372, |
| "grad_norm": 0.31747815012931824, |
| "learning_rate": 6.228317900585083e-05, |
| "loss": 0.0425, |
| "step": 8100 |
| }, |
| { |
| "epoch": 8.35221421215242, |
| "grad_norm": 0.3648073375225067, |
| "learning_rate": 6.219411391081055e-05, |
| "loss": 0.0384, |
| "step": 8110 |
| }, |
| { |
| "epoch": 8.362512873326468, |
| "grad_norm": 0.26562437415122986, |
| "learning_rate": 6.210500765744925e-05, |
| "loss": 0.036, |
| "step": 8120 |
| }, |
| { |
| "epoch": 8.372811534500515, |
| "grad_norm": 0.2761411666870117, |
| "learning_rate": 6.201586054652379e-05, |
| "loss": 0.0466, |
| "step": 8130 |
| }, |
| { |
| "epoch": 8.383110195674563, |
| "grad_norm": 0.46033117175102234, |
| "learning_rate": 6.192667287892905e-05, |
| "loss": 0.0432, |
| "step": 8140 |
| }, |
| { |
| "epoch": 8.393408856848609, |
| "grad_norm": 0.3292730450630188, |
| "learning_rate": 6.183744495569666e-05, |
| "loss": 0.0426, |
| "step": 8150 |
| }, |
| { |
| "epoch": 8.403707518022657, |
| "grad_norm": 0.2943620979785919, |
| "learning_rate": 6.174817707799417e-05, |
| "loss": 0.0483, |
| "step": 8160 |
| }, |
| { |
| "epoch": 8.414006179196704, |
| "grad_norm": 0.3903990685939789, |
| "learning_rate": 6.165886954712401e-05, |
| "loss": 0.043, |
| "step": 8170 |
| }, |
| { |
| "epoch": 8.424304840370752, |
| "grad_norm": 0.41772767901420593, |
| "learning_rate": 6.156952266452247e-05, |
| "loss": 0.0407, |
| "step": 8180 |
| }, |
| { |
| "epoch": 8.4346035015448, |
| "grad_norm": 0.5899285078048706, |
| "learning_rate": 6.148013673175857e-05, |
| "loss": 0.0434, |
| "step": 8190 |
| }, |
| { |
| "epoch": 8.444902162718847, |
| "grad_norm": 0.22386884689331055, |
| "learning_rate": 6.13907120505332e-05, |
| "loss": 0.042, |
| "step": 8200 |
| }, |
| { |
| "epoch": 8.455200823892893, |
| "grad_norm": 0.3034772276878357, |
| "learning_rate": 6.130124892267806e-05, |
| "loss": 0.0365, |
| "step": 8210 |
| }, |
| { |
| "epoch": 8.46549948506694, |
| "grad_norm": 0.37777379155158997, |
| "learning_rate": 6.121174765015455e-05, |
| "loss": 0.0419, |
| "step": 8220 |
| }, |
| { |
| "epoch": 8.475798146240988, |
| "grad_norm": 0.30282172560691833, |
| "learning_rate": 6.112220853505288e-05, |
| "loss": 0.0418, |
| "step": 8230 |
| }, |
| { |
| "epoch": 8.486096807415036, |
| "grad_norm": 0.5801701545715332, |
| "learning_rate": 6.103263187959095e-05, |
| "loss": 0.049, |
| "step": 8240 |
| }, |
| { |
| "epoch": 8.496395468589084, |
| "grad_norm": 0.32179057598114014, |
| "learning_rate": 6.094301798611338e-05, |
| "loss": 0.0396, |
| "step": 8250 |
| }, |
| { |
| "epoch": 8.506694129763131, |
| "grad_norm": 0.2766133248806, |
| "learning_rate": 6.085336715709049e-05, |
| "loss": 0.0484, |
| "step": 8260 |
| }, |
| { |
| "epoch": 8.516992790937179, |
| "grad_norm": 0.2891679108142853, |
| "learning_rate": 6.076367969511725e-05, |
| "loss": 0.0483, |
| "step": 8270 |
| }, |
| { |
| "epoch": 8.527291452111225, |
| "grad_norm": 0.35707661509513855, |
| "learning_rate": 6.067395590291226e-05, |
| "loss": 0.0468, |
| "step": 8280 |
| }, |
| { |
| "epoch": 8.537590113285273, |
| "grad_norm": 0.29469162225723267, |
| "learning_rate": 6.0584196083316794e-05, |
| "loss": 0.0441, |
| "step": 8290 |
| }, |
| { |
| "epoch": 8.54788877445932, |
| "grad_norm": 0.29220518469810486, |
| "learning_rate": 6.0494400539293675e-05, |
| "loss": 0.0389, |
| "step": 8300 |
| }, |
| { |
| "epoch": 8.558187435633368, |
| "grad_norm": 0.3941989243030548, |
| "learning_rate": 6.040456957392635e-05, |
| "loss": 0.0389, |
| "step": 8310 |
| }, |
| { |
| "epoch": 8.568486096807415, |
| "grad_norm": 0.2707824409008026, |
| "learning_rate": 6.03147034904178e-05, |
| "loss": 0.0471, |
| "step": 8320 |
| }, |
| { |
| "epoch": 8.578784757981463, |
| "grad_norm": 0.35828855633735657, |
| "learning_rate": 6.0224802592089513e-05, |
| "loss": 0.0453, |
| "step": 8330 |
| }, |
| { |
| "epoch": 8.589083419155509, |
| "grad_norm": 0.2687852382659912, |
| "learning_rate": 6.013486718238055e-05, |
| "loss": 0.041, |
| "step": 8340 |
| }, |
| { |
| "epoch": 8.599382080329557, |
| "grad_norm": 0.25436437129974365, |
| "learning_rate": 6.004489756484641e-05, |
| "loss": 0.0411, |
| "step": 8350 |
| }, |
| { |
| "epoch": 8.609680741503604, |
| "grad_norm": 0.22475087642669678, |
| "learning_rate": 5.995489404315806e-05, |
| "loss": 0.0409, |
| "step": 8360 |
| }, |
| { |
| "epoch": 8.619979402677652, |
| "grad_norm": 0.32723718881607056, |
| "learning_rate": 5.98648569211009e-05, |
| "loss": 0.0477, |
| "step": 8370 |
| }, |
| { |
| "epoch": 8.6302780638517, |
| "grad_norm": 0.2676869034767151, |
| "learning_rate": 5.977478650257374e-05, |
| "loss": 0.0363, |
| "step": 8380 |
| }, |
| { |
| "epoch": 8.640576725025747, |
| "grad_norm": 0.6640805006027222, |
| "learning_rate": 5.9684683091587804e-05, |
| "loss": 0.0396, |
| "step": 8390 |
| }, |
| { |
| "epoch": 8.650875386199793, |
| "grad_norm": 0.29109275341033936, |
| "learning_rate": 5.959454699226562e-05, |
| "loss": 0.0452, |
| "step": 8400 |
| }, |
| { |
| "epoch": 8.66117404737384, |
| "grad_norm": 0.39319050312042236, |
| "learning_rate": 5.95043785088401e-05, |
| "loss": 0.0359, |
| "step": 8410 |
| }, |
| { |
| "epoch": 8.671472708547888, |
| "grad_norm": 0.2134009450674057, |
| "learning_rate": 5.941417794565343e-05, |
| "loss": 0.0387, |
| "step": 8420 |
| }, |
| { |
| "epoch": 8.681771369721936, |
| "grad_norm": 0.21827584505081177, |
| "learning_rate": 5.9323945607156076e-05, |
| "loss": 0.0382, |
| "step": 8430 |
| }, |
| { |
| "epoch": 8.692070030895984, |
| "grad_norm": 0.41963616013526917, |
| "learning_rate": 5.9233681797905785e-05, |
| "loss": 0.0404, |
| "step": 8440 |
| }, |
| { |
| "epoch": 8.702368692070031, |
| "grad_norm": 0.21744829416275024, |
| "learning_rate": 5.914338682256647e-05, |
| "loss": 0.0437, |
| "step": 8450 |
| }, |
| { |
| "epoch": 8.712667353244079, |
| "grad_norm": 0.27720943093299866, |
| "learning_rate": 5.905306098590728e-05, |
| "loss": 0.0403, |
| "step": 8460 |
| }, |
| { |
| "epoch": 8.722966014418125, |
| "grad_norm": 0.30195143818855286, |
| "learning_rate": 5.896270459280153e-05, |
| "loss": 0.0374, |
| "step": 8470 |
| }, |
| { |
| "epoch": 8.733264675592173, |
| "grad_norm": 0.32989758253097534, |
| "learning_rate": 5.8872317948225644e-05, |
| "loss": 0.0368, |
| "step": 8480 |
| }, |
| { |
| "epoch": 8.74356333676622, |
| "grad_norm": 0.22078627347946167, |
| "learning_rate": 5.8781901357258165e-05, |
| "loss": 0.0467, |
| "step": 8490 |
| }, |
| { |
| "epoch": 8.753861997940268, |
| "grad_norm": 0.5876451134681702, |
| "learning_rate": 5.869145512507872e-05, |
| "loss": 0.0407, |
| "step": 8500 |
| }, |
| { |
| "epoch": 8.764160659114316, |
| "grad_norm": 0.44796323776245117, |
| "learning_rate": 5.860097955696698e-05, |
| "loss": 0.0382, |
| "step": 8510 |
| }, |
| { |
| "epoch": 8.774459320288363, |
| "grad_norm": 0.35779476165771484, |
| "learning_rate": 5.851047495830163e-05, |
| "loss": 0.0438, |
| "step": 8520 |
| }, |
| { |
| "epoch": 8.784757981462409, |
| "grad_norm": 0.28585049510002136, |
| "learning_rate": 5.841994163455934e-05, |
| "loss": 0.0376, |
| "step": 8530 |
| }, |
| { |
| "epoch": 8.795056642636457, |
| "grad_norm": 0.26791223883628845, |
| "learning_rate": 5.832937989131374e-05, |
| "loss": 0.0387, |
| "step": 8540 |
| }, |
| { |
| "epoch": 8.805355303810504, |
| "grad_norm": 0.5671482086181641, |
| "learning_rate": 5.823879003423438e-05, |
| "loss": 0.0366, |
| "step": 8550 |
| }, |
| { |
| "epoch": 8.815653964984552, |
| "grad_norm": 0.1565544456243515, |
| "learning_rate": 5.8148172369085686e-05, |
| "loss": 0.0369, |
| "step": 8560 |
| }, |
| { |
| "epoch": 8.8259526261586, |
| "grad_norm": 0.46639129519462585, |
| "learning_rate": 5.8057527201725984e-05, |
| "loss": 0.0398, |
| "step": 8570 |
| }, |
| { |
| "epoch": 8.836251287332647, |
| "grad_norm": 0.8469918370246887, |
| "learning_rate": 5.796685483810637e-05, |
| "loss": 0.047, |
| "step": 8580 |
| }, |
| { |
| "epoch": 8.846549948506695, |
| "grad_norm": 0.1878482550382614, |
| "learning_rate": 5.7876155584269785e-05, |
| "loss": 0.0386, |
| "step": 8590 |
| }, |
| { |
| "epoch": 8.85684860968074, |
| "grad_norm": 0.26714402437210083, |
| "learning_rate": 5.7785429746349905e-05, |
| "loss": 0.049, |
| "step": 8600 |
| }, |
| { |
| "epoch": 8.867147270854788, |
| "grad_norm": 0.35005736351013184, |
| "learning_rate": 5.7694677630570146e-05, |
| "loss": 0.0435, |
| "step": 8610 |
| }, |
| { |
| "epoch": 8.877445932028836, |
| "grad_norm": 0.48994550108909607, |
| "learning_rate": 5.760389954324261e-05, |
| "loss": 0.049, |
| "step": 8620 |
| }, |
| { |
| "epoch": 8.887744593202884, |
| "grad_norm": 0.24901621043682098, |
| "learning_rate": 5.7513095790767066e-05, |
| "loss": 0.0445, |
| "step": 8630 |
| }, |
| { |
| "epoch": 8.898043254376931, |
| "grad_norm": 0.32309484481811523, |
| "learning_rate": 5.742226667962991e-05, |
| "loss": 0.0471, |
| "step": 8640 |
| }, |
| { |
| "epoch": 8.908341915550979, |
| "grad_norm": 0.30904820561408997, |
| "learning_rate": 5.733141251640315e-05, |
| "loss": 0.0377, |
| "step": 8650 |
| }, |
| { |
| "epoch": 8.918640576725025, |
| "grad_norm": 0.30617690086364746, |
| "learning_rate": 5.724053360774327e-05, |
| "loss": 0.0378, |
| "step": 8660 |
| }, |
| { |
| "epoch": 8.928939237899073, |
| "grad_norm": 0.19513899087905884, |
| "learning_rate": 5.7149630260390384e-05, |
| "loss": 0.0315, |
| "step": 8670 |
| }, |
| { |
| "epoch": 8.93923789907312, |
| "grad_norm": 0.5502423048019409, |
| "learning_rate": 5.705870278116703e-05, |
| "loss": 0.0422, |
| "step": 8680 |
| }, |
| { |
| "epoch": 8.949536560247168, |
| "grad_norm": 0.3435225486755371, |
| "learning_rate": 5.6967751476977215e-05, |
| "loss": 0.0406, |
| "step": 8690 |
| }, |
| { |
| "epoch": 8.959835221421216, |
| "grad_norm": 0.28045403957366943, |
| "learning_rate": 5.687677665480533e-05, |
| "loss": 0.0473, |
| "step": 8700 |
| }, |
| { |
| "epoch": 8.970133882595263, |
| "grad_norm": 0.2749752700328827, |
| "learning_rate": 5.6785778621715225e-05, |
| "loss": 0.0378, |
| "step": 8710 |
| }, |
| { |
| "epoch": 8.98043254376931, |
| "grad_norm": 0.39981475472450256, |
| "learning_rate": 5.669475768484901e-05, |
| "loss": 0.0406, |
| "step": 8720 |
| }, |
| { |
| "epoch": 8.990731204943357, |
| "grad_norm": 0.28953787684440613, |
| "learning_rate": 5.660371415142611e-05, |
| "loss": 0.0379, |
| "step": 8730 |
| }, |
| { |
| "epoch": 9.001029866117404, |
| "grad_norm": 0.17452044785022736, |
| "learning_rate": 5.65126483287423e-05, |
| "loss": 0.0412, |
| "step": 8740 |
| }, |
| { |
| "epoch": 9.011328527291452, |
| "grad_norm": 0.3600793182849884, |
| "learning_rate": 5.642156052416849e-05, |
| "loss": 0.041, |
| "step": 8750 |
| }, |
| { |
| "epoch": 9.0216271884655, |
| "grad_norm": 0.2760295569896698, |
| "learning_rate": 5.633045104514982e-05, |
| "loss": 0.0435, |
| "step": 8760 |
| }, |
| { |
| "epoch": 9.031925849639547, |
| "grad_norm": 0.3825409710407257, |
| "learning_rate": 5.6239320199204616e-05, |
| "loss": 0.0408, |
| "step": 8770 |
| }, |
| { |
| "epoch": 9.042224510813595, |
| "grad_norm": 0.374891072511673, |
| "learning_rate": 5.614816829392328e-05, |
| "loss": 0.0383, |
| "step": 8780 |
| }, |
| { |
| "epoch": 9.052523171987641, |
| "grad_norm": 0.27747559547424316, |
| "learning_rate": 5.60569956369673e-05, |
| "loss": 0.0464, |
| "step": 8790 |
| }, |
| { |
| "epoch": 9.062821833161689, |
| "grad_norm": 0.28678062558174133, |
| "learning_rate": 5.596580253606824e-05, |
| "loss": 0.0487, |
| "step": 8800 |
| }, |
| { |
| "epoch": 9.073120494335736, |
| "grad_norm": 0.4970363676548004, |
| "learning_rate": 5.587458929902664e-05, |
| "loss": 0.051, |
| "step": 8810 |
| }, |
| { |
| "epoch": 9.083419155509784, |
| "grad_norm": 0.30037108063697815, |
| "learning_rate": 5.5783356233711005e-05, |
| "loss": 0.0383, |
| "step": 8820 |
| }, |
| { |
| "epoch": 9.093717816683832, |
| "grad_norm": 0.2640860676765442, |
| "learning_rate": 5.569210364805677e-05, |
| "loss": 0.0462, |
| "step": 8830 |
| }, |
| { |
| "epoch": 9.10401647785788, |
| "grad_norm": 0.30006083846092224, |
| "learning_rate": 5.5600831850065274e-05, |
| "loss": 0.0362, |
| "step": 8840 |
| }, |
| { |
| "epoch": 9.114315139031925, |
| "grad_norm": 0.3721349537372589, |
| "learning_rate": 5.550954114780269e-05, |
| "loss": 0.0399, |
| "step": 8850 |
| }, |
| { |
| "epoch": 9.124613800205973, |
| "grad_norm": 0.336732417345047, |
| "learning_rate": 5.541823184939896e-05, |
| "loss": 0.0421, |
| "step": 8860 |
| }, |
| { |
| "epoch": 9.13491246138002, |
| "grad_norm": 0.26279309391975403, |
| "learning_rate": 5.532690426304685e-05, |
| "loss": 0.0433, |
| "step": 8870 |
| }, |
| { |
| "epoch": 9.145211122554068, |
| "grad_norm": 0.2945043742656708, |
| "learning_rate": 5.5235558697000836e-05, |
| "loss": 0.0439, |
| "step": 8880 |
| }, |
| { |
| "epoch": 9.155509783728116, |
| "grad_norm": 0.47877517342567444, |
| "learning_rate": 5.514419545957606e-05, |
| "loss": 0.0431, |
| "step": 8890 |
| }, |
| { |
| "epoch": 9.165808444902163, |
| "grad_norm": 0.3854601979255676, |
| "learning_rate": 5.5052814859147315e-05, |
| "loss": 0.0365, |
| "step": 8900 |
| }, |
| { |
| "epoch": 9.176107106076211, |
| "grad_norm": 0.3006962835788727, |
| "learning_rate": 5.496141720414804e-05, |
| "loss": 0.0427, |
| "step": 8910 |
| }, |
| { |
| "epoch": 9.186405767250257, |
| "grad_norm": 0.5065596699714661, |
| "learning_rate": 5.487000280306917e-05, |
| "loss": 0.0395, |
| "step": 8920 |
| }, |
| { |
| "epoch": 9.196704428424304, |
| "grad_norm": 0.4032178521156311, |
| "learning_rate": 5.4778571964458214e-05, |
| "loss": 0.0341, |
| "step": 8930 |
| }, |
| { |
| "epoch": 9.207003089598352, |
| "grad_norm": 0.357695609331131, |
| "learning_rate": 5.468712499691816e-05, |
| "loss": 0.0427, |
| "step": 8940 |
| }, |
| { |
| "epoch": 9.2173017507724, |
| "grad_norm": 0.6212796568870544, |
| "learning_rate": 5.45956622091064e-05, |
| "loss": 0.0444, |
| "step": 8950 |
| }, |
| { |
| "epoch": 9.227600411946447, |
| "grad_norm": 0.29458391666412354, |
| "learning_rate": 5.4504183909733734e-05, |
| "loss": 0.0402, |
| "step": 8960 |
| }, |
| { |
| "epoch": 9.237899073120495, |
| "grad_norm": 0.309467613697052, |
| "learning_rate": 5.441269040756334e-05, |
| "loss": 0.0412, |
| "step": 8970 |
| }, |
| { |
| "epoch": 9.248197734294541, |
| "grad_norm": 0.17707674205303192, |
| "learning_rate": 5.43211820114097e-05, |
| "loss": 0.0423, |
| "step": 8980 |
| }, |
| { |
| "epoch": 9.258496395468589, |
| "grad_norm": 0.4098307490348816, |
| "learning_rate": 5.422965903013757e-05, |
| "loss": 0.0421, |
| "step": 8990 |
| }, |
| { |
| "epoch": 9.268795056642636, |
| "grad_norm": 0.31290164589881897, |
| "learning_rate": 5.41381217726609e-05, |
| "loss": 0.0402, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.279093717816684, |
| "grad_norm": 0.20957662165164948, |
| "learning_rate": 5.404657054794189e-05, |
| "loss": 0.0426, |
| "step": 9010 |
| }, |
| { |
| "epoch": 9.289392378990732, |
| "grad_norm": 0.2308698147535324, |
| "learning_rate": 5.3955005664989834e-05, |
| "loss": 0.0389, |
| "step": 9020 |
| }, |
| { |
| "epoch": 9.29969104016478, |
| "grad_norm": 0.2409774512052536, |
| "learning_rate": 5.3863427432860125e-05, |
| "loss": 0.0352, |
| "step": 9030 |
| }, |
| { |
| "epoch": 9.309989701338825, |
| "grad_norm": 0.24483443796634674, |
| "learning_rate": 5.3771836160653254e-05, |
| "loss": 0.0406, |
| "step": 9040 |
| }, |
| { |
| "epoch": 9.320288362512873, |
| "grad_norm": 0.2869531810283661, |
| "learning_rate": 5.368023215751369e-05, |
| "loss": 0.0379, |
| "step": 9050 |
| }, |
| { |
| "epoch": 9.33058702368692, |
| "grad_norm": 0.27807915210723877, |
| "learning_rate": 5.3588615732628854e-05, |
| "loss": 0.0451, |
| "step": 9060 |
| }, |
| { |
| "epoch": 9.340885684860968, |
| "grad_norm": 0.33199331164360046, |
| "learning_rate": 5.3496987195228156e-05, |
| "loss": 0.034, |
| "step": 9070 |
| }, |
| { |
| "epoch": 9.351184346035016, |
| "grad_norm": 0.2562348246574402, |
| "learning_rate": 5.340534685458185e-05, |
| "loss": 0.0413, |
| "step": 9080 |
| }, |
| { |
| "epoch": 9.361483007209063, |
| "grad_norm": 0.3097791075706482, |
| "learning_rate": 5.3313695020000024e-05, |
| "loss": 0.039, |
| "step": 9090 |
| }, |
| { |
| "epoch": 9.371781668383111, |
| "grad_norm": 0.3079645037651062, |
| "learning_rate": 5.322203200083154e-05, |
| "loss": 0.0349, |
| "step": 9100 |
| }, |
| { |
| "epoch": 9.382080329557157, |
| "grad_norm": 0.4117037057876587, |
| "learning_rate": 5.3130358106463104e-05, |
| "loss": 0.0407, |
| "step": 9110 |
| }, |
| { |
| "epoch": 9.392378990731205, |
| "grad_norm": 0.4133201539516449, |
| "learning_rate": 5.303867364631804e-05, |
| "loss": 0.045, |
| "step": 9120 |
| }, |
| { |
| "epoch": 9.402677651905252, |
| "grad_norm": 0.2096584141254425, |
| "learning_rate": 5.294697892985534e-05, |
| "loss": 0.0335, |
| "step": 9130 |
| }, |
| { |
| "epoch": 9.4129763130793, |
| "grad_norm": 0.28559908270835876, |
| "learning_rate": 5.285527426656865e-05, |
| "loss": 0.0398, |
| "step": 9140 |
| }, |
| { |
| "epoch": 9.423274974253347, |
| "grad_norm": 0.3598606288433075, |
| "learning_rate": 5.2763559965985184e-05, |
| "loss": 0.0419, |
| "step": 9150 |
| }, |
| { |
| "epoch": 9.433573635427395, |
| "grad_norm": 0.35209372639656067, |
| "learning_rate": 5.2671836337664634e-05, |
| "loss": 0.0405, |
| "step": 9160 |
| }, |
| { |
| "epoch": 9.443872296601441, |
| "grad_norm": 0.23415158689022064, |
| "learning_rate": 5.2580103691198255e-05, |
| "loss": 0.0366, |
| "step": 9170 |
| }, |
| { |
| "epoch": 9.454170957775489, |
| "grad_norm": 0.2906668484210968, |
| "learning_rate": 5.24883623362077e-05, |
| "loss": 0.0493, |
| "step": 9180 |
| }, |
| { |
| "epoch": 9.464469618949536, |
| "grad_norm": 0.21137650310993195, |
| "learning_rate": 5.2396612582343986e-05, |
| "loss": 0.0423, |
| "step": 9190 |
| }, |
| { |
| "epoch": 9.474768280123584, |
| "grad_norm": 0.23499812185764313, |
| "learning_rate": 5.230485473928651e-05, |
| "loss": 0.0416, |
| "step": 9200 |
| }, |
| { |
| "epoch": 9.485066941297632, |
| "grad_norm": 0.372158020734787, |
| "learning_rate": 5.221308911674201e-05, |
| "loss": 0.0407, |
| "step": 9210 |
| }, |
| { |
| "epoch": 9.49536560247168, |
| "grad_norm": 0.2552221119403839, |
| "learning_rate": 5.2121316024443415e-05, |
| "loss": 0.0408, |
| "step": 9220 |
| }, |
| { |
| "epoch": 9.505664263645727, |
| "grad_norm": 0.27116450667381287, |
| "learning_rate": 5.202953577214889e-05, |
| "loss": 0.0375, |
| "step": 9230 |
| }, |
| { |
| "epoch": 9.515962924819773, |
| "grad_norm": 1.0216639041900635, |
| "learning_rate": 5.1937748669640776e-05, |
| "loss": 0.0412, |
| "step": 9240 |
| }, |
| { |
| "epoch": 9.52626158599382, |
| "grad_norm": 0.39132076501846313, |
| "learning_rate": 5.1845955026724535e-05, |
| "loss": 0.0408, |
| "step": 9250 |
| }, |
| { |
| "epoch": 9.536560247167868, |
| "grad_norm": 0.3046022653579712, |
| "learning_rate": 5.175415515322768e-05, |
| "loss": 0.0349, |
| "step": 9260 |
| }, |
| { |
| "epoch": 9.546858908341916, |
| "grad_norm": 0.5317039489746094, |
| "learning_rate": 5.1662349358998796e-05, |
| "loss": 0.0377, |
| "step": 9270 |
| }, |
| { |
| "epoch": 9.557157569515963, |
| "grad_norm": 0.308902382850647, |
| "learning_rate": 5.157053795390642e-05, |
| "loss": 0.0416, |
| "step": 9280 |
| }, |
| { |
| "epoch": 9.567456230690011, |
| "grad_norm": 0.1709175854921341, |
| "learning_rate": 5.147872124783805e-05, |
| "loss": 0.0367, |
| "step": 9290 |
| }, |
| { |
| "epoch": 9.577754891864057, |
| "grad_norm": 0.35447025299072266, |
| "learning_rate": 5.138689955069902e-05, |
| "loss": 0.0339, |
| "step": 9300 |
| }, |
| { |
| "epoch": 9.588053553038105, |
| "grad_norm": 0.20557384192943573, |
| "learning_rate": 5.12950731724116e-05, |
| "loss": 0.0435, |
| "step": 9310 |
| }, |
| { |
| "epoch": 9.598352214212152, |
| "grad_norm": 0.27278539538383484, |
| "learning_rate": 5.12032424229138e-05, |
| "loss": 0.0399, |
| "step": 9320 |
| }, |
| { |
| "epoch": 9.6086508753862, |
| "grad_norm": 0.3033859133720398, |
| "learning_rate": 5.111140761215839e-05, |
| "loss": 0.0376, |
| "step": 9330 |
| }, |
| { |
| "epoch": 9.618949536560248, |
| "grad_norm": 0.3543021082878113, |
| "learning_rate": 5.101956905011185e-05, |
| "loss": 0.0427, |
| "step": 9340 |
| }, |
| { |
| "epoch": 9.629248197734295, |
| "grad_norm": 0.2944181561470032, |
| "learning_rate": 5.0927727046753336e-05, |
| "loss": 0.0371, |
| "step": 9350 |
| }, |
| { |
| "epoch": 9.639546858908343, |
| "grad_norm": 0.3597414493560791, |
| "learning_rate": 5.08358819120736e-05, |
| "loss": 0.0373, |
| "step": 9360 |
| }, |
| { |
| "epoch": 9.649845520082389, |
| "grad_norm": 0.33194977045059204, |
| "learning_rate": 5.074403395607399e-05, |
| "loss": 0.0424, |
| "step": 9370 |
| }, |
| { |
| "epoch": 9.660144181256436, |
| "grad_norm": 0.21433711051940918, |
| "learning_rate": 5.0652183488765335e-05, |
| "loss": 0.0407, |
| "step": 9380 |
| }, |
| { |
| "epoch": 9.670442842430484, |
| "grad_norm": 0.3961849808692932, |
| "learning_rate": 5.056033082016699e-05, |
| "loss": 0.0419, |
| "step": 9390 |
| }, |
| { |
| "epoch": 9.680741503604532, |
| "grad_norm": 0.9774559140205383, |
| "learning_rate": 5.046847626030569e-05, |
| "loss": 0.041, |
| "step": 9400 |
| }, |
| { |
| "epoch": 9.69104016477858, |
| "grad_norm": 0.36883220076560974, |
| "learning_rate": 5.037662011921459e-05, |
| "loss": 0.0377, |
| "step": 9410 |
| }, |
| { |
| "epoch": 9.701338825952627, |
| "grad_norm": 0.37542909383773804, |
| "learning_rate": 5.028476270693217e-05, |
| "loss": 0.0408, |
| "step": 9420 |
| }, |
| { |
| "epoch": 9.711637487126673, |
| "grad_norm": 0.45353376865386963, |
| "learning_rate": 5.0192904333501214e-05, |
| "loss": 0.0419, |
| "step": 9430 |
| }, |
| { |
| "epoch": 9.72193614830072, |
| "grad_norm": 0.27116161584854126, |
| "learning_rate": 5.010104530896771e-05, |
| "loss": 0.0447, |
| "step": 9440 |
| }, |
| { |
| "epoch": 9.732234809474768, |
| "grad_norm": 0.26916906237602234, |
| "learning_rate": 5.000918594337989e-05, |
| "loss": 0.0461, |
| "step": 9450 |
| }, |
| { |
| "epoch": 9.742533470648816, |
| "grad_norm": 0.3069358766078949, |
| "learning_rate": 4.991732654678709e-05, |
| "loss": 0.0458, |
| "step": 9460 |
| }, |
| { |
| "epoch": 9.752832131822863, |
| "grad_norm": 0.42274564504623413, |
| "learning_rate": 4.9825467429238834e-05, |
| "loss": 0.0401, |
| "step": 9470 |
| }, |
| { |
| "epoch": 9.763130792996911, |
| "grad_norm": 0.17982327938079834, |
| "learning_rate": 4.973360890078358e-05, |
| "loss": 0.0427, |
| "step": 9480 |
| }, |
| { |
| "epoch": 9.773429454170957, |
| "grad_norm": 0.23251447081565857, |
| "learning_rate": 4.96417512714679e-05, |
| "loss": 0.0326, |
| "step": 9490 |
| }, |
| { |
| "epoch": 9.783728115345005, |
| "grad_norm": 0.2869229018688202, |
| "learning_rate": 4.954989485133533e-05, |
| "loss": 0.0507, |
| "step": 9500 |
| }, |
| { |
| "epoch": 9.794026776519052, |
| "grad_norm": 1.0959696769714355, |
| "learning_rate": 4.9458039950425224e-05, |
| "loss": 0.0518, |
| "step": 9510 |
| }, |
| { |
| "epoch": 9.8043254376931, |
| "grad_norm": 0.3641543686389923, |
| "learning_rate": 4.9366186878771926e-05, |
| "loss": 0.0434, |
| "step": 9520 |
| }, |
| { |
| "epoch": 9.814624098867148, |
| "grad_norm": 0.5896167159080505, |
| "learning_rate": 4.927433594640354e-05, |
| "loss": 0.0409, |
| "step": 9530 |
| }, |
| { |
| "epoch": 9.824922760041195, |
| "grad_norm": 0.24302540719509125, |
| "learning_rate": 4.918248746334096e-05, |
| "loss": 0.0451, |
| "step": 9540 |
| }, |
| { |
| "epoch": 9.835221421215241, |
| "grad_norm": 0.2889201045036316, |
| "learning_rate": 4.909064173959681e-05, |
| "loss": 0.0384, |
| "step": 9550 |
| }, |
| { |
| "epoch": 9.845520082389289, |
| "grad_norm": 0.37873101234436035, |
| "learning_rate": 4.8998799085174455e-05, |
| "loss": 0.0404, |
| "step": 9560 |
| }, |
| { |
| "epoch": 9.855818743563336, |
| "grad_norm": 0.4369457960128784, |
| "learning_rate": 4.89069598100668e-05, |
| "loss": 0.0431, |
| "step": 9570 |
| }, |
| { |
| "epoch": 9.866117404737384, |
| "grad_norm": 0.37580832839012146, |
| "learning_rate": 4.881512422425541e-05, |
| "loss": 0.044, |
| "step": 9580 |
| }, |
| { |
| "epoch": 9.876416065911432, |
| "grad_norm": 0.46920913457870483, |
| "learning_rate": 4.872329263770942e-05, |
| "loss": 0.0469, |
| "step": 9590 |
| }, |
| { |
| "epoch": 9.88671472708548, |
| "grad_norm": 0.24571798741817474, |
| "learning_rate": 4.8631465360384385e-05, |
| "loss": 0.0398, |
| "step": 9600 |
| }, |
| { |
| "epoch": 9.897013388259527, |
| "grad_norm": 0.3728749454021454, |
| "learning_rate": 4.85396427022214e-05, |
| "loss": 0.0352, |
| "step": 9610 |
| }, |
| { |
| "epoch": 9.907312049433573, |
| "grad_norm": 0.301878958940506, |
| "learning_rate": 4.844782497314591e-05, |
| "loss": 0.0432, |
| "step": 9620 |
| }, |
| { |
| "epoch": 9.91761071060762, |
| "grad_norm": 0.26632949709892273, |
| "learning_rate": 4.835601248306675e-05, |
| "loss": 0.0439, |
| "step": 9630 |
| }, |
| { |
| "epoch": 9.927909371781668, |
| "grad_norm": 0.31497064232826233, |
| "learning_rate": 4.826420554187506e-05, |
| "loss": 0.0399, |
| "step": 9640 |
| }, |
| { |
| "epoch": 9.938208032955716, |
| "grad_norm": 0.26114657521247864, |
| "learning_rate": 4.817240445944327e-05, |
| "loss": 0.0408, |
| "step": 9650 |
| }, |
| { |
| "epoch": 9.948506694129764, |
| "grad_norm": 0.2729547619819641, |
| "learning_rate": 4.8080609545624004e-05, |
| "loss": 0.0392, |
| "step": 9660 |
| }, |
| { |
| "epoch": 9.958805355303811, |
| "grad_norm": 0.22712601721286774, |
| "learning_rate": 4.798882111024912e-05, |
| "loss": 0.0363, |
| "step": 9670 |
| }, |
| { |
| "epoch": 9.969104016477857, |
| "grad_norm": 0.47241315245628357, |
| "learning_rate": 4.7897039463128524e-05, |
| "loss": 0.0369, |
| "step": 9680 |
| }, |
| { |
| "epoch": 9.979402677651905, |
| "grad_norm": 0.3929249048233032, |
| "learning_rate": 4.780526491404929e-05, |
| "loss": 0.0436, |
| "step": 9690 |
| }, |
| { |
| "epoch": 9.989701338825952, |
| "grad_norm": 0.32324254512786865, |
| "learning_rate": 4.771349777277452e-05, |
| "loss": 0.0418, |
| "step": 9700 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.4991161525249481, |
| "learning_rate": 4.762173834904225e-05, |
| "loss": 0.0352, |
| "step": 9710 |
| }, |
| { |
| "epoch": 10.010298661174048, |
| "grad_norm": 0.2615014612674713, |
| "learning_rate": 4.752998695256455e-05, |
| "loss": 0.0412, |
| "step": 9720 |
| }, |
| { |
| "epoch": 10.020597322348095, |
| "grad_norm": 0.29027608036994934, |
| "learning_rate": 4.743824389302635e-05, |
| "loss": 0.035, |
| "step": 9730 |
| }, |
| { |
| "epoch": 10.030895983522143, |
| "grad_norm": 0.3496328294277191, |
| "learning_rate": 4.734650948008445e-05, |
| "loss": 0.038, |
| "step": 9740 |
| }, |
| { |
| "epoch": 10.041194644696189, |
| "grad_norm": 0.25003111362457275, |
| "learning_rate": 4.7254784023366444e-05, |
| "loss": 0.0408, |
| "step": 9750 |
| }, |
| { |
| "epoch": 10.051493305870236, |
| "grad_norm": 0.28183093667030334, |
| "learning_rate": 4.716306783246977e-05, |
| "loss": 0.0415, |
| "step": 9760 |
| }, |
| { |
| "epoch": 10.061791967044284, |
| "grad_norm": 0.3574424386024475, |
| "learning_rate": 4.707136121696048e-05, |
| "loss": 0.0394, |
| "step": 9770 |
| }, |
| { |
| "epoch": 10.072090628218332, |
| "grad_norm": 0.2761897146701813, |
| "learning_rate": 4.69796644863724e-05, |
| "loss": 0.034, |
| "step": 9780 |
| }, |
| { |
| "epoch": 10.08238928939238, |
| "grad_norm": 0.2602722644805908, |
| "learning_rate": 4.688797795020597e-05, |
| "loss": 0.0354, |
| "step": 9790 |
| }, |
| { |
| "epoch": 10.092687950566427, |
| "grad_norm": 0.2515560984611511, |
| "learning_rate": 4.6796301917927166e-05, |
| "loss": 0.0402, |
| "step": 9800 |
| }, |
| { |
| "epoch": 10.102986611740473, |
| "grad_norm": 0.24942000210285187, |
| "learning_rate": 4.670463669896659e-05, |
| "loss": 0.0406, |
| "step": 9810 |
| }, |
| { |
| "epoch": 10.11328527291452, |
| "grad_norm": 0.29609471559524536, |
| "learning_rate": 4.66129826027183e-05, |
| "loss": 0.0397, |
| "step": 9820 |
| }, |
| { |
| "epoch": 10.123583934088568, |
| "grad_norm": 0.3640936613082886, |
| "learning_rate": 4.652133993853883e-05, |
| "loss": 0.0456, |
| "step": 9830 |
| }, |
| { |
| "epoch": 10.133882595262616, |
| "grad_norm": 0.2724517285823822, |
| "learning_rate": 4.64297090157461e-05, |
| "loss": 0.0371, |
| "step": 9840 |
| }, |
| { |
| "epoch": 10.144181256436664, |
| "grad_norm": 0.33307430148124695, |
| "learning_rate": 4.633809014361843e-05, |
| "loss": 0.0438, |
| "step": 9850 |
| }, |
| { |
| "epoch": 10.154479917610711, |
| "grad_norm": 0.45976462960243225, |
| "learning_rate": 4.624648363139344e-05, |
| "loss": 0.0479, |
| "step": 9860 |
| }, |
| { |
| "epoch": 10.164778578784759, |
| "grad_norm": 0.24571570754051208, |
| "learning_rate": 4.615488978826709e-05, |
| "loss": 0.0375, |
| "step": 9870 |
| }, |
| { |
| "epoch": 10.175077239958805, |
| "grad_norm": 0.4202505052089691, |
| "learning_rate": 4.6063308923392485e-05, |
| "loss": 0.0446, |
| "step": 9880 |
| }, |
| { |
| "epoch": 10.185375901132852, |
| "grad_norm": 0.30180397629737854, |
| "learning_rate": 4.5971741345879e-05, |
| "loss": 0.0372, |
| "step": 9890 |
| }, |
| { |
| "epoch": 10.1956745623069, |
| "grad_norm": 0.39542245864868164, |
| "learning_rate": 4.588018736479115e-05, |
| "loss": 0.0407, |
| "step": 9900 |
| }, |
| { |
| "epoch": 10.205973223480948, |
| "grad_norm": 0.5576333403587341, |
| "learning_rate": 4.5788647289147516e-05, |
| "loss": 0.0372, |
| "step": 9910 |
| }, |
| { |
| "epoch": 10.216271884654995, |
| "grad_norm": 0.2639693319797516, |
| "learning_rate": 4.56971214279198e-05, |
| "loss": 0.0463, |
| "step": 9920 |
| }, |
| { |
| "epoch": 10.226570545829043, |
| "grad_norm": 0.26938265562057495, |
| "learning_rate": 4.56056100900317e-05, |
| "loss": 0.0367, |
| "step": 9930 |
| }, |
| { |
| "epoch": 10.236869207003089, |
| "grad_norm": 0.27783456444740295, |
| "learning_rate": 4.5514113584357873e-05, |
| "loss": 0.0369, |
| "step": 9940 |
| }, |
| { |
| "epoch": 10.247167868177137, |
| "grad_norm": 0.27680081129074097, |
| "learning_rate": 4.542263221972295e-05, |
| "loss": 0.0393, |
| "step": 9950 |
| }, |
| { |
| "epoch": 10.257466529351184, |
| "grad_norm": 0.2161240130662918, |
| "learning_rate": 4.5331166304900464e-05, |
| "loss": 0.042, |
| "step": 9960 |
| }, |
| { |
| "epoch": 10.267765190525232, |
| "grad_norm": 0.27455902099609375, |
| "learning_rate": 4.5239716148611724e-05, |
| "loss": 0.0434, |
| "step": 9970 |
| }, |
| { |
| "epoch": 10.27806385169928, |
| "grad_norm": 0.3013168275356293, |
| "learning_rate": 4.514828205952495e-05, |
| "loss": 0.0395, |
| "step": 9980 |
| }, |
| { |
| "epoch": 10.288362512873327, |
| "grad_norm": 0.2296813279390335, |
| "learning_rate": 4.505686434625409e-05, |
| "loss": 0.0368, |
| "step": 9990 |
| }, |
| { |
| "epoch": 10.298661174047373, |
| "grad_norm": 0.19806218147277832, |
| "learning_rate": 4.496546331735778e-05, |
| "loss": 0.0391, |
| "step": 10000 |
| }, |
| { |
| "epoch": 10.30895983522142, |
| "grad_norm": 0.24850870668888092, |
| "learning_rate": 4.4874079281338416e-05, |
| "loss": 0.0407, |
| "step": 10010 |
| }, |
| { |
| "epoch": 10.319258496395468, |
| "grad_norm": 0.16531158983707428, |
| "learning_rate": 4.478271254664097e-05, |
| "loss": 0.0359, |
| "step": 10020 |
| }, |
| { |
| "epoch": 10.329557157569516, |
| "grad_norm": 0.5394207835197449, |
| "learning_rate": 4.469136342165207e-05, |
| "loss": 0.0375, |
| "step": 10030 |
| }, |
| { |
| "epoch": 10.339855818743564, |
| "grad_norm": 0.4204263687133789, |
| "learning_rate": 4.460003221469886e-05, |
| "loss": 0.042, |
| "step": 10040 |
| }, |
| { |
| "epoch": 10.350154479917611, |
| "grad_norm": 2.313096284866333, |
| "learning_rate": 4.450871923404806e-05, |
| "loss": 0.0465, |
| "step": 10050 |
| }, |
| { |
| "epoch": 10.360453141091659, |
| "grad_norm": 0.6360970735549927, |
| "learning_rate": 4.441742478790481e-05, |
| "loss": 0.0421, |
| "step": 10060 |
| }, |
| { |
| "epoch": 10.370751802265705, |
| "grad_norm": 0.23286186158657074, |
| "learning_rate": 4.432614918441175e-05, |
| "loss": 0.0352, |
| "step": 10070 |
| }, |
| { |
| "epoch": 10.381050463439752, |
| "grad_norm": 0.3724748194217682, |
| "learning_rate": 4.4234892731647866e-05, |
| "loss": 0.0434, |
| "step": 10080 |
| }, |
| { |
| "epoch": 10.3913491246138, |
| "grad_norm": 0.212792307138443, |
| "learning_rate": 4.414365573762755e-05, |
| "loss": 0.0357, |
| "step": 10090 |
| }, |
| { |
| "epoch": 10.401647785787848, |
| "grad_norm": 0.22442536056041718, |
| "learning_rate": 4.4052438510299515e-05, |
| "loss": 0.0398, |
| "step": 10100 |
| }, |
| { |
| "epoch": 10.411946446961895, |
| "grad_norm": 0.3250674307346344, |
| "learning_rate": 4.3961241357545706e-05, |
| "loss": 0.0377, |
| "step": 10110 |
| }, |
| { |
| "epoch": 10.422245108135943, |
| "grad_norm": 0.2997426986694336, |
| "learning_rate": 4.387006458718037e-05, |
| "loss": 0.0385, |
| "step": 10120 |
| }, |
| { |
| "epoch": 10.432543769309989, |
| "grad_norm": 0.26953554153442383, |
| "learning_rate": 4.377890850694893e-05, |
| "loss": 0.0352, |
| "step": 10130 |
| }, |
| { |
| "epoch": 10.442842430484037, |
| "grad_norm": 0.3824928402900696, |
| "learning_rate": 4.368777342452697e-05, |
| "loss": 0.038, |
| "step": 10140 |
| }, |
| { |
| "epoch": 10.453141091658084, |
| "grad_norm": 0.33039042353630066, |
| "learning_rate": 4.35966596475192e-05, |
| "loss": 0.0354, |
| "step": 10150 |
| }, |
| { |
| "epoch": 10.463439752832132, |
| "grad_norm": 0.665787935256958, |
| "learning_rate": 4.3505567483458456e-05, |
| "loss": 0.0393, |
| "step": 10160 |
| }, |
| { |
| "epoch": 10.47373841400618, |
| "grad_norm": 0.25892671942710876, |
| "learning_rate": 4.341449723980457e-05, |
| "loss": 0.0403, |
| "step": 10170 |
| }, |
| { |
| "epoch": 10.484037075180227, |
| "grad_norm": 0.8381480574607849, |
| "learning_rate": 4.3323449223943416e-05, |
| "loss": 0.0403, |
| "step": 10180 |
| }, |
| { |
| "epoch": 10.494335736354273, |
| "grad_norm": 0.2520352303981781, |
| "learning_rate": 4.323242374318586e-05, |
| "loss": 0.0376, |
| "step": 10190 |
| }, |
| { |
| "epoch": 10.50463439752832, |
| "grad_norm": 0.30395472049713135, |
| "learning_rate": 4.314142110476666e-05, |
| "loss": 0.039, |
| "step": 10200 |
| }, |
| { |
| "epoch": 10.514933058702368, |
| "grad_norm": 0.2134946584701538, |
| "learning_rate": 4.305044161584352e-05, |
| "loss": 0.0356, |
| "step": 10210 |
| }, |
| { |
| "epoch": 10.525231719876416, |
| "grad_norm": 0.30410531163215637, |
| "learning_rate": 4.295948558349598e-05, |
| "loss": 0.0399, |
| "step": 10220 |
| }, |
| { |
| "epoch": 10.535530381050464, |
| "grad_norm": 0.3639879524707794, |
| "learning_rate": 4.2868553314724425e-05, |
| "loss": 0.0377, |
| "step": 10230 |
| }, |
| { |
| "epoch": 10.545829042224511, |
| "grad_norm": 0.7833529114723206, |
| "learning_rate": 4.2777645116449004e-05, |
| "loss": 0.042, |
| "step": 10240 |
| }, |
| { |
| "epoch": 10.556127703398559, |
| "grad_norm": 0.3496880829334259, |
| "learning_rate": 4.268676129550869e-05, |
| "loss": 0.043, |
| "step": 10250 |
| }, |
| { |
| "epoch": 10.566426364572605, |
| "grad_norm": 0.24933426082134247, |
| "learning_rate": 4.2595902158660074e-05, |
| "loss": 0.0392, |
| "step": 10260 |
| }, |
| { |
| "epoch": 10.576725025746653, |
| "grad_norm": 0.35013383626937866, |
| "learning_rate": 4.250506801257653e-05, |
| "loss": 0.0403, |
| "step": 10270 |
| }, |
| { |
| "epoch": 10.5870236869207, |
| "grad_norm": 0.5155181884765625, |
| "learning_rate": 4.241425916384699e-05, |
| "loss": 0.0383, |
| "step": 10280 |
| }, |
| { |
| "epoch": 10.597322348094748, |
| "grad_norm": 0.5019784569740295, |
| "learning_rate": 4.2323475918975075e-05, |
| "loss": 0.0412, |
| "step": 10290 |
| }, |
| { |
| "epoch": 10.607621009268795, |
| "grad_norm": 0.38487544655799866, |
| "learning_rate": 4.223271858437799e-05, |
| "loss": 0.0377, |
| "step": 10300 |
| }, |
| { |
| "epoch": 10.617919670442843, |
| "grad_norm": 0.2794114947319031, |
| "learning_rate": 4.21419874663854e-05, |
| "loss": 0.0398, |
| "step": 10310 |
| }, |
| { |
| "epoch": 10.628218331616889, |
| "grad_norm": 0.1784840226173401, |
| "learning_rate": 4.205128287123858e-05, |
| "loss": 0.0375, |
| "step": 10320 |
| }, |
| { |
| "epoch": 10.638516992790937, |
| "grad_norm": 0.19784130156040192, |
| "learning_rate": 4.196060510508922e-05, |
| "loss": 0.0329, |
| "step": 10330 |
| }, |
| { |
| "epoch": 10.648815653964984, |
| "grad_norm": 0.25078096985816956, |
| "learning_rate": 4.186995447399849e-05, |
| "loss": 0.0305, |
| "step": 10340 |
| }, |
| { |
| "epoch": 10.659114315139032, |
| "grad_norm": 0.2800082862377167, |
| "learning_rate": 4.177933128393594e-05, |
| "loss": 0.0386, |
| "step": 10350 |
| }, |
| { |
| "epoch": 10.66941297631308, |
| "grad_norm": 0.2689889073371887, |
| "learning_rate": 4.1688735840778546e-05, |
| "loss": 0.0355, |
| "step": 10360 |
| }, |
| { |
| "epoch": 10.679711637487127, |
| "grad_norm": 0.26448753476142883, |
| "learning_rate": 4.159816845030957e-05, |
| "loss": 0.0357, |
| "step": 10370 |
| }, |
| { |
| "epoch": 10.690010298661175, |
| "grad_norm": 0.2718246579170227, |
| "learning_rate": 4.1507629418217634e-05, |
| "loss": 0.0339, |
| "step": 10380 |
| }, |
| { |
| "epoch": 10.70030895983522, |
| "grad_norm": 0.2607558071613312, |
| "learning_rate": 4.141711905009566e-05, |
| "loss": 0.0397, |
| "step": 10390 |
| }, |
| { |
| "epoch": 10.710607621009268, |
| "grad_norm": 0.324266254901886, |
| "learning_rate": 4.132663765143975e-05, |
| "loss": 0.0355, |
| "step": 10400 |
| }, |
| { |
| "epoch": 10.720906282183316, |
| "grad_norm": 0.31110501289367676, |
| "learning_rate": 4.1236185527648294e-05, |
| "loss": 0.0389, |
| "step": 10410 |
| }, |
| { |
| "epoch": 10.731204943357364, |
| "grad_norm": 0.3010208010673523, |
| "learning_rate": 4.114576298402084e-05, |
| "loss": 0.0384, |
| "step": 10420 |
| }, |
| { |
| "epoch": 10.741503604531411, |
| "grad_norm": 0.42494192719459534, |
| "learning_rate": 4.1055370325757106e-05, |
| "loss": 0.0407, |
| "step": 10430 |
| }, |
| { |
| "epoch": 10.751802265705459, |
| "grad_norm": 0.26597830653190613, |
| "learning_rate": 4.096500785795591e-05, |
| "loss": 0.0351, |
| "step": 10440 |
| }, |
| { |
| "epoch": 10.762100926879505, |
| "grad_norm": 0.3270758092403412, |
| "learning_rate": 4.087467588561424e-05, |
| "loss": 0.0351, |
| "step": 10450 |
| }, |
| { |
| "epoch": 10.772399588053553, |
| "grad_norm": 0.35372480750083923, |
| "learning_rate": 4.0784374713626076e-05, |
| "loss": 0.0431, |
| "step": 10460 |
| }, |
| { |
| "epoch": 10.7826982492276, |
| "grad_norm": 0.3251330256462097, |
| "learning_rate": 4.069410464678148e-05, |
| "loss": 0.0352, |
| "step": 10470 |
| }, |
| { |
| "epoch": 10.792996910401648, |
| "grad_norm": 0.26621249318122864, |
| "learning_rate": 4.0603865989765504e-05, |
| "loss": 0.0432, |
| "step": 10480 |
| }, |
| { |
| "epoch": 10.803295571575696, |
| "grad_norm": 0.3128867745399475, |
| "learning_rate": 4.05136590471572e-05, |
| "loss": 0.0412, |
| "step": 10490 |
| }, |
| { |
| "epoch": 10.813594232749743, |
| "grad_norm": 0.20734545588493347, |
| "learning_rate": 4.042348412342861e-05, |
| "loss": 0.0352, |
| "step": 10500 |
| }, |
| { |
| "epoch": 10.82389289392379, |
| "grad_norm": 0.3195039629936218, |
| "learning_rate": 4.0333341522943614e-05, |
| "loss": 0.0374, |
| "step": 10510 |
| }, |
| { |
| "epoch": 10.834191555097837, |
| "grad_norm": 0.27724260091781616, |
| "learning_rate": 4.024323154995708e-05, |
| "loss": 0.0405, |
| "step": 10520 |
| }, |
| { |
| "epoch": 10.844490216271884, |
| "grad_norm": 0.2909531593322754, |
| "learning_rate": 4.015315450861371e-05, |
| "loss": 0.0364, |
| "step": 10530 |
| }, |
| { |
| "epoch": 10.854788877445932, |
| "grad_norm": 0.28578925132751465, |
| "learning_rate": 4.006311070294702e-05, |
| "loss": 0.0354, |
| "step": 10540 |
| }, |
| { |
| "epoch": 10.86508753861998, |
| "grad_norm": 0.2503175437450409, |
| "learning_rate": 3.997310043687842e-05, |
| "loss": 0.0348, |
| "step": 10550 |
| }, |
| { |
| "epoch": 10.875386199794027, |
| "grad_norm": 0.36039701104164124, |
| "learning_rate": 3.988312401421609e-05, |
| "loss": 0.0414, |
| "step": 10560 |
| }, |
| { |
| "epoch": 10.885684860968075, |
| "grad_norm": 0.45128464698791504, |
| "learning_rate": 3.979318173865393e-05, |
| "loss": 0.04, |
| "step": 10570 |
| }, |
| { |
| "epoch": 10.89598352214212, |
| "grad_norm": 0.35974377393722534, |
| "learning_rate": 3.970327391377064e-05, |
| "loss": 0.0392, |
| "step": 10580 |
| }, |
| { |
| "epoch": 10.906282183316168, |
| "grad_norm": 0.22907008230686188, |
| "learning_rate": 3.9613400843028666e-05, |
| "loss": 0.0342, |
| "step": 10590 |
| }, |
| { |
| "epoch": 10.916580844490216, |
| "grad_norm": 0.3276582956314087, |
| "learning_rate": 3.9523562829773036e-05, |
| "loss": 0.043, |
| "step": 10600 |
| }, |
| { |
| "epoch": 10.926879505664264, |
| "grad_norm": 0.27974191308021545, |
| "learning_rate": 3.943376017723057e-05, |
| "loss": 0.0357, |
| "step": 10610 |
| }, |
| { |
| "epoch": 10.937178166838311, |
| "grad_norm": 0.3858673572540283, |
| "learning_rate": 3.934399318850868e-05, |
| "loss": 0.0369, |
| "step": 10620 |
| }, |
| { |
| "epoch": 10.947476828012359, |
| "grad_norm": 0.29965823888778687, |
| "learning_rate": 3.925426216659438e-05, |
| "loss": 0.0369, |
| "step": 10630 |
| }, |
| { |
| "epoch": 10.957775489186405, |
| "grad_norm": 0.3583829998970032, |
| "learning_rate": 3.916456741435336e-05, |
| "loss": 0.0425, |
| "step": 10640 |
| }, |
| { |
| "epoch": 10.968074150360453, |
| "grad_norm": 0.27793335914611816, |
| "learning_rate": 3.9074909234528826e-05, |
| "loss": 0.0399, |
| "step": 10650 |
| }, |
| { |
| "epoch": 10.9783728115345, |
| "grad_norm": 0.24120087921619415, |
| "learning_rate": 3.898528792974056e-05, |
| "loss": 0.0403, |
| "step": 10660 |
| }, |
| { |
| "epoch": 10.988671472708548, |
| "grad_norm": 0.22013327479362488, |
| "learning_rate": 3.8895703802483916e-05, |
| "loss": 0.034, |
| "step": 10670 |
| }, |
| { |
| "epoch": 10.998970133882596, |
| "grad_norm": 0.2588166296482086, |
| "learning_rate": 3.880615715512868e-05, |
| "loss": 0.0316, |
| "step": 10680 |
| }, |
| { |
| "epoch": 11.009268795056643, |
| "grad_norm": 0.2514420449733734, |
| "learning_rate": 3.871664828991822e-05, |
| "loss": 0.0383, |
| "step": 10690 |
| }, |
| { |
| "epoch": 11.019567456230691, |
| "grad_norm": 0.3404804468154907, |
| "learning_rate": 3.862717750896837e-05, |
| "loss": 0.0352, |
| "step": 10700 |
| }, |
| { |
| "epoch": 11.029866117404737, |
| "grad_norm": 0.9497872591018677, |
| "learning_rate": 3.853774511426634e-05, |
| "loss": 0.0366, |
| "step": 10710 |
| }, |
| { |
| "epoch": 11.040164778578784, |
| "grad_norm": 0.28247174620628357, |
| "learning_rate": 3.844835140766988e-05, |
| "loss": 0.0473, |
| "step": 10720 |
| }, |
| { |
| "epoch": 11.050463439752832, |
| "grad_norm": 0.28879600763320923, |
| "learning_rate": 3.83589966909061e-05, |
| "loss": 0.0344, |
| "step": 10730 |
| }, |
| { |
| "epoch": 11.06076210092688, |
| "grad_norm": 0.23894581198692322, |
| "learning_rate": 3.82696812655705e-05, |
| "loss": 0.0349, |
| "step": 10740 |
| }, |
| { |
| "epoch": 11.071060762100927, |
| "grad_norm": 0.26289770007133484, |
| "learning_rate": 3.818040543312598e-05, |
| "loss": 0.0384, |
| "step": 10750 |
| }, |
| { |
| "epoch": 11.081359423274975, |
| "grad_norm": 0.33045023679733276, |
| "learning_rate": 3.809116949490184e-05, |
| "loss": 0.0331, |
| "step": 10760 |
| }, |
| { |
| "epoch": 11.091658084449021, |
| "grad_norm": 0.46705836057662964, |
| "learning_rate": 3.8001973752092655e-05, |
| "loss": 0.0386, |
| "step": 10770 |
| }, |
| { |
| "epoch": 11.101956745623069, |
| "grad_norm": 0.5863741040229797, |
| "learning_rate": 3.791281850575737e-05, |
| "loss": 0.0415, |
| "step": 10780 |
| }, |
| { |
| "epoch": 11.112255406797116, |
| "grad_norm": 0.24471549689769745, |
| "learning_rate": 3.782370405681828e-05, |
| "loss": 0.0372, |
| "step": 10790 |
| }, |
| { |
| "epoch": 11.122554067971164, |
| "grad_norm": 0.3259426951408386, |
| "learning_rate": 3.773463070605987e-05, |
| "loss": 0.043, |
| "step": 10800 |
| }, |
| { |
| "epoch": 11.132852729145212, |
| "grad_norm": 0.2583596408367157, |
| "learning_rate": 3.764559875412803e-05, |
| "loss": 0.0354, |
| "step": 10810 |
| }, |
| { |
| "epoch": 11.14315139031926, |
| "grad_norm": 0.46032634377479553, |
| "learning_rate": 3.7556608501528846e-05, |
| "loss": 0.0393, |
| "step": 10820 |
| }, |
| { |
| "epoch": 11.153450051493305, |
| "grad_norm": 0.38069912791252136, |
| "learning_rate": 3.7467660248627654e-05, |
| "loss": 0.0398, |
| "step": 10830 |
| }, |
| { |
| "epoch": 11.163748712667353, |
| "grad_norm": 0.28435567021369934, |
| "learning_rate": 3.737875429564807e-05, |
| "loss": 0.0388, |
| "step": 10840 |
| }, |
| { |
| "epoch": 11.1740473738414, |
| "grad_norm": 0.34043052792549133, |
| "learning_rate": 3.7289890942670946e-05, |
| "loss": 0.0296, |
| "step": 10850 |
| }, |
| { |
| "epoch": 11.184346035015448, |
| "grad_norm": 0.3213551938533783, |
| "learning_rate": 3.720107048963327e-05, |
| "loss": 0.0296, |
| "step": 10860 |
| }, |
| { |
| "epoch": 11.194644696189496, |
| "grad_norm": 0.45642250776290894, |
| "learning_rate": 3.711229323632732e-05, |
| "loss": 0.0347, |
| "step": 10870 |
| }, |
| { |
| "epoch": 11.204943357363543, |
| "grad_norm": 0.29973405599594116, |
| "learning_rate": 3.70235594823995e-05, |
| "loss": 0.036, |
| "step": 10880 |
| }, |
| { |
| "epoch": 11.215242018537591, |
| "grad_norm": 0.2634925842285156, |
| "learning_rate": 3.693486952734941e-05, |
| "loss": 0.0337, |
| "step": 10890 |
| }, |
| { |
| "epoch": 11.225540679711637, |
| "grad_norm": 0.25237777829170227, |
| "learning_rate": 3.684622367052887e-05, |
| "loss": 0.0347, |
| "step": 10900 |
| }, |
| { |
| "epoch": 11.235839340885684, |
| "grad_norm": 0.20709861814975739, |
| "learning_rate": 3.675762221114077e-05, |
| "loss": 0.0305, |
| "step": 10910 |
| }, |
| { |
| "epoch": 11.246138002059732, |
| "grad_norm": 0.14299030601978302, |
| "learning_rate": 3.66690654482382e-05, |
| "loss": 0.0334, |
| "step": 10920 |
| }, |
| { |
| "epoch": 11.25643666323378, |
| "grad_norm": 0.2454812377691269, |
| "learning_rate": 3.658055368072339e-05, |
| "loss": 0.0375, |
| "step": 10930 |
| }, |
| { |
| "epoch": 11.266735324407827, |
| "grad_norm": 0.2894679307937622, |
| "learning_rate": 3.6492087207346666e-05, |
| "loss": 0.0416, |
| "step": 10940 |
| }, |
| { |
| "epoch": 11.277033985581875, |
| "grad_norm": 0.2871219217777252, |
| "learning_rate": 3.640366632670549e-05, |
| "loss": 0.034, |
| "step": 10950 |
| }, |
| { |
| "epoch": 11.287332646755921, |
| "grad_norm": 0.30559393763542175, |
| "learning_rate": 3.631529133724348e-05, |
| "loss": 0.0369, |
| "step": 10960 |
| }, |
| { |
| "epoch": 11.297631307929969, |
| "grad_norm": 0.35164326429367065, |
| "learning_rate": 3.622696253724927e-05, |
| "loss": 0.035, |
| "step": 10970 |
| }, |
| { |
| "epoch": 11.307929969104016, |
| "grad_norm": 0.27396318316459656, |
| "learning_rate": 3.613868022485566e-05, |
| "loss": 0.0389, |
| "step": 10980 |
| }, |
| { |
| "epoch": 11.318228630278064, |
| "grad_norm": 0.27721869945526123, |
| "learning_rate": 3.605044469803854e-05, |
| "loss": 0.0365, |
| "step": 10990 |
| }, |
| { |
| "epoch": 11.328527291452112, |
| "grad_norm": 0.2726707458496094, |
| "learning_rate": 3.5962256254615853e-05, |
| "loss": 0.0382, |
| "step": 11000 |
| }, |
| { |
| "epoch": 11.33882595262616, |
| "grad_norm": 0.3522757589817047, |
| "learning_rate": 3.587411519224665e-05, |
| "loss": 0.0432, |
| "step": 11010 |
| }, |
| { |
| "epoch": 11.349124613800207, |
| "grad_norm": 0.2744219899177551, |
| "learning_rate": 3.5786021808430054e-05, |
| "loss": 0.0328, |
| "step": 11020 |
| }, |
| { |
| "epoch": 11.359423274974253, |
| "grad_norm": 0.36627647280693054, |
| "learning_rate": 3.569797640050423e-05, |
| "loss": 0.0407, |
| "step": 11030 |
| }, |
| { |
| "epoch": 11.3697219361483, |
| "grad_norm": 0.20793434977531433, |
| "learning_rate": 3.560997926564545e-05, |
| "loss": 0.0284, |
| "step": 11040 |
| }, |
| { |
| "epoch": 11.380020597322348, |
| "grad_norm": 0.23446743190288544, |
| "learning_rate": 3.552203070086707e-05, |
| "loss": 0.0355, |
| "step": 11050 |
| }, |
| { |
| "epoch": 11.390319258496396, |
| "grad_norm": 0.48527511954307556, |
| "learning_rate": 3.543413100301843e-05, |
| "loss": 0.0378, |
| "step": 11060 |
| }, |
| { |
| "epoch": 11.400617919670443, |
| "grad_norm": 0.39768174290657043, |
| "learning_rate": 3.534628046878403e-05, |
| "loss": 0.0329, |
| "step": 11070 |
| }, |
| { |
| "epoch": 11.410916580844491, |
| "grad_norm": 0.19781740009784698, |
| "learning_rate": 3.525847939468233e-05, |
| "loss": 0.0371, |
| "step": 11080 |
| }, |
| { |
| "epoch": 11.421215242018537, |
| "grad_norm": 0.2503238022327423, |
| "learning_rate": 3.517072807706492e-05, |
| "loss": 0.0363, |
| "step": 11090 |
| }, |
| { |
| "epoch": 11.431513903192585, |
| "grad_norm": 0.3444472849369049, |
| "learning_rate": 3.508302681211546e-05, |
| "loss": 0.0343, |
| "step": 11100 |
| }, |
| { |
| "epoch": 11.441812564366632, |
| "grad_norm": 0.3007254898548126, |
| "learning_rate": 3.499537589584859e-05, |
| "loss": 0.0441, |
| "step": 11110 |
| }, |
| { |
| "epoch": 11.45211122554068, |
| "grad_norm": 0.38914212584495544, |
| "learning_rate": 3.490777562410907e-05, |
| "loss": 0.0331, |
| "step": 11120 |
| }, |
| { |
| "epoch": 11.462409886714727, |
| "grad_norm": 0.3051401674747467, |
| "learning_rate": 3.482022629257074e-05, |
| "loss": 0.0328, |
| "step": 11130 |
| }, |
| { |
| "epoch": 11.472708547888775, |
| "grad_norm": 0.306740403175354, |
| "learning_rate": 3.473272819673542e-05, |
| "loss": 0.039, |
| "step": 11140 |
| }, |
| { |
| "epoch": 11.483007209062821, |
| "grad_norm": 0.42291760444641113, |
| "learning_rate": 3.4645281631932074e-05, |
| "loss": 0.0526, |
| "step": 11150 |
| }, |
| { |
| "epoch": 11.493305870236869, |
| "grad_norm": 0.2984221577644348, |
| "learning_rate": 3.455788689331574e-05, |
| "loss": 0.0345, |
| "step": 11160 |
| }, |
| { |
| "epoch": 11.503604531410916, |
| "grad_norm": 0.19411993026733398, |
| "learning_rate": 3.447054427586644e-05, |
| "loss": 0.0384, |
| "step": 11170 |
| }, |
| { |
| "epoch": 11.513903192584964, |
| "grad_norm": 0.3595150113105774, |
| "learning_rate": 3.438325407438837e-05, |
| "loss": 0.0358, |
| "step": 11180 |
| }, |
| { |
| "epoch": 11.524201853759012, |
| "grad_norm": 0.289594829082489, |
| "learning_rate": 3.4296016583508775e-05, |
| "loss": 0.0314, |
| "step": 11190 |
| }, |
| { |
| "epoch": 11.53450051493306, |
| "grad_norm": 0.3801267743110657, |
| "learning_rate": 3.420883209767697e-05, |
| "loss": 0.0453, |
| "step": 11200 |
| }, |
| { |
| "epoch": 11.544799176107105, |
| "grad_norm": 0.45930567383766174, |
| "learning_rate": 3.4121700911163366e-05, |
| "loss": 0.0418, |
| "step": 11210 |
| }, |
| { |
| "epoch": 11.555097837281153, |
| "grad_norm": 0.2295006662607193, |
| "learning_rate": 3.403462331805852e-05, |
| "loss": 0.0378, |
| "step": 11220 |
| }, |
| { |
| "epoch": 11.5653964984552, |
| "grad_norm": 0.38683414459228516, |
| "learning_rate": 3.394759961227202e-05, |
| "loss": 0.038, |
| "step": 11230 |
| }, |
| { |
| "epoch": 11.575695159629248, |
| "grad_norm": 0.32741764187812805, |
| "learning_rate": 3.386063008753164e-05, |
| "loss": 0.0403, |
| "step": 11240 |
| }, |
| { |
| "epoch": 11.585993820803296, |
| "grad_norm": 0.3826991319656372, |
| "learning_rate": 3.377371503738227e-05, |
| "loss": 0.0408, |
| "step": 11250 |
| }, |
| { |
| "epoch": 11.596292481977343, |
| "grad_norm": 0.5855404138565063, |
| "learning_rate": 3.368685475518488e-05, |
| "loss": 0.0343, |
| "step": 11260 |
| }, |
| { |
| "epoch": 11.606591143151391, |
| "grad_norm": 0.30145469307899475, |
| "learning_rate": 3.360004953411566e-05, |
| "loss": 0.0292, |
| "step": 11270 |
| }, |
| { |
| "epoch": 11.616889804325437, |
| "grad_norm": 1.2090197801589966, |
| "learning_rate": 3.3513299667164864e-05, |
| "loss": 0.0298, |
| "step": 11280 |
| }, |
| { |
| "epoch": 11.627188465499485, |
| "grad_norm": 0.7051903009414673, |
| "learning_rate": 3.3426605447136004e-05, |
| "loss": 0.0366, |
| "step": 11290 |
| }, |
| { |
| "epoch": 11.637487126673532, |
| "grad_norm": 0.3094668984413147, |
| "learning_rate": 3.3339967166644726e-05, |
| "loss": 0.0378, |
| "step": 11300 |
| }, |
| { |
| "epoch": 11.64778578784758, |
| "grad_norm": 0.3277672231197357, |
| "learning_rate": 3.325338511811784e-05, |
| "loss": 0.0407, |
| "step": 11310 |
| }, |
| { |
| "epoch": 11.658084449021628, |
| "grad_norm": 0.27167952060699463, |
| "learning_rate": 3.316685959379241e-05, |
| "loss": 0.0377, |
| "step": 11320 |
| }, |
| { |
| "epoch": 11.668383110195675, |
| "grad_norm": 0.5050401091575623, |
| "learning_rate": 3.308039088571469e-05, |
| "loss": 0.039, |
| "step": 11330 |
| }, |
| { |
| "epoch": 11.678681771369721, |
| "grad_norm": 0.23651434481143951, |
| "learning_rate": 3.2993979285739143e-05, |
| "loss": 0.0339, |
| "step": 11340 |
| }, |
| { |
| "epoch": 11.688980432543769, |
| "grad_norm": 0.3040764331817627, |
| "learning_rate": 3.2907625085527503e-05, |
| "loss": 0.0351, |
| "step": 11350 |
| }, |
| { |
| "epoch": 11.699279093717816, |
| "grad_norm": 0.23311540484428406, |
| "learning_rate": 3.28213285765478e-05, |
| "loss": 0.0347, |
| "step": 11360 |
| }, |
| { |
| "epoch": 11.709577754891864, |
| "grad_norm": 0.21837526559829712, |
| "learning_rate": 3.273509005007327e-05, |
| "loss": 0.0397, |
| "step": 11370 |
| }, |
| { |
| "epoch": 11.719876416065912, |
| "grad_norm": 0.24095067381858826, |
| "learning_rate": 3.264890979718147e-05, |
| "loss": 0.0335, |
| "step": 11380 |
| }, |
| { |
| "epoch": 11.73017507723996, |
| "grad_norm": 0.4714142680168152, |
| "learning_rate": 3.256278810875332e-05, |
| "loss": 0.0355, |
| "step": 11390 |
| }, |
| { |
| "epoch": 11.740473738414007, |
| "grad_norm": 0.3001396059989929, |
| "learning_rate": 3.247672527547197e-05, |
| "loss": 0.0311, |
| "step": 11400 |
| }, |
| { |
| "epoch": 11.750772399588053, |
| "grad_norm": 0.2514890730381012, |
| "learning_rate": 3.239072158782198e-05, |
| "loss": 0.0374, |
| "step": 11410 |
| }, |
| { |
| "epoch": 11.7610710607621, |
| "grad_norm": 0.22603774070739746, |
| "learning_rate": 3.230477733608831e-05, |
| "loss": 0.0368, |
| "step": 11420 |
| }, |
| { |
| "epoch": 11.771369721936148, |
| "grad_norm": 0.22810235619544983, |
| "learning_rate": 3.221889281035522e-05, |
| "loss": 0.0331, |
| "step": 11430 |
| }, |
| { |
| "epoch": 11.781668383110196, |
| "grad_norm": 0.18763025104999542, |
| "learning_rate": 3.2133068300505455e-05, |
| "loss": 0.0328, |
| "step": 11440 |
| }, |
| { |
| "epoch": 11.791967044284243, |
| "grad_norm": 0.32261693477630615, |
| "learning_rate": 3.204730409621917e-05, |
| "loss": 0.0408, |
| "step": 11450 |
| }, |
| { |
| "epoch": 11.802265705458291, |
| "grad_norm": 0.27985504269599915, |
| "learning_rate": 3.196160048697293e-05, |
| "loss": 0.0415, |
| "step": 11460 |
| }, |
| { |
| "epoch": 11.812564366632337, |
| "grad_norm": 0.28317996859550476, |
| "learning_rate": 3.187595776203886e-05, |
| "loss": 0.0413, |
| "step": 11470 |
| }, |
| { |
| "epoch": 11.822863027806385, |
| "grad_norm": 0.2768697440624237, |
| "learning_rate": 3.1790376210483494e-05, |
| "loss": 0.0433, |
| "step": 11480 |
| }, |
| { |
| "epoch": 11.833161688980432, |
| "grad_norm": 0.27718645334243774, |
| "learning_rate": 3.170485612116697e-05, |
| "loss": 0.028, |
| "step": 11490 |
| }, |
| { |
| "epoch": 11.84346035015448, |
| "grad_norm": 0.27956560254096985, |
| "learning_rate": 3.161939778274191e-05, |
| "loss": 0.0318, |
| "step": 11500 |
| }, |
| { |
| "epoch": 11.853759011328528, |
| "grad_norm": 0.25807636976242065, |
| "learning_rate": 3.1534001483652556e-05, |
| "loss": 0.0439, |
| "step": 11510 |
| }, |
| { |
| "epoch": 11.864057672502575, |
| "grad_norm": 0.6703087687492371, |
| "learning_rate": 3.14486675121337e-05, |
| "loss": 0.0298, |
| "step": 11520 |
| }, |
| { |
| "epoch": 11.874356333676623, |
| "grad_norm": 0.46335524320602417, |
| "learning_rate": 3.136339615620985e-05, |
| "loss": 0.0481, |
| "step": 11530 |
| }, |
| { |
| "epoch": 11.884654994850669, |
| "grad_norm": 0.250967800617218, |
| "learning_rate": 3.127818770369406e-05, |
| "loss": 0.0337, |
| "step": 11540 |
| }, |
| { |
| "epoch": 11.894953656024716, |
| "grad_norm": 0.2240300476551056, |
| "learning_rate": 3.119304244218715e-05, |
| "loss": 0.0327, |
| "step": 11550 |
| }, |
| { |
| "epoch": 11.905252317198764, |
| "grad_norm": 0.2884691655635834, |
| "learning_rate": 3.110796065907665e-05, |
| "loss": 0.0363, |
| "step": 11560 |
| }, |
| { |
| "epoch": 11.915550978372812, |
| "grad_norm": 0.28418871760368347, |
| "learning_rate": 3.102294264153577e-05, |
| "loss": 0.0325, |
| "step": 11570 |
| }, |
| { |
| "epoch": 11.92584963954686, |
| "grad_norm": 0.2494005262851715, |
| "learning_rate": 3.093798867652257e-05, |
| "loss": 0.0358, |
| "step": 11580 |
| }, |
| { |
| "epoch": 11.936148300720907, |
| "grad_norm": 0.43249595165252686, |
| "learning_rate": 3.0853099050778854e-05, |
| "loss": 0.0361, |
| "step": 11590 |
| }, |
| { |
| "epoch": 11.946446961894953, |
| "grad_norm": 0.32216548919677734, |
| "learning_rate": 3.0768274050829306e-05, |
| "loss": 0.0359, |
| "step": 11600 |
| }, |
| { |
| "epoch": 11.956745623069, |
| "grad_norm": 0.3839482069015503, |
| "learning_rate": 3.0683513962980456e-05, |
| "loss": 0.0338, |
| "step": 11610 |
| }, |
| { |
| "epoch": 11.967044284243048, |
| "grad_norm": 0.25899192690849304, |
| "learning_rate": 3.059881907331979e-05, |
| "loss": 0.0326, |
| "step": 11620 |
| }, |
| { |
| "epoch": 11.977342945417096, |
| "grad_norm": 0.2512173652648926, |
| "learning_rate": 3.0514189667714632e-05, |
| "loss": 0.0352, |
| "step": 11630 |
| }, |
| { |
| "epoch": 11.987641606591144, |
| "grad_norm": 0.43213722109794617, |
| "learning_rate": 3.042962603181138e-05, |
| "loss": 0.0395, |
| "step": 11640 |
| }, |
| { |
| "epoch": 11.997940267765191, |
| "grad_norm": 0.25386422872543335, |
| "learning_rate": 3.034512845103441e-05, |
| "loss": 0.0314, |
| "step": 11650 |
| }, |
| { |
| "epoch": 12.008238928939237, |
| "grad_norm": 0.35718950629234314, |
| "learning_rate": 3.0260697210585108e-05, |
| "loss": 0.0371, |
| "step": 11660 |
| }, |
| { |
| "epoch": 12.018537590113285, |
| "grad_norm": 0.29993295669555664, |
| "learning_rate": 3.017633259544101e-05, |
| "loss": 0.035, |
| "step": 11670 |
| }, |
| { |
| "epoch": 12.028836251287332, |
| "grad_norm": 0.3331249952316284, |
| "learning_rate": 3.0092034890354694e-05, |
| "loss": 0.0406, |
| "step": 11680 |
| }, |
| { |
| "epoch": 12.03913491246138, |
| "grad_norm": 0.22086752951145172, |
| "learning_rate": 3.0007804379852977e-05, |
| "loss": 0.0252, |
| "step": 11690 |
| }, |
| { |
| "epoch": 12.049433573635428, |
| "grad_norm": 0.22861167788505554, |
| "learning_rate": 2.9923641348235843e-05, |
| "loss": 0.0426, |
| "step": 11700 |
| }, |
| { |
| "epoch": 12.059732234809475, |
| "grad_norm": 0.26923444867134094, |
| "learning_rate": 2.9839546079575497e-05, |
| "loss": 0.0454, |
| "step": 11710 |
| }, |
| { |
| "epoch": 12.070030895983523, |
| "grad_norm": 0.23918205499649048, |
| "learning_rate": 2.9755518857715448e-05, |
| "loss": 0.0402, |
| "step": 11720 |
| }, |
| { |
| "epoch": 12.080329557157569, |
| "grad_norm": 0.23139654099941254, |
| "learning_rate": 2.967155996626956e-05, |
| "loss": 0.0303, |
| "step": 11730 |
| }, |
| { |
| "epoch": 12.090628218331616, |
| "grad_norm": 0.38359567523002625, |
| "learning_rate": 2.9587669688620988e-05, |
| "loss": 0.0398, |
| "step": 11740 |
| }, |
| { |
| "epoch": 12.100926879505664, |
| "grad_norm": 0.23274274170398712, |
| "learning_rate": 2.950384830792136e-05, |
| "loss": 0.0283, |
| "step": 11750 |
| }, |
| { |
| "epoch": 12.111225540679712, |
| "grad_norm": 0.29843324422836304, |
| "learning_rate": 2.942009610708976e-05, |
| "loss": 0.0339, |
| "step": 11760 |
| }, |
| { |
| "epoch": 12.12152420185376, |
| "grad_norm": 0.2866639494895935, |
| "learning_rate": 2.9336413368811723e-05, |
| "loss": 0.0325, |
| "step": 11770 |
| }, |
| { |
| "epoch": 12.131822863027807, |
| "grad_norm": 0.3042534589767456, |
| "learning_rate": 2.9252800375538368e-05, |
| "loss": 0.0355, |
| "step": 11780 |
| }, |
| { |
| "epoch": 12.142121524201853, |
| "grad_norm": 0.2678833305835724, |
| "learning_rate": 2.9169257409485418e-05, |
| "loss": 0.0329, |
| "step": 11790 |
| }, |
| { |
| "epoch": 12.1524201853759, |
| "grad_norm": 0.19894133508205414, |
| "learning_rate": 2.9085784752632157e-05, |
| "loss": 0.0383, |
| "step": 11800 |
| }, |
| { |
| "epoch": 12.162718846549948, |
| "grad_norm": 0.19369176030158997, |
| "learning_rate": 2.9002382686720676e-05, |
| "loss": 0.0303, |
| "step": 11810 |
| }, |
| { |
| "epoch": 12.173017507723996, |
| "grad_norm": 0.23142315447330475, |
| "learning_rate": 2.8919051493254724e-05, |
| "loss": 0.0404, |
| "step": 11820 |
| }, |
| { |
| "epoch": 12.183316168898044, |
| "grad_norm": 0.2168169468641281, |
| "learning_rate": 2.883579145349884e-05, |
| "loss": 0.0352, |
| "step": 11830 |
| }, |
| { |
| "epoch": 12.193614830072091, |
| "grad_norm": 0.27123361825942993, |
| "learning_rate": 2.8752602848477432e-05, |
| "loss": 0.0358, |
| "step": 11840 |
| }, |
| { |
| "epoch": 12.203913491246137, |
| "grad_norm": 1.34294593334198, |
| "learning_rate": 2.8669485958973775e-05, |
| "loss": 0.0336, |
| "step": 11850 |
| }, |
| { |
| "epoch": 12.214212152420185, |
| "grad_norm": 0.35292431712150574, |
| "learning_rate": 2.858644106552909e-05, |
| "loss": 0.0356, |
| "step": 11860 |
| }, |
| { |
| "epoch": 12.224510813594232, |
| "grad_norm": 0.5437068939208984, |
| "learning_rate": 2.850346844844157e-05, |
| "loss": 0.04, |
| "step": 11870 |
| }, |
| { |
| "epoch": 12.23480947476828, |
| "grad_norm": 0.7077152729034424, |
| "learning_rate": 2.8420568387765557e-05, |
| "loss": 0.0381, |
| "step": 11880 |
| }, |
| { |
| "epoch": 12.245108135942328, |
| "grad_norm": 1.2102924585342407, |
| "learning_rate": 2.8337741163310317e-05, |
| "loss": 0.0316, |
| "step": 11890 |
| }, |
| { |
| "epoch": 12.255406797116375, |
| "grad_norm": 0.22898398339748383, |
| "learning_rate": 2.825498705463947e-05, |
| "loss": 0.0355, |
| "step": 11900 |
| }, |
| { |
| "epoch": 12.265705458290423, |
| "grad_norm": 0.16343450546264648, |
| "learning_rate": 2.8172306341069672e-05, |
| "loss": 0.0333, |
| "step": 11910 |
| }, |
| { |
| "epoch": 12.276004119464469, |
| "grad_norm": 0.2778915762901306, |
| "learning_rate": 2.8089699301670002e-05, |
| "loss": 0.034, |
| "step": 11920 |
| }, |
| { |
| "epoch": 12.286302780638517, |
| "grad_norm": 0.2954021096229553, |
| "learning_rate": 2.800716621526078e-05, |
| "loss": 0.03, |
| "step": 11930 |
| }, |
| { |
| "epoch": 12.296601441812564, |
| "grad_norm": 0.18878135085105896, |
| "learning_rate": 2.7924707360412746e-05, |
| "loss": 0.0322, |
| "step": 11940 |
| }, |
| { |
| "epoch": 12.306900102986612, |
| "grad_norm": 0.25053462386131287, |
| "learning_rate": 2.7842323015446082e-05, |
| "loss": 0.0376, |
| "step": 11950 |
| }, |
| { |
| "epoch": 12.31719876416066, |
| "grad_norm": 0.21085461974143982, |
| "learning_rate": 2.7760013458429475e-05, |
| "loss": 0.0333, |
| "step": 11960 |
| }, |
| { |
| "epoch": 12.327497425334707, |
| "grad_norm": 0.27033373713493347, |
| "learning_rate": 2.767777896717919e-05, |
| "loss": 0.0387, |
| "step": 11970 |
| }, |
| { |
| "epoch": 12.337796086508753, |
| "grad_norm": 0.2603791356086731, |
| "learning_rate": 2.7595619819258116e-05, |
| "loss": 0.0336, |
| "step": 11980 |
| }, |
| { |
| "epoch": 12.3480947476828, |
| "grad_norm": 0.2735675573348999, |
| "learning_rate": 2.7513536291974895e-05, |
| "loss": 0.0367, |
| "step": 11990 |
| }, |
| { |
| "epoch": 12.358393408856848, |
| "grad_norm": 0.2710510790348053, |
| "learning_rate": 2.743152866238281e-05, |
| "loss": 0.0359, |
| "step": 12000 |
| }, |
| { |
| "epoch": 12.368692070030896, |
| "grad_norm": 0.3120410144329071, |
| "learning_rate": 2.7349597207279088e-05, |
| "loss": 0.0353, |
| "step": 12010 |
| }, |
| { |
| "epoch": 12.378990731204944, |
| "grad_norm": 1.238741159439087, |
| "learning_rate": 2.7267742203203795e-05, |
| "loss": 0.0328, |
| "step": 12020 |
| }, |
| { |
| "epoch": 12.389289392378991, |
| "grad_norm": 0.24720178544521332, |
| "learning_rate": 2.718596392643895e-05, |
| "loss": 0.035, |
| "step": 12030 |
| }, |
| { |
| "epoch": 12.399588053553039, |
| "grad_norm": 0.5230728387832642, |
| "learning_rate": 2.7104262653007616e-05, |
| "loss": 0.0385, |
| "step": 12040 |
| }, |
| { |
| "epoch": 12.409886714727085, |
| "grad_norm": 0.30197054147720337, |
| "learning_rate": 2.7022638658672933e-05, |
| "loss": 0.0378, |
| "step": 12050 |
| }, |
| { |
| "epoch": 12.420185375901132, |
| "grad_norm": 0.35036417841911316, |
| "learning_rate": 2.6941092218937214e-05, |
| "loss": 0.0316, |
| "step": 12060 |
| }, |
| { |
| "epoch": 12.43048403707518, |
| "grad_norm": 0.1900859922170639, |
| "learning_rate": 2.6859623609040984e-05, |
| "loss": 0.0416, |
| "step": 12070 |
| }, |
| { |
| "epoch": 12.440782698249228, |
| "grad_norm": 0.3137092888355255, |
| "learning_rate": 2.6778233103962158e-05, |
| "loss": 0.0347, |
| "step": 12080 |
| }, |
| { |
| "epoch": 12.451081359423275, |
| "grad_norm": 0.2586371600627899, |
| "learning_rate": 2.6696920978414862e-05, |
| "loss": 0.0313, |
| "step": 12090 |
| }, |
| { |
| "epoch": 12.461380020597323, |
| "grad_norm": 0.22871264815330505, |
| "learning_rate": 2.6615687506848864e-05, |
| "loss": 0.0384, |
| "step": 12100 |
| }, |
| { |
| "epoch": 12.471678681771369, |
| "grad_norm": 0.500694751739502, |
| "learning_rate": 2.6534532963448274e-05, |
| "loss": 0.0365, |
| "step": 12110 |
| }, |
| { |
| "epoch": 12.481977342945417, |
| "grad_norm": 0.23115640878677368, |
| "learning_rate": 2.645345762213094e-05, |
| "loss": 0.0359, |
| "step": 12120 |
| }, |
| { |
| "epoch": 12.492276004119464, |
| "grad_norm": 0.27199363708496094, |
| "learning_rate": 2.6372461756547306e-05, |
| "loss": 0.0367, |
| "step": 12130 |
| }, |
| { |
| "epoch": 12.502574665293512, |
| "grad_norm": 0.4970080256462097, |
| "learning_rate": 2.6291545640079583e-05, |
| "loss": 0.038, |
| "step": 12140 |
| }, |
| { |
| "epoch": 12.51287332646756, |
| "grad_norm": 0.31872427463531494, |
| "learning_rate": 2.6210709545840816e-05, |
| "loss": 0.0349, |
| "step": 12150 |
| }, |
| { |
| "epoch": 12.523171987641607, |
| "grad_norm": 0.543602705001831, |
| "learning_rate": 2.612995374667394e-05, |
| "loss": 0.0456, |
| "step": 12160 |
| }, |
| { |
| "epoch": 12.533470648815655, |
| "grad_norm": 0.24425791203975677, |
| "learning_rate": 2.6049278515150888e-05, |
| "loss": 0.0343, |
| "step": 12170 |
| }, |
| { |
| "epoch": 12.5437693099897, |
| "grad_norm": 0.32970938086509705, |
| "learning_rate": 2.5968684123571625e-05, |
| "loss": 0.0358, |
| "step": 12180 |
| }, |
| { |
| "epoch": 12.554067971163748, |
| "grad_norm": 0.24140028655529022, |
| "learning_rate": 2.5888170843963332e-05, |
| "loss": 0.0415, |
| "step": 12190 |
| }, |
| { |
| "epoch": 12.564366632337796, |
| "grad_norm": 0.1907021552324295, |
| "learning_rate": 2.5807738948079307e-05, |
| "loss": 0.0332, |
| "step": 12200 |
| }, |
| { |
| "epoch": 12.574665293511844, |
| "grad_norm": 0.2994469404220581, |
| "learning_rate": 2.572738870739827e-05, |
| "loss": 0.0332, |
| "step": 12210 |
| }, |
| { |
| "epoch": 12.584963954685891, |
| "grad_norm": 0.3281172811985016, |
| "learning_rate": 2.5647120393123246e-05, |
| "loss": 0.0355, |
| "step": 12220 |
| }, |
| { |
| "epoch": 12.595262615859939, |
| "grad_norm": 0.222566619515419, |
| "learning_rate": 2.5566934276180792e-05, |
| "loss": 0.0299, |
| "step": 12230 |
| }, |
| { |
| "epoch": 12.605561277033985, |
| "grad_norm": 0.38741955161094666, |
| "learning_rate": 2.5486830627219993e-05, |
| "loss": 0.0369, |
| "step": 12240 |
| }, |
| { |
| "epoch": 12.615859938208033, |
| "grad_norm": 0.24740222096443176, |
| "learning_rate": 2.540680971661161e-05, |
| "loss": 0.034, |
| "step": 12250 |
| }, |
| { |
| "epoch": 12.62615859938208, |
| "grad_norm": 0.2917155623435974, |
| "learning_rate": 2.5326871814447116e-05, |
| "loss": 0.0325, |
| "step": 12260 |
| }, |
| { |
| "epoch": 12.636457260556128, |
| "grad_norm": 0.3306695818901062, |
| "learning_rate": 2.5247017190537802e-05, |
| "loss": 0.0314, |
| "step": 12270 |
| }, |
| { |
| "epoch": 12.646755921730175, |
| "grad_norm": 0.3189143240451813, |
| "learning_rate": 2.5167246114413956e-05, |
| "loss": 0.0406, |
| "step": 12280 |
| }, |
| { |
| "epoch": 12.657054582904223, |
| "grad_norm": 0.27937018871307373, |
| "learning_rate": 2.5087558855323718e-05, |
| "loss": 0.037, |
| "step": 12290 |
| }, |
| { |
| "epoch": 12.667353244078269, |
| "grad_norm": 0.23929426074028015, |
| "learning_rate": 2.5007955682232498e-05, |
| "loss": 0.0366, |
| "step": 12300 |
| }, |
| { |
| "epoch": 12.677651905252317, |
| "grad_norm": 0.38764917850494385, |
| "learning_rate": 2.4928436863821725e-05, |
| "loss": 0.0357, |
| "step": 12310 |
| }, |
| { |
| "epoch": 12.687950566426364, |
| "grad_norm": 0.22392131388187408, |
| "learning_rate": 2.4849002668488245e-05, |
| "loss": 0.031, |
| "step": 12320 |
| }, |
| { |
| "epoch": 12.698249227600412, |
| "grad_norm": 0.35927116870880127, |
| "learning_rate": 2.4769653364343222e-05, |
| "loss": 0.0355, |
| "step": 12330 |
| }, |
| { |
| "epoch": 12.70854788877446, |
| "grad_norm": 0.3391915261745453, |
| "learning_rate": 2.4690389219211273e-05, |
| "loss": 0.0346, |
| "step": 12340 |
| }, |
| { |
| "epoch": 12.718846549948507, |
| "grad_norm": 0.21950756013393402, |
| "learning_rate": 2.4611210500629618e-05, |
| "loss": 0.0339, |
| "step": 12350 |
| }, |
| { |
| "epoch": 12.729145211122553, |
| "grad_norm": 0.22874067723751068, |
| "learning_rate": 2.453211747584711e-05, |
| "loss": 0.0347, |
| "step": 12360 |
| }, |
| { |
| "epoch": 12.7394438722966, |
| "grad_norm": 0.5297624468803406, |
| "learning_rate": 2.4453110411823382e-05, |
| "loss": 0.0308, |
| "step": 12370 |
| }, |
| { |
| "epoch": 12.749742533470648, |
| "grad_norm": 0.31514862179756165, |
| "learning_rate": 2.4374189575227902e-05, |
| "loss": 0.032, |
| "step": 12380 |
| }, |
| { |
| "epoch": 12.760041194644696, |
| "grad_norm": 0.26266971230506897, |
| "learning_rate": 2.429535523243917e-05, |
| "loss": 0.0357, |
| "step": 12390 |
| }, |
| { |
| "epoch": 12.770339855818744, |
| "grad_norm": 0.18397288024425507, |
| "learning_rate": 2.4216607649543628e-05, |
| "loss": 0.0307, |
| "step": 12400 |
| }, |
| { |
| "epoch": 12.780638516992791, |
| "grad_norm": 0.26537027955055237, |
| "learning_rate": 2.4137947092334994e-05, |
| "loss": 0.0363, |
| "step": 12410 |
| }, |
| { |
| "epoch": 12.790937178166839, |
| "grad_norm": 0.28661102056503296, |
| "learning_rate": 2.4059373826313185e-05, |
| "loss": 0.0306, |
| "step": 12420 |
| }, |
| { |
| "epoch": 12.801235839340885, |
| "grad_norm": 0.26964297890663147, |
| "learning_rate": 2.3980888116683515e-05, |
| "loss": 0.0324, |
| "step": 12430 |
| }, |
| { |
| "epoch": 12.811534500514933, |
| "grad_norm": 0.2776640057563782, |
| "learning_rate": 2.3902490228355756e-05, |
| "loss": 0.0329, |
| "step": 12440 |
| }, |
| { |
| "epoch": 12.82183316168898, |
| "grad_norm": 0.4814803898334503, |
| "learning_rate": 2.3824180425943277e-05, |
| "loss": 0.0303, |
| "step": 12450 |
| }, |
| { |
| "epoch": 12.832131822863028, |
| "grad_norm": 0.22867955267429352, |
| "learning_rate": 2.374595897376211e-05, |
| "loss": 0.0288, |
| "step": 12460 |
| }, |
| { |
| "epoch": 12.842430484037076, |
| "grad_norm": 0.21567359566688538, |
| "learning_rate": 2.366782613583009e-05, |
| "loss": 0.0325, |
| "step": 12470 |
| }, |
| { |
| "epoch": 12.852729145211123, |
| "grad_norm": 0.290703684091568, |
| "learning_rate": 2.3589782175866015e-05, |
| "loss": 0.0298, |
| "step": 12480 |
| }, |
| { |
| "epoch": 12.863027806385169, |
| "grad_norm": 0.3255325257778168, |
| "learning_rate": 2.3511827357288575e-05, |
| "loss": 0.0363, |
| "step": 12490 |
| }, |
| { |
| "epoch": 12.873326467559217, |
| "grad_norm": 0.44946736097335815, |
| "learning_rate": 2.343396194321572e-05, |
| "loss": 0.0332, |
| "step": 12500 |
| }, |
| { |
| "epoch": 12.883625128733264, |
| "grad_norm": 0.25294211506843567, |
| "learning_rate": 2.33561861964635e-05, |
| "loss": 0.0348, |
| "step": 12510 |
| }, |
| { |
| "epoch": 12.893923789907312, |
| "grad_norm": 0.18743322789669037, |
| "learning_rate": 2.3278500379545436e-05, |
| "loss": 0.0336, |
| "step": 12520 |
| }, |
| { |
| "epoch": 12.90422245108136, |
| "grad_norm": 0.16629280149936676, |
| "learning_rate": 2.3200904754671453e-05, |
| "loss": 0.0381, |
| "step": 12530 |
| }, |
| { |
| "epoch": 12.914521112255407, |
| "grad_norm": 0.1841958910226822, |
| "learning_rate": 2.312339958374705e-05, |
| "loss": 0.0273, |
| "step": 12540 |
| }, |
| { |
| "epoch": 12.924819773429455, |
| "grad_norm": 0.3820919096469879, |
| "learning_rate": 2.3045985128372442e-05, |
| "loss": 0.0354, |
| "step": 12550 |
| }, |
| { |
| "epoch": 12.9351184346035, |
| "grad_norm": 0.22891731560230255, |
| "learning_rate": 2.2968661649841643e-05, |
| "loss": 0.0393, |
| "step": 12560 |
| }, |
| { |
| "epoch": 12.945417095777549, |
| "grad_norm": 0.21805356442928314, |
| "learning_rate": 2.2891429409141594e-05, |
| "loss": 0.0312, |
| "step": 12570 |
| }, |
| { |
| "epoch": 12.955715756951596, |
| "grad_norm": 0.29530712962150574, |
| "learning_rate": 2.281428866695128e-05, |
| "loss": 0.034, |
| "step": 12580 |
| }, |
| { |
| "epoch": 12.966014418125644, |
| "grad_norm": 0.3417767286300659, |
| "learning_rate": 2.2737239683640908e-05, |
| "loss": 0.0291, |
| "step": 12590 |
| }, |
| { |
| "epoch": 12.976313079299691, |
| "grad_norm": 0.36338862776756287, |
| "learning_rate": 2.266028271927087e-05, |
| "loss": 0.0288, |
| "step": 12600 |
| }, |
| { |
| "epoch": 12.98661174047374, |
| "grad_norm": 0.18803521990776062, |
| "learning_rate": 2.258341803359108e-05, |
| "loss": 0.035, |
| "step": 12610 |
| }, |
| { |
| "epoch": 12.996910401647785, |
| "grad_norm": 0.2204011231660843, |
| "learning_rate": 2.2506645886039918e-05, |
| "loss": 0.0331, |
| "step": 12620 |
| }, |
| { |
| "epoch": 13.007209062821833, |
| "grad_norm": 0.23867210745811462, |
| "learning_rate": 2.242996653574345e-05, |
| "loss": 0.0327, |
| "step": 12630 |
| }, |
| { |
| "epoch": 13.01750772399588, |
| "grad_norm": 0.22372329235076904, |
| "learning_rate": 2.2353380241514515e-05, |
| "loss": 0.0313, |
| "step": 12640 |
| }, |
| { |
| "epoch": 13.027806385169928, |
| "grad_norm": 0.2398245483636856, |
| "learning_rate": 2.2276887261851875e-05, |
| "loss": 0.0405, |
| "step": 12650 |
| }, |
| { |
| "epoch": 13.038105046343976, |
| "grad_norm": 0.20746667683124542, |
| "learning_rate": 2.2200487854939322e-05, |
| "loss": 0.0332, |
| "step": 12660 |
| }, |
| { |
| "epoch": 13.048403707518023, |
| "grad_norm": 0.23980452120304108, |
| "learning_rate": 2.21241822786448e-05, |
| "loss": 0.0331, |
| "step": 12670 |
| }, |
| { |
| "epoch": 13.058702368692071, |
| "grad_norm": 0.2431352734565735, |
| "learning_rate": 2.204797079051962e-05, |
| "loss": 0.0337, |
| "step": 12680 |
| }, |
| { |
| "epoch": 13.069001029866117, |
| "grad_norm": 0.21622303128242493, |
| "learning_rate": 2.1971853647797415e-05, |
| "loss": 0.0369, |
| "step": 12690 |
| }, |
| { |
| "epoch": 13.079299691040164, |
| "grad_norm": 0.17636331915855408, |
| "learning_rate": 2.1895831107393484e-05, |
| "loss": 0.0385, |
| "step": 12700 |
| }, |
| { |
| "epoch": 13.089598352214212, |
| "grad_norm": 0.3212912976741791, |
| "learning_rate": 2.181990342590371e-05, |
| "loss": 0.0388, |
| "step": 12710 |
| }, |
| { |
| "epoch": 13.09989701338826, |
| "grad_norm": 0.4048994183540344, |
| "learning_rate": 2.1744070859603897e-05, |
| "loss": 0.0314, |
| "step": 12720 |
| }, |
| { |
| "epoch": 13.110195674562307, |
| "grad_norm": 0.2608017921447754, |
| "learning_rate": 2.1668333664448776e-05, |
| "loss": 0.0348, |
| "step": 12730 |
| }, |
| { |
| "epoch": 13.120494335736355, |
| "grad_norm": 0.22120167315006256, |
| "learning_rate": 2.1592692096071153e-05, |
| "loss": 0.0282, |
| "step": 12740 |
| }, |
| { |
| "epoch": 13.130792996910401, |
| "grad_norm": 0.22117048501968384, |
| "learning_rate": 2.1517146409781103e-05, |
| "loss": 0.0346, |
| "step": 12750 |
| }, |
| { |
| "epoch": 13.141091658084449, |
| "grad_norm": 0.2921169102191925, |
| "learning_rate": 2.1441696860565048e-05, |
| "loss": 0.0342, |
| "step": 12760 |
| }, |
| { |
| "epoch": 13.151390319258496, |
| "grad_norm": 0.22612257301807404, |
| "learning_rate": 2.1366343703084936e-05, |
| "loss": 0.0312, |
| "step": 12770 |
| }, |
| { |
| "epoch": 13.161688980432544, |
| "grad_norm": 0.27955397963523865, |
| "learning_rate": 2.1291087191677343e-05, |
| "loss": 0.0332, |
| "step": 12780 |
| }, |
| { |
| "epoch": 13.171987641606592, |
| "grad_norm": 0.2641075849533081, |
| "learning_rate": 2.121592758035273e-05, |
| "loss": 0.0368, |
| "step": 12790 |
| }, |
| { |
| "epoch": 13.18228630278064, |
| "grad_norm": 0.26150405406951904, |
| "learning_rate": 2.114086512279434e-05, |
| "loss": 0.0355, |
| "step": 12800 |
| }, |
| { |
| "epoch": 13.192584963954685, |
| "grad_norm": 0.2792717218399048, |
| "learning_rate": 2.1065900072357635e-05, |
| "loss": 0.029, |
| "step": 12810 |
| }, |
| { |
| "epoch": 13.202883625128733, |
| "grad_norm": 0.21909286081790924, |
| "learning_rate": 2.0991032682069246e-05, |
| "loss": 0.0379, |
| "step": 12820 |
| }, |
| { |
| "epoch": 13.21318228630278, |
| "grad_norm": 0.2866324782371521, |
| "learning_rate": 2.0916263204626162e-05, |
| "loss": 0.0282, |
| "step": 12830 |
| }, |
| { |
| "epoch": 13.223480947476828, |
| "grad_norm": 0.28694427013397217, |
| "learning_rate": 2.0841591892394925e-05, |
| "loss": 0.0399, |
| "step": 12840 |
| }, |
| { |
| "epoch": 13.233779608650876, |
| "grad_norm": 0.31920716166496277, |
| "learning_rate": 2.0767018997410713e-05, |
| "loss": 0.0365, |
| "step": 12850 |
| }, |
| { |
| "epoch": 13.244078269824923, |
| "grad_norm": 0.35022082924842834, |
| "learning_rate": 2.0692544771376543e-05, |
| "loss": 0.0264, |
| "step": 12860 |
| }, |
| { |
| "epoch": 13.254376930998971, |
| "grad_norm": 0.25149139761924744, |
| "learning_rate": 2.0618169465662364e-05, |
| "loss": 0.0302, |
| "step": 12870 |
| }, |
| { |
| "epoch": 13.264675592173017, |
| "grad_norm": 0.2645907402038574, |
| "learning_rate": 2.0543893331304333e-05, |
| "loss": 0.0328, |
| "step": 12880 |
| }, |
| { |
| "epoch": 13.274974253347064, |
| "grad_norm": 0.17596539855003357, |
| "learning_rate": 2.0469716619003725e-05, |
| "loss": 0.0328, |
| "step": 12890 |
| }, |
| { |
| "epoch": 13.285272914521112, |
| "grad_norm": 0.2291368991136551, |
| "learning_rate": 2.039563957912642e-05, |
| "loss": 0.0318, |
| "step": 12900 |
| }, |
| { |
| "epoch": 13.29557157569516, |
| "grad_norm": 0.21256229281425476, |
| "learning_rate": 2.0321662461701696e-05, |
| "loss": 0.0334, |
| "step": 12910 |
| }, |
| { |
| "epoch": 13.305870236869207, |
| "grad_norm": 0.30739450454711914, |
| "learning_rate": 2.024778551642172e-05, |
| "loss": 0.0321, |
| "step": 12920 |
| }, |
| { |
| "epoch": 13.316168898043255, |
| "grad_norm": 0.2791813015937805, |
| "learning_rate": 2.017400899264047e-05, |
| "loss": 0.0302, |
| "step": 12930 |
| }, |
| { |
| "epoch": 13.326467559217301, |
| "grad_norm": 0.3258625864982605, |
| "learning_rate": 2.0100333139372985e-05, |
| "loss": 0.0361, |
| "step": 12940 |
| }, |
| { |
| "epoch": 13.336766220391349, |
| "grad_norm": 0.2523643672466278, |
| "learning_rate": 2.0026758205294533e-05, |
| "loss": 0.0322, |
| "step": 12950 |
| }, |
| { |
| "epoch": 13.347064881565396, |
| "grad_norm": 0.2704935073852539, |
| "learning_rate": 1.9953284438739733e-05, |
| "loss": 0.0321, |
| "step": 12960 |
| }, |
| { |
| "epoch": 13.357363542739444, |
| "grad_norm": 0.45123302936553955, |
| "learning_rate": 1.9879912087701753e-05, |
| "loss": 0.0331, |
| "step": 12970 |
| }, |
| { |
| "epoch": 13.367662203913492, |
| "grad_norm": 1.1362191438674927, |
| "learning_rate": 1.9806641399831433e-05, |
| "loss": 0.0352, |
| "step": 12980 |
| }, |
| { |
| "epoch": 13.37796086508754, |
| "grad_norm": 0.3239549398422241, |
| "learning_rate": 1.9733472622436544e-05, |
| "loss": 0.0317, |
| "step": 12990 |
| }, |
| { |
| "epoch": 13.388259526261585, |
| "grad_norm": 0.20692795515060425, |
| "learning_rate": 1.9660406002480765e-05, |
| "loss": 0.0328, |
| "step": 13000 |
| }, |
| { |
| "epoch": 13.398558187435633, |
| "grad_norm": 0.24428331851959229, |
| "learning_rate": 1.9587441786583076e-05, |
| "loss": 0.0344, |
| "step": 13010 |
| }, |
| { |
| "epoch": 13.40885684860968, |
| "grad_norm": 0.17566567659378052, |
| "learning_rate": 1.951458022101676e-05, |
| "loss": 0.0346, |
| "step": 13020 |
| }, |
| { |
| "epoch": 13.419155509783728, |
| "grad_norm": 0.2601017951965332, |
| "learning_rate": 1.944182155170864e-05, |
| "loss": 0.0413, |
| "step": 13030 |
| }, |
| { |
| "epoch": 13.429454170957776, |
| "grad_norm": 0.22690336406230927, |
| "learning_rate": 1.9369166024238232e-05, |
| "loss": 0.039, |
| "step": 13040 |
| }, |
| { |
| "epoch": 13.439752832131823, |
| "grad_norm": 0.34189629554748535, |
| "learning_rate": 1.9296613883836945e-05, |
| "loss": 0.0297, |
| "step": 13050 |
| }, |
| { |
| "epoch": 13.450051493305871, |
| "grad_norm": 0.39015287160873413, |
| "learning_rate": 1.9224165375387193e-05, |
| "loss": 0.0352, |
| "step": 13060 |
| }, |
| { |
| "epoch": 13.460350154479917, |
| "grad_norm": 0.16422075033187866, |
| "learning_rate": 1.9151820743421617e-05, |
| "loss": 0.0298, |
| "step": 13070 |
| }, |
| { |
| "epoch": 13.470648815653965, |
| "grad_norm": 0.20099236071109772, |
| "learning_rate": 1.9079580232122303e-05, |
| "loss": 0.0271, |
| "step": 13080 |
| }, |
| { |
| "epoch": 13.480947476828012, |
| "grad_norm": 0.37444478273391724, |
| "learning_rate": 1.9007444085319786e-05, |
| "loss": 0.0382, |
| "step": 13090 |
| }, |
| { |
| "epoch": 13.49124613800206, |
| "grad_norm": 0.24139359593391418, |
| "learning_rate": 1.8935412546492486e-05, |
| "loss": 0.0334, |
| "step": 13100 |
| }, |
| { |
| "epoch": 13.501544799176108, |
| "grad_norm": 0.3007052540779114, |
| "learning_rate": 1.88634858587656e-05, |
| "loss": 0.0341, |
| "step": 13110 |
| }, |
| { |
| "epoch": 13.511843460350155, |
| "grad_norm": 0.30898720026016235, |
| "learning_rate": 1.8791664264910537e-05, |
| "loss": 0.0324, |
| "step": 13120 |
| }, |
| { |
| "epoch": 13.522142121524201, |
| "grad_norm": 0.3256855905056, |
| "learning_rate": 1.8719948007343936e-05, |
| "loss": 0.0376, |
| "step": 13130 |
| }, |
| { |
| "epoch": 13.532440782698249, |
| "grad_norm": 0.2092374563217163, |
| "learning_rate": 1.8648337328126906e-05, |
| "loss": 0.0298, |
| "step": 13140 |
| }, |
| { |
| "epoch": 13.542739443872296, |
| "grad_norm": 0.34433215856552124, |
| "learning_rate": 1.85768324689642e-05, |
| "loss": 0.0371, |
| "step": 13150 |
| }, |
| { |
| "epoch": 13.553038105046344, |
| "grad_norm": 0.47145530581474304, |
| "learning_rate": 1.850543367120341e-05, |
| "loss": 0.0389, |
| "step": 13160 |
| }, |
| { |
| "epoch": 13.563336766220392, |
| "grad_norm": 1.9276230335235596, |
| "learning_rate": 1.8434141175834125e-05, |
| "loss": 0.0356, |
| "step": 13170 |
| }, |
| { |
| "epoch": 13.57363542739444, |
| "grad_norm": 0.1196000725030899, |
| "learning_rate": 1.8362955223487143e-05, |
| "loss": 0.0292, |
| "step": 13180 |
| }, |
| { |
| "epoch": 13.583934088568487, |
| "grad_norm": 0.21239057183265686, |
| "learning_rate": 1.8291876054433693e-05, |
| "loss": 0.0314, |
| "step": 13190 |
| }, |
| { |
| "epoch": 13.594232749742533, |
| "grad_norm": 0.27161744236946106, |
| "learning_rate": 1.8220903908584492e-05, |
| "loss": 0.0323, |
| "step": 13200 |
| }, |
| { |
| "epoch": 13.60453141091658, |
| "grad_norm": 0.23213060200214386, |
| "learning_rate": 1.8150039025489113e-05, |
| "loss": 0.0335, |
| "step": 13210 |
| }, |
| { |
| "epoch": 13.614830072090628, |
| "grad_norm": 0.26432856917381287, |
| "learning_rate": 1.8079281644335055e-05, |
| "loss": 0.0348, |
| "step": 13220 |
| }, |
| { |
| "epoch": 13.625128733264676, |
| "grad_norm": 0.24627777934074402, |
| "learning_rate": 1.8008632003946957e-05, |
| "loss": 0.0308, |
| "step": 13230 |
| }, |
| { |
| "epoch": 13.635427394438723, |
| "grad_norm": 0.3506312966346741, |
| "learning_rate": 1.7938090342785817e-05, |
| "loss": 0.0379, |
| "step": 13240 |
| }, |
| { |
| "epoch": 13.645726055612771, |
| "grad_norm": 0.20565661787986755, |
| "learning_rate": 1.7867656898948187e-05, |
| "loss": 0.0338, |
| "step": 13250 |
| }, |
| { |
| "epoch": 13.656024716786817, |
| "grad_norm": 0.2677291929721832, |
| "learning_rate": 1.7797331910165336e-05, |
| "loss": 0.0325, |
| "step": 13260 |
| }, |
| { |
| "epoch": 13.666323377960865, |
| "grad_norm": 0.30942559242248535, |
| "learning_rate": 1.7727115613802465e-05, |
| "loss": 0.0365, |
| "step": 13270 |
| }, |
| { |
| "epoch": 13.676622039134912, |
| "grad_norm": 0.23922519385814667, |
| "learning_rate": 1.765700824685797e-05, |
| "loss": 0.0366, |
| "step": 13280 |
| }, |
| { |
| "epoch": 13.68692070030896, |
| "grad_norm": 0.18366648256778717, |
| "learning_rate": 1.758701004596247e-05, |
| "loss": 0.0305, |
| "step": 13290 |
| }, |
| { |
| "epoch": 13.697219361483008, |
| "grad_norm": 0.2875716984272003, |
| "learning_rate": 1.751712124737826e-05, |
| "loss": 0.0363, |
| "step": 13300 |
| }, |
| { |
| "epoch": 13.707518022657055, |
| "grad_norm": 0.3050890564918518, |
| "learning_rate": 1.744734208699822e-05, |
| "loss": 0.037, |
| "step": 13310 |
| }, |
| { |
| "epoch": 13.717816683831103, |
| "grad_norm": 0.24879583716392517, |
| "learning_rate": 1.7377672800345302e-05, |
| "loss": 0.0285, |
| "step": 13320 |
| }, |
| { |
| "epoch": 13.728115345005149, |
| "grad_norm": 0.22065865993499756, |
| "learning_rate": 1.7308113622571544e-05, |
| "loss": 0.0299, |
| "step": 13330 |
| }, |
| { |
| "epoch": 13.738414006179196, |
| "grad_norm": 0.1869887113571167, |
| "learning_rate": 1.7238664788457342e-05, |
| "loss": 0.0344, |
| "step": 13340 |
| }, |
| { |
| "epoch": 13.748712667353244, |
| "grad_norm": 0.21137484908103943, |
| "learning_rate": 1.7169326532410663e-05, |
| "loss": 0.0332, |
| "step": 13350 |
| }, |
| { |
| "epoch": 13.759011328527292, |
| "grad_norm": 0.3234722912311554, |
| "learning_rate": 1.7100099088466242e-05, |
| "loss": 0.0345, |
| "step": 13360 |
| }, |
| { |
| "epoch": 13.76930998970134, |
| "grad_norm": 0.2264581024646759, |
| "learning_rate": 1.7030982690284792e-05, |
| "loss": 0.0291, |
| "step": 13370 |
| }, |
| { |
| "epoch": 13.779608650875387, |
| "grad_norm": 0.29631558060646057, |
| "learning_rate": 1.69619775711522e-05, |
| "loss": 0.0361, |
| "step": 13380 |
| }, |
| { |
| "epoch": 13.789907312049433, |
| "grad_norm": 0.292219340801239, |
| "learning_rate": 1.689308396397882e-05, |
| "loss": 0.0256, |
| "step": 13390 |
| }, |
| { |
| "epoch": 13.80020597322348, |
| "grad_norm": 0.17191918194293976, |
| "learning_rate": 1.6824302101298526e-05, |
| "loss": 0.0349, |
| "step": 13400 |
| }, |
| { |
| "epoch": 13.810504634397528, |
| "grad_norm": 0.22219271957874298, |
| "learning_rate": 1.6755632215268118e-05, |
| "loss": 0.0316, |
| "step": 13410 |
| }, |
| { |
| "epoch": 13.820803295571576, |
| "grad_norm": 0.18818335235118866, |
| "learning_rate": 1.6687074537666398e-05, |
| "loss": 0.0325, |
| "step": 13420 |
| }, |
| { |
| "epoch": 13.831101956745623, |
| "grad_norm": 0.2848359942436218, |
| "learning_rate": 1.6618629299893434e-05, |
| "loss": 0.0327, |
| "step": 13430 |
| }, |
| { |
| "epoch": 13.841400617919671, |
| "grad_norm": 0.26240599155426025, |
| "learning_rate": 1.6550296732969795e-05, |
| "loss": 0.0321, |
| "step": 13440 |
| }, |
| { |
| "epoch": 13.851699279093717, |
| "grad_norm": 0.166743665933609, |
| "learning_rate": 1.648207706753575e-05, |
| "loss": 0.0361, |
| "step": 13450 |
| }, |
| { |
| "epoch": 13.861997940267765, |
| "grad_norm": 0.2783146798610687, |
| "learning_rate": 1.6413970533850498e-05, |
| "loss": 0.0395, |
| "step": 13460 |
| }, |
| { |
| "epoch": 13.872296601441812, |
| "grad_norm": 0.2442004680633545, |
| "learning_rate": 1.6345977361791366e-05, |
| "loss": 0.0385, |
| "step": 13470 |
| }, |
| { |
| "epoch": 13.88259526261586, |
| "grad_norm": 0.16581279039382935, |
| "learning_rate": 1.6278097780853136e-05, |
| "loss": 0.0356, |
| "step": 13480 |
| }, |
| { |
| "epoch": 13.892893923789908, |
| "grad_norm": 0.37210017442703247, |
| "learning_rate": 1.6210332020147055e-05, |
| "loss": 0.0363, |
| "step": 13490 |
| }, |
| { |
| "epoch": 13.903192584963955, |
| "grad_norm": 0.18403227627277374, |
| "learning_rate": 1.6142680308400338e-05, |
| "loss": 0.0389, |
| "step": 13500 |
| }, |
| { |
| "epoch": 13.913491246138001, |
| "grad_norm": 0.283448189496994, |
| "learning_rate": 1.6075142873955164e-05, |
| "loss": 0.0318, |
| "step": 13510 |
| }, |
| { |
| "epoch": 13.923789907312049, |
| "grad_norm": 0.24017812311649323, |
| "learning_rate": 1.6007719944768025e-05, |
| "loss": 0.035, |
| "step": 13520 |
| }, |
| { |
| "epoch": 13.934088568486096, |
| "grad_norm": 0.14648008346557617, |
| "learning_rate": 1.594041174840894e-05, |
| "loss": 0.0276, |
| "step": 13530 |
| }, |
| { |
| "epoch": 13.944387229660144, |
| "grad_norm": 0.31949880719184875, |
| "learning_rate": 1.587321851206061e-05, |
| "loss": 0.0312, |
| "step": 13540 |
| }, |
| { |
| "epoch": 13.954685890834192, |
| "grad_norm": 0.27566295862197876, |
| "learning_rate": 1.5806140462517828e-05, |
| "loss": 0.0308, |
| "step": 13550 |
| }, |
| { |
| "epoch": 13.96498455200824, |
| "grad_norm": 0.221617192029953, |
| "learning_rate": 1.573917782618651e-05, |
| "loss": 0.033, |
| "step": 13560 |
| }, |
| { |
| "epoch": 13.975283213182287, |
| "grad_norm": 0.15257342159748077, |
| "learning_rate": 1.567233082908306e-05, |
| "loss": 0.0272, |
| "step": 13570 |
| }, |
| { |
| "epoch": 13.985581874356333, |
| "grad_norm": 0.31881460547447205, |
| "learning_rate": 1.5605599696833544e-05, |
| "loss": 0.036, |
| "step": 13580 |
| }, |
| { |
| "epoch": 13.99588053553038, |
| "grad_norm": 0.21161913871765137, |
| "learning_rate": 1.5538984654673016e-05, |
| "loss": 0.0272, |
| "step": 13590 |
| }, |
| { |
| "epoch": 14.006179196704428, |
| "grad_norm": 0.22538325190544128, |
| "learning_rate": 1.5472485927444597e-05, |
| "loss": 0.023, |
| "step": 13600 |
| }, |
| { |
| "epoch": 14.016477857878476, |
| "grad_norm": 0.2999170422554016, |
| "learning_rate": 1.5406103739598903e-05, |
| "loss": 0.032, |
| "step": 13610 |
| }, |
| { |
| "epoch": 14.026776519052524, |
| "grad_norm": 0.26565343141555786, |
| "learning_rate": 1.5339838315193156e-05, |
| "loss": 0.031, |
| "step": 13620 |
| }, |
| { |
| "epoch": 14.037075180226571, |
| "grad_norm": 0.3137536942958832, |
| "learning_rate": 1.5273689877890485e-05, |
| "loss": 0.0302, |
| "step": 13630 |
| }, |
| { |
| "epoch": 14.047373841400617, |
| "grad_norm": 0.1854087859392166, |
| "learning_rate": 1.5207658650959138e-05, |
| "loss": 0.0345, |
| "step": 13640 |
| }, |
| { |
| "epoch": 14.057672502574665, |
| "grad_norm": 0.2928926348686218, |
| "learning_rate": 1.5141744857271778e-05, |
| "loss": 0.0334, |
| "step": 13650 |
| }, |
| { |
| "epoch": 14.067971163748712, |
| "grad_norm": 0.42930635809898376, |
| "learning_rate": 1.5075948719304672e-05, |
| "loss": 0.0272, |
| "step": 13660 |
| }, |
| { |
| "epoch": 14.07826982492276, |
| "grad_norm": 0.20846472680568695, |
| "learning_rate": 1.5010270459136966e-05, |
| "loss": 0.0331, |
| "step": 13670 |
| }, |
| { |
| "epoch": 14.088568486096808, |
| "grad_norm": 0.2335253208875656, |
| "learning_rate": 1.4944710298449999e-05, |
| "loss": 0.0312, |
| "step": 13680 |
| }, |
| { |
| "epoch": 14.098867147270855, |
| "grad_norm": 0.18406903743743896, |
| "learning_rate": 1.4879268458526379e-05, |
| "loss": 0.033, |
| "step": 13690 |
| }, |
| { |
| "epoch": 14.109165808444903, |
| "grad_norm": 0.26444944739341736, |
| "learning_rate": 1.481394516024947e-05, |
| "loss": 0.0282, |
| "step": 13700 |
| }, |
| { |
| "epoch": 14.119464469618949, |
| "grad_norm": 0.19681231677532196, |
| "learning_rate": 1.4748740624102459e-05, |
| "loss": 0.0354, |
| "step": 13710 |
| }, |
| { |
| "epoch": 14.129763130792997, |
| "grad_norm": 0.22566291689872742, |
| "learning_rate": 1.468365507016769e-05, |
| "loss": 0.0327, |
| "step": 13720 |
| }, |
| { |
| "epoch": 14.140061791967044, |
| "grad_norm": 0.24647872149944305, |
| "learning_rate": 1.4618688718125929e-05, |
| "loss": 0.0301, |
| "step": 13730 |
| }, |
| { |
| "epoch": 14.150360453141092, |
| "grad_norm": 0.2727005183696747, |
| "learning_rate": 1.455384178725555e-05, |
| "loss": 0.0261, |
| "step": 13740 |
| }, |
| { |
| "epoch": 14.16065911431514, |
| "grad_norm": 0.2636515200138092, |
| "learning_rate": 1.4489114496431938e-05, |
| "loss": 0.0362, |
| "step": 13750 |
| }, |
| { |
| "epoch": 14.170957775489187, |
| "grad_norm": 0.24423463642597198, |
| "learning_rate": 1.4424507064126597e-05, |
| "loss": 0.0308, |
| "step": 13760 |
| }, |
| { |
| "epoch": 14.181256436663233, |
| "grad_norm": 0.2822682559490204, |
| "learning_rate": 1.4360019708406487e-05, |
| "loss": 0.038, |
| "step": 13770 |
| }, |
| { |
| "epoch": 14.19155509783728, |
| "grad_norm": 0.19930243492126465, |
| "learning_rate": 1.4295652646933277e-05, |
| "loss": 0.0291, |
| "step": 13780 |
| }, |
| { |
| "epoch": 14.201853759011328, |
| "grad_norm": 0.1978948414325714, |
| "learning_rate": 1.4231406096962669e-05, |
| "loss": 0.0302, |
| "step": 13790 |
| }, |
| { |
| "epoch": 14.212152420185376, |
| "grad_norm": 0.17142613232135773, |
| "learning_rate": 1.4167280275343492e-05, |
| "loss": 0.0257, |
| "step": 13800 |
| }, |
| { |
| "epoch": 14.222451081359424, |
| "grad_norm": 0.2695595622062683, |
| "learning_rate": 1.4103275398517197e-05, |
| "loss": 0.0349, |
| "step": 13810 |
| }, |
| { |
| "epoch": 14.232749742533471, |
| "grad_norm": 0.23960620164871216, |
| "learning_rate": 1.4039391682516972e-05, |
| "loss": 0.0307, |
| "step": 13820 |
| }, |
| { |
| "epoch": 14.243048403707519, |
| "grad_norm": 0.279876172542572, |
| "learning_rate": 1.3975629342967001e-05, |
| "loss": 0.0334, |
| "step": 13830 |
| }, |
| { |
| "epoch": 14.253347064881565, |
| "grad_norm": 0.260696142911911, |
| "learning_rate": 1.3911988595081893e-05, |
| "loss": 0.0316, |
| "step": 13840 |
| }, |
| { |
| "epoch": 14.263645726055612, |
| "grad_norm": 0.24109739065170288, |
| "learning_rate": 1.3848469653665786e-05, |
| "loss": 0.0306, |
| "step": 13850 |
| }, |
| { |
| "epoch": 14.27394438722966, |
| "grad_norm": 0.3289351165294647, |
| "learning_rate": 1.378507273311171e-05, |
| "loss": 0.0362, |
| "step": 13860 |
| }, |
| { |
| "epoch": 14.284243048403708, |
| "grad_norm": 0.33488863706588745, |
| "learning_rate": 1.3721798047400813e-05, |
| "loss": 0.0408, |
| "step": 13870 |
| }, |
| { |
| "epoch": 14.294541709577755, |
| "grad_norm": 3.9080820083618164, |
| "learning_rate": 1.3658645810101755e-05, |
| "loss": 0.0278, |
| "step": 13880 |
| }, |
| { |
| "epoch": 14.304840370751803, |
| "grad_norm": 0.2996270954608917, |
| "learning_rate": 1.3595616234369762e-05, |
| "loss": 0.0277, |
| "step": 13890 |
| }, |
| { |
| "epoch": 14.315139031925849, |
| "grad_norm": 0.2796926498413086, |
| "learning_rate": 1.3532709532946186e-05, |
| "loss": 0.0328, |
| "step": 13900 |
| }, |
| { |
| "epoch": 14.325437693099897, |
| "grad_norm": 0.24468347430229187, |
| "learning_rate": 1.3469925918157567e-05, |
| "loss": 0.0327, |
| "step": 13910 |
| }, |
| { |
| "epoch": 14.335736354273944, |
| "grad_norm": 0.23212593793869019, |
| "learning_rate": 1.3407265601914976e-05, |
| "loss": 0.0317, |
| "step": 13920 |
| }, |
| { |
| "epoch": 14.346035015447992, |
| "grad_norm": 0.23879218101501465, |
| "learning_rate": 1.3344728795713413e-05, |
| "loss": 0.0365, |
| "step": 13930 |
| }, |
| { |
| "epoch": 14.35633367662204, |
| "grad_norm": 0.2575908303260803, |
| "learning_rate": 1.3282315710630882e-05, |
| "loss": 0.0385, |
| "step": 13940 |
| }, |
| { |
| "epoch": 14.366632337796087, |
| "grad_norm": 0.3186909556388855, |
| "learning_rate": 1.3220026557327898e-05, |
| "loss": 0.0403, |
| "step": 13950 |
| }, |
| { |
| "epoch": 14.376930998970133, |
| "grad_norm": 0.2613557279109955, |
| "learning_rate": 1.3157861546046613e-05, |
| "loss": 0.0328, |
| "step": 13960 |
| }, |
| { |
| "epoch": 14.38722966014418, |
| "grad_norm": 0.3558288514614105, |
| "learning_rate": 1.3095820886610188e-05, |
| "loss": 0.0293, |
| "step": 13970 |
| }, |
| { |
| "epoch": 14.397528321318228, |
| "grad_norm": 0.2622450292110443, |
| "learning_rate": 1.3033904788422047e-05, |
| "loss": 0.0261, |
| "step": 13980 |
| }, |
| { |
| "epoch": 14.407826982492276, |
| "grad_norm": 0.23433591425418854, |
| "learning_rate": 1.2972113460465246e-05, |
| "loss": 0.0286, |
| "step": 13990 |
| }, |
| { |
| "epoch": 14.418125643666324, |
| "grad_norm": 0.2427792251110077, |
| "learning_rate": 1.2910447111301604e-05, |
| "loss": 0.0316, |
| "step": 14000 |
| }, |
| { |
| "epoch": 14.428424304840371, |
| "grad_norm": 0.3044346570968628, |
| "learning_rate": 1.284890594907121e-05, |
| "loss": 0.0273, |
| "step": 14010 |
| }, |
| { |
| "epoch": 14.438722966014419, |
| "grad_norm": 0.16404663026332855, |
| "learning_rate": 1.2787490181491568e-05, |
| "loss": 0.0257, |
| "step": 14020 |
| }, |
| { |
| "epoch": 14.449021627188465, |
| "grad_norm": 0.26250144839286804, |
| "learning_rate": 1.2726200015856892e-05, |
| "loss": 0.0328, |
| "step": 14030 |
| }, |
| { |
| "epoch": 14.459320288362512, |
| "grad_norm": 0.7278460264205933, |
| "learning_rate": 1.2665035659037561e-05, |
| "loss": 0.0297, |
| "step": 14040 |
| }, |
| { |
| "epoch": 14.46961894953656, |
| "grad_norm": 0.34996357560157776, |
| "learning_rate": 1.2603997317479238e-05, |
| "loss": 0.0324, |
| "step": 14050 |
| }, |
| { |
| "epoch": 14.479917610710608, |
| "grad_norm": 0.44799286127090454, |
| "learning_rate": 1.2543085197202287e-05, |
| "loss": 0.036, |
| "step": 14060 |
| }, |
| { |
| "epoch": 14.490216271884655, |
| "grad_norm": 0.24697241187095642, |
| "learning_rate": 1.2482299503801016e-05, |
| "loss": 0.0315, |
| "step": 14070 |
| }, |
| { |
| "epoch": 14.500514933058703, |
| "grad_norm": 0.3266669511795044, |
| "learning_rate": 1.2421640442443055e-05, |
| "loss": 0.0351, |
| "step": 14080 |
| }, |
| { |
| "epoch": 14.510813594232749, |
| "grad_norm": 0.42595696449279785, |
| "learning_rate": 1.2361108217868544e-05, |
| "loss": 0.029, |
| "step": 14090 |
| }, |
| { |
| "epoch": 14.521112255406797, |
| "grad_norm": 0.28600630164146423, |
| "learning_rate": 1.23007030343896e-05, |
| "loss": 0.0288, |
| "step": 14100 |
| }, |
| { |
| "epoch": 14.531410916580844, |
| "grad_norm": 0.32830336689949036, |
| "learning_rate": 1.2240425095889495e-05, |
| "loss": 0.0323, |
| "step": 14110 |
| }, |
| { |
| "epoch": 14.541709577754892, |
| "grad_norm": 0.23947954177856445, |
| "learning_rate": 1.2180274605821989e-05, |
| "loss": 0.0301, |
| "step": 14120 |
| }, |
| { |
| "epoch": 14.55200823892894, |
| "grad_norm": 0.14854808151721954, |
| "learning_rate": 1.2120251767210755e-05, |
| "loss": 0.0305, |
| "step": 14130 |
| }, |
| { |
| "epoch": 14.562306900102987, |
| "grad_norm": 0.4753403961658478, |
| "learning_rate": 1.2060356782648503e-05, |
| "loss": 0.0333, |
| "step": 14140 |
| }, |
| { |
| "epoch": 14.572605561277033, |
| "grad_norm": 0.15201760828495026, |
| "learning_rate": 1.2000589854296507e-05, |
| "loss": 0.0348, |
| "step": 14150 |
| }, |
| { |
| "epoch": 14.58290422245108, |
| "grad_norm": 0.36805441975593567, |
| "learning_rate": 1.1940951183883742e-05, |
| "loss": 0.0315, |
| "step": 14160 |
| }, |
| { |
| "epoch": 14.593202883625128, |
| "grad_norm": 0.22207669913768768, |
| "learning_rate": 1.1881440972706315e-05, |
| "loss": 0.0299, |
| "step": 14170 |
| }, |
| { |
| "epoch": 14.603501544799176, |
| "grad_norm": 0.27251651883125305, |
| "learning_rate": 1.1822059421626724e-05, |
| "loss": 0.0364, |
| "step": 14180 |
| }, |
| { |
| "epoch": 14.613800205973224, |
| "grad_norm": 0.2771929204463959, |
| "learning_rate": 1.1762806731073261e-05, |
| "loss": 0.0272, |
| "step": 14190 |
| }, |
| { |
| "epoch": 14.624098867147271, |
| "grad_norm": 0.2667066156864166, |
| "learning_rate": 1.1703683101039197e-05, |
| "loss": 0.0271, |
| "step": 14200 |
| }, |
| { |
| "epoch": 14.634397528321319, |
| "grad_norm": 0.2355891466140747, |
| "learning_rate": 1.1644688731082242e-05, |
| "loss": 0.0299, |
| "step": 14210 |
| }, |
| { |
| "epoch": 14.644696189495365, |
| "grad_norm": 0.39315053820610046, |
| "learning_rate": 1.1585823820323843e-05, |
| "loss": 0.0334, |
| "step": 14220 |
| }, |
| { |
| "epoch": 14.654994850669413, |
| "grad_norm": 0.298880010843277, |
| "learning_rate": 1.1527088567448407e-05, |
| "loss": 0.0309, |
| "step": 14230 |
| }, |
| { |
| "epoch": 14.66529351184346, |
| "grad_norm": 0.21369227766990662, |
| "learning_rate": 1.1468483170702805e-05, |
| "loss": 0.0271, |
| "step": 14240 |
| }, |
| { |
| "epoch": 14.675592173017508, |
| "grad_norm": 0.21962594985961914, |
| "learning_rate": 1.141000782789554e-05, |
| "loss": 0.0296, |
| "step": 14250 |
| }, |
| { |
| "epoch": 14.685890834191555, |
| "grad_norm": 0.3962979316711426, |
| "learning_rate": 1.135166273639619e-05, |
| "loss": 0.0361, |
| "step": 14260 |
| }, |
| { |
| "epoch": 14.696189495365603, |
| "grad_norm": 0.2696010172367096, |
| "learning_rate": 1.1293448093134656e-05, |
| "loss": 0.0317, |
| "step": 14270 |
| }, |
| { |
| "epoch": 14.706488156539649, |
| "grad_norm": 0.16473254561424255, |
| "learning_rate": 1.1235364094600632e-05, |
| "loss": 0.0259, |
| "step": 14280 |
| }, |
| { |
| "epoch": 14.716786817713697, |
| "grad_norm": 0.18638800084590912, |
| "learning_rate": 1.1177410936842719e-05, |
| "loss": 0.0236, |
| "step": 14290 |
| }, |
| { |
| "epoch": 14.727085478887744, |
| "grad_norm": 0.35101962089538574, |
| "learning_rate": 1.1119588815468012e-05, |
| "loss": 0.0266, |
| "step": 14300 |
| }, |
| { |
| "epoch": 14.737384140061792, |
| "grad_norm": 0.2792340815067291, |
| "learning_rate": 1.1061897925641296e-05, |
| "loss": 0.0318, |
| "step": 14310 |
| }, |
| { |
| "epoch": 14.74768280123584, |
| "grad_norm": 0.19751253724098206, |
| "learning_rate": 1.100433846208434e-05, |
| "loss": 0.0294, |
| "step": 14320 |
| }, |
| { |
| "epoch": 14.757981462409887, |
| "grad_norm": 0.2783863842487335, |
| "learning_rate": 1.094691061907544e-05, |
| "loss": 0.0359, |
| "step": 14330 |
| }, |
| { |
| "epoch": 14.768280123583935, |
| "grad_norm": 0.2864331305027008, |
| "learning_rate": 1.088961459044852e-05, |
| "loss": 0.0289, |
| "step": 14340 |
| }, |
| { |
| "epoch": 14.77857878475798, |
| "grad_norm": 0.19958889484405518, |
| "learning_rate": 1.0832450569592684e-05, |
| "loss": 0.0296, |
| "step": 14350 |
| }, |
| { |
| "epoch": 14.788877445932028, |
| "grad_norm": 0.2572004199028015, |
| "learning_rate": 1.0775418749451427e-05, |
| "loss": 0.0299, |
| "step": 14360 |
| }, |
| { |
| "epoch": 14.799176107106076, |
| "grad_norm": 0.24685412645339966, |
| "learning_rate": 1.0718519322522053e-05, |
| "loss": 0.0346, |
| "step": 14370 |
| }, |
| { |
| "epoch": 14.809474768280124, |
| "grad_norm": 0.2643430829048157, |
| "learning_rate": 1.0661752480854975e-05, |
| "loss": 0.0253, |
| "step": 14380 |
| }, |
| { |
| "epoch": 14.819773429454171, |
| "grad_norm": 0.2792705297470093, |
| "learning_rate": 1.0605118416053162e-05, |
| "loss": 0.0295, |
| "step": 14390 |
| }, |
| { |
| "epoch": 14.830072090628219, |
| "grad_norm": 0.4018799662590027, |
| "learning_rate": 1.0548617319271342e-05, |
| "loss": 0.034, |
| "step": 14400 |
| }, |
| { |
| "epoch": 14.840370751802265, |
| "grad_norm": 0.20562392473220825, |
| "learning_rate": 1.049224938121548e-05, |
| "loss": 0.0386, |
| "step": 14410 |
| }, |
| { |
| "epoch": 14.850669412976313, |
| "grad_norm": 0.2107439637184143, |
| "learning_rate": 1.043601479214214e-05, |
| "loss": 0.038, |
| "step": 14420 |
| }, |
| { |
| "epoch": 14.86096807415036, |
| "grad_norm": 0.2785644829273224, |
| "learning_rate": 1.0379913741857699e-05, |
| "loss": 0.0308, |
| "step": 14430 |
| }, |
| { |
| "epoch": 14.871266735324408, |
| "grad_norm": 0.23650747537612915, |
| "learning_rate": 1.03239464197179e-05, |
| "loss": 0.0312, |
| "step": 14440 |
| }, |
| { |
| "epoch": 14.881565396498456, |
| "grad_norm": 0.2766387462615967, |
| "learning_rate": 1.0268113014627073e-05, |
| "loss": 0.0265, |
| "step": 14450 |
| }, |
| { |
| "epoch": 14.891864057672503, |
| "grad_norm": 0.2568782567977905, |
| "learning_rate": 1.021241371503755e-05, |
| "loss": 0.037, |
| "step": 14460 |
| }, |
| { |
| "epoch": 14.90216271884655, |
| "grad_norm": 0.18696804344654083, |
| "learning_rate": 1.0156848708949006e-05, |
| "loss": 0.0266, |
| "step": 14470 |
| }, |
| { |
| "epoch": 14.912461380020597, |
| "grad_norm": 0.23785705864429474, |
| "learning_rate": 1.0101418183907896e-05, |
| "loss": 0.0304, |
| "step": 14480 |
| }, |
| { |
| "epoch": 14.922760041194644, |
| "grad_norm": 0.2720486521720886, |
| "learning_rate": 1.004612232700669e-05, |
| "loss": 0.0359, |
| "step": 14490 |
| }, |
| { |
| "epoch": 14.933058702368692, |
| "grad_norm": 0.21330799162387848, |
| "learning_rate": 9.990961324883358e-06, |
| "loss": 0.0288, |
| "step": 14500 |
| }, |
| { |
| "epoch": 14.94335736354274, |
| "grad_norm": 0.24091622233390808, |
| "learning_rate": 9.935935363720728e-06, |
| "loss": 0.0275, |
| "step": 14510 |
| }, |
| { |
| "epoch": 14.953656024716787, |
| "grad_norm": 0.34269654750823975, |
| "learning_rate": 9.88104462924575e-06, |
| "loss": 0.0323, |
| "step": 14520 |
| }, |
| { |
| "epoch": 14.963954685890835, |
| "grad_norm": 0.23459886014461517, |
| "learning_rate": 9.826289306729052e-06, |
| "loss": 0.0293, |
| "step": 14530 |
| }, |
| { |
| "epoch": 14.97425334706488, |
| "grad_norm": 0.27133437991142273, |
| "learning_rate": 9.7716695809841e-06, |
| "loss": 0.0329, |
| "step": 14540 |
| }, |
| { |
| "epoch": 14.984552008238929, |
| "grad_norm": 0.24615567922592163, |
| "learning_rate": 9.717185636366783e-06, |
| "loss": 0.0317, |
| "step": 14550 |
| }, |
| { |
| "epoch": 14.994850669412976, |
| "grad_norm": 0.26164570450782776, |
| "learning_rate": 9.662837656774632e-06, |
| "loss": 0.031, |
| "step": 14560 |
| }, |
| { |
| "epoch": 15.005149330587024, |
| "grad_norm": 0.18910399079322815, |
| "learning_rate": 9.608625825646288e-06, |
| "loss": 0.0349, |
| "step": 14570 |
| }, |
| { |
| "epoch": 15.015447991761071, |
| "grad_norm": 0.3117832541465759, |
| "learning_rate": 9.554550325960853e-06, |
| "loss": 0.032, |
| "step": 14580 |
| }, |
| { |
| "epoch": 15.02574665293512, |
| "grad_norm": 0.22034838795661926, |
| "learning_rate": 9.500611340237258e-06, |
| "loss": 0.0301, |
| "step": 14590 |
| }, |
| { |
| "epoch": 15.036045314109165, |
| "grad_norm": 0.2756035029888153, |
| "learning_rate": 9.446809050533678e-06, |
| "loss": 0.0272, |
| "step": 14600 |
| }, |
| { |
| "epoch": 15.046343975283213, |
| "grad_norm": 0.3038906157016754, |
| "learning_rate": 9.393143638446889e-06, |
| "loss": 0.0327, |
| "step": 14610 |
| }, |
| { |
| "epoch": 15.05664263645726, |
| "grad_norm": 0.22907866537570953, |
| "learning_rate": 9.33961528511172e-06, |
| "loss": 0.0307, |
| "step": 14620 |
| }, |
| { |
| "epoch": 15.066941297631308, |
| "grad_norm": 0.4842381775379181, |
| "learning_rate": 9.286224171200297e-06, |
| "loss": 0.0284, |
| "step": 14630 |
| }, |
| { |
| "epoch": 15.077239958805356, |
| "grad_norm": 0.8235160112380981, |
| "learning_rate": 9.232970476921626e-06, |
| "loss": 0.0336, |
| "step": 14640 |
| }, |
| { |
| "epoch": 15.087538619979403, |
| "grad_norm": 0.4762952923774719, |
| "learning_rate": 9.17985438202082e-06, |
| "loss": 0.0315, |
| "step": 14650 |
| }, |
| { |
| "epoch": 15.097837281153451, |
| "grad_norm": 0.20582009851932526, |
| "learning_rate": 9.12687606577859e-06, |
| "loss": 0.0283, |
| "step": 14660 |
| }, |
| { |
| "epoch": 15.108135942327497, |
| "grad_norm": 0.20658078789710999, |
| "learning_rate": 9.074035707010575e-06, |
| "loss": 0.0277, |
| "step": 14670 |
| }, |
| { |
| "epoch": 15.118434603501544, |
| "grad_norm": 0.2650274336338043, |
| "learning_rate": 9.02133348406684e-06, |
| "loss": 0.031, |
| "step": 14680 |
| }, |
| { |
| "epoch": 15.128733264675592, |
| "grad_norm": 0.26044949889183044, |
| "learning_rate": 8.968769574831115e-06, |
| "loss": 0.0287, |
| "step": 14690 |
| }, |
| { |
| "epoch": 15.13903192584964, |
| "grad_norm": 0.25187498331069946, |
| "learning_rate": 8.916344156720335e-06, |
| "loss": 0.0301, |
| "step": 14700 |
| }, |
| { |
| "epoch": 15.149330587023687, |
| "grad_norm": 0.4505482017993927, |
| "learning_rate": 8.864057406684023e-06, |
| "loss": 0.0264, |
| "step": 14710 |
| }, |
| { |
| "epoch": 15.159629248197735, |
| "grad_norm": 0.2146962434053421, |
| "learning_rate": 8.81190950120357e-06, |
| "loss": 0.0386, |
| "step": 14720 |
| }, |
| { |
| "epoch": 15.169927909371781, |
| "grad_norm": 0.17643073201179504, |
| "learning_rate": 8.759900616291834e-06, |
| "loss": 0.0271, |
| "step": 14730 |
| }, |
| { |
| "epoch": 15.180226570545829, |
| "grad_norm": 0.3004768192768097, |
| "learning_rate": 8.708030927492345e-06, |
| "loss": 0.034, |
| "step": 14740 |
| }, |
| { |
| "epoch": 15.190525231719876, |
| "grad_norm": 0.33159592747688293, |
| "learning_rate": 8.656300609878898e-06, |
| "loss": 0.033, |
| "step": 14750 |
| }, |
| { |
| "epoch": 15.200823892893924, |
| "grad_norm": 0.2567281126976013, |
| "learning_rate": 8.604709838054813e-06, |
| "loss": 0.0325, |
| "step": 14760 |
| }, |
| { |
| "epoch": 15.211122554067972, |
| "grad_norm": 0.20799218118190765, |
| "learning_rate": 8.55325878615244e-06, |
| "loss": 0.0317, |
| "step": 14770 |
| }, |
| { |
| "epoch": 15.22142121524202, |
| "grad_norm": 0.2914055585861206, |
| "learning_rate": 8.501947627832507e-06, |
| "loss": 0.0308, |
| "step": 14780 |
| }, |
| { |
| "epoch": 15.231719876416065, |
| "grad_norm": 0.24458810687065125, |
| "learning_rate": 8.450776536283594e-06, |
| "loss": 0.0359, |
| "step": 14790 |
| }, |
| { |
| "epoch": 15.242018537590113, |
| "grad_norm": 0.30409494042396545, |
| "learning_rate": 8.399745684221499e-06, |
| "loss": 0.0357, |
| "step": 14800 |
| }, |
| { |
| "epoch": 15.25231719876416, |
| "grad_norm": 0.2720089852809906, |
| "learning_rate": 8.348855243888681e-06, |
| "loss": 0.0344, |
| "step": 14810 |
| }, |
| { |
| "epoch": 15.262615859938208, |
| "grad_norm": 0.25461846590042114, |
| "learning_rate": 8.2981053870537e-06, |
| "loss": 0.0325, |
| "step": 14820 |
| }, |
| { |
| "epoch": 15.272914521112256, |
| "grad_norm": 0.2355855405330658, |
| "learning_rate": 8.247496285010548e-06, |
| "loss": 0.0276, |
| "step": 14830 |
| }, |
| { |
| "epoch": 15.283213182286303, |
| "grad_norm": 0.1807708442211151, |
| "learning_rate": 8.197028108578197e-06, |
| "loss": 0.03, |
| "step": 14840 |
| }, |
| { |
| "epoch": 15.293511843460351, |
| "grad_norm": 0.21903660893440247, |
| "learning_rate": 8.146701028099917e-06, |
| "loss": 0.0254, |
| "step": 14850 |
| }, |
| { |
| "epoch": 15.303810504634397, |
| "grad_norm": 0.5081159472465515, |
| "learning_rate": 8.096515213442762e-06, |
| "loss": 0.0276, |
| "step": 14860 |
| }, |
| { |
| "epoch": 15.314109165808445, |
| "grad_norm": 0.22669517993927002, |
| "learning_rate": 8.046470833996973e-06, |
| "loss": 0.0272, |
| "step": 14870 |
| }, |
| { |
| "epoch": 15.324407826982492, |
| "grad_norm": 0.2578093409538269, |
| "learning_rate": 7.996568058675402e-06, |
| "loss": 0.0304, |
| "step": 14880 |
| }, |
| { |
| "epoch": 15.33470648815654, |
| "grad_norm": 0.20256255567073822, |
| "learning_rate": 7.946807055912959e-06, |
| "loss": 0.0292, |
| "step": 14890 |
| }, |
| { |
| "epoch": 15.345005149330587, |
| "grad_norm": 0.2500031888484955, |
| "learning_rate": 7.897187993666022e-06, |
| "loss": 0.0315, |
| "step": 14900 |
| }, |
| { |
| "epoch": 15.355303810504635, |
| "grad_norm": 0.2907675802707672, |
| "learning_rate": 7.84771103941192e-06, |
| "loss": 0.0341, |
| "step": 14910 |
| }, |
| { |
| "epoch": 15.365602471678681, |
| "grad_norm": 0.1547321081161499, |
| "learning_rate": 7.79837636014827e-06, |
| "loss": 0.0249, |
| "step": 14920 |
| }, |
| { |
| "epoch": 15.375901132852729, |
| "grad_norm": 0.2814120054244995, |
| "learning_rate": 7.749184122392539e-06, |
| "loss": 0.0365, |
| "step": 14930 |
| }, |
| { |
| "epoch": 15.386199794026776, |
| "grad_norm": 0.37319841980934143, |
| "learning_rate": 7.700134492181344e-06, |
| "loss": 0.0274, |
| "step": 14940 |
| }, |
| { |
| "epoch": 15.396498455200824, |
| "grad_norm": 0.24200180172920227, |
| "learning_rate": 7.651227635070041e-06, |
| "loss": 0.0306, |
| "step": 14950 |
| }, |
| { |
| "epoch": 15.406797116374872, |
| "grad_norm": 0.6322610378265381, |
| "learning_rate": 7.602463716132041e-06, |
| "loss": 0.0279, |
| "step": 14960 |
| }, |
| { |
| "epoch": 15.41709577754892, |
| "grad_norm": 0.43964508175849915, |
| "learning_rate": 7.553842899958308e-06, |
| "loss": 0.032, |
| "step": 14970 |
| }, |
| { |
| "epoch": 15.427394438722967, |
| "grad_norm": 0.3598411977291107, |
| "learning_rate": 7.505365350656812e-06, |
| "loss": 0.0275, |
| "step": 14980 |
| }, |
| { |
| "epoch": 15.437693099897013, |
| "grad_norm": 0.19508050382137299, |
| "learning_rate": 7.457031231851941e-06, |
| "loss": 0.034, |
| "step": 14990 |
| }, |
| { |
| "epoch": 15.44799176107106, |
| "grad_norm": 0.29256248474121094, |
| "learning_rate": 7.4088407066839784e-06, |
| "loss": 0.0387, |
| "step": 15000 |
| }, |
| { |
| "epoch": 15.458290422245108, |
| "grad_norm": 0.2301289290189743, |
| "learning_rate": 7.36079393780853e-06, |
| "loss": 0.0311, |
| "step": 15010 |
| }, |
| { |
| "epoch": 15.468589083419156, |
| "grad_norm": 0.29095834493637085, |
| "learning_rate": 7.312891087396034e-06, |
| "loss": 0.0259, |
| "step": 15020 |
| }, |
| { |
| "epoch": 15.478887744593203, |
| "grad_norm": 0.2932276129722595, |
| "learning_rate": 7.2651323171310795e-06, |
| "loss": 0.0293, |
| "step": 15030 |
| }, |
| { |
| "epoch": 15.489186405767251, |
| "grad_norm": 0.24277035892009735, |
| "learning_rate": 7.217517788212025e-06, |
| "loss": 0.0334, |
| "step": 15040 |
| }, |
| { |
| "epoch": 15.499485066941297, |
| "grad_norm": 0.23208442330360413, |
| "learning_rate": 7.170047661350349e-06, |
| "loss": 0.0296, |
| "step": 15050 |
| }, |
| { |
| "epoch": 15.509783728115345, |
| "grad_norm": 0.1625526398420334, |
| "learning_rate": 7.122722096770123e-06, |
| "loss": 0.0283, |
| "step": 15060 |
| }, |
| { |
| "epoch": 15.520082389289392, |
| "grad_norm": 0.29437604546546936, |
| "learning_rate": 7.075541254207502e-06, |
| "loss": 0.0284, |
| "step": 15070 |
| }, |
| { |
| "epoch": 15.53038105046344, |
| "grad_norm": 0.3337920308113098, |
| "learning_rate": 7.028505292910154e-06, |
| "loss": 0.0235, |
| "step": 15080 |
| }, |
| { |
| "epoch": 15.540679711637488, |
| "grad_norm": 0.16761137545108795, |
| "learning_rate": 6.981614371636747e-06, |
| "loss": 0.0261, |
| "step": 15090 |
| }, |
| { |
| "epoch": 15.550978372811535, |
| "grad_norm": 0.18191471695899963, |
| "learning_rate": 6.934868648656373e-06, |
| "loss": 0.0273, |
| "step": 15100 |
| }, |
| { |
| "epoch": 15.561277033985581, |
| "grad_norm": 0.2083984911441803, |
| "learning_rate": 6.8882682817481006e-06, |
| "loss": 0.0339, |
| "step": 15110 |
| }, |
| { |
| "epoch": 15.571575695159629, |
| "grad_norm": 0.33254730701446533, |
| "learning_rate": 6.841813428200306e-06, |
| "loss": 0.0335, |
| "step": 15120 |
| }, |
| { |
| "epoch": 15.581874356333676, |
| "grad_norm": 0.22721487283706665, |
| "learning_rate": 6.795504244810285e-06, |
| "loss": 0.0284, |
| "step": 15130 |
| }, |
| { |
| "epoch": 15.592173017507724, |
| "grad_norm": 0.3968798816204071, |
| "learning_rate": 6.749340887883626e-06, |
| "loss": 0.0326, |
| "step": 15140 |
| }, |
| { |
| "epoch": 15.602471678681772, |
| "grad_norm": 0.1721322387456894, |
| "learning_rate": 6.7033235132337225e-06, |
| "loss": 0.0267, |
| "step": 15150 |
| }, |
| { |
| "epoch": 15.61277033985582, |
| "grad_norm": 0.3585062026977539, |
| "learning_rate": 6.6574522761812366e-06, |
| "loss": 0.0297, |
| "step": 15160 |
| }, |
| { |
| "epoch": 15.623069001029865, |
| "grad_norm": 0.45918750762939453, |
| "learning_rate": 6.611727331553586e-06, |
| "loss": 0.0275, |
| "step": 15170 |
| }, |
| { |
| "epoch": 15.633367662203913, |
| "grad_norm": 0.3067721724510193, |
| "learning_rate": 6.566148833684399e-06, |
| "loss": 0.0287, |
| "step": 15180 |
| }, |
| { |
| "epoch": 15.64366632337796, |
| "grad_norm": 0.2751639187335968, |
| "learning_rate": 6.520716936413018e-06, |
| "loss": 0.0295, |
| "step": 15190 |
| }, |
| { |
| "epoch": 15.653964984552008, |
| "grad_norm": 0.21889840066432953, |
| "learning_rate": 6.475431793083974e-06, |
| "loss": 0.0321, |
| "step": 15200 |
| }, |
| { |
| "epoch": 15.664263645726056, |
| "grad_norm": 0.3290077745914459, |
| "learning_rate": 6.4302935565464514e-06, |
| "loss": 0.031, |
| "step": 15210 |
| }, |
| { |
| "epoch": 15.674562306900103, |
| "grad_norm": 0.5243391394615173, |
| "learning_rate": 6.385302379153818e-06, |
| "loss": 0.0248, |
| "step": 15220 |
| }, |
| { |
| "epoch": 15.684860968074151, |
| "grad_norm": 1.0162177085876465, |
| "learning_rate": 6.3404584127630115e-06, |
| "loss": 0.0243, |
| "step": 15230 |
| }, |
| { |
| "epoch": 15.695159629248197, |
| "grad_norm": 0.33608901500701904, |
| "learning_rate": 6.295761808734174e-06, |
| "loss": 0.0307, |
| "step": 15240 |
| }, |
| { |
| "epoch": 15.705458290422245, |
| "grad_norm": 0.2736285626888275, |
| "learning_rate": 6.251212717930017e-06, |
| "loss": 0.0341, |
| "step": 15250 |
| }, |
| { |
| "epoch": 15.715756951596292, |
| "grad_norm": 0.3048650920391083, |
| "learning_rate": 6.206811290715353e-06, |
| "loss": 0.035, |
| "step": 15260 |
| }, |
| { |
| "epoch": 15.72605561277034, |
| "grad_norm": 0.2898007929325104, |
| "learning_rate": 6.16255767695661e-06, |
| "loss": 0.0304, |
| "step": 15270 |
| }, |
| { |
| "epoch": 15.736354273944388, |
| "grad_norm": 0.2866269052028656, |
| "learning_rate": 6.118452026021299e-06, |
| "loss": 0.0344, |
| "step": 15280 |
| }, |
| { |
| "epoch": 15.746652935118435, |
| "grad_norm": 0.29790258407592773, |
| "learning_rate": 6.07449448677751e-06, |
| "loss": 0.0333, |
| "step": 15290 |
| }, |
| { |
| "epoch": 15.756951596292481, |
| "grad_norm": 0.33838725090026855, |
| "learning_rate": 6.030685207593423e-06, |
| "loss": 0.0345, |
| "step": 15300 |
| }, |
| { |
| "epoch": 15.767250257466529, |
| "grad_norm": 0.28657403588294983, |
| "learning_rate": 5.9870243363368275e-06, |
| "loss": 0.0321, |
| "step": 15310 |
| }, |
| { |
| "epoch": 15.777548918640576, |
| "grad_norm": 0.34499257802963257, |
| "learning_rate": 5.943512020374537e-06, |
| "loss": 0.0367, |
| "step": 15320 |
| }, |
| { |
| "epoch": 15.787847579814624, |
| "grad_norm": 0.2314077764749527, |
| "learning_rate": 5.90014840657202e-06, |
| "loss": 0.0351, |
| "step": 15330 |
| }, |
| { |
| "epoch": 15.798146240988672, |
| "grad_norm": 0.40013644099235535, |
| "learning_rate": 5.856933641292789e-06, |
| "loss": 0.0305, |
| "step": 15340 |
| }, |
| { |
| "epoch": 15.80844490216272, |
| "grad_norm": 0.6308583617210388, |
| "learning_rate": 5.813867870397977e-06, |
| "loss": 0.0331, |
| "step": 15350 |
| }, |
| { |
| "epoch": 15.818743563336767, |
| "grad_norm": 0.3136028051376343, |
| "learning_rate": 5.770951239245803e-06, |
| "loss": 0.0313, |
| "step": 15360 |
| }, |
| { |
| "epoch": 15.829042224510813, |
| "grad_norm": 0.18756185472011566, |
| "learning_rate": 5.72818389269113e-06, |
| "loss": 0.0261, |
| "step": 15370 |
| }, |
| { |
| "epoch": 15.83934088568486, |
| "grad_norm": 0.22854579985141754, |
| "learning_rate": 5.685565975084911e-06, |
| "loss": 0.0307, |
| "step": 15380 |
| }, |
| { |
| "epoch": 15.849639546858908, |
| "grad_norm": 0.18659406900405884, |
| "learning_rate": 5.643097630273769e-06, |
| "loss": 0.0293, |
| "step": 15390 |
| }, |
| { |
| "epoch": 15.859938208032956, |
| "grad_norm": 0.2682023048400879, |
| "learning_rate": 5.600779001599455e-06, |
| "loss": 0.0339, |
| "step": 15400 |
| }, |
| { |
| "epoch": 15.870236869207003, |
| "grad_norm": 0.29009154438972473, |
| "learning_rate": 5.558610231898393e-06, |
| "loss": 0.037, |
| "step": 15410 |
| }, |
| { |
| "epoch": 15.880535530381051, |
| "grad_norm": 0.32601863145828247, |
| "learning_rate": 5.516591463501231e-06, |
| "loss": 0.0322, |
| "step": 15420 |
| }, |
| { |
| "epoch": 15.890834191555097, |
| "grad_norm": 0.25241759419441223, |
| "learning_rate": 5.474722838232254e-06, |
| "loss": 0.0335, |
| "step": 15430 |
| }, |
| { |
| "epoch": 15.901132852729145, |
| "grad_norm": 0.34431523084640503, |
| "learning_rate": 5.433004497409039e-06, |
| "loss": 0.027, |
| "step": 15440 |
| }, |
| { |
| "epoch": 15.911431513903192, |
| "grad_norm": 0.24490360915660858, |
| "learning_rate": 5.391436581841886e-06, |
| "loss": 0.0287, |
| "step": 15450 |
| }, |
| { |
| "epoch": 15.92173017507724, |
| "grad_norm": 0.25288495421409607, |
| "learning_rate": 5.350019231833364e-06, |
| "loss": 0.0301, |
| "step": 15460 |
| }, |
| { |
| "epoch": 15.932028836251288, |
| "grad_norm": 0.23814049363136292, |
| "learning_rate": 5.3087525871778565e-06, |
| "loss": 0.0291, |
| "step": 15470 |
| }, |
| { |
| "epoch": 15.942327497425335, |
| "grad_norm": 0.2367774397134781, |
| "learning_rate": 5.2676367871610675e-06, |
| "loss": 0.0325, |
| "step": 15480 |
| }, |
| { |
| "epoch": 15.952626158599383, |
| "grad_norm": 0.20925898849964142, |
| "learning_rate": 5.226671970559577e-06, |
| "loss": 0.0307, |
| "step": 15490 |
| }, |
| { |
| "epoch": 15.962924819773429, |
| "grad_norm": 0.36154627799987793, |
| "learning_rate": 5.185858275640332e-06, |
| "loss": 0.0328, |
| "step": 15500 |
| }, |
| { |
| "epoch": 15.973223480947476, |
| "grad_norm": 0.25385522842407227, |
| "learning_rate": 5.145195840160239e-06, |
| "loss": 0.0299, |
| "step": 15510 |
| }, |
| { |
| "epoch": 15.983522142121524, |
| "grad_norm": 0.25496914982795715, |
| "learning_rate": 5.1046848013656165e-06, |
| "loss": 0.0292, |
| "step": 15520 |
| }, |
| { |
| "epoch": 15.993820803295572, |
| "grad_norm": 0.2563509941101074, |
| "learning_rate": 5.064325295991829e-06, |
| "loss": 0.0284, |
| "step": 15530 |
| }, |
| { |
| "epoch": 16.004119464469618, |
| "grad_norm": 0.2616461217403412, |
| "learning_rate": 5.024117460262751e-06, |
| "loss": 0.0439, |
| "step": 15540 |
| }, |
| { |
| "epoch": 16.014418125643665, |
| "grad_norm": 0.3009835481643677, |
| "learning_rate": 4.984061429890324e-06, |
| "loss": 0.0304, |
| "step": 15550 |
| }, |
| { |
| "epoch": 16.024716786817713, |
| "grad_norm": 0.29534780979156494, |
| "learning_rate": 4.94415734007413e-06, |
| "loss": 0.0319, |
| "step": 15560 |
| }, |
| { |
| "epoch": 16.03501544799176, |
| "grad_norm": 0.21110209822654724, |
| "learning_rate": 4.9044053255008935e-06, |
| "loss": 0.0309, |
| "step": 15570 |
| }, |
| { |
| "epoch": 16.04531410916581, |
| "grad_norm": 0.257237046957016, |
| "learning_rate": 4.864805520344051e-06, |
| "loss": 0.0274, |
| "step": 15580 |
| }, |
| { |
| "epoch": 16.055612770339856, |
| "grad_norm": 0.3104022741317749, |
| "learning_rate": 4.8253580582632906e-06, |
| "loss": 0.0294, |
| "step": 15590 |
| }, |
| { |
| "epoch": 16.065911431513904, |
| "grad_norm": 0.1543678343296051, |
| "learning_rate": 4.786063072404112e-06, |
| "loss": 0.0247, |
| "step": 15600 |
| }, |
| { |
| "epoch": 16.07621009268795, |
| "grad_norm": 0.18241259455680847, |
| "learning_rate": 4.7469206953973495e-06, |
| "loss": 0.0245, |
| "step": 15610 |
| }, |
| { |
| "epoch": 16.086508753862, |
| "grad_norm": 0.18561235070228577, |
| "learning_rate": 4.707931059358783e-06, |
| "loss": 0.0282, |
| "step": 15620 |
| }, |
| { |
| "epoch": 16.096807415036047, |
| "grad_norm": 0.36796221137046814, |
| "learning_rate": 4.669094295888588e-06, |
| "loss": 0.0323, |
| "step": 15630 |
| }, |
| { |
| "epoch": 16.107106076210094, |
| "grad_norm": 0.21030554175376892, |
| "learning_rate": 4.630410536071006e-06, |
| "loss": 0.0271, |
| "step": 15640 |
| }, |
| { |
| "epoch": 16.117404737384142, |
| "grad_norm": 0.23774808645248413, |
| "learning_rate": 4.59187991047384e-06, |
| "loss": 0.0319, |
| "step": 15650 |
| }, |
| { |
| "epoch": 16.127703398558186, |
| "grad_norm": 0.16403083503246307, |
| "learning_rate": 4.553502549148009e-06, |
| "loss": 0.0339, |
| "step": 15660 |
| }, |
| { |
| "epoch": 16.138002059732234, |
| "grad_norm": 0.23186904191970825, |
| "learning_rate": 4.515278581627141e-06, |
| "loss": 0.0301, |
| "step": 15670 |
| }, |
| { |
| "epoch": 16.14830072090628, |
| "grad_norm": 0.24327369034290314, |
| "learning_rate": 4.477208136927119e-06, |
| "loss": 0.0308, |
| "step": 15680 |
| }, |
| { |
| "epoch": 16.15859938208033, |
| "grad_norm": 0.2953716814517975, |
| "learning_rate": 4.439291343545643e-06, |
| "loss": 0.0281, |
| "step": 15690 |
| }, |
| { |
| "epoch": 16.168898043254377, |
| "grad_norm": 0.24078382551670074, |
| "learning_rate": 4.401528329461779e-06, |
| "loss": 0.0304, |
| "step": 15700 |
| }, |
| { |
| "epoch": 16.179196704428424, |
| "grad_norm": 0.3598305583000183, |
| "learning_rate": 4.363919222135604e-06, |
| "loss": 0.0279, |
| "step": 15710 |
| }, |
| { |
| "epoch": 16.189495365602472, |
| "grad_norm": 0.18711034953594208, |
| "learning_rate": 4.326464148507647e-06, |
| "loss": 0.0289, |
| "step": 15720 |
| }, |
| { |
| "epoch": 16.19979402677652, |
| "grad_norm": 0.3203088045120239, |
| "learning_rate": 4.289163234998589e-06, |
| "loss": 0.0334, |
| "step": 15730 |
| }, |
| { |
| "epoch": 16.210092687950567, |
| "grad_norm": 0.2985017001628876, |
| "learning_rate": 4.2520166075087635e-06, |
| "loss": 0.0246, |
| "step": 15740 |
| }, |
| { |
| "epoch": 16.220391349124615, |
| "grad_norm": 0.25471287965774536, |
| "learning_rate": 4.2150243914177325e-06, |
| "loss": 0.029, |
| "step": 15750 |
| }, |
| { |
| "epoch": 16.230690010298662, |
| "grad_norm": 0.22707876563072205, |
| "learning_rate": 4.178186711583904e-06, |
| "loss": 0.0258, |
| "step": 15760 |
| }, |
| { |
| "epoch": 16.24098867147271, |
| "grad_norm": 0.2530466914176941, |
| "learning_rate": 4.141503692344062e-06, |
| "loss": 0.0324, |
| "step": 15770 |
| }, |
| { |
| "epoch": 16.251287332646754, |
| "grad_norm": 0.23593966662883759, |
| "learning_rate": 4.1049754575129935e-06, |
| "loss": 0.0299, |
| "step": 15780 |
| }, |
| { |
| "epoch": 16.261585993820802, |
| "grad_norm": 0.26746660470962524, |
| "learning_rate": 4.068602130383031e-06, |
| "loss": 0.025, |
| "step": 15790 |
| }, |
| { |
| "epoch": 16.27188465499485, |
| "grad_norm": 0.3687654733657837, |
| "learning_rate": 4.032383833723657e-06, |
| "loss": 0.0344, |
| "step": 15800 |
| }, |
| { |
| "epoch": 16.282183316168897, |
| "grad_norm": 0.26962026953697205, |
| "learning_rate": 3.99632068978108e-06, |
| "loss": 0.0315, |
| "step": 15810 |
| }, |
| { |
| "epoch": 16.292481977342945, |
| "grad_norm": 0.3096659779548645, |
| "learning_rate": 3.960412820277865e-06, |
| "loss": 0.0241, |
| "step": 15820 |
| }, |
| { |
| "epoch": 16.302780638516992, |
| "grad_norm": 0.3644077777862549, |
| "learning_rate": 3.924660346412418e-06, |
| "loss": 0.0348, |
| "step": 15830 |
| }, |
| { |
| "epoch": 16.31307929969104, |
| "grad_norm": 0.2755933701992035, |
| "learning_rate": 3.8890633888587046e-06, |
| "loss": 0.0309, |
| "step": 15840 |
| }, |
| { |
| "epoch": 16.323377960865088, |
| "grad_norm": 0.5915675163269043, |
| "learning_rate": 3.8536220677657495e-06, |
| "loss": 0.0314, |
| "step": 15850 |
| }, |
| { |
| "epoch": 16.333676622039135, |
| "grad_norm": 0.2403060346841812, |
| "learning_rate": 3.8183365027572805e-06, |
| "loss": 0.0304, |
| "step": 15860 |
| }, |
| { |
| "epoch": 16.343975283213183, |
| "grad_norm": 0.24288389086723328, |
| "learning_rate": 3.783206812931289e-06, |
| "loss": 0.0291, |
| "step": 15870 |
| }, |
| { |
| "epoch": 16.35427394438723, |
| "grad_norm": 0.3532700836658478, |
| "learning_rate": 3.7482331168596675e-06, |
| "loss": 0.0289, |
| "step": 15880 |
| }, |
| { |
| "epoch": 16.36457260556128, |
| "grad_norm": 0.18153394758701324, |
| "learning_rate": 3.7134155325877772e-06, |
| "loss": 0.0329, |
| "step": 15890 |
| }, |
| { |
| "epoch": 16.374871266735326, |
| "grad_norm": 0.4066762924194336, |
| "learning_rate": 3.678754177634053e-06, |
| "loss": 0.0293, |
| "step": 15900 |
| }, |
| { |
| "epoch": 16.38516992790937, |
| "grad_norm": 0.33672627806663513, |
| "learning_rate": 3.64424916898965e-06, |
| "loss": 0.0303, |
| "step": 15910 |
| }, |
| { |
| "epoch": 16.395468589083418, |
| "grad_norm": 0.273366242647171, |
| "learning_rate": 3.6099006231179622e-06, |
| "loss": 0.0307, |
| "step": 15920 |
| }, |
| { |
| "epoch": 16.405767250257465, |
| "grad_norm": 0.22325216233730316, |
| "learning_rate": 3.575708655954324e-06, |
| "loss": 0.0327, |
| "step": 15930 |
| }, |
| { |
| "epoch": 16.416065911431513, |
| "grad_norm": 0.18643653392791748, |
| "learning_rate": 3.541673382905558e-06, |
| "loss": 0.0346, |
| "step": 15940 |
| }, |
| { |
| "epoch": 16.42636457260556, |
| "grad_norm": 0.2503977119922638, |
| "learning_rate": 3.5077949188495996e-06, |
| "loss": 0.033, |
| "step": 15950 |
| }, |
| { |
| "epoch": 16.43666323377961, |
| "grad_norm": 0.29063940048217773, |
| "learning_rate": 3.474073378135123e-06, |
| "loss": 0.0286, |
| "step": 15960 |
| }, |
| { |
| "epoch": 16.446961894953656, |
| "grad_norm": 0.2275126725435257, |
| "learning_rate": 3.440508874581139e-06, |
| "loss": 0.0321, |
| "step": 15970 |
| }, |
| { |
| "epoch": 16.457260556127704, |
| "grad_norm": 0.24945175647735596, |
| "learning_rate": 3.4071015214766134e-06, |
| "loss": 0.0312, |
| "step": 15980 |
| }, |
| { |
| "epoch": 16.46755921730175, |
| "grad_norm": 0.4091668725013733, |
| "learning_rate": 3.3738514315800995e-06, |
| "loss": 0.0351, |
| "step": 15990 |
| }, |
| { |
| "epoch": 16.4778578784758, |
| "grad_norm": 0.20869703590869904, |
| "learning_rate": 3.3407587171193354e-06, |
| "loss": 0.0262, |
| "step": 16000 |
| }, |
| { |
| "epoch": 16.488156539649847, |
| "grad_norm": 0.19803866744041443, |
| "learning_rate": 3.3078234897908788e-06, |
| "loss": 0.0293, |
| "step": 16010 |
| }, |
| { |
| "epoch": 16.498455200823894, |
| "grad_norm": 0.24785685539245605, |
| "learning_rate": 3.2750458607597457e-06, |
| "loss": 0.0295, |
| "step": 16020 |
| }, |
| { |
| "epoch": 16.508753861997942, |
| "grad_norm": 0.23679105937480927, |
| "learning_rate": 3.2424259406589664e-06, |
| "loss": 0.0269, |
| "step": 16030 |
| }, |
| { |
| "epoch": 16.519052523171986, |
| "grad_norm": 0.21375852823257446, |
| "learning_rate": 3.209963839589325e-06, |
| "loss": 0.0236, |
| "step": 16040 |
| }, |
| { |
| "epoch": 16.529351184346034, |
| "grad_norm": 0.1723773181438446, |
| "learning_rate": 3.177659667118882e-06, |
| "loss": 0.0312, |
| "step": 16050 |
| }, |
| { |
| "epoch": 16.53964984552008, |
| "grad_norm": 0.24385997653007507, |
| "learning_rate": 3.1455135322826678e-06, |
| "loss": 0.0301, |
| "step": 16060 |
| }, |
| { |
| "epoch": 16.54994850669413, |
| "grad_norm": 0.2073340266942978, |
| "learning_rate": 3.1135255435822796e-06, |
| "loss": 0.0286, |
| "step": 16070 |
| }, |
| { |
| "epoch": 16.560247167868177, |
| "grad_norm": 0.2794674336910248, |
| "learning_rate": 3.0816958089855462e-06, |
| "loss": 0.0265, |
| "step": 16080 |
| }, |
| { |
| "epoch": 16.570545829042224, |
| "grad_norm": 0.2308894544839859, |
| "learning_rate": 3.0500244359261355e-06, |
| "loss": 0.0284, |
| "step": 16090 |
| }, |
| { |
| "epoch": 16.580844490216272, |
| "grad_norm": 0.2674751579761505, |
| "learning_rate": 3.018511531303203e-06, |
| "loss": 0.0282, |
| "step": 16100 |
| }, |
| { |
| "epoch": 16.59114315139032, |
| "grad_norm": 0.20278188586235046, |
| "learning_rate": 2.9871572014810555e-06, |
| "loss": 0.0272, |
| "step": 16110 |
| }, |
| { |
| "epoch": 16.601441812564367, |
| "grad_norm": 0.20840872824192047, |
| "learning_rate": 2.9559615522887273e-06, |
| "loss": 0.0358, |
| "step": 16120 |
| }, |
| { |
| "epoch": 16.611740473738415, |
| "grad_norm": 0.26591232419013977, |
| "learning_rate": 2.924924689019698e-06, |
| "loss": 0.0262, |
| "step": 16130 |
| }, |
| { |
| "epoch": 16.622039134912463, |
| "grad_norm": 0.22082144021987915, |
| "learning_rate": 2.8940467164314924e-06, |
| "loss": 0.0321, |
| "step": 16140 |
| }, |
| { |
| "epoch": 16.63233779608651, |
| "grad_norm": 0.2413538098335266, |
| "learning_rate": 2.8633277387453308e-06, |
| "loss": 0.0377, |
| "step": 16150 |
| }, |
| { |
| "epoch": 16.642636457260558, |
| "grad_norm": 0.2731287479400635, |
| "learning_rate": 2.8327678596457963e-06, |
| "loss": 0.031, |
| "step": 16160 |
| }, |
| { |
| "epoch": 16.652935118434602, |
| "grad_norm": 0.18613195419311523, |
| "learning_rate": 2.802367182280463e-06, |
| "loss": 0.0367, |
| "step": 16170 |
| }, |
| { |
| "epoch": 16.66323377960865, |
| "grad_norm": 0.19616888463497162, |
| "learning_rate": 2.7721258092595627e-06, |
| "loss": 0.0265, |
| "step": 16180 |
| }, |
| { |
| "epoch": 16.673532440782697, |
| "grad_norm": 0.20527370274066925, |
| "learning_rate": 2.7420438426556338e-06, |
| "loss": 0.0331, |
| "step": 16190 |
| }, |
| { |
| "epoch": 16.683831101956745, |
| "grad_norm": 0.21385008096694946, |
| "learning_rate": 2.712121384003169e-06, |
| "loss": 0.0271, |
| "step": 16200 |
| }, |
| { |
| "epoch": 16.694129763130793, |
| "grad_norm": 0.2785768210887909, |
| "learning_rate": 2.682358534298285e-06, |
| "loss": 0.0365, |
| "step": 16210 |
| }, |
| { |
| "epoch": 16.70442842430484, |
| "grad_norm": 0.2710186243057251, |
| "learning_rate": 2.652755393998396e-06, |
| "loss": 0.0245, |
| "step": 16220 |
| }, |
| { |
| "epoch": 16.714727085478888, |
| "grad_norm": 0.2453254610300064, |
| "learning_rate": 2.6233120630218045e-06, |
| "loss": 0.0327, |
| "step": 16230 |
| }, |
| { |
| "epoch": 16.725025746652936, |
| "grad_norm": 0.2788352072238922, |
| "learning_rate": 2.594028640747476e-06, |
| "loss": 0.0292, |
| "step": 16240 |
| }, |
| { |
| "epoch": 16.735324407826983, |
| "grad_norm": 0.4019950032234192, |
| "learning_rate": 2.564905226014597e-06, |
| "loss": 0.029, |
| "step": 16250 |
| }, |
| { |
| "epoch": 16.74562306900103, |
| "grad_norm": 0.2551436424255371, |
| "learning_rate": 2.5359419171223086e-06, |
| "loss": 0.0296, |
| "step": 16260 |
| }, |
| { |
| "epoch": 16.75592173017508, |
| "grad_norm": 0.2889397442340851, |
| "learning_rate": 2.507138811829346e-06, |
| "loss": 0.033, |
| "step": 16270 |
| }, |
| { |
| "epoch": 16.766220391349126, |
| "grad_norm": 0.25674816966056824, |
| "learning_rate": 2.4784960073537143e-06, |
| "loss": 0.0267, |
| "step": 16280 |
| }, |
| { |
| "epoch": 16.77651905252317, |
| "grad_norm": 0.21177352964878082, |
| "learning_rate": 2.4500136003723638e-06, |
| "loss": 0.0262, |
| "step": 16290 |
| }, |
| { |
| "epoch": 16.786817713697218, |
| "grad_norm": 0.21103815734386444, |
| "learning_rate": 2.421691687020855e-06, |
| "loss": 0.0295, |
| "step": 16300 |
| }, |
| { |
| "epoch": 16.797116374871266, |
| "grad_norm": 0.26780322194099426, |
| "learning_rate": 2.3935303628930707e-06, |
| "loss": 0.0327, |
| "step": 16310 |
| }, |
| { |
| "epoch": 16.807415036045313, |
| "grad_norm": 0.49311545491218567, |
| "learning_rate": 2.3655297230408045e-06, |
| "loss": 0.03, |
| "step": 16320 |
| }, |
| { |
| "epoch": 16.81771369721936, |
| "grad_norm": 0.2364225834608078, |
| "learning_rate": 2.3376898619735577e-06, |
| "loss": 0.0276, |
| "step": 16330 |
| }, |
| { |
| "epoch": 16.82801235839341, |
| "grad_norm": 0.29716435074806213, |
| "learning_rate": 2.3100108736581305e-06, |
| "loss": 0.027, |
| "step": 16340 |
| }, |
| { |
| "epoch": 16.838311019567456, |
| "grad_norm": 0.20759916305541992, |
| "learning_rate": 2.282492851518342e-06, |
| "loss": 0.0275, |
| "step": 16350 |
| }, |
| { |
| "epoch": 16.848609680741504, |
| "grad_norm": 0.1657613217830658, |
| "learning_rate": 2.2551358884347007e-06, |
| "loss": 0.0273, |
| "step": 16360 |
| }, |
| { |
| "epoch": 16.85890834191555, |
| "grad_norm": 0.16528256237506866, |
| "learning_rate": 2.227940076744117e-06, |
| "loss": 0.0309, |
| "step": 16370 |
| }, |
| { |
| "epoch": 16.8692070030896, |
| "grad_norm": 0.28386402130126953, |
| "learning_rate": 2.2009055082395537e-06, |
| "loss": 0.0324, |
| "step": 16380 |
| }, |
| { |
| "epoch": 16.879505664263647, |
| "grad_norm": 0.23188601434230804, |
| "learning_rate": 2.174032274169746e-06, |
| "loss": 0.0283, |
| "step": 16390 |
| }, |
| { |
| "epoch": 16.889804325437694, |
| "grad_norm": 0.34195181727409363, |
| "learning_rate": 2.1473204652388834e-06, |
| "loss": 0.031, |
| "step": 16400 |
| }, |
| { |
| "epoch": 16.900102986611742, |
| "grad_norm": 0.19225898385047913, |
| "learning_rate": 2.1207701716062956e-06, |
| "loss": 0.0374, |
| "step": 16410 |
| }, |
| { |
| "epoch": 16.910401647785786, |
| "grad_norm": 0.4472239911556244, |
| "learning_rate": 2.0943814828861762e-06, |
| "loss": 0.0304, |
| "step": 16420 |
| }, |
| { |
| "epoch": 16.920700308959834, |
| "grad_norm": 0.26532843708992004, |
| "learning_rate": 2.0681544881472283e-06, |
| "loss": 0.0291, |
| "step": 16430 |
| }, |
| { |
| "epoch": 16.93099897013388, |
| "grad_norm": 0.27116134762763977, |
| "learning_rate": 2.0420892759124176e-06, |
| "loss": 0.0224, |
| "step": 16440 |
| }, |
| { |
| "epoch": 16.94129763130793, |
| "grad_norm": 0.3424379825592041, |
| "learning_rate": 2.0161859341586597e-06, |
| "loss": 0.0274, |
| "step": 16450 |
| }, |
| { |
| "epoch": 16.951596292481977, |
| "grad_norm": 0.23772460222244263, |
| "learning_rate": 1.9904445503164838e-06, |
| "loss": 0.0308, |
| "step": 16460 |
| }, |
| { |
| "epoch": 16.961894953656024, |
| "grad_norm": 0.23013190925121307, |
| "learning_rate": 1.964865211269801e-06, |
| "loss": 0.0265, |
| "step": 16470 |
| }, |
| { |
| "epoch": 16.972193614830072, |
| "grad_norm": 0.2528025805950165, |
| "learning_rate": 1.939448003355554e-06, |
| "loss": 0.0342, |
| "step": 16480 |
| }, |
| { |
| "epoch": 16.98249227600412, |
| "grad_norm": 0.39106324315071106, |
| "learning_rate": 1.914193012363469e-06, |
| "loss": 0.0326, |
| "step": 16490 |
| }, |
| { |
| "epoch": 16.992790937178167, |
| "grad_norm": 0.4082978069782257, |
| "learning_rate": 1.8891003235357308e-06, |
| "loss": 0.0321, |
| "step": 16500 |
| }, |
| { |
| "epoch": 17.003089598352215, |
| "grad_norm": 0.1785215586423874, |
| "learning_rate": 1.8641700215667413e-06, |
| "loss": 0.0265, |
| "step": 16510 |
| }, |
| { |
| "epoch": 17.013388259526263, |
| "grad_norm": 0.5540566444396973, |
| "learning_rate": 1.839402190602757e-06, |
| "loss": 0.0281, |
| "step": 16520 |
| }, |
| { |
| "epoch": 17.02368692070031, |
| "grad_norm": 0.2588430941104889, |
| "learning_rate": 1.8147969142417066e-06, |
| "loss": 0.0284, |
| "step": 16530 |
| }, |
| { |
| "epoch": 17.033985581874358, |
| "grad_norm": 0.3563145399093628, |
| "learning_rate": 1.7903542755328073e-06, |
| "loss": 0.0308, |
| "step": 16540 |
| }, |
| { |
| "epoch": 17.044284243048402, |
| "grad_norm": 0.303353488445282, |
| "learning_rate": 1.766074356976366e-06, |
| "loss": 0.0302, |
| "step": 16550 |
| }, |
| { |
| "epoch": 17.05458290422245, |
| "grad_norm": 0.24329645931720734, |
| "learning_rate": 1.7419572405234453e-06, |
| "loss": 0.0282, |
| "step": 16560 |
| }, |
| { |
| "epoch": 17.064881565396497, |
| "grad_norm": 0.212374746799469, |
| "learning_rate": 1.7180030075756136e-06, |
| "loss": 0.0298, |
| "step": 16570 |
| }, |
| { |
| "epoch": 17.075180226570545, |
| "grad_norm": 0.22339214384555817, |
| "learning_rate": 1.6942117389846746e-06, |
| "loss": 0.0314, |
| "step": 16580 |
| }, |
| { |
| "epoch": 17.085478887744593, |
| "grad_norm": 0.2897525131702423, |
| "learning_rate": 1.6705835150523707e-06, |
| "loss": 0.0331, |
| "step": 16590 |
| }, |
| { |
| "epoch": 17.09577754891864, |
| "grad_norm": 0.20139732956886292, |
| "learning_rate": 1.6471184155301355e-06, |
| "loss": 0.0271, |
| "step": 16600 |
| }, |
| { |
| "epoch": 17.106076210092688, |
| "grad_norm": 0.30817776918411255, |
| "learning_rate": 1.6238165196188039e-06, |
| "loss": 0.0288, |
| "step": 16610 |
| }, |
| { |
| "epoch": 17.116374871266736, |
| "grad_norm": 0.23742049932479858, |
| "learning_rate": 1.6006779059683784e-06, |
| "loss": 0.0317, |
| "step": 16620 |
| }, |
| { |
| "epoch": 17.126673532440783, |
| "grad_norm": 0.2712803781032562, |
| "learning_rate": 1.5777026526777094e-06, |
| "loss": 0.029, |
| "step": 16630 |
| }, |
| { |
| "epoch": 17.13697219361483, |
| "grad_norm": 0.19828765094280243, |
| "learning_rate": 1.5548908372942983e-06, |
| "loss": 0.0315, |
| "step": 16640 |
| }, |
| { |
| "epoch": 17.14727085478888, |
| "grad_norm": 0.27912184596061707, |
| "learning_rate": 1.5322425368139714e-06, |
| "loss": 0.0293, |
| "step": 16650 |
| }, |
| { |
| "epoch": 17.157569515962926, |
| "grad_norm": 0.41649627685546875, |
| "learning_rate": 1.5097578276806633e-06, |
| "loss": 0.0299, |
| "step": 16660 |
| }, |
| { |
| "epoch": 17.167868177136974, |
| "grad_norm": 0.20297054946422577, |
| "learning_rate": 1.487436785786145e-06, |
| "loss": 0.0313, |
| "step": 16670 |
| }, |
| { |
| "epoch": 17.178166838311018, |
| "grad_norm": 0.38883742690086365, |
| "learning_rate": 1.4652794864697671e-06, |
| "loss": 0.0293, |
| "step": 16680 |
| }, |
| { |
| "epoch": 17.188465499485066, |
| "grad_norm": 0.2401762455701828, |
| "learning_rate": 1.4432860045182017e-06, |
| "loss": 0.0282, |
| "step": 16690 |
| }, |
| { |
| "epoch": 17.198764160659113, |
| "grad_norm": 0.3450429141521454, |
| "learning_rate": 1.4214564141651898e-06, |
| "loss": 0.0249, |
| "step": 16700 |
| }, |
| { |
| "epoch": 17.20906282183316, |
| "grad_norm": 0.17480014264583588, |
| "learning_rate": 1.3997907890913265e-06, |
| "loss": 0.0271, |
| "step": 16710 |
| }, |
| { |
| "epoch": 17.21936148300721, |
| "grad_norm": 0.2633569538593292, |
| "learning_rate": 1.3782892024237327e-06, |
| "loss": 0.0282, |
| "step": 16720 |
| }, |
| { |
| "epoch": 17.229660144181256, |
| "grad_norm": 0.22684310376644135, |
| "learning_rate": 1.3569517267359e-06, |
| "loss": 0.0325, |
| "step": 16730 |
| }, |
| { |
| "epoch": 17.239958805355304, |
| "grad_norm": 0.30432412028312683, |
| "learning_rate": 1.33577843404738e-06, |
| "loss": 0.027, |
| "step": 16740 |
| }, |
| { |
| "epoch": 17.25025746652935, |
| "grad_norm": 0.3308713734149933, |
| "learning_rate": 1.3147693958235618e-06, |
| "loss": 0.0296, |
| "step": 16750 |
| }, |
| { |
| "epoch": 17.2605561277034, |
| "grad_norm": 0.2591300904750824, |
| "learning_rate": 1.2939246829754503e-06, |
| "loss": 0.0191, |
| "step": 16760 |
| }, |
| { |
| "epoch": 17.270854788877447, |
| "grad_norm": 0.3229091763496399, |
| "learning_rate": 1.2732443658593884e-06, |
| "loss": 0.0278, |
| "step": 16770 |
| }, |
| { |
| "epoch": 17.281153450051495, |
| "grad_norm": 0.3232883810997009, |
| "learning_rate": 1.2527285142768574e-06, |
| "loss": 0.0308, |
| "step": 16780 |
| }, |
| { |
| "epoch": 17.291452111225542, |
| "grad_norm": 0.16374994814395905, |
| "learning_rate": 1.2323771974742104e-06, |
| "loss": 0.0285, |
| "step": 16790 |
| }, |
| { |
| "epoch": 17.301750772399586, |
| "grad_norm": 0.4016587734222412, |
| "learning_rate": 1.212190484142467e-06, |
| "loss": 0.0287, |
| "step": 16800 |
| }, |
| { |
| "epoch": 17.312049433573634, |
| "grad_norm": 0.7468344569206238, |
| "learning_rate": 1.192168442417052e-06, |
| "loss": 0.0318, |
| "step": 16810 |
| }, |
| { |
| "epoch": 17.32234809474768, |
| "grad_norm": 0.62845778465271, |
| "learning_rate": 1.1723111398776077e-06, |
| "loss": 0.0307, |
| "step": 16820 |
| }, |
| { |
| "epoch": 17.33264675592173, |
| "grad_norm": 0.29316961765289307, |
| "learning_rate": 1.1526186435476927e-06, |
| "loss": 0.0322, |
| "step": 16830 |
| }, |
| { |
| "epoch": 17.342945417095777, |
| "grad_norm": 0.2891688942909241, |
| "learning_rate": 1.1330910198946442e-06, |
| "loss": 0.0274, |
| "step": 16840 |
| }, |
| { |
| "epoch": 17.353244078269825, |
| "grad_norm": 0.28778383135795593, |
| "learning_rate": 1.1137283348292892e-06, |
| "loss": 0.0341, |
| "step": 16850 |
| }, |
| { |
| "epoch": 17.363542739443872, |
| "grad_norm": 0.17100463807582855, |
| "learning_rate": 1.0945306537057555e-06, |
| "loss": 0.0334, |
| "step": 16860 |
| }, |
| { |
| "epoch": 17.37384140061792, |
| "grad_norm": 0.17976661026477814, |
| "learning_rate": 1.0754980413212268e-06, |
| "loss": 0.0299, |
| "step": 16870 |
| }, |
| { |
| "epoch": 17.384140061791967, |
| "grad_norm": 0.2614526152610779, |
| "learning_rate": 1.0566305619157502e-06, |
| "loss": 0.0278, |
| "step": 16880 |
| }, |
| { |
| "epoch": 17.394438722966015, |
| "grad_norm": 0.195588618516922, |
| "learning_rate": 1.0379282791719958e-06, |
| "loss": 0.028, |
| "step": 16890 |
| }, |
| { |
| "epoch": 17.404737384140063, |
| "grad_norm": 1.0282113552093506, |
| "learning_rate": 1.0193912562150464e-06, |
| "loss": 0.0291, |
| "step": 16900 |
| }, |
| { |
| "epoch": 17.41503604531411, |
| "grad_norm": 0.2868080735206604, |
| "learning_rate": 1.0010195556122203e-06, |
| "loss": 0.0329, |
| "step": 16910 |
| }, |
| { |
| "epoch": 17.425334706488158, |
| "grad_norm": 0.2227233201265335, |
| "learning_rate": 9.828132393727875e-07, |
| "loss": 0.0262, |
| "step": 16920 |
| }, |
| { |
| "epoch": 17.435633367662202, |
| "grad_norm": 0.20315021276474, |
| "learning_rate": 9.647723689478305e-07, |
| "loss": 0.0324, |
| "step": 16930 |
| }, |
| { |
| "epoch": 17.44593202883625, |
| "grad_norm": 0.6371609568595886, |
| "learning_rate": 9.468970052300019e-07, |
| "loss": 0.0318, |
| "step": 16940 |
| }, |
| { |
| "epoch": 17.456230690010297, |
| "grad_norm": 0.18564990162849426, |
| "learning_rate": 9.291872085533227e-07, |
| "loss": 0.0289, |
| "step": 16950 |
| }, |
| { |
| "epoch": 17.466529351184345, |
| "grad_norm": 0.22705796360969543, |
| "learning_rate": 9.116430386929886e-07, |
| "loss": 0.0249, |
| "step": 16960 |
| }, |
| { |
| "epoch": 17.476828012358393, |
| "grad_norm": 0.2133428156375885, |
| "learning_rate": 8.942645548651541e-07, |
| "loss": 0.0376, |
| "step": 16970 |
| }, |
| { |
| "epoch": 17.48712667353244, |
| "grad_norm": 0.19329524040222168, |
| "learning_rate": 8.770518157267482e-07, |
| "loss": 0.0308, |
| "step": 16980 |
| }, |
| { |
| "epoch": 17.497425334706488, |
| "grad_norm": 0.2410387098789215, |
| "learning_rate": 8.60004879375259e-07, |
| "loss": 0.0273, |
| "step": 16990 |
| }, |
| { |
| "epoch": 17.507723995880536, |
| "grad_norm": 0.20141083002090454, |
| "learning_rate": 8.4312380334855e-07, |
| "loss": 0.0336, |
| "step": 17000 |
| }, |
| { |
| "epoch": 17.518022657054583, |
| "grad_norm": 0.27098795771598816, |
| "learning_rate": 8.264086446246655e-07, |
| "loss": 0.0313, |
| "step": 17010 |
| }, |
| { |
| "epoch": 17.52832131822863, |
| "grad_norm": 0.35340428352355957, |
| "learning_rate": 8.098594596216424e-07, |
| "loss": 0.0348, |
| "step": 17020 |
| }, |
| { |
| "epoch": 17.53861997940268, |
| "grad_norm": 0.3264867663383484, |
| "learning_rate": 7.934763041972937e-07, |
| "loss": 0.0302, |
| "step": 17030 |
| }, |
| { |
| "epoch": 17.548918640576726, |
| "grad_norm": 0.2895232141017914, |
| "learning_rate": 7.772592336490525e-07, |
| "loss": 0.0325, |
| "step": 17040 |
| }, |
| { |
| "epoch": 17.559217301750774, |
| "grad_norm": 0.24770499765872955, |
| "learning_rate": 7.612083027137728e-07, |
| "loss": 0.0319, |
| "step": 17050 |
| }, |
| { |
| "epoch": 17.569515962924818, |
| "grad_norm": 0.4487510323524475, |
| "learning_rate": 7.453235655675406e-07, |
| "loss": 0.0258, |
| "step": 17060 |
| }, |
| { |
| "epoch": 17.579814624098866, |
| "grad_norm": 0.38243043422698975, |
| "learning_rate": 7.296050758254957e-07, |
| "loss": 0.0308, |
| "step": 17070 |
| }, |
| { |
| "epoch": 17.590113285272913, |
| "grad_norm": 0.5216277837753296, |
| "learning_rate": 7.140528865416441e-07, |
| "loss": 0.0268, |
| "step": 17080 |
| }, |
| { |
| "epoch": 17.60041194644696, |
| "grad_norm": 0.300006240606308, |
| "learning_rate": 6.986670502086901e-07, |
| "loss": 0.0324, |
| "step": 17090 |
| }, |
| { |
| "epoch": 17.61071060762101, |
| "grad_norm": 0.22057189047336578, |
| "learning_rate": 6.834476187578543e-07, |
| "loss": 0.0282, |
| "step": 17100 |
| }, |
| { |
| "epoch": 17.621009268795056, |
| "grad_norm": 0.26959654688835144, |
| "learning_rate": 6.683946435586952e-07, |
| "loss": 0.0307, |
| "step": 17110 |
| }, |
| { |
| "epoch": 17.631307929969104, |
| "grad_norm": 0.28995075821876526, |
| "learning_rate": 6.535081754189321e-07, |
| "loss": 0.0318, |
| "step": 17120 |
| }, |
| { |
| "epoch": 17.64160659114315, |
| "grad_norm": 0.3135945200920105, |
| "learning_rate": 6.387882645842947e-07, |
| "loss": 0.0287, |
| "step": 17130 |
| }, |
| { |
| "epoch": 17.6519052523172, |
| "grad_norm": 0.26953238248825073, |
| "learning_rate": 6.24234960738318e-07, |
| "loss": 0.0292, |
| "step": 17140 |
| }, |
| { |
| "epoch": 17.662203913491247, |
| "grad_norm": 0.2764807343482971, |
| "learning_rate": 6.098483130022148e-07, |
| "loss": 0.027, |
| "step": 17150 |
| }, |
| { |
| "epoch": 17.672502574665295, |
| "grad_norm": 0.3281687796115875, |
| "learning_rate": 5.956283699346754e-07, |
| "loss": 0.0254, |
| "step": 17160 |
| }, |
| { |
| "epoch": 17.682801235839342, |
| "grad_norm": 0.17730310559272766, |
| "learning_rate": 5.815751795317237e-07, |
| "loss": 0.0277, |
| "step": 17170 |
| }, |
| { |
| "epoch": 17.69309989701339, |
| "grad_norm": 0.43514519929885864, |
| "learning_rate": 5.676887892265559e-07, |
| "loss": 0.0238, |
| "step": 17180 |
| }, |
| { |
| "epoch": 17.703398558187434, |
| "grad_norm": 0.31942808628082275, |
| "learning_rate": 5.539692458893575e-07, |
| "loss": 0.027, |
| "step": 17190 |
| }, |
| { |
| "epoch": 17.71369721936148, |
| "grad_norm": 1.2527509927749634, |
| "learning_rate": 5.404165958271811e-07, |
| "loss": 0.029, |
| "step": 17200 |
| }, |
| { |
| "epoch": 17.72399588053553, |
| "grad_norm": 0.2568182051181793, |
| "learning_rate": 5.270308847837579e-07, |
| "loss": 0.0316, |
| "step": 17210 |
| }, |
| { |
| "epoch": 17.734294541709577, |
| "grad_norm": 0.32886284589767456, |
| "learning_rate": 5.13812157939364e-07, |
| "loss": 0.0341, |
| "step": 17220 |
| }, |
| { |
| "epoch": 17.744593202883625, |
| "grad_norm": 0.1350669264793396, |
| "learning_rate": 5.007604599106486e-07, |
| "loss": 0.0279, |
| "step": 17230 |
| }, |
| { |
| "epoch": 17.754891864057672, |
| "grad_norm": 0.24451610445976257, |
| "learning_rate": 4.878758347505175e-07, |
| "loss": 0.0261, |
| "step": 17240 |
| }, |
| { |
| "epoch": 17.76519052523172, |
| "grad_norm": 0.23091380298137665, |
| "learning_rate": 4.751583259479331e-07, |
| "loss": 0.031, |
| "step": 17250 |
| }, |
| { |
| "epoch": 17.775489186405768, |
| "grad_norm": 0.311443030834198, |
| "learning_rate": 4.6260797642782014e-07, |
| "loss": 0.032, |
| "step": 17260 |
| }, |
| { |
| "epoch": 17.785787847579815, |
| "grad_norm": 0.2045062929391861, |
| "learning_rate": 4.5022482855088255e-07, |
| "loss": 0.0256, |
| "step": 17270 |
| }, |
| { |
| "epoch": 17.796086508753863, |
| "grad_norm": 0.339093953371048, |
| "learning_rate": 4.380089241134866e-07, |
| "loss": 0.0306, |
| "step": 17280 |
| }, |
| { |
| "epoch": 17.80638516992791, |
| "grad_norm": 0.3019813597202301, |
| "learning_rate": 4.259603043475002e-07, |
| "loss": 0.0302, |
| "step": 17290 |
| }, |
| { |
| "epoch": 17.816683831101958, |
| "grad_norm": 0.21195490658283234, |
| "learning_rate": 4.1407900992015414e-07, |
| "loss": 0.0318, |
| "step": 17300 |
| }, |
| { |
| "epoch": 17.826982492276002, |
| "grad_norm": 0.2570505142211914, |
| "learning_rate": 4.023650809339363e-07, |
| "loss": 0.0387, |
| "step": 17310 |
| }, |
| { |
| "epoch": 17.83728115345005, |
| "grad_norm": 0.36077165603637695, |
| "learning_rate": 3.9081855692640333e-07, |
| "loss": 0.0281, |
| "step": 17320 |
| }, |
| { |
| "epoch": 17.847579814624098, |
| "grad_norm": 0.24089422821998596, |
| "learning_rate": 3.7943947687010816e-07, |
| "loss": 0.0265, |
| "step": 17330 |
| }, |
| { |
| "epoch": 17.857878475798145, |
| "grad_norm": 0.3065880835056305, |
| "learning_rate": 3.6822787917240587e-07, |
| "loss": 0.0265, |
| "step": 17340 |
| }, |
| { |
| "epoch": 17.868177136972193, |
| "grad_norm": 0.20888155698776245, |
| "learning_rate": 3.571838016753759e-07, |
| "loss": 0.0345, |
| "step": 17350 |
| }, |
| { |
| "epoch": 17.87847579814624, |
| "grad_norm": 0.42461952567100525, |
| "learning_rate": 3.4630728165566117e-07, |
| "loss": 0.0334, |
| "step": 17360 |
| }, |
| { |
| "epoch": 17.888774459320288, |
| "grad_norm": 0.36267679929733276, |
| "learning_rate": 3.3559835582435695e-07, |
| "loss": 0.0306, |
| "step": 17370 |
| }, |
| { |
| "epoch": 17.899073120494336, |
| "grad_norm": 0.1654314249753952, |
| "learning_rate": 3.250570603268943e-07, |
| "loss": 0.0247, |
| "step": 17380 |
| }, |
| { |
| "epoch": 17.909371781668384, |
| "grad_norm": 0.2670270800590515, |
| "learning_rate": 3.1468343074290143e-07, |
| "loss": 0.032, |
| "step": 17390 |
| }, |
| { |
| "epoch": 17.91967044284243, |
| "grad_norm": 0.2694757878780365, |
| "learning_rate": 3.0447750208607573e-07, |
| "loss": 0.0269, |
| "step": 17400 |
| }, |
| { |
| "epoch": 17.92996910401648, |
| "grad_norm": 0.34293317794799805, |
| "learning_rate": 2.944393088041009e-07, |
| "loss": 0.0234, |
| "step": 17410 |
| }, |
| { |
| "epoch": 17.940267765190526, |
| "grad_norm": 0.25010308623313904, |
| "learning_rate": 2.8456888477850776e-07, |
| "loss": 0.0294, |
| "step": 17420 |
| }, |
| { |
| "epoch": 17.950566426364574, |
| "grad_norm": 0.34105420112609863, |
| "learning_rate": 2.7486626332455245e-07, |
| "loss": 0.0292, |
| "step": 17430 |
| }, |
| { |
| "epoch": 17.96086508753862, |
| "grad_norm": 0.2277262657880783, |
| "learning_rate": 2.653314771911108e-07, |
| "loss": 0.0398, |
| "step": 17440 |
| }, |
| { |
| "epoch": 17.971163748712666, |
| "grad_norm": 0.3880465030670166, |
| "learning_rate": 2.5596455856058963e-07, |
| "loss": 0.0323, |
| "step": 17450 |
| }, |
| { |
| "epoch": 17.981462409886714, |
| "grad_norm": 0.1923012137413025, |
| "learning_rate": 2.467655390487822e-07, |
| "loss": 0.0227, |
| "step": 17460 |
| }, |
| { |
| "epoch": 17.99176107106076, |
| "grad_norm": 0.24936918914318085, |
| "learning_rate": 2.3773444970477955e-07, |
| "loss": 0.0249, |
| "step": 17470 |
| }, |
| { |
| "epoch": 18.00205973223481, |
| "grad_norm": 0.2869769334793091, |
| "learning_rate": 2.2887132101087615e-07, |
| "loss": 0.0248, |
| "step": 17480 |
| }, |
| { |
| "epoch": 18.012358393408856, |
| "grad_norm": 0.25350290536880493, |
| "learning_rate": 2.201761828824367e-07, |
| "loss": 0.0327, |
| "step": 17490 |
| }, |
| { |
| "epoch": 18.022657054582904, |
| "grad_norm": 0.27213600277900696, |
| "learning_rate": 2.1164906466783485e-07, |
| "loss": 0.0285, |
| "step": 17500 |
| }, |
| { |
| "epoch": 18.03295571575695, |
| "grad_norm": 0.257794588804245, |
| "learning_rate": 2.032899951483147e-07, |
| "loss": 0.0281, |
| "step": 17510 |
| }, |
| { |
| "epoch": 18.043254376931, |
| "grad_norm": 0.2469080537557602, |
| "learning_rate": 1.9509900253792955e-07, |
| "loss": 0.0259, |
| "step": 17520 |
| }, |
| { |
| "epoch": 18.053553038105047, |
| "grad_norm": 0.2920747995376587, |
| "learning_rate": 1.870761144834088e-07, |
| "loss": 0.0287, |
| "step": 17530 |
| }, |
| { |
| "epoch": 18.063851699279095, |
| "grad_norm": 0.2282969057559967, |
| "learning_rate": 1.7922135806410778e-07, |
| "loss": 0.0277, |
| "step": 17540 |
| }, |
| { |
| "epoch": 18.074150360453142, |
| "grad_norm": 0.28502708673477173, |
| "learning_rate": 1.7153475979186927e-07, |
| "loss": 0.0345, |
| "step": 17550 |
| }, |
| { |
| "epoch": 18.08444902162719, |
| "grad_norm": 0.23902451992034912, |
| "learning_rate": 1.6401634561098444e-07, |
| "loss": 0.0335, |
| "step": 17560 |
| }, |
| { |
| "epoch": 18.094747682801234, |
| "grad_norm": 0.3159581124782562, |
| "learning_rate": 1.566661408980541e-07, |
| "loss": 0.0299, |
| "step": 17570 |
| }, |
| { |
| "epoch": 18.105046343975282, |
| "grad_norm": 0.12344943732023239, |
| "learning_rate": 1.4948417046194985e-07, |
| "loss": 0.0272, |
| "step": 17580 |
| }, |
| { |
| "epoch": 18.11534500514933, |
| "grad_norm": 0.3794369101524353, |
| "learning_rate": 1.42470458543692e-07, |
| "loss": 0.0338, |
| "step": 17590 |
| }, |
| { |
| "epoch": 18.125643666323377, |
| "grad_norm": 0.1987241804599762, |
| "learning_rate": 1.3562502881639404e-07, |
| "loss": 0.0223, |
| "step": 17600 |
| }, |
| { |
| "epoch": 18.135942327497425, |
| "grad_norm": 0.21883957087993622, |
| "learning_rate": 1.2894790438516824e-07, |
| "loss": 0.0275, |
| "step": 17610 |
| }, |
| { |
| "epoch": 18.146240988671472, |
| "grad_norm": 0.2665363550186157, |
| "learning_rate": 1.2243910778705348e-07, |
| "loss": 0.033, |
| "step": 17620 |
| }, |
| { |
| "epoch": 18.15653964984552, |
| "grad_norm": 0.15010571479797363, |
| "learning_rate": 1.1609866099094313e-07, |
| "loss": 0.0227, |
| "step": 17630 |
| }, |
| { |
| "epoch": 18.166838311019568, |
| "grad_norm": 0.19142857193946838, |
| "learning_rate": 1.0992658539750178e-07, |
| "loss": 0.0279, |
| "step": 17640 |
| }, |
| { |
| "epoch": 18.177136972193615, |
| "grad_norm": 0.2638980746269226, |
| "learning_rate": 1.0392290183909304e-07, |
| "loss": 0.0265, |
| "step": 17650 |
| }, |
| { |
| "epoch": 18.187435633367663, |
| "grad_norm": 0.19933411478996277, |
| "learning_rate": 9.808763057971849e-08, |
| "loss": 0.0294, |
| "step": 17660 |
| }, |
| { |
| "epoch": 18.19773429454171, |
| "grad_norm": 0.32049107551574707, |
| "learning_rate": 9.242079131495107e-08, |
| "loss": 0.0268, |
| "step": 17670 |
| }, |
| { |
| "epoch": 18.20803295571576, |
| "grad_norm": 0.22636005282402039, |
| "learning_rate": 8.69224031718463e-08, |
| "loss": 0.0359, |
| "step": 17680 |
| }, |
| { |
| "epoch": 18.218331616889806, |
| "grad_norm": 0.19072987139225006, |
| "learning_rate": 8.159248470890334e-08, |
| "loss": 0.0272, |
| "step": 17690 |
| }, |
| { |
| "epoch": 18.22863027806385, |
| "grad_norm": 0.5597253441810608, |
| "learning_rate": 7.643105391598737e-08, |
| "loss": 0.0296, |
| "step": 17700 |
| }, |
| { |
| "epoch": 18.238928939237898, |
| "grad_norm": 0.20172372460365295, |
| "learning_rate": 7.143812821427953e-08, |
| "loss": 0.0321, |
| "step": 17710 |
| }, |
| { |
| "epoch": 18.249227600411945, |
| "grad_norm": 0.49044567346572876, |
| "learning_rate": 6.661372445621039e-08, |
| "loss": 0.0284, |
| "step": 17720 |
| }, |
| { |
| "epoch": 18.259526261585993, |
| "grad_norm": 0.2032887190580368, |
| "learning_rate": 6.19578589253933e-08, |
| "loss": 0.03, |
| "step": 17730 |
| }, |
| { |
| "epoch": 18.26982492276004, |
| "grad_norm": 0.30425992608070374, |
| "learning_rate": 5.747054733660773e-08, |
| "loss": 0.0301, |
| "step": 17740 |
| }, |
| { |
| "epoch": 18.28012358393409, |
| "grad_norm": 0.2486412227153778, |
| "learning_rate": 5.3151804835688267e-08, |
| "loss": 0.0261, |
| "step": 17750 |
| }, |
| { |
| "epoch": 18.290422245108136, |
| "grad_norm": 0.21091780066490173, |
| "learning_rate": 4.9001645999524613e-08, |
| "loss": 0.0276, |
| "step": 17760 |
| }, |
| { |
| "epoch": 18.300720906282184, |
| "grad_norm": 0.36458486318588257, |
| "learning_rate": 4.502008483598941e-08, |
| "loss": 0.0277, |
| "step": 17770 |
| }, |
| { |
| "epoch": 18.31101956745623, |
| "grad_norm": 0.21798443794250488, |
| "learning_rate": 4.1207134783888265e-08, |
| "loss": 0.0307, |
| "step": 17780 |
| }, |
| { |
| "epoch": 18.32131822863028, |
| "grad_norm": 0.27093908190727234, |
| "learning_rate": 3.756280871293205e-08, |
| "loss": 0.0328, |
| "step": 17790 |
| }, |
| { |
| "epoch": 18.331616889804327, |
| "grad_norm": 0.1765187829732895, |
| "learning_rate": 3.4087118923659125e-08, |
| "loss": 0.0305, |
| "step": 17800 |
| }, |
| { |
| "epoch": 18.341915550978374, |
| "grad_norm": 0.9125376343727112, |
| "learning_rate": 3.078007714744646e-08, |
| "loss": 0.0408, |
| "step": 17810 |
| }, |
| { |
| "epoch": 18.352214212152422, |
| "grad_norm": 0.1739547997713089, |
| "learning_rate": 2.7641694546409746e-08, |
| "loss": 0.0282, |
| "step": 17820 |
| }, |
| { |
| "epoch": 18.362512873326466, |
| "grad_norm": 0.2467593103647232, |
| "learning_rate": 2.467198171342e-08, |
| "loss": 0.0266, |
| "step": 17830 |
| }, |
| { |
| "epoch": 18.372811534500514, |
| "grad_norm": 0.7820371389389038, |
| "learning_rate": 2.1870948672036984e-08, |
| "loss": 0.0263, |
| "step": 17840 |
| }, |
| { |
| "epoch": 18.38311019567456, |
| "grad_norm": 0.30878883600234985, |
| "learning_rate": 1.9238604876470334e-08, |
| "loss": 0.03, |
| "step": 17850 |
| }, |
| { |
| "epoch": 18.39340885684861, |
| "grad_norm": 0.2729048728942871, |
| "learning_rate": 1.6774959211568465e-08, |
| "loss": 0.035, |
| "step": 17860 |
| }, |
| { |
| "epoch": 18.403707518022657, |
| "grad_norm": 0.33503258228302, |
| "learning_rate": 1.4480019992785254e-08, |
| "loss": 0.0261, |
| "step": 17870 |
| }, |
| { |
| "epoch": 18.414006179196704, |
| "grad_norm": 0.24983762204647064, |
| "learning_rate": 1.2353794966135646e-08, |
| "loss": 0.0265, |
| "step": 17880 |
| }, |
| { |
| "epoch": 18.424304840370752, |
| "grad_norm": 0.24591587483882904, |
| "learning_rate": 1.0396291308190087e-08, |
| "loss": 0.0248, |
| "step": 17890 |
| }, |
| { |
| "epoch": 18.4346035015448, |
| "grad_norm": 0.24605391919612885, |
| "learning_rate": 8.607515626030128e-09, |
| "loss": 0.0289, |
| "step": 17900 |
| }, |
| { |
| "epoch": 18.444902162718847, |
| "grad_norm": 0.2520316541194916, |
| "learning_rate": 6.987473957242863e-09, |
| "loss": 0.0307, |
| "step": 17910 |
| }, |
| { |
| "epoch": 18.455200823892895, |
| "grad_norm": 0.46191495656967163, |
| "learning_rate": 5.536171769887632e-09, |
| "loss": 0.0303, |
| "step": 17920 |
| }, |
| { |
| "epoch": 18.465499485066942, |
| "grad_norm": 0.26452863216400146, |
| "learning_rate": 4.253613962496017e-09, |
| "loss": 0.0329, |
| "step": 17930 |
| }, |
| { |
| "epoch": 18.47579814624099, |
| "grad_norm": 0.3968678116798401, |
| "learning_rate": 3.1398048640385315e-09, |
| "loss": 0.0356, |
| "step": 17940 |
| }, |
| { |
| "epoch": 18.486096807415038, |
| "grad_norm": 0.19242151081562042, |
| "learning_rate": 2.1947482338968705e-09, |
| "loss": 0.0265, |
| "step": 17950 |
| }, |
| { |
| "epoch": 18.496395468589082, |
| "grad_norm": 0.20866911113262177, |
| "learning_rate": 1.4184472618972154e-09, |
| "loss": 0.0251, |
| "step": 17960 |
| }, |
| { |
| "epoch": 18.50669412976313, |
| "grad_norm": 0.17729917168617249, |
| "learning_rate": 8.109045682547223e-10, |
| "loss": 0.0264, |
| "step": 17970 |
| }, |
| { |
| "epoch": 18.516992790937177, |
| "grad_norm": 0.19232727587223053, |
| "learning_rate": 3.721222035846239e-10, |
| "loss": 0.0366, |
| "step": 17980 |
| }, |
| { |
| "epoch": 18.527291452111225, |
| "grad_norm": 0.41915977001190186, |
| "learning_rate": 1.0210164889112861e-10, |
| "loss": 0.0288, |
| "step": 17990 |
| }, |
| { |
| "epoch": 18.537590113285273, |
| "grad_norm": 0.742242693901062, |
| "learning_rate": 8.438155674195258e-13, |
| "loss": 0.0335, |
| "step": 18000 |
| }, |
| { |
| "epoch": 18.537590113285273, |
| "step": 18000, |
| "total_flos": 0.0, |
| "train_loss": 0.05001054983586073, |
| "train_runtime": 5749.6082, |
| "train_samples_per_second": 100.181, |
| "train_steps_per_second": 3.131 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 18000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 19, |
| "save_steps": 20000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|