{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4293995562871252, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.156659271452087e-05, "grad_norm": 1.0665696947853576e-07, "learning_rate": 9.99935590066557e-06, "loss": 0.001, "step": 10 }, { "epoch": 0.00014313318542904174, "grad_norm": 0.04173135384917259, "learning_rate": 9.998640234738424e-06, "loss": 0.0002, "step": 20 }, { "epoch": 0.00021469977814356258, "grad_norm": 0.0013619003584608436, "learning_rate": 9.997996135403994e-06, "loss": 0.0047, "step": 30 }, { "epoch": 0.0002862663708580835, "grad_norm": 0.00038414556183852255, "learning_rate": 9.997352036069564e-06, "loss": 0.06, "step": 40 }, { "epoch": 0.0003578329635726043, "grad_norm": 0.3609836995601654, "learning_rate": 9.996636370142419e-06, "loss": 0.009, "step": 50 }, { "epoch": 0.00042939955628712516, "grad_norm": 0.001068720011971891, "learning_rate": 9.995992270807989e-06, "loss": 0.1414, "step": 60 }, { "epoch": 0.000500966149001646, "grad_norm": 0.0006150787230581045, "learning_rate": 9.995276604880842e-06, "loss": 0.0017, "step": 70 }, { "epoch": 0.000572532741716167, "grad_norm": 0.029033252969384193, "learning_rate": 9.994560938953698e-06, "loss": 0.0004, "step": 80 }, { "epoch": 0.0006440993344306878, "grad_norm": 0.007110236212611198, "learning_rate": 9.993845273026553e-06, "loss": 0.0128, "step": 90 }, { "epoch": 0.0007156659271452086, "grad_norm": 7.374540018645348e-06, "learning_rate": 9.993129607099407e-06, "loss": 0.0266, "step": 100 }, { "epoch": 0.0007872325198597295, "grad_norm": 0.00017453469627071172, "learning_rate": 9.992413941172262e-06, "loss": 0.0004, "step": 110 }, { "epoch": 0.0008587991125742503, "grad_norm": 0.011737486347556114, "learning_rate": 9.991698275245117e-06, "loss": 0.0027, "step": 120 }, { "epoch": 0.0009303657052887712, "grad_norm": 0.001037347363308072, "learning_rate": 9.99098260931797e-06, "loss": 0.0, "step": 130 }, { "epoch": 0.001001932298003292, "grad_norm": 0.0, "learning_rate": 9.990266943390826e-06, "loss": 0.0, "step": 140 }, { "epoch": 0.0010734988907178129, "grad_norm": 1.9183582500303942e-10, "learning_rate": 9.989551277463681e-06, "loss": 0.0012, "step": 150 }, { "epoch": 0.001145065483432334, "grad_norm": 2.354545722482726e-05, "learning_rate": 9.98890717812925e-06, "loss": 0.0577, "step": 160 }, { "epoch": 0.0012166320761468547, "grad_norm": 0.0008460516110062599, "learning_rate": 9.988191512202105e-06, "loss": 0.0117, "step": 170 }, { "epoch": 0.0012881986688613755, "grad_norm": 0.0, "learning_rate": 9.98747584627496e-06, "loss": 0.0007, "step": 180 }, { "epoch": 0.0013597652615758963, "grad_norm": 0.22307294607162476, "learning_rate": 9.986760180347814e-06, "loss": 0.0001, "step": 190 }, { "epoch": 0.0014313318542904171, "grad_norm": 2.6319284529563447e-07, "learning_rate": 9.98604451442067e-06, "loss": 0.0004, "step": 200 }, { "epoch": 0.0015028984470049382, "grad_norm": 0.47867366671562195, "learning_rate": 9.985328848493524e-06, "loss": 0.016, "step": 210 }, { "epoch": 0.001574465039719459, "grad_norm": 0.00024342595133930445, "learning_rate": 9.984613182566378e-06, "loss": 0.0678, "step": 220 }, { "epoch": 0.0016460316324339798, "grad_norm": 2.4763109340852907e-09, "learning_rate": 9.983969083231947e-06, "loss": 0.1383, "step": 230 }, { "epoch": 0.0017175982251485006, "grad_norm": 0.00747171463444829, "learning_rate": 9.983253417304803e-06, "loss": 0.0022, "step": 240 }, { "epoch": 0.0017891648178630214, "grad_norm": 3.1424710300598235e-07, "learning_rate": 9.982537751377658e-06, "loss": 0.0125, "step": 250 }, { "epoch": 0.0018607314105775425, "grad_norm": 0.0025624637492001057, "learning_rate": 9.981822085450512e-06, "loss": 0.0001, "step": 260 }, { "epoch": 0.0019322980032920633, "grad_norm": 0.0014779971679672599, "learning_rate": 9.981106419523367e-06, "loss": 0.0, "step": 270 }, { "epoch": 0.002003864596006584, "grad_norm": 0.0, "learning_rate": 9.980390753596222e-06, "loss": 0.0596, "step": 280 }, { "epoch": 0.002075431188721105, "grad_norm": 0.01014158595353365, "learning_rate": 9.979675087669078e-06, "loss": 0.2296, "step": 290 }, { "epoch": 0.0021469977814356257, "grad_norm": 0.00038621146813966334, "learning_rate": 9.978959421741931e-06, "loss": 0.0313, "step": 300 }, { "epoch": 0.0022185643741501468, "grad_norm": 0.0023568831384181976, "learning_rate": 9.978243755814786e-06, "loss": 0.0955, "step": 310 }, { "epoch": 0.002290130966864668, "grad_norm": 9.97691699922143e-07, "learning_rate": 9.977528089887642e-06, "loss": 0.0006, "step": 320 }, { "epoch": 0.0023616975595791884, "grad_norm": 0.0003033360408153385, "learning_rate": 9.976812423960495e-06, "loss": 0.0, "step": 330 }, { "epoch": 0.0024332641522937094, "grad_norm": 0.3742881119251251, "learning_rate": 9.97609675803335e-06, "loss": 0.0001, "step": 340 }, { "epoch": 0.00250483074500823, "grad_norm": 1.6097541077897404e-08, "learning_rate": 9.975381092106206e-06, "loss": 0.0031, "step": 350 }, { "epoch": 0.002576397337722751, "grad_norm": 0.0012006378965452313, "learning_rate": 9.974665426179061e-06, "loss": 0.0068, "step": 360 }, { "epoch": 0.002647963930437272, "grad_norm": 1.776325753155561e-09, "learning_rate": 9.973949760251915e-06, "loss": 0.0001, "step": 370 }, { "epoch": 0.0027195305231517927, "grad_norm": 0.0, "learning_rate": 9.97323409432477e-06, "loss": 0.0, "step": 380 }, { "epoch": 0.0027910971158663137, "grad_norm": 0.0, "learning_rate": 9.972518428397625e-06, "loss": 0.0, "step": 390 }, { "epoch": 0.0028626637085808343, "grad_norm": 0.017719902098178864, "learning_rate": 9.971802762470479e-06, "loss": 0.0004, "step": 400 }, { "epoch": 0.0029342303012953553, "grad_norm": 6.509122685294244e-10, "learning_rate": 9.971087096543334e-06, "loss": 0.0111, "step": 410 }, { "epoch": 0.0030057968940098764, "grad_norm": 0.00775287626311183, "learning_rate": 9.97037143061619e-06, "loss": 0.0005, "step": 420 }, { "epoch": 0.003077363486724397, "grad_norm": 8.778872029324702e-08, "learning_rate": 9.969655764689043e-06, "loss": 0.0, "step": 430 }, { "epoch": 0.003148930079438918, "grad_norm": 10.180048942565918, "learning_rate": 9.968940098761898e-06, "loss": 0.0017, "step": 440 }, { "epoch": 0.0032204966721534386, "grad_norm": 0.0016790173249319196, "learning_rate": 9.968224432834754e-06, "loss": 0.002, "step": 450 }, { "epoch": 0.0032920632648679596, "grad_norm": 0.003979462664574385, "learning_rate": 9.967508766907609e-06, "loss": 0.0, "step": 460 }, { "epoch": 0.0033636298575824806, "grad_norm": 0.0001845760125434026, "learning_rate": 9.966793100980463e-06, "loss": 0.0, "step": 470 }, { "epoch": 0.0034351964502970012, "grad_norm": 0.0, "learning_rate": 9.966077435053318e-06, "loss": 0.0039, "step": 480 }, { "epoch": 0.0035067630430115223, "grad_norm": 18.806777954101562, "learning_rate": 9.965361769126173e-06, "loss": 0.0051, "step": 490 }, { "epoch": 0.003578329635726043, "grad_norm": 7.529943957251817e-08, "learning_rate": 9.964646103199027e-06, "loss": 0.0, "step": 500 }, { "epoch": 0.003649896228440564, "grad_norm": 0.00028561174985952675, "learning_rate": 9.963930437271882e-06, "loss": 0.0, "step": 510 }, { "epoch": 0.003721462821155085, "grad_norm": 124.23316955566406, "learning_rate": 9.963214771344737e-06, "loss": 0.0382, "step": 520 }, { "epoch": 0.0037930294138696055, "grad_norm": 0.0, "learning_rate": 9.962499105417593e-06, "loss": 0.0434, "step": 530 }, { "epoch": 0.0038645960065841266, "grad_norm": 0.0, "learning_rate": 9.961783439490446e-06, "loss": 0.0, "step": 540 }, { "epoch": 0.003936162599298647, "grad_norm": 7.629507692286097e-09, "learning_rate": 9.961067773563302e-06, "loss": 0.0, "step": 550 }, { "epoch": 0.004007729192013168, "grad_norm": 2.5727549868292954e-08, "learning_rate": 9.960352107636157e-06, "loss": 0.0, "step": 560 }, { "epoch": 0.004079295784727689, "grad_norm": 0.0, "learning_rate": 9.95963644170901e-06, "loss": 0.0, "step": 570 }, { "epoch": 0.00415086237744221, "grad_norm": 0.17488853633403778, "learning_rate": 9.958920775781866e-06, "loss": 0.0178, "step": 580 }, { "epoch": 0.00422242897015673, "grad_norm": 94.36006927490234, "learning_rate": 9.958205109854721e-06, "loss": 0.0222, "step": 590 }, { "epoch": 0.0042939955628712514, "grad_norm": 2.5901192657329375e-07, "learning_rate": 9.957489443927575e-06, "loss": 0.0016, "step": 600 }, { "epoch": 0.0043655621555857725, "grad_norm": 0.0, "learning_rate": 9.95677377800043e-06, "loss": 0.0, "step": 610 }, { "epoch": 0.0044371287483002935, "grad_norm": 0.0008648286457173526, "learning_rate": 9.956058112073285e-06, "loss": 0.0, "step": 620 }, { "epoch": 0.0045086953410148145, "grad_norm": 0.08059705048799515, "learning_rate": 9.95534244614614e-06, "loss": 0.0003, "step": 630 }, { "epoch": 0.004580261933729336, "grad_norm": 5.640831801656532e-08, "learning_rate": 9.954626780218994e-06, "loss": 0.0002, "step": 640 }, { "epoch": 0.004651828526443856, "grad_norm": 0.0, "learning_rate": 9.95391111429185e-06, "loss": 0.0002, "step": 650 }, { "epoch": 0.004723395119158377, "grad_norm": 6.516717721005705e-10, "learning_rate": 9.953195448364705e-06, "loss": 0.0243, "step": 660 }, { "epoch": 0.004794961711872898, "grad_norm": 0.00017126731108874083, "learning_rate": 9.952479782437558e-06, "loss": 0.0008, "step": 670 }, { "epoch": 0.004866528304587419, "grad_norm": 0.0, "learning_rate": 9.951764116510414e-06, "loss": 0.0, "step": 680 }, { "epoch": 0.00493809489730194, "grad_norm": 7.248900368850286e-10, "learning_rate": 9.951048450583269e-06, "loss": 0.0002, "step": 690 }, { "epoch": 0.00500966149001646, "grad_norm": 2.339270395168569e-05, "learning_rate": 9.950332784656124e-06, "loss": 0.1335, "step": 700 }, { "epoch": 0.005081228082730981, "grad_norm": 0.059922970831394196, "learning_rate": 9.949617118728978e-06, "loss": 0.0077, "step": 710 }, { "epoch": 0.005152794675445502, "grad_norm": 0.0001174464778159745, "learning_rate": 9.948901452801833e-06, "loss": 0.0, "step": 720 }, { "epoch": 0.005224361268160023, "grad_norm": 1.288625668394161e-07, "learning_rate": 9.948185786874688e-06, "loss": 0.0001, "step": 730 }, { "epoch": 0.005295927860874544, "grad_norm": 6.428718668516353e-10, "learning_rate": 9.947470120947542e-06, "loss": 0.0, "step": 740 }, { "epoch": 0.005367494453589064, "grad_norm": 0.0, "learning_rate": 9.946754455020397e-06, "loss": 0.0, "step": 750 }, { "epoch": 0.005439061046303585, "grad_norm": 1.538817048072815, "learning_rate": 9.946038789093253e-06, "loss": 0.0003, "step": 760 }, { "epoch": 0.005510627639018106, "grad_norm": 1.7431714738336268e-08, "learning_rate": 9.945323123166108e-06, "loss": 0.0, "step": 770 }, { "epoch": 0.005582194231732627, "grad_norm": 9.345171747554559e-07, "learning_rate": 9.944607457238961e-06, "loss": 0.0, "step": 780 }, { "epoch": 0.005653760824447148, "grad_norm": 0.004772907588630915, "learning_rate": 9.943891791311817e-06, "loss": 0.0001, "step": 790 }, { "epoch": 0.005725327417161669, "grad_norm": 0.003016833448782563, "learning_rate": 9.943176125384672e-06, "loss": 0.0031, "step": 800 }, { "epoch": 0.00579689400987619, "grad_norm": 0.007513599935919046, "learning_rate": 9.942460459457526e-06, "loss": 0.0004, "step": 810 }, { "epoch": 0.005868460602590711, "grad_norm": 0.00039559247670695186, "learning_rate": 9.941744793530381e-06, "loss": 0.0, "step": 820 }, { "epoch": 0.005940027195305232, "grad_norm": 0.0024365654680877924, "learning_rate": 9.941029127603236e-06, "loss": 0.0001, "step": 830 }, { "epoch": 0.006011593788019753, "grad_norm": 7.398688328663638e-10, "learning_rate": 9.94031346167609e-06, "loss": 0.0, "step": 840 }, { "epoch": 0.006083160380734273, "grad_norm": 1.7558342335632915e-07, "learning_rate": 9.939597795748945e-06, "loss": 0.0, "step": 850 }, { "epoch": 0.006154726973448794, "grad_norm": 0.00027524487813934684, "learning_rate": 9.9388821298218e-06, "loss": 0.0107, "step": 860 }, { "epoch": 0.006226293566163315, "grad_norm": 7.489454105780169e-07, "learning_rate": 9.938166463894656e-06, "loss": 0.0013, "step": 870 }, { "epoch": 0.006297860158877836, "grad_norm": 1.3250531782915687e-09, "learning_rate": 9.93745079796751e-06, "loss": 0.0, "step": 880 }, { "epoch": 0.006369426751592357, "grad_norm": 0.0, "learning_rate": 9.936735132040365e-06, "loss": 0.0001, "step": 890 }, { "epoch": 0.006440993344306877, "grad_norm": 2.3626999734460696e-07, "learning_rate": 9.93601946611322e-06, "loss": 0.0002, "step": 900 }, { "epoch": 0.006512559937021398, "grad_norm": 1.6611747923889197e-05, "learning_rate": 9.935303800186073e-06, "loss": 0.0, "step": 910 }, { "epoch": 0.006584126529735919, "grad_norm": 1.868029952944994e-09, "learning_rate": 9.934588134258929e-06, "loss": 0.0081, "step": 920 }, { "epoch": 0.00665569312245044, "grad_norm": 2.962829448449611e-08, "learning_rate": 9.933872468331784e-06, "loss": 0.0002, "step": 930 }, { "epoch": 0.006727259715164961, "grad_norm": 0.000442830438259989, "learning_rate": 9.93315680240464e-06, "loss": 0.0, "step": 940 }, { "epoch": 0.0067988263078794815, "grad_norm": 0.0021042018197476864, "learning_rate": 9.932441136477493e-06, "loss": 0.0, "step": 950 }, { "epoch": 0.0068703929005940025, "grad_norm": 0.30429553985595703, "learning_rate": 9.931725470550348e-06, "loss": 0.0002, "step": 960 }, { "epoch": 0.0069419594933085235, "grad_norm": 0.0, "learning_rate": 9.931009804623204e-06, "loss": 0.0008, "step": 970 }, { "epoch": 0.0070135260860230446, "grad_norm": 5.563533136410115e-07, "learning_rate": 9.930294138696057e-06, "loss": 0.0, "step": 980 }, { "epoch": 0.007085092678737566, "grad_norm": 39.8405647277832, "learning_rate": 9.929578472768912e-06, "loss": 0.0059, "step": 990 }, { "epoch": 0.007156659271452086, "grad_norm": 0.20451369881629944, "learning_rate": 9.928862806841768e-06, "loss": 0.0001, "step": 1000 }, { "epoch": 0.007228225864166607, "grad_norm": 0.0, "learning_rate": 9.928147140914623e-06, "loss": 0.0, "step": 1010 }, { "epoch": 0.007299792456881128, "grad_norm": 0.7672151327133179, "learning_rate": 9.927431474987477e-06, "loss": 0.0002, "step": 1020 }, { "epoch": 0.007371359049595649, "grad_norm": 0.0, "learning_rate": 9.926715809060332e-06, "loss": 0.0, "step": 1030 }, { "epoch": 0.00744292564231017, "grad_norm": 0.0001947313139680773, "learning_rate": 9.926000143133187e-06, "loss": 0.0003, "step": 1040 }, { "epoch": 0.007514492235024691, "grad_norm": 18.21683692932129, "learning_rate": 9.92528447720604e-06, "loss": 0.0035, "step": 1050 }, { "epoch": 0.007586058827739211, "grad_norm": 8.01308051450178e-05, "learning_rate": 9.924568811278896e-06, "loss": 0.001, "step": 1060 }, { "epoch": 0.007657625420453732, "grad_norm": 0.0, "learning_rate": 9.923853145351751e-06, "loss": 0.0, "step": 1070 }, { "epoch": 0.007729192013168253, "grad_norm": 9.29667081095431e-08, "learning_rate": 9.923137479424605e-06, "loss": 0.0092, "step": 1080 }, { "epoch": 0.007800758605882774, "grad_norm": 0.22147075831890106, "learning_rate": 9.92242181349746e-06, "loss": 0.0001, "step": 1090 }, { "epoch": 0.007872325198597294, "grad_norm": 0.0023223678581416607, "learning_rate": 9.921706147570316e-06, "loss": 0.0304, "step": 1100 }, { "epoch": 0.007943891791311816, "grad_norm": 0.0, "learning_rate": 9.92099048164317e-06, "loss": 0.0, "step": 1110 }, { "epoch": 0.008015458384026336, "grad_norm": 0.0, "learning_rate": 9.920274815716024e-06, "loss": 0.0, "step": 1120 }, { "epoch": 0.008087024976740857, "grad_norm": 0.0, "learning_rate": 9.91955914978888e-06, "loss": 0.0005, "step": 1130 }, { "epoch": 0.008158591569455378, "grad_norm": 0.0, "learning_rate": 9.918843483861735e-06, "loss": 0.0005, "step": 1140 }, { "epoch": 0.008230158162169899, "grad_norm": 0.0, "learning_rate": 9.918127817934589e-06, "loss": 0.0175, "step": 1150 }, { "epoch": 0.00830172475488442, "grad_norm": 0.0, "learning_rate": 9.917412152007444e-06, "loss": 0.0014, "step": 1160 }, { "epoch": 0.00837329134759894, "grad_norm": 0.0, "learning_rate": 9.9166964860803e-06, "loss": 0.0, "step": 1170 }, { "epoch": 0.00844485794031346, "grad_norm": 3.430635331369558e-09, "learning_rate": 9.915980820153154e-06, "loss": 0.0, "step": 1180 }, { "epoch": 0.008516424533027983, "grad_norm": 0.0, "learning_rate": 9.915265154226008e-06, "loss": 0.0097, "step": 1190 }, { "epoch": 0.008587991125742503, "grad_norm": 0.0, "learning_rate": 9.914549488298862e-06, "loss": 0.0, "step": 1200 }, { "epoch": 0.008659557718457025, "grad_norm": 1.1210968819952427e-09, "learning_rate": 9.913833822371719e-06, "loss": 0.017, "step": 1210 }, { "epoch": 0.008731124311171545, "grad_norm": 0.0, "learning_rate": 9.913118156444572e-06, "loss": 0.0, "step": 1220 }, { "epoch": 0.008802690903886065, "grad_norm": 0.0, "learning_rate": 9.912402490517428e-06, "loss": 0.0219, "step": 1230 }, { "epoch": 0.008874257496600587, "grad_norm": 0.0, "learning_rate": 9.911686824590281e-06, "loss": 0.0, "step": 1240 }, { "epoch": 0.008945824089315107, "grad_norm": 12.847982406616211, "learning_rate": 9.910971158663136e-06, "loss": 0.003, "step": 1250 }, { "epoch": 0.009017390682029629, "grad_norm": 0.0, "learning_rate": 9.910255492735992e-06, "loss": 0.0, "step": 1260 }, { "epoch": 0.00908895727474415, "grad_norm": 0.0, "learning_rate": 9.909539826808845e-06, "loss": 0.0001, "step": 1270 }, { "epoch": 0.009160523867458671, "grad_norm": 0.0, "learning_rate": 9.908824160881702e-06, "loss": 0.0, "step": 1280 }, { "epoch": 0.009232090460173191, "grad_norm": 0.0, "learning_rate": 9.908108494954556e-06, "loss": 0.0, "step": 1290 }, { "epoch": 0.009303657052887711, "grad_norm": 0.0, "learning_rate": 9.907392829027411e-06, "loss": 0.0002, "step": 1300 }, { "epoch": 0.009375223645602233, "grad_norm": 5.1091660679958295e-06, "learning_rate": 9.906677163100265e-06, "loss": 0.0, "step": 1310 }, { "epoch": 0.009446790238316754, "grad_norm": 0.0003692104364745319, "learning_rate": 9.90596149717312e-06, "loss": 0.0, "step": 1320 }, { "epoch": 0.009518356831031275, "grad_norm": 2.80346807812748e-06, "learning_rate": 9.905245831245975e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.009589923423745796, "grad_norm": 0.0, "learning_rate": 9.904530165318829e-06, "loss": 0.0002, "step": 1340 }, { "epoch": 0.009661490016460316, "grad_norm": 1.5455165339517407e-05, "learning_rate": 9.903814499391686e-06, "loss": 0.0002, "step": 1350 }, { "epoch": 0.009733056609174838, "grad_norm": 0.0, "learning_rate": 9.90309883346454e-06, "loss": 0.0309, "step": 1360 }, { "epoch": 0.009804623201889358, "grad_norm": 1.022346509671479e-06, "learning_rate": 9.902383167537393e-06, "loss": 0.0003, "step": 1370 }, { "epoch": 0.00987618979460388, "grad_norm": 0.0, "learning_rate": 9.901667501610248e-06, "loss": 0.0002, "step": 1380 }, { "epoch": 0.0099477563873184, "grad_norm": 1.4283578395843506, "learning_rate": 9.900951835683104e-06, "loss": 0.0003, "step": 1390 }, { "epoch": 0.01001932298003292, "grad_norm": 0.0016347956843674183, "learning_rate": 9.900236169755959e-06, "loss": 0.0, "step": 1400 }, { "epoch": 0.010090889572747442, "grad_norm": 7.855666626710445e-05, "learning_rate": 9.899520503828813e-06, "loss": 0.0, "step": 1410 }, { "epoch": 0.010162456165461962, "grad_norm": 0.0009814942022785544, "learning_rate": 9.89880483790167e-06, "loss": 0.0, "step": 1420 }, { "epoch": 0.010234022758176484, "grad_norm": 0.0, "learning_rate": 9.898089171974523e-06, "loss": 0.0, "step": 1430 }, { "epoch": 0.010305589350891004, "grad_norm": 2.139753087249119e-05, "learning_rate": 9.897373506047377e-06, "loss": 0.0001, "step": 1440 }, { "epoch": 0.010377155943605524, "grad_norm": 0.012905268929898739, "learning_rate": 9.896657840120232e-06, "loss": 0.0013, "step": 1450 }, { "epoch": 0.010448722536320046, "grad_norm": 0.0, "learning_rate": 9.895942174193087e-06, "loss": 0.0004, "step": 1460 }, { "epoch": 0.010520289129034566, "grad_norm": 0.04740719497203827, "learning_rate": 9.895226508265943e-06, "loss": 0.0, "step": 1470 }, { "epoch": 0.010591855721749088, "grad_norm": 0.0, "learning_rate": 9.894510842338796e-06, "loss": 0.0078, "step": 1480 }, { "epoch": 0.010663422314463608, "grad_norm": 0.0, "learning_rate": 9.893795176411652e-06, "loss": 0.0005, "step": 1490 }, { "epoch": 0.010734988907178129, "grad_norm": 0.0, "learning_rate": 9.893079510484507e-06, "loss": 0.0, "step": 1500 }, { "epoch": 0.01080655549989265, "grad_norm": 0.0, "learning_rate": 9.89236384455736e-06, "loss": 0.0, "step": 1510 }, { "epoch": 0.01087812209260717, "grad_norm": 2.652165412902832, "learning_rate": 9.891648178630216e-06, "loss": 0.0007, "step": 1520 }, { "epoch": 0.010949688685321693, "grad_norm": 0.001545075443573296, "learning_rate": 9.890932512703071e-06, "loss": 0.0, "step": 1530 }, { "epoch": 0.011021255278036213, "grad_norm": 5.160245564184152e-10, "learning_rate": 9.890216846775926e-06, "loss": 0.0009, "step": 1540 }, { "epoch": 0.011092821870750733, "grad_norm": 0.0, "learning_rate": 9.88950118084878e-06, "loss": 0.0, "step": 1550 }, { "epoch": 0.011164388463465255, "grad_norm": 2.5166247723973356e-05, "learning_rate": 9.888785514921635e-06, "loss": 0.0001, "step": 1560 }, { "epoch": 0.011235955056179775, "grad_norm": 0.0010291750077158213, "learning_rate": 9.88806984899449e-06, "loss": 0.1354, "step": 1570 }, { "epoch": 0.011307521648894297, "grad_norm": 0.003323540324345231, "learning_rate": 9.887354183067344e-06, "loss": 0.0033, "step": 1580 }, { "epoch": 0.011379088241608817, "grad_norm": 4.6162565436880243e-10, "learning_rate": 9.8866385171402e-06, "loss": 0.0, "step": 1590 }, { "epoch": 0.011450654834323337, "grad_norm": 0.0006468519568443298, "learning_rate": 9.885922851213055e-06, "loss": 0.01, "step": 1600 }, { "epoch": 0.011522221427037859, "grad_norm": 2.6513555440033088e-06, "learning_rate": 9.885207185285908e-06, "loss": 0.0029, "step": 1610 }, { "epoch": 0.01159378801975238, "grad_norm": 0.0, "learning_rate": 9.884491519358764e-06, "loss": 0.0001, "step": 1620 }, { "epoch": 0.011665354612466901, "grad_norm": 1.726349728414789e-05, "learning_rate": 9.883775853431619e-06, "loss": 0.0, "step": 1630 }, { "epoch": 0.011736921205181421, "grad_norm": 6.160176326375222e-06, "learning_rate": 9.883060187504474e-06, "loss": 0.0005, "step": 1640 }, { "epoch": 0.011808487797895941, "grad_norm": 7.844020615266345e-07, "learning_rate": 9.882344521577328e-06, "loss": 0.0, "step": 1650 }, { "epoch": 0.011880054390610463, "grad_norm": 5.114344503454049e-10, "learning_rate": 9.881628855650183e-06, "loss": 0.0, "step": 1660 }, { "epoch": 0.011951620983324984, "grad_norm": 0.0, "learning_rate": 9.880913189723038e-06, "loss": 0.0, "step": 1670 }, { "epoch": 0.012023187576039505, "grad_norm": 0.0, "learning_rate": 9.880197523795892e-06, "loss": 0.0002, "step": 1680 }, { "epoch": 0.012094754168754026, "grad_norm": 9.89863191414031e-10, "learning_rate": 9.879481857868747e-06, "loss": 0.0, "step": 1690 }, { "epoch": 0.012166320761468546, "grad_norm": 0.0, "learning_rate": 9.878766191941603e-06, "loss": 0.0, "step": 1700 }, { "epoch": 0.012237887354183068, "grad_norm": 0.0015257945051416755, "learning_rate": 9.878050526014458e-06, "loss": 0.0, "step": 1710 }, { "epoch": 0.012309453946897588, "grad_norm": 0.0, "learning_rate": 9.877334860087311e-06, "loss": 0.0, "step": 1720 }, { "epoch": 0.01238102053961211, "grad_norm": 0.0004509277641773224, "learning_rate": 9.876619194160167e-06, "loss": 0.0, "step": 1730 }, { "epoch": 0.01245258713232663, "grad_norm": 0.0026518418453633785, "learning_rate": 9.875903528233022e-06, "loss": 0.0, "step": 1740 }, { "epoch": 0.01252415372504115, "grad_norm": 1.577619634929306e-08, "learning_rate": 9.875187862305876e-06, "loss": 0.0004, "step": 1750 }, { "epoch": 0.012595720317755672, "grad_norm": 0.0, "learning_rate": 9.874472196378731e-06, "loss": 0.0, "step": 1760 }, { "epoch": 0.012667286910470192, "grad_norm": 5.295082705636389e-10, "learning_rate": 9.873756530451586e-06, "loss": 0.0, "step": 1770 }, { "epoch": 0.012738853503184714, "grad_norm": 0.0, "learning_rate": 9.873040864524441e-06, "loss": 0.0, "step": 1780 }, { "epoch": 0.012810420095899234, "grad_norm": 0.0, "learning_rate": 9.872325198597295e-06, "loss": 0.0, "step": 1790 }, { "epoch": 0.012881986688613754, "grad_norm": 3.47268702682868e-08, "learning_rate": 9.87160953267015e-06, "loss": 0.0, "step": 1800 }, { "epoch": 0.012953553281328276, "grad_norm": 3.3454754522210806e-09, "learning_rate": 9.870893866743006e-06, "loss": 0.0002, "step": 1810 }, { "epoch": 0.013025119874042796, "grad_norm": 0.0, "learning_rate": 9.87017820081586e-06, "loss": 0.0, "step": 1820 }, { "epoch": 0.013096686466757318, "grad_norm": 0.0, "learning_rate": 9.869462534888715e-06, "loss": 0.0, "step": 1830 }, { "epoch": 0.013168253059471838, "grad_norm": 1.3083990779705346e-07, "learning_rate": 9.86874686896157e-06, "loss": 0.0, "step": 1840 }, { "epoch": 0.013239819652186359, "grad_norm": 0.0, "learning_rate": 9.868031203034423e-06, "loss": 0.0, "step": 1850 }, { "epoch": 0.01331138624490088, "grad_norm": 0.0, "learning_rate": 9.867315537107279e-06, "loss": 0.0, "step": 1860 }, { "epoch": 0.0133829528376154, "grad_norm": 2.1358020774187025e-07, "learning_rate": 9.866599871180134e-06, "loss": 0.0, "step": 1870 }, { "epoch": 0.013454519430329923, "grad_norm": 0.0, "learning_rate": 9.86588420525299e-06, "loss": 0.0002, "step": 1880 }, { "epoch": 0.013526086023044443, "grad_norm": 0.0, "learning_rate": 9.865168539325843e-06, "loss": 0.0, "step": 1890 }, { "epoch": 0.013597652615758963, "grad_norm": 9.239310202246998e-06, "learning_rate": 9.864452873398698e-06, "loss": 0.6477, "step": 1900 }, { "epoch": 0.013669219208473485, "grad_norm": 0.0, "learning_rate": 9.863737207471553e-06, "loss": 0.0, "step": 1910 }, { "epoch": 0.013740785801188005, "grad_norm": 0.00018629147962201387, "learning_rate": 9.863021541544407e-06, "loss": 0.0, "step": 1920 }, { "epoch": 0.013812352393902527, "grad_norm": 0.4870759844779968, "learning_rate": 9.862305875617262e-06, "loss": 0.0012, "step": 1930 }, { "epoch": 0.013883918986617047, "grad_norm": 0.0002193218533648178, "learning_rate": 9.861590209690118e-06, "loss": 0.0, "step": 1940 }, { "epoch": 0.013955485579331567, "grad_norm": 0.3153599500656128, "learning_rate": 9.860874543762973e-06, "loss": 0.0499, "step": 1950 }, { "epoch": 0.014027052172046089, "grad_norm": 5.737544883643864e-10, "learning_rate": 9.860158877835827e-06, "loss": 0.0046, "step": 1960 }, { "epoch": 0.01409861876476061, "grad_norm": 187.06614685058594, "learning_rate": 9.859443211908682e-06, "loss": 0.0374, "step": 1970 }, { "epoch": 0.014170185357475131, "grad_norm": 0.0, "learning_rate": 9.858727545981537e-06, "loss": 0.0354, "step": 1980 }, { "epoch": 0.014241751950189651, "grad_norm": 0.0, "learning_rate": 9.85801188005439e-06, "loss": 0.0, "step": 1990 }, { "epoch": 0.014313318542904171, "grad_norm": 15.968443870544434, "learning_rate": 9.857296214127246e-06, "loss": 0.0181, "step": 2000 }, { "epoch": 0.014384885135618693, "grad_norm": 0.0, "learning_rate": 9.856580548200101e-06, "loss": 0.0, "step": 2010 }, { "epoch": 0.014456451728333214, "grad_norm": 6.594394683837891, "learning_rate": 9.855864882272955e-06, "loss": 0.0048, "step": 2020 }, { "epoch": 0.014528018321047735, "grad_norm": 12.64254093170166, "learning_rate": 9.85514921634581e-06, "loss": 2.1536, "step": 2030 }, { "epoch": 0.014599584913762256, "grad_norm": 0.000530912249814719, "learning_rate": 9.854433550418666e-06, "loss": 0.0, "step": 2040 }, { "epoch": 0.014671151506476776, "grad_norm": 2.797987690428272e-07, "learning_rate": 9.85371788449152e-06, "loss": 0.0, "step": 2050 }, { "epoch": 0.014742718099191298, "grad_norm": 4.825419508414086e-10, "learning_rate": 9.853002218564374e-06, "loss": 0.0153, "step": 2060 }, { "epoch": 0.014814284691905818, "grad_norm": 9.363269271034369e-08, "learning_rate": 9.85228655263723e-06, "loss": 0.0, "step": 2070 }, { "epoch": 0.01488585128462034, "grad_norm": 5.835069094572987e-10, "learning_rate": 9.851570886710085e-06, "loss": 0.0001, "step": 2080 }, { "epoch": 0.01495741787733486, "grad_norm": 1.0331623343518004e-05, "learning_rate": 9.850855220782939e-06, "loss": 0.0, "step": 2090 }, { "epoch": 0.015028984470049382, "grad_norm": 0.0, "learning_rate": 9.850139554855794e-06, "loss": 0.0138, "step": 2100 }, { "epoch": 0.015100551062763902, "grad_norm": 1.4426464645111992e-07, "learning_rate": 9.84942388892865e-06, "loss": 0.0, "step": 2110 }, { "epoch": 0.015172117655478422, "grad_norm": 7.491582891816506e-06, "learning_rate": 9.848708223001504e-06, "loss": 0.0, "step": 2120 }, { "epoch": 0.015243684248192944, "grad_norm": 0.0, "learning_rate": 9.847992557074358e-06, "loss": 0.0, "step": 2130 }, { "epoch": 0.015315250840907464, "grad_norm": 5.705431931346538e-07, "learning_rate": 9.847276891147213e-06, "loss": 0.0, "step": 2140 }, { "epoch": 0.015386817433621986, "grad_norm": 0.0, "learning_rate": 9.846561225220069e-06, "loss": 0.0, "step": 2150 }, { "epoch": 0.015458384026336506, "grad_norm": 0.0, "learning_rate": 9.845845559292922e-06, "loss": 0.0, "step": 2160 }, { "epoch": 0.015529950619051026, "grad_norm": 0.00021062888845335692, "learning_rate": 9.845129893365778e-06, "loss": 0.0, "step": 2170 }, { "epoch": 0.015601517211765548, "grad_norm": 0.0, "learning_rate": 9.844414227438633e-06, "loss": 0.0, "step": 2180 }, { "epoch": 0.01567308380448007, "grad_norm": 0.0, "learning_rate": 9.843698561511488e-06, "loss": 0.0, "step": 2190 }, { "epoch": 0.01574465039719459, "grad_norm": 0.0, "learning_rate": 9.842982895584342e-06, "loss": 0.0018, "step": 2200 }, { "epoch": 0.01581621698990911, "grad_norm": 1.7581329725402384e-09, "learning_rate": 9.842267229657197e-06, "loss": 0.8078, "step": 2210 }, { "epoch": 0.015887783582623632, "grad_norm": 0.0, "learning_rate": 9.841551563730052e-06, "loss": 0.0045, "step": 2220 }, { "epoch": 0.01595935017533815, "grad_norm": NaN, "learning_rate": 9.84090746439562e-06, "loss": 0.2393, "step": 2230 }, { "epoch": 0.016030916768052673, "grad_norm": 0.0, "learning_rate": 9.840191798468476e-06, "loss": 0.0097, "step": 2240 }, { "epoch": 0.016102483360767195, "grad_norm": 0.0, "learning_rate": 9.839476132541331e-06, "loss": 0.0, "step": 2250 }, { "epoch": 0.016174049953481713, "grad_norm": 10.784346580505371, "learning_rate": 9.838760466614186e-06, "loss": 0.0016, "step": 2260 }, { "epoch": 0.016245616546196235, "grad_norm": 0.0, "learning_rate": 9.83804480068704e-06, "loss": 0.1242, "step": 2270 }, { "epoch": 0.016317183138910757, "grad_norm": 9.801570892333984, "learning_rate": 9.837329134759895e-06, "loss": 0.167, "step": 2280 }, { "epoch": 0.01638874973162528, "grad_norm": 2.846622737706639e-05, "learning_rate": 9.83661346883275e-06, "loss": 0.0, "step": 2290 }, { "epoch": 0.016460316324339797, "grad_norm": 2.199837581429165e-05, "learning_rate": 9.835897802905604e-06, "loss": 0.0013, "step": 2300 }, { "epoch": 0.01653188291705432, "grad_norm": 1.7948255104371924e-09, "learning_rate": 9.83518213697846e-06, "loss": 0.0001, "step": 2310 }, { "epoch": 0.01660344950976884, "grad_norm": 0.0, "learning_rate": 9.834466471051315e-06, "loss": 0.0001, "step": 2320 }, { "epoch": 0.01667501610248336, "grad_norm": 0.07214747369289398, "learning_rate": 9.83375080512417e-06, "loss": 0.0005, "step": 2330 }, { "epoch": 0.01674658269519788, "grad_norm": 5.818569661641959e-06, "learning_rate": 9.833035139197023e-06, "loss": 0.0324, "step": 2340 }, { "epoch": 0.016818149287912403, "grad_norm": 0.0, "learning_rate": 9.832319473269877e-06, "loss": 0.0044, "step": 2350 }, { "epoch": 0.01688971588062692, "grad_norm": 0.0070510078221559525, "learning_rate": 9.831603807342734e-06, "loss": 0.0, "step": 2360 }, { "epoch": 0.016961282473341444, "grad_norm": 0.12176995724439621, "learning_rate": 9.830888141415588e-06, "loss": 0.0, "step": 2370 }, { "epoch": 0.017032849066055965, "grad_norm": 0.18405930697917938, "learning_rate": 9.830172475488443e-06, "loss": 0.0001, "step": 2380 }, { "epoch": 0.017104415658770487, "grad_norm": 7.549598279865677e-09, "learning_rate": 9.829456809561298e-06, "loss": 0.0, "step": 2390 }, { "epoch": 0.017175982251485006, "grad_norm": 0.0, "learning_rate": 9.828741143634154e-06, "loss": 0.0, "step": 2400 }, { "epoch": 0.017247548844199528, "grad_norm": 0.0, "learning_rate": 9.828025477707007e-06, "loss": 0.0, "step": 2410 }, { "epoch": 0.01731911543691405, "grad_norm": 0.0, "learning_rate": 9.82730981177986e-06, "loss": 0.0, "step": 2420 }, { "epoch": 0.017390682029628568, "grad_norm": 0.0, "learning_rate": 9.826594145852718e-06, "loss": 0.0, "step": 2430 }, { "epoch": 0.01746224862234309, "grad_norm": 0.0, "learning_rate": 9.825878479925571e-06, "loss": 0.0, "step": 2440 }, { "epoch": 0.017533815215057612, "grad_norm": 0.0, "learning_rate": 9.825162813998427e-06, "loss": 0.0, "step": 2450 }, { "epoch": 0.01760538180777213, "grad_norm": 0.0, "learning_rate": 9.824447148071282e-06, "loss": 0.0, "step": 2460 }, { "epoch": 0.017676948400486652, "grad_norm": 0.0, "learning_rate": 9.823731482144135e-06, "loss": 0.0, "step": 2470 }, { "epoch": 0.017748514993201174, "grad_norm": 0.0011258295271545649, "learning_rate": 9.82301581621699e-06, "loss": 0.0, "step": 2480 }, { "epoch": 0.017820081585915696, "grad_norm": 0.0, "learning_rate": 9.822300150289844e-06, "loss": 0.0, "step": 2490 }, { "epoch": 0.017891648178630214, "grad_norm": 3.4875276924140053e-06, "learning_rate": 9.821584484362701e-06, "loss": 0.0, "step": 2500 }, { "epoch": 0.017963214771344736, "grad_norm": 0.0, "learning_rate": 9.820868818435555e-06, "loss": 0.168, "step": 2510 }, { "epoch": 0.018034781364059258, "grad_norm": 1.0073993861325903e-09, "learning_rate": 9.82015315250841e-06, "loss": 0.0, "step": 2520 }, { "epoch": 0.018106347956773777, "grad_norm": 0.0, "learning_rate": 9.819437486581266e-06, "loss": 0.0004, "step": 2530 }, { "epoch": 0.0181779145494883, "grad_norm": 0.0014765149680897593, "learning_rate": 9.818721820654119e-06, "loss": 0.0, "step": 2540 }, { "epoch": 0.01824948114220282, "grad_norm": 13.022109031677246, "learning_rate": 9.818006154726974e-06, "loss": 0.0018, "step": 2550 }, { "epoch": 0.018321047734917342, "grad_norm": 0.0012617555912584066, "learning_rate": 9.817290488799828e-06, "loss": 0.0, "step": 2560 }, { "epoch": 0.01839261432763186, "grad_norm": 5.937579317105701e-10, "learning_rate": 9.816574822872685e-06, "loss": 0.0, "step": 2570 }, { "epoch": 0.018464180920346383, "grad_norm": 0.0, "learning_rate": 9.815859156945539e-06, "loss": 0.0002, "step": 2580 }, { "epoch": 0.018535747513060905, "grad_norm": 0.0, "learning_rate": 9.815143491018392e-06, "loss": 0.0, "step": 2590 }, { "epoch": 0.018607314105775423, "grad_norm": 8.227721615128303e-09, "learning_rate": 9.81442782509125e-06, "loss": 0.0001, "step": 2600 }, { "epoch": 0.018678880698489945, "grad_norm": 0.0, "learning_rate": 9.813712159164103e-06, "loss": 0.0, "step": 2610 }, { "epoch": 0.018750447291204467, "grad_norm": 2.489784310455434e-05, "learning_rate": 9.812996493236958e-06, "loss": 0.018, "step": 2620 }, { "epoch": 0.018822013883918985, "grad_norm": 0.0, "learning_rate": 9.812280827309812e-06, "loss": 0.0016, "step": 2630 }, { "epoch": 0.018893580476633507, "grad_norm": 3.445071339314154e-09, "learning_rate": 9.811565161382669e-06, "loss": 0.0001, "step": 2640 }, { "epoch": 0.01896514706934803, "grad_norm": 0.0, "learning_rate": 9.810849495455522e-06, "loss": 0.0, "step": 2650 }, { "epoch": 0.01903671366206255, "grad_norm": 102.42285919189453, "learning_rate": 9.810133829528376e-06, "loss": 0.0263, "step": 2660 }, { "epoch": 0.01910828025477707, "grad_norm": 14.247214317321777, "learning_rate": 9.809418163601231e-06, "loss": 0.0026, "step": 2670 }, { "epoch": 0.01917984684749159, "grad_norm": 1.286596216232283e-05, "learning_rate": 9.808702497674086e-06, "loss": 0.0004, "step": 2680 }, { "epoch": 0.019251413440206113, "grad_norm": 0.0, "learning_rate": 9.807986831746942e-06, "loss": 0.0, "step": 2690 }, { "epoch": 0.01932298003292063, "grad_norm": 0.0, "learning_rate": 9.807271165819795e-06, "loss": 0.0, "step": 2700 }, { "epoch": 0.019394546625635153, "grad_norm": 3.4145302834076574e-06, "learning_rate": 9.80655549989265e-06, "loss": 0.235, "step": 2710 }, { "epoch": 0.019466113218349675, "grad_norm": 4.586463431266452e-10, "learning_rate": 9.805839833965506e-06, "loss": 0.0001, "step": 2720 }, { "epoch": 0.019537679811064194, "grad_norm": 0.11640314012765884, "learning_rate": 9.80512416803836e-06, "loss": 0.0, "step": 2730 }, { "epoch": 0.019609246403778716, "grad_norm": 0.0, "learning_rate": 9.804408502111215e-06, "loss": 0.1924, "step": 2740 }, { "epoch": 0.019680812996493238, "grad_norm": 0.06982874125242233, "learning_rate": 9.80369283618407e-06, "loss": 0.8016, "step": 2750 }, { "epoch": 0.01975237958920776, "grad_norm": 0.0, "learning_rate": 9.80304873684964e-06, "loss": 0.0345, "step": 2760 }, { "epoch": 0.019823946181922278, "grad_norm": 0.0, "learning_rate": 9.802333070922493e-06, "loss": 0.0002, "step": 2770 }, { "epoch": 0.0198955127746368, "grad_norm": 0.0, "learning_rate": 9.801617404995349e-06, "loss": 0.0, "step": 2780 }, { "epoch": 0.01996707936735132, "grad_norm": 9.70888036633255e-10, "learning_rate": 9.800901739068204e-06, "loss": 0.0, "step": 2790 }, { "epoch": 0.02003864596006584, "grad_norm": 0.0, "learning_rate": 9.800186073141058e-06, "loss": 0.0549, "step": 2800 }, { "epoch": 0.020110212552780362, "grad_norm": 0.0, "learning_rate": 9.799470407213913e-06, "loss": 0.2594, "step": 2810 }, { "epoch": 0.020181779145494884, "grad_norm": 0.0, "learning_rate": 9.798754741286768e-06, "loss": 0.0, "step": 2820 }, { "epoch": 0.020253345738209402, "grad_norm": 214.9196014404297, "learning_rate": 9.798039075359624e-06, "loss": 0.0805, "step": 2830 }, { "epoch": 0.020324912330923924, "grad_norm": 0.0, "learning_rate": 9.797323409432477e-06, "loss": 0.0, "step": 2840 }, { "epoch": 0.020396478923638446, "grad_norm": 3.1326176213042345e-06, "learning_rate": 9.796607743505332e-06, "loss": 0.0, "step": 2850 }, { "epoch": 0.020468045516352968, "grad_norm": 0.0, "learning_rate": 9.795892077578188e-06, "loss": 0.0, "step": 2860 }, { "epoch": 0.020539612109067486, "grad_norm": 0.0, "learning_rate": 9.795176411651041e-06, "loss": 0.0965, "step": 2870 }, { "epoch": 0.02061117870178201, "grad_norm": 0.0, "learning_rate": 9.794460745723897e-06, "loss": 0.0, "step": 2880 }, { "epoch": 0.02068274529449653, "grad_norm": 2.1021892848693824e-07, "learning_rate": 9.793745079796752e-06, "loss": 0.0, "step": 2890 }, { "epoch": 0.02075431188721105, "grad_norm": 0.0, "learning_rate": 9.793029413869607e-06, "loss": 0.0004, "step": 2900 }, { "epoch": 0.02082587847992557, "grad_norm": 0.0, "learning_rate": 9.79231374794246e-06, "loss": 0.0089, "step": 2910 }, { "epoch": 0.020897445072640092, "grad_norm": 0.0, "learning_rate": 9.791598082015316e-06, "loss": 0.0, "step": 2920 }, { "epoch": 0.02096901166535461, "grad_norm": 0.0, "learning_rate": 9.790882416088171e-06, "loss": 0.0, "step": 2930 }, { "epoch": 0.021040578258069133, "grad_norm": 2.0733745831336137e-09, "learning_rate": 9.790166750161025e-06, "loss": 0.0, "step": 2940 }, { "epoch": 0.021112144850783655, "grad_norm": 42.42689895629883, "learning_rate": 9.78945108423388e-06, "loss": 0.0196, "step": 2950 }, { "epoch": 0.021183711443498177, "grad_norm": 162.19989013671875, "learning_rate": 9.788735418306736e-06, "loss": 0.0324, "step": 2960 }, { "epoch": 0.021255278036212695, "grad_norm": 0.0, "learning_rate": 9.788019752379589e-06, "loss": 0.0013, "step": 2970 }, { "epoch": 0.021326844628927217, "grad_norm": 5.168766392671387e-08, "learning_rate": 9.787304086452444e-06, "loss": 0.0026, "step": 2980 }, { "epoch": 0.02139841122164174, "grad_norm": 1.666653592913292e-09, "learning_rate": 9.7865884205253e-06, "loss": 0.0, "step": 2990 }, { "epoch": 0.021469977814356257, "grad_norm": 2.1183393001556396, "learning_rate": 9.785872754598155e-06, "loss": 0.0033, "step": 3000 }, { "epoch": 0.02154154440707078, "grad_norm": 0.0, "learning_rate": 9.785157088671009e-06, "loss": 0.0003, "step": 3010 }, { "epoch": 0.0216131109997853, "grad_norm": 3.6536346215143567e-07, "learning_rate": 9.784441422743864e-06, "loss": 0.0, "step": 3020 }, { "epoch": 0.02168467759249982, "grad_norm": 0.0, "learning_rate": 9.78372575681672e-06, "loss": 0.0, "step": 3030 }, { "epoch": 0.02175624418521434, "grad_norm": 0.0, "learning_rate": 9.783010090889573e-06, "loss": 0.0, "step": 3040 }, { "epoch": 0.021827810777928863, "grad_norm": 0.0, "learning_rate": 9.782294424962428e-06, "loss": 0.0, "step": 3050 }, { "epoch": 0.021899377370643385, "grad_norm": 0.0, "learning_rate": 9.781578759035283e-06, "loss": 0.0006, "step": 3060 }, { "epoch": 0.021970943963357904, "grad_norm": 0.0, "learning_rate": 9.780863093108139e-06, "loss": 0.0009, "step": 3070 }, { "epoch": 0.022042510556072425, "grad_norm": 0.25615569949150085, "learning_rate": 9.780147427180992e-06, "loss": 0.0001, "step": 3080 }, { "epoch": 0.022114077148786947, "grad_norm": 0.004236515611410141, "learning_rate": 9.779431761253848e-06, "loss": 0.016, "step": 3090 }, { "epoch": 0.022185643741501466, "grad_norm": 0.0, "learning_rate": 9.778716095326703e-06, "loss": 0.0016, "step": 3100 }, { "epoch": 0.022257210334215988, "grad_norm": 0.0, "learning_rate": 9.778000429399556e-06, "loss": 0.9531, "step": 3110 }, { "epoch": 0.02232877692693051, "grad_norm": 0.0, "learning_rate": 9.777284763472412e-06, "loss": 0.0001, "step": 3120 }, { "epoch": 0.022400343519645028, "grad_norm": 0.00131712865550071, "learning_rate": 9.776569097545267e-06, "loss": 0.0, "step": 3130 }, { "epoch": 0.02247191011235955, "grad_norm": 0.0, "learning_rate": 9.775853431618122e-06, "loss": 0.1359, "step": 3140 }, { "epoch": 0.022543476705074072, "grad_norm": 0.0, "learning_rate": 9.775137765690976e-06, "loss": 0.0766, "step": 3150 }, { "epoch": 0.022615043297788594, "grad_norm": 0.0, "learning_rate": 9.774422099763831e-06, "loss": 0.0, "step": 3160 }, { "epoch": 0.022686609890503112, "grad_norm": 0.0, "learning_rate": 9.773706433836686e-06, "loss": 0.0, "step": 3170 }, { "epoch": 0.022758176483217634, "grad_norm": 0.0, "learning_rate": 9.77299076790954e-06, "loss": 0.0111, "step": 3180 }, { "epoch": 0.022829743075932156, "grad_norm": 1.156690473180788e-06, "learning_rate": 9.772275101982395e-06, "loss": 0.383, "step": 3190 }, { "epoch": 0.022901309668646674, "grad_norm": 0.0, "learning_rate": 9.77155943605525e-06, "loss": 0.0, "step": 3200 }, { "epoch": 0.022972876261361196, "grad_norm": 0.0, "learning_rate": 9.770843770128104e-06, "loss": 0.0, "step": 3210 }, { "epoch": 0.023044442854075718, "grad_norm": 1.892047657747753e-05, "learning_rate": 9.77012810420096e-06, "loss": 0.0434, "step": 3220 }, { "epoch": 0.023116009446790237, "grad_norm": 0.0, "learning_rate": 9.769412438273815e-06, "loss": 0.0, "step": 3230 }, { "epoch": 0.02318757603950476, "grad_norm": 0.05734855309128761, "learning_rate": 9.76869677234667e-06, "loss": 0.0004, "step": 3240 }, { "epoch": 0.02325914263221928, "grad_norm": 0.0, "learning_rate": 9.767981106419524e-06, "loss": 0.0, "step": 3250 }, { "epoch": 0.023330709224933802, "grad_norm": 5.51534506953999e-09, "learning_rate": 9.767265440492379e-06, "loss": 0.1831, "step": 3260 }, { "epoch": 0.02340227581764832, "grad_norm": 0.0, "learning_rate": 9.766549774565234e-06, "loss": 0.0066, "step": 3270 }, { "epoch": 0.023473842410362843, "grad_norm": 0.0, "learning_rate": 9.765834108638088e-06, "loss": 0.0, "step": 3280 }, { "epoch": 0.023545409003077365, "grad_norm": 0.0, "learning_rate": 9.765118442710943e-06, "loss": 0.0, "step": 3290 }, { "epoch": 0.023616975595791883, "grad_norm": 0.0, "learning_rate": 9.764402776783799e-06, "loss": 0.0, "step": 3300 }, { "epoch": 0.023688542188506405, "grad_norm": 0.0, "learning_rate": 9.763687110856654e-06, "loss": 0.0844, "step": 3310 }, { "epoch": 0.023760108781220927, "grad_norm": 0.0, "learning_rate": 9.762971444929507e-06, "loss": 0.0006, "step": 3320 }, { "epoch": 0.02383167537393545, "grad_norm": 0.3314148783683777, "learning_rate": 9.762255779002363e-06, "loss": 0.0001, "step": 3330 }, { "epoch": 0.023903241966649967, "grad_norm": 0.0, "learning_rate": 9.761540113075218e-06, "loss": 0.0, "step": 3340 }, { "epoch": 0.02397480855936449, "grad_norm": 1.905678254843224e-05, "learning_rate": 9.760824447148072e-06, "loss": 0.0, "step": 3350 }, { "epoch": 0.02404637515207901, "grad_norm": 0.005165101494640112, "learning_rate": 9.760108781220927e-06, "loss": 1.0648, "step": 3360 }, { "epoch": 0.02411794174479353, "grad_norm": 3.354509487962787e-07, "learning_rate": 9.759393115293782e-06, "loss": 0.0013, "step": 3370 }, { "epoch": 0.02418950833750805, "grad_norm": 0.0, "learning_rate": 9.758677449366637e-06, "loss": 0.0, "step": 3380 }, { "epoch": 0.024261074930222573, "grad_norm": 1.0472368527914e-09, "learning_rate": 9.757961783439491e-06, "loss": 0.1538, "step": 3390 }, { "epoch": 0.02433264152293709, "grad_norm": 0.0, "learning_rate": 9.757246117512346e-06, "loss": 0.0001, "step": 3400 }, { "epoch": 0.024404208115651613, "grad_norm": 30.705673217773438, "learning_rate": 9.756530451585202e-06, "loss": 0.0312, "step": 3410 }, { "epoch": 0.024475774708366135, "grad_norm": 0.02345309779047966, "learning_rate": 9.755814785658055e-06, "loss": 0.0, "step": 3420 }, { "epoch": 0.024547341301080657, "grad_norm": 2.047288762696553e-05, "learning_rate": 9.75509911973091e-06, "loss": 0.0, "step": 3430 }, { "epoch": 0.024618907893795176, "grad_norm": 0.00014969820040278137, "learning_rate": 9.754383453803766e-06, "loss": 0.0001, "step": 3440 }, { "epoch": 0.024690474486509698, "grad_norm": 0.0, "learning_rate": 9.75366778787662e-06, "loss": 0.0001, "step": 3450 }, { "epoch": 0.02476204107922422, "grad_norm": 0.0029442773666232824, "learning_rate": 9.752952121949475e-06, "loss": 0.0, "step": 3460 }, { "epoch": 0.024833607671938738, "grad_norm": 1.8944483599625528e-06, "learning_rate": 9.75223645602233e-06, "loss": 0.0002, "step": 3470 }, { "epoch": 0.02490517426465326, "grad_norm": 0.05058615654706955, "learning_rate": 9.751520790095185e-06, "loss": 0.0, "step": 3480 }, { "epoch": 0.02497674085736778, "grad_norm": 0.0001443417859263718, "learning_rate": 9.750805124168039e-06, "loss": 0.0, "step": 3490 }, { "epoch": 0.0250483074500823, "grad_norm": 1.038566344035985e-09, "learning_rate": 9.750089458240894e-06, "loss": 0.0, "step": 3500 }, { "epoch": 0.025119874042796822, "grad_norm": 9.681346000434132e-07, "learning_rate": 9.74937379231375e-06, "loss": 0.0002, "step": 3510 }, { "epoch": 0.025191440635511344, "grad_norm": 0.09848636388778687, "learning_rate": 9.748658126386603e-06, "loss": 0.0, "step": 3520 }, { "epoch": 0.025263007228225866, "grad_norm": 0.0, "learning_rate": 9.747942460459458e-06, "loss": 0.3851, "step": 3530 }, { "epoch": 0.025334573820940384, "grad_norm": 0.0, "learning_rate": 9.747226794532314e-06, "loss": 0.0, "step": 3540 }, { "epoch": 0.025406140413654906, "grad_norm": 9.701968117781234e-10, "learning_rate": 9.746511128605169e-06, "loss": 0.0, "step": 3550 }, { "epoch": 0.025477707006369428, "grad_norm": 0.0, "learning_rate": 9.745795462678023e-06, "loss": 0.0001, "step": 3560 }, { "epoch": 0.025549273599083946, "grad_norm": 0.0, "learning_rate": 9.745079796750878e-06, "loss": 0.0277, "step": 3570 }, { "epoch": 0.02562084019179847, "grad_norm": 0.0, "learning_rate": 9.744364130823733e-06, "loss": 0.0, "step": 3580 }, { "epoch": 0.02569240678451299, "grad_norm": 0.0, "learning_rate": 9.743648464896587e-06, "loss": 0.0, "step": 3590 }, { "epoch": 0.02576397337722751, "grad_norm": 0.0, "learning_rate": 9.742932798969442e-06, "loss": 0.0002, "step": 3600 }, { "epoch": 0.02583553996994203, "grad_norm": 0.0, "learning_rate": 9.742217133042297e-06, "loss": 1.3016, "step": 3610 }, { "epoch": 0.025907106562656552, "grad_norm": 2.51212304647197e-06, "learning_rate": 9.741501467115153e-06, "loss": 0.0006, "step": 3620 }, { "epoch": 0.025978673155371074, "grad_norm": 0.0, "learning_rate": 9.740785801188006e-06, "loss": 0.0, "step": 3630 }, { "epoch": 0.026050239748085593, "grad_norm": 0.0, "learning_rate": 9.740070135260861e-06, "loss": 0.0001, "step": 3640 }, { "epoch": 0.026121806340800115, "grad_norm": 0.0, "learning_rate": 9.739354469333717e-06, "loss": 0.0, "step": 3650 }, { "epoch": 0.026193372933514637, "grad_norm": 0.0, "learning_rate": 9.73863880340657e-06, "loss": 0.0907, "step": 3660 }, { "epoch": 0.026264939526229155, "grad_norm": 0.0, "learning_rate": 9.737923137479426e-06, "loss": 0.0077, "step": 3670 }, { "epoch": 0.026336506118943677, "grad_norm": 0.0, "learning_rate": 9.737207471552281e-06, "loss": 0.0015, "step": 3680 }, { "epoch": 0.0264080727116582, "grad_norm": 8.983202093304499e-08, "learning_rate": 9.736491805625135e-06, "loss": 0.0, "step": 3690 }, { "epoch": 0.026479639304372717, "grad_norm": 2.1392251525753636e-08, "learning_rate": 9.73577613969799e-06, "loss": 0.0, "step": 3700 }, { "epoch": 0.02655120589708724, "grad_norm": 4.86369174268475e-07, "learning_rate": 9.735060473770843e-06, "loss": 0.0, "step": 3710 }, { "epoch": 0.02662277248980176, "grad_norm": 0.0006399693666025996, "learning_rate": 9.7343448078437e-06, "loss": 0.0, "step": 3720 }, { "epoch": 0.026694339082516283, "grad_norm": 0.19239398837089539, "learning_rate": 9.733629141916554e-06, "loss": 0.0001, "step": 3730 }, { "epoch": 0.0267659056752308, "grad_norm": 0.0, "learning_rate": 9.732913475989408e-06, "loss": 0.0, "step": 3740 }, { "epoch": 0.026837472267945323, "grad_norm": 1.7671626384441197e-09, "learning_rate": 9.732197810062265e-06, "loss": 0.0042, "step": 3750 }, { "epoch": 0.026909038860659845, "grad_norm": 0.021312307566404343, "learning_rate": 9.731482144135118e-06, "loss": 0.0204, "step": 3760 }, { "epoch": 0.026980605453374364, "grad_norm": 9.299539471818719e-10, "learning_rate": 9.730766478207974e-06, "loss": 0.0, "step": 3770 }, { "epoch": 0.027052172046088886, "grad_norm": 0.0, "learning_rate": 9.730050812280827e-06, "loss": 0.1175, "step": 3780 }, { "epoch": 0.027123738638803407, "grad_norm": 0.0, "learning_rate": 9.729335146353684e-06, "loss": 0.0, "step": 3790 }, { "epoch": 0.027195305231517926, "grad_norm": 0.0011523172724992037, "learning_rate": 9.728619480426538e-06, "loss": 0.0002, "step": 3800 }, { "epoch": 0.027266871824232448, "grad_norm": 0.0, "learning_rate": 9.727903814499391e-06, "loss": 0.0003, "step": 3810 }, { "epoch": 0.02733843841694697, "grad_norm": 0.0, "learning_rate": 9.727188148572248e-06, "loss": 0.0, "step": 3820 }, { "epoch": 0.02741000500966149, "grad_norm": 2.96830153465271, "learning_rate": 9.726472482645102e-06, "loss": 0.0589, "step": 3830 }, { "epoch": 0.02748157160237601, "grad_norm": 1.614406073713326e-06, "learning_rate": 9.725756816717957e-06, "loss": 0.0, "step": 3840 }, { "epoch": 0.027553138195090532, "grad_norm": 0.0, "learning_rate": 9.72504115079081e-06, "loss": 0.0012, "step": 3850 }, { "epoch": 0.027624704787805054, "grad_norm": 0.6496194005012512, "learning_rate": 9.724325484863666e-06, "loss": 0.0002, "step": 3860 }, { "epoch": 0.027696271380519572, "grad_norm": 0.0, "learning_rate": 9.723609818936521e-06, "loss": 0.0, "step": 3870 }, { "epoch": 0.027767837973234094, "grad_norm": 3.2480791389843944e-08, "learning_rate": 9.722894153009375e-06, "loss": 0.0, "step": 3880 }, { "epoch": 0.027839404565948616, "grad_norm": 0.0004377333098091185, "learning_rate": 9.722178487082232e-06, "loss": 0.0, "step": 3890 }, { "epoch": 0.027910971158663134, "grad_norm": 0.0, "learning_rate": 9.721462821155086e-06, "loss": 0.0, "step": 3900 }, { "epoch": 0.027982537751377656, "grad_norm": 1.724989306239877e-05, "learning_rate": 9.72074715522794e-06, "loss": 0.2488, "step": 3910 }, { "epoch": 0.028054104344092178, "grad_norm": 0.37987151741981506, "learning_rate": 9.720031489300794e-06, "loss": 0.0001, "step": 3920 }, { "epoch": 0.0281256709368067, "grad_norm": 5.72642693441594e-06, "learning_rate": 9.71931582337365e-06, "loss": 0.0, "step": 3930 }, { "epoch": 0.02819723752952122, "grad_norm": 0.0, "learning_rate": 9.718600157446505e-06, "loss": 0.0289, "step": 3940 }, { "epoch": 0.02826880412223574, "grad_norm": 0.0, "learning_rate": 9.717884491519359e-06, "loss": 0.0001, "step": 3950 }, { "epoch": 0.028340370714950262, "grad_norm": 8.083093661070961e-08, "learning_rate": 9.717168825592216e-06, "loss": 0.0, "step": 3960 }, { "epoch": 0.02841193730766478, "grad_norm": 0.0, "learning_rate": 9.71645315966507e-06, "loss": 0.0, "step": 3970 }, { "epoch": 0.028483503900379303, "grad_norm": 0.0, "learning_rate": 9.715737493737923e-06, "loss": 0.0, "step": 3980 }, { "epoch": 0.028555070493093825, "grad_norm": 0.0, "learning_rate": 9.715021827810778e-06, "loss": 0.0, "step": 3990 }, { "epoch": 0.028626637085808343, "grad_norm": 0.0, "learning_rate": 9.714306161883633e-06, "loss": 0.0018, "step": 4000 }, { "epoch": 0.028698203678522865, "grad_norm": 0.0, "learning_rate": 9.713590495956489e-06, "loss": 0.0, "step": 4010 }, { "epoch": 0.028769770271237387, "grad_norm": 470.7726745605469, "learning_rate": 9.712874830029342e-06, "loss": 0.5254, "step": 4020 }, { "epoch": 0.02884133686395191, "grad_norm": 0.0, "learning_rate": 9.7121591641022e-06, "loss": 0.0001, "step": 4030 }, { "epoch": 0.028912903456666427, "grad_norm": 0.0, "learning_rate": 9.711443498175053e-06, "loss": 0.0, "step": 4040 }, { "epoch": 0.02898447004938095, "grad_norm": 1.8576035927253542e-06, "learning_rate": 9.710727832247906e-06, "loss": 0.0, "step": 4050 }, { "epoch": 0.02905603664209547, "grad_norm": 0.0, "learning_rate": 9.710012166320762e-06, "loss": 0.0058, "step": 4060 }, { "epoch": 0.02912760323480999, "grad_norm": 2.2148551579448394e-05, "learning_rate": 9.709296500393617e-06, "loss": 0.0, "step": 4070 }, { "epoch": 0.02919916982752451, "grad_norm": 8.995672396849841e-05, "learning_rate": 9.708580834466472e-06, "loss": 0.0, "step": 4080 }, { "epoch": 0.029270736420239033, "grad_norm": 0.0, "learning_rate": 9.707865168539326e-06, "loss": 0.0, "step": 4090 }, { "epoch": 0.02934230301295355, "grad_norm": 1.1638344403763767e-05, "learning_rate": 9.707149502612181e-06, "loss": 0.0127, "step": 4100 }, { "epoch": 0.029413869605668073, "grad_norm": 5.9930033683776855, "learning_rate": 9.706433836685036e-06, "loss": 0.001, "step": 4110 }, { "epoch": 0.029485436198382595, "grad_norm": 0.0, "learning_rate": 9.70571817075789e-06, "loss": 0.0, "step": 4120 }, { "epoch": 0.029557002791097117, "grad_norm": 102.9602279663086, "learning_rate": 9.705002504830745e-06, "loss": 0.0445, "step": 4130 }, { "epoch": 0.029628569383811636, "grad_norm": 0.05482972413301468, "learning_rate": 9.7042868389036e-06, "loss": 0.0, "step": 4140 }, { "epoch": 0.029700135976526158, "grad_norm": 0.0, "learning_rate": 9.703571172976456e-06, "loss": 0.0, "step": 4150 }, { "epoch": 0.02977170256924068, "grad_norm": 0.0, "learning_rate": 9.70285550704931e-06, "loss": 0.0, "step": 4160 }, { "epoch": 0.029843269161955198, "grad_norm": 0.0002339003694942221, "learning_rate": 9.702139841122165e-06, "loss": 0.1978, "step": 4170 }, { "epoch": 0.02991483575466972, "grad_norm": 0.0, "learning_rate": 9.70142417519502e-06, "loss": 0.0, "step": 4180 }, { "epoch": 0.02998640234738424, "grad_norm": 0.0, "learning_rate": 9.700708509267874e-06, "loss": 0.0, "step": 4190 }, { "epoch": 0.030057968940098764, "grad_norm": 5.1090886188376317e-08, "learning_rate": 9.699992843340729e-06, "loss": 0.0, "step": 4200 }, { "epoch": 0.030129535532813282, "grad_norm": 0.0, "learning_rate": 9.699277177413584e-06, "loss": 0.0038, "step": 4210 }, { "epoch": 0.030201102125527804, "grad_norm": 0.0, "learning_rate": 9.698561511486438e-06, "loss": 0.0, "step": 4220 }, { "epoch": 0.030272668718242326, "grad_norm": 0.0, "learning_rate": 9.697845845559293e-06, "loss": 0.0, "step": 4230 }, { "epoch": 0.030344235310956844, "grad_norm": 0.0, "learning_rate": 9.697130179632148e-06, "loss": 0.0021, "step": 4240 }, { "epoch": 0.030415801903671366, "grad_norm": 0.0, "learning_rate": 9.696414513705004e-06, "loss": 0.0, "step": 4250 }, { "epoch": 0.030487368496385888, "grad_norm": 0.0014274335699155927, "learning_rate": 9.695698847777857e-06, "loss": 0.3926, "step": 4260 }, { "epoch": 0.030558935089100406, "grad_norm": 0.0, "learning_rate": 9.694983181850713e-06, "loss": 0.0005, "step": 4270 }, { "epoch": 0.03063050168181493, "grad_norm": 0.03037519007921219, "learning_rate": 9.694267515923568e-06, "loss": 0.4621, "step": 4280 }, { "epoch": 0.03070206827452945, "grad_norm": 0.0, "learning_rate": 9.693551849996422e-06, "loss": 0.0, "step": 4290 }, { "epoch": 0.030773634867243972, "grad_norm": 0.0014290051767602563, "learning_rate": 9.692836184069277e-06, "loss": 0.0, "step": 4300 }, { "epoch": 0.03084520145995849, "grad_norm": 0.0, "learning_rate": 9.692120518142132e-06, "loss": 0.0025, "step": 4310 }, { "epoch": 0.030916768052673013, "grad_norm": 0.5594335794448853, "learning_rate": 9.691404852214987e-06, "loss": 0.0001, "step": 4320 }, { "epoch": 0.030988334645387534, "grad_norm": 0.0, "learning_rate": 9.690689186287841e-06, "loss": 0.0, "step": 4330 }, { "epoch": 0.031059901238102053, "grad_norm": 1.2064508199691772, "learning_rate": 9.689973520360696e-06, "loss": 0.0003, "step": 4340 }, { "epoch": 0.031131467830816575, "grad_norm": 0.9478038549423218, "learning_rate": 9.689257854433552e-06, "loss": 0.0005, "step": 4350 }, { "epoch": 0.031203034423531097, "grad_norm": 0.0, "learning_rate": 9.688542188506405e-06, "loss": 0.0, "step": 4360 }, { "epoch": 0.031274601016245615, "grad_norm": 0.0, "learning_rate": 9.68782652257926e-06, "loss": 0.0, "step": 4370 }, { "epoch": 0.03134616760896014, "grad_norm": 0.000388710992410779, "learning_rate": 9.687110856652116e-06, "loss": 0.0, "step": 4380 }, { "epoch": 0.03141773420167466, "grad_norm": 2.183829561630546e-09, "learning_rate": 9.686395190724971e-06, "loss": 0.0, "step": 4390 }, { "epoch": 0.03148930079438918, "grad_norm": 0.0005892125773243606, "learning_rate": 9.685679524797825e-06, "loss": 0.0001, "step": 4400 }, { "epoch": 0.0315608673871037, "grad_norm": 0.0, "learning_rate": 9.68496385887068e-06, "loss": 0.0, "step": 4410 }, { "epoch": 0.03163243397981822, "grad_norm": 0.0, "learning_rate": 9.684248192943535e-06, "loss": 0.0, "step": 4420 }, { "epoch": 0.03170400057253274, "grad_norm": 0.0, "learning_rate": 9.683532527016389e-06, "loss": 0.0026, "step": 4430 }, { "epoch": 0.031775567165247265, "grad_norm": 0.0, "learning_rate": 9.682816861089244e-06, "loss": 0.0, "step": 4440 }, { "epoch": 0.03184713375796178, "grad_norm": 0.0, "learning_rate": 9.6821011951621e-06, "loss": 0.0, "step": 4450 }, { "epoch": 0.0319187003506763, "grad_norm": 8.481946256466699e-10, "learning_rate": 9.681385529234953e-06, "loss": 0.0005, "step": 4460 }, { "epoch": 0.03199026694339083, "grad_norm": 0.0038706623017787933, "learning_rate": 9.680669863307808e-06, "loss": 0.0001, "step": 4470 }, { "epoch": 0.032061833536105346, "grad_norm": 0.0, "learning_rate": 9.679954197380664e-06, "loss": 0.0, "step": 4480 }, { "epoch": 0.032133400128819864, "grad_norm": 0.005443080328404903, "learning_rate": 9.679238531453519e-06, "loss": 0.0, "step": 4490 }, { "epoch": 0.03220496672153439, "grad_norm": 0.0, "learning_rate": 9.678522865526373e-06, "loss": 0.0, "step": 4500 }, { "epoch": 0.03227653331424891, "grad_norm": 0.0, "learning_rate": 9.677807199599228e-06, "loss": 0.0, "step": 4510 }, { "epoch": 0.032348099906963426, "grad_norm": 0.0, "learning_rate": 9.677091533672083e-06, "loss": 0.0001, "step": 4520 }, { "epoch": 0.03241966649967795, "grad_norm": 0.0, "learning_rate": 9.676375867744937e-06, "loss": 0.2551, "step": 4530 }, { "epoch": 0.03249123309239247, "grad_norm": 0.00016206786676775664, "learning_rate": 9.675660201817792e-06, "loss": 0.0576, "step": 4540 }, { "epoch": 0.032562799685106995, "grad_norm": 2.927404921138077e-06, "learning_rate": 9.674944535890647e-06, "loss": 0.0135, "step": 4550 }, { "epoch": 0.032634366277821514, "grad_norm": 0.0, "learning_rate": 9.674228869963503e-06, "loss": 0.0, "step": 4560 }, { "epoch": 0.03270593287053603, "grad_norm": 0.0, "learning_rate": 9.673513204036356e-06, "loss": 0.0002, "step": 4570 }, { "epoch": 0.03277749946325056, "grad_norm": 0.00918518379330635, "learning_rate": 9.672797538109211e-06, "loss": 0.0002, "step": 4580 }, { "epoch": 0.032849066055965076, "grad_norm": 0.000291949778329581, "learning_rate": 9.672081872182067e-06, "loss": 0.001, "step": 4590 }, { "epoch": 0.032920632648679594, "grad_norm": 0.0, "learning_rate": 9.67136620625492e-06, "loss": 0.0006, "step": 4600 }, { "epoch": 0.03299219924139412, "grad_norm": 9.880532161332667e-05, "learning_rate": 9.670650540327776e-06, "loss": 0.0, "step": 4610 }, { "epoch": 0.03306376583410864, "grad_norm": 0.0, "learning_rate": 9.669934874400631e-06, "loss": 0.0, "step": 4620 }, { "epoch": 0.03313533242682316, "grad_norm": 6.764481440768577e-06, "learning_rate": 9.669219208473485e-06, "loss": 0.0, "step": 4630 }, { "epoch": 0.03320689901953768, "grad_norm": 0.009746033698320389, "learning_rate": 9.66850354254634e-06, "loss": 0.2709, "step": 4640 }, { "epoch": 0.0332784656122522, "grad_norm": 0.0, "learning_rate": 9.667787876619195e-06, "loss": 0.0, "step": 4650 }, { "epoch": 0.03335003220496672, "grad_norm": 5.9109602545959206e-08, "learning_rate": 9.66707221069205e-06, "loss": 0.0, "step": 4660 }, { "epoch": 0.033421598797681244, "grad_norm": 0.0, "learning_rate": 9.666356544764904e-06, "loss": 0.5176, "step": 4670 }, { "epoch": 0.03349316539039576, "grad_norm": 0.0, "learning_rate": 9.66564087883776e-06, "loss": 0.4711, "step": 4680 }, { "epoch": 0.03356473198311028, "grad_norm": 1.2075459743243755e-08, "learning_rate": 9.664925212910615e-06, "loss": 0.0, "step": 4690 }, { "epoch": 0.033636298575824806, "grad_norm": 9.168904398393352e-06, "learning_rate": 9.664209546983468e-06, "loss": 0.0002, "step": 4700 }, { "epoch": 0.033707865168539325, "grad_norm": 0.00014730314433109015, "learning_rate": 9.663493881056323e-06, "loss": 0.0, "step": 4710 }, { "epoch": 0.03377943176125384, "grad_norm": 0.0001222168211825192, "learning_rate": 9.662778215129179e-06, "loss": 0.0633, "step": 4720 }, { "epoch": 0.03385099835396837, "grad_norm": 0.0020293642301112413, "learning_rate": 9.662062549202034e-06, "loss": 0.0001, "step": 4730 }, { "epoch": 0.03392256494668289, "grad_norm": 0.6403902769088745, "learning_rate": 9.661346883274888e-06, "loss": 0.0002, "step": 4740 }, { "epoch": 0.03399413153939741, "grad_norm": 7.102875088094152e-08, "learning_rate": 9.660631217347743e-06, "loss": 1.3534, "step": 4750 }, { "epoch": 0.03406569813211193, "grad_norm": 0.0008715104195289314, "learning_rate": 9.659915551420598e-06, "loss": 0.0, "step": 4760 }, { "epoch": 0.03413726472482645, "grad_norm": 0.0, "learning_rate": 9.659199885493452e-06, "loss": 0.0, "step": 4770 }, { "epoch": 0.034208831317540975, "grad_norm": 1.6150819647009484e-05, "learning_rate": 9.658484219566307e-06, "loss": 0.0, "step": 4780 }, { "epoch": 0.03428039791025549, "grad_norm": 0.0, "learning_rate": 9.657768553639162e-06, "loss": 0.0, "step": 4790 }, { "epoch": 0.03435196450297001, "grad_norm": 0.0, "learning_rate": 9.657052887712018e-06, "loss": 0.0, "step": 4800 }, { "epoch": 0.03442353109568454, "grad_norm": 0.0, "learning_rate": 9.656337221784871e-06, "loss": 0.0016, "step": 4810 }, { "epoch": 0.034495097688399055, "grad_norm": 0.0, "learning_rate": 9.655621555857727e-06, "loss": 0.0, "step": 4820 }, { "epoch": 0.034566664281113574, "grad_norm": 4.889300726063084e-06, "learning_rate": 9.654905889930582e-06, "loss": 0.0, "step": 4830 }, { "epoch": 0.0346382308738281, "grad_norm": 0.0, "learning_rate": 9.654190224003436e-06, "loss": 0.0022, "step": 4840 }, { "epoch": 0.03470979746654262, "grad_norm": 0.0060877385549247265, "learning_rate": 9.65347455807629e-06, "loss": 0.0, "step": 4850 }, { "epoch": 0.034781364059257136, "grad_norm": 0.0, "learning_rate": 9.652758892149146e-06, "loss": 0.0, "step": 4860 }, { "epoch": 0.03485293065197166, "grad_norm": 0.0, "learning_rate": 9.652043226222e-06, "loss": 0.0, "step": 4870 }, { "epoch": 0.03492449724468618, "grad_norm": 2.2153852086148618e-09, "learning_rate": 9.651327560294855e-06, "loss": 0.0015, "step": 4880 }, { "epoch": 0.0349960638374007, "grad_norm": 0.0, "learning_rate": 9.65061189436771e-06, "loss": 0.3965, "step": 4890 }, { "epoch": 0.035067630430115224, "grad_norm": 0.0, "learning_rate": 9.649896228440566e-06, "loss": 0.0, "step": 4900 }, { "epoch": 0.03513919702282974, "grad_norm": 3.104841016465798e-05, "learning_rate": 9.64918056251342e-06, "loss": 0.0, "step": 4910 }, { "epoch": 0.03521076361554426, "grad_norm": 1.1720197257147902e-08, "learning_rate": 9.648464896586274e-06, "loss": 0.1234, "step": 4920 }, { "epoch": 0.035282330208258786, "grad_norm": 0.0, "learning_rate": 9.64774923065913e-06, "loss": 0.0004, "step": 4930 }, { "epoch": 0.035353896800973304, "grad_norm": 0.0, "learning_rate": 9.647033564731983e-06, "loss": 0.0, "step": 4940 }, { "epoch": 0.03542546339368783, "grad_norm": 0.0, "learning_rate": 9.646317898804839e-06, "loss": 0.0, "step": 4950 }, { "epoch": 0.03549702998640235, "grad_norm": 0.0, "learning_rate": 9.645602232877694e-06, "loss": 0.0, "step": 4960 }, { "epoch": 0.035568596579116867, "grad_norm": 6.941792207726394e-08, "learning_rate": 9.64488656695055e-06, "loss": 0.0001, "step": 4970 }, { "epoch": 0.03564016317183139, "grad_norm": 0.0, "learning_rate": 9.644170901023403e-06, "loss": 0.0, "step": 4980 }, { "epoch": 0.03571172976454591, "grad_norm": 0.0, "learning_rate": 9.643455235096258e-06, "loss": 0.0, "step": 4990 }, { "epoch": 0.03578329635726043, "grad_norm": 1.836407022892672e-06, "learning_rate": 9.642739569169113e-06, "loss": 0.0, "step": 5000 }, { "epoch": 0.035854862949974954, "grad_norm": 0.0005514759104698896, "learning_rate": 9.642023903241967e-06, "loss": 0.0, "step": 5010 }, { "epoch": 0.03592642954268947, "grad_norm": 0.0, "learning_rate": 9.641308237314822e-06, "loss": 0.0, "step": 5020 }, { "epoch": 0.03599799613540399, "grad_norm": 0.9242817163467407, "learning_rate": 9.640592571387678e-06, "loss": 0.0001, "step": 5030 }, { "epoch": 0.036069562728118516, "grad_norm": 0.0, "learning_rate": 9.639876905460533e-06, "loss": 0.0, "step": 5040 }, { "epoch": 0.036141129320833035, "grad_norm": 0.0, "learning_rate": 9.639161239533386e-06, "loss": 0.0, "step": 5050 }, { "epoch": 0.03621269591354755, "grad_norm": 0.0007165372371673584, "learning_rate": 9.638445573606242e-06, "loss": 0.0008, "step": 5060 }, { "epoch": 0.03628426250626208, "grad_norm": 0.0014026649296283722, "learning_rate": 9.637729907679097e-06, "loss": 0.0002, "step": 5070 }, { "epoch": 0.0363558290989766, "grad_norm": 0.0, "learning_rate": 9.63701424175195e-06, "loss": 0.0126, "step": 5080 }, { "epoch": 0.036427395691691115, "grad_norm": 7.602762343594804e-05, "learning_rate": 9.636298575824806e-06, "loss": 0.0, "step": 5090 }, { "epoch": 0.03649896228440564, "grad_norm": 0.0, "learning_rate": 9.635582909897661e-06, "loss": 0.0, "step": 5100 }, { "epoch": 0.03657052887712016, "grad_norm": 0.04818640649318695, "learning_rate": 9.634867243970515e-06, "loss": 0.0, "step": 5110 }, { "epoch": 0.036642095469834685, "grad_norm": 0.0, "learning_rate": 9.63415157804337e-06, "loss": 0.0, "step": 5120 }, { "epoch": 0.0367136620625492, "grad_norm": 0.0, "learning_rate": 9.633435912116225e-06, "loss": 0.0741, "step": 5130 }, { "epoch": 0.03678522865526372, "grad_norm": 0.0, "learning_rate": 9.63272024618908e-06, "loss": 0.0, "step": 5140 }, { "epoch": 0.03685679524797825, "grad_norm": 0.0, "learning_rate": 9.632004580261934e-06, "loss": 0.0, "step": 5150 }, { "epoch": 0.036928361840692765, "grad_norm": 0.0, "learning_rate": 9.63128891433479e-06, "loss": 0.0, "step": 5160 }, { "epoch": 0.036999928433407284, "grad_norm": 0.0, "learning_rate": 9.630573248407645e-06, "loss": 0.0, "step": 5170 }, { "epoch": 0.03707149502612181, "grad_norm": 0.8727515339851379, "learning_rate": 9.629857582480498e-06, "loss": 0.0002, "step": 5180 }, { "epoch": 0.03714306161883633, "grad_norm": 0.0, "learning_rate": 9.629141916553354e-06, "loss": 0.0726, "step": 5190 }, { "epoch": 0.037214628211550846, "grad_norm": 617.369384765625, "learning_rate": 9.628426250626209e-06, "loss": 0.1214, "step": 5200 }, { "epoch": 0.03728619480426537, "grad_norm": 4.968469724531133e-08, "learning_rate": 9.627710584699064e-06, "loss": 0.0, "step": 5210 }, { "epoch": 0.03735776139697989, "grad_norm": 0.0, "learning_rate": 9.626994918771918e-06, "loss": 0.0, "step": 5220 }, { "epoch": 0.03742932798969441, "grad_norm": 1.266568006030866e-06, "learning_rate": 9.626279252844772e-06, "loss": 0.0, "step": 5230 }, { "epoch": 0.037500894582408933, "grad_norm": 1.1825644969940186, "learning_rate": 9.625563586917629e-06, "loss": 0.0002, "step": 5240 }, { "epoch": 0.03757246117512345, "grad_norm": 1.1267062518527382e-06, "learning_rate": 9.624847920990482e-06, "loss": 0.0, "step": 5250 }, { "epoch": 0.03764402776783797, "grad_norm": 0.0, "learning_rate": 9.624132255063337e-06, "loss": 0.0, "step": 5260 }, { "epoch": 0.037715594360552496, "grad_norm": 5.066989050561688e-10, "learning_rate": 9.623416589136193e-06, "loss": 0.0, "step": 5270 }, { "epoch": 0.037787160953267014, "grad_norm": 0.0, "learning_rate": 9.622700923209046e-06, "loss": 0.0, "step": 5280 }, { "epoch": 0.03785872754598153, "grad_norm": 0.0, "learning_rate": 9.621985257281902e-06, "loss": 0.0, "step": 5290 }, { "epoch": 0.03793029413869606, "grad_norm": 0.0, "learning_rate": 9.621269591354755e-06, "loss": 0.0, "step": 5300 }, { "epoch": 0.038001860731410576, "grad_norm": 0.0, "learning_rate": 9.620553925427612e-06, "loss": 0.0, "step": 5310 }, { "epoch": 0.0380734273241251, "grad_norm": 0.0, "learning_rate": 9.619838259500466e-06, "loss": 0.0, "step": 5320 }, { "epoch": 0.03814499391683962, "grad_norm": 0.0, "learning_rate": 9.619122593573321e-06, "loss": 0.0133, "step": 5330 }, { "epoch": 0.03821656050955414, "grad_norm": 0.00030622188933193684, "learning_rate": 9.618406927646176e-06, "loss": 0.0, "step": 5340 }, { "epoch": 0.038288127102268664, "grad_norm": 4.5113593416523656e-10, "learning_rate": 9.61769126171903e-06, "loss": 0.0, "step": 5350 }, { "epoch": 0.03835969369498318, "grad_norm": 0.0, "learning_rate": 9.616975595791885e-06, "loss": 0.0, "step": 5360 }, { "epoch": 0.0384312602876977, "grad_norm": 0.0, "learning_rate": 9.616259929864739e-06, "loss": 0.0001, "step": 5370 }, { "epoch": 0.038502826880412226, "grad_norm": 0.004794281907379627, "learning_rate": 9.615544263937596e-06, "loss": 0.0, "step": 5380 }, { "epoch": 0.038574393473126745, "grad_norm": 7.688442565267906e-06, "learning_rate": 9.61482859801045e-06, "loss": 0.0026, "step": 5390 }, { "epoch": 0.03864596006584126, "grad_norm": 0.0, "learning_rate": 9.614112932083303e-06, "loss": 0.0044, "step": 5400 }, { "epoch": 0.03871752665855579, "grad_norm": 0.0, "learning_rate": 9.61339726615616e-06, "loss": 0.0, "step": 5410 }, { "epoch": 0.03878909325127031, "grad_norm": 0.0, "learning_rate": 9.612681600229014e-06, "loss": 0.0, "step": 5420 }, { "epoch": 0.038860659843984825, "grad_norm": 0.0, "learning_rate": 9.611965934301869e-06, "loss": 0.0121, "step": 5430 }, { "epoch": 0.03893222643669935, "grad_norm": 0.0, "learning_rate": 9.611250268374723e-06, "loss": 0.0, "step": 5440 }, { "epoch": 0.03900379302941387, "grad_norm": 0.0, "learning_rate": 9.61053460244758e-06, "loss": 0.0012, "step": 5450 }, { "epoch": 0.03907535962212839, "grad_norm": 5.0084439862985164e-05, "learning_rate": 9.609818936520433e-06, "loss": 0.0, "step": 5460 }, { "epoch": 0.03914692621484291, "grad_norm": 0.0, "learning_rate": 9.609103270593287e-06, "loss": 0.0, "step": 5470 }, { "epoch": 0.03921849280755743, "grad_norm": 4.3872883104256744e-10, "learning_rate": 9.608387604666144e-06, "loss": 0.0, "step": 5480 }, { "epoch": 0.03929005940027195, "grad_norm": 9.355905604024883e-06, "learning_rate": 9.607671938738997e-06, "loss": 0.0004, "step": 5490 }, { "epoch": 0.039361625992986475, "grad_norm": 2.304766893386841, "learning_rate": 9.606956272811853e-06, "loss": 0.0005, "step": 5500 }, { "epoch": 0.039433192585700994, "grad_norm": 0.0, "learning_rate": 9.606240606884706e-06, "loss": 0.0002, "step": 5510 }, { "epoch": 0.03950475917841552, "grad_norm": 5.0579183152876794e-05, "learning_rate": 9.605524940957561e-06, "loss": 0.0, "step": 5520 }, { "epoch": 0.03957632577113004, "grad_norm": 0.0, "learning_rate": 9.604809275030417e-06, "loss": 0.0, "step": 5530 }, { "epoch": 0.039647892363844556, "grad_norm": 0.0, "learning_rate": 9.60409360910327e-06, "loss": 0.0, "step": 5540 }, { "epoch": 0.03971945895655908, "grad_norm": 0.0, "learning_rate": 9.603377943176126e-06, "loss": 0.0001, "step": 5550 }, { "epoch": 0.0397910255492736, "grad_norm": 0.0, "learning_rate": 9.602662277248981e-06, "loss": 0.0, "step": 5560 }, { "epoch": 0.03986259214198812, "grad_norm": 8.229755765754021e-10, "learning_rate": 9.601946611321836e-06, "loss": 0.0015, "step": 5570 }, { "epoch": 0.03993415873470264, "grad_norm": 0.0, "learning_rate": 9.60123094539469e-06, "loss": 0.0, "step": 5580 }, { "epoch": 0.04000572532741716, "grad_norm": 0.0, "learning_rate": 9.600515279467545e-06, "loss": 0.0, "step": 5590 }, { "epoch": 0.04007729192013168, "grad_norm": 0.0, "learning_rate": 9.5997996135404e-06, "loss": 0.0001, "step": 5600 }, { "epoch": 0.040148858512846206, "grad_norm": 0.0, "learning_rate": 9.599083947613254e-06, "loss": 0.0001, "step": 5610 }, { "epoch": 0.040220425105560724, "grad_norm": 7.064697274472564e-05, "learning_rate": 9.59836828168611e-06, "loss": 0.4648, "step": 5620 }, { "epoch": 0.04029199169827524, "grad_norm": 0.0, "learning_rate": 9.597652615758965e-06, "loss": 0.0, "step": 5630 }, { "epoch": 0.04036355829098977, "grad_norm": 0.0, "learning_rate": 9.596936949831818e-06, "loss": 0.0709, "step": 5640 }, { "epoch": 0.040435124883704286, "grad_norm": 0.0, "learning_rate": 9.596221283904673e-06, "loss": 0.0, "step": 5650 }, { "epoch": 0.040506691476418805, "grad_norm": 0.0, "learning_rate": 9.595505617977529e-06, "loss": 0.0964, "step": 5660 }, { "epoch": 0.04057825806913333, "grad_norm": 0.0, "learning_rate": 9.594789952050384e-06, "loss": 0.0003, "step": 5670 }, { "epoch": 0.04064982466184785, "grad_norm": 0.2422845959663391, "learning_rate": 9.594074286123238e-06, "loss": 0.0118, "step": 5680 }, { "epoch": 0.04072139125456237, "grad_norm": 0.0, "learning_rate": 9.593358620196093e-06, "loss": 0.0017, "step": 5690 }, { "epoch": 0.04079295784727689, "grad_norm": 0.0, "learning_rate": 9.592642954268948e-06, "loss": 0.0, "step": 5700 }, { "epoch": 0.04086452443999141, "grad_norm": 2.533224687795155e-07, "learning_rate": 9.591927288341802e-06, "loss": 0.3123, "step": 5710 }, { "epoch": 0.040936091032705936, "grad_norm": 0.0, "learning_rate": 9.591211622414657e-06, "loss": 0.0017, "step": 5720 }, { "epoch": 0.041007657625420454, "grad_norm": 0.0, "learning_rate": 9.590495956487512e-06, "loss": 0.0, "step": 5730 }, { "epoch": 0.04107922421813497, "grad_norm": 0.0, "learning_rate": 9.589780290560368e-06, "loss": 0.0, "step": 5740 }, { "epoch": 0.0411507908108495, "grad_norm": 0.0, "learning_rate": 9.589064624633221e-06, "loss": 0.0, "step": 5750 }, { "epoch": 0.04122235740356402, "grad_norm": 0.0, "learning_rate": 9.588348958706077e-06, "loss": 0.0047, "step": 5760 }, { "epoch": 0.041293923996278535, "grad_norm": 2.4032849044175464e-09, "learning_rate": 9.587633292778932e-06, "loss": 0.0013, "step": 5770 }, { "epoch": 0.04136549058899306, "grad_norm": 3.110669410943956e-08, "learning_rate": 9.586917626851785e-06, "loss": 0.0015, "step": 5780 }, { "epoch": 0.04143705718170758, "grad_norm": 0.0, "learning_rate": 9.58620196092464e-06, "loss": 0.0057, "step": 5790 }, { "epoch": 0.0415086237744221, "grad_norm": 0.0, "learning_rate": 9.585486294997496e-06, "loss": 0.0, "step": 5800 }, { "epoch": 0.04158019036713662, "grad_norm": 0.0, "learning_rate": 9.584770629070351e-06, "loss": 0.0196, "step": 5810 }, { "epoch": 0.04165175695985114, "grad_norm": 3.115454092039727e-05, "learning_rate": 9.584054963143205e-06, "loss": 0.011, "step": 5820 }, { "epoch": 0.04172332355256566, "grad_norm": 1.0671172276488505e-05, "learning_rate": 9.58333929721606e-06, "loss": 0.0007, "step": 5830 }, { "epoch": 0.041794890145280185, "grad_norm": 0.0, "learning_rate": 9.582623631288916e-06, "loss": 0.0049, "step": 5840 }, { "epoch": 0.0418664567379947, "grad_norm": 0.0012243612436577678, "learning_rate": 9.581907965361769e-06, "loss": 0.3453, "step": 5850 }, { "epoch": 0.04193802333070922, "grad_norm": 0.0, "learning_rate": 9.581192299434624e-06, "loss": 0.0, "step": 5860 }, { "epoch": 0.04200958992342375, "grad_norm": 0.0, "learning_rate": 9.58047663350748e-06, "loss": 0.0, "step": 5870 }, { "epoch": 0.042081156516138266, "grad_norm": 0.0, "learning_rate": 9.579760967580333e-06, "loss": 0.0317, "step": 5880 }, { "epoch": 0.04215272310885279, "grad_norm": 4.895361338519422e-10, "learning_rate": 9.579045301653189e-06, "loss": 0.0, "step": 5890 }, { "epoch": 0.04222428970156731, "grad_norm": 0.0, "learning_rate": 9.578329635726044e-06, "loss": 0.6328, "step": 5900 }, { "epoch": 0.04229585629428183, "grad_norm": 0.0, "learning_rate": 9.5776139697989e-06, "loss": 0.5541, "step": 5910 }, { "epoch": 0.04236742288699635, "grad_norm": 2.0084412426513154e-06, "learning_rate": 9.576898303871753e-06, "loss": 0.0418, "step": 5920 }, { "epoch": 0.04243898947971087, "grad_norm": 0.0, "learning_rate": 9.576254204537323e-06, "loss": 0.5395, "step": 5930 }, { "epoch": 0.04251055607242539, "grad_norm": 3.700327579281293e-05, "learning_rate": 9.575538538610178e-06, "loss": 0.0, "step": 5940 }, { "epoch": 0.042582122665139915, "grad_norm": 0.0, "learning_rate": 9.574822872683033e-06, "loss": 0.0, "step": 5950 }, { "epoch": 0.042653689257854434, "grad_norm": 0.7526620626449585, "learning_rate": 9.574107206755887e-06, "loss": 0.0001, "step": 5960 }, { "epoch": 0.04272525585056895, "grad_norm": 2.8839431820415484e-07, "learning_rate": 9.573391540828742e-06, "loss": 0.0709, "step": 5970 }, { "epoch": 0.04279682244328348, "grad_norm": 1.1728003300959244e-05, "learning_rate": 9.572675874901597e-06, "loss": 0.0, "step": 5980 }, { "epoch": 0.042868389035997996, "grad_norm": 0.00034367339685559273, "learning_rate": 9.571960208974451e-06, "loss": 0.0022, "step": 5990 }, { "epoch": 0.042939955628712514, "grad_norm": 0.0, "learning_rate": 9.571244543047306e-06, "loss": 0.0, "step": 6000 }, { "epoch": 0.04301152222142704, "grad_norm": 0.0, "learning_rate": 9.570528877120162e-06, "loss": 0.0, "step": 6010 }, { "epoch": 0.04308308881414156, "grad_norm": 3.3158269729938183e-07, "learning_rate": 9.569813211193017e-06, "loss": 0.0013, "step": 6020 }, { "epoch": 0.04315465540685608, "grad_norm": 72.14649963378906, "learning_rate": 9.56909754526587e-06, "loss": 0.0207, "step": 6030 }, { "epoch": 0.0432262219995706, "grad_norm": 1.2659523918046034e-06, "learning_rate": 9.568381879338726e-06, "loss": 0.0002, "step": 6040 }, { "epoch": 0.04329778859228512, "grad_norm": 0.0, "learning_rate": 9.567666213411581e-06, "loss": 0.0, "step": 6050 }, { "epoch": 0.04336935518499964, "grad_norm": 0.0, "learning_rate": 9.566950547484435e-06, "loss": 0.0, "step": 6060 }, { "epoch": 0.043440921777714164, "grad_norm": 1.4440414905548096, "learning_rate": 9.56623488155729e-06, "loss": 0.0004, "step": 6070 }, { "epoch": 0.04351248837042868, "grad_norm": 2.261287157523384e-08, "learning_rate": 9.565519215630145e-06, "loss": 0.0, "step": 6080 }, { "epoch": 0.04358405496314321, "grad_norm": 3.585925878724083e-05, "learning_rate": 9.564803549702999e-06, "loss": 0.0, "step": 6090 }, { "epoch": 0.043655621555857727, "grad_norm": 0.0, "learning_rate": 9.564087883775854e-06, "loss": 0.0, "step": 6100 }, { "epoch": 0.043727188148572245, "grad_norm": 0.0, "learning_rate": 9.56337221784871e-06, "loss": 0.0, "step": 6110 }, { "epoch": 0.04379875474128677, "grad_norm": 0.0, "learning_rate": 9.562656551921565e-06, "loss": 0.0, "step": 6120 }, { "epoch": 0.04387032133400129, "grad_norm": 4.793886176912565e-09, "learning_rate": 9.561940885994418e-06, "loss": 0.0271, "step": 6130 }, { "epoch": 0.04394188792671581, "grad_norm": 0.0, "learning_rate": 9.561225220067274e-06, "loss": 0.001, "step": 6140 }, { "epoch": 0.04401345451943033, "grad_norm": 6.302056863205507e-05, "learning_rate": 9.560509554140129e-06, "loss": 0.0002, "step": 6150 }, { "epoch": 0.04408502111214485, "grad_norm": 0.0, "learning_rate": 9.559793888212982e-06, "loss": 0.0, "step": 6160 }, { "epoch": 0.04415658770485937, "grad_norm": 0.0, "learning_rate": 9.559078222285838e-06, "loss": 0.0, "step": 6170 }, { "epoch": 0.044228154297573895, "grad_norm": 0.0, "learning_rate": 9.558362556358693e-06, "loss": 0.0, "step": 6180 }, { "epoch": 0.04429972089028841, "grad_norm": 0.0, "learning_rate": 9.557646890431548e-06, "loss": 0.0, "step": 6190 }, { "epoch": 0.04437128748300293, "grad_norm": 2.1414199302682846e-09, "learning_rate": 9.556931224504402e-06, "loss": 0.0, "step": 6200 }, { "epoch": 0.04444285407571746, "grad_norm": 0.0, "learning_rate": 9.556215558577257e-06, "loss": 0.0, "step": 6210 }, { "epoch": 0.044514420668431975, "grad_norm": 0.00016501992649864405, "learning_rate": 9.555499892650112e-06, "loss": 0.0, "step": 6220 }, { "epoch": 0.044585987261146494, "grad_norm": 0.0, "learning_rate": 9.554784226722966e-06, "loss": 0.0, "step": 6230 }, { "epoch": 0.04465755385386102, "grad_norm": 0.0, "learning_rate": 9.554068560795821e-06, "loss": 0.0, "step": 6240 }, { "epoch": 0.04472912044657554, "grad_norm": 1.5171850975548296e-07, "learning_rate": 9.553352894868677e-06, "loss": 0.0015, "step": 6250 }, { "epoch": 0.044800687039290056, "grad_norm": 1.0362184047698975, "learning_rate": 9.55263722894153e-06, "loss": 0.0002, "step": 6260 }, { "epoch": 0.04487225363200458, "grad_norm": 0.0, "learning_rate": 9.551921563014386e-06, "loss": 0.0358, "step": 6270 }, { "epoch": 0.0449438202247191, "grad_norm": 0.0, "learning_rate": 9.55120589708724e-06, "loss": 0.0, "step": 6280 }, { "epoch": 0.045015386817433625, "grad_norm": 0.0, "learning_rate": 9.550490231160096e-06, "loss": 0.0462, "step": 6290 }, { "epoch": 0.045086953410148144, "grad_norm": 1.1027484703163282e-08, "learning_rate": 9.54977456523295e-06, "loss": 0.0, "step": 6300 }, { "epoch": 0.04515852000286266, "grad_norm": 0.0, "learning_rate": 9.549058899305805e-06, "loss": 0.0, "step": 6310 }, { "epoch": 0.04523008659557719, "grad_norm": 0.0, "learning_rate": 9.54834323337866e-06, "loss": 0.0104, "step": 6320 }, { "epoch": 0.045301653188291706, "grad_norm": 4.3910617364417703e-07, "learning_rate": 9.547627567451514e-06, "loss": 0.0, "step": 6330 }, { "epoch": 0.045373219781006224, "grad_norm": 0.0034610338043421507, "learning_rate": 9.54691190152437e-06, "loss": 0.0, "step": 6340 }, { "epoch": 0.04544478637372075, "grad_norm": 0.0, "learning_rate": 9.546196235597224e-06, "loss": 0.001, "step": 6350 }, { "epoch": 0.04551635296643527, "grad_norm": 0.0, "learning_rate": 9.54548056967008e-06, "loss": 0.0, "step": 6360 }, { "epoch": 0.04558791955914979, "grad_norm": 0.0, "learning_rate": 9.544764903742933e-06, "loss": 0.0133, "step": 6370 }, { "epoch": 0.04565948615186431, "grad_norm": 0.0, "learning_rate": 9.544049237815789e-06, "loss": 0.0, "step": 6380 }, { "epoch": 0.04573105274457883, "grad_norm": 0.0, "learning_rate": 9.543333571888644e-06, "loss": 0.0015, "step": 6390 }, { "epoch": 0.04580261933729335, "grad_norm": 0.0, "learning_rate": 9.542617905961498e-06, "loss": 0.0077, "step": 6400 }, { "epoch": 0.045874185930007874, "grad_norm": 0.0, "learning_rate": 9.541902240034353e-06, "loss": 0.0005, "step": 6410 }, { "epoch": 0.04594575252272239, "grad_norm": 0.0, "learning_rate": 9.541186574107208e-06, "loss": 0.0, "step": 6420 }, { "epoch": 0.04601731911543691, "grad_norm": 0.0, "learning_rate": 9.540470908180063e-06, "loss": 0.0339, "step": 6430 }, { "epoch": 0.046088885708151436, "grad_norm": 0.0, "learning_rate": 9.539755242252917e-06, "loss": 0.0001, "step": 6440 }, { "epoch": 0.046160452300865955, "grad_norm": 8.22164147393778e-05, "learning_rate": 9.539039576325772e-06, "loss": 0.0, "step": 6450 }, { "epoch": 0.04623201889358047, "grad_norm": 0.0, "learning_rate": 9.538323910398628e-06, "loss": 0.0004, "step": 6460 }, { "epoch": 0.046303585486295, "grad_norm": 0.0, "learning_rate": 9.537608244471481e-06, "loss": 0.0, "step": 6470 }, { "epoch": 0.04637515207900952, "grad_norm": 0.0, "learning_rate": 9.536892578544336e-06, "loss": 0.0, "step": 6480 }, { "epoch": 0.04644671867172404, "grad_norm": 0.0, "learning_rate": 9.536176912617192e-06, "loss": 0.1325, "step": 6490 }, { "epoch": 0.04651828526443856, "grad_norm": 0.0, "learning_rate": 9.535461246690045e-06, "loss": 0.0004, "step": 6500 }, { "epoch": 0.04658985185715308, "grad_norm": 1.9281767293932717e-09, "learning_rate": 9.5347455807629e-06, "loss": 0.0311, "step": 6510 }, { "epoch": 0.046661418449867605, "grad_norm": 5.719316220620385e-08, "learning_rate": 9.534029914835756e-06, "loss": 0.0009, "step": 6520 }, { "epoch": 0.04673298504258212, "grad_norm": 0.0, "learning_rate": 9.533314248908611e-06, "loss": 0.0, "step": 6530 }, { "epoch": 0.04680455163529664, "grad_norm": 0.0, "learning_rate": 9.532598582981465e-06, "loss": 0.0377, "step": 6540 }, { "epoch": 0.04687611822801117, "grad_norm": 2.579467661689705e-07, "learning_rate": 9.53188291705432e-06, "loss": 0.0, "step": 6550 }, { "epoch": 0.046947684820725685, "grad_norm": 225.89730834960938, "learning_rate": 9.531167251127175e-06, "loss": 0.0309, "step": 6560 }, { "epoch": 0.047019251413440204, "grad_norm": 0.0, "learning_rate": 9.530451585200029e-06, "loss": 0.0, "step": 6570 }, { "epoch": 0.04709081800615473, "grad_norm": 0.0, "learning_rate": 9.529735919272884e-06, "loss": 0.0344, "step": 6580 }, { "epoch": 0.04716238459886925, "grad_norm": 0.00043293152702972293, "learning_rate": 9.529020253345738e-06, "loss": 0.0, "step": 6590 }, { "epoch": 0.047233951191583766, "grad_norm": 0.0, "learning_rate": 9.528304587418595e-06, "loss": 0.0, "step": 6600 }, { "epoch": 0.04730551778429829, "grad_norm": 0.0721668228507042, "learning_rate": 9.527588921491449e-06, "loss": 0.0, "step": 6610 }, { "epoch": 0.04737708437701281, "grad_norm": 0.0, "learning_rate": 9.526873255564302e-06, "loss": 0.0001, "step": 6620 }, { "epoch": 0.04744865096972733, "grad_norm": 0.0, "learning_rate": 9.526157589637159e-06, "loss": 0.0, "step": 6630 }, { "epoch": 0.047520217562441854, "grad_norm": 0.0, "learning_rate": 9.525441923710013e-06, "loss": 0.0, "step": 6640 }, { "epoch": 0.04759178415515637, "grad_norm": 0.0003577364841476083, "learning_rate": 9.524726257782868e-06, "loss": 0.0024, "step": 6650 }, { "epoch": 0.0476633507478709, "grad_norm": 0.0, "learning_rate": 9.524010591855722e-06, "loss": 0.0007, "step": 6660 }, { "epoch": 0.047734917340585416, "grad_norm": 0.0, "learning_rate": 9.523294925928579e-06, "loss": 0.0, "step": 6670 }, { "epoch": 0.047806483933299934, "grad_norm": 3.229789972305298, "learning_rate": 9.522579260001432e-06, "loss": 0.0006, "step": 6680 }, { "epoch": 0.04787805052601446, "grad_norm": 0.0, "learning_rate": 9.521863594074286e-06, "loss": 0.0, "step": 6690 }, { "epoch": 0.04794961711872898, "grad_norm": 0.0, "learning_rate": 9.521147928147143e-06, "loss": 0.0029, "step": 6700 }, { "epoch": 0.048021183711443496, "grad_norm": 0.0, "learning_rate": 9.520432262219996e-06, "loss": 0.0012, "step": 6710 }, { "epoch": 0.04809275030415802, "grad_norm": 9.327368877265485e-10, "learning_rate": 9.519716596292852e-06, "loss": 0.0, "step": 6720 }, { "epoch": 0.04816431689687254, "grad_norm": 5.846037765877554e-06, "learning_rate": 9.519000930365705e-06, "loss": 0.0033, "step": 6730 }, { "epoch": 0.04823588348958706, "grad_norm": 0.0, "learning_rate": 9.51828526443856e-06, "loss": 0.0, "step": 6740 }, { "epoch": 0.048307450082301584, "grad_norm": 1.3710078001022339, "learning_rate": 9.517569598511416e-06, "loss": 0.0002, "step": 6750 }, { "epoch": 0.0483790166750161, "grad_norm": 0.0, "learning_rate": 9.51685393258427e-06, "loss": 0.0, "step": 6760 }, { "epoch": 0.04845058326773062, "grad_norm": 0.0, "learning_rate": 9.516138266657126e-06, "loss": 0.0351, "step": 6770 }, { "epoch": 0.048522149860445146, "grad_norm": 0.0, "learning_rate": 9.51542260072998e-06, "loss": 0.0001, "step": 6780 }, { "epoch": 0.048593716453159665, "grad_norm": 561.7884521484375, "learning_rate": 9.514706934802835e-06, "loss": 0.9184, "step": 6790 }, { "epoch": 0.04866528304587418, "grad_norm": 0.0, "learning_rate": 9.513991268875689e-06, "loss": 0.0232, "step": 6800 }, { "epoch": 0.04873684963858871, "grad_norm": 0.0, "learning_rate": 9.513275602948544e-06, "loss": 0.0057, "step": 6810 }, { "epoch": 0.04880841623130323, "grad_norm": 0.0, "learning_rate": 9.5125599370214e-06, "loss": 0.0, "step": 6820 }, { "epoch": 0.048879982824017745, "grad_norm": 2.2667938992526615e-07, "learning_rate": 9.511844271094253e-06, "loss": 0.0812, "step": 6830 }, { "epoch": 0.04895154941673227, "grad_norm": 0.0, "learning_rate": 9.51112860516711e-06, "loss": 0.006, "step": 6840 }, { "epoch": 0.04902311600944679, "grad_norm": 0.0, "learning_rate": 9.510412939239964e-06, "loss": 0.0195, "step": 6850 }, { "epoch": 0.049094682602161314, "grad_norm": 0.0, "learning_rate": 9.509697273312817e-06, "loss": 0.0, "step": 6860 }, { "epoch": 0.04916624919487583, "grad_norm": 0.0, "learning_rate": 9.508981607385673e-06, "loss": 0.0001, "step": 6870 }, { "epoch": 0.04923781578759035, "grad_norm": 0.0, "learning_rate": 9.508265941458528e-06, "loss": 0.1098, "step": 6880 }, { "epoch": 0.04930938238030488, "grad_norm": 0.0, "learning_rate": 9.507550275531383e-06, "loss": 0.0, "step": 6890 }, { "epoch": 0.049380948973019395, "grad_norm": 0.0012489601504057646, "learning_rate": 9.506834609604237e-06, "loss": 0.0, "step": 6900 }, { "epoch": 0.049452515565733914, "grad_norm": 0.0, "learning_rate": 9.506118943677092e-06, "loss": 0.0, "step": 6910 }, { "epoch": 0.04952408215844844, "grad_norm": 0.0, "learning_rate": 9.505403277749947e-06, "loss": 0.3576, "step": 6920 }, { "epoch": 0.04959564875116296, "grad_norm": 579.9403076171875, "learning_rate": 9.504687611822801e-06, "loss": 0.2783, "step": 6930 }, { "epoch": 0.049667215343877476, "grad_norm": 0.0, "learning_rate": 9.503971945895656e-06, "loss": 0.0, "step": 6940 }, { "epoch": 0.049738781936592, "grad_norm": 376.132568359375, "learning_rate": 9.503256279968511e-06, "loss": 0.082, "step": 6950 }, { "epoch": 0.04981034852930652, "grad_norm": 0.0, "learning_rate": 9.502540614041367e-06, "loss": 0.0003, "step": 6960 }, { "epoch": 0.04988191512202104, "grad_norm": 0.0, "learning_rate": 9.50182494811422e-06, "loss": 0.0, "step": 6970 }, { "epoch": 0.04995348171473556, "grad_norm": 0.0, "learning_rate": 9.501109282187076e-06, "loss": 0.0, "step": 6980 }, { "epoch": 0.05002504830745008, "grad_norm": 0.0, "learning_rate": 9.500393616259931e-06, "loss": 0.0, "step": 6990 }, { "epoch": 0.0500966149001646, "grad_norm": 0.0, "learning_rate": 9.499677950332785e-06, "loss": 0.0149, "step": 7000 }, { "epoch": 0.050168181492879126, "grad_norm": 0.0006756429793313146, "learning_rate": 9.49896228440564e-06, "loss": 0.0, "step": 7010 }, { "epoch": 0.050239748085593644, "grad_norm": 9.596314157533925e-06, "learning_rate": 9.498246618478495e-06, "loss": 0.0, "step": 7020 }, { "epoch": 0.05031131467830816, "grad_norm": 0.0, "learning_rate": 9.497530952551349e-06, "loss": 0.0, "step": 7030 }, { "epoch": 0.05038288127102269, "grad_norm": 0.0, "learning_rate": 9.496815286624204e-06, "loss": 0.0, "step": 7040 }, { "epoch": 0.050454447863737206, "grad_norm": 7.755664228170644e-06, "learning_rate": 9.49609962069706e-06, "loss": 0.0019, "step": 7050 }, { "epoch": 0.05052601445645173, "grad_norm": 0.0, "learning_rate": 9.495383954769915e-06, "loss": 0.0, "step": 7060 }, { "epoch": 0.05059758104916625, "grad_norm": 8.979421095567375e-10, "learning_rate": 9.494668288842768e-06, "loss": 0.0, "step": 7070 }, { "epoch": 0.05066914764188077, "grad_norm": 0.0, "learning_rate": 9.493952622915624e-06, "loss": 0.0, "step": 7080 }, { "epoch": 0.050740714234595294, "grad_norm": 66.60755157470703, "learning_rate": 9.493236956988479e-06, "loss": 0.0086, "step": 7090 }, { "epoch": 0.05081228082730981, "grad_norm": 0.0, "learning_rate": 9.492521291061332e-06, "loss": 0.0, "step": 7100 }, { "epoch": 0.05088384742002433, "grad_norm": 0.0, "learning_rate": 9.491805625134188e-06, "loss": 0.0754, "step": 7110 }, { "epoch": 0.050955414012738856, "grad_norm": 78.86189270019531, "learning_rate": 9.491089959207043e-06, "loss": 0.0149, "step": 7120 }, { "epoch": 0.051026980605453374, "grad_norm": 2.2235929009184474e-06, "learning_rate": 9.490374293279898e-06, "loss": 0.0, "step": 7130 }, { "epoch": 0.05109854719816789, "grad_norm": 0.0, "learning_rate": 9.489658627352752e-06, "loss": 0.0, "step": 7140 }, { "epoch": 0.05117011379088242, "grad_norm": 0.0016077926848083735, "learning_rate": 9.488942961425607e-06, "loss": 0.0, "step": 7150 }, { "epoch": 0.05124168038359694, "grad_norm": 0.00011808779527200386, "learning_rate": 9.488227295498462e-06, "loss": 0.001, "step": 7160 }, { "epoch": 0.051313246976311455, "grad_norm": 0.00012454883835744113, "learning_rate": 9.487511629571316e-06, "loss": 0.0, "step": 7170 }, { "epoch": 0.05138481356902598, "grad_norm": 0.0, "learning_rate": 9.486795963644171e-06, "loss": 0.0001, "step": 7180 }, { "epoch": 0.0514563801617405, "grad_norm": 0.0, "learning_rate": 9.486080297717027e-06, "loss": 0.0, "step": 7190 }, { "epoch": 0.05152794675445502, "grad_norm": 21.133756637573242, "learning_rate": 9.485364631789882e-06, "loss": 0.0026, "step": 7200 }, { "epoch": 0.05159951334716954, "grad_norm": 0.0, "learning_rate": 9.484648965862736e-06, "loss": 0.0, "step": 7210 }, { "epoch": 0.05167107993988406, "grad_norm": 0.0, "learning_rate": 9.48393329993559e-06, "loss": 0.0005, "step": 7220 }, { "epoch": 0.05174264653259858, "grad_norm": 1.849756681160386e-09, "learning_rate": 9.483217634008446e-06, "loss": 0.0037, "step": 7230 }, { "epoch": 0.051814213125313105, "grad_norm": 0.0, "learning_rate": 9.4825019680813e-06, "loss": 0.0, "step": 7240 }, { "epoch": 0.05188577971802762, "grad_norm": 0.0, "learning_rate": 9.481786302154155e-06, "loss": 0.0, "step": 7250 }, { "epoch": 0.05195734631074215, "grad_norm": 0.0, "learning_rate": 9.48107063622701e-06, "loss": 0.4727, "step": 7260 }, { "epoch": 0.05202891290345667, "grad_norm": 0.16378022730350494, "learning_rate": 9.480354970299864e-06, "loss": 0.0001, "step": 7270 }, { "epoch": 0.052100479496171186, "grad_norm": 0.0, "learning_rate": 9.47963930437272e-06, "loss": 0.0, "step": 7280 }, { "epoch": 0.05217204608888571, "grad_norm": 0.0, "learning_rate": 9.478923638445574e-06, "loss": 0.0, "step": 7290 }, { "epoch": 0.05224361268160023, "grad_norm": 0.0, "learning_rate": 9.47820797251843e-06, "loss": 0.0058, "step": 7300 }, { "epoch": 0.05231517927431475, "grad_norm": 0.0, "learning_rate": 9.477492306591283e-06, "loss": 0.0, "step": 7310 }, { "epoch": 0.05238674586702927, "grad_norm": 0.0, "learning_rate": 9.476776640664139e-06, "loss": 0.0, "step": 7320 }, { "epoch": 0.05245831245974379, "grad_norm": 0.0, "learning_rate": 9.476060974736994e-06, "loss": 0.0, "step": 7330 }, { "epoch": 0.05252987905245831, "grad_norm": 0.0, "learning_rate": 9.475345308809848e-06, "loss": 0.0, "step": 7340 }, { "epoch": 0.052601445645172835, "grad_norm": 0.0, "learning_rate": 9.474629642882703e-06, "loss": 0.0, "step": 7350 }, { "epoch": 0.052673012237887354, "grad_norm": 0.0, "learning_rate": 9.473913976955558e-06, "loss": 0.0, "step": 7360 }, { "epoch": 0.05274457883060187, "grad_norm": 0.0, "learning_rate": 9.473198311028413e-06, "loss": 0.0, "step": 7370 }, { "epoch": 0.0528161454233164, "grad_norm": 0.0, "learning_rate": 9.472482645101267e-06, "loss": 0.0413, "step": 7380 }, { "epoch": 0.052887712016030916, "grad_norm": 0.0, "learning_rate": 9.471766979174122e-06, "loss": 0.0019, "step": 7390 }, { "epoch": 0.052959278608745435, "grad_norm": 0.0, "learning_rate": 9.471051313246978e-06, "loss": 0.0001, "step": 7400 }, { "epoch": 0.05303084520145996, "grad_norm": 0.0019001037580892444, "learning_rate": 9.470335647319831e-06, "loss": 0.0, "step": 7410 }, { "epoch": 0.05310241179417448, "grad_norm": 0.0, "learning_rate": 9.469619981392686e-06, "loss": 0.0, "step": 7420 }, { "epoch": 0.053173978386889, "grad_norm": 0.0, "learning_rate": 9.468904315465542e-06, "loss": 0.0, "step": 7430 }, { "epoch": 0.05324554497960352, "grad_norm": 0.0, "learning_rate": 9.468188649538397e-06, "loss": 0.0, "step": 7440 }, { "epoch": 0.05331711157231804, "grad_norm": 0.6223142743110657, "learning_rate": 9.46747298361125e-06, "loss": 0.0001, "step": 7450 }, { "epoch": 0.053388678165032566, "grad_norm": 6.741573997715022e-06, "learning_rate": 9.466757317684106e-06, "loss": 0.0038, "step": 7460 }, { "epoch": 0.053460244757747084, "grad_norm": 0.0, "learning_rate": 9.466041651756961e-06, "loss": 0.1617, "step": 7470 }, { "epoch": 0.0535318113504616, "grad_norm": 1.3469473003624444e-07, "learning_rate": 9.465325985829815e-06, "loss": 0.0, "step": 7480 }, { "epoch": 0.05360337794317613, "grad_norm": 0.00352716026827693, "learning_rate": 9.46461031990267e-06, "loss": 0.0, "step": 7490 }, { "epoch": 0.05367494453589065, "grad_norm": 0.0, "learning_rate": 9.463894653975525e-06, "loss": 0.0021, "step": 7500 }, { "epoch": 0.053746511128605165, "grad_norm": 0.0, "learning_rate": 9.463178988048379e-06, "loss": 0.0, "step": 7510 }, { "epoch": 0.05381807772131969, "grad_norm": 0.0, "learning_rate": 9.462463322121234e-06, "loss": 0.0, "step": 7520 }, { "epoch": 0.05388964431403421, "grad_norm": 0.0, "learning_rate": 9.46174765619409e-06, "loss": 0.0, "step": 7530 }, { "epoch": 0.05396121090674873, "grad_norm": 0.0001359668531222269, "learning_rate": 9.461031990266945e-06, "loss": 0.0, "step": 7540 }, { "epoch": 0.05403277749946325, "grad_norm": 0.0, "learning_rate": 9.460316324339798e-06, "loss": 0.0, "step": 7550 }, { "epoch": 0.05410434409217777, "grad_norm": 0.0, "learning_rate": 9.459600658412654e-06, "loss": 0.0156, "step": 7560 }, { "epoch": 0.05417591068489229, "grad_norm": 0.0, "learning_rate": 9.458884992485509e-06, "loss": 0.0, "step": 7570 }, { "epoch": 0.054247477277606815, "grad_norm": 3.257649950683117e-05, "learning_rate": 9.458169326558363e-06, "loss": 0.0, "step": 7580 }, { "epoch": 0.05431904387032133, "grad_norm": 3.3109370178863173e-07, "learning_rate": 9.457453660631218e-06, "loss": 0.4063, "step": 7590 }, { "epoch": 0.05439061046303585, "grad_norm": 0.0, "learning_rate": 9.456737994704073e-06, "loss": 0.0, "step": 7600 }, { "epoch": 0.05446217705575038, "grad_norm": 0.0, "learning_rate": 9.456022328776929e-06, "loss": 0.0, "step": 7610 }, { "epoch": 0.054533743648464895, "grad_norm": 0.0, "learning_rate": 9.455306662849782e-06, "loss": 0.0, "step": 7620 }, { "epoch": 0.05460531024117942, "grad_norm": 0.0, "learning_rate": 9.454590996922637e-06, "loss": 0.0, "step": 7630 }, { "epoch": 0.05467687683389394, "grad_norm": 0.0, "learning_rate": 9.453875330995493e-06, "loss": 0.0, "step": 7640 }, { "epoch": 0.05474844342660846, "grad_norm": 0.0, "learning_rate": 9.453159665068346e-06, "loss": 0.0, "step": 7650 }, { "epoch": 0.05482001001932298, "grad_norm": 240.92994689941406, "learning_rate": 9.452443999141202e-06, "loss": 0.0415, "step": 7660 }, { "epoch": 0.0548915766120375, "grad_norm": 0.0, "learning_rate": 9.451728333214057e-06, "loss": 0.0, "step": 7670 }, { "epoch": 0.05496314320475202, "grad_norm": 0.0, "learning_rate": 9.45101266728691e-06, "loss": 0.0, "step": 7680 }, { "epoch": 0.055034709797466545, "grad_norm": 0.0, "learning_rate": 9.450297001359766e-06, "loss": 0.0, "step": 7690 }, { "epoch": 0.055106276390181064, "grad_norm": 0.0, "learning_rate": 9.449581335432621e-06, "loss": 0.0012, "step": 7700 }, { "epoch": 0.05517784298289558, "grad_norm": 0.0, "learning_rate": 9.448865669505476e-06, "loss": 0.0, "step": 7710 }, { "epoch": 0.05524940957561011, "grad_norm": 0.0016668770695105195, "learning_rate": 9.44815000357833e-06, "loss": 1.1977, "step": 7720 }, { "epoch": 0.055320976168324626, "grad_norm": 0.0, "learning_rate": 9.447434337651185e-06, "loss": 0.0004, "step": 7730 }, { "epoch": 0.055392542761039144, "grad_norm": 0.0, "learning_rate": 9.44671867172404e-06, "loss": 0.0, "step": 7740 }, { "epoch": 0.05546410935375367, "grad_norm": 0.0, "learning_rate": 9.446003005796894e-06, "loss": 0.0005, "step": 7750 }, { "epoch": 0.05553567594646819, "grad_norm": 2.081811389942345e-09, "learning_rate": 9.44528733986975e-06, "loss": 0.0, "step": 7760 }, { "epoch": 0.05560724253918271, "grad_norm": 5.4966683649126935e-08, "learning_rate": 9.444571673942605e-06, "loss": 0.0517, "step": 7770 }, { "epoch": 0.05567880913189723, "grad_norm": 0.0, "learning_rate": 9.44385600801546e-06, "loss": 0.0, "step": 7780 }, { "epoch": 0.05575037572461175, "grad_norm": 0.0, "learning_rate": 9.443140342088314e-06, "loss": 0.0, "step": 7790 }, { "epoch": 0.05582194231732627, "grad_norm": 0.0, "learning_rate": 9.442424676161169e-06, "loss": 0.0002, "step": 7800 }, { "epoch": 0.055893508910040794, "grad_norm": 1.663974513732569e-09, "learning_rate": 9.441709010234024e-06, "loss": 0.0002, "step": 7810 }, { "epoch": 0.05596507550275531, "grad_norm": 0.0, "learning_rate": 9.440993344306878e-06, "loss": 0.0377, "step": 7820 }, { "epoch": 0.05603664209546984, "grad_norm": 0.0, "learning_rate": 9.440277678379733e-06, "loss": 0.0, "step": 7830 }, { "epoch": 0.056108208688184356, "grad_norm": 1.3703170225198846e-05, "learning_rate": 9.439562012452588e-06, "loss": 0.0, "step": 7840 }, { "epoch": 0.056179775280898875, "grad_norm": 0.0, "learning_rate": 9.438846346525444e-06, "loss": 0.0037, "step": 7850 }, { "epoch": 0.0562513418736134, "grad_norm": 0.0, "learning_rate": 9.438130680598297e-06, "loss": 0.0, "step": 7860 }, { "epoch": 0.05632290846632792, "grad_norm": 0.05090527608990669, "learning_rate": 9.437415014671153e-06, "loss": 0.0042, "step": 7870 }, { "epoch": 0.05639447505904244, "grad_norm": 0.0, "learning_rate": 9.436699348744008e-06, "loss": 0.0002, "step": 7880 }, { "epoch": 0.05646604165175696, "grad_norm": 0.0, "learning_rate": 9.435983682816861e-06, "loss": 0.0774, "step": 7890 }, { "epoch": 0.05653760824447148, "grad_norm": 0.0, "learning_rate": 9.435268016889717e-06, "loss": 0.0, "step": 7900 }, { "epoch": 0.056609174837186, "grad_norm": 0.0, "learning_rate": 9.434552350962572e-06, "loss": 0.0, "step": 7910 }, { "epoch": 0.056680741429900525, "grad_norm": 9.041556040756404e-05, "learning_rate": 9.433836685035426e-06, "loss": 0.071, "step": 7920 }, { "epoch": 0.05675230802261504, "grad_norm": 0.0, "learning_rate": 9.433121019108281e-06, "loss": 0.0013, "step": 7930 }, { "epoch": 0.05682387461532956, "grad_norm": 4.5403683657241345e-07, "learning_rate": 9.432405353181136e-06, "loss": 0.6301, "step": 7940 }, { "epoch": 0.05689544120804409, "grad_norm": 0.0017093069618567824, "learning_rate": 9.431689687253992e-06, "loss": 0.0031, "step": 7950 }, { "epoch": 0.056967007800758605, "grad_norm": 5.87921817896131e-07, "learning_rate": 9.430974021326845e-06, "loss": 0.0005, "step": 7960 }, { "epoch": 0.057038574393473124, "grad_norm": 0.0, "learning_rate": 9.4302583553997e-06, "loss": 0.0, "step": 7970 }, { "epoch": 0.05711014098618765, "grad_norm": 0.00012978326412849128, "learning_rate": 9.429542689472556e-06, "loss": 0.0003, "step": 7980 }, { "epoch": 0.05718170757890217, "grad_norm": 0.0008131883223541081, "learning_rate": 9.42882702354541e-06, "loss": 0.0, "step": 7990 }, { "epoch": 0.057253274171616686, "grad_norm": 4.550854555418482e-06, "learning_rate": 9.428111357618265e-06, "loss": 0.0, "step": 8000 }, { "epoch": 0.05732484076433121, "grad_norm": 0.0, "learning_rate": 9.42739569169112e-06, "loss": 0.0066, "step": 8010 }, { "epoch": 0.05739640735704573, "grad_norm": 0.0, "learning_rate": 9.426680025763975e-06, "loss": 0.0, "step": 8020 }, { "epoch": 0.057467973949760255, "grad_norm": 3.121838092803955, "learning_rate": 9.425964359836829e-06, "loss": 0.0094, "step": 8030 }, { "epoch": 0.057539540542474774, "grad_norm": 4.764161842807368e-10, "learning_rate": 9.425248693909682e-06, "loss": 0.0, "step": 8040 }, { "epoch": 0.05761110713518929, "grad_norm": 0.0, "learning_rate": 9.42453302798254e-06, "loss": 0.0, "step": 8050 }, { "epoch": 0.05768267372790382, "grad_norm": 0.003946004435420036, "learning_rate": 9.423817362055393e-06, "loss": 0.0, "step": 8060 }, { "epoch": 0.057754240320618336, "grad_norm": 0.0, "learning_rate": 9.423101696128248e-06, "loss": 0.0, "step": 8070 }, { "epoch": 0.057825806913332854, "grad_norm": 0.0, "learning_rate": 9.422386030201104e-06, "loss": 0.0, "step": 8080 }, { "epoch": 0.05789737350604738, "grad_norm": 1.3354524242004118e-07, "learning_rate": 9.421670364273959e-06, "loss": 0.0, "step": 8090 }, { "epoch": 0.0579689400987619, "grad_norm": 0.0, "learning_rate": 9.420954698346812e-06, "loss": 0.3083, "step": 8100 }, { "epoch": 0.058040506691476416, "grad_norm": 0.37263792753219604, "learning_rate": 9.420239032419666e-06, "loss": 0.1771, "step": 8110 }, { "epoch": 0.05811207328419094, "grad_norm": 0.0, "learning_rate": 9.419523366492523e-06, "loss": 0.0, "step": 8120 }, { "epoch": 0.05818363987690546, "grad_norm": 1.5324035373964762e-08, "learning_rate": 9.418807700565377e-06, "loss": 0.0, "step": 8130 }, { "epoch": 0.05825520646961998, "grad_norm": 0.00866728276014328, "learning_rate": 9.418092034638232e-06, "loss": 0.0, "step": 8140 }, { "epoch": 0.058326773062334504, "grad_norm": 0.0, "learning_rate": 9.417376368711087e-06, "loss": 0.0, "step": 8150 }, { "epoch": 0.05839833965504902, "grad_norm": 0.0, "learning_rate": 9.41666070278394e-06, "loss": 0.0, "step": 8160 }, { "epoch": 0.05846990624776354, "grad_norm": 1.0942145586013794, "learning_rate": 9.415945036856796e-06, "loss": 0.0003, "step": 8170 }, { "epoch": 0.058541472840478066, "grad_norm": 0.0003087001387029886, "learning_rate": 9.41522937092965e-06, "loss": 0.0, "step": 8180 }, { "epoch": 0.058613039433192585, "grad_norm": 0.0, "learning_rate": 9.414513705002507e-06, "loss": 0.0, "step": 8190 }, { "epoch": 0.0586846060259071, "grad_norm": 5.197036898607621e-07, "learning_rate": 9.41379803907536e-06, "loss": 0.0, "step": 8200 }, { "epoch": 0.05875617261862163, "grad_norm": 0.0, "learning_rate": 9.413082373148216e-06, "loss": 0.0, "step": 8210 }, { "epoch": 0.05882773921133615, "grad_norm": 0.0014962888089939952, "learning_rate": 9.412366707221071e-06, "loss": 0.0001, "step": 8220 }, { "epoch": 0.05889930580405067, "grad_norm": 4.945029385972077e-10, "learning_rate": 9.411651041293924e-06, "loss": 0.0, "step": 8230 }, { "epoch": 0.05897087239676519, "grad_norm": 0.0, "learning_rate": 9.41093537536678e-06, "loss": 0.0, "step": 8240 }, { "epoch": 0.05904243898947971, "grad_norm": 8.170746856883682e-10, "learning_rate": 9.410219709439633e-06, "loss": 0.0022, "step": 8250 }, { "epoch": 0.059114005582194235, "grad_norm": 0.0, "learning_rate": 9.40950404351249e-06, "loss": 0.0, "step": 8260 }, { "epoch": 0.05918557217490875, "grad_norm": 0.0, "learning_rate": 9.408788377585344e-06, "loss": 0.0, "step": 8270 }, { "epoch": 0.05925713876762327, "grad_norm": 1.8766472820175295e-08, "learning_rate": 9.408072711658198e-06, "loss": 0.0, "step": 8280 }, { "epoch": 0.0593287053603378, "grad_norm": 0.0, "learning_rate": 9.407357045731055e-06, "loss": 0.0, "step": 8290 }, { "epoch": 0.059400271953052315, "grad_norm": 0.0, "learning_rate": 9.406641379803908e-06, "loss": 0.0, "step": 8300 }, { "epoch": 0.059471838545766834, "grad_norm": 4.473858505882333e-10, "learning_rate": 9.405925713876763e-06, "loss": 0.0, "step": 8310 }, { "epoch": 0.05954340513848136, "grad_norm": 1.2749460438499227e-06, "learning_rate": 9.405210047949617e-06, "loss": 0.001, "step": 8320 }, { "epoch": 0.05961497173119588, "grad_norm": 11.619837760925293, "learning_rate": 9.404494382022474e-06, "loss": 0.0026, "step": 8330 }, { "epoch": 0.059686538323910396, "grad_norm": 0.0, "learning_rate": 9.403778716095328e-06, "loss": 0.0, "step": 8340 }, { "epoch": 0.05975810491662492, "grad_norm": 0.0, "learning_rate": 9.403063050168181e-06, "loss": 0.0, "step": 8350 }, { "epoch": 0.05982967150933944, "grad_norm": 0.0, "learning_rate": 9.402347384241038e-06, "loss": 0.011, "step": 8360 }, { "epoch": 0.05990123810205396, "grad_norm": 0.0, "learning_rate": 9.401631718313892e-06, "loss": 0.0, "step": 8370 }, { "epoch": 0.05997280469476848, "grad_norm": 549.1867065429688, "learning_rate": 9.400916052386747e-06, "loss": 0.3274, "step": 8380 }, { "epoch": 0.060044371287483, "grad_norm": 0.0, "learning_rate": 9.4002003864596e-06, "loss": 0.0, "step": 8390 }, { "epoch": 0.06011593788019753, "grad_norm": 1.3698854672838934e-05, "learning_rate": 9.399484720532456e-06, "loss": 0.0, "step": 8400 }, { "epoch": 0.060187504472912046, "grad_norm": 0.0, "learning_rate": 9.398769054605311e-06, "loss": 0.012, "step": 8410 }, { "epoch": 0.060259071065626564, "grad_norm": 0.0, "learning_rate": 9.398053388678165e-06, "loss": 0.0, "step": 8420 }, { "epoch": 0.06033063765834109, "grad_norm": 1.0913445702698255e-09, "learning_rate": 9.397337722751022e-06, "loss": 0.0, "step": 8430 }, { "epoch": 0.06040220425105561, "grad_norm": 0.4040318727493286, "learning_rate": 9.396622056823875e-06, "loss": 0.0, "step": 8440 }, { "epoch": 0.060473770843770126, "grad_norm": 0.0006815181695856154, "learning_rate": 9.395906390896729e-06, "loss": 0.0139, "step": 8450 }, { "epoch": 0.06054533743648465, "grad_norm": 18.226016998291016, "learning_rate": 9.395190724969584e-06, "loss": 0.0023, "step": 8460 }, { "epoch": 0.06061690402919917, "grad_norm": 0.0, "learning_rate": 9.39447505904244e-06, "loss": 0.0008, "step": 8470 }, { "epoch": 0.06068847062191369, "grad_norm": 0.1819014996290207, "learning_rate": 9.393759393115295e-06, "loss": 0.0001, "step": 8480 }, { "epoch": 0.060760037214628214, "grad_norm": 0.0, "learning_rate": 9.393043727188148e-06, "loss": 0.0, "step": 8490 }, { "epoch": 0.06083160380734273, "grad_norm": 0.0, "learning_rate": 9.392328061261004e-06, "loss": 0.0, "step": 8500 }, { "epoch": 0.06090317040005725, "grad_norm": 0.0, "learning_rate": 9.391612395333859e-06, "loss": 0.0, "step": 8510 }, { "epoch": 0.060974736992771776, "grad_norm": 0.0, "learning_rate": 9.390896729406713e-06, "loss": 0.0, "step": 8520 }, { "epoch": 0.061046303585486295, "grad_norm": 0.04902293160557747, "learning_rate": 9.390181063479568e-06, "loss": 0.0116, "step": 8530 }, { "epoch": 0.06111787017820081, "grad_norm": 0.0, "learning_rate": 9.389465397552423e-06, "loss": 0.0, "step": 8540 }, { "epoch": 0.06118943677091534, "grad_norm": 0.0, "learning_rate": 9.388749731625279e-06, "loss": 0.0, "step": 8550 }, { "epoch": 0.06126100336362986, "grad_norm": 0.0, "learning_rate": 9.388034065698132e-06, "loss": 0.0, "step": 8560 }, { "epoch": 0.061332569956344375, "grad_norm": 0.0, "learning_rate": 9.387318399770987e-06, "loss": 0.0, "step": 8570 }, { "epoch": 0.0614041365490589, "grad_norm": 0.0, "learning_rate": 9.386602733843843e-06, "loss": 0.0001, "step": 8580 }, { "epoch": 0.06147570314177342, "grad_norm": 1.991730414374615e-06, "learning_rate": 9.385887067916696e-06, "loss": 0.3247, "step": 8590 }, { "epoch": 0.061547269734487944, "grad_norm": 0.0, "learning_rate": 9.385171401989552e-06, "loss": 0.0, "step": 8600 }, { "epoch": 0.06161883632720246, "grad_norm": 0.006242850795388222, "learning_rate": 9.384455736062407e-06, "loss": 0.0026, "step": 8610 }, { "epoch": 0.06169040291991698, "grad_norm": 46.66232681274414, "learning_rate": 9.383740070135262e-06, "loss": 0.0089, "step": 8620 }, { "epoch": 0.06176196951263151, "grad_norm": 0.002743537537753582, "learning_rate": 9.383024404208116e-06, "loss": 0.0313, "step": 8630 }, { "epoch": 0.061833536105346025, "grad_norm": 9.242214216165223e-10, "learning_rate": 9.382308738280971e-06, "loss": 0.0003, "step": 8640 }, { "epoch": 0.06190510269806054, "grad_norm": 0.0, "learning_rate": 9.381593072353826e-06, "loss": 0.2127, "step": 8650 }, { "epoch": 0.06197666929077507, "grad_norm": 2.458816927841667e-09, "learning_rate": 9.38087740642668e-06, "loss": 0.0, "step": 8660 }, { "epoch": 0.06204823588348959, "grad_norm": 1.0119212845438597e-07, "learning_rate": 9.380161740499535e-06, "loss": 0.0, "step": 8670 }, { "epoch": 0.062119802476204106, "grad_norm": 0.0, "learning_rate": 9.37944607457239e-06, "loss": 0.0001, "step": 8680 }, { "epoch": 0.06219136906891863, "grad_norm": 0.0, "learning_rate": 9.378730408645244e-06, "loss": 0.0, "step": 8690 }, { "epoch": 0.06226293566163315, "grad_norm": 1.0664789442671463e-06, "learning_rate": 9.3780147427181e-06, "loss": 0.0, "step": 8700 }, { "epoch": 0.06233450225434767, "grad_norm": 0.01013218704611063, "learning_rate": 9.377299076790955e-06, "loss": 0.0, "step": 8710 }, { "epoch": 0.06240606884706219, "grad_norm": 0.0007385671488009393, "learning_rate": 9.37658341086381e-06, "loss": 0.0001, "step": 8720 }, { "epoch": 0.06247763543977671, "grad_norm": 1.5214401483535767, "learning_rate": 9.375867744936664e-06, "loss": 0.0002, "step": 8730 }, { "epoch": 0.06254920203249123, "grad_norm": 0.0, "learning_rate": 9.375152079009519e-06, "loss": 0.0, "step": 8740 }, { "epoch": 0.06262076862520576, "grad_norm": 0.0, "learning_rate": 9.374436413082374e-06, "loss": 0.0, "step": 8750 }, { "epoch": 0.06269233521792028, "grad_norm": 9.899886208586395e-07, "learning_rate": 9.373720747155228e-06, "loss": 0.0322, "step": 8760 }, { "epoch": 0.06276390181063479, "grad_norm": 0.0, "learning_rate": 9.373005081228083e-06, "loss": 0.0, "step": 8770 }, { "epoch": 0.06283546840334932, "grad_norm": 0.21180526912212372, "learning_rate": 9.372289415300938e-06, "loss": 0.0038, "step": 8780 }, { "epoch": 0.06290703499606384, "grad_norm": 0.0, "learning_rate": 9.371573749373794e-06, "loss": 0.0036, "step": 8790 }, { "epoch": 0.06297860158877835, "grad_norm": 1.4959660177282785e-08, "learning_rate": 9.370858083446647e-06, "loss": 0.6845, "step": 8800 }, { "epoch": 0.06305016818149288, "grad_norm": 0.0, "learning_rate": 9.370142417519503e-06, "loss": 0.0888, "step": 8810 }, { "epoch": 0.0631217347742074, "grad_norm": 0.0, "learning_rate": 9.369426751592358e-06, "loss": 0.0001, "step": 8820 }, { "epoch": 0.06319330136692192, "grad_norm": 2.407598185527604e-05, "learning_rate": 9.368711085665211e-06, "loss": 0.0783, "step": 8830 }, { "epoch": 0.06326486795963644, "grad_norm": 0.0, "learning_rate": 9.367995419738067e-06, "loss": 0.009, "step": 8840 }, { "epoch": 0.06333643455235097, "grad_norm": 1.9201218393050112e-08, "learning_rate": 9.367279753810922e-06, "loss": 0.1062, "step": 8850 }, { "epoch": 0.06340800114506548, "grad_norm": 0.0, "learning_rate": 9.366564087883777e-06, "loss": 0.0107, "step": 8860 }, { "epoch": 0.06347956773778, "grad_norm": 0.0, "learning_rate": 9.365848421956631e-06, "loss": 0.0, "step": 8870 }, { "epoch": 0.06355113433049453, "grad_norm": 4.774442913912935e-06, "learning_rate": 9.365132756029486e-06, "loss": 0.0, "step": 8880 }, { "epoch": 0.06362270092320904, "grad_norm": 0.01356564462184906, "learning_rate": 9.364417090102342e-06, "loss": 0.0033, "step": 8890 }, { "epoch": 0.06369426751592357, "grad_norm": 1.0209919132364575e-08, "learning_rate": 9.363701424175195e-06, "loss": 0.0026, "step": 8900 }, { "epoch": 0.06376583410863809, "grad_norm": 0.0, "learning_rate": 9.36298575824805e-06, "loss": 0.0, "step": 8910 }, { "epoch": 0.0638374007013526, "grad_norm": 1.5507787054502842e-08, "learning_rate": 9.362270092320906e-06, "loss": 0.0, "step": 8920 }, { "epoch": 0.06390896729406713, "grad_norm": 8.740603796297819e-10, "learning_rate": 9.36155442639376e-06, "loss": 0.0001, "step": 8930 }, { "epoch": 0.06398053388678165, "grad_norm": 0.0, "learning_rate": 9.360838760466615e-06, "loss": 0.002, "step": 8940 }, { "epoch": 0.06405210047949617, "grad_norm": 0.10433376580476761, "learning_rate": 9.36012309453947e-06, "loss": 0.3834, "step": 8950 }, { "epoch": 0.06412366707221069, "grad_norm": 3.031552564536355e-09, "learning_rate": 9.359407428612325e-06, "loss": 0.0, "step": 8960 }, { "epoch": 0.06419523366492522, "grad_norm": 2.80192125501344e-07, "learning_rate": 9.358691762685179e-06, "loss": 0.0, "step": 8970 }, { "epoch": 0.06426680025763973, "grad_norm": 0.0, "learning_rate": 9.357976096758034e-06, "loss": 0.0, "step": 8980 }, { "epoch": 0.06433836685035425, "grad_norm": 0.0, "learning_rate": 9.35726043083089e-06, "loss": 0.0001, "step": 8990 }, { "epoch": 0.06440993344306878, "grad_norm": 0.0, "learning_rate": 9.356544764903743e-06, "loss": 0.0001, "step": 9000 }, { "epoch": 0.06448150003578329, "grad_norm": 0.0, "learning_rate": 9.355829098976598e-06, "loss": 0.0, "step": 9010 }, { "epoch": 0.06455306662849782, "grad_norm": 1.4106404933045269e-08, "learning_rate": 9.355113433049454e-06, "loss": 0.0, "step": 9020 }, { "epoch": 0.06462463322121234, "grad_norm": 4.716227067547152e-06, "learning_rate": 9.354397767122309e-06, "loss": 0.0, "step": 9030 }, { "epoch": 0.06469619981392685, "grad_norm": 0.0, "learning_rate": 9.353682101195162e-06, "loss": 0.0, "step": 9040 }, { "epoch": 0.06476776640664138, "grad_norm": 0.001833460759371519, "learning_rate": 9.352966435268018e-06, "loss": 0.1052, "step": 9050 }, { "epoch": 0.0648393329993559, "grad_norm": 0.0, "learning_rate": 9.352250769340873e-06, "loss": 0.0, "step": 9060 }, { "epoch": 0.06491089959207041, "grad_norm": 0.0, "learning_rate": 9.351535103413727e-06, "loss": 0.0009, "step": 9070 }, { "epoch": 0.06498246618478494, "grad_norm": 0.0, "learning_rate": 9.350819437486582e-06, "loss": 0.0, "step": 9080 }, { "epoch": 0.06505403277749947, "grad_norm": 0.0009053618414327502, "learning_rate": 9.350103771559437e-06, "loss": 0.0, "step": 9090 }, { "epoch": 0.06512559937021399, "grad_norm": 2.806034729019302e-07, "learning_rate": 9.349388105632292e-06, "loss": 0.0, "step": 9100 }, { "epoch": 0.0651971659629285, "grad_norm": 1.5566230615604582e-07, "learning_rate": 9.348672439705146e-06, "loss": 0.0, "step": 9110 }, { "epoch": 0.06526873255564303, "grad_norm": 0.0, "learning_rate": 9.347956773778001e-06, "loss": 0.4109, "step": 9120 }, { "epoch": 0.06534029914835755, "grad_norm": 0.0, "learning_rate": 9.347241107850857e-06, "loss": 0.0, "step": 9130 }, { "epoch": 0.06541186574107206, "grad_norm": 0.0, "learning_rate": 9.34652544192371e-06, "loss": 0.0033, "step": 9140 }, { "epoch": 0.06548343233378659, "grad_norm": 5.055346965789795, "learning_rate": 9.345809775996566e-06, "loss": 0.001, "step": 9150 }, { "epoch": 0.06555499892650112, "grad_norm": 0.0, "learning_rate": 9.34509411006942e-06, "loss": 0.0001, "step": 9160 }, { "epoch": 0.06562656551921563, "grad_norm": 0.0, "learning_rate": 9.344378444142274e-06, "loss": 0.0003, "step": 9170 }, { "epoch": 0.06569813211193015, "grad_norm": 0.0, "learning_rate": 9.34366277821513e-06, "loss": 0.0, "step": 9180 }, { "epoch": 0.06576969870464468, "grad_norm": 0.0, "learning_rate": 9.342947112287985e-06, "loss": 0.0005, "step": 9190 }, { "epoch": 0.06584126529735919, "grad_norm": 0.0, "learning_rate": 9.34223144636084e-06, "loss": 0.0001, "step": 9200 }, { "epoch": 0.06591283189007371, "grad_norm": 0.02790641412138939, "learning_rate": 9.341515780433694e-06, "loss": 0.002, "step": 9210 }, { "epoch": 0.06598439848278824, "grad_norm": 7.85626745223999, "learning_rate": 9.34080011450655e-06, "loss": 0.0018, "step": 9220 }, { "epoch": 0.06605596507550275, "grad_norm": 0.0, "learning_rate": 9.340084448579404e-06, "loss": 0.0, "step": 9230 }, { "epoch": 0.06612753166821728, "grad_norm": 0.0, "learning_rate": 9.339368782652258e-06, "loss": 0.0, "step": 9240 }, { "epoch": 0.0661990982609318, "grad_norm": 0.1284400373697281, "learning_rate": 9.338653116725113e-06, "loss": 0.0001, "step": 9250 }, { "epoch": 0.06627066485364631, "grad_norm": 0.0, "learning_rate": 9.337937450797969e-06, "loss": 0.0019, "step": 9260 }, { "epoch": 0.06634223144636084, "grad_norm": 0.0, "learning_rate": 9.337221784870824e-06, "loss": 0.0, "step": 9270 }, { "epoch": 0.06641379803907536, "grad_norm": 0.0, "learning_rate": 9.336506118943678e-06, "loss": 0.0, "step": 9280 }, { "epoch": 0.06648536463178988, "grad_norm": 0.0, "learning_rate": 9.335790453016533e-06, "loss": 0.0, "step": 9290 }, { "epoch": 0.0665569312245044, "grad_norm": 2.633376880112337e-06, "learning_rate": 9.335074787089388e-06, "loss": 0.0, "step": 9300 }, { "epoch": 0.06662849781721893, "grad_norm": 9.187318028125446e-06, "learning_rate": 9.334359121162242e-06, "loss": 0.0007, "step": 9310 }, { "epoch": 0.06670006440993344, "grad_norm": 0.0, "learning_rate": 9.333643455235097e-06, "loss": 0.0085, "step": 9320 }, { "epoch": 0.06677163100264796, "grad_norm": 0.0, "learning_rate": 9.332927789307952e-06, "loss": 0.0, "step": 9330 }, { "epoch": 0.06684319759536249, "grad_norm": 3.027222250651107e-09, "learning_rate": 9.332212123380806e-06, "loss": 0.0, "step": 9340 }, { "epoch": 0.066914764188077, "grad_norm": 5.55153292225441e-07, "learning_rate": 9.331496457453661e-06, "loss": 0.9399, "step": 9350 }, { "epoch": 0.06698633078079153, "grad_norm": 8.748679647396784e-06, "learning_rate": 9.330780791526517e-06, "loss": 0.0, "step": 9360 }, { "epoch": 0.06705789737350605, "grad_norm": 0.0, "learning_rate": 9.330065125599372e-06, "loss": 0.0, "step": 9370 }, { "epoch": 0.06712946396622056, "grad_norm": 0.0, "learning_rate": 9.329349459672225e-06, "loss": 0.0011, "step": 9380 }, { "epoch": 0.06720103055893509, "grad_norm": 0.0, "learning_rate": 9.32863379374508e-06, "loss": 0.0, "step": 9390 }, { "epoch": 0.06727259715164961, "grad_norm": 0.0003475857665762305, "learning_rate": 9.327918127817936e-06, "loss": 0.0, "step": 9400 }, { "epoch": 0.06734416374436412, "grad_norm": 0.0, "learning_rate": 9.32720246189079e-06, "loss": 1.0375, "step": 9410 }, { "epoch": 0.06741573033707865, "grad_norm": 0.0, "learning_rate": 9.326486795963645e-06, "loss": 0.0, "step": 9420 }, { "epoch": 0.06748729692979318, "grad_norm": 4.499409733682569e-10, "learning_rate": 9.3257711300365e-06, "loss": 0.2293, "step": 9430 }, { "epoch": 0.06755886352250769, "grad_norm": 2.331915766262682e-06, "learning_rate": 9.325055464109355e-06, "loss": 0.0001, "step": 9440 }, { "epoch": 0.06763043011522221, "grad_norm": 0.0, "learning_rate": 9.324339798182209e-06, "loss": 0.0, "step": 9450 }, { "epoch": 0.06770199670793674, "grad_norm": 0.5314128398895264, "learning_rate": 9.323624132255064e-06, "loss": 0.0006, "step": 9460 }, { "epoch": 0.06777356330065126, "grad_norm": 0.0, "learning_rate": 9.32290846632792e-06, "loss": 0.0019, "step": 9470 }, { "epoch": 0.06784512989336577, "grad_norm": 0.0, "learning_rate": 9.322192800400773e-06, "loss": 0.001, "step": 9480 }, { "epoch": 0.0679166964860803, "grad_norm": 2.238121669506654e-05, "learning_rate": 9.321477134473629e-06, "loss": 0.0002, "step": 9490 }, { "epoch": 0.06798826307879483, "grad_norm": 0.6022571921348572, "learning_rate": 9.320761468546484e-06, "loss": 0.0001, "step": 9500 }, { "epoch": 0.06805982967150934, "grad_norm": 7.539709656612104e-08, "learning_rate": 9.320045802619339e-06, "loss": 0.0173, "step": 9510 }, { "epoch": 0.06813139626422386, "grad_norm": 4.66366223417225e-10, "learning_rate": 9.319330136692193e-06, "loss": 0.0, "step": 9520 }, { "epoch": 0.06820296285693839, "grad_norm": 0.0, "learning_rate": 9.318614470765048e-06, "loss": 0.0, "step": 9530 }, { "epoch": 0.0682745294496529, "grad_norm": 0.0, "learning_rate": 9.317898804837903e-06, "loss": 0.0011, "step": 9540 }, { "epoch": 0.06834609604236742, "grad_norm": 0.0, "learning_rate": 9.317183138910757e-06, "loss": 0.0001, "step": 9550 }, { "epoch": 0.06841766263508195, "grad_norm": 6.853817467344925e-05, "learning_rate": 9.316467472983612e-06, "loss": 0.0001, "step": 9560 }, { "epoch": 0.06848922922779646, "grad_norm": 0.0, "learning_rate": 9.315751807056467e-06, "loss": 0.0, "step": 9570 }, { "epoch": 0.06856079582051099, "grad_norm": 0.0, "learning_rate": 9.315036141129321e-06, "loss": 0.0, "step": 9580 }, { "epoch": 0.06863236241322551, "grad_norm": 0.006736443378031254, "learning_rate": 9.314320475202176e-06, "loss": 0.0, "step": 9590 }, { "epoch": 0.06870392900594002, "grad_norm": 0.0, "learning_rate": 9.313604809275032e-06, "loss": 0.4125, "step": 9600 }, { "epoch": 0.06877549559865455, "grad_norm": 0.0019620510283857584, "learning_rate": 9.312889143347887e-06, "loss": 0.0, "step": 9610 }, { "epoch": 0.06884706219136907, "grad_norm": 0.0, "learning_rate": 9.31217347742074e-06, "loss": 0.0, "step": 9620 }, { "epoch": 0.06891862878408359, "grad_norm": 4.529957797760886e-10, "learning_rate": 9.311457811493596e-06, "loss": 0.0, "step": 9630 }, { "epoch": 0.06899019537679811, "grad_norm": 0.0, "learning_rate": 9.310742145566451e-06, "loss": 0.0, "step": 9640 }, { "epoch": 0.06906176196951264, "grad_norm": 10.474174499511719, "learning_rate": 9.310026479639305e-06, "loss": 0.0021, "step": 9650 }, { "epoch": 0.06913332856222715, "grad_norm": 0.0, "learning_rate": 9.30931081371216e-06, "loss": 0.0, "step": 9660 }, { "epoch": 0.06920489515494167, "grad_norm": 0.0, "learning_rate": 9.308595147785015e-06, "loss": 0.0, "step": 9670 }, { "epoch": 0.0692764617476562, "grad_norm": 0.0, "learning_rate": 9.30787948185787e-06, "loss": 0.0, "step": 9680 }, { "epoch": 0.06934802834037071, "grad_norm": 4.653358809392216e-10, "learning_rate": 9.307163815930724e-06, "loss": 0.0002, "step": 9690 }, { "epoch": 0.06941959493308524, "grad_norm": 0.0, "learning_rate": 9.306448150003578e-06, "loss": 0.0042, "step": 9700 }, { "epoch": 0.06949116152579976, "grad_norm": 0.0, "learning_rate": 9.305732484076435e-06, "loss": 0.0047, "step": 9710 }, { "epoch": 0.06956272811851427, "grad_norm": 13.178962707519531, "learning_rate": 9.305016818149288e-06, "loss": 0.0016, "step": 9720 }, { "epoch": 0.0696342947112288, "grad_norm": 0.0, "learning_rate": 9.304301152222144e-06, "loss": 0.0, "step": 9730 }, { "epoch": 0.06970586130394332, "grad_norm": 0.0, "learning_rate": 9.303585486294999e-06, "loss": 0.0, "step": 9740 }, { "epoch": 0.06977742789665783, "grad_norm": 0.0, "learning_rate": 9.302869820367854e-06, "loss": 0.0483, "step": 9750 }, { "epoch": 0.06984899448937236, "grad_norm": 0.149002343416214, "learning_rate": 9.302154154440708e-06, "loss": 0.0, "step": 9760 }, { "epoch": 0.06992056108208689, "grad_norm": 1.306491732597351, "learning_rate": 9.301438488513561e-06, "loss": 0.0002, "step": 9770 }, { "epoch": 0.0699921276748014, "grad_norm": 2.966798782348633, "learning_rate": 9.300722822586418e-06, "loss": 0.0006, "step": 9780 }, { "epoch": 0.07006369426751592, "grad_norm": 9.072952389388433e-10, "learning_rate": 9.300007156659272e-06, "loss": 0.0092, "step": 9790 }, { "epoch": 0.07013526086023045, "grad_norm": 0.0, "learning_rate": 9.299291490732127e-06, "loss": 0.0, "step": 9800 }, { "epoch": 0.07020682745294496, "grad_norm": 0.0, "learning_rate": 9.298575824804983e-06, "loss": 0.0, "step": 9810 }, { "epoch": 0.07027839404565948, "grad_norm": 2.6356368834967725e-05, "learning_rate": 9.297860158877836e-06, "loss": 0.0, "step": 9820 }, { "epoch": 0.07034996063837401, "grad_norm": 0.0, "learning_rate": 9.297144492950691e-06, "loss": 0.0, "step": 9830 }, { "epoch": 0.07042152723108852, "grad_norm": 0.0, "learning_rate": 9.296428827023545e-06, "loss": 0.0449, "step": 9840 }, { "epoch": 0.07049309382380305, "grad_norm": 2.6833293986783247e-07, "learning_rate": 9.295713161096402e-06, "loss": 0.0844, "step": 9850 }, { "epoch": 0.07056466041651757, "grad_norm": 0.0, "learning_rate": 9.294997495169256e-06, "loss": 0.0, "step": 9860 }, { "epoch": 0.0706362270092321, "grad_norm": 7.045436359476298e-05, "learning_rate": 9.294281829242111e-06, "loss": 0.0, "step": 9870 }, { "epoch": 0.07070779360194661, "grad_norm": 0.0, "learning_rate": 9.293566163314966e-06, "loss": 0.0, "step": 9880 }, { "epoch": 0.07077936019466113, "grad_norm": 3.1363112107385405e-09, "learning_rate": 9.29285049738782e-06, "loss": 0.0, "step": 9890 }, { "epoch": 0.07085092678737566, "grad_norm": 0.0, "learning_rate": 9.292134831460675e-06, "loss": 0.0001, "step": 9900 }, { "epoch": 0.07092249338009017, "grad_norm": 0.0, "learning_rate": 9.291419165533529e-06, "loss": 0.0, "step": 9910 }, { "epoch": 0.0709940599728047, "grad_norm": 0.01750229299068451, "learning_rate": 9.290703499606386e-06, "loss": 0.0038, "step": 9920 }, { "epoch": 0.07106562656551922, "grad_norm": 0.00039926666067913175, "learning_rate": 9.28998783367924e-06, "loss": 0.0, "step": 9930 }, { "epoch": 0.07113719315823373, "grad_norm": 0.0, "learning_rate": 9.289272167752093e-06, "loss": 0.0, "step": 9940 }, { "epoch": 0.07120875975094826, "grad_norm": 0.00010806491627590731, "learning_rate": 9.288556501824948e-06, "loss": 0.0, "step": 9950 }, { "epoch": 0.07128032634366278, "grad_norm": 1.003471350458085e-08, "learning_rate": 9.287840835897804e-06, "loss": 0.0, "step": 9960 }, { "epoch": 0.0713518929363773, "grad_norm": 9.933228994896126e-08, "learning_rate": 9.287125169970659e-06, "loss": 0.0003, "step": 9970 }, { "epoch": 0.07142345952909182, "grad_norm": 0.0, "learning_rate": 9.286481070636227e-06, "loss": 0.1259, "step": 9980 }, { "epoch": 0.07149502612180635, "grad_norm": 0.0, "learning_rate": 9.285765404709082e-06, "loss": 0.0, "step": 9990 }, { "epoch": 0.07156659271452086, "grad_norm": 0.13047413527965546, "learning_rate": 9.285049738781937e-06, "loss": 0.0, "step": 10000 }, { "epoch": 0.07163815930723538, "grad_norm": 0.0, "learning_rate": 9.284334072854793e-06, "loss": 0.0, "step": 10010 }, { "epoch": 0.07170972589994991, "grad_norm": 0.0, "learning_rate": 9.283618406927646e-06, "loss": 0.0, "step": 10020 }, { "epoch": 0.07178129249266442, "grad_norm": 1.8239298960054384e-09, "learning_rate": 9.282902741000502e-06, "loss": 0.0015, "step": 10030 }, { "epoch": 0.07185285908537895, "grad_norm": 9.627340205042856e-07, "learning_rate": 9.282187075073357e-06, "loss": 0.0, "step": 10040 }, { "epoch": 0.07192442567809347, "grad_norm": 0.0, "learning_rate": 9.28147140914621e-06, "loss": 0.0, "step": 10050 }, { "epoch": 0.07199599227080798, "grad_norm": 0.0, "learning_rate": 9.280755743219066e-06, "loss": 0.0, "step": 10060 }, { "epoch": 0.07206755886352251, "grad_norm": 0.00044391394476406276, "learning_rate": 9.280040077291921e-06, "loss": 0.0001, "step": 10070 }, { "epoch": 0.07213912545623703, "grad_norm": 2.252825879622833e-06, "learning_rate": 9.279324411364775e-06, "loss": 0.0076, "step": 10080 }, { "epoch": 0.07221069204895154, "grad_norm": 0.0, "learning_rate": 9.27860874543763e-06, "loss": 0.2521, "step": 10090 }, { "epoch": 0.07228225864166607, "grad_norm": 177.4409942626953, "learning_rate": 9.277893079510485e-06, "loss": 0.8909, "step": 10100 }, { "epoch": 0.0723538252343806, "grad_norm": 6.448484054999426e-05, "learning_rate": 9.27717741358334e-06, "loss": 0.0348, "step": 10110 }, { "epoch": 0.0724253918270951, "grad_norm": 0.0, "learning_rate": 9.276461747656194e-06, "loss": 0.0005, "step": 10120 }, { "epoch": 0.07249695841980963, "grad_norm": 0.0029142030980437994, "learning_rate": 9.27574608172905e-06, "loss": 0.0, "step": 10130 }, { "epoch": 0.07256852501252416, "grad_norm": 0.0, "learning_rate": 9.275030415801905e-06, "loss": 0.0597, "step": 10140 }, { "epoch": 0.07264009160523867, "grad_norm": 0.0, "learning_rate": 9.274314749874758e-06, "loss": 0.2117, "step": 10150 }, { "epoch": 0.0727116581979532, "grad_norm": 0.001750913099385798, "learning_rate": 9.273599083947614e-06, "loss": 0.0, "step": 10160 }, { "epoch": 0.07278322479066772, "grad_norm": 0.0, "learning_rate": 9.272883418020469e-06, "loss": 0.0, "step": 10170 }, { "epoch": 0.07285479138338223, "grad_norm": 0.0, "learning_rate": 9.272167752093324e-06, "loss": 0.0, "step": 10180 }, { "epoch": 0.07292635797609676, "grad_norm": 0.0, "learning_rate": 9.271452086166178e-06, "loss": 0.0, "step": 10190 }, { "epoch": 0.07299792456881128, "grad_norm": 0.0, "learning_rate": 9.270736420239033e-06, "loss": 0.0, "step": 10200 }, { "epoch": 0.07306949116152579, "grad_norm": 0.0, "learning_rate": 9.270020754311888e-06, "loss": 0.0, "step": 10210 }, { "epoch": 0.07314105775424032, "grad_norm": 0.0009528295486234128, "learning_rate": 9.269305088384742e-06, "loss": 0.0014, "step": 10220 }, { "epoch": 0.07321262434695484, "grad_norm": 0.0, "learning_rate": 9.268589422457597e-06, "loss": 0.0016, "step": 10230 }, { "epoch": 0.07328419093966937, "grad_norm": 8.024548151297495e-05, "learning_rate": 9.267873756530453e-06, "loss": 0.0, "step": 10240 }, { "epoch": 0.07335575753238388, "grad_norm": 0.0, "learning_rate": 9.267158090603308e-06, "loss": 0.0, "step": 10250 }, { "epoch": 0.0734273241250984, "grad_norm": 12.4443941116333, "learning_rate": 9.266442424676161e-06, "loss": 0.0026, "step": 10260 }, { "epoch": 0.07349889071781293, "grad_norm": 0.0, "learning_rate": 9.265726758749017e-06, "loss": 0.0, "step": 10270 }, { "epoch": 0.07357045731052744, "grad_norm": 0.11527879536151886, "learning_rate": 9.265011092821872e-06, "loss": 0.0, "step": 10280 }, { "epoch": 0.07364202390324197, "grad_norm": 0.0, "learning_rate": 9.264295426894726e-06, "loss": 0.0, "step": 10290 }, { "epoch": 0.0737135904959565, "grad_norm": 0.0, "learning_rate": 9.263579760967581e-06, "loss": 0.0139, "step": 10300 }, { "epoch": 0.073785157088671, "grad_norm": 4.002395428415184e-07, "learning_rate": 9.262864095040436e-06, "loss": 0.0, "step": 10310 }, { "epoch": 0.07385672368138553, "grad_norm": 0.0, "learning_rate": 9.26214842911329e-06, "loss": 0.16, "step": 10320 }, { "epoch": 0.07392829027410006, "grad_norm": 1.413757085800171, "learning_rate": 9.261432763186145e-06, "loss": 0.2019, "step": 10330 }, { "epoch": 0.07399985686681457, "grad_norm": 0.0, "learning_rate": 9.260717097259e-06, "loss": 0.0, "step": 10340 }, { "epoch": 0.07407142345952909, "grad_norm": 1.0610978762315426e-09, "learning_rate": 9.260001431331856e-06, "loss": 0.0, "step": 10350 }, { "epoch": 0.07414299005224362, "grad_norm": 0.0, "learning_rate": 9.25928576540471e-06, "loss": 0.0, "step": 10360 }, { "epoch": 0.07421455664495813, "grad_norm": 0.00034043978666886687, "learning_rate": 9.258570099477565e-06, "loss": 0.1406, "step": 10370 }, { "epoch": 0.07428612323767265, "grad_norm": 0.0, "learning_rate": 9.25785443355042e-06, "loss": 0.4457, "step": 10380 }, { "epoch": 0.07435768983038718, "grad_norm": 2.4395643283270374e-09, "learning_rate": 9.257138767623273e-06, "loss": 0.0, "step": 10390 }, { "epoch": 0.07442925642310169, "grad_norm": 0.0, "learning_rate": 9.256423101696129e-06, "loss": 0.0, "step": 10400 }, { "epoch": 0.07450082301581622, "grad_norm": 0.0, "learning_rate": 9.255707435768984e-06, "loss": 0.2154, "step": 10410 }, { "epoch": 0.07457238960853074, "grad_norm": 0.0, "learning_rate": 9.25499176984184e-06, "loss": 0.1688, "step": 10420 }, { "epoch": 0.07464395620124525, "grad_norm": 0.03961986303329468, "learning_rate": 9.254276103914693e-06, "loss": 0.0186, "step": 10430 }, { "epoch": 0.07471552279395978, "grad_norm": 1.5572214806525153e-07, "learning_rate": 9.253560437987548e-06, "loss": 0.191, "step": 10440 }, { "epoch": 0.0747870893866743, "grad_norm": 0.0, "learning_rate": 9.252844772060404e-06, "loss": 0.0, "step": 10450 }, { "epoch": 0.07485865597938882, "grad_norm": 0.0, "learning_rate": 9.252129106133257e-06, "loss": 0.0001, "step": 10460 }, { "epoch": 0.07493022257210334, "grad_norm": 0.11628541350364685, "learning_rate": 9.251413440206112e-06, "loss": 0.0, "step": 10470 }, { "epoch": 0.07500178916481787, "grad_norm": 0.00104580691549927, "learning_rate": 9.250697774278968e-06, "loss": 1.2537, "step": 10480 }, { "epoch": 0.07507335575753238, "grad_norm": 7.77556579123484e-08, "learning_rate": 9.249982108351823e-06, "loss": 0.0, "step": 10490 }, { "epoch": 0.0751449223502469, "grad_norm": 0.0, "learning_rate": 9.249266442424677e-06, "loss": 0.0, "step": 10500 }, { "epoch": 0.07521648894296143, "grad_norm": 0.0, "learning_rate": 9.248550776497532e-06, "loss": 0.0, "step": 10510 }, { "epoch": 0.07528805553567594, "grad_norm": 1.4247437715530396, "learning_rate": 9.247835110570387e-06, "loss": 0.0003, "step": 10520 }, { "epoch": 0.07535962212839047, "grad_norm": 4.5126627434832756e-10, "learning_rate": 9.24711944464324e-06, "loss": 0.6098, "step": 10530 }, { "epoch": 0.07543118872110499, "grad_norm": 0.000688025844283402, "learning_rate": 9.246403778716096e-06, "loss": 0.0, "step": 10540 }, { "epoch": 0.0755027553138195, "grad_norm": 0.0, "learning_rate": 9.245688112788951e-06, "loss": 0.0, "step": 10550 }, { "epoch": 0.07557432190653403, "grad_norm": 0.0, "learning_rate": 9.244972446861805e-06, "loss": 0.0, "step": 10560 }, { "epoch": 0.07564588849924855, "grad_norm": 0.0, "learning_rate": 9.24425678093466e-06, "loss": 0.0001, "step": 10570 }, { "epoch": 0.07571745509196307, "grad_norm": 236.2851104736328, "learning_rate": 9.243541115007516e-06, "loss": 0.2228, "step": 10580 }, { "epoch": 0.07578902168467759, "grad_norm": 0.0, "learning_rate": 9.242825449080371e-06, "loss": 0.0, "step": 10590 }, { "epoch": 0.07586058827739212, "grad_norm": 3.683777686092071e-05, "learning_rate": 9.242109783153224e-06, "loss": 0.0, "step": 10600 }, { "epoch": 0.07593215487010663, "grad_norm": 0.0, "learning_rate": 9.24139411722608e-06, "loss": 0.3865, "step": 10610 }, { "epoch": 0.07600372146282115, "grad_norm": 0.0, "learning_rate": 9.240678451298935e-06, "loss": 0.0, "step": 10620 }, { "epoch": 0.07607528805553568, "grad_norm": 0.0, "learning_rate": 9.239962785371789e-06, "loss": 0.001, "step": 10630 }, { "epoch": 0.0761468546482502, "grad_norm": 0.0, "learning_rate": 9.239247119444644e-06, "loss": 0.0, "step": 10640 }, { "epoch": 0.07621842124096471, "grad_norm": 0.0, "learning_rate": 9.2385314535175e-06, "loss": 0.0, "step": 10650 }, { "epoch": 0.07628998783367924, "grad_norm": 0.0, "learning_rate": 9.237815787590355e-06, "loss": 0.0, "step": 10660 }, { "epoch": 0.07636155442639377, "grad_norm": 0.0003561459307093173, "learning_rate": 9.237100121663208e-06, "loss": 0.0, "step": 10670 }, { "epoch": 0.07643312101910828, "grad_norm": 0.0, "learning_rate": 9.236384455736063e-06, "loss": 0.0498, "step": 10680 }, { "epoch": 0.0765046876118228, "grad_norm": 0.0, "learning_rate": 9.235668789808919e-06, "loss": 0.0, "step": 10690 }, { "epoch": 0.07657625420453733, "grad_norm": 0.636433482170105, "learning_rate": 9.234953123881772e-06, "loss": 0.0108, "step": 10700 }, { "epoch": 0.07664782079725184, "grad_norm": 0.00014136522077023983, "learning_rate": 9.234237457954628e-06, "loss": 0.0, "step": 10710 }, { "epoch": 0.07671938738996636, "grad_norm": 0.010991804301738739, "learning_rate": 9.233521792027483e-06, "loss": 0.0, "step": 10720 }, { "epoch": 0.07679095398268089, "grad_norm": 1.6508751699007007e-08, "learning_rate": 9.232806126100338e-06, "loss": 0.0002, "step": 10730 }, { "epoch": 0.0768625205753954, "grad_norm": 0.6178150177001953, "learning_rate": 9.232090460173192e-06, "loss": 0.0001, "step": 10740 }, { "epoch": 0.07693408716810993, "grad_norm": 0.0, "learning_rate": 9.231374794246047e-06, "loss": 0.0, "step": 10750 }, { "epoch": 0.07700565376082445, "grad_norm": 4.14059591293335, "learning_rate": 9.230659128318902e-06, "loss": 0.0047, "step": 10760 }, { "epoch": 0.07707722035353896, "grad_norm": 0.0, "learning_rate": 9.229943462391756e-06, "loss": 0.0, "step": 10770 }, { "epoch": 0.07714878694625349, "grad_norm": 3.84620379989542e-09, "learning_rate": 9.229227796464611e-06, "loss": 0.0103, "step": 10780 }, { "epoch": 0.07722035353896801, "grad_norm": 0.0, "learning_rate": 9.228512130537467e-06, "loss": 0.0001, "step": 10790 }, { "epoch": 0.07729192013168253, "grad_norm": 0.0, "learning_rate": 9.22779646461032e-06, "loss": 0.0, "step": 10800 }, { "epoch": 0.07736348672439705, "grad_norm": 0.08102172613143921, "learning_rate": 9.227080798683175e-06, "loss": 0.0, "step": 10810 }, { "epoch": 0.07743505331711158, "grad_norm": 0.0, "learning_rate": 9.22636513275603e-06, "loss": 0.0001, "step": 10820 }, { "epoch": 0.07750661990982609, "grad_norm": 5.380459308624268, "learning_rate": 9.225649466828886e-06, "loss": 0.0013, "step": 10830 }, { "epoch": 0.07757818650254061, "grad_norm": 0.14638370275497437, "learning_rate": 9.22493380090174e-06, "loss": 0.0, "step": 10840 }, { "epoch": 0.07764975309525514, "grad_norm": 0.0, "learning_rate": 9.224218134974595e-06, "loss": 0.0, "step": 10850 }, { "epoch": 0.07772131968796965, "grad_norm": 0.0, "learning_rate": 9.22350246904745e-06, "loss": 0.0001, "step": 10860 }, { "epoch": 0.07779288628068418, "grad_norm": 0.0, "learning_rate": 9.222786803120304e-06, "loss": 0.0008, "step": 10870 }, { "epoch": 0.0778644528733987, "grad_norm": 0.0, "learning_rate": 9.222071137193159e-06, "loss": 0.0, "step": 10880 }, { "epoch": 0.07793601946611321, "grad_norm": 0.0, "learning_rate": 9.221355471266014e-06, "loss": 0.0, "step": 10890 }, { "epoch": 0.07800758605882774, "grad_norm": 0.0, "learning_rate": 9.22063980533887e-06, "loss": 0.0, "step": 10900 }, { "epoch": 0.07807915265154226, "grad_norm": 0.0, "learning_rate": 9.219924139411723e-06, "loss": 0.0, "step": 10910 }, { "epoch": 0.07815071924425677, "grad_norm": 0.00032212858786806464, "learning_rate": 9.219208473484579e-06, "loss": 0.0013, "step": 10920 }, { "epoch": 0.0782222858369713, "grad_norm": 0.0, "learning_rate": 9.218492807557434e-06, "loss": 0.0, "step": 10930 }, { "epoch": 0.07829385242968583, "grad_norm": 9.40803324134265e-10, "learning_rate": 9.217777141630287e-06, "loss": 0.0, "step": 10940 }, { "epoch": 0.07836541902240034, "grad_norm": 0.003370970021933317, "learning_rate": 9.217061475703143e-06, "loss": 0.0, "step": 10950 }, { "epoch": 0.07843698561511486, "grad_norm": 0.0, "learning_rate": 9.216345809775998e-06, "loss": 0.1095, "step": 10960 }, { "epoch": 0.07850855220782939, "grad_norm": 0.0, "learning_rate": 9.215630143848852e-06, "loss": 0.0, "step": 10970 }, { "epoch": 0.0785801188005439, "grad_norm": 5.193958041438407e-10, "learning_rate": 9.214914477921707e-06, "loss": 0.0, "step": 10980 }, { "epoch": 0.07865168539325842, "grad_norm": 4.55710358338024e-10, "learning_rate": 9.21419881199456e-06, "loss": 0.0, "step": 10990 }, { "epoch": 0.07872325198597295, "grad_norm": 0.0, "learning_rate": 9.213483146067417e-06, "loss": 0.0, "step": 11000 }, { "epoch": 0.07879481857868748, "grad_norm": 4.4895848150261486e-10, "learning_rate": 9.212767480140271e-06, "loss": 0.0001, "step": 11010 }, { "epoch": 0.07886638517140199, "grad_norm": 0.0, "learning_rate": 9.212051814213126e-06, "loss": 0.003, "step": 11020 }, { "epoch": 0.07893795176411651, "grad_norm": 8.932646778703202e-06, "learning_rate": 9.211336148285982e-06, "loss": 0.0, "step": 11030 }, { "epoch": 0.07900951835683104, "grad_norm": 9.968525773729198e-07, "learning_rate": 9.210620482358835e-06, "loss": 0.0058, "step": 11040 }, { "epoch": 0.07908108494954555, "grad_norm": 0.0, "learning_rate": 9.20990481643169e-06, "loss": 0.0348, "step": 11050 }, { "epoch": 0.07915265154226007, "grad_norm": 0.02663247473537922, "learning_rate": 9.209189150504544e-06, "loss": 0.3508, "step": 11060 }, { "epoch": 0.0792242181349746, "grad_norm": 1.100792178476695e-05, "learning_rate": 9.208545051170114e-06, "loss": 0.3356, "step": 11070 }, { "epoch": 0.07929578472768911, "grad_norm": 0.00041680067079141736, "learning_rate": 9.20782938524297e-06, "loss": 0.0, "step": 11080 }, { "epoch": 0.07936735132040364, "grad_norm": 3.5780200846602384e-07, "learning_rate": 9.207113719315825e-06, "loss": 0.0019, "step": 11090 }, { "epoch": 0.07943891791311816, "grad_norm": 0.0, "learning_rate": 9.206398053388678e-06, "loss": 0.0, "step": 11100 }, { "epoch": 0.07951048450583267, "grad_norm": 1.7725514389610453e-09, "learning_rate": 9.205682387461533e-06, "loss": 0.0008, "step": 11110 }, { "epoch": 0.0795820510985472, "grad_norm": 0.0, "learning_rate": 9.204966721534389e-06, "loss": 0.0, "step": 11120 }, { "epoch": 0.07965361769126172, "grad_norm": 0.0, "learning_rate": 9.204251055607242e-06, "loss": 0.0733, "step": 11130 }, { "epoch": 0.07972518428397624, "grad_norm": 1.4154669046401978, "learning_rate": 9.203535389680098e-06, "loss": 0.0002, "step": 11140 }, { "epoch": 0.07979675087669076, "grad_norm": 0.0, "learning_rate": 9.202819723752953e-06, "loss": 0.0009, "step": 11150 }, { "epoch": 0.07986831746940529, "grad_norm": 0.0, "learning_rate": 9.202104057825808e-06, "loss": 0.0, "step": 11160 }, { "epoch": 0.0799398840621198, "grad_norm": 0.0, "learning_rate": 9.201388391898662e-06, "loss": 0.0, "step": 11170 }, { "epoch": 0.08001145065483432, "grad_norm": 0.0, "learning_rate": 9.200672725971517e-06, "loss": 0.0, "step": 11180 }, { "epoch": 0.08008301724754885, "grad_norm": 0.6658857464790344, "learning_rate": 9.199957060044372e-06, "loss": 0.0003, "step": 11190 }, { "epoch": 0.08015458384026336, "grad_norm": 0.0, "learning_rate": 9.199241394117226e-06, "loss": 0.0, "step": 11200 }, { "epoch": 0.08022615043297789, "grad_norm": 0.0, "learning_rate": 9.198525728190081e-06, "loss": 0.0, "step": 11210 }, { "epoch": 0.08029771702569241, "grad_norm": 0.0013004272477701306, "learning_rate": 9.197810062262937e-06, "loss": 0.0, "step": 11220 }, { "epoch": 0.08036928361840692, "grad_norm": 4.840660317029233e-09, "learning_rate": 9.197094396335792e-06, "loss": 0.0067, "step": 11230 }, { "epoch": 0.08044085021112145, "grad_norm": 0.000831717043183744, "learning_rate": 9.196378730408645e-06, "loss": 0.0002, "step": 11240 }, { "epoch": 0.08051241680383597, "grad_norm": 0.0, "learning_rate": 9.1956630644815e-06, "loss": 0.0, "step": 11250 }, { "epoch": 0.08058398339655048, "grad_norm": 0.0, "learning_rate": 9.194947398554356e-06, "loss": 0.0, "step": 11260 }, { "epoch": 0.08065554998926501, "grad_norm": 0.0017510458128526807, "learning_rate": 9.19423173262721e-06, "loss": 0.0012, "step": 11270 }, { "epoch": 0.08072711658197954, "grad_norm": 10.584012985229492, "learning_rate": 9.193516066700065e-06, "loss": 0.0046, "step": 11280 }, { "epoch": 0.08079868317469405, "grad_norm": 0.0, "learning_rate": 9.19280040077292e-06, "loss": 0.0, "step": 11290 }, { "epoch": 0.08087024976740857, "grad_norm": 0.0, "learning_rate": 9.192084734845774e-06, "loss": 0.0, "step": 11300 }, { "epoch": 0.0809418163601231, "grad_norm": 0.0, "learning_rate": 9.191369068918629e-06, "loss": 0.0, "step": 11310 }, { "epoch": 0.08101338295283761, "grad_norm": 0.0, "learning_rate": 9.190653402991484e-06, "loss": 0.0, "step": 11320 }, { "epoch": 0.08108494954555213, "grad_norm": 0.0, "learning_rate": 9.18993773706434e-06, "loss": 0.0, "step": 11330 }, { "epoch": 0.08115651613826666, "grad_norm": 9.496657327190405e-08, "learning_rate": 9.189222071137193e-06, "loss": 0.0001, "step": 11340 }, { "epoch": 0.08122808273098117, "grad_norm": 0.0, "learning_rate": 9.188506405210049e-06, "loss": 0.0012, "step": 11350 }, { "epoch": 0.0812996493236957, "grad_norm": 0.0, "learning_rate": 9.187790739282904e-06, "loss": 0.0, "step": 11360 }, { "epoch": 0.08137121591641022, "grad_norm": 0.0, "learning_rate": 9.187075073355757e-06, "loss": 0.0, "step": 11370 }, { "epoch": 0.08144278250912473, "grad_norm": 0.0, "learning_rate": 9.186359407428613e-06, "loss": 0.0, "step": 11380 }, { "epoch": 0.08151434910183926, "grad_norm": 0.0, "learning_rate": 9.185643741501468e-06, "loss": 0.0, "step": 11390 }, { "epoch": 0.08158591569455378, "grad_norm": 0.0008196388371288776, "learning_rate": 9.184928075574323e-06, "loss": 0.0, "step": 11400 }, { "epoch": 0.08165748228726831, "grad_norm": 0.0, "learning_rate": 9.184212409647177e-06, "loss": 0.0089, "step": 11410 }, { "epoch": 0.08172904887998282, "grad_norm": 0.0, "learning_rate": 9.183496743720032e-06, "loss": 0.0, "step": 11420 }, { "epoch": 0.08180061547269735, "grad_norm": 0.0, "learning_rate": 9.182781077792887e-06, "loss": 0.0013, "step": 11430 }, { "epoch": 0.08187218206541187, "grad_norm": 0.0994313508272171, "learning_rate": 9.182065411865741e-06, "loss": 0.0027, "step": 11440 }, { "epoch": 0.08194374865812638, "grad_norm": 9.618753571416505e-10, "learning_rate": 9.181349745938596e-06, "loss": 0.0, "step": 11450 }, { "epoch": 0.08201531525084091, "grad_norm": 0.0, "learning_rate": 9.180634080011452e-06, "loss": 0.0007, "step": 11460 }, { "epoch": 0.08208688184355543, "grad_norm": 0.0, "learning_rate": 9.179918414084307e-06, "loss": 0.0, "step": 11470 }, { "epoch": 0.08215844843626995, "grad_norm": 0.5605391263961792, "learning_rate": 9.17920274815716e-06, "loss": 0.7446, "step": 11480 }, { "epoch": 0.08223001502898447, "grad_norm": 0.0, "learning_rate": 9.178487082230016e-06, "loss": 0.1198, "step": 11490 }, { "epoch": 0.082301581621699, "grad_norm": 0.0, "learning_rate": 9.177771416302871e-06, "loss": 0.0, "step": 11500 }, { "epoch": 0.08237314821441351, "grad_norm": 7.527523848693818e-05, "learning_rate": 9.177055750375725e-06, "loss": 0.0, "step": 11510 }, { "epoch": 0.08244471480712803, "grad_norm": 0.00037351695937104523, "learning_rate": 9.17634008444858e-06, "loss": 0.0001, "step": 11520 }, { "epoch": 0.08251628139984256, "grad_norm": 4.326618618932798e-09, "learning_rate": 9.175624418521435e-06, "loss": 0.0, "step": 11530 }, { "epoch": 0.08258784799255707, "grad_norm": 0.0, "learning_rate": 9.174908752594289e-06, "loss": 0.0, "step": 11540 }, { "epoch": 0.0826594145852716, "grad_norm": 0.0, "learning_rate": 9.174193086667144e-06, "loss": 0.0, "step": 11550 }, { "epoch": 0.08273098117798612, "grad_norm": 0.0, "learning_rate": 9.17347742074e-06, "loss": 0.3781, "step": 11560 }, { "epoch": 0.08280254777070063, "grad_norm": 9.306821424637235e-10, "learning_rate": 9.172761754812855e-06, "loss": 0.0, "step": 11570 }, { "epoch": 0.08287411436341516, "grad_norm": 0.0, "learning_rate": 9.172046088885708e-06, "loss": 0.0, "step": 11580 }, { "epoch": 0.08294568095612968, "grad_norm": 0.0, "learning_rate": 9.171330422958564e-06, "loss": 0.0, "step": 11590 }, { "epoch": 0.0830172475488442, "grad_norm": 0.0, "learning_rate": 9.170614757031419e-06, "loss": 0.0, "step": 11600 }, { "epoch": 0.08308881414155872, "grad_norm": 1.2646493985357665e-07, "learning_rate": 9.169899091104273e-06, "loss": 0.0, "step": 11610 }, { "epoch": 0.08316038073427325, "grad_norm": 9.053742408752441, "learning_rate": 9.169183425177128e-06, "loss": 0.0025, "step": 11620 }, { "epoch": 0.08323194732698776, "grad_norm": 0.0, "learning_rate": 9.168467759249983e-06, "loss": 0.0004, "step": 11630 }, { "epoch": 0.08330351391970228, "grad_norm": 0.00043414218816906214, "learning_rate": 9.167752093322838e-06, "loss": 0.0073, "step": 11640 }, { "epoch": 0.08337508051241681, "grad_norm": 1.4449471564148553e-05, "learning_rate": 9.167036427395692e-06, "loss": 0.0, "step": 11650 }, { "epoch": 0.08344664710513132, "grad_norm": 6.647328376629957e-08, "learning_rate": 9.166320761468547e-06, "loss": 0.0, "step": 11660 }, { "epoch": 0.08351821369784584, "grad_norm": 0.0, "learning_rate": 9.165605095541403e-06, "loss": 0.0, "step": 11670 }, { "epoch": 0.08358978029056037, "grad_norm": 0.0, "learning_rate": 9.164889429614256e-06, "loss": 0.0, "step": 11680 }, { "epoch": 0.08366134688327488, "grad_norm": 4.549449443817139, "learning_rate": 9.164173763687112e-06, "loss": 0.0008, "step": 11690 }, { "epoch": 0.0837329134759894, "grad_norm": 9.502161280039445e-10, "learning_rate": 9.163458097759967e-06, "loss": 0.0, "step": 11700 }, { "epoch": 0.08380448006870393, "grad_norm": 0.0, "learning_rate": 9.16274243183282e-06, "loss": 0.0, "step": 11710 }, { "epoch": 0.08387604666141844, "grad_norm": 1.0932009075759197e-08, "learning_rate": 9.162026765905676e-06, "loss": 0.0002, "step": 11720 }, { "epoch": 0.08394761325413297, "grad_norm": 0.08641468733549118, "learning_rate": 9.161311099978531e-06, "loss": 0.0004, "step": 11730 }, { "epoch": 0.0840191798468475, "grad_norm": 0.0, "learning_rate": 9.160595434051386e-06, "loss": 0.0005, "step": 11740 }, { "epoch": 0.084090746439562, "grad_norm": 0.0, "learning_rate": 9.15987976812424e-06, "loss": 0.0, "step": 11750 }, { "epoch": 0.08416231303227653, "grad_norm": 0.0, "learning_rate": 9.159164102197095e-06, "loss": 0.0, "step": 11760 }, { "epoch": 0.08423387962499106, "grad_norm": 1.8414876290506754e-09, "learning_rate": 9.15844843626995e-06, "loss": 0.0, "step": 11770 }, { "epoch": 0.08430544621770558, "grad_norm": 0.0, "learning_rate": 9.157732770342804e-06, "loss": 0.0, "step": 11780 }, { "epoch": 0.0843770128104201, "grad_norm": 0.0, "learning_rate": 9.15701710441566e-06, "loss": 0.0, "step": 11790 }, { "epoch": 0.08444857940313462, "grad_norm": 0.0, "learning_rate": 9.156301438488515e-06, "loss": 0.0, "step": 11800 }, { "epoch": 0.08452014599584914, "grad_norm": 0.0, "learning_rate": 9.15558577256137e-06, "loss": 0.0, "step": 11810 }, { "epoch": 0.08459171258856366, "grad_norm": 0.0, "learning_rate": 9.154870106634224e-06, "loss": 0.0, "step": 11820 }, { "epoch": 0.08466327918127818, "grad_norm": 2.0152466362333143e-09, "learning_rate": 9.154154440707079e-06, "loss": 0.0, "step": 11830 }, { "epoch": 0.0847348457739927, "grad_norm": 0.0, "learning_rate": 9.153438774779934e-06, "loss": 0.0, "step": 11840 }, { "epoch": 0.08480641236670722, "grad_norm": 3.3889921269292245e-07, "learning_rate": 9.152723108852788e-06, "loss": 0.0013, "step": 11850 }, { "epoch": 0.08487797895942174, "grad_norm": 0.0, "learning_rate": 9.152007442925643e-06, "loss": 0.0, "step": 11860 }, { "epoch": 0.08494954555213627, "grad_norm": 0.0, "learning_rate": 9.151291776998498e-06, "loss": 0.0, "step": 11870 }, { "epoch": 0.08502111214485078, "grad_norm": 0.001899143448099494, "learning_rate": 9.150576111071354e-06, "loss": 0.0, "step": 11880 }, { "epoch": 0.0850926787375653, "grad_norm": 9.260227670893073e-05, "learning_rate": 9.149860445144207e-06, "loss": 0.0001, "step": 11890 }, { "epoch": 0.08516424533027983, "grad_norm": 0.00041558005614206195, "learning_rate": 9.149144779217062e-06, "loss": 0.0, "step": 11900 }, { "epoch": 0.08523581192299434, "grad_norm": 6.375748373699253e-09, "learning_rate": 9.148429113289918e-06, "loss": 0.0, "step": 11910 }, { "epoch": 0.08530737851570887, "grad_norm": 0.0, "learning_rate": 9.147713447362771e-06, "loss": 0.0, "step": 11920 }, { "epoch": 0.08537894510842339, "grad_norm": 2.0993520877254923e-07, "learning_rate": 9.146997781435627e-06, "loss": 0.0, "step": 11930 }, { "epoch": 0.0854505117011379, "grad_norm": 0.0, "learning_rate": 9.146282115508482e-06, "loss": 0.0086, "step": 11940 }, { "epoch": 0.08552207829385243, "grad_norm": 0.0007135146879591048, "learning_rate": 9.145566449581336e-06, "loss": 0.0, "step": 11950 }, { "epoch": 0.08559364488656696, "grad_norm": 0.0, "learning_rate": 9.14485078365419e-06, "loss": 0.0002, "step": 11960 }, { "epoch": 0.08566521147928147, "grad_norm": 0.0, "learning_rate": 9.144135117727046e-06, "loss": 0.0102, "step": 11970 }, { "epoch": 0.08573677807199599, "grad_norm": 0.0, "learning_rate": 9.143419451799901e-06, "loss": 0.0, "step": 11980 }, { "epoch": 0.08580834466471052, "grad_norm": 0.0, "learning_rate": 9.142703785872755e-06, "loss": 0.012, "step": 11990 }, { "epoch": 0.08587991125742503, "grad_norm": 0.0, "learning_rate": 9.14198811994561e-06, "loss": 0.0, "step": 12000 }, { "epoch": 0.08595147785013955, "grad_norm": 6.065101310070986e-09, "learning_rate": 9.141272454018466e-06, "loss": 0.0, "step": 12010 }, { "epoch": 0.08602304444285408, "grad_norm": 0.0, "learning_rate": 9.14055678809132e-06, "loss": 0.0002, "step": 12020 }, { "epoch": 0.08609461103556859, "grad_norm": 2.1240153728285804e-05, "learning_rate": 9.139841122164174e-06, "loss": 0.0, "step": 12030 }, { "epoch": 0.08616617762828312, "grad_norm": 0.0, "learning_rate": 9.13912545623703e-06, "loss": 0.0001, "step": 12040 }, { "epoch": 0.08623774422099764, "grad_norm": 0.25101831555366516, "learning_rate": 9.138409790309885e-06, "loss": 0.3824, "step": 12050 }, { "epoch": 0.08630931081371215, "grad_norm": 4.1771158976189326e-08, "learning_rate": 9.137694124382739e-06, "loss": 0.0, "step": 12060 }, { "epoch": 0.08638087740642668, "grad_norm": 0.0, "learning_rate": 9.136978458455594e-06, "loss": 0.0, "step": 12070 }, { "epoch": 0.0864524439991412, "grad_norm": 0.0002558997948653996, "learning_rate": 9.13626279252845e-06, "loss": 0.0003, "step": 12080 }, { "epoch": 0.08652401059185572, "grad_norm": 96.91093444824219, "learning_rate": 9.135547126601303e-06, "loss": 0.0331, "step": 12090 }, { "epoch": 0.08659557718457024, "grad_norm": 0.548186182975769, "learning_rate": 9.134831460674158e-06, "loss": 0.0001, "step": 12100 }, { "epoch": 0.08666714377728477, "grad_norm": 0.0, "learning_rate": 9.134115794747013e-06, "loss": 0.0015, "step": 12110 }, { "epoch": 0.08673871036999928, "grad_norm": 0.0, "learning_rate": 9.133400128819869e-06, "loss": 0.0, "step": 12120 }, { "epoch": 0.0868102769627138, "grad_norm": 0.0, "learning_rate": 9.132684462892722e-06, "loss": 0.0, "step": 12130 }, { "epoch": 0.08688184355542833, "grad_norm": 1.2747715118166525e-05, "learning_rate": 9.131968796965578e-06, "loss": 0.0055, "step": 12140 }, { "epoch": 0.08695341014814284, "grad_norm": 0.0, "learning_rate": 9.131253131038433e-06, "loss": 0.3514, "step": 12150 }, { "epoch": 0.08702497674085737, "grad_norm": 2.9312232072697952e-06, "learning_rate": 9.130537465111287e-06, "loss": 0.0, "step": 12160 }, { "epoch": 0.08709654333357189, "grad_norm": 5.043420969741419e-06, "learning_rate": 9.129821799184142e-06, "loss": 0.0, "step": 12170 }, { "epoch": 0.08716810992628642, "grad_norm": 0.0, "learning_rate": 9.129106133256997e-06, "loss": 0.0, "step": 12180 }, { "epoch": 0.08723967651900093, "grad_norm": 0.0, "learning_rate": 9.12839046732985e-06, "loss": 0.0, "step": 12190 }, { "epoch": 0.08731124311171545, "grad_norm": 0.0, "learning_rate": 9.127674801402706e-06, "loss": 0.0, "step": 12200 }, { "epoch": 0.08738280970442998, "grad_norm": 0.0, "learning_rate": 9.126959135475561e-06, "loss": 0.0, "step": 12210 }, { "epoch": 0.08745437629714449, "grad_norm": 0.0011910384055227041, "learning_rate": 9.126243469548417e-06, "loss": 0.0, "step": 12220 }, { "epoch": 0.08752594288985902, "grad_norm": 0.0016735098324716091, "learning_rate": 9.12552780362127e-06, "loss": 0.0, "step": 12230 }, { "epoch": 0.08759750948257354, "grad_norm": 7.34348191144818e-07, "learning_rate": 9.124812137694125e-06, "loss": 0.0, "step": 12240 }, { "epoch": 0.08766907607528805, "grad_norm": 0.0, "learning_rate": 9.12409647176698e-06, "loss": 0.0, "step": 12250 }, { "epoch": 0.08774064266800258, "grad_norm": 0.0003429837815929204, "learning_rate": 9.123380805839834e-06, "loss": 0.0001, "step": 12260 }, { "epoch": 0.0878122092607171, "grad_norm": 0.0, "learning_rate": 9.12266513991269e-06, "loss": 0.0001, "step": 12270 }, { "epoch": 0.08788377585343161, "grad_norm": 0.0, "learning_rate": 9.121949473985545e-06, "loss": 0.0003, "step": 12280 }, { "epoch": 0.08795534244614614, "grad_norm": 0.0, "learning_rate": 9.1212338080584e-06, "loss": 0.118, "step": 12290 }, { "epoch": 0.08802690903886067, "grad_norm": 0.0, "learning_rate": 9.120518142131254e-06, "loss": 0.0086, "step": 12300 }, { "epoch": 0.08809847563157518, "grad_norm": 0.08217266201972961, "learning_rate": 9.119802476204107e-06, "loss": 0.3206, "step": 12310 }, { "epoch": 0.0881700422242897, "grad_norm": 0.0015168326208367944, "learning_rate": 9.119086810276964e-06, "loss": 0.0, "step": 12320 }, { "epoch": 0.08824160881700423, "grad_norm": 0.0, "learning_rate": 9.118371144349818e-06, "loss": 0.0001, "step": 12330 }, { "epoch": 0.08831317540971874, "grad_norm": 0.0, "learning_rate": 9.117655478422673e-06, "loss": 0.0328, "step": 12340 }, { "epoch": 0.08838474200243326, "grad_norm": 0.0, "learning_rate": 9.116939812495529e-06, "loss": 0.0003, "step": 12350 }, { "epoch": 0.08845630859514779, "grad_norm": 1.9748115676065936e-07, "learning_rate": 9.116224146568384e-06, "loss": 0.2269, "step": 12360 }, { "epoch": 0.0885278751878623, "grad_norm": 0.0, "learning_rate": 9.115508480641237e-06, "loss": 0.0, "step": 12370 }, { "epoch": 0.08859944178057683, "grad_norm": 0.47288277745246887, "learning_rate": 9.114792814714091e-06, "loss": 0.0001, "step": 12380 }, { "epoch": 0.08867100837329135, "grad_norm": 0.0, "learning_rate": 9.114077148786948e-06, "loss": 0.003, "step": 12390 }, { "epoch": 0.08874257496600586, "grad_norm": 0.0, "learning_rate": 9.113361482859802e-06, "loss": 0.0, "step": 12400 }, { "epoch": 0.08881414155872039, "grad_norm": 0.0, "learning_rate": 9.112645816932657e-06, "loss": 0.0175, "step": 12410 }, { "epoch": 0.08888570815143491, "grad_norm": 0.0007354781264439225, "learning_rate": 9.11193015100551e-06, "loss": 0.0, "step": 12420 }, { "epoch": 0.08895727474414943, "grad_norm": 3.11434519062459e-06, "learning_rate": 9.111214485078366e-06, "loss": 0.0002, "step": 12430 }, { "epoch": 0.08902884133686395, "grad_norm": 2.510569974134569e-09, "learning_rate": 9.110498819151221e-06, "loss": 0.0, "step": 12440 }, { "epoch": 0.08910040792957848, "grad_norm": 0.0, "learning_rate": 9.109783153224075e-06, "loss": 0.0, "step": 12450 }, { "epoch": 0.08917197452229299, "grad_norm": 0.005093445535749197, "learning_rate": 9.109067487296932e-06, "loss": 0.0, "step": 12460 }, { "epoch": 0.08924354111500751, "grad_norm": 0.00045226459042169154, "learning_rate": 9.108351821369785e-06, "loss": 0.0, "step": 12470 }, { "epoch": 0.08931510770772204, "grad_norm": 0.00841194111853838, "learning_rate": 9.107636155442639e-06, "loss": 0.0, "step": 12480 }, { "epoch": 0.08938667430043655, "grad_norm": 0.0, "learning_rate": 9.106920489515494e-06, "loss": 0.0003, "step": 12490 }, { "epoch": 0.08945824089315108, "grad_norm": 1.999409278141684e-06, "learning_rate": 9.10620482358835e-06, "loss": 0.0, "step": 12500 }, { "epoch": 0.0895298074858656, "grad_norm": 0.0, "learning_rate": 9.105489157661205e-06, "loss": 0.0333, "step": 12510 }, { "epoch": 0.08960137407858011, "grad_norm": 0.0, "learning_rate": 9.104773491734058e-06, "loss": 0.192, "step": 12520 }, { "epoch": 0.08967294067129464, "grad_norm": 0.0, "learning_rate": 9.104057825806915e-06, "loss": 0.0262, "step": 12530 }, { "epoch": 0.08974450726400916, "grad_norm": 0.0, "learning_rate": 9.103342159879769e-06, "loss": 0.0, "step": 12540 }, { "epoch": 0.08981607385672369, "grad_norm": 0.0, "learning_rate": 9.102626493952623e-06, "loss": 0.0, "step": 12550 }, { "epoch": 0.0898876404494382, "grad_norm": 0.0, "learning_rate": 9.101910828025478e-06, "loss": 0.0003, "step": 12560 }, { "epoch": 0.08995920704215273, "grad_norm": 0.0, "learning_rate": 9.101195162098333e-06, "loss": 0.0, "step": 12570 }, { "epoch": 0.09003077363486725, "grad_norm": 5.724181164623587e-07, "learning_rate": 9.100479496171188e-06, "loss": 0.0, "step": 12580 }, { "epoch": 0.09010234022758176, "grad_norm": 0.0, "learning_rate": 9.099763830244042e-06, "loss": 0.0, "step": 12590 }, { "epoch": 0.09017390682029629, "grad_norm": 0.0, "learning_rate": 9.099048164316897e-06, "loss": 0.0, "step": 12600 }, { "epoch": 0.09024547341301081, "grad_norm": 9.446510489397042e-07, "learning_rate": 9.098332498389753e-06, "loss": 0.0, "step": 12610 }, { "epoch": 0.09031704000572532, "grad_norm": 7.809931616975518e-07, "learning_rate": 9.097616832462606e-06, "loss": 0.0005, "step": 12620 }, { "epoch": 0.09038860659843985, "grad_norm": 0.0, "learning_rate": 9.096901166535461e-06, "loss": 0.0, "step": 12630 }, { "epoch": 0.09046017319115437, "grad_norm": 3.121883537460235e-06, "learning_rate": 9.096185500608317e-06, "loss": 0.0, "step": 12640 }, { "epoch": 0.09053173978386889, "grad_norm": 5.828292160003912e-06, "learning_rate": 9.095469834681172e-06, "loss": 0.0, "step": 12650 }, { "epoch": 0.09060330637658341, "grad_norm": 0.0007387693622149527, "learning_rate": 9.094754168754026e-06, "loss": 0.0, "step": 12660 }, { "epoch": 0.09067487296929794, "grad_norm": 0.0, "learning_rate": 9.094038502826881e-06, "loss": 0.0, "step": 12670 }, { "epoch": 0.09074643956201245, "grad_norm": 1.4846145290903223e-07, "learning_rate": 9.093322836899736e-06, "loss": 0.0, "step": 12680 }, { "epoch": 0.09081800615472697, "grad_norm": 0.0, "learning_rate": 9.09260717097259e-06, "loss": 0.6312, "step": 12690 }, { "epoch": 0.0908895727474415, "grad_norm": 6.312442337730317e-07, "learning_rate": 9.091891505045445e-06, "loss": 0.0, "step": 12700 }, { "epoch": 0.09096113934015601, "grad_norm": 0.0, "learning_rate": 9.0911758391183e-06, "loss": 0.0, "step": 12710 }, { "epoch": 0.09103270593287054, "grad_norm": 0.0, "learning_rate": 9.090460173191154e-06, "loss": 0.0024, "step": 12720 }, { "epoch": 0.09110427252558506, "grad_norm": 0.0, "learning_rate": 9.08974450726401e-06, "loss": 0.0, "step": 12730 }, { "epoch": 0.09117583911829957, "grad_norm": 2.245663210942439e-07, "learning_rate": 9.089028841336865e-06, "loss": 0.0, "step": 12740 }, { "epoch": 0.0912474057110141, "grad_norm": 0.0, "learning_rate": 9.08831317540972e-06, "loss": 0.0583, "step": 12750 }, { "epoch": 0.09131897230372862, "grad_norm": 15.268804550170898, "learning_rate": 9.087597509482574e-06, "loss": 0.0026, "step": 12760 }, { "epoch": 0.09139053889644314, "grad_norm": 0.0, "learning_rate": 9.086881843555429e-06, "loss": 0.0229, "step": 12770 }, { "epoch": 0.09146210548915766, "grad_norm": 0.0, "learning_rate": 9.086166177628284e-06, "loss": 0.0001, "step": 12780 }, { "epoch": 0.09153367208187219, "grad_norm": 9.038607085010142e-10, "learning_rate": 9.085450511701138e-06, "loss": 0.0001, "step": 12790 }, { "epoch": 0.0916052386745867, "grad_norm": 0.0, "learning_rate": 9.084734845773993e-06, "loss": 0.001, "step": 12800 }, { "epoch": 0.09167680526730122, "grad_norm": 6.148587772258907e-07, "learning_rate": 9.084019179846848e-06, "loss": 0.0, "step": 12810 }, { "epoch": 0.09174837186001575, "grad_norm": 0.0, "learning_rate": 9.083303513919704e-06, "loss": 0.0, "step": 12820 }, { "epoch": 0.09181993845273026, "grad_norm": 0.0, "learning_rate": 9.082587847992557e-06, "loss": 0.0, "step": 12830 }, { "epoch": 0.09189150504544479, "grad_norm": 8.228629155837552e-08, "learning_rate": 9.081872182065412e-06, "loss": 0.0, "step": 12840 }, { "epoch": 0.09196307163815931, "grad_norm": 0.0, "learning_rate": 9.081156516138268e-06, "loss": 0.0, "step": 12850 }, { "epoch": 0.09203463823087382, "grad_norm": 0.0, "learning_rate": 9.080440850211121e-06, "loss": 0.0, "step": 12860 }, { "epoch": 0.09210620482358835, "grad_norm": 0.0, "learning_rate": 9.079725184283977e-06, "loss": 0.1162, "step": 12870 }, { "epoch": 0.09217777141630287, "grad_norm": 0.0, "learning_rate": 9.079009518356832e-06, "loss": 0.0023, "step": 12880 }, { "epoch": 0.09224933800901738, "grad_norm": 0.0, "learning_rate": 9.078293852429687e-06, "loss": 0.0, "step": 12890 }, { "epoch": 0.09232090460173191, "grad_norm": 0.0, "learning_rate": 9.07757818650254e-06, "loss": 0.0002, "step": 12900 }, { "epoch": 0.09239247119444643, "grad_norm": 0.0, "learning_rate": 9.076862520575396e-06, "loss": 0.0, "step": 12910 }, { "epoch": 0.09246403778716095, "grad_norm": 0.0, "learning_rate": 9.076146854648251e-06, "loss": 0.0001, "step": 12920 }, { "epoch": 0.09253560437987547, "grad_norm": 0.0034777377732098103, "learning_rate": 9.075431188721105e-06, "loss": 0.0026, "step": 12930 }, { "epoch": 0.09260717097259, "grad_norm": 0.0, "learning_rate": 9.07471552279396e-06, "loss": 0.0, "step": 12940 }, { "epoch": 0.09267873756530452, "grad_norm": 2.590312988104415e-06, "learning_rate": 9.073999856866816e-06, "loss": 0.0001, "step": 12950 }, { "epoch": 0.09275030415801903, "grad_norm": 0.0021060798317193985, "learning_rate": 9.07328419093967e-06, "loss": 0.0, "step": 12960 }, { "epoch": 0.09282187075073356, "grad_norm": 1.600529628831282e-07, "learning_rate": 9.072568525012524e-06, "loss": 0.0, "step": 12970 }, { "epoch": 0.09289343734344808, "grad_norm": 0.0, "learning_rate": 9.07185285908538e-06, "loss": 0.0, "step": 12980 }, { "epoch": 0.0929650039361626, "grad_norm": 0.0, "learning_rate": 9.071137193158235e-06, "loss": 0.0033, "step": 12990 }, { "epoch": 0.09303657052887712, "grad_norm": 161.27337646484375, "learning_rate": 9.070421527231089e-06, "loss": 0.0188, "step": 13000 }, { "epoch": 0.09310813712159165, "grad_norm": 0.012008128687739372, "learning_rate": 9.069705861303944e-06, "loss": 0.0, "step": 13010 }, { "epoch": 0.09317970371430616, "grad_norm": 0.0, "learning_rate": 9.0689901953768e-06, "loss": 0.0006, "step": 13020 }, { "epoch": 0.09325127030702068, "grad_norm": 0.0, "learning_rate": 9.068274529449653e-06, "loss": 0.0, "step": 13030 }, { "epoch": 0.09332283689973521, "grad_norm": 6.25878939786162e-08, "learning_rate": 9.067558863522508e-06, "loss": 0.0, "step": 13040 }, { "epoch": 0.09339440349244972, "grad_norm": 0.0, "learning_rate": 9.066843197595363e-06, "loss": 0.6693, "step": 13050 }, { "epoch": 0.09346597008516425, "grad_norm": 1.2056921150360722e-07, "learning_rate": 9.066127531668219e-06, "loss": 0.0092, "step": 13060 }, { "epoch": 0.09353753667787877, "grad_norm": 9.202097430716094e-07, "learning_rate": 9.065411865741072e-06, "loss": 0.0, "step": 13070 }, { "epoch": 0.09360910327059328, "grad_norm": 9.785103429749142e-07, "learning_rate": 9.064696199813928e-06, "loss": 0.0001, "step": 13080 }, { "epoch": 0.09368066986330781, "grad_norm": 0.00013679114636033773, "learning_rate": 9.063980533886783e-06, "loss": 0.0, "step": 13090 }, { "epoch": 0.09375223645602233, "grad_norm": 0.0, "learning_rate": 9.063264867959636e-06, "loss": 0.0, "step": 13100 }, { "epoch": 0.09382380304873685, "grad_norm": 0.0, "learning_rate": 9.062549202032492e-06, "loss": 0.0, "step": 13110 }, { "epoch": 0.09389536964145137, "grad_norm": 0.0, "learning_rate": 9.061833536105347e-06, "loss": 0.0, "step": 13120 }, { "epoch": 0.0939669362341659, "grad_norm": 0.0, "learning_rate": 9.061117870178202e-06, "loss": 0.0353, "step": 13130 }, { "epoch": 0.09403850282688041, "grad_norm": 0.39826494455337524, "learning_rate": 9.060402204251056e-06, "loss": 0.0001, "step": 13140 }, { "epoch": 0.09411006941959493, "grad_norm": 0.0, "learning_rate": 9.059686538323911e-06, "loss": 0.0, "step": 13150 }, { "epoch": 0.09418163601230946, "grad_norm": 1.2546887546704966e-06, "learning_rate": 9.058970872396767e-06, "loss": 0.0, "step": 13160 }, { "epoch": 0.09425320260502397, "grad_norm": 0.0009980800095945597, "learning_rate": 9.05825520646962e-06, "loss": 0.0, "step": 13170 }, { "epoch": 0.0943247691977385, "grad_norm": 0.0006591822020709515, "learning_rate": 9.057539540542475e-06, "loss": 0.0, "step": 13180 }, { "epoch": 0.09439633579045302, "grad_norm": 1.4282443771662656e-05, "learning_rate": 9.05682387461533e-06, "loss": 0.0, "step": 13190 }, { "epoch": 0.09446790238316753, "grad_norm": 4.5512568713768076e-10, "learning_rate": 9.056108208688184e-06, "loss": 0.006, "step": 13200 }, { "epoch": 0.09453946897588206, "grad_norm": 361.5066223144531, "learning_rate": 9.05539254276104e-06, "loss": 0.4435, "step": 13210 }, { "epoch": 0.09461103556859658, "grad_norm": 0.0, "learning_rate": 9.054676876833895e-06, "loss": 0.0001, "step": 13220 }, { "epoch": 0.0946826021613111, "grad_norm": 0.0, "learning_rate": 9.05396121090675e-06, "loss": 0.0105, "step": 13230 }, { "epoch": 0.09475416875402562, "grad_norm": 0.0, "learning_rate": 9.053245544979604e-06, "loss": 0.0, "step": 13240 }, { "epoch": 0.09482573534674014, "grad_norm": 0.0, "learning_rate": 9.052529879052459e-06, "loss": 0.0009, "step": 13250 }, { "epoch": 0.09489730193945466, "grad_norm": 0.0, "learning_rate": 9.051814213125314e-06, "loss": 0.0, "step": 13260 }, { "epoch": 0.09496886853216918, "grad_norm": 0.0, "learning_rate": 9.051098547198168e-06, "loss": 0.0, "step": 13270 }, { "epoch": 0.09504043512488371, "grad_norm": 0.0, "learning_rate": 9.050382881271023e-06, "loss": 0.0, "step": 13280 }, { "epoch": 0.09511200171759822, "grad_norm": 0.0, "learning_rate": 9.049667215343879e-06, "loss": 0.0, "step": 13290 }, { "epoch": 0.09518356831031274, "grad_norm": 8.918220520019531, "learning_rate": 9.048951549416734e-06, "loss": 0.0184, "step": 13300 }, { "epoch": 0.09525513490302727, "grad_norm": 0.020451446995139122, "learning_rate": 9.048235883489587e-06, "loss": 0.0, "step": 13310 }, { "epoch": 0.0953267014957418, "grad_norm": 0.0002378150966251269, "learning_rate": 9.047520217562443e-06, "loss": 0.0, "step": 13320 }, { "epoch": 0.0953982680884563, "grad_norm": 0.0, "learning_rate": 9.046804551635298e-06, "loss": 0.1336, "step": 13330 }, { "epoch": 0.09546983468117083, "grad_norm": 0.0, "learning_rate": 9.046088885708152e-06, "loss": 0.0, "step": 13340 }, { "epoch": 0.09554140127388536, "grad_norm": 0.0, "learning_rate": 9.045373219781007e-06, "loss": 0.0, "step": 13350 }, { "epoch": 0.09561296786659987, "grad_norm": 0.0, "learning_rate": 9.044657553853862e-06, "loss": 0.0141, "step": 13360 }, { "epoch": 0.0956845344593144, "grad_norm": 6.407517910003662, "learning_rate": 9.043941887926716e-06, "loss": 0.01, "step": 13370 }, { "epoch": 0.09575610105202892, "grad_norm": 0.0, "learning_rate": 9.043226221999571e-06, "loss": 0.0086, "step": 13380 }, { "epoch": 0.09582766764474343, "grad_norm": 0.005692183505743742, "learning_rate": 9.042510556072426e-06, "loss": 0.0, "step": 13390 }, { "epoch": 0.09589923423745796, "grad_norm": 0.0, "learning_rate": 9.041794890145282e-06, "loss": 0.0, "step": 13400 }, { "epoch": 0.09597080083017248, "grad_norm": 0.0, "learning_rate": 9.041079224218135e-06, "loss": 0.0, "step": 13410 }, { "epoch": 0.09604236742288699, "grad_norm": 0.0, "learning_rate": 9.04036355829099e-06, "loss": 0.0, "step": 13420 }, { "epoch": 0.09611393401560152, "grad_norm": 0.0, "learning_rate": 9.039647892363846e-06, "loss": 0.0, "step": 13430 }, { "epoch": 0.09618550060831604, "grad_norm": 8.141323171173553e-10, "learning_rate": 9.0389322264367e-06, "loss": 0.0, "step": 13440 }, { "epoch": 0.09625706720103056, "grad_norm": 0.04500116780400276, "learning_rate": 9.038216560509555e-06, "loss": 0.1095, "step": 13450 }, { "epoch": 0.09632863379374508, "grad_norm": 0.003665223252028227, "learning_rate": 9.03750089458241e-06, "loss": 0.0383, "step": 13460 }, { "epoch": 0.0964002003864596, "grad_norm": 0.0, "learning_rate": 9.036785228655265e-06, "loss": 0.0, "step": 13470 }, { "epoch": 0.09647176697917412, "grad_norm": 0.01816261000931263, "learning_rate": 9.036069562728119e-06, "loss": 0.0, "step": 13480 }, { "epoch": 0.09654333357188864, "grad_norm": 0.0, "learning_rate": 9.035353896800974e-06, "loss": 0.0, "step": 13490 }, { "epoch": 0.09661490016460317, "grad_norm": 0.0, "learning_rate": 9.03463823087383e-06, "loss": 0.0, "step": 13500 }, { "epoch": 0.09668646675731768, "grad_norm": 0.0, "learning_rate": 9.033922564946683e-06, "loss": 0.4172, "step": 13510 }, { "epoch": 0.0967580333500322, "grad_norm": 4.4120022835159034e-07, "learning_rate": 9.033206899019538e-06, "loss": 0.1514, "step": 13520 }, { "epoch": 0.09682959994274673, "grad_norm": 0.0, "learning_rate": 9.032491233092394e-06, "loss": 0.0, "step": 13530 }, { "epoch": 0.09690116653546124, "grad_norm": 0.0, "learning_rate": 9.031775567165249e-06, "loss": 0.0, "step": 13540 }, { "epoch": 0.09697273312817577, "grad_norm": 0.0, "learning_rate": 9.031059901238103e-06, "loss": 0.0, "step": 13550 }, { "epoch": 0.09704429972089029, "grad_norm": 0.0, "learning_rate": 9.030344235310958e-06, "loss": 0.0, "step": 13560 }, { "epoch": 0.0971158663136048, "grad_norm": 0.15247206389904022, "learning_rate": 9.029628569383813e-06, "loss": 0.0, "step": 13570 }, { "epoch": 0.09718743290631933, "grad_norm": 0.0, "learning_rate": 9.028912903456667e-06, "loss": 0.0, "step": 13580 }, { "epoch": 0.09725899949903385, "grad_norm": 0.0, "learning_rate": 9.028197237529522e-06, "loss": 0.0002, "step": 13590 }, { "epoch": 0.09733056609174837, "grad_norm": 6.597525157303608e-07, "learning_rate": 9.027481571602377e-06, "loss": 0.0, "step": 13600 }, { "epoch": 0.09740213268446289, "grad_norm": 0.0, "learning_rate": 9.026765905675231e-06, "loss": 0.0, "step": 13610 }, { "epoch": 0.09747369927717742, "grad_norm": 0.0, "learning_rate": 9.026050239748086e-06, "loss": 0.0, "step": 13620 }, { "epoch": 0.09754526586989193, "grad_norm": 0.044111862778663635, "learning_rate": 9.025334573820942e-06, "loss": 0.0003, "step": 13630 }, { "epoch": 0.09761683246260645, "grad_norm": 0.0, "learning_rate": 9.024618907893797e-06, "loss": 0.001, "step": 13640 }, { "epoch": 0.09768839905532098, "grad_norm": 1.2636041641235352, "learning_rate": 9.02390324196665e-06, "loss": 0.0002, "step": 13650 }, { "epoch": 0.09775996564803549, "grad_norm": 0.0, "learning_rate": 9.023187576039506e-06, "loss": 0.0, "step": 13660 }, { "epoch": 0.09783153224075002, "grad_norm": 0.0, "learning_rate": 9.022471910112361e-06, "loss": 0.0015, "step": 13670 }, { "epoch": 0.09790309883346454, "grad_norm": 0.004173119552433491, "learning_rate": 9.021756244185215e-06, "loss": 0.0001, "step": 13680 }, { "epoch": 0.09797466542617905, "grad_norm": 245.95814514160156, "learning_rate": 9.02104057825807e-06, "loss": 0.0678, "step": 13690 }, { "epoch": 0.09804623201889358, "grad_norm": 0.0, "learning_rate": 9.020324912330925e-06, "loss": 0.0, "step": 13700 }, { "epoch": 0.0981177986116081, "grad_norm": 0.006568930111825466, "learning_rate": 9.01960924640378e-06, "loss": 0.0, "step": 13710 }, { "epoch": 0.09818936520432263, "grad_norm": 0.0, "learning_rate": 9.018893580476634e-06, "loss": 0.0, "step": 13720 }, { "epoch": 0.09826093179703714, "grad_norm": 0.00024722007219679654, "learning_rate": 9.01817791454949e-06, "loss": 0.0186, "step": 13730 }, { "epoch": 0.09833249838975167, "grad_norm": 0.0, "learning_rate": 9.017462248622345e-06, "loss": 0.0, "step": 13740 }, { "epoch": 0.09840406498246619, "grad_norm": 0.0, "learning_rate": 9.016746582695198e-06, "loss": 0.0, "step": 13750 }, { "epoch": 0.0984756315751807, "grad_norm": 3.277442601756775e-06, "learning_rate": 9.016030916768054e-06, "loss": 0.4118, "step": 13760 }, { "epoch": 0.09854719816789523, "grad_norm": 0.0, "learning_rate": 9.015315250840909e-06, "loss": 0.0, "step": 13770 }, { "epoch": 0.09861876476060975, "grad_norm": 0.0, "learning_rate": 9.014599584913764e-06, "loss": 0.0, "step": 13780 }, { "epoch": 0.09869033135332426, "grad_norm": 5.9997539210598916e-05, "learning_rate": 9.013883918986618e-06, "loss": 0.0, "step": 13790 }, { "epoch": 0.09876189794603879, "grad_norm": 0.0, "learning_rate": 9.013168253059473e-06, "loss": 0.0693, "step": 13800 }, { "epoch": 0.09883346453875332, "grad_norm": 2.263997345153257e-08, "learning_rate": 9.012452587132328e-06, "loss": 0.0, "step": 13810 }, { "epoch": 0.09890503113146783, "grad_norm": 0.0, "learning_rate": 9.011736921205182e-06, "loss": 0.0001, "step": 13820 }, { "epoch": 0.09897659772418235, "grad_norm": 0.004050883464515209, "learning_rate": 9.011021255278037e-06, "loss": 0.0003, "step": 13830 }, { "epoch": 0.09904816431689688, "grad_norm": 0.0, "learning_rate": 9.010305589350892e-06, "loss": 0.0001, "step": 13840 }, { "epoch": 0.09911973090961139, "grad_norm": 0.0, "learning_rate": 9.009589923423746e-06, "loss": 0.0, "step": 13850 }, { "epoch": 0.09919129750232591, "grad_norm": 0.0, "learning_rate": 9.008874257496601e-06, "loss": 0.0, "step": 13860 }, { "epoch": 0.09926286409504044, "grad_norm": 1.5061780800351698e-07, "learning_rate": 9.008158591569455e-06, "loss": 0.0, "step": 13870 }, { "epoch": 0.09933443068775495, "grad_norm": 0.0, "learning_rate": 9.007442925642312e-06, "loss": 0.1056, "step": 13880 }, { "epoch": 0.09940599728046948, "grad_norm": 6.742536243109498e-06, "learning_rate": 9.006727259715166e-06, "loss": 0.0004, "step": 13890 }, { "epoch": 0.099477563873184, "grad_norm": 0.0, "learning_rate": 9.006011593788021e-06, "loss": 0.0392, "step": 13900 }, { "epoch": 0.09954913046589851, "grad_norm": 0.0, "learning_rate": 9.005295927860876e-06, "loss": 0.0036, "step": 13910 }, { "epoch": 0.09962069705861304, "grad_norm": 0.00024993010447360575, "learning_rate": 9.00458026193373e-06, "loss": 0.0, "step": 13920 }, { "epoch": 0.09969226365132756, "grad_norm": 0.0, "learning_rate": 9.003864596006585e-06, "loss": 0.0, "step": 13930 }, { "epoch": 0.09976383024404208, "grad_norm": 0.008394169621169567, "learning_rate": 9.003148930079439e-06, "loss": 0.0, "step": 13940 }, { "epoch": 0.0998353968367566, "grad_norm": 0.0, "learning_rate": 9.002433264152296e-06, "loss": 0.0022, "step": 13950 }, { "epoch": 0.09990696342947113, "grad_norm": 3.125718572860592e-09, "learning_rate": 9.00171759822515e-06, "loss": 0.0, "step": 13960 }, { "epoch": 0.09997853002218564, "grad_norm": 1.9919449201211137e-08, "learning_rate": 9.001001932298003e-06, "loss": 0.0, "step": 13970 }, { "epoch": 0.10005009661490016, "grad_norm": 0.000567390990909189, "learning_rate": 9.00028626637086e-06, "loss": 0.0, "step": 13980 }, { "epoch": 0.10012166320761469, "grad_norm": 0.0, "learning_rate": 8.999570600443713e-06, "loss": 0.0, "step": 13990 }, { "epoch": 0.1001932298003292, "grad_norm": 0.0015730939339846373, "learning_rate": 8.998854934516569e-06, "loss": 0.0001, "step": 14000 }, { "epoch": 0.10026479639304373, "grad_norm": 0.0, "learning_rate": 8.998139268589422e-06, "loss": 0.0, "step": 14010 }, { "epoch": 0.10033636298575825, "grad_norm": 0.0, "learning_rate": 8.997423602662278e-06, "loss": 0.0, "step": 14020 }, { "epoch": 0.10040792957847276, "grad_norm": 0.0, "learning_rate": 8.996707936735133e-06, "loss": 0.0, "step": 14030 }, { "epoch": 0.10047949617118729, "grad_norm": 0.0, "learning_rate": 8.995992270807986e-06, "loss": 0.0, "step": 14040 }, { "epoch": 0.10055106276390181, "grad_norm": 0.0, "learning_rate": 8.995276604880843e-06, "loss": 0.0, "step": 14050 }, { "epoch": 0.10062262935661632, "grad_norm": 0.0, "learning_rate": 8.994560938953697e-06, "loss": 0.0001, "step": 14060 }, { "epoch": 0.10069419594933085, "grad_norm": 1.397942742187297e-05, "learning_rate": 8.993845273026552e-06, "loss": 0.0, "step": 14070 }, { "epoch": 0.10076576254204538, "grad_norm": 44.746864318847656, "learning_rate": 8.993129607099406e-06, "loss": 0.0053, "step": 14080 }, { "epoch": 0.1008373291347599, "grad_norm": 9.790468902792782e-05, "learning_rate": 8.992413941172261e-06, "loss": 0.0, "step": 14090 }, { "epoch": 0.10090889572747441, "grad_norm": 0.0, "learning_rate": 8.991698275245117e-06, "loss": 0.3711, "step": 14100 }, { "epoch": 0.10098046232018894, "grad_norm": 0.0, "learning_rate": 8.99098260931797e-06, "loss": 0.0693, "step": 14110 }, { "epoch": 0.10105202891290346, "grad_norm": 0.0005772275035269558, "learning_rate": 8.990266943390827e-06, "loss": 0.0, "step": 14120 }, { "epoch": 0.10112359550561797, "grad_norm": 0.0, "learning_rate": 8.98955127746368e-06, "loss": 0.1105, "step": 14130 }, { "epoch": 0.1011951620983325, "grad_norm": 0.0, "learning_rate": 8.988835611536534e-06, "loss": 0.0, "step": 14140 }, { "epoch": 0.10126672869104703, "grad_norm": 0.0008690189570188522, "learning_rate": 8.98811994560939e-06, "loss": 0.5999, "step": 14150 }, { "epoch": 0.10133829528376154, "grad_norm": 0.0, "learning_rate": 8.987404279682245e-06, "loss": 0.0, "step": 14160 }, { "epoch": 0.10140986187647606, "grad_norm": 4.3080399336759e-06, "learning_rate": 8.9866886137551e-06, "loss": 0.0002, "step": 14170 }, { "epoch": 0.10148142846919059, "grad_norm": 0.0003119709435850382, "learning_rate": 8.985972947827954e-06, "loss": 0.0, "step": 14180 }, { "epoch": 0.1015529950619051, "grad_norm": 2.351202964782715, "learning_rate": 8.98525728190081e-06, "loss": 0.0004, "step": 14190 }, { "epoch": 0.10162456165461962, "grad_norm": 7.633969289599918e-06, "learning_rate": 8.984541615973664e-06, "loss": 0.0013, "step": 14200 }, { "epoch": 0.10169612824733415, "grad_norm": 9.978928574128076e-05, "learning_rate": 8.983825950046518e-06, "loss": 0.0, "step": 14210 }, { "epoch": 0.10176769484004866, "grad_norm": 20.772052764892578, "learning_rate": 8.983110284119373e-06, "loss": 0.0044, "step": 14220 }, { "epoch": 0.10183926143276319, "grad_norm": 0.0, "learning_rate": 8.982394618192229e-06, "loss": 0.0, "step": 14230 }, { "epoch": 0.10191082802547771, "grad_norm": 1.3053489923477173, "learning_rate": 8.981678952265084e-06, "loss": 0.0003, "step": 14240 }, { "epoch": 0.10198239461819222, "grad_norm": 0.0, "learning_rate": 8.980963286337937e-06, "loss": 0.0, "step": 14250 }, { "epoch": 0.10205396121090675, "grad_norm": 0.0, "learning_rate": 8.980247620410793e-06, "loss": 0.0001, "step": 14260 }, { "epoch": 0.10212552780362127, "grad_norm": 0.0, "learning_rate": 8.979531954483648e-06, "loss": 0.001, "step": 14270 }, { "epoch": 0.10219709439633579, "grad_norm": 0.0, "learning_rate": 8.978816288556502e-06, "loss": 0.0, "step": 14280 }, { "epoch": 0.10226866098905031, "grad_norm": 4.2554765267155403e-10, "learning_rate": 8.978100622629357e-06, "loss": 0.0, "step": 14290 }, { "epoch": 0.10234022758176484, "grad_norm": 0.0, "learning_rate": 8.977384956702212e-06, "loss": 0.0823, "step": 14300 }, { "epoch": 0.10241179417447935, "grad_norm": 0.00019079695630352944, "learning_rate": 8.976669290775067e-06, "loss": 0.0, "step": 14310 }, { "epoch": 0.10248336076719387, "grad_norm": 0.0, "learning_rate": 8.975953624847921e-06, "loss": 0.0, "step": 14320 }, { "epoch": 0.1025549273599084, "grad_norm": 0.032832372933626175, "learning_rate": 8.975237958920776e-06, "loss": 0.0445, "step": 14330 }, { "epoch": 0.10262649395262291, "grad_norm": 0.0, "learning_rate": 8.974522292993632e-06, "loss": 0.0, "step": 14340 }, { "epoch": 0.10269806054533744, "grad_norm": 0.0, "learning_rate": 8.973806627066485e-06, "loss": 0.0, "step": 14350 }, { "epoch": 0.10276962713805196, "grad_norm": 0.005933444015681744, "learning_rate": 8.97309096113934e-06, "loss": 0.0, "step": 14360 }, { "epoch": 0.10284119373076647, "grad_norm": 0.0001087731434381567, "learning_rate": 8.972375295212196e-06, "loss": 0.0, "step": 14370 }, { "epoch": 0.102912760323481, "grad_norm": 0.0, "learning_rate": 8.97165962928505e-06, "loss": 0.0, "step": 14380 }, { "epoch": 0.10298432691619552, "grad_norm": 0.01610724814236164, "learning_rate": 8.970943963357905e-06, "loss": 0.0757, "step": 14390 }, { "epoch": 0.10305589350891003, "grad_norm": 0.0, "learning_rate": 8.97022829743076e-06, "loss": 0.0014, "step": 14400 }, { "epoch": 0.10312746010162456, "grad_norm": 3.8825733099656645e-06, "learning_rate": 8.969512631503615e-06, "loss": 0.0, "step": 14410 }, { "epoch": 0.10319902669433909, "grad_norm": 0.0, "learning_rate": 8.968796965576469e-06, "loss": 0.398, "step": 14420 }, { "epoch": 0.1032705932870536, "grad_norm": 0.0, "learning_rate": 8.968081299649324e-06, "loss": 0.0, "step": 14430 }, { "epoch": 0.10334215987976812, "grad_norm": 0.0, "learning_rate": 8.96736563372218e-06, "loss": 0.0001, "step": 14440 }, { "epoch": 0.10341372647248265, "grad_norm": 0.0065508047118783, "learning_rate": 8.966649967795033e-06, "loss": 0.0, "step": 14450 }, { "epoch": 0.10348529306519716, "grad_norm": 9.972132829716429e-05, "learning_rate": 8.965934301867888e-06, "loss": 0.0, "step": 14460 }, { "epoch": 0.10355685965791168, "grad_norm": 0.0, "learning_rate": 8.965218635940744e-06, "loss": 0.0056, "step": 14470 }, { "epoch": 0.10362842625062621, "grad_norm": 0.022046979516744614, "learning_rate": 8.964502970013599e-06, "loss": 0.01, "step": 14480 }, { "epoch": 0.10369999284334074, "grad_norm": 0.0, "learning_rate": 8.963787304086453e-06, "loss": 0.0035, "step": 14490 }, { "epoch": 0.10377155943605525, "grad_norm": 3.116657126156497e-06, "learning_rate": 8.963071638159308e-06, "loss": 0.0001, "step": 14500 }, { "epoch": 0.10384312602876977, "grad_norm": 0.0, "learning_rate": 8.962355972232163e-06, "loss": 0.0, "step": 14510 }, { "epoch": 0.1039146926214843, "grad_norm": 0.0, "learning_rate": 8.961640306305017e-06, "loss": 0.0, "step": 14520 }, { "epoch": 0.10398625921419881, "grad_norm": 0.0, "learning_rate": 8.960924640377872e-06, "loss": 0.0, "step": 14530 }, { "epoch": 0.10405782580691333, "grad_norm": 0.0020228137727826834, "learning_rate": 8.960208974450727e-06, "loss": 0.0, "step": 14540 }, { "epoch": 0.10412939239962786, "grad_norm": 0.0, "learning_rate": 8.959493308523583e-06, "loss": 0.0, "step": 14550 }, { "epoch": 0.10420095899234237, "grad_norm": 0.0013714220840483904, "learning_rate": 8.958777642596436e-06, "loss": 0.0, "step": 14560 }, { "epoch": 0.1042725255850569, "grad_norm": 0.5359599590301514, "learning_rate": 8.958061976669292e-06, "loss": 0.3469, "step": 14570 }, { "epoch": 0.10434409217777142, "grad_norm": 0.0, "learning_rate": 8.957346310742147e-06, "loss": 0.0, "step": 14580 }, { "epoch": 0.10441565877048593, "grad_norm": 0.0, "learning_rate": 8.956630644815e-06, "loss": 0.0, "step": 14590 }, { "epoch": 0.10448722536320046, "grad_norm": 0.0, "learning_rate": 8.955914978887856e-06, "loss": 0.0, "step": 14600 }, { "epoch": 0.10455879195591498, "grad_norm": 0.0, "learning_rate": 8.955199312960711e-06, "loss": 0.0001, "step": 14610 }, { "epoch": 0.1046303585486295, "grad_norm": 0.0, "learning_rate": 8.954483647033565e-06, "loss": 0.0001, "step": 14620 }, { "epoch": 0.10470192514134402, "grad_norm": 0.0, "learning_rate": 8.95376798110642e-06, "loss": 0.0, "step": 14630 }, { "epoch": 0.10477349173405855, "grad_norm": 4.843365672968503e-07, "learning_rate": 8.953052315179275e-06, "loss": 0.052, "step": 14640 }, { "epoch": 0.10484505832677306, "grad_norm": 4.145758339291206e-06, "learning_rate": 8.95233664925213e-06, "loss": 0.0, "step": 14650 }, { "epoch": 0.10491662491948758, "grad_norm": 0.0, "learning_rate": 8.951620983324984e-06, "loss": 0.0, "step": 14660 }, { "epoch": 0.10498819151220211, "grad_norm": 4.553241395033325e-10, "learning_rate": 8.95090531739784e-06, "loss": 0.0, "step": 14670 }, { "epoch": 0.10505975810491662, "grad_norm": 0.00017902717809192836, "learning_rate": 8.950189651470695e-06, "loss": 0.0, "step": 14680 }, { "epoch": 0.10513132469763115, "grad_norm": 0.0, "learning_rate": 8.949473985543548e-06, "loss": 0.0, "step": 14690 }, { "epoch": 0.10520289129034567, "grad_norm": 0.0, "learning_rate": 8.948758319616404e-06, "loss": 0.0005, "step": 14700 }, { "epoch": 0.10527445788306018, "grad_norm": 315.55487060546875, "learning_rate": 8.948042653689259e-06, "loss": 0.0523, "step": 14710 }, { "epoch": 0.10534602447577471, "grad_norm": 0.0, "learning_rate": 8.947326987762114e-06, "loss": 0.0, "step": 14720 }, { "epoch": 0.10541759106848923, "grad_norm": 0.0, "learning_rate": 8.946611321834968e-06, "loss": 0.0, "step": 14730 }, { "epoch": 0.10548915766120374, "grad_norm": 1.2946142824432627e-08, "learning_rate": 8.945895655907823e-06, "loss": 0.0, "step": 14740 }, { "epoch": 0.10556072425391827, "grad_norm": 4.227726549288491e-06, "learning_rate": 8.945179989980678e-06, "loss": 0.0007, "step": 14750 }, { "epoch": 0.1056322908466328, "grad_norm": 0.0, "learning_rate": 8.944464324053532e-06, "loss": 0.0349, "step": 14760 }, { "epoch": 0.1057038574393473, "grad_norm": 1.1413973766138952e-08, "learning_rate": 8.943748658126387e-06, "loss": 0.0, "step": 14770 }, { "epoch": 0.10577542403206183, "grad_norm": 0.0, "learning_rate": 8.943032992199242e-06, "loss": 0.0, "step": 14780 }, { "epoch": 0.10584699062477636, "grad_norm": 0.0, "learning_rate": 8.942317326272096e-06, "loss": 0.0002, "step": 14790 }, { "epoch": 0.10591855721749087, "grad_norm": 0.0, "learning_rate": 8.941601660344951e-06, "loss": 0.0, "step": 14800 }, { "epoch": 0.1059901238102054, "grad_norm": 0.0, "learning_rate": 8.940885994417807e-06, "loss": 0.5645, "step": 14810 }, { "epoch": 0.10606169040291992, "grad_norm": 0.0, "learning_rate": 8.940170328490662e-06, "loss": 0.0011, "step": 14820 }, { "epoch": 0.10613325699563443, "grad_norm": 3.3769873564182262e-09, "learning_rate": 8.939454662563516e-06, "loss": 0.0, "step": 14830 }, { "epoch": 0.10620482358834896, "grad_norm": 0.0, "learning_rate": 8.93873899663637e-06, "loss": 0.0, "step": 14840 }, { "epoch": 0.10627639018106348, "grad_norm": 0.0, "learning_rate": 8.938023330709226e-06, "loss": 0.0, "step": 14850 }, { "epoch": 0.106347956773778, "grad_norm": 0.000496312917675823, "learning_rate": 8.93730766478208e-06, "loss": 0.0, "step": 14860 }, { "epoch": 0.10641952336649252, "grad_norm": 0.0, "learning_rate": 8.936591998854935e-06, "loss": 0.0032, "step": 14870 }, { "epoch": 0.10649108995920704, "grad_norm": 2.69346012160554e-09, "learning_rate": 8.93587633292779e-06, "loss": 0.0, "step": 14880 }, { "epoch": 0.10656265655192157, "grad_norm": 0.0, "learning_rate": 8.935160667000646e-06, "loss": 0.0123, "step": 14890 }, { "epoch": 0.10663422314463608, "grad_norm": 1.0186454346694518e-05, "learning_rate": 8.9344450010735e-06, "loss": 0.0, "step": 14900 }, { "epoch": 0.1067057897373506, "grad_norm": 2.174061410187278e-05, "learning_rate": 8.933729335146354e-06, "loss": 0.0, "step": 14910 }, { "epoch": 0.10677735633006513, "grad_norm": 4.6337146386576933e-07, "learning_rate": 8.93301366921921e-06, "loss": 0.0, "step": 14920 }, { "epoch": 0.10684892292277964, "grad_norm": 0.0, "learning_rate": 8.932298003292063e-06, "loss": 0.0, "step": 14930 }, { "epoch": 0.10692048951549417, "grad_norm": 0.0, "learning_rate": 8.931582337364919e-06, "loss": 0.0, "step": 14940 }, { "epoch": 0.1069920561082087, "grad_norm": 4.430999069882091e-07, "learning_rate": 8.930866671437774e-06, "loss": 0.0, "step": 14950 }, { "epoch": 0.1070636227009232, "grad_norm": 0.0, "learning_rate": 8.93015100551063e-06, "loss": 0.0, "step": 14960 }, { "epoch": 0.10713518929363773, "grad_norm": 7.556113246209861e-07, "learning_rate": 8.929435339583483e-06, "loss": 0.0, "step": 14970 }, { "epoch": 0.10720675588635226, "grad_norm": 0.0, "learning_rate": 8.928719673656338e-06, "loss": 0.0002, "step": 14980 }, { "epoch": 0.10727832247906677, "grad_norm": 352.8498840332031, "learning_rate": 8.928004007729193e-06, "loss": 0.0725, "step": 14990 }, { "epoch": 0.1073498890717813, "grad_norm": 0.0, "learning_rate": 8.927288341802047e-06, "loss": 0.0, "step": 15000 }, { "epoch": 0.10742145566449582, "grad_norm": 0.0, "learning_rate": 8.926572675874902e-06, "loss": 0.0006, "step": 15010 }, { "epoch": 0.10749302225721033, "grad_norm": 0.7497009634971619, "learning_rate": 8.925857009947758e-06, "loss": 0.0001, "step": 15020 }, { "epoch": 0.10756458884992486, "grad_norm": 24.292097091674805, "learning_rate": 8.925141344020611e-06, "loss": 0.0059, "step": 15030 }, { "epoch": 0.10763615544263938, "grad_norm": 0.9422528147697449, "learning_rate": 8.924425678093467e-06, "loss": 0.0012, "step": 15040 }, { "epoch": 0.10770772203535389, "grad_norm": 0.0, "learning_rate": 8.923710012166322e-06, "loss": 0.0001, "step": 15050 }, { "epoch": 0.10777928862806842, "grad_norm": 0.0, "learning_rate": 8.922994346239177e-06, "loss": 0.0807, "step": 15060 }, { "epoch": 0.10785085522078294, "grad_norm": 0.22944965958595276, "learning_rate": 8.92227868031203e-06, "loss": 0.0, "step": 15070 }, { "epoch": 0.10792242181349745, "grad_norm": 0.05236121639609337, "learning_rate": 8.921563014384886e-06, "loss": 0.0, "step": 15080 }, { "epoch": 0.10799398840621198, "grad_norm": 0.0, "learning_rate": 8.920847348457741e-06, "loss": 0.0, "step": 15090 }, { "epoch": 0.1080655549989265, "grad_norm": 2.2694716790283564e-06, "learning_rate": 8.920131682530595e-06, "loss": 0.0, "step": 15100 }, { "epoch": 0.10813712159164102, "grad_norm": 0.0, "learning_rate": 8.91941601660345e-06, "loss": 0.0009, "step": 15110 }, { "epoch": 0.10820868818435554, "grad_norm": 0.0, "learning_rate": 8.918771917269018e-06, "loss": 0.3049, "step": 15120 }, { "epoch": 0.10828025477707007, "grad_norm": 0.41133934259414673, "learning_rate": 8.918056251341875e-06, "loss": 0.0001, "step": 15130 }, { "epoch": 0.10835182136978458, "grad_norm": 0.022135550156235695, "learning_rate": 8.917340585414729e-06, "loss": 0.0, "step": 15140 }, { "epoch": 0.1084233879624991, "grad_norm": 0.0, "learning_rate": 8.916624919487584e-06, "loss": 0.0, "step": 15150 }, { "epoch": 0.10849495455521363, "grad_norm": 0.0, "learning_rate": 8.91590925356044e-06, "loss": 0.0, "step": 15160 }, { "epoch": 0.10856652114792814, "grad_norm": 0.0, "learning_rate": 8.915193587633295e-06, "loss": 0.4648, "step": 15170 }, { "epoch": 0.10863808774064267, "grad_norm": 0.0, "learning_rate": 8.914477921706148e-06, "loss": 0.0001, "step": 15180 }, { "epoch": 0.10870965433335719, "grad_norm": 0.0, "learning_rate": 8.913762255779002e-06, "loss": 0.0, "step": 15190 }, { "epoch": 0.1087812209260717, "grad_norm": 0.0, "learning_rate": 8.913046589851859e-06, "loss": 0.0, "step": 15200 }, { "epoch": 0.10885278751878623, "grad_norm": 0.0, "learning_rate": 8.912330923924712e-06, "loss": 0.0, "step": 15210 }, { "epoch": 0.10892435411150075, "grad_norm": 0.25300005078315735, "learning_rate": 8.911615257997568e-06, "loss": 0.1689, "step": 15220 }, { "epoch": 0.10899592070421527, "grad_norm": 0.0, "learning_rate": 8.910899592070423e-06, "loss": 0.0, "step": 15230 }, { "epoch": 0.10906748729692979, "grad_norm": 0.0, "learning_rate": 8.910183926143277e-06, "loss": 0.0, "step": 15240 }, { "epoch": 0.10913905388964432, "grad_norm": 0.9112673401832581, "learning_rate": 8.909468260216132e-06, "loss": 0.0001, "step": 15250 }, { "epoch": 0.10921062048235884, "grad_norm": 0.0, "learning_rate": 8.908752594288986e-06, "loss": 0.7586, "step": 15260 }, { "epoch": 0.10928218707507335, "grad_norm": 5.99381792198983e-07, "learning_rate": 8.908036928361843e-06, "loss": 0.0, "step": 15270 }, { "epoch": 0.10935375366778788, "grad_norm": 0.0025609470903873444, "learning_rate": 8.907321262434696e-06, "loss": 0.0001, "step": 15280 }, { "epoch": 0.1094253202605024, "grad_norm": 0.0, "learning_rate": 8.906605596507551e-06, "loss": 0.0082, "step": 15290 }, { "epoch": 0.10949688685321692, "grad_norm": 1.6955922546912916e-05, "learning_rate": 8.905889930580405e-06, "loss": 0.0921, "step": 15300 }, { "epoch": 0.10956845344593144, "grad_norm": 0.0, "learning_rate": 8.90517426465326e-06, "loss": 0.0, "step": 15310 }, { "epoch": 0.10964002003864597, "grad_norm": 0.0, "learning_rate": 8.904458598726116e-06, "loss": 0.0, "step": 15320 }, { "epoch": 0.10971158663136048, "grad_norm": 0.0, "learning_rate": 8.90374293279897e-06, "loss": 0.0, "step": 15330 }, { "epoch": 0.109783153224075, "grad_norm": 0.0, "learning_rate": 8.903027266871826e-06, "loss": 0.0, "step": 15340 }, { "epoch": 0.10985471981678953, "grad_norm": 1.9556931874831207e-05, "learning_rate": 8.90231160094468e-06, "loss": 0.0, "step": 15350 }, { "epoch": 0.10992628640950404, "grad_norm": 0.00031349496566690505, "learning_rate": 8.901595935017533e-06, "loss": 0.0, "step": 15360 }, { "epoch": 0.10999785300221857, "grad_norm": 0.0, "learning_rate": 8.900880269090389e-06, "loss": 0.001, "step": 15370 }, { "epoch": 0.11006941959493309, "grad_norm": 574.3016967773438, "learning_rate": 8.900164603163244e-06, "loss": 0.2119, "step": 15380 }, { "epoch": 0.1101409861876476, "grad_norm": 0.0, "learning_rate": 8.8994489372361e-06, "loss": 0.0009, "step": 15390 }, { "epoch": 0.11021255278036213, "grad_norm": 0.06500844657421112, "learning_rate": 8.898733271308953e-06, "loss": 0.0, "step": 15400 }, { "epoch": 0.11028411937307665, "grad_norm": 0.0, "learning_rate": 8.89801760538181e-06, "loss": 0.0, "step": 15410 }, { "epoch": 0.11035568596579116, "grad_norm": 1.2447944754967466e-05, "learning_rate": 8.897301939454663e-06, "loss": 0.0, "step": 15420 }, { "epoch": 0.11042725255850569, "grad_norm": 0.0012750796740874648, "learning_rate": 8.896586273527517e-06, "loss": 0.0, "step": 15430 }, { "epoch": 0.11049881915122022, "grad_norm": 0.0, "learning_rate": 8.895870607600372e-06, "loss": 0.0, "step": 15440 }, { "epoch": 0.11057038574393473, "grad_norm": 1.466172218322754, "learning_rate": 8.895154941673228e-06, "loss": 0.0003, "step": 15450 }, { "epoch": 0.11064195233664925, "grad_norm": 3.3950169608942815e-07, "learning_rate": 8.894439275746083e-06, "loss": 0.0, "step": 15460 }, { "epoch": 0.11071351892936378, "grad_norm": 0.8369330763816833, "learning_rate": 8.893723609818937e-06, "loss": 0.0003, "step": 15470 }, { "epoch": 0.11078508552207829, "grad_norm": 0.0, "learning_rate": 8.893007943891792e-06, "loss": 0.0004, "step": 15480 }, { "epoch": 0.11085665211479281, "grad_norm": 0.0, "learning_rate": 8.892292277964647e-06, "loss": 0.0, "step": 15490 }, { "epoch": 0.11092821870750734, "grad_norm": 8.043379783630371, "learning_rate": 8.8915766120375e-06, "loss": 0.339, "step": 15500 }, { "epoch": 0.11099978530022185, "grad_norm": 0.0, "learning_rate": 8.890860946110356e-06, "loss": 0.0, "step": 15510 }, { "epoch": 0.11107135189293638, "grad_norm": 8.169274678948568e-08, "learning_rate": 8.890145280183211e-06, "loss": 0.0, "step": 15520 }, { "epoch": 0.1111429184856509, "grad_norm": 0.0, "learning_rate": 8.889429614256067e-06, "loss": 0.0038, "step": 15530 }, { "epoch": 0.11121448507836541, "grad_norm": 0.0, "learning_rate": 8.88871394832892e-06, "loss": 0.0, "step": 15540 }, { "epoch": 0.11128605167107994, "grad_norm": 0.0, "learning_rate": 8.887998282401775e-06, "loss": 0.0041, "step": 15550 }, { "epoch": 0.11135761826379446, "grad_norm": 0.0, "learning_rate": 8.88728261647463e-06, "loss": 0.0, "step": 15560 }, { "epoch": 0.11142918485650898, "grad_norm": 0.0, "learning_rate": 8.886566950547484e-06, "loss": 0.0, "step": 15570 }, { "epoch": 0.1115007514492235, "grad_norm": 6.213120968823205e-07, "learning_rate": 8.88585128462034e-06, "loss": 0.0, "step": 15580 }, { "epoch": 0.11157231804193803, "grad_norm": 4.81569173427232e-10, "learning_rate": 8.885135618693195e-06, "loss": 0.4309, "step": 15590 }, { "epoch": 0.11164388463465254, "grad_norm": 4.188527580328838e-10, "learning_rate": 8.884419952766049e-06, "loss": 0.0, "step": 15600 }, { "epoch": 0.11171545122736706, "grad_norm": 4.5830031436544516e-10, "learning_rate": 8.883704286838904e-06, "loss": 0.0, "step": 15610 }, { "epoch": 0.11178701782008159, "grad_norm": 0.0, "learning_rate": 8.882988620911759e-06, "loss": 0.0056, "step": 15620 }, { "epoch": 0.1118585844127961, "grad_norm": 0.0017118433024734259, "learning_rate": 8.882272954984614e-06, "loss": 0.0, "step": 15630 }, { "epoch": 0.11193015100551063, "grad_norm": 0.0, "learning_rate": 8.881557289057468e-06, "loss": 0.0, "step": 15640 }, { "epoch": 0.11200171759822515, "grad_norm": 0.0, "learning_rate": 8.880841623130323e-06, "loss": 0.0, "step": 15650 }, { "epoch": 0.11207328419093968, "grad_norm": 33.40262222290039, "learning_rate": 8.880125957203179e-06, "loss": 0.0106, "step": 15660 }, { "epoch": 0.11214485078365419, "grad_norm": 48.82555389404297, "learning_rate": 8.879410291276032e-06, "loss": 0.0143, "step": 15670 }, { "epoch": 0.11221641737636871, "grad_norm": 2.9602391649063975e-08, "learning_rate": 8.878694625348887e-06, "loss": 0.0, "step": 15680 }, { "epoch": 0.11228798396908324, "grad_norm": 0.0027720010839402676, "learning_rate": 8.877978959421743e-06, "loss": 0.0, "step": 15690 }, { "epoch": 0.11235955056179775, "grad_norm": 0.0, "learning_rate": 8.877263293494598e-06, "loss": 0.0002, "step": 15700 }, { "epoch": 0.11243111715451228, "grad_norm": 2.1752031287292084e-09, "learning_rate": 8.876547627567452e-06, "loss": 0.0, "step": 15710 }, { "epoch": 0.1125026837472268, "grad_norm": 0.0, "learning_rate": 8.875831961640307e-06, "loss": 0.0, "step": 15720 }, { "epoch": 0.11257425033994131, "grad_norm": 0.0, "learning_rate": 8.875116295713162e-06, "loss": 0.0002, "step": 15730 }, { "epoch": 0.11264581693265584, "grad_norm": 0.0, "learning_rate": 8.874400629786016e-06, "loss": 0.0004, "step": 15740 }, { "epoch": 0.11271738352537036, "grad_norm": 0.0, "learning_rate": 8.873684963858871e-06, "loss": 0.0, "step": 15750 }, { "epoch": 0.11278895011808487, "grad_norm": 0.0, "learning_rate": 8.872969297931726e-06, "loss": 0.0, "step": 15760 }, { "epoch": 0.1128605167107994, "grad_norm": 5.108190634928178e-06, "learning_rate": 8.87225363200458e-06, "loss": 0.0, "step": 15770 }, { "epoch": 0.11293208330351392, "grad_norm": 0.0, "learning_rate": 8.871537966077435e-06, "loss": 0.0001, "step": 15780 }, { "epoch": 0.11300364989622844, "grad_norm": 0.0, "learning_rate": 8.87082230015029e-06, "loss": 0.0705, "step": 15790 }, { "epoch": 0.11307521648894296, "grad_norm": 0.0, "learning_rate": 8.870106634223146e-06, "loss": 0.0011, "step": 15800 }, { "epoch": 0.11314678308165749, "grad_norm": 0.0, "learning_rate": 8.869390968296e-06, "loss": 0.0, "step": 15810 }, { "epoch": 0.113218349674372, "grad_norm": 0.013099201954901218, "learning_rate": 8.868675302368855e-06, "loss": 0.0, "step": 15820 }, { "epoch": 0.11328991626708652, "grad_norm": 1.2895647216737416e-07, "learning_rate": 8.86795963644171e-06, "loss": 0.0, "step": 15830 }, { "epoch": 0.11336148285980105, "grad_norm": 0.0, "learning_rate": 8.867243970514564e-06, "loss": 0.0, "step": 15840 }, { "epoch": 0.11343304945251556, "grad_norm": 4.620816784761672e-10, "learning_rate": 8.866528304587419e-06, "loss": 0.0, "step": 15850 }, { "epoch": 0.11350461604523009, "grad_norm": 0.0011700012255460024, "learning_rate": 8.865812638660274e-06, "loss": 0.0001, "step": 15860 }, { "epoch": 0.11357618263794461, "grad_norm": 0.01342043373733759, "learning_rate": 8.86509697273313e-06, "loss": 0.0, "step": 15870 }, { "epoch": 0.11364774923065912, "grad_norm": 0.0, "learning_rate": 8.864381306805983e-06, "loss": 0.0, "step": 15880 }, { "epoch": 0.11371931582337365, "grad_norm": 0.0, "learning_rate": 8.863665640878838e-06, "loss": 0.0002, "step": 15890 }, { "epoch": 0.11379088241608817, "grad_norm": 0.0, "learning_rate": 8.862949974951694e-06, "loss": 0.0004, "step": 15900 }, { "epoch": 0.11386244900880269, "grad_norm": 974.4373779296875, "learning_rate": 8.862234309024547e-06, "loss": 1.3781, "step": 15910 }, { "epoch": 0.11393401560151721, "grad_norm": 1.1278351545333862, "learning_rate": 8.861518643097403e-06, "loss": 0.0001, "step": 15920 }, { "epoch": 0.11400558219423174, "grad_norm": 0.0, "learning_rate": 8.860802977170258e-06, "loss": 0.0, "step": 15930 }, { "epoch": 0.11407714878694625, "grad_norm": 4.7346696874228655e-07, "learning_rate": 8.860087311243113e-06, "loss": 0.0, "step": 15940 }, { "epoch": 0.11414871537966077, "grad_norm": 0.0, "learning_rate": 8.859371645315967e-06, "loss": 0.0, "step": 15950 }, { "epoch": 0.1142202819723753, "grad_norm": 9.694637315149635e-10, "learning_rate": 8.858655979388822e-06, "loss": 0.0, "step": 15960 }, { "epoch": 0.11429184856508981, "grad_norm": 2.5572688173269853e-05, "learning_rate": 8.857940313461677e-06, "loss": 0.0, "step": 15970 }, { "epoch": 0.11436341515780434, "grad_norm": 2.086399319978227e-08, "learning_rate": 8.857224647534531e-06, "loss": 0.0, "step": 15980 }, { "epoch": 0.11443498175051886, "grad_norm": 1.0985303333654883e-07, "learning_rate": 8.856508981607386e-06, "loss": 0.0, "step": 15990 }, { "epoch": 0.11450654834323337, "grad_norm": 0.0, "learning_rate": 8.855793315680242e-06, "loss": 0.0, "step": 16000 }, { "epoch": 0.1145781149359479, "grad_norm": 2.2002956029609777e-05, "learning_rate": 8.855077649753095e-06, "loss": 0.0, "step": 16010 }, { "epoch": 0.11464968152866242, "grad_norm": 0.0, "learning_rate": 8.85436198382595e-06, "loss": 0.0, "step": 16020 }, { "epoch": 0.11472124812137695, "grad_norm": 8.39488848214387e-07, "learning_rate": 8.853646317898806e-06, "loss": 0.0003, "step": 16030 }, { "epoch": 0.11479281471409146, "grad_norm": 0.0, "learning_rate": 8.852930651971661e-06, "loss": 0.0001, "step": 16040 }, { "epoch": 0.11486438130680598, "grad_norm": 0.0, "learning_rate": 8.852214986044515e-06, "loss": 0.0021, "step": 16050 }, { "epoch": 0.11493594789952051, "grad_norm": 0.0, "learning_rate": 8.85149932011737e-06, "loss": 0.0, "step": 16060 }, { "epoch": 0.11500751449223502, "grad_norm": 0.0048495070077478886, "learning_rate": 8.850783654190225e-06, "loss": 0.0085, "step": 16070 }, { "epoch": 0.11507908108494955, "grad_norm": 0.0, "learning_rate": 8.850067988263079e-06, "loss": 0.0004, "step": 16080 }, { "epoch": 0.11515064767766407, "grad_norm": 0.0, "learning_rate": 8.849352322335934e-06, "loss": 0.0, "step": 16090 }, { "epoch": 0.11522221427037858, "grad_norm": 7.136506610549986e-05, "learning_rate": 8.84863665640879e-06, "loss": 0.0, "step": 16100 }, { "epoch": 0.11529378086309311, "grad_norm": 2.2287051677703857, "learning_rate": 8.847920990481645e-06, "loss": 0.0006, "step": 16110 }, { "epoch": 0.11536534745580763, "grad_norm": 0.0, "learning_rate": 8.847205324554498e-06, "loss": 0.0016, "step": 16120 }, { "epoch": 0.11543691404852215, "grad_norm": 0.0026080282405018806, "learning_rate": 8.846489658627354e-06, "loss": 0.0936, "step": 16130 }, { "epoch": 0.11550848064123667, "grad_norm": 8.414676813117694e-06, "learning_rate": 8.845773992700209e-06, "loss": 0.0371, "step": 16140 }, { "epoch": 0.1155800472339512, "grad_norm": 1.7623061410176888e-07, "learning_rate": 8.845058326773062e-06, "loss": 0.0, "step": 16150 }, { "epoch": 0.11565161382666571, "grad_norm": 0.000546999042853713, "learning_rate": 8.844342660845918e-06, "loss": 0.0, "step": 16160 }, { "epoch": 0.11572318041938023, "grad_norm": 0.0, "learning_rate": 8.843626994918773e-06, "loss": 0.0001, "step": 16170 }, { "epoch": 0.11579474701209476, "grad_norm": 0.0, "learning_rate": 8.842911328991628e-06, "loss": 0.0774, "step": 16180 }, { "epoch": 0.11586631360480927, "grad_norm": 0.0, "learning_rate": 8.842195663064482e-06, "loss": 0.0, "step": 16190 }, { "epoch": 0.1159378801975238, "grad_norm": 0.0, "learning_rate": 8.841479997137337e-06, "loss": 0.0, "step": 16200 }, { "epoch": 0.11600944679023832, "grad_norm": 0.0, "learning_rate": 8.840764331210193e-06, "loss": 0.0, "step": 16210 }, { "epoch": 0.11608101338295283, "grad_norm": 0.0, "learning_rate": 8.840048665283046e-06, "loss": 0.0, "step": 16220 }, { "epoch": 0.11615257997566736, "grad_norm": 0.0, "learning_rate": 8.839332999355901e-06, "loss": 0.0, "step": 16230 }, { "epoch": 0.11622414656838188, "grad_norm": 3.2331058719137218e-06, "learning_rate": 8.838617333428757e-06, "loss": 0.0, "step": 16240 }, { "epoch": 0.1162957131610964, "grad_norm": 0.0, "learning_rate": 8.83790166750161e-06, "loss": 0.0, "step": 16250 }, { "epoch": 0.11636727975381092, "grad_norm": 0.0, "learning_rate": 8.837186001574466e-06, "loss": 0.0, "step": 16260 }, { "epoch": 0.11643884634652545, "grad_norm": 0.0, "learning_rate": 8.836470335647321e-06, "loss": 0.0001, "step": 16270 }, { "epoch": 0.11651041293923996, "grad_norm": 0.0, "learning_rate": 8.835754669720176e-06, "loss": 0.0, "step": 16280 }, { "epoch": 0.11658197953195448, "grad_norm": 0.0, "learning_rate": 8.83503900379303e-06, "loss": 0.6785, "step": 16290 }, { "epoch": 0.11665354612466901, "grad_norm": 0.0, "learning_rate": 8.834323337865885e-06, "loss": 0.0, "step": 16300 }, { "epoch": 0.11672511271738352, "grad_norm": 0.0, "learning_rate": 8.83360767193874e-06, "loss": 0.0, "step": 16310 }, { "epoch": 0.11679667931009804, "grad_norm": 0.0, "learning_rate": 8.832892006011594e-06, "loss": 0.0, "step": 16320 }, { "epoch": 0.11686824590281257, "grad_norm": 0.0, "learning_rate": 8.83217634008445e-06, "loss": 0.2666, "step": 16330 }, { "epoch": 0.11693981249552708, "grad_norm": 0.0, "learning_rate": 8.831460674157305e-06, "loss": 0.0005, "step": 16340 }, { "epoch": 0.11701137908824161, "grad_norm": 1.493990282597224e-07, "learning_rate": 8.83074500823016e-06, "loss": 0.0001, "step": 16350 }, { "epoch": 0.11708294568095613, "grad_norm": 0.0, "learning_rate": 8.830029342303013e-06, "loss": 0.0, "step": 16360 }, { "epoch": 0.11715451227367064, "grad_norm": 0.0, "learning_rate": 8.829313676375869e-06, "loss": 0.0002, "step": 16370 }, { "epoch": 0.11722607886638517, "grad_norm": 3.2810969941010626e-08, "learning_rate": 8.828598010448724e-06, "loss": 0.0, "step": 16380 }, { "epoch": 0.1172976454590997, "grad_norm": 0.0, "learning_rate": 8.827882344521578e-06, "loss": 0.0002, "step": 16390 }, { "epoch": 0.1173692120518142, "grad_norm": 0.016765182837843895, "learning_rate": 8.827166678594433e-06, "loss": 0.0, "step": 16400 }, { "epoch": 0.11744077864452873, "grad_norm": 0.0, "learning_rate": 8.826451012667288e-06, "loss": 0.0, "step": 16410 }, { "epoch": 0.11751234523724326, "grad_norm": 8.830440492602065e-05, "learning_rate": 8.825735346740142e-06, "loss": 0.0, "step": 16420 }, { "epoch": 0.11758391182995778, "grad_norm": 0.0, "learning_rate": 8.825019680812997e-06, "loss": 0.0, "step": 16430 }, { "epoch": 0.1176554784226723, "grad_norm": 0.0, "learning_rate": 8.824304014885852e-06, "loss": 0.0305, "step": 16440 }, { "epoch": 0.11772704501538682, "grad_norm": 0.0, "learning_rate": 8.823588348958708e-06, "loss": 0.0, "step": 16450 }, { "epoch": 0.11779861160810134, "grad_norm": 1.350858314097536e-09, "learning_rate": 8.822872683031561e-06, "loss": 0.0, "step": 16460 }, { "epoch": 0.11787017820081586, "grad_norm": 0.0, "learning_rate": 8.822157017104417e-06, "loss": 0.0, "step": 16470 }, { "epoch": 0.11794174479353038, "grad_norm": 0.0, "learning_rate": 8.821441351177272e-06, "loss": 0.0, "step": 16480 }, { "epoch": 0.1180133113862449, "grad_norm": 0.0, "learning_rate": 8.820725685250125e-06, "loss": 0.0, "step": 16490 }, { "epoch": 0.11808487797895942, "grad_norm": 0.0, "learning_rate": 8.82001001932298e-06, "loss": 0.0, "step": 16500 }, { "epoch": 0.11815644457167394, "grad_norm": 0.029335374012589455, "learning_rate": 8.819294353395836e-06, "loss": 0.0, "step": 16510 }, { "epoch": 0.11822801116438847, "grad_norm": 0.0, "learning_rate": 8.818578687468691e-06, "loss": 0.0, "step": 16520 }, { "epoch": 0.11829957775710298, "grad_norm": 0.00019929837435483932, "learning_rate": 8.817863021541545e-06, "loss": 0.0, "step": 16530 }, { "epoch": 0.1183711443498175, "grad_norm": 0.0, "learning_rate": 8.8171473556144e-06, "loss": 0.0, "step": 16540 }, { "epoch": 0.11844271094253203, "grad_norm": 0.0, "learning_rate": 8.816431689687255e-06, "loss": 0.0, "step": 16550 }, { "epoch": 0.11851427753524654, "grad_norm": 0.0, "learning_rate": 8.815716023760109e-06, "loss": 0.0, "step": 16560 }, { "epoch": 0.11858584412796107, "grad_norm": 0.0008251478429883718, "learning_rate": 8.815000357832964e-06, "loss": 0.0, "step": 16570 }, { "epoch": 0.1186574107206756, "grad_norm": 0.0, "learning_rate": 8.81428469190582e-06, "loss": 0.0, "step": 16580 }, { "epoch": 0.1187289773133901, "grad_norm": 0.0, "learning_rate": 8.813569025978675e-06, "loss": 0.0, "step": 16590 }, { "epoch": 0.11880054390610463, "grad_norm": 0.0, "learning_rate": 8.812853360051529e-06, "loss": 0.0, "step": 16600 }, { "epoch": 0.11887211049881916, "grad_norm": 0.0, "learning_rate": 8.812137694124384e-06, "loss": 0.0, "step": 16610 }, { "epoch": 0.11894367709153367, "grad_norm": 0.0, "learning_rate": 8.811422028197239e-06, "loss": 0.0002, "step": 16620 }, { "epoch": 0.11901524368424819, "grad_norm": 0.0, "learning_rate": 8.810706362270093e-06, "loss": 0.0, "step": 16630 }, { "epoch": 0.11908681027696272, "grad_norm": 3.242132606828818e-08, "learning_rate": 8.809990696342948e-06, "loss": 0.0007, "step": 16640 }, { "epoch": 0.11915837686967723, "grad_norm": 0.0, "learning_rate": 8.809275030415803e-06, "loss": 0.0, "step": 16650 }, { "epoch": 0.11922994346239175, "grad_norm": 0.0, "learning_rate": 8.808559364488657e-06, "loss": 0.0, "step": 16660 }, { "epoch": 0.11930151005510628, "grad_norm": 8.019150845939294e-05, "learning_rate": 8.807843698561512e-06, "loss": 0.0, "step": 16670 }, { "epoch": 0.11937307664782079, "grad_norm": 0.0, "learning_rate": 8.807128032634368e-06, "loss": 0.1472, "step": 16680 }, { "epoch": 0.11944464324053532, "grad_norm": 0.0, "learning_rate": 8.806412366707223e-06, "loss": 0.0, "step": 16690 }, { "epoch": 0.11951620983324984, "grad_norm": 0.0, "learning_rate": 8.805696700780076e-06, "loss": 0.6609, "step": 16700 }, { "epoch": 0.11958777642596435, "grad_norm": 5.1209454454692604e-09, "learning_rate": 8.804981034852932e-06, "loss": 0.0001, "step": 16710 }, { "epoch": 0.11965934301867888, "grad_norm": 0.0, "learning_rate": 8.804265368925787e-06, "loss": 0.0056, "step": 16720 }, { "epoch": 0.1197309096113934, "grad_norm": 0.0, "learning_rate": 8.80354970299864e-06, "loss": 0.0, "step": 16730 }, { "epoch": 0.11980247620410792, "grad_norm": 0.0, "learning_rate": 8.802834037071496e-06, "loss": 0.0, "step": 16740 }, { "epoch": 0.11987404279682244, "grad_norm": 8.459078060241154e-08, "learning_rate": 8.80211837114435e-06, "loss": 0.2086, "step": 16750 }, { "epoch": 0.11994560938953697, "grad_norm": 0.0, "learning_rate": 8.801402705217206e-06, "loss": 0.0, "step": 16760 }, { "epoch": 0.12001717598225148, "grad_norm": 0.018842341378331184, "learning_rate": 8.80068703929006e-06, "loss": 0.1245, "step": 16770 }, { "epoch": 0.120088742574966, "grad_norm": 0.0, "learning_rate": 8.799971373362914e-06, "loss": 0.0, "step": 16780 }, { "epoch": 0.12016030916768053, "grad_norm": 0.0, "learning_rate": 8.79925570743577e-06, "loss": 0.2726, "step": 16790 }, { "epoch": 0.12023187576039505, "grad_norm": 0.1409364640712738, "learning_rate": 8.798540041508624e-06, "loss": 0.0, "step": 16800 }, { "epoch": 0.12030344235310957, "grad_norm": 0.0, "learning_rate": 8.79782437558148e-06, "loss": 0.0, "step": 16810 }, { "epoch": 0.12037500894582409, "grad_norm": 4.93340979179635e-10, "learning_rate": 8.797108709654333e-06, "loss": 0.0, "step": 16820 }, { "epoch": 0.12044657553853862, "grad_norm": 0.0, "learning_rate": 8.79639304372719e-06, "loss": 0.0, "step": 16830 }, { "epoch": 0.12051814213125313, "grad_norm": 0.0, "learning_rate": 8.795677377800044e-06, "loss": 0.0, "step": 16840 }, { "epoch": 0.12058970872396765, "grad_norm": 9.751184961714898e-07, "learning_rate": 8.794961711872897e-06, "loss": 0.0, "step": 16850 }, { "epoch": 0.12066127531668218, "grad_norm": 0.00047958793584257364, "learning_rate": 8.794246045945754e-06, "loss": 0.0, "step": 16860 }, { "epoch": 0.12073284190939669, "grad_norm": 0.0, "learning_rate": 8.793530380018608e-06, "loss": 0.0, "step": 16870 }, { "epoch": 0.12080440850211122, "grad_norm": 6.867123011033982e-05, "learning_rate": 8.792814714091463e-06, "loss": 0.0225, "step": 16880 }, { "epoch": 0.12087597509482574, "grad_norm": 0.0, "learning_rate": 8.792099048164317e-06, "loss": 0.0, "step": 16890 }, { "epoch": 0.12094754168754025, "grad_norm": 0.0, "learning_rate": 8.791383382237172e-06, "loss": 0.0, "step": 16900 }, { "epoch": 0.12101910828025478, "grad_norm": 0.0, "learning_rate": 8.790667716310027e-06, "loss": 0.0, "step": 16910 }, { "epoch": 0.1210906748729693, "grad_norm": 683.684326171875, "learning_rate": 8.789952050382881e-06, "loss": 0.4007, "step": 16920 }, { "epoch": 0.12116224146568381, "grad_norm": 0.0004949072026647627, "learning_rate": 8.789236384455738e-06, "loss": 0.0, "step": 16930 }, { "epoch": 0.12123380805839834, "grad_norm": 4.6505299611254713e-10, "learning_rate": 8.788520718528592e-06, "loss": 0.0, "step": 16940 }, { "epoch": 0.12130537465111287, "grad_norm": 0.0, "learning_rate": 8.787805052601447e-06, "loss": 0.0, "step": 16950 }, { "epoch": 0.12137694124382738, "grad_norm": 0.0, "learning_rate": 8.7870893866743e-06, "loss": 0.0, "step": 16960 }, { "epoch": 0.1214485078365419, "grad_norm": 0.0, "learning_rate": 8.786373720747156e-06, "loss": 0.2902, "step": 16970 }, { "epoch": 0.12152007442925643, "grad_norm": 0.0, "learning_rate": 8.785658054820011e-06, "loss": 0.0, "step": 16980 }, { "epoch": 0.12159164102197094, "grad_norm": 1.8156358638776737e-09, "learning_rate": 8.784942388892865e-06, "loss": 0.0, "step": 16990 }, { "epoch": 0.12166320761468546, "grad_norm": 0.0004578005464281887, "learning_rate": 8.784226722965722e-06, "loss": 0.0297, "step": 17000 }, { "epoch": 0.12173477420739999, "grad_norm": 2.3162092475104146e-05, "learning_rate": 8.783511057038575e-06, "loss": 0.001, "step": 17010 }, { "epoch": 0.1218063408001145, "grad_norm": 1.6582498929551548e-08, "learning_rate": 8.782795391111429e-06, "loss": 0.0001, "step": 17020 }, { "epoch": 0.12187790739282903, "grad_norm": 2.323779852986263e-07, "learning_rate": 8.782079725184284e-06, "loss": 0.0009, "step": 17030 }, { "epoch": 0.12194947398554355, "grad_norm": 2.7856297492980957, "learning_rate": 8.78136405925714e-06, "loss": 0.0004, "step": 17040 }, { "epoch": 0.12202104057825806, "grad_norm": 0.0, "learning_rate": 8.780648393329995e-06, "loss": 0.1406, "step": 17050 }, { "epoch": 0.12209260717097259, "grad_norm": 0.052827946841716766, "learning_rate": 8.779932727402848e-06, "loss": 0.0032, "step": 17060 }, { "epoch": 0.12216417376368711, "grad_norm": 0.42060479521751404, "learning_rate": 8.779217061475705e-06, "loss": 0.0001, "step": 17070 }, { "epoch": 0.12223574035640163, "grad_norm": 0.0, "learning_rate": 8.778501395548559e-06, "loss": 0.0, "step": 17080 }, { "epoch": 0.12230730694911615, "grad_norm": 0.0, "learning_rate": 8.777785729621412e-06, "loss": 0.0159, "step": 17090 }, { "epoch": 0.12237887354183068, "grad_norm": 0.0, "learning_rate": 8.777070063694268e-06, "loss": 0.0, "step": 17100 }, { "epoch": 0.12245044013454519, "grad_norm": 0.0, "learning_rate": 8.776354397767123e-06, "loss": 0.0, "step": 17110 }, { "epoch": 0.12252200672725971, "grad_norm": 0.016701696440577507, "learning_rate": 8.775638731839978e-06, "loss": 0.0, "step": 17120 }, { "epoch": 0.12259357331997424, "grad_norm": 0.0, "learning_rate": 8.774923065912832e-06, "loss": 0.001, "step": 17130 }, { "epoch": 0.12266513991268875, "grad_norm": 2.007541333171048e-08, "learning_rate": 8.774207399985687e-06, "loss": 0.0, "step": 17140 }, { "epoch": 0.12273670650540328, "grad_norm": 0.0, "learning_rate": 8.773491734058542e-06, "loss": 0.0, "step": 17150 }, { "epoch": 0.1228082730981178, "grad_norm": 0.7962834239006042, "learning_rate": 8.772776068131396e-06, "loss": 0.0002, "step": 17160 }, { "epoch": 0.12287983969083231, "grad_norm": 2.1645017722615734e-10, "learning_rate": 8.772060402204251e-06, "loss": 0.0, "step": 17170 }, { "epoch": 0.12295140628354684, "grad_norm": 4.742252701639416e-10, "learning_rate": 8.771344736277107e-06, "loss": 0.002, "step": 17180 }, { "epoch": 0.12302297287626136, "grad_norm": 0.0, "learning_rate": 8.77062907034996e-06, "loss": 0.0033, "step": 17190 }, { "epoch": 0.12309453946897589, "grad_norm": 4.324082510720473e-06, "learning_rate": 8.769913404422816e-06, "loss": 0.0, "step": 17200 }, { "epoch": 0.1231661060616904, "grad_norm": 0.0, "learning_rate": 8.769197738495671e-06, "loss": 0.0, "step": 17210 }, { "epoch": 0.12323767265440493, "grad_norm": 0.0, "learning_rate": 8.768482072568526e-06, "loss": 0.0, "step": 17220 }, { "epoch": 0.12330923924711945, "grad_norm": 0.0, "learning_rate": 8.76776640664138e-06, "loss": 0.0, "step": 17230 }, { "epoch": 0.12338080583983396, "grad_norm": 0.0, "learning_rate": 8.767050740714235e-06, "loss": 0.0, "step": 17240 }, { "epoch": 0.12345237243254849, "grad_norm": 0.0012796730734407902, "learning_rate": 8.76633507478709e-06, "loss": 0.0, "step": 17250 }, { "epoch": 0.12352393902526301, "grad_norm": 0.00020073444466106594, "learning_rate": 8.765619408859944e-06, "loss": 0.0, "step": 17260 }, { "epoch": 0.12359550561797752, "grad_norm": 0.0, "learning_rate": 8.764975309525514e-06, "loss": 0.118, "step": 17270 }, { "epoch": 0.12366707221069205, "grad_norm": 20.737812042236328, "learning_rate": 8.764259643598369e-06, "loss": 1.199, "step": 17280 }, { "epoch": 0.12373863880340658, "grad_norm": 0.0, "learning_rate": 8.763543977671224e-06, "loss": 0.0, "step": 17290 }, { "epoch": 0.12381020539612109, "grad_norm": 0.0, "learning_rate": 8.762828311744078e-06, "loss": 0.2771, "step": 17300 }, { "epoch": 0.12388177198883561, "grad_norm": 0.0, "learning_rate": 8.762112645816933e-06, "loss": 0.0, "step": 17310 }, { "epoch": 0.12395333858155014, "grad_norm": 2.859629866236446e-09, "learning_rate": 8.761396979889788e-06, "loss": 0.0, "step": 17320 }, { "epoch": 0.12402490517426465, "grad_norm": 0.0, "learning_rate": 8.760681313962644e-06, "loss": 0.0, "step": 17330 }, { "epoch": 0.12409647176697917, "grad_norm": 0.0, "learning_rate": 8.759965648035497e-06, "loss": 0.0, "step": 17340 }, { "epoch": 0.1241680383596937, "grad_norm": 8.772687576374949e-10, "learning_rate": 8.759249982108353e-06, "loss": 0.0, "step": 17350 }, { "epoch": 0.12423960495240821, "grad_norm": 0.0, "learning_rate": 8.758534316181208e-06, "loss": 0.0, "step": 17360 }, { "epoch": 0.12431117154512274, "grad_norm": 0.8020026087760925, "learning_rate": 8.757818650254062e-06, "loss": 0.0002, "step": 17370 }, { "epoch": 0.12438273813783726, "grad_norm": 4.3345349531875854e-10, "learning_rate": 8.757102984326917e-06, "loss": 0.0, "step": 17380 }, { "epoch": 0.12445430473055177, "grad_norm": 0.0, "learning_rate": 8.756387318399772e-06, "loss": 0.0, "step": 17390 }, { "epoch": 0.1245258713232663, "grad_norm": 0.0, "learning_rate": 8.755671652472626e-06, "loss": 0.0, "step": 17400 }, { "epoch": 0.12459743791598082, "grad_norm": 0.0006201479700393975, "learning_rate": 8.754955986545481e-06, "loss": 0.0, "step": 17410 }, { "epoch": 0.12466900450869534, "grad_norm": 0.000143294149893336, "learning_rate": 8.754240320618336e-06, "loss": 0.0249, "step": 17420 }, { "epoch": 0.12474057110140986, "grad_norm": 4.5075471462041605e-06, "learning_rate": 8.753524654691192e-06, "loss": 0.0001, "step": 17430 }, { "epoch": 0.12481213769412439, "grad_norm": 0.0, "learning_rate": 8.752808988764045e-06, "loss": 0.0408, "step": 17440 }, { "epoch": 0.1248837042868389, "grad_norm": 1.8917597117251717e-05, "learning_rate": 8.7520933228369e-06, "loss": 0.0, "step": 17450 }, { "epoch": 0.12495527087955342, "grad_norm": 0.0, "learning_rate": 8.751377656909756e-06, "loss": 0.0101, "step": 17460 }, { "epoch": 0.12502683747226795, "grad_norm": 0.0, "learning_rate": 8.75066199098261e-06, "loss": 0.0164, "step": 17470 }, { "epoch": 0.12509840406498246, "grad_norm": 0.0, "learning_rate": 8.749946325055465e-06, "loss": 0.0, "step": 17480 }, { "epoch": 0.125169970657697, "grad_norm": 0.0, "learning_rate": 8.74923065912832e-06, "loss": 0.0, "step": 17490 }, { "epoch": 0.1252415372504115, "grad_norm": 0.0, "learning_rate": 8.748514993201175e-06, "loss": 0.0, "step": 17500 }, { "epoch": 0.12531310384312602, "grad_norm": 6.451905392168555e-06, "learning_rate": 8.747799327274029e-06, "loss": 0.0475, "step": 17510 }, { "epoch": 0.12538467043584056, "grad_norm": 0.0005045748548582196, "learning_rate": 8.747083661346884e-06, "loss": 0.0002, "step": 17520 }, { "epoch": 0.12545623702855507, "grad_norm": 0.0, "learning_rate": 8.74636799541974e-06, "loss": 0.0002, "step": 17530 }, { "epoch": 0.12552780362126958, "grad_norm": 1.4379650354385376, "learning_rate": 8.745652329492593e-06, "loss": 0.0002, "step": 17540 }, { "epoch": 0.12559937021398412, "grad_norm": 0.030283192172646523, "learning_rate": 8.744936663565448e-06, "loss": 0.0002, "step": 17550 }, { "epoch": 0.12567093680669864, "grad_norm": 5.142073433717087e-10, "learning_rate": 8.744220997638304e-06, "loss": 0.0001, "step": 17560 }, { "epoch": 0.12574250339941315, "grad_norm": 0.0, "learning_rate": 8.743505331711159e-06, "loss": 0.0, "step": 17570 }, { "epoch": 0.1258140699921277, "grad_norm": 0.0, "learning_rate": 8.742789665784012e-06, "loss": 0.0026, "step": 17580 }, { "epoch": 0.1258856365848422, "grad_norm": 0.0, "learning_rate": 8.742073999856868e-06, "loss": 0.0046, "step": 17590 }, { "epoch": 0.1259572031775567, "grad_norm": 0.0, "learning_rate": 8.741358333929723e-06, "loss": 0.0002, "step": 17600 }, { "epoch": 0.12602876977027125, "grad_norm": 0.0, "learning_rate": 8.740642668002577e-06, "loss": 0.0, "step": 17610 }, { "epoch": 0.12610033636298576, "grad_norm": 0.0, "learning_rate": 8.739927002075432e-06, "loss": 0.0018, "step": 17620 }, { "epoch": 0.12617190295570027, "grad_norm": 0.0, "learning_rate": 8.739211336148287e-06, "loss": 0.0, "step": 17630 }, { "epoch": 0.1262434695484148, "grad_norm": 0.0, "learning_rate": 8.738495670221141e-06, "loss": 0.0021, "step": 17640 }, { "epoch": 0.12631503614112932, "grad_norm": 0.001162059954367578, "learning_rate": 8.737780004293996e-06, "loss": 0.0, "step": 17650 }, { "epoch": 0.12638660273384383, "grad_norm": 8.736104617490525e-10, "learning_rate": 8.737064338366851e-06, "loss": 0.0, "step": 17660 }, { "epoch": 0.12645816932655837, "grad_norm": 0.0, "learning_rate": 8.736348672439707e-06, "loss": 0.0074, "step": 17670 }, { "epoch": 0.12652973591927288, "grad_norm": 0.0, "learning_rate": 8.73563300651256e-06, "loss": 0.0, "step": 17680 }, { "epoch": 0.1266013025119874, "grad_norm": 6.6642664720006906e-09, "learning_rate": 8.734917340585416e-06, "loss": 0.0001, "step": 17690 }, { "epoch": 0.12667286910470194, "grad_norm": 0.0, "learning_rate": 8.734201674658271e-06, "loss": 0.0004, "step": 17700 }, { "epoch": 0.12674443569741645, "grad_norm": 0.0006580018671229482, "learning_rate": 8.733486008731125e-06, "loss": 0.0, "step": 17710 }, { "epoch": 0.12681600229013096, "grad_norm": 0.0, "learning_rate": 8.73277034280398e-06, "loss": 0.0037, "step": 17720 }, { "epoch": 0.1268875688828455, "grad_norm": 0.0, "learning_rate": 8.732054676876835e-06, "loss": 0.0, "step": 17730 }, { "epoch": 0.12695913547556, "grad_norm": 2.0934409761252937e-09, "learning_rate": 8.73133901094969e-06, "loss": 0.0, "step": 17740 }, { "epoch": 0.12703070206827452, "grad_norm": 1.9297653253147473e-08, "learning_rate": 8.730623345022544e-06, "loss": 0.0, "step": 17750 }, { "epoch": 0.12710226866098906, "grad_norm": 0.0, "learning_rate": 8.7299076790954e-06, "loss": 0.0, "step": 17760 }, { "epoch": 0.12717383525370357, "grad_norm": 0.0, "learning_rate": 8.729192013168255e-06, "loss": 0.0, "step": 17770 }, { "epoch": 0.12724540184641808, "grad_norm": 0.0, "learning_rate": 8.728476347241108e-06, "loss": 0.0004, "step": 17780 }, { "epoch": 0.12731696843913262, "grad_norm": 0.0, "learning_rate": 8.727760681313963e-06, "loss": 0.0, "step": 17790 }, { "epoch": 0.12738853503184713, "grad_norm": 0.0, "learning_rate": 8.727045015386819e-06, "loss": 0.06, "step": 17800 }, { "epoch": 0.12746010162456164, "grad_norm": 0.0, "learning_rate": 8.726329349459674e-06, "loss": 0.0054, "step": 17810 }, { "epoch": 0.12753166821727618, "grad_norm": 2.481744587612411e-09, "learning_rate": 8.725613683532528e-06, "loss": 0.0, "step": 17820 }, { "epoch": 0.1276032348099907, "grad_norm": 0.0, "learning_rate": 8.724898017605383e-06, "loss": 0.0, "step": 17830 }, { "epoch": 0.1276748014027052, "grad_norm": 1.1715416121660382e-06, "learning_rate": 8.724182351678238e-06, "loss": 0.0, "step": 17840 }, { "epoch": 0.12774636799541975, "grad_norm": 0.0, "learning_rate": 8.723466685751092e-06, "loss": 0.6099, "step": 17850 }, { "epoch": 0.12781793458813426, "grad_norm": 4.654897356459742e-09, "learning_rate": 8.722751019823947e-06, "loss": 0.0002, "step": 17860 }, { "epoch": 0.12788950118084877, "grad_norm": 4.4780343322337046e-10, "learning_rate": 8.722035353896802e-06, "loss": 0.0, "step": 17870 }, { "epoch": 0.1279610677735633, "grad_norm": 0.0, "learning_rate": 8.721319687969656e-06, "loss": 0.0, "step": 17880 }, { "epoch": 0.12803263436627782, "grad_norm": 0.2305746227502823, "learning_rate": 8.720604022042511e-06, "loss": 0.0, "step": 17890 }, { "epoch": 0.12810420095899233, "grad_norm": 2.4368215235881507e-05, "learning_rate": 8.719888356115367e-06, "loss": 0.0, "step": 17900 }, { "epoch": 0.12817576755170687, "grad_norm": 0.0, "learning_rate": 8.719172690188222e-06, "loss": 0.0, "step": 17910 }, { "epoch": 0.12824733414442138, "grad_norm": 0.0, "learning_rate": 8.718457024261075e-06, "loss": 0.0, "step": 17920 }, { "epoch": 0.1283189007371359, "grad_norm": 0.0, "learning_rate": 8.71774135833393e-06, "loss": 0.0, "step": 17930 }, { "epoch": 0.12839046732985043, "grad_norm": 0.0, "learning_rate": 8.717025692406786e-06, "loss": 0.0, "step": 17940 }, { "epoch": 0.12846203392256494, "grad_norm": 0.0, "learning_rate": 8.71631002647964e-06, "loss": 0.0, "step": 17950 }, { "epoch": 0.12853360051527946, "grad_norm": 0.0, "learning_rate": 8.715594360552495e-06, "loss": 0.0, "step": 17960 }, { "epoch": 0.128605167107994, "grad_norm": 0.008169733919203281, "learning_rate": 8.71487869462535e-06, "loss": 0.0, "step": 17970 }, { "epoch": 0.1286767337007085, "grad_norm": 0.00038255698746070266, "learning_rate": 8.714163028698206e-06, "loss": 0.0, "step": 17980 }, { "epoch": 0.12874830029342302, "grad_norm": 0.0, "learning_rate": 8.713447362771059e-06, "loss": 0.0015, "step": 17990 }, { "epoch": 0.12881986688613756, "grad_norm": 0.0, "learning_rate": 8.712731696843913e-06, "loss": 0.0, "step": 18000 }, { "epoch": 0.12889143347885207, "grad_norm": 1.6968746422207914e-05, "learning_rate": 8.71201603091677e-06, "loss": 0.0018, "step": 18010 }, { "epoch": 0.12896300007156658, "grad_norm": 0.0, "learning_rate": 8.711300364989623e-06, "loss": 0.8453, "step": 18020 }, { "epoch": 0.12903456666428112, "grad_norm": 0.0, "learning_rate": 8.710584699062479e-06, "loss": 0.0, "step": 18030 }, { "epoch": 0.12910613325699563, "grad_norm": 0.0, "learning_rate": 8.709869033135334e-06, "loss": 0.0, "step": 18040 }, { "epoch": 0.12917769984971014, "grad_norm": 0.0011789401760324836, "learning_rate": 8.709153367208187e-06, "loss": 0.1663, "step": 18050 }, { "epoch": 0.12924926644242468, "grad_norm": 0.009213495999574661, "learning_rate": 8.708437701281043e-06, "loss": 0.0, "step": 18060 }, { "epoch": 0.1293208330351392, "grad_norm": 0.0, "learning_rate": 8.707722035353896e-06, "loss": 0.0, "step": 18070 }, { "epoch": 0.1293923996278537, "grad_norm": 0.0, "learning_rate": 8.707006369426753e-06, "loss": 0.0, "step": 18080 }, { "epoch": 0.12946396622056824, "grad_norm": 0.0006139183533377945, "learning_rate": 8.706290703499607e-06, "loss": 0.0, "step": 18090 }, { "epoch": 0.12953553281328276, "grad_norm": 0.0, "learning_rate": 8.705575037572462e-06, "loss": 0.0, "step": 18100 }, { "epoch": 0.12960709940599727, "grad_norm": 1.2433910789866331e-08, "learning_rate": 8.704859371645318e-06, "loss": 0.0, "step": 18110 }, { "epoch": 0.1296786659987118, "grad_norm": 4.651531104737927e-10, "learning_rate": 8.704143705718171e-06, "loss": 0.0, "step": 18120 }, { "epoch": 0.12975023259142632, "grad_norm": 0.0028788927011191845, "learning_rate": 8.703428039791026e-06, "loss": 0.0127, "step": 18130 }, { "epoch": 0.12982179918414083, "grad_norm": 190.83607482910156, "learning_rate": 8.70271237386388e-06, "loss": 0.0346, "step": 18140 }, { "epoch": 0.12989336577685537, "grad_norm": 0.0, "learning_rate": 8.701996707936737e-06, "loss": 0.4324, "step": 18150 }, { "epoch": 0.12996493236956988, "grad_norm": 0.0, "learning_rate": 8.70128104200959e-06, "loss": 0.0, "step": 18160 }, { "epoch": 0.13003649896228442, "grad_norm": 0.2956288158893585, "learning_rate": 8.700565376082444e-06, "loss": 0.0, "step": 18170 }, { "epoch": 0.13010806555499893, "grad_norm": 4.142405033111572, "learning_rate": 8.6998497101553e-06, "loss": 0.0008, "step": 18180 }, { "epoch": 0.13017963214771344, "grad_norm": 0.0, "learning_rate": 8.699134044228155e-06, "loss": 0.0, "step": 18190 }, { "epoch": 0.13025119874042798, "grad_norm": 0.0, "learning_rate": 8.69841837830101e-06, "loss": 0.0, "step": 18200 }, { "epoch": 0.1303227653331425, "grad_norm": 0.0, "learning_rate": 8.697702712373864e-06, "loss": 0.0, "step": 18210 }, { "epoch": 0.130394331925857, "grad_norm": 4.835453637497267e-08, "learning_rate": 8.69698704644672e-06, "loss": 0.0058, "step": 18220 }, { "epoch": 0.13046589851857154, "grad_norm": 0.0, "learning_rate": 8.696271380519574e-06, "loss": 0.0, "step": 18230 }, { "epoch": 0.13053746511128606, "grad_norm": 0.00962154846638441, "learning_rate": 8.695555714592428e-06, "loss": 0.0, "step": 18240 }, { "epoch": 0.13060903170400057, "grad_norm": 4.840808868408203, "learning_rate": 8.694840048665283e-06, "loss": 0.0005, "step": 18250 }, { "epoch": 0.1306805982967151, "grad_norm": 0.0, "learning_rate": 8.694124382738138e-06, "loss": 0.0, "step": 18260 }, { "epoch": 0.13075216488942962, "grad_norm": 0.00017948466120287776, "learning_rate": 8.693408716810994e-06, "loss": 0.0, "step": 18270 }, { "epoch": 0.13082373148214413, "grad_norm": 1.025963403122887e-08, "learning_rate": 8.692693050883847e-06, "loss": 0.019, "step": 18280 }, { "epoch": 0.13089529807485867, "grad_norm": 0.0, "learning_rate": 8.691977384956703e-06, "loss": 0.0, "step": 18290 }, { "epoch": 0.13096686466757318, "grad_norm": 0.0, "learning_rate": 8.691261719029558e-06, "loss": 0.0, "step": 18300 }, { "epoch": 0.1310384312602877, "grad_norm": 0.05820778012275696, "learning_rate": 8.690546053102412e-06, "loss": 0.0, "step": 18310 }, { "epoch": 0.13110999785300223, "grad_norm": 7.01094108990219e-07, "learning_rate": 8.689830387175267e-06, "loss": 0.0, "step": 18320 }, { "epoch": 0.13118156444571674, "grad_norm": 0.0, "learning_rate": 8.689114721248122e-06, "loss": 0.059, "step": 18330 }, { "epoch": 0.13125313103843125, "grad_norm": 0.00015427380276378244, "learning_rate": 8.688399055320977e-06, "loss": 0.0, "step": 18340 }, { "epoch": 0.1313246976311458, "grad_norm": 9.301415193618823e-10, "learning_rate": 8.687683389393831e-06, "loss": 0.0001, "step": 18350 }, { "epoch": 0.1313962642238603, "grad_norm": 0.00020833226153627038, "learning_rate": 8.686967723466686e-06, "loss": 0.0, "step": 18360 }, { "epoch": 0.13146783081657482, "grad_norm": 0.000722648692317307, "learning_rate": 8.686252057539542e-06, "loss": 0.0, "step": 18370 }, { "epoch": 0.13153939740928935, "grad_norm": 0.0, "learning_rate": 8.685536391612395e-06, "loss": 0.0, "step": 18380 }, { "epoch": 0.13161096400200387, "grad_norm": 0.0, "learning_rate": 8.68482072568525e-06, "loss": 0.0, "step": 18390 }, { "epoch": 0.13168253059471838, "grad_norm": 6.812874175921024e-07, "learning_rate": 8.684105059758106e-06, "loss": 0.0, "step": 18400 }, { "epoch": 0.13175409718743292, "grad_norm": 0.0, "learning_rate": 8.68338939383096e-06, "loss": 0.0, "step": 18410 }, { "epoch": 0.13182566378014743, "grad_norm": 2.1101667968537186e-08, "learning_rate": 8.682673727903815e-06, "loss": 0.0, "step": 18420 }, { "epoch": 0.13189723037286194, "grad_norm": 0.0, "learning_rate": 8.68195806197667e-06, "loss": 0.0, "step": 18430 }, { "epoch": 0.13196879696557648, "grad_norm": 0.0, "learning_rate": 8.681242396049525e-06, "loss": 0.0, "step": 18440 }, { "epoch": 0.132040363558291, "grad_norm": 0.0, "learning_rate": 8.680526730122379e-06, "loss": 0.0, "step": 18450 }, { "epoch": 0.1321119301510055, "grad_norm": 0.0, "learning_rate": 8.679811064195234e-06, "loss": 0.0003, "step": 18460 }, { "epoch": 0.13218349674372004, "grad_norm": 0.072036013007164, "learning_rate": 8.67909539826809e-06, "loss": 0.0, "step": 18470 }, { "epoch": 0.13225506333643455, "grad_norm": 0.0, "learning_rate": 8.678379732340943e-06, "loss": 0.0, "step": 18480 }, { "epoch": 0.13232662992914906, "grad_norm": 0.0, "learning_rate": 8.677664066413798e-06, "loss": 0.0001, "step": 18490 }, { "epoch": 0.1323981965218636, "grad_norm": 0.0, "learning_rate": 8.676948400486654e-06, "loss": 0.0, "step": 18500 }, { "epoch": 0.13246976311457812, "grad_norm": 350.8809814453125, "learning_rate": 8.676232734559509e-06, "loss": 0.0431, "step": 18510 }, { "epoch": 0.13254132970729263, "grad_norm": 6.012478479533456e-05, "learning_rate": 8.675517068632362e-06, "loss": 0.0003, "step": 18520 }, { "epoch": 0.13261289630000717, "grad_norm": 0.0, "learning_rate": 8.674801402705218e-06, "loss": 0.0, "step": 18530 }, { "epoch": 0.13268446289272168, "grad_norm": 7.079684734344482, "learning_rate": 8.674085736778073e-06, "loss": 0.2544, "step": 18540 }, { "epoch": 0.1327560294854362, "grad_norm": 0.0, "learning_rate": 8.673370070850927e-06, "loss": 0.0, "step": 18550 }, { "epoch": 0.13282759607815073, "grad_norm": 0.5898236632347107, "learning_rate": 8.672654404923782e-06, "loss": 0.0001, "step": 18560 }, { "epoch": 0.13289916267086524, "grad_norm": 0.0, "learning_rate": 8.671938738996637e-06, "loss": 0.0, "step": 18570 }, { "epoch": 0.13297072926357975, "grad_norm": 719.43505859375, "learning_rate": 8.671223073069493e-06, "loss": 0.4575, "step": 18580 }, { "epoch": 0.1330422958562943, "grad_norm": 0.0, "learning_rate": 8.670507407142346e-06, "loss": 0.0, "step": 18590 }, { "epoch": 0.1331138624490088, "grad_norm": 0.0097750099375844, "learning_rate": 8.669791741215201e-06, "loss": 0.0, "step": 18600 }, { "epoch": 0.1331854290417233, "grad_norm": 0.0, "learning_rate": 8.669076075288057e-06, "loss": 0.0, "step": 18610 }, { "epoch": 0.13325699563443785, "grad_norm": 0.0, "learning_rate": 8.66836040936091e-06, "loss": 0.0, "step": 18620 }, { "epoch": 0.13332856222715236, "grad_norm": 0.0, "learning_rate": 8.667644743433766e-06, "loss": 0.0, "step": 18630 }, { "epoch": 0.13340012881986688, "grad_norm": 0.0, "learning_rate": 8.666929077506621e-06, "loss": 0.0022, "step": 18640 }, { "epoch": 0.13347169541258141, "grad_norm": 6.055149128769699e-07, "learning_rate": 8.666213411579474e-06, "loss": 0.0, "step": 18650 }, { "epoch": 0.13354326200529593, "grad_norm": 0.0, "learning_rate": 8.66549774565233e-06, "loss": 0.0, "step": 18660 }, { "epoch": 0.13361482859801044, "grad_norm": 2.7102234412268444e-07, "learning_rate": 8.664782079725185e-06, "loss": 0.0, "step": 18670 }, { "epoch": 0.13368639519072498, "grad_norm": 0.0, "learning_rate": 8.66406641379804e-06, "loss": 0.0, "step": 18680 }, { "epoch": 0.1337579617834395, "grad_norm": 0.0, "learning_rate": 8.663350747870894e-06, "loss": 0.0001, "step": 18690 }, { "epoch": 0.133829528376154, "grad_norm": 0.008044525980949402, "learning_rate": 8.66263508194375e-06, "loss": 0.0, "step": 18700 }, { "epoch": 0.13390109496886854, "grad_norm": 0.0, "learning_rate": 8.661919416016605e-06, "loss": 0.0004, "step": 18710 }, { "epoch": 0.13397266156158305, "grad_norm": 0.0, "learning_rate": 8.661203750089458e-06, "loss": 0.0, "step": 18720 }, { "epoch": 0.13404422815429756, "grad_norm": 0.0, "learning_rate": 8.660488084162313e-06, "loss": 0.0, "step": 18730 }, { "epoch": 0.1341157947470121, "grad_norm": 0.0, "learning_rate": 8.659772418235169e-06, "loss": 0.0, "step": 18740 }, { "epoch": 0.1341873613397266, "grad_norm": 0.002720635151490569, "learning_rate": 8.659056752308024e-06, "loss": 0.0695, "step": 18750 }, { "epoch": 0.13425892793244112, "grad_norm": 0.0, "learning_rate": 8.658341086380878e-06, "loss": 0.0511, "step": 18760 }, { "epoch": 0.13433049452515566, "grad_norm": 3.0756328105926514, "learning_rate": 8.657625420453733e-06, "loss": 0.0005, "step": 18770 }, { "epoch": 0.13440206111787018, "grad_norm": 0.0, "learning_rate": 8.656909754526588e-06, "loss": 0.0, "step": 18780 }, { "epoch": 0.1344736277105847, "grad_norm": 0.28377607464790344, "learning_rate": 8.656194088599442e-06, "loss": 0.0001, "step": 18790 }, { "epoch": 0.13454519430329923, "grad_norm": 0.0, "learning_rate": 8.655478422672297e-06, "loss": 0.0, "step": 18800 }, { "epoch": 0.13461676089601374, "grad_norm": 0.0, "learning_rate": 8.654762756745152e-06, "loss": 0.0, "step": 18810 }, { "epoch": 0.13468832748872825, "grad_norm": 0.0, "learning_rate": 8.654047090818006e-06, "loss": 0.0, "step": 18820 }, { "epoch": 0.1347598940814428, "grad_norm": 0.0, "learning_rate": 8.653331424890861e-06, "loss": 0.0, "step": 18830 }, { "epoch": 0.1348314606741573, "grad_norm": 1.1824597301313133e-08, "learning_rate": 8.652615758963717e-06, "loss": 0.0, "step": 18840 }, { "epoch": 0.1349030272668718, "grad_norm": 0.0, "learning_rate": 8.651900093036572e-06, "loss": 0.0, "step": 18850 }, { "epoch": 0.13497459385958635, "grad_norm": 0.0, "learning_rate": 8.651184427109425e-06, "loss": 0.0, "step": 18860 }, { "epoch": 0.13504616045230086, "grad_norm": 0.0, "learning_rate": 8.65046876118228e-06, "loss": 0.0, "step": 18870 }, { "epoch": 0.13511772704501537, "grad_norm": 0.0, "learning_rate": 8.649753095255136e-06, "loss": 0.0376, "step": 18880 }, { "epoch": 0.1351892936377299, "grad_norm": 0.0, "learning_rate": 8.64903742932799e-06, "loss": 0.0003, "step": 18890 }, { "epoch": 0.13526086023044442, "grad_norm": 0.0, "learning_rate": 8.648321763400845e-06, "loss": 0.0613, "step": 18900 }, { "epoch": 0.13533242682315894, "grad_norm": 0.0, "learning_rate": 8.6476060974737e-06, "loss": 0.0, "step": 18910 }, { "epoch": 0.13540399341587347, "grad_norm": 0.0, "learning_rate": 8.646890431546556e-06, "loss": 0.0014, "step": 18920 }, { "epoch": 0.135475560008588, "grad_norm": 8.339860916137695, "learning_rate": 8.646174765619409e-06, "loss": 0.003, "step": 18930 }, { "epoch": 0.13554712660130253, "grad_norm": 0.0, "learning_rate": 8.645459099692264e-06, "loss": 0.0, "step": 18940 }, { "epoch": 0.13561869319401704, "grad_norm": 0.0, "learning_rate": 8.64474343376512e-06, "loss": 0.0, "step": 18950 }, { "epoch": 0.13569025978673155, "grad_norm": 0.0, "learning_rate": 8.644027767837973e-06, "loss": 0.2187, "step": 18960 }, { "epoch": 0.1357618263794461, "grad_norm": 0.0, "learning_rate": 8.643312101910829e-06, "loss": 0.001, "step": 18970 }, { "epoch": 0.1358333929721606, "grad_norm": 4.527115904373602e-10, "learning_rate": 8.642596435983684e-06, "loss": 0.0001, "step": 18980 }, { "epoch": 0.1359049595648751, "grad_norm": 4.037934786538244e-08, "learning_rate": 8.64188077005654e-06, "loss": 0.0, "step": 18990 }, { "epoch": 0.13597652615758965, "grad_norm": 0.0, "learning_rate": 8.641165104129393e-06, "loss": 0.0, "step": 19000 }, { "epoch": 0.13604809275030416, "grad_norm": 0.0, "learning_rate": 8.640449438202248e-06, "loss": 0.0, "step": 19010 }, { "epoch": 0.13611965934301867, "grad_norm": 0.0, "learning_rate": 8.639733772275103e-06, "loss": 0.0, "step": 19020 }, { "epoch": 0.1361912259357332, "grad_norm": 0.011163773946464062, "learning_rate": 8.639018106347957e-06, "loss": 0.0, "step": 19030 }, { "epoch": 0.13626279252844772, "grad_norm": 0.0, "learning_rate": 8.638302440420812e-06, "loss": 0.0, "step": 19040 }, { "epoch": 0.13633435912116224, "grad_norm": 0.0016739999409765005, "learning_rate": 8.637586774493668e-06, "loss": 0.0, "step": 19050 }, { "epoch": 0.13640592571387677, "grad_norm": 2.5841018214123324e-05, "learning_rate": 8.636871108566521e-06, "loss": 0.0, "step": 19060 }, { "epoch": 0.13647749230659129, "grad_norm": 0.0, "learning_rate": 8.636155442639376e-06, "loss": 0.0034, "step": 19070 }, { "epoch": 0.1365490588993058, "grad_norm": 0.0, "learning_rate": 8.635439776712232e-06, "loss": 0.0, "step": 19080 }, { "epoch": 0.13662062549202034, "grad_norm": 0.0, "learning_rate": 8.634724110785087e-06, "loss": 0.0, "step": 19090 }, { "epoch": 0.13669219208473485, "grad_norm": 5.363324089557864e-05, "learning_rate": 8.63400844485794e-06, "loss": 0.2393, "step": 19100 }, { "epoch": 0.13676375867744936, "grad_norm": 0.0, "learning_rate": 8.633292778930796e-06, "loss": 0.0, "step": 19110 }, { "epoch": 0.1368353252701639, "grad_norm": 9.025685722008348e-05, "learning_rate": 8.632577113003651e-06, "loss": 0.0, "step": 19120 }, { "epoch": 0.1369068918628784, "grad_norm": 3.690905430175917e-07, "learning_rate": 8.631861447076505e-06, "loss": 0.0, "step": 19130 }, { "epoch": 0.13697845845559292, "grad_norm": 0.0, "learning_rate": 8.63114578114936e-06, "loss": 0.0001, "step": 19140 }, { "epoch": 0.13705002504830746, "grad_norm": 0.0, "learning_rate": 8.630430115222215e-06, "loss": 0.0, "step": 19150 }, { "epoch": 0.13712159164102197, "grad_norm": 0.0, "learning_rate": 8.62971444929507e-06, "loss": 0.0001, "step": 19160 }, { "epoch": 0.13719315823373648, "grad_norm": 0.0009881856385618448, "learning_rate": 8.628998783367924e-06, "loss": 0.0, "step": 19170 }, { "epoch": 0.13726472482645102, "grad_norm": 0.0, "learning_rate": 8.62828311744078e-06, "loss": 0.0, "step": 19180 }, { "epoch": 0.13733629141916553, "grad_norm": 5.4192278184928e-06, "learning_rate": 8.627567451513635e-06, "loss": 0.0, "step": 19190 }, { "epoch": 0.13740785801188005, "grad_norm": 0.0, "learning_rate": 8.626851785586488e-06, "loss": 0.0, "step": 19200 }, { "epoch": 0.13747942460459459, "grad_norm": 0.0, "learning_rate": 8.626136119659344e-06, "loss": 0.0, "step": 19210 }, { "epoch": 0.1375509911973091, "grad_norm": 0.0, "learning_rate": 8.625420453732199e-06, "loss": 0.0062, "step": 19220 }, { "epoch": 0.1376225577900236, "grad_norm": 0.0, "learning_rate": 8.624704787805054e-06, "loss": 0.0002, "step": 19230 }, { "epoch": 0.13769412438273815, "grad_norm": 0.0, "learning_rate": 8.623989121877908e-06, "loss": 0.0001, "step": 19240 }, { "epoch": 0.13776569097545266, "grad_norm": 0.0, "learning_rate": 8.623273455950763e-06, "loss": 0.0, "step": 19250 }, { "epoch": 0.13783725756816717, "grad_norm": 0.0, "learning_rate": 8.622557790023618e-06, "loss": 0.0, "step": 19260 }, { "epoch": 0.1379088241608817, "grad_norm": 1.5380826425825944e-06, "learning_rate": 8.621842124096472e-06, "loss": 0.0013, "step": 19270 }, { "epoch": 0.13798039075359622, "grad_norm": 2.401824850117862e-10, "learning_rate": 8.621126458169327e-06, "loss": 0.0, "step": 19280 }, { "epoch": 0.13805195734631073, "grad_norm": 6.971581711923136e-08, "learning_rate": 8.620410792242183e-06, "loss": 0.0, "step": 19290 }, { "epoch": 0.13812352393902527, "grad_norm": 0.0, "learning_rate": 8.619695126315036e-06, "loss": 0.0, "step": 19300 }, { "epoch": 0.13819509053173978, "grad_norm": 2.943137928923534e-07, "learning_rate": 8.618979460387892e-06, "loss": 0.0008, "step": 19310 }, { "epoch": 0.1382666571244543, "grad_norm": 9.088232036447152e-05, "learning_rate": 8.618263794460747e-06, "loss": 0.0, "step": 19320 }, { "epoch": 0.13833822371716883, "grad_norm": 0.0, "learning_rate": 8.617548128533602e-06, "loss": 0.0005, "step": 19330 }, { "epoch": 0.13840979030988335, "grad_norm": 0.0, "learning_rate": 8.616832462606456e-06, "loss": 0.0001, "step": 19340 }, { "epoch": 0.13848135690259786, "grad_norm": 3.3979911222559167e-06, "learning_rate": 8.616116796679311e-06, "loss": 0.0, "step": 19350 }, { "epoch": 0.1385529234953124, "grad_norm": 0.0, "learning_rate": 8.615401130752166e-06, "loss": 0.0, "step": 19360 }, { "epoch": 0.1386244900880269, "grad_norm": 0.0, "learning_rate": 8.61468546482502e-06, "loss": 0.0, "step": 19370 }, { "epoch": 0.13869605668074142, "grad_norm": 0.0, "learning_rate": 8.613969798897875e-06, "loss": 0.0113, "step": 19380 }, { "epoch": 0.13876762327345596, "grad_norm": 0.0, "learning_rate": 8.61325413297073e-06, "loss": 0.0115, "step": 19390 }, { "epoch": 0.13883918986617047, "grad_norm": NaN, "learning_rate": 8.6126100336363e-06, "loss": 0.4871, "step": 19400 }, { "epoch": 0.13891075645888498, "grad_norm": 0.0, "learning_rate": 8.611894367709154e-06, "loss": 0.0, "step": 19410 }, { "epoch": 0.13898232305159952, "grad_norm": 15.319676399230957, "learning_rate": 8.611178701782009e-06, "loss": 0.0016, "step": 19420 }, { "epoch": 0.13905388964431403, "grad_norm": 0.0, "learning_rate": 8.610463035854863e-06, "loss": 0.0, "step": 19430 }, { "epoch": 0.13912545623702854, "grad_norm": 0.0, "learning_rate": 8.60974736992772e-06, "loss": 0.0, "step": 19440 }, { "epoch": 0.13919702282974308, "grad_norm": 0.0, "learning_rate": 8.609031704000573e-06, "loss": 0.0004, "step": 19450 }, { "epoch": 0.1392685894224576, "grad_norm": 0.0006616091122850776, "learning_rate": 8.608316038073427e-06, "loss": 0.0, "step": 19460 }, { "epoch": 0.1393401560151721, "grad_norm": 0.0, "learning_rate": 8.607600372146284e-06, "loss": 0.0, "step": 19470 }, { "epoch": 0.13941172260788665, "grad_norm": 2.3691118400392952e-08, "learning_rate": 8.606884706219138e-06, "loss": 0.3453, "step": 19480 }, { "epoch": 0.13948328920060116, "grad_norm": 0.0, "learning_rate": 8.606169040291993e-06, "loss": 0.0035, "step": 19490 }, { "epoch": 0.13955485579331567, "grad_norm": 0.0, "learning_rate": 8.605453374364846e-06, "loss": 0.0013, "step": 19500 }, { "epoch": 0.1396264223860302, "grad_norm": 0.0, "learning_rate": 8.604737708437702e-06, "loss": 0.0001, "step": 19510 }, { "epoch": 0.13969798897874472, "grad_norm": 2.9971344470977783, "learning_rate": 8.604022042510557e-06, "loss": 0.1234, "step": 19520 }, { "epoch": 0.13976955557145923, "grad_norm": 0.0, "learning_rate": 8.60330637658341e-06, "loss": 0.0034, "step": 19530 }, { "epoch": 0.13984112216417377, "grad_norm": 0.0, "learning_rate": 8.602590710656268e-06, "loss": 0.0, "step": 19540 }, { "epoch": 0.13991268875688828, "grad_norm": 0.0, "learning_rate": 8.601875044729121e-06, "loss": 0.0, "step": 19550 }, { "epoch": 0.1399842553496028, "grad_norm": 5.040886402130127, "learning_rate": 8.601159378801976e-06, "loss": 0.0008, "step": 19560 }, { "epoch": 0.14005582194231733, "grad_norm": 0.0, "learning_rate": 8.60044371287483e-06, "loss": 0.174, "step": 19570 }, { "epoch": 0.14012738853503184, "grad_norm": 0.0, "learning_rate": 8.599728046947685e-06, "loss": 0.0026, "step": 19580 }, { "epoch": 0.14019895512774636, "grad_norm": 0.0, "learning_rate": 8.59901238102054e-06, "loss": 0.0002, "step": 19590 }, { "epoch": 0.1402705217204609, "grad_norm": 0.0, "learning_rate": 8.598296715093394e-06, "loss": 0.0, "step": 19600 }, { "epoch": 0.1403420883131754, "grad_norm": 0.0, "learning_rate": 8.59758104916625e-06, "loss": 0.0097, "step": 19610 }, { "epoch": 0.14041365490588992, "grad_norm": 2.1642856609105365e-06, "learning_rate": 8.596865383239105e-06, "loss": 0.6129, "step": 19620 }, { "epoch": 0.14048522149860446, "grad_norm": 6.164445949252695e-07, "learning_rate": 8.596149717311958e-06, "loss": 0.0001, "step": 19630 }, { "epoch": 0.14055678809131897, "grad_norm": 0.0, "learning_rate": 8.595434051384814e-06, "loss": 0.1074, "step": 19640 }, { "epoch": 0.14062835468403348, "grad_norm": 0.0, "learning_rate": 8.594718385457669e-06, "loss": 0.0, "step": 19650 }, { "epoch": 0.14069992127674802, "grad_norm": 1.4031165562755632e-07, "learning_rate": 8.594002719530524e-06, "loss": 0.0202, "step": 19660 }, { "epoch": 0.14077148786946253, "grad_norm": 0.0, "learning_rate": 8.593287053603378e-06, "loss": 0.0, "step": 19670 }, { "epoch": 0.14084305446217704, "grad_norm": 0.0, "learning_rate": 8.592571387676233e-06, "loss": 0.001, "step": 19680 }, { "epoch": 0.14091462105489158, "grad_norm": 8.123061888909433e-06, "learning_rate": 8.591855721749088e-06, "loss": 0.0, "step": 19690 }, { "epoch": 0.1409861876476061, "grad_norm": 0.0, "learning_rate": 8.591140055821942e-06, "loss": 0.0, "step": 19700 }, { "epoch": 0.14105775424032063, "grad_norm": 0.0, "learning_rate": 8.590424389894797e-06, "loss": 0.0, "step": 19710 }, { "epoch": 0.14112932083303514, "grad_norm": 0.0, "learning_rate": 8.589708723967653e-06, "loss": 0.0154, "step": 19720 }, { "epoch": 0.14120088742574965, "grad_norm": 0.0, "learning_rate": 8.588993058040508e-06, "loss": 0.0001, "step": 19730 }, { "epoch": 0.1412724540184642, "grad_norm": 1.0189540262217633e-05, "learning_rate": 8.588277392113362e-06, "loss": 0.0092, "step": 19740 }, { "epoch": 0.1413440206111787, "grad_norm": 5.265892468742095e-05, "learning_rate": 8.587561726186217e-06, "loss": 0.0, "step": 19750 }, { "epoch": 0.14141558720389322, "grad_norm": 0.0003029011422768235, "learning_rate": 8.586846060259072e-06, "loss": 0.0005, "step": 19760 }, { "epoch": 0.14148715379660776, "grad_norm": 0.0, "learning_rate": 8.586130394331926e-06, "loss": 0.0059, "step": 19770 }, { "epoch": 0.14155872038932227, "grad_norm": 4.878129011842702e-10, "learning_rate": 8.585414728404781e-06, "loss": 0.0, "step": 19780 }, { "epoch": 0.14163028698203678, "grad_norm": 0.001842375728301704, "learning_rate": 8.584699062477636e-06, "loss": 0.0, "step": 19790 }, { "epoch": 0.14170185357475132, "grad_norm": 0.0, "learning_rate": 8.58398339655049e-06, "loss": 0.0, "step": 19800 }, { "epoch": 0.14177342016746583, "grad_norm": 0.0, "learning_rate": 8.583267730623345e-06, "loss": 0.0242, "step": 19810 }, { "epoch": 0.14184498676018034, "grad_norm": 0.0, "learning_rate": 8.5825520646962e-06, "loss": 0.0, "step": 19820 }, { "epoch": 0.14191655335289488, "grad_norm": 0.006467614788562059, "learning_rate": 8.581836398769056e-06, "loss": 0.0, "step": 19830 }, { "epoch": 0.1419881199456094, "grad_norm": 0.0, "learning_rate": 8.58112073284191e-06, "loss": 0.0086, "step": 19840 }, { "epoch": 0.1420596865383239, "grad_norm": 0.001573843532241881, "learning_rate": 8.580405066914765e-06, "loss": 0.0, "step": 19850 }, { "epoch": 0.14213125313103844, "grad_norm": 0.0, "learning_rate": 8.57968940098762e-06, "loss": 0.0, "step": 19860 }, { "epoch": 0.14220281972375295, "grad_norm": 0.0, "learning_rate": 8.578973735060474e-06, "loss": 0.0, "step": 19870 }, { "epoch": 0.14227438631646747, "grad_norm": 4.49658203125, "learning_rate": 8.578258069133329e-06, "loss": 0.0015, "step": 19880 }, { "epoch": 0.142345952909182, "grad_norm": 0.0, "learning_rate": 8.577542403206184e-06, "loss": 0.0006, "step": 19890 }, { "epoch": 0.14241751950189652, "grad_norm": 0.0, "learning_rate": 8.57682673727904e-06, "loss": 0.0, "step": 19900 }, { "epoch": 0.14248908609461103, "grad_norm": 3.087777145083237e-08, "learning_rate": 8.576111071351893e-06, "loss": 0.0, "step": 19910 }, { "epoch": 0.14256065268732557, "grad_norm": 0.0, "learning_rate": 8.575395405424748e-06, "loss": 0.0, "step": 19920 }, { "epoch": 0.14263221928004008, "grad_norm": 0.0, "learning_rate": 8.574679739497604e-06, "loss": 0.0068, "step": 19930 }, { "epoch": 0.1427037858727546, "grad_norm": 0.0, "learning_rate": 8.573964073570457e-06, "loss": 0.0183, "step": 19940 }, { "epoch": 0.14277535246546913, "grad_norm": 0.0, "learning_rate": 8.573248407643312e-06, "loss": 0.0, "step": 19950 }, { "epoch": 0.14284691905818364, "grad_norm": 8.763246239773537e-10, "learning_rate": 8.572532741716168e-06, "loss": 0.0, "step": 19960 }, { "epoch": 0.14291848565089815, "grad_norm": 1.567865695051296e-07, "learning_rate": 8.571817075789023e-06, "loss": 0.0004, "step": 19970 }, { "epoch": 0.1429900522436127, "grad_norm": 0.00024351332103833556, "learning_rate": 8.571101409861877e-06, "loss": 0.0, "step": 19980 }, { "epoch": 0.1430616188363272, "grad_norm": 0.0, "learning_rate": 8.570385743934732e-06, "loss": 0.0, "step": 19990 }, { "epoch": 0.14313318542904171, "grad_norm": 4.266090536386713e-10, "learning_rate": 8.569670078007587e-06, "loss": 0.0, "step": 20000 }, { "epoch": 0.14320475202175625, "grad_norm": 0.0, "learning_rate": 8.568954412080441e-06, "loss": 0.0, "step": 20010 }, { "epoch": 0.14327631861447077, "grad_norm": 0.0, "learning_rate": 8.568238746153296e-06, "loss": 0.0, "step": 20020 }, { "epoch": 0.14334788520718528, "grad_norm": 0.0, "learning_rate": 8.567523080226151e-06, "loss": 0.0004, "step": 20030 }, { "epoch": 0.14341945179989982, "grad_norm": 0.0, "learning_rate": 8.566807414299005e-06, "loss": 0.0001, "step": 20040 }, { "epoch": 0.14349101839261433, "grad_norm": 5.863449104026586e-08, "learning_rate": 8.56609174837186e-06, "loss": 0.0001, "step": 20050 }, { "epoch": 0.14356258498532884, "grad_norm": 0.0, "learning_rate": 8.565376082444716e-06, "loss": 0.0, "step": 20060 }, { "epoch": 0.14363415157804338, "grad_norm": 0.0, "learning_rate": 8.564660416517571e-06, "loss": 0.0, "step": 20070 }, { "epoch": 0.1437057181707579, "grad_norm": 0.0, "learning_rate": 8.563944750590425e-06, "loss": 0.0012, "step": 20080 }, { "epoch": 0.1437772847634724, "grad_norm": 4.336101847002283e-05, "learning_rate": 8.56322908466328e-06, "loss": 0.0, "step": 20090 }, { "epoch": 0.14384885135618694, "grad_norm": 0.0, "learning_rate": 8.562513418736135e-06, "loss": 0.0, "step": 20100 }, { "epoch": 0.14392041794890145, "grad_norm": 0.0, "learning_rate": 8.561797752808989e-06, "loss": 0.0, "step": 20110 }, { "epoch": 0.14399198454161596, "grad_norm": 0.0, "learning_rate": 8.561082086881844e-06, "loss": 0.0, "step": 20120 }, { "epoch": 0.1440635511343305, "grad_norm": 9.330366083304398e-06, "learning_rate": 8.5603664209547e-06, "loss": 0.0, "step": 20130 }, { "epoch": 0.14413511772704501, "grad_norm": 0.0, "learning_rate": 8.559650755027555e-06, "loss": 0.0001, "step": 20140 }, { "epoch": 0.14420668431975953, "grad_norm": 0.0, "learning_rate": 8.558935089100408e-06, "loss": 0.0002, "step": 20150 }, { "epoch": 0.14427825091247407, "grad_norm": 0.0002213995612692088, "learning_rate": 8.558219423173263e-06, "loss": 0.0, "step": 20160 }, { "epoch": 0.14434981750518858, "grad_norm": 784.2569580078125, "learning_rate": 8.557503757246119e-06, "loss": 0.2611, "step": 20170 }, { "epoch": 0.1444213840979031, "grad_norm": 0.0, "learning_rate": 8.556788091318972e-06, "loss": 0.4898, "step": 20180 }, { "epoch": 0.14449295069061763, "grad_norm": 5.597924541689281e-07, "learning_rate": 8.556072425391828e-06, "loss": 0.0, "step": 20190 }, { "epoch": 0.14456451728333214, "grad_norm": 8.470606438493178e-10, "learning_rate": 8.555356759464683e-06, "loss": 0.0, "step": 20200 }, { "epoch": 0.14463608387604665, "grad_norm": 0.0, "learning_rate": 8.554641093537538e-06, "loss": 0.0, "step": 20210 }, { "epoch": 0.1447076504687612, "grad_norm": 43.47351837158203, "learning_rate": 8.553925427610392e-06, "loss": 0.0129, "step": 20220 }, { "epoch": 0.1447792170614757, "grad_norm": 0.0, "learning_rate": 8.553209761683247e-06, "loss": 0.0, "step": 20230 }, { "epoch": 0.1448507836541902, "grad_norm": 0.0, "learning_rate": 8.552494095756102e-06, "loss": 0.0, "step": 20240 }, { "epoch": 0.14492235024690475, "grad_norm": 0.0, "learning_rate": 8.551778429828956e-06, "loss": 0.0027, "step": 20250 }, { "epoch": 0.14499391683961926, "grad_norm": 4.0870029582862344e-10, "learning_rate": 8.551062763901811e-06, "loss": 0.0, "step": 20260 }, { "epoch": 0.14506548343233377, "grad_norm": 2.6685402332304875e-08, "learning_rate": 8.550347097974667e-06, "loss": 0.0, "step": 20270 }, { "epoch": 0.14513705002504831, "grad_norm": 0.006722779478877783, "learning_rate": 8.54963143204752e-06, "loss": 0.0, "step": 20280 }, { "epoch": 0.14520861661776283, "grad_norm": 0.0, "learning_rate": 8.548915766120375e-06, "loss": 0.0004, "step": 20290 }, { "epoch": 0.14528018321047734, "grad_norm": 0.026309223845601082, "learning_rate": 8.54820010019323e-06, "loss": 0.0, "step": 20300 }, { "epoch": 0.14535174980319188, "grad_norm": 0.0068744029849767685, "learning_rate": 8.547484434266086e-06, "loss": 0.0002, "step": 20310 }, { "epoch": 0.1454233163959064, "grad_norm": 0.0, "learning_rate": 8.54676876833894e-06, "loss": 0.0, "step": 20320 }, { "epoch": 0.1454948829886209, "grad_norm": 0.0, "learning_rate": 8.546053102411795e-06, "loss": 0.0001, "step": 20330 }, { "epoch": 0.14556644958133544, "grad_norm": 0.0, "learning_rate": 8.54533743648465e-06, "loss": 0.0, "step": 20340 }, { "epoch": 0.14563801617404995, "grad_norm": 0.0, "learning_rate": 8.544621770557504e-06, "loss": 0.0, "step": 20350 }, { "epoch": 0.14570958276676446, "grad_norm": 3.18714263869424e-08, "learning_rate": 8.543906104630359e-06, "loss": 0.0, "step": 20360 }, { "epoch": 0.145781149359479, "grad_norm": 0.0, "learning_rate": 8.543190438703214e-06, "loss": 1.2586, "step": 20370 }, { "epoch": 0.1458527159521935, "grad_norm": 4.6346021465026865e-10, "learning_rate": 8.54247477277607e-06, "loss": 0.0, "step": 20380 }, { "epoch": 0.14592428254490802, "grad_norm": 0.0, "learning_rate": 8.541759106848923e-06, "loss": 0.0026, "step": 20390 }, { "epoch": 0.14599584913762256, "grad_norm": 2.698557466374041e-07, "learning_rate": 8.541043440921779e-06, "loss": 0.0023, "step": 20400 }, { "epoch": 0.14606741573033707, "grad_norm": 4.40678327162658e-10, "learning_rate": 8.540327774994634e-06, "loss": 0.0, "step": 20410 }, { "epoch": 0.14613898232305159, "grad_norm": 0.0, "learning_rate": 8.539612109067487e-06, "loss": 0.0, "step": 20420 }, { "epoch": 0.14621054891576613, "grad_norm": 0.0, "learning_rate": 8.538896443140343e-06, "loss": 0.0, "step": 20430 }, { "epoch": 0.14628211550848064, "grad_norm": 0.0, "learning_rate": 8.538180777213198e-06, "loss": 0.0, "step": 20440 }, { "epoch": 0.14635368210119515, "grad_norm": 0.0, "learning_rate": 8.537465111286052e-06, "loss": 0.0002, "step": 20450 }, { "epoch": 0.1464252486939097, "grad_norm": 0.0, "learning_rate": 8.536749445358907e-06, "loss": 0.0331, "step": 20460 }, { "epoch": 0.1464968152866242, "grad_norm": 8.231626452470664e-06, "learning_rate": 8.536033779431762e-06, "loss": 0.4535, "step": 20470 }, { "epoch": 0.14656838187933874, "grad_norm": 0.0, "learning_rate": 8.535318113504618e-06, "loss": 0.0295, "step": 20480 }, { "epoch": 0.14663994847205325, "grad_norm": 0.0, "learning_rate": 8.534602447577471e-06, "loss": 0.0005, "step": 20490 }, { "epoch": 0.14671151506476776, "grad_norm": 0.0, "learning_rate": 8.533886781650326e-06, "loss": 0.0, "step": 20500 }, { "epoch": 0.1467830816574823, "grad_norm": 1.3050607705622497e-08, "learning_rate": 8.533171115723182e-06, "loss": 0.0001, "step": 20510 }, { "epoch": 0.1468546482501968, "grad_norm": 0.0, "learning_rate": 8.532455449796035e-06, "loss": 0.0026, "step": 20520 }, { "epoch": 0.14692621484291132, "grad_norm": 0.0, "learning_rate": 8.53173978386889e-06, "loss": 0.0, "step": 20530 }, { "epoch": 0.14699778143562586, "grad_norm": 0.0, "learning_rate": 8.531024117941746e-06, "loss": 0.0, "step": 20540 }, { "epoch": 0.14706934802834037, "grad_norm": 0.0, "learning_rate": 8.530308452014601e-06, "loss": 0.0, "step": 20550 }, { "epoch": 0.14714091462105489, "grad_norm": 0.0, "learning_rate": 8.529592786087455e-06, "loss": 0.0, "step": 20560 }, { "epoch": 0.14721248121376942, "grad_norm": 0.0, "learning_rate": 8.52887712016031e-06, "loss": 0.0, "step": 20570 }, { "epoch": 0.14728404780648394, "grad_norm": 0.0, "learning_rate": 8.528161454233165e-06, "loss": 0.0, "step": 20580 }, { "epoch": 0.14735561439919845, "grad_norm": 0.0, "learning_rate": 8.527445788306019e-06, "loss": 0.0, "step": 20590 }, { "epoch": 0.147427180991913, "grad_norm": 0.0, "learning_rate": 8.526730122378874e-06, "loss": 0.0, "step": 20600 }, { "epoch": 0.1474987475846275, "grad_norm": 0.0, "learning_rate": 8.52601445645173e-06, "loss": 0.059, "step": 20610 }, { "epoch": 0.147570314177342, "grad_norm": 0.0, "learning_rate": 8.525298790524585e-06, "loss": 0.0, "step": 20620 }, { "epoch": 0.14764188077005655, "grad_norm": 0.0, "learning_rate": 8.524583124597438e-06, "loss": 0.0, "step": 20630 }, { "epoch": 0.14771344736277106, "grad_norm": 0.0, "learning_rate": 8.523867458670294e-06, "loss": 0.0004, "step": 20640 }, { "epoch": 0.14778501395548557, "grad_norm": 0.0, "learning_rate": 8.523151792743149e-06, "loss": 0.0007, "step": 20650 }, { "epoch": 0.1478565805482001, "grad_norm": 0.0006032930104993284, "learning_rate": 8.522436126816003e-06, "loss": 1.6293, "step": 20660 }, { "epoch": 0.14792814714091462, "grad_norm": 0.0, "learning_rate": 8.521720460888858e-06, "loss": 0.0, "step": 20670 }, { "epoch": 0.14799971373362913, "grad_norm": 0.0019408793887123466, "learning_rate": 8.521004794961713e-06, "loss": 0.0011, "step": 20680 }, { "epoch": 0.14807128032634367, "grad_norm": 2.1266401972752647e-09, "learning_rate": 8.520289129034567e-06, "loss": 0.0019, "step": 20690 }, { "epoch": 0.14814284691905819, "grad_norm": 0.0, "learning_rate": 8.519573463107422e-06, "loss": 0.0017, "step": 20700 }, { "epoch": 0.1482144135117727, "grad_norm": 0.004775660112500191, "learning_rate": 8.518857797180277e-06, "loss": 0.0, "step": 20710 }, { "epoch": 0.14828598010448724, "grad_norm": 1.6176741155504715e-06, "learning_rate": 8.518142131253133e-06, "loss": 0.0, "step": 20720 }, { "epoch": 0.14835754669720175, "grad_norm": 0.0, "learning_rate": 8.517426465325986e-06, "loss": 0.0, "step": 20730 }, { "epoch": 0.14842911328991626, "grad_norm": 0.0, "learning_rate": 8.516710799398842e-06, "loss": 0.0, "step": 20740 }, { "epoch": 0.1485006798826308, "grad_norm": 0.13109275698661804, "learning_rate": 8.515995133471697e-06, "loss": 0.0, "step": 20750 }, { "epoch": 0.1485722464753453, "grad_norm": 35.47887420654297, "learning_rate": 8.51527946754455e-06, "loss": 0.0071, "step": 20760 }, { "epoch": 0.14864381306805982, "grad_norm": 0.0, "learning_rate": 8.514563801617406e-06, "loss": 0.0068, "step": 20770 }, { "epoch": 0.14871537966077436, "grad_norm": 4.1115108539990786e-10, "learning_rate": 8.513848135690261e-06, "loss": 0.0, "step": 20780 }, { "epoch": 0.14878694625348887, "grad_norm": 0.006486569531261921, "learning_rate": 8.513132469763116e-06, "loss": 0.0002, "step": 20790 }, { "epoch": 0.14885851284620338, "grad_norm": 0.0, "learning_rate": 8.51241680383597e-06, "loss": 0.0, "step": 20800 }, { "epoch": 0.14893007943891792, "grad_norm": 0.0, "learning_rate": 8.511701137908824e-06, "loss": 0.0, "step": 20810 }, { "epoch": 0.14900164603163243, "grad_norm": 8.754165725655128e-10, "learning_rate": 8.51098547198168e-06, "loss": 0.0, "step": 20820 }, { "epoch": 0.14907321262434695, "grad_norm": 0.0, "learning_rate": 8.510269806054534e-06, "loss": 0.0, "step": 20830 }, { "epoch": 0.14914477921706148, "grad_norm": 0.1052863597869873, "learning_rate": 8.50955414012739e-06, "loss": 0.0, "step": 20840 }, { "epoch": 0.149216345809776, "grad_norm": 0.0, "learning_rate": 8.508838474200245e-06, "loss": 0.0008, "step": 20850 }, { "epoch": 0.1492879124024905, "grad_norm": 0.0, "learning_rate": 8.5081228082731e-06, "loss": 0.0, "step": 20860 }, { "epoch": 0.14935947899520505, "grad_norm": 0.0, "learning_rate": 8.507407142345954e-06, "loss": 0.0, "step": 20870 }, { "epoch": 0.14943104558791956, "grad_norm": 0.0, "learning_rate": 8.506691476418807e-06, "loss": 0.0, "step": 20880 }, { "epoch": 0.14950261218063407, "grad_norm": 0.0, "learning_rate": 8.505975810491664e-06, "loss": 0.0, "step": 20890 }, { "epoch": 0.1495741787733486, "grad_norm": 0.0, "learning_rate": 8.505260144564518e-06, "loss": 0.0, "step": 20900 }, { "epoch": 0.14964574536606312, "grad_norm": 0.008098677732050419, "learning_rate": 8.504544478637373e-06, "loss": 0.0, "step": 20910 }, { "epoch": 0.14971731195877763, "grad_norm": 0.0, "learning_rate": 8.503828812710228e-06, "loss": 0.0062, "step": 20920 }, { "epoch": 0.14978887855149217, "grad_norm": 0.0, "learning_rate": 8.503113146783082e-06, "loss": 0.0026, "step": 20930 }, { "epoch": 0.14986044514420668, "grad_norm": 0.0, "learning_rate": 8.502397480855937e-06, "loss": 0.0002, "step": 20940 }, { "epoch": 0.1499320117369212, "grad_norm": 2.09184804589313e-06, "learning_rate": 8.50168181492879e-06, "loss": 0.0, "step": 20950 }, { "epoch": 0.15000357832963573, "grad_norm": 0.0, "learning_rate": 8.500966149001648e-06, "loss": 0.3789, "step": 20960 }, { "epoch": 0.15007514492235025, "grad_norm": 2.432694600429386e-05, "learning_rate": 8.500250483074501e-06, "loss": 0.0, "step": 20970 }, { "epoch": 0.15014671151506476, "grad_norm": 0.0009306377032771707, "learning_rate": 8.499534817147357e-06, "loss": 0.0, "step": 20980 }, { "epoch": 0.1502182781077793, "grad_norm": 0.0, "learning_rate": 8.498819151220212e-06, "loss": 0.0, "step": 20990 }, { "epoch": 0.1502898447004938, "grad_norm": 0.0, "learning_rate": 8.498103485293066e-06, "loss": 0.0001, "step": 21000 }, { "epoch": 0.15036141129320832, "grad_norm": 0.0, "learning_rate": 8.497387819365921e-06, "loss": 0.0, "step": 21010 }, { "epoch": 0.15043297788592286, "grad_norm": 0.0, "learning_rate": 8.496672153438774e-06, "loss": 0.0, "step": 21020 }, { "epoch": 0.15050454447863737, "grad_norm": 0.0, "learning_rate": 8.495956487511631e-06, "loss": 0.0, "step": 21030 }, { "epoch": 0.15057611107135188, "grad_norm": 0.0, "learning_rate": 8.495240821584485e-06, "loss": 0.0, "step": 21040 }, { "epoch": 0.15064767766406642, "grad_norm": 0.0, "learning_rate": 8.494525155657339e-06, "loss": 0.0001, "step": 21050 }, { "epoch": 0.15071924425678093, "grad_norm": 0.0, "learning_rate": 8.493809489730194e-06, "loss": 0.0, "step": 21060 }, { "epoch": 0.15079081084949544, "grad_norm": 0.0, "learning_rate": 8.49309382380305e-06, "loss": 0.0033, "step": 21070 }, { "epoch": 0.15086237744220998, "grad_norm": 0.0, "learning_rate": 8.492378157875905e-06, "loss": 0.0, "step": 21080 }, { "epoch": 0.1509339440349245, "grad_norm": 0.0, "learning_rate": 8.491662491948758e-06, "loss": 0.1313, "step": 21090 }, { "epoch": 0.151005510627639, "grad_norm": 0.0, "learning_rate": 8.490946826021615e-06, "loss": 0.0, "step": 21100 }, { "epoch": 0.15107707722035355, "grad_norm": 9.897061090669013e-07, "learning_rate": 8.490231160094469e-06, "loss": 0.0001, "step": 21110 }, { "epoch": 0.15114864381306806, "grad_norm": 0.0, "learning_rate": 8.489515494167322e-06, "loss": 0.0, "step": 21120 }, { "epoch": 0.15122021040578257, "grad_norm": 0.0, "learning_rate": 8.488799828240178e-06, "loss": 0.0001, "step": 21130 }, { "epoch": 0.1512917769984971, "grad_norm": 0.0, "learning_rate": 8.488084162313033e-06, "loss": 0.0, "step": 21140 }, { "epoch": 0.15136334359121162, "grad_norm": 0.0, "learning_rate": 8.487368496385888e-06, "loss": 0.0, "step": 21150 }, { "epoch": 0.15143491018392613, "grad_norm": 0.0, "learning_rate": 8.486652830458742e-06, "loss": 0.0078, "step": 21160 }, { "epoch": 0.15150647677664067, "grad_norm": 0.0, "learning_rate": 8.485937164531597e-06, "loss": 0.0, "step": 21170 }, { "epoch": 0.15157804336935518, "grad_norm": 0.0, "learning_rate": 8.485221498604452e-06, "loss": 0.0041, "step": 21180 }, { "epoch": 0.1516496099620697, "grad_norm": 0.0, "learning_rate": 8.484505832677306e-06, "loss": 0.0, "step": 21190 }, { "epoch": 0.15172117655478423, "grad_norm": 5.417396096163429e-06, "learning_rate": 8.483790166750161e-06, "loss": 0.0, "step": 21200 }, { "epoch": 0.15179274314749874, "grad_norm": 5.425330300568021e-07, "learning_rate": 8.483074500823017e-06, "loss": 0.0, "step": 21210 }, { "epoch": 0.15186430974021325, "grad_norm": 1.236272169080621e-06, "learning_rate": 8.48235883489587e-06, "loss": 0.0, "step": 21220 }, { "epoch": 0.1519358763329278, "grad_norm": 2.020138254010817e-06, "learning_rate": 8.481643168968725e-06, "loss": 0.1095, "step": 21230 }, { "epoch": 0.1520074429256423, "grad_norm": 4.944779448123882e-07, "learning_rate": 8.48092750304158e-06, "loss": 0.0, "step": 21240 }, { "epoch": 0.15207900951835684, "grad_norm": 0.0, "learning_rate": 8.480211837114436e-06, "loss": 0.0001, "step": 21250 }, { "epoch": 0.15215057611107136, "grad_norm": 0.0, "learning_rate": 8.47949617118729e-06, "loss": 0.0, "step": 21260 }, { "epoch": 0.15222214270378587, "grad_norm": 7.0216055192418025e-09, "learning_rate": 8.478780505260145e-06, "loss": 0.0001, "step": 21270 }, { "epoch": 0.1522937092965004, "grad_norm": 0.020965714007616043, "learning_rate": 8.478064839333e-06, "loss": 0.0, "step": 21280 }, { "epoch": 0.15236527588921492, "grad_norm": 0.0, "learning_rate": 8.477349173405854e-06, "loss": 0.1534, "step": 21290 }, { "epoch": 0.15243684248192943, "grad_norm": 0.0, "learning_rate": 8.476633507478709e-06, "loss": 0.0, "step": 21300 }, { "epoch": 0.15250840907464397, "grad_norm": 0.0, "learning_rate": 8.475917841551564e-06, "loss": 0.0255, "step": 21310 }, { "epoch": 0.15257997566735848, "grad_norm": 0.00014626355550717562, "learning_rate": 8.47520217562442e-06, "loss": 0.0, "step": 21320 }, { "epoch": 0.152651542260073, "grad_norm": 0.11780455708503723, "learning_rate": 8.474486509697273e-06, "loss": 0.0, "step": 21330 }, { "epoch": 0.15272310885278753, "grad_norm": 0.0013855991419404745, "learning_rate": 8.473770843770129e-06, "loss": 0.0, "step": 21340 }, { "epoch": 0.15279467544550204, "grad_norm": 0.0, "learning_rate": 8.473055177842984e-06, "loss": 0.0, "step": 21350 }, { "epoch": 0.15286624203821655, "grad_norm": 0.0, "learning_rate": 8.472339511915837e-06, "loss": 0.0001, "step": 21360 }, { "epoch": 0.1529378086309311, "grad_norm": 4.806524622757991e-10, "learning_rate": 8.471623845988693e-06, "loss": 0.0009, "step": 21370 }, { "epoch": 0.1530093752236456, "grad_norm": 0.00024865276645869017, "learning_rate": 8.470908180061548e-06, "loss": 0.0017, "step": 21380 }, { "epoch": 0.15308094181636012, "grad_norm": 0.0, "learning_rate": 8.470192514134403e-06, "loss": 0.0, "step": 21390 }, { "epoch": 0.15315250840907466, "grad_norm": 0.0, "learning_rate": 8.469476848207257e-06, "loss": 0.009, "step": 21400 }, { "epoch": 0.15322407500178917, "grad_norm": 0.0, "learning_rate": 8.468832748872827e-06, "loss": 0.1288, "step": 21410 }, { "epoch": 0.15329564159450368, "grad_norm": 0.0, "learning_rate": 8.468117082945682e-06, "loss": 0.0, "step": 21420 }, { "epoch": 0.15336720818721822, "grad_norm": 0.0, "learning_rate": 8.467401417018536e-06, "loss": 0.0137, "step": 21430 }, { "epoch": 0.15343877477993273, "grad_norm": 0.0, "learning_rate": 8.466685751091391e-06, "loss": 0.0, "step": 21440 }, { "epoch": 0.15351034137264724, "grad_norm": 0.0, "learning_rate": 8.465970085164246e-06, "loss": 0.0, "step": 21450 }, { "epoch": 0.15358190796536178, "grad_norm": 0.0, "learning_rate": 8.465254419237101e-06, "loss": 0.0, "step": 21460 }, { "epoch": 0.1536534745580763, "grad_norm": 1.4109879202806042e-06, "learning_rate": 8.464538753309955e-06, "loss": 0.009, "step": 21470 }, { "epoch": 0.1537250411507908, "grad_norm": 2.1167632269225578e-07, "learning_rate": 8.46382308738281e-06, "loss": 0.0, "step": 21480 }, { "epoch": 0.15379660774350534, "grad_norm": 0.0, "learning_rate": 8.463107421455666e-06, "loss": 0.0, "step": 21490 }, { "epoch": 0.15386817433621985, "grad_norm": 0.0, "learning_rate": 8.46239175552852e-06, "loss": 0.0, "step": 21500 }, { "epoch": 0.15393974092893437, "grad_norm": 0.0, "learning_rate": 8.461676089601375e-06, "loss": 0.0, "step": 21510 }, { "epoch": 0.1540113075216489, "grad_norm": 0.0, "learning_rate": 8.46096042367423e-06, "loss": 0.0, "step": 21520 }, { "epoch": 0.15408287411436342, "grad_norm": 0.0, "learning_rate": 8.460244757747085e-06, "loss": 0.0, "step": 21530 }, { "epoch": 0.15415444070707793, "grad_norm": 0.0, "learning_rate": 8.459529091819939e-06, "loss": 0.0, "step": 21540 }, { "epoch": 0.15422600729979247, "grad_norm": 0.44836103916168213, "learning_rate": 8.458813425892794e-06, "loss": 0.0001, "step": 21550 }, { "epoch": 0.15429757389250698, "grad_norm": 0.0, "learning_rate": 8.45809775996565e-06, "loss": 0.0002, "step": 21560 }, { "epoch": 0.1543691404852215, "grad_norm": 0.0, "learning_rate": 8.457382094038503e-06, "loss": 0.0, "step": 21570 }, { "epoch": 0.15444070707793603, "grad_norm": 0.0, "learning_rate": 8.456666428111358e-06, "loss": 0.0, "step": 21580 }, { "epoch": 0.15451227367065054, "grad_norm": 0.0, "learning_rate": 8.455950762184213e-06, "loss": 0.0003, "step": 21590 }, { "epoch": 0.15458384026336505, "grad_norm": 0.0, "learning_rate": 8.455235096257069e-06, "loss": 0.0015, "step": 21600 }, { "epoch": 0.1546554068560796, "grad_norm": 0.0, "learning_rate": 8.454519430329922e-06, "loss": 0.0, "step": 21610 }, { "epoch": 0.1547269734487941, "grad_norm": 0.0, "learning_rate": 8.453803764402778e-06, "loss": 0.0, "step": 21620 }, { "epoch": 0.15479854004150861, "grad_norm": 0.0, "learning_rate": 8.453088098475633e-06, "loss": 0.0002, "step": 21630 }, { "epoch": 0.15487010663422315, "grad_norm": 0.0, "learning_rate": 8.452372432548487e-06, "loss": 0.0, "step": 21640 }, { "epoch": 0.15494167322693767, "grad_norm": 0.0, "learning_rate": 8.451656766621342e-06, "loss": 0.0004, "step": 21650 }, { "epoch": 0.15501323981965218, "grad_norm": 0.0, "learning_rate": 8.450941100694197e-06, "loss": 0.0, "step": 21660 }, { "epoch": 0.15508480641236672, "grad_norm": 959.629150390625, "learning_rate": 8.45022543476705e-06, "loss": 1.6657, "step": 21670 }, { "epoch": 0.15515637300508123, "grad_norm": 5.278836397337727e-05, "learning_rate": 8.449509768839906e-06, "loss": 0.0001, "step": 21680 }, { "epoch": 0.15522793959779574, "grad_norm": 0.0, "learning_rate": 8.448794102912761e-06, "loss": 0.0, "step": 21690 }, { "epoch": 0.15529950619051028, "grad_norm": 0.0, "learning_rate": 8.448078436985617e-06, "loss": 0.0853, "step": 21700 }, { "epoch": 0.1553710727832248, "grad_norm": 0.0, "learning_rate": 8.44736277105847e-06, "loss": 0.0, "step": 21710 }, { "epoch": 0.1554426393759393, "grad_norm": 0.0, "learning_rate": 8.446647105131326e-06, "loss": 0.0, "step": 21720 }, { "epoch": 0.15551420596865384, "grad_norm": 1.942508561114664e-06, "learning_rate": 8.44593143920418e-06, "loss": 0.0, "step": 21730 }, { "epoch": 0.15558577256136835, "grad_norm": 9.455726202034498e-10, "learning_rate": 8.445215773277034e-06, "loss": 0.0, "step": 21740 }, { "epoch": 0.15565733915408286, "grad_norm": 0.0, "learning_rate": 8.44450010734989e-06, "loss": 0.0, "step": 21750 }, { "epoch": 0.1557289057467974, "grad_norm": 1.1963996257691178e-05, "learning_rate": 8.443784441422745e-06, "loss": 0.0, "step": 21760 }, { "epoch": 0.15580047233951191, "grad_norm": 0.0, "learning_rate": 8.4430687754956e-06, "loss": 0.0, "step": 21770 }, { "epoch": 0.15587203893222643, "grad_norm": 0.0, "learning_rate": 8.442353109568454e-06, "loss": 0.0563, "step": 21780 }, { "epoch": 0.15594360552494096, "grad_norm": 0.0, "learning_rate": 8.44163744364131e-06, "loss": 0.006, "step": 21790 }, { "epoch": 0.15601517211765548, "grad_norm": 1.828082349675242e-05, "learning_rate": 8.440921777714164e-06, "loss": 0.2755, "step": 21800 }, { "epoch": 0.15608673871037, "grad_norm": 0.0, "learning_rate": 8.440206111787018e-06, "loss": 0.0, "step": 21810 }, { "epoch": 0.15615830530308453, "grad_norm": 0.0, "learning_rate": 8.439490445859873e-06, "loss": 0.0001, "step": 21820 }, { "epoch": 0.15622987189579904, "grad_norm": 0.0, "learning_rate": 8.438774779932729e-06, "loss": 0.0015, "step": 21830 }, { "epoch": 0.15630143848851355, "grad_norm": 0.0, "learning_rate": 8.438059114005584e-06, "loss": 0.0, "step": 21840 }, { "epoch": 0.1563730050812281, "grad_norm": 0.0, "learning_rate": 8.437343448078438e-06, "loss": 0.0, "step": 21850 }, { "epoch": 0.1564445716739426, "grad_norm": 0.0, "learning_rate": 8.436627782151293e-06, "loss": 0.0, "step": 21860 }, { "epoch": 0.1565161382666571, "grad_norm": 0.0, "learning_rate": 8.435912116224148e-06, "loss": 0.0, "step": 21870 }, { "epoch": 0.15658770485937165, "grad_norm": 4.629396033184463e-10, "learning_rate": 8.435196450297002e-06, "loss": 0.4902, "step": 21880 }, { "epoch": 0.15665927145208616, "grad_norm": 0.0, "learning_rate": 8.434480784369857e-06, "loss": 0.0015, "step": 21890 }, { "epoch": 0.15673083804480067, "grad_norm": 0.0, "learning_rate": 8.433765118442712e-06, "loss": 0.0, "step": 21900 }, { "epoch": 0.1568024046375152, "grad_norm": 0.0, "learning_rate": 8.433049452515566e-06, "loss": 0.0, "step": 21910 }, { "epoch": 0.15687397123022973, "grad_norm": 5.983375928053647e-08, "learning_rate": 8.432333786588421e-06, "loss": 0.0, "step": 21920 }, { "epoch": 0.15694553782294424, "grad_norm": 4.488745208863776e-10, "learning_rate": 8.431618120661276e-06, "loss": 0.0, "step": 21930 }, { "epoch": 0.15701710441565878, "grad_norm": 0.0, "learning_rate": 8.430902454734132e-06, "loss": 0.0, "step": 21940 }, { "epoch": 0.1570886710083733, "grad_norm": 0.0, "learning_rate": 8.430186788806985e-06, "loss": 0.0, "step": 21950 }, { "epoch": 0.1571602376010878, "grad_norm": 0.0, "learning_rate": 8.42947112287984e-06, "loss": 0.0, "step": 21960 }, { "epoch": 0.15723180419380234, "grad_norm": 0.0, "learning_rate": 8.428755456952696e-06, "loss": 0.0006, "step": 21970 }, { "epoch": 0.15730337078651685, "grad_norm": 4.4246789565605127e-10, "learning_rate": 8.42803979102555e-06, "loss": 0.0, "step": 21980 }, { "epoch": 0.15737493737923136, "grad_norm": 491.2293701171875, "learning_rate": 8.427324125098405e-06, "loss": 0.2594, "step": 21990 }, { "epoch": 0.1574465039719459, "grad_norm": 0.0, "learning_rate": 8.42660845917126e-06, "loss": 0.0, "step": 22000 }, { "epoch": 0.1575180705646604, "grad_norm": 6.651168860116741e-07, "learning_rate": 8.425892793244115e-06, "loss": 0.0, "step": 22010 }, { "epoch": 0.15758963715737495, "grad_norm": 5.021152382767013e-10, "learning_rate": 8.425177127316969e-06, "loss": 0.0, "step": 22020 }, { "epoch": 0.15766120375008946, "grad_norm": 2.0699630898235455e-09, "learning_rate": 8.424461461389824e-06, "loss": 0.0, "step": 22030 }, { "epoch": 0.15773277034280397, "grad_norm": 0.0, "learning_rate": 8.42374579546268e-06, "loss": 0.0, "step": 22040 }, { "epoch": 0.1578043369355185, "grad_norm": 0.0, "learning_rate": 8.423030129535533e-06, "loss": 0.0, "step": 22050 }, { "epoch": 0.15787590352823302, "grad_norm": 0.0, "learning_rate": 8.422314463608388e-06, "loss": 0.0, "step": 22060 }, { "epoch": 0.15794747012094754, "grad_norm": 0.0, "learning_rate": 8.421598797681244e-06, "loss": 0.0, "step": 22070 }, { "epoch": 0.15801903671366208, "grad_norm": 0.0, "learning_rate": 8.420883131754097e-06, "loss": 0.0, "step": 22080 }, { "epoch": 0.1580906033063766, "grad_norm": 411.0445251464844, "learning_rate": 8.420167465826953e-06, "loss": 0.1805, "step": 22090 }, { "epoch": 0.1581621698990911, "grad_norm": 0.0, "learning_rate": 8.419451799899806e-06, "loss": 0.0, "step": 22100 }, { "epoch": 0.15823373649180564, "grad_norm": 0.0, "learning_rate": 8.418736133972663e-06, "loss": 0.0, "step": 22110 }, { "epoch": 0.15830530308452015, "grad_norm": 6.02587491016493e-09, "learning_rate": 8.418020468045517e-06, "loss": 0.0001, "step": 22120 }, { "epoch": 0.15837686967723466, "grad_norm": 0.0, "learning_rate": 8.417304802118372e-06, "loss": 0.0, "step": 22130 }, { "epoch": 0.1584484362699492, "grad_norm": 0.0, "learning_rate": 8.416589136191227e-06, "loss": 0.0, "step": 22140 }, { "epoch": 0.1585200028626637, "grad_norm": 0.11107701808214188, "learning_rate": 8.415873470264081e-06, "loss": 0.0, "step": 22150 }, { "epoch": 0.15859156945537822, "grad_norm": 0.0, "learning_rate": 8.415157804336936e-06, "loss": 0.0051, "step": 22160 }, { "epoch": 0.15866313604809276, "grad_norm": 0.004110316745936871, "learning_rate": 8.41444213840979e-06, "loss": 0.0, "step": 22170 }, { "epoch": 0.15873470264080727, "grad_norm": 1.3920498531660996e-06, "learning_rate": 8.413726472482647e-06, "loss": 0.0, "step": 22180 }, { "epoch": 0.15880626923352179, "grad_norm": 0.0, "learning_rate": 8.4130108065555e-06, "loss": 0.0, "step": 22190 }, { "epoch": 0.15887783582623632, "grad_norm": 0.0, "learning_rate": 8.412295140628354e-06, "loss": 0.0001, "step": 22200 }, { "epoch": 0.15894940241895084, "grad_norm": 0.0, "learning_rate": 8.411579474701211e-06, "loss": 0.0, "step": 22210 }, { "epoch": 0.15902096901166535, "grad_norm": 0.0, "learning_rate": 8.410863808774065e-06, "loss": 0.0, "step": 22220 }, { "epoch": 0.1590925356043799, "grad_norm": 0.0, "learning_rate": 8.41014814284692e-06, "loss": 0.0, "step": 22230 }, { "epoch": 0.1591641021970944, "grad_norm": 0.0, "learning_rate": 8.409432476919774e-06, "loss": 0.0, "step": 22240 }, { "epoch": 0.1592356687898089, "grad_norm": 0.0, "learning_rate": 8.40871681099263e-06, "loss": 0.0, "step": 22250 }, { "epoch": 0.15930723538252345, "grad_norm": 0.0, "learning_rate": 8.408001145065484e-06, "loss": 0.0, "step": 22260 }, { "epoch": 0.15937880197523796, "grad_norm": 0.0, "learning_rate": 8.407285479138338e-06, "loss": 0.0006, "step": 22270 }, { "epoch": 0.15945036856795247, "grad_norm": 7.758846186334267e-05, "learning_rate": 8.406569813211195e-06, "loss": 0.0, "step": 22280 }, { "epoch": 0.159521935160667, "grad_norm": 0.0, "learning_rate": 8.405854147284048e-06, "loss": 0.0, "step": 22290 }, { "epoch": 0.15959350175338152, "grad_norm": 0.0, "learning_rate": 8.405138481356904e-06, "loss": 0.0, "step": 22300 }, { "epoch": 0.15966506834609603, "grad_norm": 0.0, "learning_rate": 8.404422815429757e-06, "loss": 0.0, "step": 22310 }, { "epoch": 0.15973663493881057, "grad_norm": 4.672525144577833e-10, "learning_rate": 8.403707149502613e-06, "loss": 0.001, "step": 22320 }, { "epoch": 0.15980820153152508, "grad_norm": 0.0, "learning_rate": 8.402991483575468e-06, "loss": 0.0, "step": 22330 }, { "epoch": 0.1598797681242396, "grad_norm": 1.0358789381825773e-09, "learning_rate": 8.402275817648321e-06, "loss": 0.0, "step": 22340 }, { "epoch": 0.15995133471695414, "grad_norm": 0.0, "learning_rate": 8.401560151721178e-06, "loss": 0.0117, "step": 22350 }, { "epoch": 0.16002290130966865, "grad_norm": 0.0, "learning_rate": 8.400916052386746e-06, "loss": 0.6234, "step": 22360 }, { "epoch": 0.16009446790238316, "grad_norm": 0.0, "learning_rate": 8.400200386459602e-06, "loss": 0.0, "step": 22370 }, { "epoch": 0.1601660344950977, "grad_norm": 128.09841918945312, "learning_rate": 8.399484720532455e-06, "loss": 0.0225, "step": 22380 }, { "epoch": 0.1602376010878122, "grad_norm": 0.0, "learning_rate": 8.39876905460531e-06, "loss": 0.0, "step": 22390 }, { "epoch": 0.16030916768052672, "grad_norm": 0.00010578356159385294, "learning_rate": 8.398053388678166e-06, "loss": 0.0, "step": 22400 }, { "epoch": 0.16038073427324126, "grad_norm": 0.0, "learning_rate": 8.39733772275102e-06, "loss": 0.0, "step": 22410 }, { "epoch": 0.16045230086595577, "grad_norm": 0.0, "learning_rate": 8.396622056823875e-06, "loss": 0.0, "step": 22420 }, { "epoch": 0.16052386745867028, "grad_norm": 0.0, "learning_rate": 8.39590639089673e-06, "loss": 0.2543, "step": 22430 }, { "epoch": 0.16059543405138482, "grad_norm": 2.3869202777859755e-05, "learning_rate": 8.395190724969585e-06, "loss": 0.0, "step": 22440 }, { "epoch": 0.16066700064409933, "grad_norm": 0.0, "learning_rate": 8.394475059042439e-06, "loss": 0.0, "step": 22450 }, { "epoch": 0.16073856723681385, "grad_norm": 0.0, "learning_rate": 8.393759393115294e-06, "loss": 0.0, "step": 22460 }, { "epoch": 0.16081013382952838, "grad_norm": 0.0, "learning_rate": 8.39304372718815e-06, "loss": 0.0, "step": 22470 }, { "epoch": 0.1608817004222429, "grad_norm": 1.7201255104737356e-05, "learning_rate": 8.392328061261003e-06, "loss": 0.0, "step": 22480 }, { "epoch": 0.1609532670149574, "grad_norm": 111.2472152709961, "learning_rate": 8.391612395333858e-06, "loss": 0.0197, "step": 22490 }, { "epoch": 0.16102483360767195, "grad_norm": 0.0, "learning_rate": 8.390896729406714e-06, "loss": 0.0, "step": 22500 }, { "epoch": 0.16109640020038646, "grad_norm": 0.0, "learning_rate": 8.390181063479569e-06, "loss": 0.0, "step": 22510 }, { "epoch": 0.16116796679310097, "grad_norm": 0.0, "learning_rate": 8.389465397552423e-06, "loss": 0.001, "step": 22520 }, { "epoch": 0.1612395333858155, "grad_norm": 0.0003410393837839365, "learning_rate": 8.388749731625278e-06, "loss": 0.0, "step": 22530 }, { "epoch": 0.16131109997853002, "grad_norm": 0.0, "learning_rate": 8.388034065698133e-06, "loss": 0.0001, "step": 22540 }, { "epoch": 0.16138266657124453, "grad_norm": 0.0, "learning_rate": 8.387318399770987e-06, "loss": 0.0002, "step": 22550 }, { "epoch": 0.16145423316395907, "grad_norm": 0.0, "learning_rate": 8.386602733843842e-06, "loss": 0.0, "step": 22560 }, { "epoch": 0.16152579975667358, "grad_norm": 6.727995787514374e-05, "learning_rate": 8.385887067916697e-06, "loss": 0.0001, "step": 22570 }, { "epoch": 0.1615973663493881, "grad_norm": 1.1106036481578485e-06, "learning_rate": 8.385171401989553e-06, "loss": 0.0, "step": 22580 }, { "epoch": 0.16166893294210263, "grad_norm": 0.0, "learning_rate": 8.384455736062406e-06, "loss": 0.0001, "step": 22590 }, { "epoch": 0.16174049953481714, "grad_norm": 5.577736033046676e-07, "learning_rate": 8.383740070135262e-06, "loss": 0.0001, "step": 22600 }, { "epoch": 0.16181206612753166, "grad_norm": 5.395155611154223e-08, "learning_rate": 8.383024404208117e-06, "loss": 0.0001, "step": 22610 }, { "epoch": 0.1618836327202462, "grad_norm": 0.0, "learning_rate": 8.38230873828097e-06, "loss": 0.0, "step": 22620 }, { "epoch": 0.1619551993129607, "grad_norm": 6.704055704176426e-05, "learning_rate": 8.381593072353826e-06, "loss": 0.0, "step": 22630 }, { "epoch": 0.16202676590567522, "grad_norm": 0.0, "learning_rate": 8.380877406426681e-06, "loss": 0.0, "step": 22640 }, { "epoch": 0.16209833249838976, "grad_norm": 0.009534135460853577, "learning_rate": 8.380161740499535e-06, "loss": 0.0, "step": 22650 }, { "epoch": 0.16216989909110427, "grad_norm": 0.0, "learning_rate": 8.37944607457239e-06, "loss": 0.0043, "step": 22660 }, { "epoch": 0.16224146568381878, "grad_norm": 0.0, "learning_rate": 8.378730408645245e-06, "loss": 0.0012, "step": 22670 }, { "epoch": 0.16231303227653332, "grad_norm": 0.09624243527650833, "learning_rate": 8.3780147427181e-06, "loss": 0.0, "step": 22680 }, { "epoch": 0.16238459886924783, "grad_norm": 0.0, "learning_rate": 8.377299076790954e-06, "loss": 0.0001, "step": 22690 }, { "epoch": 0.16245616546196234, "grad_norm": 0.0, "learning_rate": 8.37658341086381e-06, "loss": 0.0003, "step": 22700 }, { "epoch": 0.16252773205467688, "grad_norm": 0.0, "learning_rate": 8.375867744936665e-06, "loss": 0.0, "step": 22710 }, { "epoch": 0.1625992986473914, "grad_norm": 0.0, "learning_rate": 8.375152079009518e-06, "loss": 0.2979, "step": 22720 }, { "epoch": 0.1626708652401059, "grad_norm": 1.3288853551784996e-05, "learning_rate": 8.374436413082374e-06, "loss": 0.0, "step": 22730 }, { "epoch": 0.16274243183282044, "grad_norm": 0.0, "learning_rate": 8.373720747155229e-06, "loss": 0.0002, "step": 22740 }, { "epoch": 0.16281399842553496, "grad_norm": 0.0, "learning_rate": 8.373005081228084e-06, "loss": 0.0, "step": 22750 }, { "epoch": 0.16288556501824947, "grad_norm": 0.0, "learning_rate": 8.372289415300938e-06, "loss": 0.4633, "step": 22760 }, { "epoch": 0.162957131610964, "grad_norm": 0.0, "learning_rate": 8.371573749373793e-06, "loss": 0.0, "step": 22770 }, { "epoch": 0.16302869820367852, "grad_norm": 0.005591914057731628, "learning_rate": 8.370858083446648e-06, "loss": 0.0081, "step": 22780 }, { "epoch": 0.16310026479639306, "grad_norm": 0.0, "learning_rate": 8.370142417519502e-06, "loss": 0.0, "step": 22790 }, { "epoch": 0.16317183138910757, "grad_norm": 0.0, "learning_rate": 8.369426751592357e-06, "loss": 0.0, "step": 22800 }, { "epoch": 0.16324339798182208, "grad_norm": 0.0, "learning_rate": 8.368711085665213e-06, "loss": 0.0, "step": 22810 }, { "epoch": 0.16331496457453662, "grad_norm": 0.0, "learning_rate": 8.367995419738068e-06, "loss": 0.0, "step": 22820 }, { "epoch": 0.16338653116725113, "grad_norm": 0.0, "learning_rate": 8.367279753810921e-06, "loss": 0.0, "step": 22830 }, { "epoch": 0.16345809775996564, "grad_norm": 0.0, "learning_rate": 8.366564087883777e-06, "loss": 0.0075, "step": 22840 }, { "epoch": 0.16352966435268018, "grad_norm": 9.563306813120676e-10, "learning_rate": 8.365848421956632e-06, "loss": 0.0327, "step": 22850 }, { "epoch": 0.1636012309453947, "grad_norm": 0.0, "learning_rate": 8.365132756029486e-06, "loss": 0.1978, "step": 22860 }, { "epoch": 0.1636727975381092, "grad_norm": 0.0, "learning_rate": 8.364417090102341e-06, "loss": 0.0, "step": 22870 }, { "epoch": 0.16374436413082374, "grad_norm": 0.0, "learning_rate": 8.363701424175196e-06, "loss": 0.0, "step": 22880 }, { "epoch": 0.16381593072353826, "grad_norm": 0.0002738672192208469, "learning_rate": 8.36298575824805e-06, "loss": 0.0, "step": 22890 }, { "epoch": 0.16388749731625277, "grad_norm": 0.0, "learning_rate": 8.362270092320905e-06, "loss": 0.0, "step": 22900 }, { "epoch": 0.1639590639089673, "grad_norm": 0.0, "learning_rate": 8.36155442639376e-06, "loss": 0.0, "step": 22910 }, { "epoch": 0.16403063050168182, "grad_norm": 0.0, "learning_rate": 8.360838760466616e-06, "loss": 0.0014, "step": 22920 }, { "epoch": 0.16410219709439633, "grad_norm": 0.0, "learning_rate": 8.36012309453947e-06, "loss": 0.0, "step": 22930 }, { "epoch": 0.16417376368711087, "grad_norm": 8.033402442932129, "learning_rate": 8.359407428612325e-06, "loss": 0.0021, "step": 22940 }, { "epoch": 0.16424533027982538, "grad_norm": 0.0, "learning_rate": 8.35869176268518e-06, "loss": 0.0, "step": 22950 }, { "epoch": 0.1643168968725399, "grad_norm": 0.0, "learning_rate": 8.357976096758033e-06, "loss": 0.0, "step": 22960 }, { "epoch": 0.16438846346525443, "grad_norm": 18.452672958374023, "learning_rate": 8.357260430830889e-06, "loss": 0.0334, "step": 22970 }, { "epoch": 0.16446003005796894, "grad_norm": 0.0008306886302307248, "learning_rate": 8.356544764903744e-06, "loss": 0.0, "step": 22980 }, { "epoch": 0.16453159665068345, "grad_norm": 0.00011256369180046022, "learning_rate": 8.3558290989766e-06, "loss": 0.3296, "step": 22990 }, { "epoch": 0.164603163243398, "grad_norm": 0.0, "learning_rate": 8.355113433049453e-06, "loss": 0.0, "step": 23000 }, { "epoch": 0.1646747298361125, "grad_norm": 0.0, "learning_rate": 8.354397767122308e-06, "loss": 0.0, "step": 23010 }, { "epoch": 0.16474629642882702, "grad_norm": 0.0, "learning_rate": 8.353682101195164e-06, "loss": 0.0, "step": 23020 }, { "epoch": 0.16481786302154156, "grad_norm": 0.00016025523655116558, "learning_rate": 8.352966435268017e-06, "loss": 0.0, "step": 23030 }, { "epoch": 0.16488942961425607, "grad_norm": 0.0, "learning_rate": 8.352250769340872e-06, "loss": 0.0, "step": 23040 }, { "epoch": 0.16496099620697058, "grad_norm": 0.0, "learning_rate": 8.351535103413728e-06, "loss": 0.0, "step": 23050 }, { "epoch": 0.16503256279968512, "grad_norm": 0.0, "learning_rate": 8.350819437486581e-06, "loss": 0.0, "step": 23060 }, { "epoch": 0.16510412939239963, "grad_norm": 0.0, "learning_rate": 8.350103771559437e-06, "loss": 0.0, "step": 23070 }, { "epoch": 0.16517569598511414, "grad_norm": 0.0, "learning_rate": 8.349388105632292e-06, "loss": 0.7531, "step": 23080 }, { "epoch": 0.16524726257782868, "grad_norm": 3.5727410316467285, "learning_rate": 8.348672439705147e-06, "loss": 0.0007, "step": 23090 }, { "epoch": 0.1653188291705432, "grad_norm": 0.0, "learning_rate": 8.347956773778e-06, "loss": 0.0, "step": 23100 }, { "epoch": 0.1653903957632577, "grad_norm": 0.0, "learning_rate": 8.347241107850856e-06, "loss": 0.0001, "step": 23110 }, { "epoch": 0.16546196235597224, "grad_norm": 0.0, "learning_rate": 8.346525441923711e-06, "loss": 0.293, "step": 23120 }, { "epoch": 0.16553352894868675, "grad_norm": 0.0, "learning_rate": 8.345809775996565e-06, "loss": 0.0, "step": 23130 }, { "epoch": 0.16560509554140126, "grad_norm": 0.0, "learning_rate": 8.34509411006942e-06, "loss": 0.0002, "step": 23140 }, { "epoch": 0.1656766621341158, "grad_norm": 1.1026470019714907e-05, "learning_rate": 8.344378444142276e-06, "loss": 0.0, "step": 23150 }, { "epoch": 0.16574822872683032, "grad_norm": 0.0, "learning_rate": 8.34366277821513e-06, "loss": 0.0, "step": 23160 }, { "epoch": 0.16581979531954483, "grad_norm": 1.882836477307137e-05, "learning_rate": 8.342947112287984e-06, "loss": 0.0, "step": 23170 }, { "epoch": 0.16589136191225937, "grad_norm": 0.0, "learning_rate": 8.34223144636084e-06, "loss": 0.0, "step": 23180 }, { "epoch": 0.16596292850497388, "grad_norm": 0.0, "learning_rate": 8.341515780433695e-06, "loss": 0.0, "step": 23190 }, { "epoch": 0.1660344950976884, "grad_norm": 0.0, "learning_rate": 8.340800114506549e-06, "loss": 0.0002, "step": 23200 }, { "epoch": 0.16610606169040293, "grad_norm": 0.002663541352376342, "learning_rate": 8.340084448579404e-06, "loss": 0.0, "step": 23210 }, { "epoch": 0.16617762828311744, "grad_norm": 3.931564549475297e-07, "learning_rate": 8.33936878265226e-06, "loss": 0.0046, "step": 23220 }, { "epoch": 0.16624919487583195, "grad_norm": 0.0, "learning_rate": 8.338653116725114e-06, "loss": 0.0, "step": 23230 }, { "epoch": 0.1663207614685465, "grad_norm": 0.0, "learning_rate": 8.337937450797968e-06, "loss": 0.0, "step": 23240 }, { "epoch": 0.166392328061261, "grad_norm": 0.0, "learning_rate": 8.337221784870823e-06, "loss": 0.0, "step": 23250 }, { "epoch": 0.1664638946539755, "grad_norm": 0.0002933366340585053, "learning_rate": 8.336506118943679e-06, "loss": 0.0753, "step": 23260 }, { "epoch": 0.16653546124669005, "grad_norm": 872.1050415039062, "learning_rate": 8.335790453016532e-06, "loss": 0.4486, "step": 23270 }, { "epoch": 0.16660702783940456, "grad_norm": 0.0012715521734207869, "learning_rate": 8.335074787089388e-06, "loss": 0.0, "step": 23280 }, { "epoch": 0.16667859443211908, "grad_norm": 0.0, "learning_rate": 8.334359121162243e-06, "loss": 0.0147, "step": 23290 }, { "epoch": 0.16675016102483362, "grad_norm": 0.0, "learning_rate": 8.333643455235096e-06, "loss": 0.0, "step": 23300 }, { "epoch": 0.16682172761754813, "grad_norm": 9.73777614099447e-10, "learning_rate": 8.332927789307952e-06, "loss": 0.0, "step": 23310 }, { "epoch": 0.16689329421026264, "grad_norm": 0.0, "learning_rate": 8.332212123380807e-06, "loss": 0.0, "step": 23320 }, { "epoch": 0.16696486080297718, "grad_norm": 0.0, "learning_rate": 8.331496457453662e-06, "loss": 0.0, "step": 23330 }, { "epoch": 0.1670364273956917, "grad_norm": 0.0, "learning_rate": 8.330780791526516e-06, "loss": 0.0, "step": 23340 }, { "epoch": 0.1671079939884062, "grad_norm": 0.0, "learning_rate": 8.330065125599371e-06, "loss": 0.2896, "step": 23350 }, { "epoch": 0.16717956058112074, "grad_norm": 0.0, "learning_rate": 8.329349459672226e-06, "loss": 0.0003, "step": 23360 }, { "epoch": 0.16725112717383525, "grad_norm": 0.0, "learning_rate": 8.32863379374508e-06, "loss": 0.0, "step": 23370 }, { "epoch": 0.16732269376654976, "grad_norm": 0.0, "learning_rate": 8.327918127817935e-06, "loss": 0.0049, "step": 23380 }, { "epoch": 0.1673942603592643, "grad_norm": 0.0, "learning_rate": 8.32720246189079e-06, "loss": 0.0085, "step": 23390 }, { "epoch": 0.1674658269519788, "grad_norm": 0.0, "learning_rate": 8.326486795963646e-06, "loss": 0.0002, "step": 23400 }, { "epoch": 0.16753739354469332, "grad_norm": 0.0, "learning_rate": 8.3257711300365e-06, "loss": 0.0012, "step": 23410 }, { "epoch": 0.16760896013740786, "grad_norm": 0.0, "learning_rate": 8.325055464109353e-06, "loss": 0.0, "step": 23420 }, { "epoch": 0.16768052673012238, "grad_norm": 0.0, "learning_rate": 8.32433979818221e-06, "loss": 0.0, "step": 23430 }, { "epoch": 0.1677520933228369, "grad_norm": 0.0, "learning_rate": 8.323624132255064e-06, "loss": 0.0, "step": 23440 }, { "epoch": 0.16782365991555143, "grad_norm": 0.15909229218959808, "learning_rate": 8.322908466327919e-06, "loss": 0.0001, "step": 23450 }, { "epoch": 0.16789522650826594, "grad_norm": 0.0, "learning_rate": 8.322192800400774e-06, "loss": 0.0, "step": 23460 }, { "epoch": 0.16796679310098045, "grad_norm": 12.818297386169434, "learning_rate": 8.32147713447363e-06, "loss": 0.0102, "step": 23470 }, { "epoch": 0.168038359693695, "grad_norm": 0.0, "learning_rate": 8.320761468546483e-06, "loss": 0.0, "step": 23480 }, { "epoch": 0.1681099262864095, "grad_norm": 0.0, "learning_rate": 8.320045802619337e-06, "loss": 0.0, "step": 23490 }, { "epoch": 0.168181492879124, "grad_norm": 5.412705395002604e-09, "learning_rate": 8.319330136692194e-06, "loss": 0.0, "step": 23500 }, { "epoch": 0.16825305947183855, "grad_norm": 0.0, "learning_rate": 8.318614470765047e-06, "loss": 0.0007, "step": 23510 }, { "epoch": 0.16832462606455306, "grad_norm": 0.0, "learning_rate": 8.317898804837903e-06, "loss": 0.7844, "step": 23520 }, { "epoch": 0.16839619265726757, "grad_norm": 8.249499416912442e-10, "learning_rate": 8.317183138910756e-06, "loss": 0.0008, "step": 23530 }, { "epoch": 0.1684677592499821, "grad_norm": 0.0, "learning_rate": 8.316467472983612e-06, "loss": 0.0, "step": 23540 }, { "epoch": 0.16853932584269662, "grad_norm": 0.0, "learning_rate": 8.315751807056467e-06, "loss": 0.0, "step": 23550 }, { "epoch": 0.16861089243541116, "grad_norm": 0.0, "learning_rate": 8.31503614112932e-06, "loss": 0.0156, "step": 23560 }, { "epoch": 0.16868245902812568, "grad_norm": 0.0, "learning_rate": 8.314320475202177e-06, "loss": 0.0, "step": 23570 }, { "epoch": 0.1687540256208402, "grad_norm": 0.0, "learning_rate": 8.313604809275031e-06, "loss": 0.0, "step": 23580 }, { "epoch": 0.16882559221355473, "grad_norm": 4.123256206512451, "learning_rate": 8.312889143347886e-06, "loss": 0.0009, "step": 23590 }, { "epoch": 0.16889715880626924, "grad_norm": 0.0, "learning_rate": 8.31217347742074e-06, "loss": 0.0033, "step": 23600 }, { "epoch": 0.16896872539898375, "grad_norm": 0.0, "learning_rate": 8.311457811493595e-06, "loss": 0.0, "step": 23610 }, { "epoch": 0.1690402919916983, "grad_norm": 1.6148061376952683e-07, "learning_rate": 8.31074214556645e-06, "loss": 0.0, "step": 23620 }, { "epoch": 0.1691118585844128, "grad_norm": 0.0, "learning_rate": 8.310026479639304e-06, "loss": 0.0, "step": 23630 }, { "epoch": 0.1691834251771273, "grad_norm": 0.0, "learning_rate": 8.309310813712161e-06, "loss": 0.2492, "step": 23640 }, { "epoch": 0.16925499176984185, "grad_norm": 0.0, "learning_rate": 8.308595147785015e-06, "loss": 0.0, "step": 23650 }, { "epoch": 0.16932655836255636, "grad_norm": 0.0, "learning_rate": 8.307879481857868e-06, "loss": 0.0, "step": 23660 }, { "epoch": 0.16939812495527087, "grad_norm": 0.0, "learning_rate": 8.307163815930724e-06, "loss": 0.0015, "step": 23670 }, { "epoch": 0.1694696915479854, "grad_norm": 0.08195120096206665, "learning_rate": 8.306448150003579e-06, "loss": 0.0, "step": 23680 }, { "epoch": 0.16954125814069992, "grad_norm": 0.0004926404799334705, "learning_rate": 8.305732484076434e-06, "loss": 0.0, "step": 23690 }, { "epoch": 0.16961282473341444, "grad_norm": 0.001085858908481896, "learning_rate": 8.305016818149288e-06, "loss": 0.0, "step": 23700 }, { "epoch": 0.16968439132612897, "grad_norm": 0.0, "learning_rate": 8.304301152222143e-06, "loss": 0.0, "step": 23710 }, { "epoch": 0.1697559579188435, "grad_norm": 0.0, "learning_rate": 8.303585486294998e-06, "loss": 0.0, "step": 23720 }, { "epoch": 0.169827524511558, "grad_norm": 0.0, "learning_rate": 8.302869820367852e-06, "loss": 0.0, "step": 23730 }, { "epoch": 0.16989909110427254, "grad_norm": 0.6452613472938538, "learning_rate": 8.302154154440707e-06, "loss": 0.0001, "step": 23740 }, { "epoch": 0.16997065769698705, "grad_norm": 0.0, "learning_rate": 8.301438488513563e-06, "loss": 0.0, "step": 23750 }, { "epoch": 0.17004222428970156, "grad_norm": 0.0, "learning_rate": 8.300722822586418e-06, "loss": 0.0, "step": 23760 }, { "epoch": 0.1701137908824161, "grad_norm": 0.0, "learning_rate": 8.300007156659271e-06, "loss": 0.0, "step": 23770 }, { "epoch": 0.1701853574751306, "grad_norm": 1.7179821952595375e-05, "learning_rate": 8.299291490732127e-06, "loss": 0.0004, "step": 23780 }, { "epoch": 0.17025692406784512, "grad_norm": 0.0, "learning_rate": 8.298575824804982e-06, "loss": 0.0, "step": 23790 }, { "epoch": 0.17032849066055966, "grad_norm": 0.0, "learning_rate": 8.297860158877836e-06, "loss": 0.0, "step": 23800 }, { "epoch": 0.17040005725327417, "grad_norm": 0.31370460987091064, "learning_rate": 8.297144492950691e-06, "loss": 0.0, "step": 23810 }, { "epoch": 0.17047162384598868, "grad_norm": 8.274758329207543e-06, "learning_rate": 8.296428827023546e-06, "loss": 0.0, "step": 23820 }, { "epoch": 0.17054319043870322, "grad_norm": 0.0, "learning_rate": 8.2957131610964e-06, "loss": 0.0, "step": 23830 }, { "epoch": 0.17061475703141774, "grad_norm": 0.0, "learning_rate": 8.294997495169255e-06, "loss": 0.0, "step": 23840 }, { "epoch": 0.17068632362413225, "grad_norm": 1.2551003010230488e-06, "learning_rate": 8.29428182924211e-06, "loss": 0.0003, "step": 23850 }, { "epoch": 0.17075789021684679, "grad_norm": 0.9532769918441772, "learning_rate": 8.293566163314966e-06, "loss": 1.6726, "step": 23860 }, { "epoch": 0.1708294568095613, "grad_norm": 5.7119578400488535e-08, "learning_rate": 8.29285049738782e-06, "loss": 0.0, "step": 23870 }, { "epoch": 0.1709010234022758, "grad_norm": 0.0, "learning_rate": 8.292134831460675e-06, "loss": 0.0, "step": 23880 }, { "epoch": 0.17097258999499035, "grad_norm": 0.0004061961080878973, "learning_rate": 8.29141916553353e-06, "loss": 0.0002, "step": 23890 }, { "epoch": 0.17104415658770486, "grad_norm": 0.0, "learning_rate": 8.290703499606383e-06, "loss": 0.0, "step": 23900 }, { "epoch": 0.17111572318041937, "grad_norm": 0.0, "learning_rate": 8.289987833679239e-06, "loss": 0.6563, "step": 23910 }, { "epoch": 0.1711872897731339, "grad_norm": 0.0, "learning_rate": 8.289272167752094e-06, "loss": 0.0, "step": 23920 }, { "epoch": 0.17125885636584842, "grad_norm": 4.842306999819357e-09, "learning_rate": 8.28855650182495e-06, "loss": 0.0, "step": 23930 }, { "epoch": 0.17133042295856293, "grad_norm": 0.0, "learning_rate": 8.287840835897803e-06, "loss": 0.0, "step": 23940 }, { "epoch": 0.17140198955127747, "grad_norm": 0.0, "learning_rate": 8.287125169970658e-06, "loss": 0.0, "step": 23950 }, { "epoch": 0.17147355614399198, "grad_norm": 8.758450076307156e-10, "learning_rate": 8.286409504043514e-06, "loss": 0.0, "step": 23960 }, { "epoch": 0.1715451227367065, "grad_norm": 1.6549948895772104e-06, "learning_rate": 8.285693838116367e-06, "loss": 0.0, "step": 23970 }, { "epoch": 0.17161668932942103, "grad_norm": 0.0, "learning_rate": 8.284978172189222e-06, "loss": 0.0004, "step": 23980 }, { "epoch": 0.17168825592213555, "grad_norm": 0.0, "learning_rate": 8.284262506262078e-06, "loss": 0.0, "step": 23990 }, { "epoch": 0.17175982251485006, "grad_norm": 0.0, "learning_rate": 8.283546840334933e-06, "loss": 0.0, "step": 24000 }, { "epoch": 0.1718313891075646, "grad_norm": 0.0, "learning_rate": 8.282831174407787e-06, "loss": 0.0255, "step": 24010 }, { "epoch": 0.1719029557002791, "grad_norm": 0.00014312425628304482, "learning_rate": 8.282115508480642e-06, "loss": 0.0165, "step": 24020 }, { "epoch": 0.17197452229299362, "grad_norm": 0.0, "learning_rate": 8.281399842553497e-06, "loss": 0.0, "step": 24030 }, { "epoch": 0.17204608888570816, "grad_norm": 8.785804675426334e-05, "learning_rate": 8.28068417662635e-06, "loss": 0.0, "step": 24040 }, { "epoch": 0.17211765547842267, "grad_norm": 8.149732457241043e-05, "learning_rate": 8.279968510699206e-06, "loss": 0.0, "step": 24050 }, { "epoch": 0.17218922207113718, "grad_norm": 0.0, "learning_rate": 8.279252844772061e-06, "loss": 0.0, "step": 24060 }, { "epoch": 0.17226078866385172, "grad_norm": 0.0, "learning_rate": 8.278537178844915e-06, "loss": 0.0008, "step": 24070 }, { "epoch": 0.17233235525656623, "grad_norm": 0.0, "learning_rate": 8.27782151291777e-06, "loss": 0.0002, "step": 24080 }, { "epoch": 0.17240392184928074, "grad_norm": 1.5111879110336304, "learning_rate": 8.277105846990626e-06, "loss": 0.0002, "step": 24090 }, { "epoch": 0.17247548844199528, "grad_norm": 0.0, "learning_rate": 8.27639018106348e-06, "loss": 0.0, "step": 24100 }, { "epoch": 0.1725470550347098, "grad_norm": 0.0, "learning_rate": 8.275674515136334e-06, "loss": 1.7219, "step": 24110 }, { "epoch": 0.1726186216274243, "grad_norm": 0.0, "learning_rate": 8.27495884920919e-06, "loss": 0.6578, "step": 24120 }, { "epoch": 0.17269018822013885, "grad_norm": 0.0, "learning_rate": 8.274243183282045e-06, "loss": 0.0, "step": 24130 }, { "epoch": 0.17276175481285336, "grad_norm": 0.0, "learning_rate": 8.273527517354899e-06, "loss": 0.0001, "step": 24140 }, { "epoch": 0.17283332140556787, "grad_norm": 0.001021871343255043, "learning_rate": 8.272811851427754e-06, "loss": 0.0, "step": 24150 }, { "epoch": 0.1729048879982824, "grad_norm": 0.0, "learning_rate": 8.27209618550061e-06, "loss": 0.0001, "step": 24160 }, { "epoch": 0.17297645459099692, "grad_norm": 0.0, "learning_rate": 8.271380519573464e-06, "loss": 0.0002, "step": 24170 }, { "epoch": 0.17304802118371143, "grad_norm": 0.0, "learning_rate": 8.270664853646318e-06, "loss": 0.0, "step": 24180 }, { "epoch": 0.17311958777642597, "grad_norm": 1.3472773389366921e-05, "learning_rate": 8.269949187719173e-06, "loss": 0.4555, "step": 24190 }, { "epoch": 0.17319115436914048, "grad_norm": 0.0, "learning_rate": 8.269233521792029e-06, "loss": 0.0, "step": 24200 }, { "epoch": 0.173262720961855, "grad_norm": 9.388843036362005e-10, "learning_rate": 8.268517855864882e-06, "loss": 0.0, "step": 24210 }, { "epoch": 0.17333428755456953, "grad_norm": 0.0, "learning_rate": 8.267802189937738e-06, "loss": 0.0, "step": 24220 }, { "epoch": 0.17340585414728404, "grad_norm": 0.1261035054922104, "learning_rate": 8.267086524010593e-06, "loss": 0.0001, "step": 24230 }, { "epoch": 0.17347742073999856, "grad_norm": 0.0, "learning_rate": 8.266370858083448e-06, "loss": 0.0, "step": 24240 }, { "epoch": 0.1735489873327131, "grad_norm": 0.0, "learning_rate": 8.265655192156302e-06, "loss": 0.0, "step": 24250 }, { "epoch": 0.1736205539254276, "grad_norm": 0.0, "learning_rate": 8.264939526229157e-06, "loss": 0.0, "step": 24260 }, { "epoch": 0.17369212051814212, "grad_norm": 5.4933418141445145e-05, "learning_rate": 8.264223860302012e-06, "loss": 0.0936, "step": 24270 }, { "epoch": 0.17376368711085666, "grad_norm": 0.0, "learning_rate": 8.263508194374866e-06, "loss": 0.0326, "step": 24280 }, { "epoch": 0.17383525370357117, "grad_norm": 0.0, "learning_rate": 8.262792528447721e-06, "loss": 0.0, "step": 24290 }, { "epoch": 0.17390682029628568, "grad_norm": 0.0, "learning_rate": 8.262076862520576e-06, "loss": 0.0, "step": 24300 }, { "epoch": 0.17397838688900022, "grad_norm": 0.0, "learning_rate": 8.26136119659343e-06, "loss": 0.1441, "step": 24310 }, { "epoch": 0.17404995348171473, "grad_norm": 0.0, "learning_rate": 8.260645530666285e-06, "loss": 0.0006, "step": 24320 }, { "epoch": 0.17412152007442927, "grad_norm": 0.0, "learning_rate": 8.25992986473914e-06, "loss": 0.0058, "step": 24330 }, { "epoch": 0.17419308666714378, "grad_norm": 0.0, "learning_rate": 8.259214198811996e-06, "loss": 0.0, "step": 24340 }, { "epoch": 0.1742646532598583, "grad_norm": 2.697984882615856e-06, "learning_rate": 8.25849853288485e-06, "loss": 0.0, "step": 24350 }, { "epoch": 0.17433621985257283, "grad_norm": 0.0, "learning_rate": 8.257782866957705e-06, "loss": 1.0984, "step": 24360 }, { "epoch": 0.17440778644528734, "grad_norm": 0.0, "learning_rate": 8.25706720103056e-06, "loss": 0.0, "step": 24370 }, { "epoch": 0.17447935303800186, "grad_norm": 0.0, "learning_rate": 8.256351535103414e-06, "loss": 0.0, "step": 24380 }, { "epoch": 0.1745509196307164, "grad_norm": 0.5841673016548157, "learning_rate": 8.255635869176269e-06, "loss": 0.0001, "step": 24390 }, { "epoch": 0.1746224862234309, "grad_norm": 0.051504358649253845, "learning_rate": 8.254920203249124e-06, "loss": 0.0, "step": 24400 }, { "epoch": 0.17469405281614542, "grad_norm": 2.463143118802691e-06, "learning_rate": 8.25420453732198e-06, "loss": 0.5459, "step": 24410 }, { "epoch": 0.17476561940885996, "grad_norm": 3.791160821914673, "learning_rate": 8.253488871394833e-06, "loss": 0.0007, "step": 24420 }, { "epoch": 0.17483718600157447, "grad_norm": 0.0, "learning_rate": 8.252773205467688e-06, "loss": 0.0, "step": 24430 }, { "epoch": 0.17490875259428898, "grad_norm": 0.0, "learning_rate": 8.252057539540544e-06, "loss": 0.0, "step": 24440 }, { "epoch": 0.17498031918700352, "grad_norm": 1.2164407792170095e-07, "learning_rate": 8.251341873613397e-06, "loss": 0.0, "step": 24450 }, { "epoch": 0.17505188577971803, "grad_norm": 0.0, "learning_rate": 8.250626207686253e-06, "loss": 0.0, "step": 24460 }, { "epoch": 0.17512345237243254, "grad_norm": 0.0, "learning_rate": 8.249910541759108e-06, "loss": 0.0, "step": 24470 }, { "epoch": 0.17519501896514708, "grad_norm": 0.0, "learning_rate": 8.249194875831962e-06, "loss": 0.0, "step": 24480 }, { "epoch": 0.1752665855578616, "grad_norm": 14.290202140808105, "learning_rate": 8.248479209904817e-06, "loss": 0.0045, "step": 24490 }, { "epoch": 0.1753381521505761, "grad_norm": 0.0, "learning_rate": 8.247763543977672e-06, "loss": 0.0, "step": 24500 }, { "epoch": 0.17540971874329064, "grad_norm": 0.1055423840880394, "learning_rate": 8.247047878050527e-06, "loss": 0.0, "step": 24510 }, { "epoch": 0.17548128533600515, "grad_norm": 0.0, "learning_rate": 8.246332212123381e-06, "loss": 0.0289, "step": 24520 }, { "epoch": 0.17555285192871967, "grad_norm": 0.0, "learning_rate": 8.245616546196236e-06, "loss": 0.0, "step": 24530 }, { "epoch": 0.1756244185214342, "grad_norm": 1.0484040302571884e-09, "learning_rate": 8.244900880269092e-06, "loss": 0.0001, "step": 24540 }, { "epoch": 0.17569598511414872, "grad_norm": 0.09496026486158371, "learning_rate": 8.244185214341945e-06, "loss": 0.0106, "step": 24550 }, { "epoch": 0.17576755170686323, "grad_norm": 0.018603676930069923, "learning_rate": 8.2434695484148e-06, "loss": 0.0007, "step": 24560 }, { "epoch": 0.17583911829957777, "grad_norm": 0.0, "learning_rate": 8.242753882487656e-06, "loss": 0.0, "step": 24570 }, { "epoch": 0.17591068489229228, "grad_norm": 0.0002548196353018284, "learning_rate": 8.242038216560511e-06, "loss": 0.055, "step": 24580 }, { "epoch": 0.1759822514850068, "grad_norm": 0.0, "learning_rate": 8.241322550633365e-06, "loss": 0.0, "step": 24590 }, { "epoch": 0.17605381807772133, "grad_norm": 0.00021324504632502794, "learning_rate": 8.24060688470622e-06, "loss": 0.0001, "step": 24600 }, { "epoch": 0.17612538467043584, "grad_norm": 0.0, "learning_rate": 8.239891218779075e-06, "loss": 0.0, "step": 24610 }, { "epoch": 0.17619695126315035, "grad_norm": 0.0, "learning_rate": 8.239175552851929e-06, "loss": 0.0, "step": 24620 }, { "epoch": 0.1762685178558649, "grad_norm": 4.994242241984637e-10, "learning_rate": 8.238459886924784e-06, "loss": 0.0, "step": 24630 }, { "epoch": 0.1763400844485794, "grad_norm": 0.0, "learning_rate": 8.23774422099764e-06, "loss": 0.0, "step": 24640 }, { "epoch": 0.17641165104129392, "grad_norm": 0.0, "learning_rate": 8.237028555070495e-06, "loss": 0.0094, "step": 24650 }, { "epoch": 0.17648321763400845, "grad_norm": 0.0, "learning_rate": 8.236312889143348e-06, "loss": 0.1158, "step": 24660 }, { "epoch": 0.17655478422672297, "grad_norm": 0.0, "learning_rate": 8.235597223216204e-06, "loss": 0.0, "step": 24670 }, { "epoch": 0.17662635081943748, "grad_norm": 11.988869667053223, "learning_rate": 8.234881557289059e-06, "loss": 0.0019, "step": 24680 }, { "epoch": 0.17669791741215202, "grad_norm": 4.5792145075829183e-10, "learning_rate": 8.234165891361913e-06, "loss": 0.0008, "step": 24690 }, { "epoch": 0.17676948400486653, "grad_norm": 0.0, "learning_rate": 8.233450225434768e-06, "loss": 0.0, "step": 24700 }, { "epoch": 0.17684105059758104, "grad_norm": 0.0, "learning_rate": 8.232734559507623e-06, "loss": 0.0005, "step": 24710 }, { "epoch": 0.17691261719029558, "grad_norm": 0.06303052604198456, "learning_rate": 8.232018893580477e-06, "loss": 0.0025, "step": 24720 }, { "epoch": 0.1769841837830101, "grad_norm": 0.0, "learning_rate": 8.231303227653332e-06, "loss": 0.0, "step": 24730 }, { "epoch": 0.1770557503757246, "grad_norm": 0.0, "learning_rate": 8.230587561726187e-06, "loss": 0.0, "step": 24740 }, { "epoch": 0.17712731696843914, "grad_norm": 0.0, "learning_rate": 8.229871895799043e-06, "loss": 0.0, "step": 24750 }, { "epoch": 0.17719888356115365, "grad_norm": 3.590528194763465e-06, "learning_rate": 8.229156229871896e-06, "loss": 0.0, "step": 24760 }, { "epoch": 0.17727045015386816, "grad_norm": 0.0011969265760853887, "learning_rate": 8.228440563944751e-06, "loss": 0.0655, "step": 24770 }, { "epoch": 0.1773420167465827, "grad_norm": 0.0, "learning_rate": 8.227724898017607e-06, "loss": 0.0182, "step": 24780 }, { "epoch": 0.17741358333929722, "grad_norm": 0.0, "learning_rate": 8.22700923209046e-06, "loss": 0.0, "step": 24790 }, { "epoch": 0.17748514993201173, "grad_norm": 0.0, "learning_rate": 8.226293566163316e-06, "loss": 0.0, "step": 24800 }, { "epoch": 0.17755671652472627, "grad_norm": 0.004311597440391779, "learning_rate": 8.225577900236171e-06, "loss": 0.0, "step": 24810 }, { "epoch": 0.17762828311744078, "grad_norm": 0.0, "learning_rate": 8.224862234309026e-06, "loss": 0.0, "step": 24820 }, { "epoch": 0.1776998497101553, "grad_norm": 0.0, "learning_rate": 8.22414656838188e-06, "loss": 0.0003, "step": 24830 }, { "epoch": 0.17777141630286983, "grad_norm": 0.0002882831613533199, "learning_rate": 8.223430902454735e-06, "loss": 0.0, "step": 24840 }, { "epoch": 0.17784298289558434, "grad_norm": 0.0, "learning_rate": 8.22271523652759e-06, "loss": 0.0542, "step": 24850 }, { "epoch": 0.17791454948829885, "grad_norm": 0.0, "learning_rate": 8.221999570600444e-06, "loss": 0.0, "step": 24860 }, { "epoch": 0.1779861160810134, "grad_norm": 0.0, "learning_rate": 8.2212839046733e-06, "loss": 0.0, "step": 24870 }, { "epoch": 0.1780576826737279, "grad_norm": 0.0, "learning_rate": 8.220568238746155e-06, "loss": 0.0, "step": 24880 }, { "epoch": 0.1781292492664424, "grad_norm": 0.0, "learning_rate": 8.21985257281901e-06, "loss": 0.0066, "step": 24890 }, { "epoch": 0.17820081585915695, "grad_norm": 0.0, "learning_rate": 8.219136906891863e-06, "loss": 0.0, "step": 24900 }, { "epoch": 0.17827238245187146, "grad_norm": 0.0, "learning_rate": 8.218421240964719e-06, "loss": 0.0, "step": 24910 }, { "epoch": 0.17834394904458598, "grad_norm": 0.0002797908673528582, "learning_rate": 8.217705575037574e-06, "loss": 0.0607, "step": 24920 }, { "epoch": 0.17841551563730051, "grad_norm": 0.0, "learning_rate": 8.216989909110428e-06, "loss": 0.0005, "step": 24930 }, { "epoch": 0.17848708223001503, "grad_norm": 5.614278486554269e-10, "learning_rate": 8.216274243183283e-06, "loss": 0.0316, "step": 24940 }, { "epoch": 0.17855864882272954, "grad_norm": 0.0, "learning_rate": 8.215558577256138e-06, "loss": 0.0007, "step": 24950 }, { "epoch": 0.17863021541544408, "grad_norm": 7.361893494817195e-07, "learning_rate": 8.214842911328992e-06, "loss": 0.0, "step": 24960 }, { "epoch": 0.1787017820081586, "grad_norm": 0.0, "learning_rate": 8.214127245401847e-06, "loss": 0.0, "step": 24970 }, { "epoch": 0.1787733486008731, "grad_norm": 0.0, "learning_rate": 8.2134115794747e-06, "loss": 0.0678, "step": 24980 }, { "epoch": 0.17884491519358764, "grad_norm": 0.02474282681941986, "learning_rate": 8.212695913547558e-06, "loss": 0.0, "step": 24990 }, { "epoch": 0.17891648178630215, "grad_norm": 0.0, "learning_rate": 8.211980247620411e-06, "loss": 0.0, "step": 25000 }, { "epoch": 0.17898804837901666, "grad_norm": 0.01484584715217352, "learning_rate": 8.211264581693267e-06, "loss": 0.0, "step": 25010 }, { "epoch": 0.1790596149717312, "grad_norm": 0.0, "learning_rate": 8.210548915766122e-06, "loss": 0.0556, "step": 25020 }, { "epoch": 0.1791311815644457, "grad_norm": 1.8442989357936312e-08, "learning_rate": 8.209833249838976e-06, "loss": 0.0002, "step": 25030 }, { "epoch": 0.17920274815716022, "grad_norm": 1.9008936078535044e-06, "learning_rate": 8.20911758391183e-06, "loss": 0.0, "step": 25040 }, { "epoch": 0.17927431474987476, "grad_norm": 0.0, "learning_rate": 8.208401917984684e-06, "loss": 0.0, "step": 25050 }, { "epoch": 0.17934588134258928, "grad_norm": 0.0, "learning_rate": 8.207686252057541e-06, "loss": 0.0, "step": 25060 }, { "epoch": 0.1794174479353038, "grad_norm": 0.0, "learning_rate": 8.206970586130395e-06, "loss": 0.0079, "step": 25070 }, { "epoch": 0.17948901452801833, "grad_norm": 0.0, "learning_rate": 8.206254920203249e-06, "loss": 0.0, "step": 25080 }, { "epoch": 0.17956058112073284, "grad_norm": 0.0, "learning_rate": 8.205539254276106e-06, "loss": 0.0068, "step": 25090 }, { "epoch": 0.17963214771344738, "grad_norm": 0.0, "learning_rate": 8.20482358834896e-06, "loss": 0.0039, "step": 25100 }, { "epoch": 0.1797037143061619, "grad_norm": 0.0, "learning_rate": 8.204107922421814e-06, "loss": 0.0, "step": 25110 }, { "epoch": 0.1797752808988764, "grad_norm": 4.5564563233568833e-10, "learning_rate": 8.203392256494668e-06, "loss": 0.0002, "step": 25120 }, { "epoch": 0.17984684749159094, "grad_norm": 1.4425585269927979, "learning_rate": 8.202676590567525e-06, "loss": 0.0003, "step": 25130 }, { "epoch": 0.17991841408430545, "grad_norm": 4.528260788561056e-08, "learning_rate": 8.201960924640379e-06, "loss": 0.0, "step": 25140 }, { "epoch": 0.17998998067701996, "grad_norm": 0.0, "learning_rate": 8.201245258713232e-06, "loss": 0.0005, "step": 25150 }, { "epoch": 0.1800615472697345, "grad_norm": 0.0, "learning_rate": 8.20052959278609e-06, "loss": 0.0, "step": 25160 }, { "epoch": 0.180133113862449, "grad_norm": 1.4324821677291766e-05, "learning_rate": 8.199813926858943e-06, "loss": 0.0, "step": 25170 }, { "epoch": 0.18020468045516352, "grad_norm": 0.0, "learning_rate": 8.199098260931798e-06, "loss": 0.0, "step": 25180 }, { "epoch": 0.18027624704787806, "grad_norm": 9.780656910152175e-06, "learning_rate": 8.198382595004652e-06, "loss": 0.0, "step": 25190 }, { "epoch": 0.18034781364059257, "grad_norm": 0.0, "learning_rate": 8.197666929077507e-06, "loss": 0.0, "step": 25200 }, { "epoch": 0.1804193802333071, "grad_norm": 0.0, "learning_rate": 8.196951263150362e-06, "loss": 0.0, "step": 25210 }, { "epoch": 0.18049094682602163, "grad_norm": 0.0, "learning_rate": 8.196235597223216e-06, "loss": 0.0, "step": 25220 }, { "epoch": 0.18056251341873614, "grad_norm": 0.0, "learning_rate": 8.195519931296073e-06, "loss": 0.0007, "step": 25230 }, { "epoch": 0.18063408001145065, "grad_norm": 0.0, "learning_rate": 8.194804265368926e-06, "loss": 0.0076, "step": 25240 }, { "epoch": 0.1807056466041652, "grad_norm": 0.0, "learning_rate": 8.19408859944178e-06, "loss": 0.0, "step": 25250 }, { "epoch": 0.1807772131968797, "grad_norm": 0.08751625567674637, "learning_rate": 8.193372933514635e-06, "loss": 0.0, "step": 25260 }, { "epoch": 0.1808487797895942, "grad_norm": 1.814678540768e-08, "learning_rate": 8.19265726758749e-06, "loss": 0.0, "step": 25270 }, { "epoch": 0.18092034638230875, "grad_norm": 0.0, "learning_rate": 8.191941601660346e-06, "loss": 0.0, "step": 25280 }, { "epoch": 0.18099191297502326, "grad_norm": 0.0, "learning_rate": 8.1912259357332e-06, "loss": 0.0, "step": 25290 }, { "epoch": 0.18106347956773777, "grad_norm": 8.31648321764078e-06, "learning_rate": 8.190510269806057e-06, "loss": 0.0, "step": 25300 }, { "epoch": 0.1811350461604523, "grad_norm": 0.0, "learning_rate": 8.18979460387891e-06, "loss": 0.0, "step": 25310 }, { "epoch": 0.18120661275316682, "grad_norm": 0.0, "learning_rate": 8.189078937951764e-06, "loss": 0.0, "step": 25320 }, { "epoch": 0.18127817934588134, "grad_norm": 4.773191449203296e-06, "learning_rate": 8.188363272024619e-06, "loss": 0.0, "step": 25330 }, { "epoch": 0.18134974593859587, "grad_norm": 0.0, "learning_rate": 8.187647606097474e-06, "loss": 0.0105, "step": 25340 }, { "epoch": 0.18142131253131039, "grad_norm": 0.0, "learning_rate": 8.18693194017033e-06, "loss": 0.0, "step": 25350 }, { "epoch": 0.1814928791240249, "grad_norm": 0.000579962448682636, "learning_rate": 8.186216274243183e-06, "loss": 0.0844, "step": 25360 }, { "epoch": 0.18156444571673944, "grad_norm": 8.686565422522108e-08, "learning_rate": 8.185500608316038e-06, "loss": 0.0004, "step": 25370 }, { "epoch": 0.18163601230945395, "grad_norm": 4.02337801963526e-10, "learning_rate": 8.184784942388894e-06, "loss": 0.0, "step": 25380 }, { "epoch": 0.18170757890216846, "grad_norm": 2.6828172394743888e-06, "learning_rate": 8.184069276461747e-06, "loss": 0.0, "step": 25390 }, { "epoch": 0.181779145494883, "grad_norm": 0.0, "learning_rate": 8.183353610534603e-06, "loss": 0.0, "step": 25400 }, { "epoch": 0.1818507120875975, "grad_norm": 7.987632125150412e-05, "learning_rate": 8.182637944607458e-06, "loss": 0.0, "step": 25410 }, { "epoch": 0.18192227868031202, "grad_norm": 0.0, "learning_rate": 8.181922278680313e-06, "loss": 0.0028, "step": 25420 }, { "epoch": 0.18199384527302656, "grad_norm": 0.0, "learning_rate": 8.181206612753167e-06, "loss": 0.0, "step": 25430 }, { "epoch": 0.18206541186574107, "grad_norm": 2.215255562987295e-06, "learning_rate": 8.180490946826022e-06, "loss": 0.0, "step": 25440 }, { "epoch": 0.18213697845845558, "grad_norm": 0.0009588833781890571, "learning_rate": 8.179775280898877e-06, "loss": 0.0141, "step": 25450 }, { "epoch": 0.18220854505117012, "grad_norm": 0.0, "learning_rate": 8.179059614971731e-06, "loss": 0.0, "step": 25460 }, { "epoch": 0.18228011164388463, "grad_norm": 0.006278991233557463, "learning_rate": 8.178343949044586e-06, "loss": 0.1601, "step": 25470 }, { "epoch": 0.18235167823659915, "grad_norm": 0.0, "learning_rate": 8.177628283117442e-06, "loss": 0.0, "step": 25480 }, { "epoch": 0.18242324482931369, "grad_norm": 4.1361406033502135e-07, "learning_rate": 8.176912617190295e-06, "loss": 0.0, "step": 25490 }, { "epoch": 0.1824948114220282, "grad_norm": 0.000253709324169904, "learning_rate": 8.17619695126315e-06, "loss": 0.0, "step": 25500 }, { "epoch": 0.1825663780147427, "grad_norm": 0.0, "learning_rate": 8.175481285336006e-06, "loss": 0.0, "step": 25510 }, { "epoch": 0.18263794460745725, "grad_norm": 0.0, "learning_rate": 8.174765619408861e-06, "loss": 0.0, "step": 25520 }, { "epoch": 0.18270951120017176, "grad_norm": 7.987813432919211e-07, "learning_rate": 8.174049953481715e-06, "loss": 0.0001, "step": 25530 }, { "epoch": 0.18278107779288627, "grad_norm": 0.0, "learning_rate": 8.17333428755457e-06, "loss": 0.0001, "step": 25540 }, { "epoch": 0.1828526443856008, "grad_norm": 0.0, "learning_rate": 8.172618621627425e-06, "loss": 0.0, "step": 25550 }, { "epoch": 0.18292421097831532, "grad_norm": 0.0, "learning_rate": 8.171902955700279e-06, "loss": 0.0, "step": 25560 }, { "epoch": 0.18299577757102983, "grad_norm": 0.0, "learning_rate": 8.171187289773134e-06, "loss": 0.0006, "step": 25570 }, { "epoch": 0.18306734416374437, "grad_norm": 0.0, "learning_rate": 8.17047162384599e-06, "loss": 0.0, "step": 25580 }, { "epoch": 0.18313891075645888, "grad_norm": 0.0, "learning_rate": 8.169755957918845e-06, "loss": 0.0, "step": 25590 }, { "epoch": 0.1832104773491734, "grad_norm": 0.0, "learning_rate": 8.169040291991698e-06, "loss": 0.0, "step": 25600 }, { "epoch": 0.18328204394188793, "grad_norm": 2.4072067006386533e-08, "learning_rate": 8.168324626064554e-06, "loss": 0.0, "step": 25610 }, { "epoch": 0.18335361053460245, "grad_norm": 0.0, "learning_rate": 8.167608960137409e-06, "loss": 0.0008, "step": 25620 }, { "epoch": 0.18342517712731696, "grad_norm": 2.4524509090184665e-09, "learning_rate": 8.166893294210263e-06, "loss": 0.0, "step": 25630 }, { "epoch": 0.1834967437200315, "grad_norm": 0.0, "learning_rate": 8.166177628283118e-06, "loss": 0.0, "step": 25640 }, { "epoch": 0.183568310312746, "grad_norm": 0.0004585839051287621, "learning_rate": 8.165461962355973e-06, "loss": 0.0, "step": 25650 }, { "epoch": 0.18363987690546052, "grad_norm": 0.0, "learning_rate": 8.164746296428828e-06, "loss": 0.1044, "step": 25660 }, { "epoch": 0.18371144349817506, "grad_norm": 5.182489530852763e-07, "learning_rate": 8.164030630501682e-06, "loss": 0.0, "step": 25670 }, { "epoch": 0.18378301009088957, "grad_norm": 0.0, "learning_rate": 8.163314964574537e-06, "loss": 0.0, "step": 25680 }, { "epoch": 0.18385457668360408, "grad_norm": 0.0, "learning_rate": 8.162599298647393e-06, "loss": 0.0, "step": 25690 }, { "epoch": 0.18392614327631862, "grad_norm": 0.0, "learning_rate": 8.161883632720246e-06, "loss": 0.0, "step": 25700 }, { "epoch": 0.18399770986903313, "grad_norm": 0.3629354238510132, "learning_rate": 8.161167966793101e-06, "loss": 0.0001, "step": 25710 }, { "epoch": 0.18406927646174764, "grad_norm": 0.0, "learning_rate": 8.160452300865957e-06, "loss": 0.0, "step": 25720 }, { "epoch": 0.18414084305446218, "grad_norm": 0.0, "learning_rate": 8.15973663493881e-06, "loss": 0.0, "step": 25730 }, { "epoch": 0.1842124096471767, "grad_norm": 6.283328843892377e-07, "learning_rate": 8.159020969011666e-06, "loss": 0.0, "step": 25740 }, { "epoch": 0.1842839762398912, "grad_norm": 0.0, "learning_rate": 8.158305303084521e-06, "loss": 0.0, "step": 25750 }, { "epoch": 0.18435554283260575, "grad_norm": 0.0, "learning_rate": 8.157589637157376e-06, "loss": 0.0, "step": 25760 }, { "epoch": 0.18442710942532026, "grad_norm": 3.0673657391844245e-08, "learning_rate": 8.15687397123023e-06, "loss": 0.0, "step": 25770 }, { "epoch": 0.18449867601803477, "grad_norm": 521.452392578125, "learning_rate": 8.156158305303085e-06, "loss": 0.5879, "step": 25780 }, { "epoch": 0.1845702426107493, "grad_norm": 0.0, "learning_rate": 8.15544263937594e-06, "loss": 0.0, "step": 25790 }, { "epoch": 0.18464180920346382, "grad_norm": 1.3069867236481514e-05, "learning_rate": 8.154726973448794e-06, "loss": 0.0345, "step": 25800 }, { "epoch": 0.18471337579617833, "grad_norm": 1.3361601531869383e-06, "learning_rate": 8.15401130752165e-06, "loss": 0.0, "step": 25810 }, { "epoch": 0.18478494238889287, "grad_norm": 0.0003840447752736509, "learning_rate": 8.153295641594505e-06, "loss": 0.0, "step": 25820 }, { "epoch": 0.18485650898160738, "grad_norm": 0.0, "learning_rate": 8.15257997566736e-06, "loss": 0.0, "step": 25830 }, { "epoch": 0.1849280755743219, "grad_norm": 0.0, "learning_rate": 8.151864309740213e-06, "loss": 0.0, "step": 25840 }, { "epoch": 0.18499964216703643, "grad_norm": 0.0, "learning_rate": 8.151148643813069e-06, "loss": 0.012, "step": 25850 }, { "epoch": 0.18507120875975094, "grad_norm": 2.1787842641174393e-08, "learning_rate": 8.150432977885924e-06, "loss": 0.0, "step": 25860 }, { "epoch": 0.18514277535246548, "grad_norm": 5.899390131958171e-08, "learning_rate": 8.149717311958778e-06, "loss": 0.0, "step": 25870 }, { "epoch": 0.18521434194518, "grad_norm": 0.0, "learning_rate": 8.149001646031633e-06, "loss": 0.0049, "step": 25880 }, { "epoch": 0.1852859085378945, "grad_norm": 4.924006134388037e-06, "learning_rate": 8.148285980104488e-06, "loss": 0.0001, "step": 25890 }, { "epoch": 0.18535747513060905, "grad_norm": 9.399992995895445e-05, "learning_rate": 8.147570314177344e-06, "loss": 0.0, "step": 25900 }, { "epoch": 0.18542904172332356, "grad_norm": 0.0, "learning_rate": 8.146854648250197e-06, "loss": 0.0, "step": 25910 }, { "epoch": 0.18550060831603807, "grad_norm": 0.0, "learning_rate": 8.146138982323052e-06, "loss": 0.0, "step": 25920 }, { "epoch": 0.1855721749087526, "grad_norm": 0.0, "learning_rate": 8.145423316395908e-06, "loss": 0.1046, "step": 25930 }, { "epoch": 0.18564374150146712, "grad_norm": 0.0, "learning_rate": 8.144707650468761e-06, "loss": 0.002, "step": 25940 }, { "epoch": 0.18571530809418163, "grad_norm": 0.0, "learning_rate": 8.143991984541617e-06, "loss": 0.7039, "step": 25950 }, { "epoch": 0.18578687468689617, "grad_norm": 0.0, "learning_rate": 8.143276318614472e-06, "loss": 0.0, "step": 25960 }, { "epoch": 0.18585844127961068, "grad_norm": 0.0, "learning_rate": 8.142560652687325e-06, "loss": 0.0, "step": 25970 }, { "epoch": 0.1859300078723252, "grad_norm": 3.436342765894551e-08, "learning_rate": 8.14184498676018e-06, "loss": 0.0003, "step": 25980 }, { "epoch": 0.18600157446503973, "grad_norm": 0.0, "learning_rate": 8.141129320833036e-06, "loss": 0.4309, "step": 25990 }, { "epoch": 0.18607314105775424, "grad_norm": 1.0127584459951322e-07, "learning_rate": 8.140413654905891e-06, "loss": 0.0, "step": 26000 }, { "epoch": 0.18614470765046875, "grad_norm": 0.0, "learning_rate": 8.139697988978745e-06, "loss": 0.0, "step": 26010 }, { "epoch": 0.1862162742431833, "grad_norm": 0.018747467547655106, "learning_rate": 8.1389823230516e-06, "loss": 0.0277, "step": 26020 }, { "epoch": 0.1862878408358978, "grad_norm": 0.0064585087820887566, "learning_rate": 8.138266657124456e-06, "loss": 0.2209, "step": 26030 }, { "epoch": 0.18635940742861232, "grad_norm": 0.0, "learning_rate": 8.137550991197309e-06, "loss": 0.0, "step": 26040 }, { "epoch": 0.18643097402132686, "grad_norm": 0.0, "learning_rate": 8.136835325270164e-06, "loss": 0.0, "step": 26050 }, { "epoch": 0.18650254061404137, "grad_norm": 0.0, "learning_rate": 8.13611965934302e-06, "loss": 0.0009, "step": 26060 }, { "epoch": 0.18657410720675588, "grad_norm": 0.0038903478998690844, "learning_rate": 8.135403993415875e-06, "loss": 0.0, "step": 26070 }, { "epoch": 0.18664567379947042, "grad_norm": 0.0, "learning_rate": 8.134688327488729e-06, "loss": 0.0, "step": 26080 }, { "epoch": 0.18671724039218493, "grad_norm": 0.0, "learning_rate": 8.133972661561584e-06, "loss": 0.0, "step": 26090 }, { "epoch": 0.18678880698489944, "grad_norm": 0.0, "learning_rate": 8.13325699563444e-06, "loss": 0.0099, "step": 26100 }, { "epoch": 0.18686037357761398, "grad_norm": 0.0, "learning_rate": 8.132541329707293e-06, "loss": 0.0, "step": 26110 }, { "epoch": 0.1869319401703285, "grad_norm": 5.480375693878159e-05, "learning_rate": 8.131825663780148e-06, "loss": 0.0, "step": 26120 }, { "epoch": 0.187003506763043, "grad_norm": 0.0, "learning_rate": 8.131109997853003e-06, "loss": 0.0306, "step": 26130 }, { "epoch": 0.18707507335575754, "grad_norm": 0.0, "learning_rate": 8.130394331925857e-06, "loss": 0.6398, "step": 26140 }, { "epoch": 0.18714663994847205, "grad_norm": 0.00011135856038890779, "learning_rate": 8.129678665998712e-06, "loss": 0.012, "step": 26150 }, { "epoch": 0.18721820654118657, "grad_norm": 0.0, "learning_rate": 8.128963000071568e-06, "loss": 0.0556, "step": 26160 }, { "epoch": 0.1872897731339011, "grad_norm": 0.0, "learning_rate": 8.128247334144423e-06, "loss": 0.0059, "step": 26170 }, { "epoch": 0.18736133972661562, "grad_norm": 0.0, "learning_rate": 8.127531668217276e-06, "loss": 0.0606, "step": 26180 }, { "epoch": 0.18743290631933013, "grad_norm": 0.0, "learning_rate": 8.126816002290132e-06, "loss": 0.0, "step": 26190 }, { "epoch": 0.18750447291204467, "grad_norm": 0.0, "learning_rate": 8.126100336362987e-06, "loss": 0.0, "step": 26200 }, { "epoch": 0.18757603950475918, "grad_norm": 5.20373799606233e-10, "learning_rate": 8.12538467043584e-06, "loss": 0.0, "step": 26210 }, { "epoch": 0.1876476060974737, "grad_norm": 3.8036180427525323e-08, "learning_rate": 8.124669004508696e-06, "loss": 0.0, "step": 26220 }, { "epoch": 0.18771917269018823, "grad_norm": 0.0, "learning_rate": 8.123953338581551e-06, "loss": 0.0, "step": 26230 }, { "epoch": 0.18779073928290274, "grad_norm": 0.0, "learning_rate": 8.123237672654407e-06, "loss": 0.0, "step": 26240 }, { "epoch": 0.18786230587561725, "grad_norm": 0.0, "learning_rate": 8.12252200672726e-06, "loss": 0.0043, "step": 26250 }, { "epoch": 0.1879338724683318, "grad_norm": 0.0, "learning_rate": 8.121806340800115e-06, "loss": 0.0, "step": 26260 }, { "epoch": 0.1880054390610463, "grad_norm": 0.0, "learning_rate": 8.12109067487297e-06, "loss": 0.0, "step": 26270 }, { "epoch": 0.18807700565376081, "grad_norm": 0.0, "learning_rate": 8.120375008945824e-06, "loss": 0.0, "step": 26280 }, { "epoch": 0.18814857224647535, "grad_norm": 0.0, "learning_rate": 8.11965934301868e-06, "loss": 0.0, "step": 26290 }, { "epoch": 0.18822013883918987, "grad_norm": 2.3419270291924477e-06, "learning_rate": 8.118943677091535e-06, "loss": 0.0003, "step": 26300 }, { "epoch": 0.18829170543190438, "grad_norm": 3.585604346767468e-08, "learning_rate": 8.11822801116439e-06, "loss": 0.0, "step": 26310 }, { "epoch": 0.18836327202461892, "grad_norm": 1.0012342066545443e-09, "learning_rate": 8.117512345237244e-06, "loss": 0.0, "step": 26320 }, { "epoch": 0.18843483861733343, "grad_norm": 0.008250666782259941, "learning_rate": 8.116796679310099e-06, "loss": 0.0, "step": 26330 }, { "epoch": 0.18850640521004794, "grad_norm": 0.00046546358498744667, "learning_rate": 8.116081013382954e-06, "loss": 0.0, "step": 26340 }, { "epoch": 0.18857797180276248, "grad_norm": 0.0, "learning_rate": 8.115365347455808e-06, "loss": 0.0, "step": 26350 }, { "epoch": 0.188649538395477, "grad_norm": 0.0, "learning_rate": 8.114649681528663e-06, "loss": 0.0005, "step": 26360 }, { "epoch": 0.1887211049881915, "grad_norm": 2.387519904001323e-10, "learning_rate": 8.113934015601519e-06, "loss": 0.0, "step": 26370 }, { "epoch": 0.18879267158090604, "grad_norm": 0.0, "learning_rate": 8.113218349674372e-06, "loss": 0.0, "step": 26380 }, { "epoch": 0.18886423817362055, "grad_norm": 0.0, "learning_rate": 8.112502683747227e-06, "loss": 0.0955, "step": 26390 }, { "epoch": 0.18893580476633506, "grad_norm": 1.1682061984430447e-09, "learning_rate": 8.111858584412797e-06, "loss": 0.85, "step": 26400 }, { "epoch": 0.1890073713590496, "grad_norm": 0.0, "learning_rate": 8.11114291848565e-06, "loss": 0.0, "step": 26410 }, { "epoch": 0.18907893795176411, "grad_norm": 0.0, "learning_rate": 8.110427252558506e-06, "loss": 0.0, "step": 26420 }, { "epoch": 0.18915050454447863, "grad_norm": 0.0, "learning_rate": 8.109711586631361e-06, "loss": 0.0, "step": 26430 }, { "epoch": 0.18922207113719317, "grad_norm": 8.727738168090582e-05, "learning_rate": 8.108995920704215e-06, "loss": 0.0, "step": 26440 }, { "epoch": 0.18929363772990768, "grad_norm": 0.0, "learning_rate": 8.108280254777072e-06, "loss": 0.0, "step": 26450 }, { "epoch": 0.1893652043226222, "grad_norm": 0.0, "learning_rate": 8.107564588849926e-06, "loss": 0.0, "step": 26460 }, { "epoch": 0.18943677091533673, "grad_norm": 214.9739227294922, "learning_rate": 8.106848922922779e-06, "loss": 0.0392, "step": 26470 }, { "epoch": 0.18950833750805124, "grad_norm": 0.0, "learning_rate": 8.106133256995634e-06, "loss": 0.0, "step": 26480 }, { "epoch": 0.18957990410076575, "grad_norm": 2.6872294256463647e-07, "learning_rate": 8.10541759106849e-06, "loss": 0.0, "step": 26490 }, { "epoch": 0.1896514706934803, "grad_norm": 3.2880659103393555, "learning_rate": 8.104701925141345e-06, "loss": 0.0005, "step": 26500 }, { "epoch": 0.1897230372861948, "grad_norm": 0.0, "learning_rate": 8.103986259214199e-06, "loss": 0.0012, "step": 26510 }, { "epoch": 0.1897946038789093, "grad_norm": 0.0, "learning_rate": 8.103270593287056e-06, "loss": 0.0, "step": 26520 }, { "epoch": 0.18986617047162385, "grad_norm": 0.0, "learning_rate": 8.10255492735991e-06, "loss": 0.0, "step": 26530 }, { "epoch": 0.18993773706433836, "grad_norm": 0.0, "learning_rate": 8.101839261432763e-06, "loss": 0.0, "step": 26540 }, { "epoch": 0.19000930365705287, "grad_norm": 0.0, "learning_rate": 8.101123595505618e-06, "loss": 0.0086, "step": 26550 }, { "epoch": 0.19008087024976741, "grad_norm": 0.0, "learning_rate": 8.100407929578473e-06, "loss": 0.0, "step": 26560 }, { "epoch": 0.19015243684248193, "grad_norm": 0.0, "learning_rate": 8.099692263651329e-06, "loss": 0.0, "step": 26570 }, { "epoch": 0.19022400343519644, "grad_norm": 7.94145023519377e-07, "learning_rate": 8.098976597724182e-06, "loss": 0.0001, "step": 26580 }, { "epoch": 0.19029557002791098, "grad_norm": 0.0, "learning_rate": 8.098260931797038e-06, "loss": 0.0167, "step": 26590 }, { "epoch": 0.1903671366206255, "grad_norm": 0.0, "learning_rate": 8.097545265869893e-06, "loss": 0.0011, "step": 26600 }, { "epoch": 0.19043870321334, "grad_norm": 3.5763534356192395e-07, "learning_rate": 8.096829599942746e-06, "loss": 0.0005, "step": 26610 }, { "epoch": 0.19051026980605454, "grad_norm": 0.0, "learning_rate": 8.096113934015602e-06, "loss": 0.0, "step": 26620 }, { "epoch": 0.19058183639876905, "grad_norm": 0.2933753728866577, "learning_rate": 8.095398268088457e-06, "loss": 0.0001, "step": 26630 }, { "epoch": 0.1906534029914836, "grad_norm": 0.0, "learning_rate": 8.094682602161312e-06, "loss": 0.0, "step": 26640 }, { "epoch": 0.1907249695841981, "grad_norm": 3.483007446902775e-07, "learning_rate": 8.093966936234166e-06, "loss": 0.0, "step": 26650 }, { "epoch": 0.1907965361769126, "grad_norm": 7.991405254870187e-06, "learning_rate": 8.093251270307021e-06, "loss": 0.0, "step": 26660 }, { "epoch": 0.19086810276962715, "grad_norm": 0.0, "learning_rate": 8.092535604379876e-06, "loss": 0.0, "step": 26670 }, { "epoch": 0.19093966936234166, "grad_norm": 9.207697644342261e-07, "learning_rate": 8.09181993845273e-06, "loss": 0.0, "step": 26680 }, { "epoch": 0.19101123595505617, "grad_norm": 2.6252355382894166e-06, "learning_rate": 8.091104272525585e-06, "loss": 0.0002, "step": 26690 }, { "epoch": 0.1910828025477707, "grad_norm": 2.226314066433588e-08, "learning_rate": 8.09038860659844e-06, "loss": 0.0, "step": 26700 }, { "epoch": 0.19115436914048523, "grad_norm": 0.0, "learning_rate": 8.089672940671294e-06, "loss": 0.0, "step": 26710 }, { "epoch": 0.19122593573319974, "grad_norm": 0.0, "learning_rate": 8.08895727474415e-06, "loss": 0.0, "step": 26720 }, { "epoch": 0.19129750232591428, "grad_norm": 0.0001239085104316473, "learning_rate": 8.088241608817005e-06, "loss": 0.0, "step": 26730 }, { "epoch": 0.1913690689186288, "grad_norm": 0.0, "learning_rate": 8.08752594288986e-06, "loss": 0.0005, "step": 26740 }, { "epoch": 0.1914406355113433, "grad_norm": 0.0, "learning_rate": 8.086810276962714e-06, "loss": 0.0002, "step": 26750 }, { "epoch": 0.19151220210405784, "grad_norm": 27.515684127807617, "learning_rate": 8.086094611035569e-06, "loss": 0.0093, "step": 26760 }, { "epoch": 0.19158376869677235, "grad_norm": 0.00243314984254539, "learning_rate": 8.085378945108424e-06, "loss": 0.0, "step": 26770 }, { "epoch": 0.19165533528948686, "grad_norm": 0.0, "learning_rate": 8.084663279181278e-06, "loss": 0.4203, "step": 26780 }, { "epoch": 0.1917269018822014, "grad_norm": 4.967845579351149e-10, "learning_rate": 8.083947613254133e-06, "loss": 0.0, "step": 26790 }, { "epoch": 0.1917984684749159, "grad_norm": 0.0006835004314780235, "learning_rate": 8.083231947326989e-06, "loss": 0.0, "step": 26800 }, { "epoch": 0.19187003506763042, "grad_norm": 0.0, "learning_rate": 8.082516281399844e-06, "loss": 0.0, "step": 26810 }, { "epoch": 0.19194160166034496, "grad_norm": 0.0, "learning_rate": 8.081800615472697e-06, "loss": 0.2113, "step": 26820 }, { "epoch": 0.19201316825305947, "grad_norm": 0.0, "learning_rate": 8.081084949545553e-06, "loss": 0.0052, "step": 26830 }, { "epoch": 0.19208473484577399, "grad_norm": 0.0, "learning_rate": 8.080369283618408e-06, "loss": 0.0, "step": 26840 }, { "epoch": 0.19215630143848852, "grad_norm": 1.0583769416427913e-08, "learning_rate": 8.079653617691262e-06, "loss": 0.0, "step": 26850 }, { "epoch": 0.19222786803120304, "grad_norm": 19.48623275756836, "learning_rate": 8.078937951764117e-06, "loss": 0.0028, "step": 26860 }, { "epoch": 0.19229943462391755, "grad_norm": 6.266158580780029, "learning_rate": 8.078222285836972e-06, "loss": 0.001, "step": 26870 }, { "epoch": 0.1923710012166321, "grad_norm": 0.0, "learning_rate": 8.077506619909826e-06, "loss": 0.0, "step": 26880 }, { "epoch": 0.1924425678093466, "grad_norm": 0.0, "learning_rate": 8.076790953982681e-06, "loss": 0.0, "step": 26890 }, { "epoch": 0.1925141344020611, "grad_norm": 0.0, "learning_rate": 8.076075288055536e-06, "loss": 0.0, "step": 26900 }, { "epoch": 0.19258570099477565, "grad_norm": 0.0055824690498411655, "learning_rate": 8.075359622128392e-06, "loss": 0.2258, "step": 26910 }, { "epoch": 0.19265726758749016, "grad_norm": 0.0, "learning_rate": 8.074643956201245e-06, "loss": 0.0508, "step": 26920 }, { "epoch": 0.19272883418020467, "grad_norm": 0.0, "learning_rate": 8.0739282902741e-06, "loss": 0.0009, "step": 26930 }, { "epoch": 0.1928004007729192, "grad_norm": 0.0017072088085114956, "learning_rate": 8.073212624346956e-06, "loss": 0.0, "step": 26940 }, { "epoch": 0.19287196736563372, "grad_norm": 0.0, "learning_rate": 8.07249695841981e-06, "loss": 0.0, "step": 26950 }, { "epoch": 0.19294353395834823, "grad_norm": 0.0, "learning_rate": 8.071781292492665e-06, "loss": 0.1268, "step": 26960 }, { "epoch": 0.19301510055106277, "grad_norm": 9.324401251120662e-10, "learning_rate": 8.07106562656552e-06, "loss": 0.0005, "step": 26970 }, { "epoch": 0.19308666714377729, "grad_norm": 2.4023072420220615e-09, "learning_rate": 8.070349960638375e-06, "loss": 0.0, "step": 26980 }, { "epoch": 0.1931582337364918, "grad_norm": 0.0, "learning_rate": 8.069634294711229e-06, "loss": 0.0, "step": 26990 }, { "epoch": 0.19322980032920634, "grad_norm": 0.002740024821832776, "learning_rate": 8.068918628784084e-06, "loss": 0.0, "step": 27000 }, { "epoch": 0.19330136692192085, "grad_norm": 0.0, "learning_rate": 8.06820296285694e-06, "loss": 0.0, "step": 27010 }, { "epoch": 0.19337293351463536, "grad_norm": 0.0, "learning_rate": 8.067487296929793e-06, "loss": 0.0002, "step": 27020 }, { "epoch": 0.1934445001073499, "grad_norm": 0.0, "learning_rate": 8.066771631002648e-06, "loss": 0.0042, "step": 27030 }, { "epoch": 0.1935160667000644, "grad_norm": 17.4365177154541, "learning_rate": 8.066055965075504e-06, "loss": 0.0024, "step": 27040 }, { "epoch": 0.19358763329277892, "grad_norm": 0.0, "learning_rate": 8.065340299148359e-06, "loss": 0.0, "step": 27050 }, { "epoch": 0.19365919988549346, "grad_norm": 7.275139796547592e-05, "learning_rate": 8.064624633221213e-06, "loss": 0.0, "step": 27060 }, { "epoch": 0.19373076647820797, "grad_norm": 0.0, "learning_rate": 8.063908967294068e-06, "loss": 0.0, "step": 27070 }, { "epoch": 0.19380233307092248, "grad_norm": 0.0, "learning_rate": 8.063193301366923e-06, "loss": 0.0, "step": 27080 }, { "epoch": 0.19387389966363702, "grad_norm": 0.0, "learning_rate": 8.062477635439777e-06, "loss": 0.0, "step": 27090 }, { "epoch": 0.19394546625635153, "grad_norm": 0.0, "learning_rate": 8.061761969512632e-06, "loss": 0.0278, "step": 27100 }, { "epoch": 0.19401703284906605, "grad_norm": 0.006313992198556662, "learning_rate": 8.061046303585487e-06, "loss": 0.0, "step": 27110 }, { "epoch": 0.19408859944178058, "grad_norm": 0.0, "learning_rate": 8.060330637658341e-06, "loss": 0.0, "step": 27120 }, { "epoch": 0.1941601660344951, "grad_norm": 0.0, "learning_rate": 8.059614971731196e-06, "loss": 0.0009, "step": 27130 }, { "epoch": 0.1942317326272096, "grad_norm": 0.0, "learning_rate": 8.058899305804051e-06, "loss": 0.0002, "step": 27140 }, { "epoch": 0.19430329921992415, "grad_norm": 0.0, "learning_rate": 8.058183639876907e-06, "loss": 0.0006, "step": 27150 }, { "epoch": 0.19437486581263866, "grad_norm": 0.0, "learning_rate": 8.05746797394976e-06, "loss": 0.0, "step": 27160 }, { "epoch": 0.19444643240535317, "grad_norm": 0.9984338879585266, "learning_rate": 8.056752308022616e-06, "loss": 0.0002, "step": 27170 }, { "epoch": 0.1945179989980677, "grad_norm": 0.0, "learning_rate": 8.056036642095471e-06, "loss": 0.0025, "step": 27180 }, { "epoch": 0.19458956559078222, "grad_norm": 0.0, "learning_rate": 8.055320976168325e-06, "loss": 0.0, "step": 27190 }, { "epoch": 0.19466113218349673, "grad_norm": 0.024233007803559303, "learning_rate": 8.05460531024118e-06, "loss": 0.0225, "step": 27200 }, { "epoch": 0.19473269877621127, "grad_norm": 0.9810116291046143, "learning_rate": 8.053889644314035e-06, "loss": 0.0002, "step": 27210 }, { "epoch": 0.19480426536892578, "grad_norm": 0.0, "learning_rate": 8.05317397838689e-06, "loss": 0.0, "step": 27220 }, { "epoch": 0.1948758319616403, "grad_norm": 0.0, "learning_rate": 8.052458312459744e-06, "loss": 0.0, "step": 27230 }, { "epoch": 0.19494739855435483, "grad_norm": 0.0, "learning_rate": 8.0517426465326e-06, "loss": 0.0, "step": 27240 }, { "epoch": 0.19501896514706935, "grad_norm": 0.0, "learning_rate": 8.051026980605455e-06, "loss": 0.0, "step": 27250 }, { "epoch": 0.19509053173978386, "grad_norm": 1.9931756014557322e-06, "learning_rate": 8.050311314678308e-06, "loss": 0.0, "step": 27260 }, { "epoch": 0.1951620983324984, "grad_norm": 0.0, "learning_rate": 8.049595648751164e-06, "loss": 0.0, "step": 27270 }, { "epoch": 0.1952336649252129, "grad_norm": 0.0, "learning_rate": 8.048879982824019e-06, "loss": 0.0, "step": 27280 }, { "epoch": 0.19530523151792742, "grad_norm": 0.0, "learning_rate": 8.048164316896874e-06, "loss": 1.1051, "step": 27290 }, { "epoch": 0.19537679811064196, "grad_norm": 0.0, "learning_rate": 8.047448650969728e-06, "loss": 0.0116, "step": 27300 }, { "epoch": 0.19544836470335647, "grad_norm": 4.996539848534098e-10, "learning_rate": 8.046732985042583e-06, "loss": 0.0, "step": 27310 }, { "epoch": 0.19551993129607098, "grad_norm": 0.0, "learning_rate": 8.046017319115438e-06, "loss": 0.0004, "step": 27320 }, { "epoch": 0.19559149788878552, "grad_norm": 0.0, "learning_rate": 8.045301653188292e-06, "loss": 0.0, "step": 27330 }, { "epoch": 0.19566306448150003, "grad_norm": 0.0, "learning_rate": 8.044585987261147e-06, "loss": 0.0, "step": 27340 }, { "epoch": 0.19573463107421454, "grad_norm": 3.239296120227664e-07, "learning_rate": 8.043870321334002e-06, "loss": 0.0001, "step": 27350 }, { "epoch": 0.19580619766692908, "grad_norm": 0.0, "learning_rate": 8.043154655406856e-06, "loss": 0.0611, "step": 27360 }, { "epoch": 0.1958777642596436, "grad_norm": 0.0, "learning_rate": 8.042438989479711e-06, "loss": 0.0, "step": 27370 }, { "epoch": 0.1959493308523581, "grad_norm": 0.0, "learning_rate": 8.041723323552567e-06, "loss": 0.0002, "step": 27380 }, { "epoch": 0.19602089744507264, "grad_norm": 0.0, "learning_rate": 8.041007657625422e-06, "loss": 0.1224, "step": 27390 }, { "epoch": 0.19609246403778716, "grad_norm": 4.996632583242899e-07, "learning_rate": 8.040291991698276e-06, "loss": 0.0, "step": 27400 }, { "epoch": 0.1961640306305017, "grad_norm": 0.004015269223600626, "learning_rate": 8.03957632577113e-06, "loss": 1.5322, "step": 27410 }, { "epoch": 0.1962355972232162, "grad_norm": 0.0, "learning_rate": 8.038860659843986e-06, "loss": 0.0, "step": 27420 }, { "epoch": 0.19630716381593072, "grad_norm": 0.0, "learning_rate": 8.03814499391684e-06, "loss": 0.0, "step": 27430 }, { "epoch": 0.19637873040864526, "grad_norm": 2.0904792108922265e-05, "learning_rate": 8.037429327989695e-06, "loss": 0.0, "step": 27440 }, { "epoch": 0.19645029700135977, "grad_norm": 2.699140688733337e-09, "learning_rate": 8.03671366206255e-06, "loss": 0.0, "step": 27450 }, { "epoch": 0.19652186359407428, "grad_norm": 2.835109000898228e-07, "learning_rate": 8.035997996135406e-06, "loss": 0.0015, "step": 27460 }, { "epoch": 0.19659343018678882, "grad_norm": 8.849502797225739e-10, "learning_rate": 8.03528233020826e-06, "loss": 0.0, "step": 27470 }, { "epoch": 0.19666499677950333, "grad_norm": 0.0, "learning_rate": 8.034566664281114e-06, "loss": 0.0001, "step": 27480 }, { "epoch": 0.19673656337221784, "grad_norm": 0.0, "learning_rate": 8.03385099835397e-06, "loss": 0.0054, "step": 27490 }, { "epoch": 0.19680812996493238, "grad_norm": 0.0, "learning_rate": 8.033135332426823e-06, "loss": 0.0, "step": 27500 }, { "epoch": 0.1968796965576469, "grad_norm": 4.538923690233787e-07, "learning_rate": 8.032419666499679e-06, "loss": 0.0, "step": 27510 }, { "epoch": 0.1969512631503614, "grad_norm": 0.0013926101382821798, "learning_rate": 8.031704000572534e-06, "loss": 0.0, "step": 27520 }, { "epoch": 0.19702282974307594, "grad_norm": 0.0, "learning_rate": 8.03098833464539e-06, "loss": 0.0139, "step": 27530 }, { "epoch": 0.19709439633579046, "grad_norm": 0.0001769658556440845, "learning_rate": 8.030272668718243e-06, "loss": 0.0, "step": 27540 }, { "epoch": 0.19716596292850497, "grad_norm": 0.0, "learning_rate": 8.029557002791098e-06, "loss": 0.0926, "step": 27550 }, { "epoch": 0.1972375295212195, "grad_norm": 1.7756989336703555e-06, "learning_rate": 8.028841336863953e-06, "loss": 0.9791, "step": 27560 }, { "epoch": 0.19730909611393402, "grad_norm": 0.0, "learning_rate": 8.028125670936807e-06, "loss": 0.0, "step": 27570 }, { "epoch": 0.19738066270664853, "grad_norm": 0.0, "learning_rate": 8.027410005009662e-06, "loss": 0.0, "step": 27580 }, { "epoch": 0.19745222929936307, "grad_norm": 9.952615801012143e-05, "learning_rate": 8.026694339082518e-06, "loss": 0.0, "step": 27590 }, { "epoch": 0.19752379589207758, "grad_norm": 0.0, "learning_rate": 8.025978673155371e-06, "loss": 0.0069, "step": 27600 }, { "epoch": 0.1975953624847921, "grad_norm": 0.4141561985015869, "learning_rate": 8.025263007228226e-06, "loss": 0.0001, "step": 27610 }, { "epoch": 0.19766692907750663, "grad_norm": 0.0, "learning_rate": 8.024547341301082e-06, "loss": 0.0058, "step": 27620 }, { "epoch": 0.19773849567022114, "grad_norm": 0.00029226820333860815, "learning_rate": 8.023831675373937e-06, "loss": 0.0004, "step": 27630 }, { "epoch": 0.19781006226293565, "grad_norm": 0.0, "learning_rate": 8.02311600944679e-06, "loss": 0.0, "step": 27640 }, { "epoch": 0.1978816288556502, "grad_norm": 0.0, "learning_rate": 8.022400343519646e-06, "loss": 0.0, "step": 27650 }, { "epoch": 0.1979531954483647, "grad_norm": 0.00022086381795816123, "learning_rate": 8.021684677592501e-06, "loss": 0.0, "step": 27660 }, { "epoch": 0.19802476204107922, "grad_norm": 8.885867686103666e-08, "learning_rate": 8.020969011665355e-06, "loss": 0.0, "step": 27670 }, { "epoch": 0.19809632863379376, "grad_norm": 0.20306110382080078, "learning_rate": 8.02025334573821e-06, "loss": 0.0498, "step": 27680 }, { "epoch": 0.19816789522650827, "grad_norm": 0.0, "learning_rate": 8.019537679811065e-06, "loss": 0.0, "step": 27690 }, { "epoch": 0.19823946181922278, "grad_norm": 0.0, "learning_rate": 8.01882201388392e-06, "loss": 0.0, "step": 27700 }, { "epoch": 0.19831102841193732, "grad_norm": 8.88224216399891e-10, "learning_rate": 8.018106347956774e-06, "loss": 0.0, "step": 27710 }, { "epoch": 0.19838259500465183, "grad_norm": 0.0, "learning_rate": 8.01739068202963e-06, "loss": 0.0, "step": 27720 }, { "epoch": 0.19845416159736634, "grad_norm": 577.2596435546875, "learning_rate": 8.016675016102485e-06, "loss": 0.1056, "step": 27730 }, { "epoch": 0.19852572819008088, "grad_norm": 0.0, "learning_rate": 8.015959350175338e-06, "loss": 0.0001, "step": 27740 }, { "epoch": 0.1985972947827954, "grad_norm": 0.0, "learning_rate": 8.015243684248194e-06, "loss": 0.0002, "step": 27750 }, { "epoch": 0.1986688613755099, "grad_norm": 0.0, "learning_rate": 8.014528018321049e-06, "loss": 0.0, "step": 27760 }, { "epoch": 0.19874042796822444, "grad_norm": 0.0, "learning_rate": 8.013812352393903e-06, "loss": 0.0, "step": 27770 }, { "epoch": 0.19881199456093895, "grad_norm": 0.0, "learning_rate": 8.013096686466758e-06, "loss": 0.0, "step": 27780 }, { "epoch": 0.19888356115365347, "grad_norm": 0.0, "learning_rate": 8.012381020539613e-06, "loss": 0.0, "step": 27790 }, { "epoch": 0.198955127746368, "grad_norm": 0.0, "learning_rate": 8.011665354612469e-06, "loss": 0.0001, "step": 27800 }, { "epoch": 0.19902669433908252, "grad_norm": 0.0, "learning_rate": 8.010949688685322e-06, "loss": 0.0, "step": 27810 }, { "epoch": 0.19909826093179703, "grad_norm": 0.0, "learning_rate": 8.010234022758177e-06, "loss": 0.0002, "step": 27820 }, { "epoch": 0.19916982752451157, "grad_norm": 0.0, "learning_rate": 8.009518356831033e-06, "loss": 0.0, "step": 27830 }, { "epoch": 0.19924139411722608, "grad_norm": 0.0, "learning_rate": 8.008802690903886e-06, "loss": 0.0, "step": 27840 }, { "epoch": 0.1993129607099406, "grad_norm": 0.0, "learning_rate": 8.008087024976742e-06, "loss": 0.0, "step": 27850 }, { "epoch": 0.19938452730265513, "grad_norm": 0.0, "learning_rate": 8.007371359049595e-06, "loss": 0.0002, "step": 27860 }, { "epoch": 0.19945609389536964, "grad_norm": 0.0, "learning_rate": 8.006655693122452e-06, "loss": 0.0, "step": 27870 }, { "epoch": 0.19952766048808415, "grad_norm": 0.0, "learning_rate": 8.005940027195306e-06, "loss": 0.0, "step": 27880 }, { "epoch": 0.1995992270807987, "grad_norm": 0.0, "learning_rate": 8.00522436126816e-06, "loss": 0.0, "step": 27890 }, { "epoch": 0.1996707936735132, "grad_norm": 0.2123895287513733, "learning_rate": 8.004508695341016e-06, "loss": 0.0, "step": 27900 }, { "epoch": 0.19974236026622771, "grad_norm": 1.127864493355446e-06, "learning_rate": 8.00379302941387e-06, "loss": 0.0, "step": 27910 }, { "epoch": 0.19981392685894225, "grad_norm": 0.005576319061219692, "learning_rate": 8.003077363486725e-06, "loss": 0.0, "step": 27920 }, { "epoch": 0.19988549345165676, "grad_norm": 0.0, "learning_rate": 8.002361697559579e-06, "loss": 0.0, "step": 27930 }, { "epoch": 0.19995706004437128, "grad_norm": 0.0, "learning_rate": 8.001646031632436e-06, "loss": 0.0005, "step": 27940 }, { "epoch": 0.20002862663708582, "grad_norm": 0.0003645142132882029, "learning_rate": 8.00093036570529e-06, "loss": 0.0, "step": 27950 }, { "epoch": 0.20010019322980033, "grad_norm": 2.32359251839398e-08, "learning_rate": 8.000214699778143e-06, "loss": 0.0007, "step": 27960 }, { "epoch": 0.20017175982251484, "grad_norm": 0.0, "learning_rate": 7.999499033851e-06, "loss": 0.0, "step": 27970 }, { "epoch": 0.20024332641522938, "grad_norm": 0.02262040786445141, "learning_rate": 7.998783367923854e-06, "loss": 0.0, "step": 27980 }, { "epoch": 0.2003148930079439, "grad_norm": 0.0, "learning_rate": 7.998067701996709e-06, "loss": 0.0, "step": 27990 }, { "epoch": 0.2003864596006584, "grad_norm": 0.0, "learning_rate": 7.997352036069563e-06, "loss": 0.0, "step": 28000 }, { "epoch": 0.20045802619337294, "grad_norm": 0.025412213057279587, "learning_rate": 7.996636370142418e-06, "loss": 0.0, "step": 28010 }, { "epoch": 0.20052959278608745, "grad_norm": 2.631960649424059e-09, "learning_rate": 7.995920704215273e-06, "loss": 0.001, "step": 28020 }, { "epoch": 0.20060115937880196, "grad_norm": 0.0, "learning_rate": 7.995205038288127e-06, "loss": 0.0, "step": 28030 }, { "epoch": 0.2006727259715165, "grad_norm": 0.0, "learning_rate": 7.994489372360984e-06, "loss": 0.0, "step": 28040 }, { "epoch": 0.200744292564231, "grad_norm": 0.0, "learning_rate": 7.993773706433837e-06, "loss": 0.0286, "step": 28050 }, { "epoch": 0.20081585915694553, "grad_norm": 0.0, "learning_rate": 7.993058040506693e-06, "loss": 0.0001, "step": 28060 }, { "epoch": 0.20088742574966006, "grad_norm": 0.0, "learning_rate": 7.992342374579546e-06, "loss": 0.0, "step": 28070 }, { "epoch": 0.20095899234237458, "grad_norm": 0.0, "learning_rate": 7.991626708652401e-06, "loss": 0.0, "step": 28080 }, { "epoch": 0.2010305589350891, "grad_norm": 0.003954716492444277, "learning_rate": 7.990911042725257e-06, "loss": 0.0, "step": 28090 }, { "epoch": 0.20110212552780363, "grad_norm": 3.643185209512012e-06, "learning_rate": 7.99019537679811e-06, "loss": 0.0, "step": 28100 }, { "epoch": 0.20117369212051814, "grad_norm": 0.0, "learning_rate": 7.989479710870967e-06, "loss": 0.0, "step": 28110 }, { "epoch": 0.20124525871323265, "grad_norm": 5.558161153551566e-10, "learning_rate": 7.988764044943821e-06, "loss": 0.0, "step": 28120 }, { "epoch": 0.2013168253059472, "grad_norm": 0.0, "learning_rate": 7.988048379016675e-06, "loss": 0.0007, "step": 28130 }, { "epoch": 0.2013883918986617, "grad_norm": 0.0, "learning_rate": 7.98733271308953e-06, "loss": 0.0, "step": 28140 }, { "epoch": 0.2014599584913762, "grad_norm": 0.0, "learning_rate": 7.986617047162385e-06, "loss": 0.0, "step": 28150 }, { "epoch": 0.20153152508409075, "grad_norm": 1.6732642507122364e-06, "learning_rate": 7.98590138123524e-06, "loss": 0.0, "step": 28160 }, { "epoch": 0.20160309167680526, "grad_norm": 2.8305493060543085e-07, "learning_rate": 7.985185715308094e-06, "loss": 0.0, "step": 28170 }, { "epoch": 0.2016746582695198, "grad_norm": 0.0004641191044356674, "learning_rate": 7.984470049380951e-06, "loss": 0.0143, "step": 28180 }, { "epoch": 0.2017462248622343, "grad_norm": 0.03576326370239258, "learning_rate": 7.983754383453805e-06, "loss": 0.0, "step": 28190 }, { "epoch": 0.20181779145494882, "grad_norm": 4.866523850566296e-10, "learning_rate": 7.983038717526658e-06, "loss": 0.0027, "step": 28200 }, { "epoch": 0.20188935804766336, "grad_norm": 0.0, "learning_rate": 7.982323051599513e-06, "loss": 0.1038, "step": 28210 }, { "epoch": 0.20196092464037788, "grad_norm": 0.0, "learning_rate": 7.981607385672369e-06, "loss": 0.0, "step": 28220 }, { "epoch": 0.2020324912330924, "grad_norm": 0.02501092664897442, "learning_rate": 7.980891719745224e-06, "loss": 0.0, "step": 28230 }, { "epoch": 0.20210405782580693, "grad_norm": 0.0, "learning_rate": 7.980176053818078e-06, "loss": 0.0, "step": 28240 }, { "epoch": 0.20217562441852144, "grad_norm": 0.0, "learning_rate": 7.979460387890933e-06, "loss": 0.0, "step": 28250 }, { "epoch": 0.20224719101123595, "grad_norm": 0.0, "learning_rate": 7.978744721963788e-06, "loss": 0.0, "step": 28260 }, { "epoch": 0.2023187576039505, "grad_norm": 0.0, "learning_rate": 7.978029056036642e-06, "loss": 0.0, "step": 28270 }, { "epoch": 0.202390324196665, "grad_norm": 8.586738113081083e-05, "learning_rate": 7.977313390109497e-06, "loss": 0.0, "step": 28280 }, { "epoch": 0.2024618907893795, "grad_norm": 0.00047935632755979896, "learning_rate": 7.976597724182352e-06, "loss": 0.0, "step": 28290 }, { "epoch": 0.20253345738209405, "grad_norm": 0.0, "learning_rate": 7.975882058255208e-06, "loss": 0.0, "step": 28300 }, { "epoch": 0.20260502397480856, "grad_norm": 0.0, "learning_rate": 7.975166392328061e-06, "loss": 0.0001, "step": 28310 }, { "epoch": 0.20267659056752307, "grad_norm": 0.004323197528719902, "learning_rate": 7.974450726400917e-06, "loss": 0.0, "step": 28320 }, { "epoch": 0.2027481571602376, "grad_norm": 0.0, "learning_rate": 7.973735060473772e-06, "loss": 0.0002, "step": 28330 }, { "epoch": 0.20281972375295212, "grad_norm": 0.0, "learning_rate": 7.973019394546625e-06, "loss": 0.0, "step": 28340 }, { "epoch": 0.20289129034566664, "grad_norm": 0.36515459418296814, "learning_rate": 7.97230372861948e-06, "loss": 0.0, "step": 28350 }, { "epoch": 0.20296285693838118, "grad_norm": 0.0, "learning_rate": 7.971588062692336e-06, "loss": 0.0019, "step": 28360 }, { "epoch": 0.2030344235310957, "grad_norm": 4.3876097199913033e-10, "learning_rate": 7.97087239676519e-06, "loss": 0.0, "step": 28370 }, { "epoch": 0.2031059901238102, "grad_norm": 0.0, "learning_rate": 7.970156730838045e-06, "loss": 0.0, "step": 28380 }, { "epoch": 0.20317755671652474, "grad_norm": 0.0, "learning_rate": 7.9694410649109e-06, "loss": 0.0, "step": 28390 }, { "epoch": 0.20324912330923925, "grad_norm": 0.004154724068939686, "learning_rate": 7.968725398983756e-06, "loss": 0.0, "step": 28400 }, { "epoch": 0.20332068990195376, "grad_norm": 2.0433493785887435e-10, "learning_rate": 7.96800973305661e-06, "loss": 0.0, "step": 28410 }, { "epoch": 0.2033922564946683, "grad_norm": 0.0, "learning_rate": 7.967294067129464e-06, "loss": 0.0015, "step": 28420 }, { "epoch": 0.2034638230873828, "grad_norm": 6.2979283939057495e-06, "learning_rate": 7.966649967795034e-06, "loss": 0.8461, "step": 28430 }, { "epoch": 0.20353538968009732, "grad_norm": 0.0, "learning_rate": 7.96593430186789e-06, "loss": 0.0, "step": 28440 }, { "epoch": 0.20360695627281186, "grad_norm": 0.0, "learning_rate": 7.965218635940743e-06, "loss": 0.0009, "step": 28450 }, { "epoch": 0.20367852286552637, "grad_norm": 0.0, "learning_rate": 7.964502970013598e-06, "loss": 0.0021, "step": 28460 }, { "epoch": 0.20375008945824089, "grad_norm": 0.0, "learning_rate": 7.963787304086454e-06, "loss": 0.0, "step": 28470 }, { "epoch": 0.20382165605095542, "grad_norm": 0.0, "learning_rate": 7.963071638159307e-06, "loss": 0.0, "step": 28480 }, { "epoch": 0.20389322264366994, "grad_norm": 0.0, "learning_rate": 7.962355972232163e-06, "loss": 0.0, "step": 28490 }, { "epoch": 0.20396478923638445, "grad_norm": 0.0, "learning_rate": 7.961640306305018e-06, "loss": 0.0, "step": 28500 }, { "epoch": 0.204036355829099, "grad_norm": 0.0, "learning_rate": 7.960924640377871e-06, "loss": 0.0, "step": 28510 }, { "epoch": 0.2041079224218135, "grad_norm": 0.0, "learning_rate": 7.960208974450727e-06, "loss": 0.0, "step": 28520 }, { "epoch": 0.204179489014528, "grad_norm": 7.816029392415658e-05, "learning_rate": 7.959493308523582e-06, "loss": 0.0, "step": 28530 }, { "epoch": 0.20425105560724255, "grad_norm": 3.139955151709728e-05, "learning_rate": 7.958777642596437e-06, "loss": 0.0, "step": 28540 }, { "epoch": 0.20432262219995706, "grad_norm": 0.0, "learning_rate": 7.958133543262005e-06, "loss": 0.5457, "step": 28550 }, { "epoch": 0.20439418879267157, "grad_norm": 0.0, "learning_rate": 7.95741787733486e-06, "loss": 0.0, "step": 28560 }, { "epoch": 0.2044657553853861, "grad_norm": 0.0, "learning_rate": 7.956702211407716e-06, "loss": 0.0003, "step": 28570 }, { "epoch": 0.20453732197810062, "grad_norm": 0.0, "learning_rate": 7.955986545480571e-06, "loss": 0.0, "step": 28580 }, { "epoch": 0.20460888857081513, "grad_norm": 0.0, "learning_rate": 7.955270879553425e-06, "loss": 0.0001, "step": 28590 }, { "epoch": 0.20468045516352967, "grad_norm": 0.0007074995082803071, "learning_rate": 7.95455521362628e-06, "loss": 0.0, "step": 28600 }, { "epoch": 0.20475202175624418, "grad_norm": 0.0, "learning_rate": 7.953839547699135e-06, "loss": 0.0, "step": 28610 }, { "epoch": 0.2048235883489587, "grad_norm": 0.0, "learning_rate": 7.953123881771989e-06, "loss": 0.0, "step": 28620 }, { "epoch": 0.20489515494167324, "grad_norm": 0.0, "learning_rate": 7.952408215844844e-06, "loss": 0.0936, "step": 28630 }, { "epoch": 0.20496672153438775, "grad_norm": 0.0, "learning_rate": 7.9516925499177e-06, "loss": 0.0, "step": 28640 }, { "epoch": 0.20503828812710226, "grad_norm": 0.0, "learning_rate": 7.950976883990555e-06, "loss": 0.0, "step": 28650 }, { "epoch": 0.2051098547198168, "grad_norm": 0.0, "learning_rate": 7.950261218063409e-06, "loss": 0.2565, "step": 28660 }, { "epoch": 0.2051814213125313, "grad_norm": 0.0, "learning_rate": 7.949545552136264e-06, "loss": 0.0, "step": 28670 }, { "epoch": 0.20525298790524582, "grad_norm": 0.00448095565661788, "learning_rate": 7.948829886209119e-06, "loss": 0.0038, "step": 28680 }, { "epoch": 0.20532455449796036, "grad_norm": 0.0, "learning_rate": 7.948114220281973e-06, "loss": 0.0, "step": 28690 }, { "epoch": 0.20539612109067487, "grad_norm": 0.0, "learning_rate": 7.947398554354828e-06, "loss": 0.0, "step": 28700 }, { "epoch": 0.20546768768338938, "grad_norm": 0.0, "learning_rate": 7.946682888427683e-06, "loss": 0.0002, "step": 28710 }, { "epoch": 0.20553925427610392, "grad_norm": 9.956767632601782e-10, "learning_rate": 7.945967222500537e-06, "loss": 0.3589, "step": 28720 }, { "epoch": 0.20561082086881843, "grad_norm": 1.1964724762947299e-05, "learning_rate": 7.945251556573392e-06, "loss": 0.0, "step": 28730 }, { "epoch": 0.20568238746153295, "grad_norm": 5.922000809732708e-07, "learning_rate": 7.944535890646247e-06, "loss": 0.0023, "step": 28740 }, { "epoch": 0.20575395405424748, "grad_norm": 1.0311364349036012e-06, "learning_rate": 7.943820224719103e-06, "loss": 0.0, "step": 28750 }, { "epoch": 0.205825520646962, "grad_norm": 0.0, "learning_rate": 7.943104558791956e-06, "loss": 0.0, "step": 28760 }, { "epoch": 0.2058970872396765, "grad_norm": 0.0, "learning_rate": 7.942388892864812e-06, "loss": 0.0, "step": 28770 }, { "epoch": 0.20596865383239105, "grad_norm": 0.0, "learning_rate": 7.941673226937667e-06, "loss": 0.0, "step": 28780 }, { "epoch": 0.20604022042510556, "grad_norm": 0.0, "learning_rate": 7.94095756101052e-06, "loss": 0.0, "step": 28790 }, { "epoch": 0.20611178701782007, "grad_norm": 0.00023412858718074858, "learning_rate": 7.940241895083376e-06, "loss": 0.0792, "step": 28800 }, { "epoch": 0.2061833536105346, "grad_norm": 0.0, "learning_rate": 7.939526229156231e-06, "loss": 0.0, "step": 28810 }, { "epoch": 0.20625492020324912, "grad_norm": 0.0, "learning_rate": 7.938810563229086e-06, "loss": 0.0, "step": 28820 }, { "epoch": 0.20632648679596363, "grad_norm": 0.0, "learning_rate": 7.93809489730194e-06, "loss": 0.0, "step": 28830 }, { "epoch": 0.20639805338867817, "grad_norm": 0.0, "learning_rate": 7.937379231374794e-06, "loss": 0.0285, "step": 28840 }, { "epoch": 0.20646961998139268, "grad_norm": 0.0, "learning_rate": 7.93666356544765e-06, "loss": 0.0, "step": 28850 }, { "epoch": 0.2065411865741072, "grad_norm": 0.0, "learning_rate": 7.935947899520504e-06, "loss": 0.0, "step": 28860 }, { "epoch": 0.20661275316682173, "grad_norm": 8.089571929303929e-05, "learning_rate": 7.93523223359336e-06, "loss": 0.0, "step": 28870 }, { "epoch": 0.20668431975953624, "grad_norm": 0.0, "learning_rate": 7.934516567666213e-06, "loss": 0.0, "step": 28880 }, { "epoch": 0.20675588635225076, "grad_norm": 23.627655029296875, "learning_rate": 7.93380090173907e-06, "loss": 0.0032, "step": 28890 }, { "epoch": 0.2068274529449653, "grad_norm": 0.0, "learning_rate": 7.933085235811924e-06, "loss": 0.0, "step": 28900 }, { "epoch": 0.2068990195376798, "grad_norm": 0.0, "learning_rate": 7.932369569884777e-06, "loss": 0.0, "step": 28910 }, { "epoch": 0.20697058613039432, "grad_norm": 0.0, "learning_rate": 7.931653903957634e-06, "loss": 0.0018, "step": 28920 }, { "epoch": 0.20704215272310886, "grad_norm": 0.0, "learning_rate": 7.930938238030488e-06, "loss": 0.0, "step": 28930 }, { "epoch": 0.20711371931582337, "grad_norm": 0.0, "learning_rate": 7.930222572103343e-06, "loss": 0.0008, "step": 28940 }, { "epoch": 0.2071852859085379, "grad_norm": 0.0, "learning_rate": 7.929506906176197e-06, "loss": 0.0344, "step": 28950 }, { "epoch": 0.20725685250125242, "grad_norm": 0.0, "learning_rate": 7.928791240249052e-06, "loss": 0.0, "step": 28960 }, { "epoch": 0.20732841909396693, "grad_norm": 0.0, "learning_rate": 7.928075574321907e-06, "loss": 0.0, "step": 28970 }, { "epoch": 0.20739998568668147, "grad_norm": 0.0, "learning_rate": 7.927359908394761e-06, "loss": 0.0, "step": 28980 }, { "epoch": 0.20747155227939598, "grad_norm": 0.0019059957703575492, "learning_rate": 7.926644242467618e-06, "loss": 0.0, "step": 28990 }, { "epoch": 0.2075431188721105, "grad_norm": 0.0, "learning_rate": 7.925928576540472e-06, "loss": 0.0, "step": 29000 }, { "epoch": 0.20761468546482503, "grad_norm": 0.0, "learning_rate": 7.925212910613327e-06, "loss": 0.0, "step": 29010 }, { "epoch": 0.20768625205753954, "grad_norm": 0.0, "learning_rate": 7.92449724468618e-06, "loss": 0.0001, "step": 29020 }, { "epoch": 0.20775781865025406, "grad_norm": 3.004337045808825e-08, "learning_rate": 7.923781578759036e-06, "loss": 0.0, "step": 29030 }, { "epoch": 0.2078293852429686, "grad_norm": 0.0, "learning_rate": 7.923065912831891e-06, "loss": 0.0, "step": 29040 }, { "epoch": 0.2079009518356831, "grad_norm": 5.105441687192069e-07, "learning_rate": 7.922350246904745e-06, "loss": 0.0, "step": 29050 }, { "epoch": 0.20797251842839762, "grad_norm": 3.769231989281252e-05, "learning_rate": 7.921634580977602e-06, "loss": 0.0001, "step": 29060 }, { "epoch": 0.20804408502111216, "grad_norm": 5.886513054775833e-09, "learning_rate": 7.920918915050455e-06, "loss": 0.0, "step": 29070 }, { "epoch": 0.20811565161382667, "grad_norm": 0.0, "learning_rate": 7.920203249123309e-06, "loss": 0.0, "step": 29080 }, { "epoch": 0.20818721820654118, "grad_norm": 5.41054287168663e-05, "learning_rate": 7.919487583196164e-06, "loss": 0.0, "step": 29090 }, { "epoch": 0.20825878479925572, "grad_norm": 0.0, "learning_rate": 7.91877191726902e-06, "loss": 0.0, "step": 29100 }, { "epoch": 0.20833035139197023, "grad_norm": 0.0, "learning_rate": 7.918056251341875e-06, "loss": 0.0474, "step": 29110 }, { "epoch": 0.20840191798468474, "grad_norm": 0.0, "learning_rate": 7.917340585414728e-06, "loss": 0.0, "step": 29120 }, { "epoch": 0.20847348457739928, "grad_norm": 0.0, "learning_rate": 7.916624919487585e-06, "loss": 0.0, "step": 29130 }, { "epoch": 0.2085450511701138, "grad_norm": 0.0, "learning_rate": 7.915909253560439e-06, "loss": 0.0, "step": 29140 }, { "epoch": 0.2086166177628283, "grad_norm": 0.0, "learning_rate": 7.915193587633292e-06, "loss": 0.0041, "step": 29150 }, { "epoch": 0.20868818435554284, "grad_norm": 0.0, "learning_rate": 7.914477921706148e-06, "loss": 0.0, "step": 29160 }, { "epoch": 0.20875975094825736, "grad_norm": 0.0, "learning_rate": 7.913762255779003e-06, "loss": 0.0, "step": 29170 }, { "epoch": 0.20883131754097187, "grad_norm": 0.0, "learning_rate": 7.913046589851858e-06, "loss": 0.0, "step": 29180 }, { "epoch": 0.2089028841336864, "grad_norm": 0.0, "learning_rate": 7.912330923924712e-06, "loss": 0.0001, "step": 29190 }, { "epoch": 0.20897445072640092, "grad_norm": 0.0, "learning_rate": 7.911615257997567e-06, "loss": 0.0015, "step": 29200 }, { "epoch": 0.20904601731911543, "grad_norm": 0.0, "learning_rate": 7.910899592070422e-06, "loss": 0.0, "step": 29210 }, { "epoch": 0.20911758391182997, "grad_norm": 0.0, "learning_rate": 7.910183926143276e-06, "loss": 0.0003, "step": 29220 }, { "epoch": 0.20918915050454448, "grad_norm": 8.941527518402381e-10, "learning_rate": 7.909468260216131e-06, "loss": 0.0, "step": 29230 }, { "epoch": 0.209260717097259, "grad_norm": 0.0, "learning_rate": 7.908752594288987e-06, "loss": 0.0, "step": 29240 }, { "epoch": 0.20933228368997353, "grad_norm": 0.0, "learning_rate": 7.908036928361842e-06, "loss": 0.0576, "step": 29250 }, { "epoch": 0.20940385028268804, "grad_norm": 0.0, "learning_rate": 7.907321262434696e-06, "loss": 0.0, "step": 29260 }, { "epoch": 0.20947541687540255, "grad_norm": 0.0, "learning_rate": 7.90660559650755e-06, "loss": 0.0, "step": 29270 }, { "epoch": 0.2095469834681171, "grad_norm": 0.0, "learning_rate": 7.905889930580406e-06, "loss": 0.0, "step": 29280 }, { "epoch": 0.2096185500608316, "grad_norm": 7.4839301109313965, "learning_rate": 7.90517426465326e-06, "loss": 0.0015, "step": 29290 }, { "epoch": 0.20969011665354612, "grad_norm": 0.0, "learning_rate": 7.904458598726115e-06, "loss": 0.0002, "step": 29300 }, { "epoch": 0.20976168324626066, "grad_norm": 0.0, "learning_rate": 7.90374293279897e-06, "loss": 0.0207, "step": 29310 }, { "epoch": 0.20983324983897517, "grad_norm": 0.0, "learning_rate": 7.903027266871824e-06, "loss": 0.0, "step": 29320 }, { "epoch": 0.20990481643168968, "grad_norm": 0.0, "learning_rate": 7.90231160094468e-06, "loss": 0.0, "step": 29330 }, { "epoch": 0.20997638302440422, "grad_norm": 0.00039066298631951213, "learning_rate": 7.901595935017534e-06, "loss": 0.0, "step": 29340 }, { "epoch": 0.21004794961711873, "grad_norm": 0.0, "learning_rate": 7.90088026909039e-06, "loss": 0.0, "step": 29350 }, { "epoch": 0.21011951620983324, "grad_norm": 761.07421875, "learning_rate": 7.900164603163243e-06, "loss": 1.3219, "step": 29360 }, { "epoch": 0.21019108280254778, "grad_norm": 4.717720969438233e-08, "learning_rate": 7.899448937236099e-06, "loss": 0.0, "step": 29370 }, { "epoch": 0.2102626493952623, "grad_norm": 0.0, "learning_rate": 7.898733271308954e-06, "loss": 0.0013, "step": 29380 }, { "epoch": 0.2103342159879768, "grad_norm": 0.0, "learning_rate": 7.898017605381808e-06, "loss": 0.0, "step": 29390 }, { "epoch": 0.21040578258069134, "grad_norm": 0.0, "learning_rate": 7.897301939454663e-06, "loss": 0.0, "step": 29400 }, { "epoch": 0.21047734917340585, "grad_norm": 0.051393963396549225, "learning_rate": 7.896586273527518e-06, "loss": 0.0, "step": 29410 }, { "epoch": 0.21054891576612036, "grad_norm": 6.817400333147816e-08, "learning_rate": 7.895870607600373e-06, "loss": 0.0, "step": 29420 }, { "epoch": 0.2106204823588349, "grad_norm": 1.0719889360188972e-05, "learning_rate": 7.895154941673227e-06, "loss": 0.0, "step": 29430 }, { "epoch": 0.21069204895154942, "grad_norm": 9.87578573585779e-07, "learning_rate": 7.894439275746082e-06, "loss": 0.0, "step": 29440 }, { "epoch": 0.21076361554426393, "grad_norm": 4.555056875688024e-05, "learning_rate": 7.893723609818938e-06, "loss": 0.0036, "step": 29450 }, { "epoch": 0.21083518213697847, "grad_norm": 0.0, "learning_rate": 7.893007943891791e-06, "loss": 0.0, "step": 29460 }, { "epoch": 0.21090674872969298, "grad_norm": 0.0, "learning_rate": 7.892292277964646e-06, "loss": 0.0, "step": 29470 }, { "epoch": 0.2109783153224075, "grad_norm": 0.0, "learning_rate": 7.891576612037502e-06, "loss": 0.0032, "step": 29480 }, { "epoch": 0.21104988191512203, "grad_norm": 0.0, "learning_rate": 7.890860946110355e-06, "loss": 0.182, "step": 29490 }, { "epoch": 0.21112144850783654, "grad_norm": 0.0, "learning_rate": 7.89014528018321e-06, "loss": 0.0002, "step": 29500 }, { "epoch": 0.21119301510055105, "grad_norm": 0.0, "learning_rate": 7.889429614256066e-06, "loss": 0.4273, "step": 29510 }, { "epoch": 0.2112645816932656, "grad_norm": 0.0, "learning_rate": 7.888713948328921e-06, "loss": 0.0, "step": 29520 }, { "epoch": 0.2113361482859801, "grad_norm": 0.6774659752845764, "learning_rate": 7.887998282401775e-06, "loss": 0.0059, "step": 29530 }, { "epoch": 0.2114077148786946, "grad_norm": 0.0, "learning_rate": 7.88728261647463e-06, "loss": 0.0, "step": 29540 }, { "epoch": 0.21147928147140915, "grad_norm": 5.937529567745514e-05, "learning_rate": 7.886566950547485e-06, "loss": 0.0, "step": 29550 }, { "epoch": 0.21155084806412366, "grad_norm": 0.0, "learning_rate": 7.885851284620339e-06, "loss": 0.0, "step": 29560 }, { "epoch": 0.21162241465683818, "grad_norm": 9.703514933789847e-07, "learning_rate": 7.885135618693194e-06, "loss": 0.0001, "step": 29570 }, { "epoch": 0.21169398124955272, "grad_norm": 0.0, "learning_rate": 7.88441995276605e-06, "loss": 0.0, "step": 29580 }, { "epoch": 0.21176554784226723, "grad_norm": 0.0, "learning_rate": 7.883704286838905e-06, "loss": 0.0001, "step": 29590 }, { "epoch": 0.21183711443498174, "grad_norm": 9.87030235322095e-10, "learning_rate": 7.882988620911759e-06, "loss": 0.0, "step": 29600 }, { "epoch": 0.21190868102769628, "grad_norm": 1.0608307093207259e-05, "learning_rate": 7.882272954984614e-06, "loss": 0.0, "step": 29610 }, { "epoch": 0.2119802476204108, "grad_norm": 0.0, "learning_rate": 7.881557289057469e-06, "loss": 0.01, "step": 29620 }, { "epoch": 0.2120518142131253, "grad_norm": 1.1125018684055021e-09, "learning_rate": 7.880841623130323e-06, "loss": 0.0, "step": 29630 }, { "epoch": 0.21212338080583984, "grad_norm": 1.3006775589019526e-05, "learning_rate": 7.880125957203178e-06, "loss": 0.0042, "step": 29640 }, { "epoch": 0.21219494739855435, "grad_norm": 1.10354836380111e-09, "learning_rate": 7.879410291276033e-06, "loss": 0.0, "step": 29650 }, { "epoch": 0.21226651399126886, "grad_norm": 0.0022566302213817835, "learning_rate": 7.878694625348889e-06, "loss": 0.0105, "step": 29660 }, { "epoch": 0.2123380805839834, "grad_norm": 0.0, "learning_rate": 7.877978959421742e-06, "loss": 0.0, "step": 29670 }, { "epoch": 0.2124096471766979, "grad_norm": 0.0, "learning_rate": 7.877263293494597e-06, "loss": 0.0, "step": 29680 }, { "epoch": 0.21248121376941242, "grad_norm": 0.0, "learning_rate": 7.876547627567453e-06, "loss": 0.002, "step": 29690 }, { "epoch": 0.21255278036212696, "grad_norm": 9.223642960520806e-10, "learning_rate": 7.875831961640306e-06, "loss": 0.0, "step": 29700 }, { "epoch": 0.21262434695484148, "grad_norm": 0.0, "learning_rate": 7.875116295713162e-06, "loss": 0.0, "step": 29710 }, { "epoch": 0.212695913547556, "grad_norm": 1.3721838513447437e-05, "learning_rate": 7.874400629786017e-06, "loss": 0.0075, "step": 29720 }, { "epoch": 0.21276748014027053, "grad_norm": 0.0, "learning_rate": 7.87368496385887e-06, "loss": 0.0, "step": 29730 }, { "epoch": 0.21283904673298504, "grad_norm": 0.00031412733369506896, "learning_rate": 7.872969297931726e-06, "loss": 0.0043, "step": 29740 }, { "epoch": 0.21291061332569958, "grad_norm": 6.486184247478377e-07, "learning_rate": 7.872253632004581e-06, "loss": 0.0, "step": 29750 }, { "epoch": 0.2129821799184141, "grad_norm": 0.6113245487213135, "learning_rate": 7.871537966077436e-06, "loss": 0.0001, "step": 29760 }, { "epoch": 0.2130537465111286, "grad_norm": 9.4999230704218e-10, "learning_rate": 7.87082230015029e-06, "loss": 0.1864, "step": 29770 }, { "epoch": 0.21312531310384314, "grad_norm": 0.0, "learning_rate": 7.870106634223145e-06, "loss": 0.0, "step": 29780 }, { "epoch": 0.21319687969655765, "grad_norm": 0.0, "learning_rate": 7.869390968296e-06, "loss": 0.0, "step": 29790 }, { "epoch": 0.21326844628927216, "grad_norm": 0.0, "learning_rate": 7.868675302368854e-06, "loss": 0.0, "step": 29800 }, { "epoch": 0.2133400128819867, "grad_norm": 0.0, "learning_rate": 7.86795963644171e-06, "loss": 0.0, "step": 29810 }, { "epoch": 0.2134115794747012, "grad_norm": 0.0, "learning_rate": 7.867243970514565e-06, "loss": 0.0, "step": 29820 }, { "epoch": 0.21348314606741572, "grad_norm": 0.0, "learning_rate": 7.86652830458742e-06, "loss": 0.0, "step": 29830 }, { "epoch": 0.21355471266013026, "grad_norm": 0.0, "learning_rate": 7.865812638660274e-06, "loss": 0.0, "step": 29840 }, { "epoch": 0.21362627925284478, "grad_norm": 1.4994321873018635e-07, "learning_rate": 7.865096972733129e-06, "loss": 0.0, "step": 29850 }, { "epoch": 0.2136978458455593, "grad_norm": 0.0, "learning_rate": 7.864381306805984e-06, "loss": 0.0, "step": 29860 }, { "epoch": 0.21376941243827383, "grad_norm": 5.714077815355267e-07, "learning_rate": 7.863665640878838e-06, "loss": 0.0, "step": 29870 }, { "epoch": 0.21384097903098834, "grad_norm": 0.0, "learning_rate": 7.862949974951693e-06, "loss": 0.8695, "step": 29880 }, { "epoch": 0.21391254562370285, "grad_norm": 0.0, "learning_rate": 7.862234309024548e-06, "loss": 0.0, "step": 29890 }, { "epoch": 0.2139841122164174, "grad_norm": 0.0, "learning_rate": 7.861518643097404e-06, "loss": 0.0, "step": 29900 }, { "epoch": 0.2140556788091319, "grad_norm": 0.0, "learning_rate": 7.860802977170257e-06, "loss": 0.0005, "step": 29910 }, { "epoch": 0.2141272454018464, "grad_norm": 0.0, "learning_rate": 7.860087311243113e-06, "loss": 0.0016, "step": 29920 }, { "epoch": 0.21419881199456095, "grad_norm": 0.0, "learning_rate": 7.859371645315968e-06, "loss": 0.0, "step": 29930 }, { "epoch": 0.21427037858727546, "grad_norm": 23.142271041870117, "learning_rate": 7.858655979388821e-06, "loss": 0.0031, "step": 29940 }, { "epoch": 0.21434194517998997, "grad_norm": 0.0, "learning_rate": 7.857940313461677e-06, "loss": 0.0, "step": 29950 }, { "epoch": 0.2144135117727045, "grad_norm": 4.547118237496761e-09, "learning_rate": 7.857224647534532e-06, "loss": 0.0, "step": 29960 }, { "epoch": 0.21448507836541902, "grad_norm": 0.13978543877601624, "learning_rate": 7.856508981607386e-06, "loss": 0.0, "step": 29970 }, { "epoch": 0.21455664495813354, "grad_norm": 0.0, "learning_rate": 7.855793315680241e-06, "loss": 0.0, "step": 29980 }, { "epoch": 0.21462821155084807, "grad_norm": 0.002518201945349574, "learning_rate": 7.855077649753096e-06, "loss": 0.0002, "step": 29990 }, { "epoch": 0.2146997781435626, "grad_norm": 0.006424850784242153, "learning_rate": 7.854361983825952e-06, "loss": 0.0, "step": 30000 }, { "epoch": 0.2147713447362771, "grad_norm": 0.0, "learning_rate": 7.853646317898805e-06, "loss": 0.0002, "step": 30010 }, { "epoch": 0.21484291132899164, "grad_norm": 0.0, "learning_rate": 7.85293065197166e-06, "loss": 0.0, "step": 30020 }, { "epoch": 0.21491447792170615, "grad_norm": 19.49517059326172, "learning_rate": 7.852214986044516e-06, "loss": 0.0044, "step": 30030 }, { "epoch": 0.21498604451442066, "grad_norm": 0.0, "learning_rate": 7.85149932011737e-06, "loss": 0.0, "step": 30040 }, { "epoch": 0.2150576111071352, "grad_norm": 0.0, "learning_rate": 7.850783654190225e-06, "loss": 0.0474, "step": 30050 }, { "epoch": 0.2151291776998497, "grad_norm": 0.0, "learning_rate": 7.85006798826308e-06, "loss": 0.0, "step": 30060 }, { "epoch": 0.21520074429256422, "grad_norm": 0.0, "learning_rate": 7.849352322335935e-06, "loss": 0.0001, "step": 30070 }, { "epoch": 0.21527231088527876, "grad_norm": 0.0, "learning_rate": 7.848636656408789e-06, "loss": 0.0, "step": 30080 }, { "epoch": 0.21534387747799327, "grad_norm": 0.0, "learning_rate": 7.847920990481644e-06, "loss": 0.0088, "step": 30090 }, { "epoch": 0.21541544407070778, "grad_norm": 0.0, "learning_rate": 7.8472053245545e-06, "loss": 0.0, "step": 30100 }, { "epoch": 0.21548701066342232, "grad_norm": 0.0, "learning_rate": 7.846489658627353e-06, "loss": 0.0, "step": 30110 }, { "epoch": 0.21555857725613684, "grad_norm": 0.0008000521920621395, "learning_rate": 7.845773992700208e-06, "loss": 0.0, "step": 30120 }, { "epoch": 0.21563014384885135, "grad_norm": 0.0, "learning_rate": 7.845058326773064e-06, "loss": 0.0006, "step": 30130 }, { "epoch": 0.21570171044156589, "grad_norm": 0.0, "learning_rate": 7.844342660845917e-06, "loss": 0.0001, "step": 30140 }, { "epoch": 0.2157732770342804, "grad_norm": 3.2126767912643572e-09, "learning_rate": 7.843626994918772e-06, "loss": 0.0, "step": 30150 }, { "epoch": 0.2158448436269949, "grad_norm": 0.0, "learning_rate": 7.842911328991628e-06, "loss": 0.0, "step": 30160 }, { "epoch": 0.21591641021970945, "grad_norm": 1.0695755463530077e-06, "learning_rate": 7.842195663064483e-06, "loss": 0.0001, "step": 30170 }, { "epoch": 0.21598797681242396, "grad_norm": 1.8648605646376382e-06, "learning_rate": 7.841479997137337e-06, "loss": 0.0, "step": 30180 }, { "epoch": 0.21605954340513847, "grad_norm": 0.0, "learning_rate": 7.840764331210192e-06, "loss": 0.0044, "step": 30190 }, { "epoch": 0.216131109997853, "grad_norm": 0.0, "learning_rate": 7.840048665283047e-06, "loss": 0.0, "step": 30200 }, { "epoch": 0.21620267659056752, "grad_norm": 0.0, "learning_rate": 7.8393329993559e-06, "loss": 0.0, "step": 30210 }, { "epoch": 0.21627424318328203, "grad_norm": 1.0409868878014095e-07, "learning_rate": 7.838617333428756e-06, "loss": 0.9553, "step": 30220 }, { "epoch": 0.21634580977599657, "grad_norm": 0.7030838131904602, "learning_rate": 7.837901667501611e-06, "loss": 0.0001, "step": 30230 }, { "epoch": 0.21641737636871108, "grad_norm": 0.0, "learning_rate": 7.837186001574467e-06, "loss": 0.021, "step": 30240 }, { "epoch": 0.2164889429614256, "grad_norm": 0.0, "learning_rate": 7.83647033564732e-06, "loss": 0.1623, "step": 30250 }, { "epoch": 0.21656050955414013, "grad_norm": 0.0, "learning_rate": 7.835754669720176e-06, "loss": 0.0, "step": 30260 }, { "epoch": 0.21663207614685465, "grad_norm": 0.0, "learning_rate": 7.835039003793031e-06, "loss": 0.0062, "step": 30270 }, { "epoch": 0.21670364273956916, "grad_norm": 454.6976318359375, "learning_rate": 7.834323337865884e-06, "loss": 0.1959, "step": 30280 }, { "epoch": 0.2167752093322837, "grad_norm": 0.0, "learning_rate": 7.83360767193874e-06, "loss": 0.0, "step": 30290 }, { "epoch": 0.2168467759249982, "grad_norm": 9.265245459744165e-09, "learning_rate": 7.832892006011595e-06, "loss": 0.0055, "step": 30300 }, { "epoch": 0.21691834251771272, "grad_norm": 0.0, "learning_rate": 7.83217634008445e-06, "loss": 0.0001, "step": 30310 }, { "epoch": 0.21698990911042726, "grad_norm": 0.0, "learning_rate": 7.831460674157304e-06, "loss": 0.0, "step": 30320 }, { "epoch": 0.21706147570314177, "grad_norm": 0.0, "learning_rate": 7.830745008230158e-06, "loss": 0.0062, "step": 30330 }, { "epoch": 0.21713304229585628, "grad_norm": 0.0, "learning_rate": 7.830029342303015e-06, "loss": 0.0, "step": 30340 }, { "epoch": 0.21720460888857082, "grad_norm": 0.0, "learning_rate": 7.829313676375868e-06, "loss": 0.0, "step": 30350 }, { "epoch": 0.21727617548128533, "grad_norm": 0.0, "learning_rate": 7.828598010448723e-06, "loss": 0.0, "step": 30360 }, { "epoch": 0.21734774207399984, "grad_norm": 0.0, "learning_rate": 7.827882344521579e-06, "loss": 0.0, "step": 30370 }, { "epoch": 0.21741930866671438, "grad_norm": 0.0, "learning_rate": 7.827166678594432e-06, "loss": 0.0, "step": 30380 }, { "epoch": 0.2174908752594289, "grad_norm": 0.0, "learning_rate": 7.826451012667288e-06, "loss": 0.0, "step": 30390 }, { "epoch": 0.2175624418521434, "grad_norm": 0.0, "learning_rate": 7.825735346740141e-06, "loss": 0.0, "step": 30400 }, { "epoch": 0.21763400844485795, "grad_norm": 0.0, "learning_rate": 7.825019680812998e-06, "loss": 0.0, "step": 30410 }, { "epoch": 0.21770557503757246, "grad_norm": 0.0, "learning_rate": 7.824304014885852e-06, "loss": 0.5676, "step": 30420 }, { "epoch": 0.21777714163028697, "grad_norm": 0.0, "learning_rate": 7.823588348958707e-06, "loss": 0.326, "step": 30430 }, { "epoch": 0.2178487082230015, "grad_norm": 0.0, "learning_rate": 7.822872683031562e-06, "loss": 0.0, "step": 30440 }, { "epoch": 0.21792027481571602, "grad_norm": 0.0, "learning_rate": 7.822157017104416e-06, "loss": 0.0, "step": 30450 }, { "epoch": 0.21799184140843053, "grad_norm": 4.344547748565674, "learning_rate": 7.821441351177271e-06, "loss": 0.0009, "step": 30460 }, { "epoch": 0.21806340800114507, "grad_norm": 0.0, "learning_rate": 7.820725685250125e-06, "loss": 0.019, "step": 30470 }, { "epoch": 0.21813497459385958, "grad_norm": 0.0, "learning_rate": 7.820010019322982e-06, "loss": 0.0, "step": 30480 }, { "epoch": 0.2182065411865741, "grad_norm": 0.0, "learning_rate": 7.819294353395835e-06, "loss": 0.0014, "step": 30490 }, { "epoch": 0.21827810777928863, "grad_norm": 1.7577262042323127e-05, "learning_rate": 7.818578687468689e-06, "loss": 0.3469, "step": 30500 }, { "epoch": 0.21834967437200314, "grad_norm": 0.0, "learning_rate": 7.817863021541546e-06, "loss": 0.0, "step": 30510 }, { "epoch": 0.21842124096471768, "grad_norm": 0.0, "learning_rate": 7.8171473556144e-06, "loss": 0.2266, "step": 30520 }, { "epoch": 0.2184928075574322, "grad_norm": 0.0, "learning_rate": 7.816431689687255e-06, "loss": 0.1158, "step": 30530 }, { "epoch": 0.2185643741501467, "grad_norm": 1.8251756728204782e-07, "learning_rate": 7.815716023760108e-06, "loss": 0.0002, "step": 30540 }, { "epoch": 0.21863594074286125, "grad_norm": 0.0, "learning_rate": 7.815000357832965e-06, "loss": 0.0, "step": 30550 }, { "epoch": 0.21870750733557576, "grad_norm": 0.0, "learning_rate": 7.814284691905819e-06, "loss": 0.0, "step": 30560 }, { "epoch": 0.21877907392829027, "grad_norm": 7.975599736198546e-09, "learning_rate": 7.813569025978673e-06, "loss": 0.0, "step": 30570 }, { "epoch": 0.2188506405210048, "grad_norm": 0.0, "learning_rate": 7.81285336005153e-06, "loss": 0.0, "step": 30580 }, { "epoch": 0.21892220711371932, "grad_norm": 0.0, "learning_rate": 7.812137694124383e-06, "loss": 0.0036, "step": 30590 }, { "epoch": 0.21899377370643383, "grad_norm": 0.0, "learning_rate": 7.811422028197239e-06, "loss": 0.0, "step": 30600 }, { "epoch": 0.21906534029914837, "grad_norm": 0.0, "learning_rate": 7.810706362270092e-06, "loss": 0.0022, "step": 30610 }, { "epoch": 0.21913690689186288, "grad_norm": 0.0, "learning_rate": 7.809990696342947e-06, "loss": 0.0, "step": 30620 }, { "epoch": 0.2192084734845774, "grad_norm": 0.0, "learning_rate": 7.809275030415803e-06, "loss": 0.0712, "step": 30630 }, { "epoch": 0.21928004007729193, "grad_norm": 0.0, "learning_rate": 7.808630931081372e-06, "loss": 0.936, "step": 30640 }, { "epoch": 0.21935160667000644, "grad_norm": 1.187871748697944e-05, "learning_rate": 7.807915265154226e-06, "loss": 0.0, "step": 30650 }, { "epoch": 0.21942317326272096, "grad_norm": 0.0, "learning_rate": 7.807199599227081e-06, "loss": 0.0001, "step": 30660 }, { "epoch": 0.2194947398554355, "grad_norm": 0.015981948003172874, "learning_rate": 7.806483933299937e-06, "loss": 0.0001, "step": 30670 }, { "epoch": 0.21956630644815, "grad_norm": 0.0, "learning_rate": 7.80576826737279e-06, "loss": 0.0, "step": 30680 }, { "epoch": 0.21963787304086452, "grad_norm": 0.0, "learning_rate": 7.805052601445646e-06, "loss": 0.0, "step": 30690 }, { "epoch": 0.21970943963357906, "grad_norm": 0.0, "learning_rate": 7.804336935518501e-06, "loss": 0.0003, "step": 30700 }, { "epoch": 0.21978100622629357, "grad_norm": 9.011749679821435e-10, "learning_rate": 7.803621269591354e-06, "loss": 0.019, "step": 30710 }, { "epoch": 0.21985257281900808, "grad_norm": 0.0, "learning_rate": 7.80290560366421e-06, "loss": 0.0019, "step": 30720 }, { "epoch": 0.21992413941172262, "grad_norm": 0.0, "learning_rate": 7.802189937737065e-06, "loss": 0.0, "step": 30730 }, { "epoch": 0.21999570600443713, "grad_norm": 0.0, "learning_rate": 7.80147427180992e-06, "loss": 0.6152, "step": 30740 }, { "epoch": 0.22006727259715164, "grad_norm": 0.0, "learning_rate": 7.800758605882774e-06, "loss": 0.0, "step": 30750 }, { "epoch": 0.22013883918986618, "grad_norm": 0.0, "learning_rate": 7.80004293995563e-06, "loss": 0.0, "step": 30760 }, { "epoch": 0.2202104057825807, "grad_norm": 1.9948929264046456e-07, "learning_rate": 7.799327274028485e-06, "loss": 0.0, "step": 30770 }, { "epoch": 0.2202819723752952, "grad_norm": 1.4590671526093502e-05, "learning_rate": 7.798611608101338e-06, "loss": 0.0, "step": 30780 }, { "epoch": 0.22035353896800974, "grad_norm": 2.6250324935972458e-06, "learning_rate": 7.797895942174193e-06, "loss": 0.0, "step": 30790 }, { "epoch": 0.22042510556072425, "grad_norm": 0.0, "learning_rate": 7.797180276247049e-06, "loss": 0.0, "step": 30800 }, { "epoch": 0.22049667215343877, "grad_norm": 0.0, "learning_rate": 7.796464610319904e-06, "loss": 0.0, "step": 30810 }, { "epoch": 0.2205682387461533, "grad_norm": 0.0, "learning_rate": 7.795748944392758e-06, "loss": 0.0, "step": 30820 }, { "epoch": 0.22063980533886782, "grad_norm": 0.0, "learning_rate": 7.795033278465613e-06, "loss": 0.0, "step": 30830 }, { "epoch": 0.22071137193158233, "grad_norm": 0.0, "learning_rate": 7.794317612538468e-06, "loss": 0.118, "step": 30840 }, { "epoch": 0.22078293852429687, "grad_norm": 0.0, "learning_rate": 7.793601946611322e-06, "loss": 0.0018, "step": 30850 }, { "epoch": 0.22085450511701138, "grad_norm": 5.919305476709269e-05, "learning_rate": 7.792886280684177e-06, "loss": 0.0002, "step": 30860 }, { "epoch": 0.2209260717097259, "grad_norm": 4.8472825255885255e-06, "learning_rate": 7.792170614757032e-06, "loss": 0.0, "step": 30870 }, { "epoch": 0.22099763830244043, "grad_norm": 0.0, "learning_rate": 7.791454948829888e-06, "loss": 0.0, "step": 30880 }, { "epoch": 0.22106920489515494, "grad_norm": 0.0025220480747520924, "learning_rate": 7.790739282902741e-06, "loss": 0.0, "step": 30890 }, { "epoch": 0.22114077148786945, "grad_norm": 0.0, "learning_rate": 7.790023616975597e-06, "loss": 0.0, "step": 30900 }, { "epoch": 0.221212338080584, "grad_norm": 4.05945588255463e-09, "learning_rate": 7.789307951048452e-06, "loss": 0.0, "step": 30910 }, { "epoch": 0.2212839046732985, "grad_norm": 0.0, "learning_rate": 7.788592285121305e-06, "loss": 0.0, "step": 30920 }, { "epoch": 0.22135547126601302, "grad_norm": 0.0, "learning_rate": 7.78787661919416e-06, "loss": 0.0, "step": 30930 }, { "epoch": 0.22142703785872755, "grad_norm": 0.0, "learning_rate": 7.787160953267016e-06, "loss": 0.0, "step": 30940 }, { "epoch": 0.22149860445144207, "grad_norm": 4.6550217014385e-09, "learning_rate": 7.78644528733987e-06, "loss": 0.0, "step": 30950 }, { "epoch": 0.22157017104415658, "grad_norm": 21.26675796508789, "learning_rate": 7.785729621412725e-06, "loss": 0.0031, "step": 30960 }, { "epoch": 0.22164173763687112, "grad_norm": 0.0, "learning_rate": 7.78501395548558e-06, "loss": 0.0, "step": 30970 }, { "epoch": 0.22171330422958563, "grad_norm": 0.0, "learning_rate": 7.784298289558435e-06, "loss": 0.0, "step": 30980 }, { "epoch": 0.22178487082230014, "grad_norm": 308.6845703125, "learning_rate": 7.783582623631289e-06, "loss": 0.2479, "step": 30990 }, { "epoch": 0.22185643741501468, "grad_norm": 0.002082215854898095, "learning_rate": 7.782866957704144e-06, "loss": 0.0001, "step": 31000 }, { "epoch": 0.2219280040077292, "grad_norm": 0.0, "learning_rate": 7.782151291777e-06, "loss": 0.0154, "step": 31010 }, { "epoch": 0.2219995706004437, "grad_norm": 0.0, "learning_rate": 7.781435625849853e-06, "loss": 0.0, "step": 31020 }, { "epoch": 0.22207113719315824, "grad_norm": 0.0, "learning_rate": 7.780719959922709e-06, "loss": 0.0011, "step": 31030 }, { "epoch": 0.22214270378587275, "grad_norm": 0.0, "learning_rate": 7.780004293995564e-06, "loss": 0.0353, "step": 31040 }, { "epoch": 0.22221427037858726, "grad_norm": 1.1874977667858388e-09, "learning_rate": 7.779288628068419e-06, "loss": 0.0, "step": 31050 }, { "epoch": 0.2222858369713018, "grad_norm": 0.0, "learning_rate": 7.778572962141273e-06, "loss": 0.0, "step": 31060 }, { "epoch": 0.22235740356401631, "grad_norm": 0.0, "learning_rate": 7.777857296214128e-06, "loss": 0.0, "step": 31070 }, { "epoch": 0.22242897015673083, "grad_norm": 0.0, "learning_rate": 7.777141630286983e-06, "loss": 0.0, "step": 31080 }, { "epoch": 0.22250053674944537, "grad_norm": 0.0, "learning_rate": 7.776425964359837e-06, "loss": 0.0, "step": 31090 }, { "epoch": 0.22257210334215988, "grad_norm": 1.2819937182939611e-06, "learning_rate": 7.775710298432692e-06, "loss": 0.0009, "step": 31100 }, { "epoch": 0.2226436699348744, "grad_norm": 0.0, "learning_rate": 7.774994632505547e-06, "loss": 0.0056, "step": 31110 }, { "epoch": 0.22271523652758893, "grad_norm": 0.0, "learning_rate": 7.774278966578401e-06, "loss": 0.0007, "step": 31120 }, { "epoch": 0.22278680312030344, "grad_norm": 1.279135517506802e-07, "learning_rate": 7.773563300651256e-06, "loss": 0.0, "step": 31130 }, { "epoch": 0.22285836971301795, "grad_norm": 2.1239547898233013e-09, "learning_rate": 7.772847634724112e-06, "loss": 0.0, "step": 31140 }, { "epoch": 0.2229299363057325, "grad_norm": 6.990787824179279e-07, "learning_rate": 7.772131968796967e-06, "loss": 0.2875, "step": 31150 }, { "epoch": 0.223001502898447, "grad_norm": 0.0, "learning_rate": 7.77141630286982e-06, "loss": 0.0, "step": 31160 }, { "epoch": 0.2230730694911615, "grad_norm": 0.0, "learning_rate": 7.770700636942676e-06, "loss": 0.0001, "step": 31170 }, { "epoch": 0.22314463608387605, "grad_norm": 0.0, "learning_rate": 7.769984971015531e-06, "loss": 0.0, "step": 31180 }, { "epoch": 0.22321620267659056, "grad_norm": 0.00026749129756353796, "learning_rate": 7.769269305088385e-06, "loss": 0.0, "step": 31190 }, { "epoch": 0.22328776926930508, "grad_norm": 0.0, "learning_rate": 7.76855363916124e-06, "loss": 0.0, "step": 31200 }, { "epoch": 0.22335933586201961, "grad_norm": 0.0, "learning_rate": 7.767837973234095e-06, "loss": 0.0001, "step": 31210 }, { "epoch": 0.22343090245473413, "grad_norm": 1.6028122900024755e-06, "learning_rate": 7.76712230730695e-06, "loss": 0.0005, "step": 31220 }, { "epoch": 0.22350246904744864, "grad_norm": 1108.447998046875, "learning_rate": 7.766406641379804e-06, "loss": 1.438, "step": 31230 }, { "epoch": 0.22357403564016318, "grad_norm": 0.0001705043250694871, "learning_rate": 7.76569097545266e-06, "loss": 0.0, "step": 31240 }, { "epoch": 0.2236456022328777, "grad_norm": 8.214142872020602e-05, "learning_rate": 7.764975309525515e-06, "loss": 0.0001, "step": 31250 }, { "epoch": 0.2237171688255922, "grad_norm": 3.97320665257439e-09, "learning_rate": 7.764259643598368e-06, "loss": 0.1514, "step": 31260 }, { "epoch": 0.22378873541830674, "grad_norm": 1.3894679796067066e-05, "learning_rate": 7.763543977671224e-06, "loss": 0.0, "step": 31270 }, { "epoch": 0.22386030201102125, "grad_norm": 0.0, "learning_rate": 7.762828311744079e-06, "loss": 0.0007, "step": 31280 }, { "epoch": 0.2239318686037358, "grad_norm": 0.0, "learning_rate": 7.762112645816934e-06, "loss": 0.0, "step": 31290 }, { "epoch": 0.2240034351964503, "grad_norm": 0.0, "learning_rate": 7.761396979889788e-06, "loss": 0.591, "step": 31300 }, { "epoch": 0.2240750017891648, "grad_norm": 0.0007156390929594636, "learning_rate": 7.760681313962643e-06, "loss": 0.0011, "step": 31310 }, { "epoch": 0.22414656838187935, "grad_norm": 1.0549179315567017, "learning_rate": 7.759965648035498e-06, "loss": 0.0003, "step": 31320 }, { "epoch": 0.22421813497459386, "grad_norm": 3.556991941877641e-05, "learning_rate": 7.759249982108352e-06, "loss": 0.0001, "step": 31330 }, { "epoch": 0.22428970156730837, "grad_norm": 5.358365342544857e-06, "learning_rate": 7.758534316181207e-06, "loss": 0.0003, "step": 31340 }, { "epoch": 0.22436126816002291, "grad_norm": 0.0, "learning_rate": 7.757818650254063e-06, "loss": 0.0009, "step": 31350 }, { "epoch": 0.22443283475273743, "grad_norm": 0.0, "learning_rate": 7.757102984326916e-06, "loss": 0.0, "step": 31360 }, { "epoch": 0.22450440134545194, "grad_norm": 3.3899540952120333e-09, "learning_rate": 7.756387318399772e-06, "loss": 0.0, "step": 31370 }, { "epoch": 0.22457596793816648, "grad_norm": 0.0, "learning_rate": 7.755671652472627e-06, "loss": 0.0, "step": 31380 }, { "epoch": 0.224647534530881, "grad_norm": 0.0, "learning_rate": 7.754955986545482e-06, "loss": 1.0087, "step": 31390 }, { "epoch": 0.2247191011235955, "grad_norm": 0.0, "learning_rate": 7.754240320618336e-06, "loss": 0.0, "step": 31400 }, { "epoch": 0.22479066771631004, "grad_norm": 1.6868094698807568e-09, "learning_rate": 7.753524654691191e-06, "loss": 1.7578, "step": 31410 }, { "epoch": 0.22486223430902455, "grad_norm": 0.0, "learning_rate": 7.752808988764046e-06, "loss": 0.0, "step": 31420 }, { "epoch": 0.22493380090173906, "grad_norm": 0.0, "learning_rate": 7.7520933228369e-06, "loss": 0.0, "step": 31430 }, { "epoch": 0.2250053674944536, "grad_norm": 0.0, "learning_rate": 7.751377656909755e-06, "loss": 0.9905, "step": 31440 }, { "epoch": 0.2250769340871681, "grad_norm": 0.0067655835300683975, "learning_rate": 7.75066199098261e-06, "loss": 0.3417, "step": 31450 }, { "epoch": 0.22514850067988262, "grad_norm": 2.9960367101011798e-05, "learning_rate": 7.749946325055466e-06, "loss": 0.0001, "step": 31460 }, { "epoch": 0.22522006727259716, "grad_norm": 5.427552605397068e-05, "learning_rate": 7.74923065912832e-06, "loss": 0.0, "step": 31470 }, { "epoch": 0.22529163386531167, "grad_norm": 0.0014448761940002441, "learning_rate": 7.748514993201175e-06, "loss": 0.0004, "step": 31480 }, { "epoch": 0.22536320045802619, "grad_norm": 0.0, "learning_rate": 7.74779932727403e-06, "loss": 0.0005, "step": 31490 }, { "epoch": 0.22543476705074073, "grad_norm": 0.0, "learning_rate": 7.747083661346884e-06, "loss": 0.0003, "step": 31500 }, { "epoch": 0.22550633364345524, "grad_norm": 0.0, "learning_rate": 7.746367995419739e-06, "loss": 0.0, "step": 31510 }, { "epoch": 0.22557790023616975, "grad_norm": 0.0, "learning_rate": 7.745652329492594e-06, "loss": 0.0005, "step": 31520 }, { "epoch": 0.2256494668288843, "grad_norm": 0.0, "learning_rate": 7.74493666356545e-06, "loss": 0.0, "step": 31530 }, { "epoch": 0.2257210334215988, "grad_norm": 0.0, "learning_rate": 7.744220997638303e-06, "loss": 0.0, "step": 31540 }, { "epoch": 0.2257926000143133, "grad_norm": 6.301693389332286e-09, "learning_rate": 7.743505331711158e-06, "loss": 0.0003, "step": 31550 }, { "epoch": 0.22586416660702785, "grad_norm": 0.0, "learning_rate": 7.742789665784014e-06, "loss": 0.0008, "step": 31560 }, { "epoch": 0.22593573319974236, "grad_norm": 0.0, "learning_rate": 7.742073999856867e-06, "loss": 0.0, "step": 31570 }, { "epoch": 0.22600729979245687, "grad_norm": 0.0, "learning_rate": 7.741358333929722e-06, "loss": 0.0039, "step": 31580 }, { "epoch": 0.2260788663851714, "grad_norm": 0.0, "learning_rate": 7.740642668002578e-06, "loss": 0.0, "step": 31590 }, { "epoch": 0.22615043297788592, "grad_norm": 0.0, "learning_rate": 7.739927002075431e-06, "loss": 0.0, "step": 31600 }, { "epoch": 0.22622199957060043, "grad_norm": 0.0, "learning_rate": 7.739211336148287e-06, "loss": 0.0, "step": 31610 }, { "epoch": 0.22629356616331497, "grad_norm": 0.0, "learning_rate": 7.738495670221142e-06, "loss": 0.0, "step": 31620 }, { "epoch": 0.22636513275602949, "grad_norm": 0.0, "learning_rate": 7.737780004293997e-06, "loss": 0.0, "step": 31630 }, { "epoch": 0.226436699348744, "grad_norm": 0.0, "learning_rate": 7.73706433836685e-06, "loss": 0.0, "step": 31640 }, { "epoch": 0.22650826594145854, "grad_norm": 0.0, "learning_rate": 7.736348672439706e-06, "loss": 0.0, "step": 31650 }, { "epoch": 0.22657983253417305, "grad_norm": 0.0, "learning_rate": 7.735633006512561e-06, "loss": 0.0, "step": 31660 }, { "epoch": 0.22665139912688756, "grad_norm": 0.0, "learning_rate": 7.734917340585415e-06, "loss": 0.0026, "step": 31670 }, { "epoch": 0.2267229657196021, "grad_norm": 0.0, "learning_rate": 7.73420167465827e-06, "loss": 0.0, "step": 31680 }, { "epoch": 0.2267945323123166, "grad_norm": 0.00322880526073277, "learning_rate": 7.733486008731126e-06, "loss": 0.0004, "step": 31690 }, { "epoch": 0.22686609890503112, "grad_norm": 0.0006685599801130593, "learning_rate": 7.732770342803981e-06, "loss": 0.0033, "step": 31700 }, { "epoch": 0.22693766549774566, "grad_norm": 0.0, "learning_rate": 7.732054676876834e-06, "loss": 0.0, "step": 31710 }, { "epoch": 0.22700923209046017, "grad_norm": 0.0, "learning_rate": 7.731339010949688e-06, "loss": 0.2551, "step": 31720 }, { "epoch": 0.22708079868317468, "grad_norm": 0.0, "learning_rate": 7.730623345022545e-06, "loss": 0.0, "step": 31730 }, { "epoch": 0.22715236527588922, "grad_norm": 0.0, "learning_rate": 7.729907679095399e-06, "loss": 0.0041, "step": 31740 }, { "epoch": 0.22722393186860373, "grad_norm": 0.0, "learning_rate": 7.729192013168254e-06, "loss": 0.3253, "step": 31750 }, { "epoch": 0.22729549846131825, "grad_norm": 0.9559914469718933, "learning_rate": 7.728476347241108e-06, "loss": 0.0001, "step": 31760 }, { "epoch": 0.22736706505403279, "grad_norm": 0.0, "learning_rate": 7.727760681313963e-06, "loss": 0.0, "step": 31770 }, { "epoch": 0.2274386316467473, "grad_norm": 0.0, "learning_rate": 7.727045015386818e-06, "loss": 0.0, "step": 31780 }, { "epoch": 0.2275101982394618, "grad_norm": 9.770971519174054e-05, "learning_rate": 7.726329349459672e-06, "loss": 0.0, "step": 31790 }, { "epoch": 0.22758176483217635, "grad_norm": 0.0, "learning_rate": 7.725613683532529e-06, "loss": 0.0, "step": 31800 }, { "epoch": 0.22765333142489086, "grad_norm": 0.03396591171622276, "learning_rate": 7.724898017605382e-06, "loss": 0.0, "step": 31810 }, { "epoch": 0.22772489801760537, "grad_norm": 0.0, "learning_rate": 7.724182351678238e-06, "loss": 0.0, "step": 31820 }, { "epoch": 0.2277964646103199, "grad_norm": 0.0, "learning_rate": 7.723466685751091e-06, "loss": 0.0, "step": 31830 }, { "epoch": 0.22786803120303442, "grad_norm": 1.731326415566059e-09, "learning_rate": 7.722751019823947e-06, "loss": 0.0, "step": 31840 }, { "epoch": 0.22793959779574893, "grad_norm": 0.0, "learning_rate": 7.722035353896802e-06, "loss": 0.0, "step": 31850 }, { "epoch": 0.22801116438846347, "grad_norm": 0.0, "learning_rate": 7.721319687969655e-06, "loss": 0.0, "step": 31860 }, { "epoch": 0.22808273098117798, "grad_norm": 0.0, "learning_rate": 7.720604022042512e-06, "loss": 0.0, "step": 31870 }, { "epoch": 0.2281542975738925, "grad_norm": 0.0, "learning_rate": 7.719888356115366e-06, "loss": 0.0, "step": 31880 }, { "epoch": 0.22822586416660703, "grad_norm": 0.0, "learning_rate": 7.71917269018822e-06, "loss": 0.0, "step": 31890 }, { "epoch": 0.22829743075932155, "grad_norm": 0.0, "learning_rate": 7.718457024261075e-06, "loss": 0.0, "step": 31900 }, { "epoch": 0.22836899735203606, "grad_norm": 0.0, "learning_rate": 7.71774135833393e-06, "loss": 0.0, "step": 31910 }, { "epoch": 0.2284405639447506, "grad_norm": 1.2109730995746304e-09, "learning_rate": 7.717025692406785e-06, "loss": 0.0, "step": 31920 }, { "epoch": 0.2285121305374651, "grad_norm": 0.0, "learning_rate": 7.716310026479639e-06, "loss": 0.0, "step": 31930 }, { "epoch": 0.22858369713017962, "grad_norm": 0.0, "learning_rate": 7.715594360552496e-06, "loss": 0.0, "step": 31940 }, { "epoch": 0.22865526372289416, "grad_norm": 0.0006763054989278316, "learning_rate": 7.71487869462535e-06, "loss": 0.0, "step": 31950 }, { "epoch": 0.22872683031560867, "grad_norm": 0.0, "learning_rate": 7.714163028698203e-06, "loss": 0.0, "step": 31960 }, { "epoch": 0.22879839690832318, "grad_norm": 0.0, "learning_rate": 7.713447362771059e-06, "loss": 0.0, "step": 31970 }, { "epoch": 0.22886996350103772, "grad_norm": 5.847632564837113e-07, "learning_rate": 7.712731696843914e-06, "loss": 0.7656, "step": 31980 }, { "epoch": 0.22894153009375223, "grad_norm": 0.00015833874931558967, "learning_rate": 7.712016030916769e-06, "loss": 0.0106, "step": 31990 }, { "epoch": 0.22901309668646674, "grad_norm": 0.0, "learning_rate": 7.711300364989623e-06, "loss": 0.0, "step": 32000 }, { "epoch": 0.22908466327918128, "grad_norm": 0.0, "learning_rate": 7.710584699062478e-06, "loss": 0.01, "step": 32010 }, { "epoch": 0.2291562298718958, "grad_norm": 0.0, "learning_rate": 7.709869033135333e-06, "loss": 0.0031, "step": 32020 }, { "epoch": 0.2292277964646103, "grad_norm": 0.0, "learning_rate": 7.709153367208187e-06, "loss": 0.0, "step": 32030 }, { "epoch": 0.22929936305732485, "grad_norm": 0.0, "learning_rate": 7.708437701281042e-06, "loss": 0.0001, "step": 32040 }, { "epoch": 0.22937092965003936, "grad_norm": 0.0, "learning_rate": 7.707722035353897e-06, "loss": 0.0, "step": 32050 }, { "epoch": 0.2294424962427539, "grad_norm": 0.0023080792743712664, "learning_rate": 7.707006369426753e-06, "loss": 0.0, "step": 32060 }, { "epoch": 0.2295140628354684, "grad_norm": 0.0, "learning_rate": 7.706290703499606e-06, "loss": 0.0016, "step": 32070 }, { "epoch": 0.22958562942818292, "grad_norm": 0.0, "learning_rate": 7.705575037572462e-06, "loss": 0.0, "step": 32080 }, { "epoch": 0.22965719602089746, "grad_norm": 0.0, "learning_rate": 7.704859371645317e-06, "loss": 0.0201, "step": 32090 }, { "epoch": 0.22972876261361197, "grad_norm": 4.431292222051297e-09, "learning_rate": 7.70414370571817e-06, "loss": 0.0, "step": 32100 }, { "epoch": 0.22980032920632648, "grad_norm": 0.0, "learning_rate": 7.703428039791026e-06, "loss": 0.0, "step": 32110 }, { "epoch": 0.22987189579904102, "grad_norm": 0.00012218010670039803, "learning_rate": 7.702712373863881e-06, "loss": 0.0, "step": 32120 }, { "epoch": 0.22994346239175553, "grad_norm": 0.0, "learning_rate": 7.701996707936735e-06, "loss": 0.0, "step": 32130 }, { "epoch": 0.23001502898447004, "grad_norm": 7.969838407007046e-06, "learning_rate": 7.70128104200959e-06, "loss": 0.0, "step": 32140 }, { "epoch": 0.23008659557718458, "grad_norm": 0.0, "learning_rate": 7.700565376082445e-06, "loss": 0.0, "step": 32150 }, { "epoch": 0.2301581621698991, "grad_norm": 0.0, "learning_rate": 7.6998497101553e-06, "loss": 0.0, "step": 32160 }, { "epoch": 0.2302297287626136, "grad_norm": 0.0, "learning_rate": 7.699134044228154e-06, "loss": 0.0034, "step": 32170 }, { "epoch": 0.23030129535532814, "grad_norm": 0.0, "learning_rate": 7.69841837830101e-06, "loss": 0.0, "step": 32180 }, { "epoch": 0.23037286194804266, "grad_norm": 0.0, "learning_rate": 7.697702712373865e-06, "loss": 0.0, "step": 32190 }, { "epoch": 0.23044442854075717, "grad_norm": 0.0, "learning_rate": 7.696987046446718e-06, "loss": 0.0, "step": 32200 }, { "epoch": 0.2305159951334717, "grad_norm": 0.0, "learning_rate": 7.696271380519574e-06, "loss": 0.0104, "step": 32210 }, { "epoch": 0.23058756172618622, "grad_norm": 0.0, "learning_rate": 7.695555714592429e-06, "loss": 0.0, "step": 32220 }, { "epoch": 0.23065912831890073, "grad_norm": 2.8805384635925293, "learning_rate": 7.694840048665284e-06, "loss": 0.0007, "step": 32230 }, { "epoch": 0.23073069491161527, "grad_norm": 0.0, "learning_rate": 7.694124382738138e-06, "loss": 0.0, "step": 32240 }, { "epoch": 0.23080226150432978, "grad_norm": 0.0, "learning_rate": 7.693408716810993e-06, "loss": 0.0015, "step": 32250 }, { "epoch": 0.2308738280970443, "grad_norm": 0.0006197792245075107, "learning_rate": 7.692693050883848e-06, "loss": 0.0, "step": 32260 }, { "epoch": 0.23094539468975883, "grad_norm": 0.0033606302458792925, "learning_rate": 7.691977384956702e-06, "loss": 0.0, "step": 32270 }, { "epoch": 0.23101696128247334, "grad_norm": 0.0017468365840613842, "learning_rate": 7.691261719029557e-06, "loss": 0.0, "step": 32280 }, { "epoch": 0.23108852787518785, "grad_norm": 0.0001063054078258574, "learning_rate": 7.690546053102413e-06, "loss": 0.0, "step": 32290 }, { "epoch": 0.2311600944679024, "grad_norm": 0.0, "learning_rate": 7.689830387175268e-06, "loss": 0.0, "step": 32300 }, { "epoch": 0.2312316610606169, "grad_norm": 0.0, "learning_rate": 7.689114721248122e-06, "loss": 0.0, "step": 32310 }, { "epoch": 0.23130322765333142, "grad_norm": 0.0, "learning_rate": 7.688399055320977e-06, "loss": 0.0, "step": 32320 }, { "epoch": 0.23137479424604596, "grad_norm": 0.0, "learning_rate": 7.687683389393832e-06, "loss": 0.0, "step": 32330 }, { "epoch": 0.23144636083876047, "grad_norm": 3.6787009239196777, "learning_rate": 7.686967723466686e-06, "loss": 1.1065, "step": 32340 }, { "epoch": 0.23151792743147498, "grad_norm": 0.0, "learning_rate": 7.686252057539541e-06, "loss": 0.0, "step": 32350 }, { "epoch": 0.23158949402418952, "grad_norm": 0.0, "learning_rate": 7.685536391612396e-06, "loss": 0.5934, "step": 32360 }, { "epoch": 0.23166106061690403, "grad_norm": 0.0002423661935608834, "learning_rate": 7.68482072568525e-06, "loss": 0.0002, "step": 32370 }, { "epoch": 0.23173262720961854, "grad_norm": 0.0, "learning_rate": 7.684105059758105e-06, "loss": 0.0, "step": 32380 }, { "epoch": 0.23180419380233308, "grad_norm": 9.391786443302408e-05, "learning_rate": 7.68338939383096e-06, "loss": 0.0678, "step": 32390 }, { "epoch": 0.2318757603950476, "grad_norm": 0.0006669021677225828, "learning_rate": 7.682673727903816e-06, "loss": 0.0, "step": 32400 }, { "epoch": 0.2319473269877621, "grad_norm": 9.75337388808839e-05, "learning_rate": 7.68195806197667e-06, "loss": 0.0, "step": 32410 }, { "epoch": 0.23201889358047664, "grad_norm": 0.0, "learning_rate": 7.681242396049525e-06, "loss": 0.0266, "step": 32420 }, { "epoch": 0.23209046017319115, "grad_norm": 0.0018792874179780483, "learning_rate": 7.68052673012238e-06, "loss": 0.0, "step": 32430 }, { "epoch": 0.23216202676590567, "grad_norm": 0.0, "learning_rate": 7.679811064195234e-06, "loss": 0.0185, "step": 32440 }, { "epoch": 0.2322335933586202, "grad_norm": 5.299972372085904e-07, "learning_rate": 7.679095398268089e-06, "loss": 0.0, "step": 32450 }, { "epoch": 0.23230515995133472, "grad_norm": 0.0, "learning_rate": 7.678379732340944e-06, "loss": 0.0, "step": 32460 }, { "epoch": 0.23237672654404923, "grad_norm": 0.0, "learning_rate": 7.6776640664138e-06, "loss": 0.0, "step": 32470 }, { "epoch": 0.23244829313676377, "grad_norm": 0.0, "learning_rate": 7.676948400486653e-06, "loss": 0.0, "step": 32480 }, { "epoch": 0.23251985972947828, "grad_norm": 0.0, "learning_rate": 7.676232734559508e-06, "loss": 0.0, "step": 32490 }, { "epoch": 0.2325914263221928, "grad_norm": 3.5835089562397116e-08, "learning_rate": 7.675517068632364e-06, "loss": 0.0, "step": 32500 }, { "epoch": 0.23266299291490733, "grad_norm": 1.0813997732839198e-06, "learning_rate": 7.674801402705217e-06, "loss": 0.0, "step": 32510 }, { "epoch": 0.23273455950762184, "grad_norm": 1.0071456927107647e-05, "learning_rate": 7.674085736778072e-06, "loss": 0.0001, "step": 32520 }, { "epoch": 0.23280612610033635, "grad_norm": 0.0019457568414509296, "learning_rate": 7.673370070850928e-06, "loss": 0.0, "step": 32530 }, { "epoch": 0.2328776926930509, "grad_norm": 2.5864155759336427e-05, "learning_rate": 7.672654404923781e-06, "loss": 0.0, "step": 32540 }, { "epoch": 0.2329492592857654, "grad_norm": 0.0, "learning_rate": 7.671938738996637e-06, "loss": 0.0, "step": 32550 }, { "epoch": 0.23302082587847991, "grad_norm": 501.0564270019531, "learning_rate": 7.671223073069492e-06, "loss": 0.0912, "step": 32560 }, { "epoch": 0.23309239247119445, "grad_norm": 0.0, "learning_rate": 7.670507407142347e-06, "loss": 0.0343, "step": 32570 }, { "epoch": 0.23316395906390897, "grad_norm": 0.0, "learning_rate": 7.6697917412152e-06, "loss": 0.0, "step": 32580 }, { "epoch": 0.23323552565662348, "grad_norm": 0.0, "learning_rate": 7.669076075288056e-06, "loss": 0.0, "step": 32590 }, { "epoch": 0.23330709224933802, "grad_norm": 14.781996726989746, "learning_rate": 7.668360409360911e-06, "loss": 0.0018, "step": 32600 }, { "epoch": 0.23337865884205253, "grad_norm": 0.0, "learning_rate": 7.667644743433765e-06, "loss": 0.4605, "step": 32610 }, { "epoch": 0.23345022543476704, "grad_norm": 0.00041548561421222985, "learning_rate": 7.66692907750662e-06, "loss": 0.0, "step": 32620 }, { "epoch": 0.23352179202748158, "grad_norm": 0.0, "learning_rate": 7.666213411579476e-06, "loss": 0.0018, "step": 32630 }, { "epoch": 0.2335933586201961, "grad_norm": 0.0, "learning_rate": 7.665497745652331e-06, "loss": 0.2975, "step": 32640 }, { "epoch": 0.2336649252129106, "grad_norm": 0.0, "learning_rate": 7.664782079725184e-06, "loss": 0.0, "step": 32650 }, { "epoch": 0.23373649180562514, "grad_norm": 0.0, "learning_rate": 7.66406641379804e-06, "loss": 0.0006, "step": 32660 }, { "epoch": 0.23380805839833965, "grad_norm": 0.0008631708915345371, "learning_rate": 7.663350747870895e-06, "loss": 2.0063, "step": 32670 }, { "epoch": 0.23387962499105416, "grad_norm": 0.0, "learning_rate": 7.662635081943749e-06, "loss": 0.0, "step": 32680 }, { "epoch": 0.2339511915837687, "grad_norm": 0.0001064461685018614, "learning_rate": 7.661919416016604e-06, "loss": 0.0, "step": 32690 }, { "epoch": 0.23402275817648321, "grad_norm": 3.239405850763433e-05, "learning_rate": 7.66120375008946e-06, "loss": 0.0065, "step": 32700 }, { "epoch": 0.23409432476919773, "grad_norm": 0.0, "learning_rate": 7.660488084162315e-06, "loss": 0.0095, "step": 32710 }, { "epoch": 0.23416589136191227, "grad_norm": 0.7274306416511536, "learning_rate": 7.659772418235168e-06, "loss": 0.4326, "step": 32720 }, { "epoch": 0.23423745795462678, "grad_norm": 4.270371078973767e-08, "learning_rate": 7.659056752308023e-06, "loss": 0.0, "step": 32730 }, { "epoch": 0.2343090245473413, "grad_norm": 0.0031583919189870358, "learning_rate": 7.658341086380879e-06, "loss": 0.0021, "step": 32740 }, { "epoch": 0.23438059114005583, "grad_norm": 4.2477354966763414e-10, "learning_rate": 7.657625420453732e-06, "loss": 0.0001, "step": 32750 }, { "epoch": 0.23445215773277034, "grad_norm": 0.0, "learning_rate": 7.656909754526588e-06, "loss": 0.0021, "step": 32760 }, { "epoch": 0.23452372432548485, "grad_norm": 0.0, "learning_rate": 7.656194088599443e-06, "loss": 0.0, "step": 32770 }, { "epoch": 0.2345952909181994, "grad_norm": 0.0, "learning_rate": 7.655478422672296e-06, "loss": 0.0, "step": 32780 }, { "epoch": 0.2346668575109139, "grad_norm": 1.0655001460690983e-05, "learning_rate": 7.654762756745152e-06, "loss": 0.0116, "step": 32790 }, { "epoch": 0.2347384241036284, "grad_norm": 1.2264330884192987e-08, "learning_rate": 7.654047090818007e-06, "loss": 0.0, "step": 32800 }, { "epoch": 0.23480999069634295, "grad_norm": 0.0, "learning_rate": 7.653331424890862e-06, "loss": 0.0, "step": 32810 }, { "epoch": 0.23488155728905746, "grad_norm": 0.0007999243098311126, "learning_rate": 7.652615758963716e-06, "loss": 0.0, "step": 32820 }, { "epoch": 0.234953123881772, "grad_norm": 4.90727902757726e-10, "learning_rate": 7.651900093036571e-06, "loss": 0.0005, "step": 32830 }, { "epoch": 0.23502469047448651, "grad_norm": 0.0, "learning_rate": 7.651184427109427e-06, "loss": 0.0, "step": 32840 }, { "epoch": 0.23509625706720103, "grad_norm": 0.0, "learning_rate": 7.65046876118228e-06, "loss": 0.0, "step": 32850 }, { "epoch": 0.23516782365991556, "grad_norm": 0.0, "learning_rate": 7.649753095255135e-06, "loss": 0.0, "step": 32860 }, { "epoch": 0.23523939025263008, "grad_norm": 0.0, "learning_rate": 7.64903742932799e-06, "loss": 0.0001, "step": 32870 }, { "epoch": 0.2353109568453446, "grad_norm": 0.0, "learning_rate": 7.648321763400846e-06, "loss": 0.0001, "step": 32880 }, { "epoch": 0.23538252343805913, "grad_norm": 4.311857815242348e-10, "learning_rate": 7.6476060974737e-06, "loss": 0.0016, "step": 32890 }, { "epoch": 0.23545409003077364, "grad_norm": 0.0, "learning_rate": 7.646890431546555e-06, "loss": 0.0007, "step": 32900 }, { "epoch": 0.23552565662348815, "grad_norm": 7.837699058654835e-07, "learning_rate": 7.64617476561941e-06, "loss": 0.0, "step": 32910 }, { "epoch": 0.2355972232162027, "grad_norm": 0.0, "learning_rate": 7.645459099692264e-06, "loss": 0.0, "step": 32920 }, { "epoch": 0.2356687898089172, "grad_norm": 0.5353636741638184, "learning_rate": 7.644743433765119e-06, "loss": 0.0003, "step": 32930 }, { "epoch": 0.2357403564016317, "grad_norm": 0.0033865892328321934, "learning_rate": 7.644027767837974e-06, "loss": 0.0, "step": 32940 }, { "epoch": 0.23581192299434625, "grad_norm": 0.0, "learning_rate": 7.64331210191083e-06, "loss": 0.0, "step": 32950 }, { "epoch": 0.23588348958706076, "grad_norm": 0.0, "learning_rate": 7.642596435983683e-06, "loss": 0.0, "step": 32960 }, { "epoch": 0.23595505617977527, "grad_norm": 0.000745573895983398, "learning_rate": 7.641880770056539e-06, "loss": 0.0, "step": 32970 }, { "epoch": 0.2360266227724898, "grad_norm": 0.0, "learning_rate": 7.641165104129394e-06, "loss": 0.0, "step": 32980 }, { "epoch": 0.23609818936520433, "grad_norm": 0.0006771166808903217, "learning_rate": 7.640449438202247e-06, "loss": 0.0, "step": 32990 }, { "epoch": 0.23616975595791884, "grad_norm": 0.0, "learning_rate": 7.639733772275103e-06, "loss": 0.0, "step": 33000 }, { "epoch": 0.23624132255063338, "grad_norm": 0.0, "learning_rate": 7.639018106347958e-06, "loss": 0.0, "step": 33010 }, { "epoch": 0.2363128891433479, "grad_norm": 0.0, "learning_rate": 7.638302440420812e-06, "loss": 0.0005, "step": 33020 }, { "epoch": 0.2363844557360624, "grad_norm": 0.0, "learning_rate": 7.637586774493667e-06, "loss": 0.0, "step": 33030 }, { "epoch": 0.23645602232877694, "grad_norm": 1.4548075199127197, "learning_rate": 7.636871108566522e-06, "loss": 0.0003, "step": 33040 }, { "epoch": 0.23652758892149145, "grad_norm": 0.0, "learning_rate": 7.636155442639378e-06, "loss": 0.0, "step": 33050 }, { "epoch": 0.23659915551420596, "grad_norm": 8.109524718413752e-10, "learning_rate": 7.635511343304946e-06, "loss": 0.3152, "step": 33060 }, { "epoch": 0.2366707221069205, "grad_norm": 0.010666720569133759, "learning_rate": 7.634795677377801e-06, "loss": 0.0006, "step": 33070 }, { "epoch": 0.236742288699635, "grad_norm": 1.0310702691640472e-06, "learning_rate": 7.634080011450654e-06, "loss": 0.0258, "step": 33080 }, { "epoch": 0.23681385529234952, "grad_norm": 0.0, "learning_rate": 7.633364345523511e-06, "loss": 0.0, "step": 33090 }, { "epoch": 0.23688542188506406, "grad_norm": 0.0, "learning_rate": 7.632648679596365e-06, "loss": 0.0001, "step": 33100 }, { "epoch": 0.23695698847777857, "grad_norm": 0.0, "learning_rate": 7.631933013669219e-06, "loss": 0.0, "step": 33110 }, { "epoch": 0.23702855507049309, "grad_norm": 0.0, "learning_rate": 7.631217347742076e-06, "loss": 0.0, "step": 33120 }, { "epoch": 0.23710012166320762, "grad_norm": 0.02082071267068386, "learning_rate": 7.63050168181493e-06, "loss": 0.0032, "step": 33130 }, { "epoch": 0.23717168825592214, "grad_norm": 0.0, "learning_rate": 7.629786015887785e-06, "loss": 0.0, "step": 33140 }, { "epoch": 0.23724325484863665, "grad_norm": 0.0, "learning_rate": 7.629070349960639e-06, "loss": 0.0, "step": 33150 }, { "epoch": 0.2373148214413512, "grad_norm": 0.0, "learning_rate": 7.628354684033494e-06, "loss": 0.0, "step": 33160 }, { "epoch": 0.2373863880340657, "grad_norm": 0.0, "learning_rate": 7.627639018106349e-06, "loss": 0.0133, "step": 33170 }, { "epoch": 0.2374579546267802, "grad_norm": 0.03072418086230755, "learning_rate": 7.626923352179203e-06, "loss": 0.0, "step": 33180 }, { "epoch": 0.23752952121949475, "grad_norm": 0.0, "learning_rate": 7.6262076862520584e-06, "loss": 0.0, "step": 33190 }, { "epoch": 0.23760108781220926, "grad_norm": 2.0154176105791066e-09, "learning_rate": 7.625492020324913e-06, "loss": 0.0, "step": 33200 }, { "epoch": 0.23767265440492377, "grad_norm": 5.2341255774024376e-09, "learning_rate": 7.624776354397768e-06, "loss": 0.0, "step": 33210 }, { "epoch": 0.2377442209976383, "grad_norm": 0.0, "learning_rate": 7.624060688470623e-06, "loss": 0.0001, "step": 33220 }, { "epoch": 0.23781578759035282, "grad_norm": 0.0, "learning_rate": 7.623345022543477e-06, "loss": 0.0, "step": 33230 }, { "epoch": 0.23788735418306733, "grad_norm": 0.0, "learning_rate": 7.622629356616332e-06, "loss": 0.5008, "step": 33240 }, { "epoch": 0.23795892077578187, "grad_norm": 0.0, "learning_rate": 7.621913690689187e-06, "loss": 0.0, "step": 33250 }, { "epoch": 0.23803048736849639, "grad_norm": 0.0, "learning_rate": 7.621198024762042e-06, "loss": 0.0, "step": 33260 }, { "epoch": 0.2381020539612109, "grad_norm": 0.0, "learning_rate": 7.6204823588348965e-06, "loss": 0.0, "step": 33270 }, { "epoch": 0.23817362055392544, "grad_norm": 0.00025836736313067377, "learning_rate": 7.619766692907752e-06, "loss": 0.0, "step": 33280 }, { "epoch": 0.23824518714663995, "grad_norm": 0.0, "learning_rate": 7.619051026980606e-06, "loss": 0.0001, "step": 33290 }, { "epoch": 0.23831675373935446, "grad_norm": 2.9532898793149798e-08, "learning_rate": 7.618335361053461e-06, "loss": 0.0, "step": 33300 }, { "epoch": 0.238388320332069, "grad_norm": 0.0, "learning_rate": 7.617619695126316e-06, "loss": 0.0001, "step": 33310 }, { "epoch": 0.2384598869247835, "grad_norm": 0.0, "learning_rate": 7.6169040291991705e-06, "loss": 0.0002, "step": 33320 }, { "epoch": 0.23853145351749802, "grad_norm": 0.0, "learning_rate": 7.616188363272026e-06, "loss": 0.0, "step": 33330 }, { "epoch": 0.23860302011021256, "grad_norm": 0.0, "learning_rate": 7.61547269734488e-06, "loss": 0.0, "step": 33340 }, { "epoch": 0.23867458670292707, "grad_norm": 0.0, "learning_rate": 7.614757031417735e-06, "loss": 0.0, "step": 33350 }, { "epoch": 0.23874615329564158, "grad_norm": 0.0, "learning_rate": 7.61404136549059e-06, "loss": 0.0, "step": 33360 }, { "epoch": 0.23881771988835612, "grad_norm": 0.0, "learning_rate": 7.613325699563444e-06, "loss": 0.0, "step": 33370 }, { "epoch": 0.23888928648107063, "grad_norm": 0.0, "learning_rate": 7.6126100336363e-06, "loss": 0.0, "step": 33380 }, { "epoch": 0.23896085307378515, "grad_norm": 0.0, "learning_rate": 7.611894367709154e-06, "loss": 0.0, "step": 33390 }, { "epoch": 0.23903241966649968, "grad_norm": 1.5596071989421034e-08, "learning_rate": 7.6111787017820086e-06, "loss": 0.0, "step": 33400 }, { "epoch": 0.2391039862592142, "grad_norm": 0.0, "learning_rate": 7.610463035854864e-06, "loss": 0.0, "step": 33410 }, { "epoch": 0.2391755528519287, "grad_norm": 0.0, "learning_rate": 7.609747369927718e-06, "loss": 0.0014, "step": 33420 }, { "epoch": 0.23924711944464325, "grad_norm": 0.0, "learning_rate": 7.609031704000574e-06, "loss": 0.0, "step": 33430 }, { "epoch": 0.23931868603735776, "grad_norm": 0.0, "learning_rate": 7.608316038073428e-06, "loss": 0.0, "step": 33440 }, { "epoch": 0.23939025263007227, "grad_norm": 0.0, "learning_rate": 7.607600372146283e-06, "loss": 0.0, "step": 33450 }, { "epoch": 0.2394618192227868, "grad_norm": 0.0018059952417388558, "learning_rate": 7.606884706219138e-06, "loss": 0.0001, "step": 33460 }, { "epoch": 0.23953338581550132, "grad_norm": 0.0, "learning_rate": 7.606169040291992e-06, "loss": 0.0, "step": 33470 }, { "epoch": 0.23960495240821583, "grad_norm": 7.602550506591797, "learning_rate": 7.6054533743648475e-06, "loss": 0.0017, "step": 33480 }, { "epoch": 0.23967651900093037, "grad_norm": 147.51795959472656, "learning_rate": 7.604737708437702e-06, "loss": 0.0228, "step": 33490 }, { "epoch": 0.23974808559364488, "grad_norm": 0.0, "learning_rate": 7.604022042510557e-06, "loss": 0.0, "step": 33500 }, { "epoch": 0.2398196521863594, "grad_norm": 4.638473285467626e-07, "learning_rate": 7.603306376583412e-06, "loss": 0.0, "step": 33510 }, { "epoch": 0.23989121877907393, "grad_norm": 1.1696352775203422e-09, "learning_rate": 7.602590710656266e-06, "loss": 0.0, "step": 33520 }, { "epoch": 0.23996278537178845, "grad_norm": 0.0, "learning_rate": 7.601875044729121e-06, "loss": 0.0006, "step": 33530 }, { "epoch": 0.24003435196450296, "grad_norm": 0.0, "learning_rate": 7.601159378801976e-06, "loss": 0.0, "step": 33540 }, { "epoch": 0.2401059185572175, "grad_norm": 0.0, "learning_rate": 7.600443712874831e-06, "loss": 0.0, "step": 33550 }, { "epoch": 0.240177485149932, "grad_norm": 0.0, "learning_rate": 7.599728046947686e-06, "loss": 0.0, "step": 33560 }, { "epoch": 0.24024905174264652, "grad_norm": 0.0, "learning_rate": 7.599012381020541e-06, "loss": 0.0, "step": 33570 }, { "epoch": 0.24032061833536106, "grad_norm": 0.0, "learning_rate": 7.598296715093395e-06, "loss": 0.0, "step": 33580 }, { "epoch": 0.24039218492807557, "grad_norm": 0.0, "learning_rate": 7.59758104916625e-06, "loss": 0.0, "step": 33590 }, { "epoch": 0.2404637515207901, "grad_norm": 9.245964065485168e-07, "learning_rate": 7.596865383239105e-06, "loss": 0.0, "step": 33600 }, { "epoch": 0.24053531811350462, "grad_norm": 4.996473762730602e-06, "learning_rate": 7.5961497173119595e-06, "loss": 0.0049, "step": 33610 }, { "epoch": 0.24060688470621913, "grad_norm": 0.0, "learning_rate": 7.595434051384815e-06, "loss": 0.0, "step": 33620 }, { "epoch": 0.24067845129893367, "grad_norm": 1.2789298580173636e-06, "learning_rate": 7.594718385457669e-06, "loss": 0.0, "step": 33630 }, { "epoch": 0.24075001789164818, "grad_norm": 0.0, "learning_rate": 7.594002719530523e-06, "loss": 0.0, "step": 33640 }, { "epoch": 0.2408215844843627, "grad_norm": 0.09602706134319305, "learning_rate": 7.593287053603379e-06, "loss": 0.0019, "step": 33650 }, { "epoch": 0.24089315107707723, "grad_norm": 0.0, "learning_rate": 7.5925713876762334e-06, "loss": 0.0005, "step": 33660 }, { "epoch": 0.24096471766979174, "grad_norm": 0.0, "learning_rate": 7.591855721749089e-06, "loss": 0.0, "step": 33670 }, { "epoch": 0.24103628426250626, "grad_norm": 0.0, "learning_rate": 7.591140055821943e-06, "loss": 0.0001, "step": 33680 }, { "epoch": 0.2411078508552208, "grad_norm": 9.420175552368164, "learning_rate": 7.5904243898947985e-06, "loss": 0.0153, "step": 33690 }, { "epoch": 0.2411794174479353, "grad_norm": 0.0, "learning_rate": 7.589708723967653e-06, "loss": 0.0, "step": 33700 }, { "epoch": 0.24125098404064982, "grad_norm": 0.0, "learning_rate": 7.5889930580405065e-06, "loss": 0.0, "step": 33710 }, { "epoch": 0.24132255063336436, "grad_norm": 0.0, "learning_rate": 7.588277392113363e-06, "loss": 0.0, "step": 33720 }, { "epoch": 0.24139411722607887, "grad_norm": 0.0, "learning_rate": 7.587561726186217e-06, "loss": 0.012, "step": 33730 }, { "epoch": 0.24146568381879338, "grad_norm": 0.0, "learning_rate": 7.586846060259072e-06, "loss": 0.0, "step": 33740 }, { "epoch": 0.24153725041150792, "grad_norm": 0.012127753347158432, "learning_rate": 7.586130394331927e-06, "loss": 0.001, "step": 33750 }, { "epoch": 0.24160881700422243, "grad_norm": 0.0, "learning_rate": 7.58541472840478e-06, "loss": 0.0, "step": 33760 }, { "epoch": 0.24168038359693694, "grad_norm": 0.0, "learning_rate": 7.5846990624776366e-06, "loss": 0.0, "step": 33770 }, { "epoch": 0.24175195018965148, "grad_norm": 0.0, "learning_rate": 7.58398339655049e-06, "loss": 0.0017, "step": 33780 }, { "epoch": 0.241823516782366, "grad_norm": 0.00081223453162238, "learning_rate": 7.583267730623346e-06, "loss": 0.0463, "step": 33790 }, { "epoch": 0.2418950833750805, "grad_norm": 0.0, "learning_rate": 7.5825520646962e-06, "loss": 0.0, "step": 33800 }, { "epoch": 0.24196664996779504, "grad_norm": 0.0, "learning_rate": 7.581836398769056e-06, "loss": 0.0, "step": 33810 }, { "epoch": 0.24203821656050956, "grad_norm": 3.3326166430924786e-06, "learning_rate": 7.5811207328419105e-06, "loss": 0.0002, "step": 33820 }, { "epoch": 0.24210978315322407, "grad_norm": 0.0, "learning_rate": 7.580405066914764e-06, "loss": 0.0, "step": 33830 }, { "epoch": 0.2421813497459386, "grad_norm": 2.4148541797330836e-06, "learning_rate": 7.57968940098762e-06, "loss": 0.0536, "step": 33840 }, { "epoch": 0.24225291633865312, "grad_norm": 0.0, "learning_rate": 7.578973735060474e-06, "loss": 0.0001, "step": 33850 }, { "epoch": 0.24232448293136763, "grad_norm": 0.0014876829227432609, "learning_rate": 7.57825806913333e-06, "loss": 0.0, "step": 33860 }, { "epoch": 0.24239604952408217, "grad_norm": 0.0, "learning_rate": 7.5775424032061835e-06, "loss": 0.0, "step": 33870 }, { "epoch": 0.24246761611679668, "grad_norm": 0.003667317796498537, "learning_rate": 7.576826737279038e-06, "loss": 0.0081, "step": 33880 }, { "epoch": 0.2425391827095112, "grad_norm": 0.0, "learning_rate": 7.576111071351894e-06, "loss": 0.0, "step": 33890 }, { "epoch": 0.24261074930222573, "grad_norm": 0.0, "learning_rate": 7.575395405424748e-06, "loss": 0.0, "step": 33900 }, { "epoch": 0.24268231589494024, "grad_norm": 0.0, "learning_rate": 7.574679739497604e-06, "loss": 0.0, "step": 33910 }, { "epoch": 0.24275388248765475, "grad_norm": 0.0, "learning_rate": 7.5739640735704575e-06, "loss": 0.016, "step": 33920 }, { "epoch": 0.2428254490803693, "grad_norm": 0.0, "learning_rate": 7.573248407643314e-06, "loss": 0.0, "step": 33930 }, { "epoch": 0.2428970156730838, "grad_norm": 0.0, "learning_rate": 7.572532741716167e-06, "loss": 0.0, "step": 33940 }, { "epoch": 0.24296858226579832, "grad_norm": 0.0, "learning_rate": 7.571817075789022e-06, "loss": 0.0, "step": 33950 }, { "epoch": 0.24304014885851286, "grad_norm": 0.0, "learning_rate": 7.571101409861877e-06, "loss": 0.0001, "step": 33960 }, { "epoch": 0.24311171545122737, "grad_norm": 0.32166787981987, "learning_rate": 7.570385743934731e-06, "loss": 0.0, "step": 33970 }, { "epoch": 0.24318328204394188, "grad_norm": 0.0, "learning_rate": 7.5696700780075875e-06, "loss": 0.0001, "step": 33980 }, { "epoch": 0.24325484863665642, "grad_norm": 0.0, "learning_rate": 7.568954412080441e-06, "loss": 0.0, "step": 33990 }, { "epoch": 0.24332641522937093, "grad_norm": 0.0, "learning_rate": 7.5682387461532956e-06, "loss": 0.0002, "step": 34000 }, { "epoch": 0.24339798182208544, "grad_norm": 0.018788035959005356, "learning_rate": 7.567523080226151e-06, "loss": 0.0, "step": 34010 }, { "epoch": 0.24346954841479998, "grad_norm": 0.00030417367815971375, "learning_rate": 7.566807414299005e-06, "loss": 0.0, "step": 34020 }, { "epoch": 0.2435411150075145, "grad_norm": 0.0, "learning_rate": 7.566091748371861e-06, "loss": 0.0001, "step": 34030 }, { "epoch": 0.243612681600229, "grad_norm": 0.0, "learning_rate": 7.565376082444715e-06, "loss": 0.0339, "step": 34040 }, { "epoch": 0.24368424819294354, "grad_norm": 0.0, "learning_rate": 7.564660416517571e-06, "loss": 0.0002, "step": 34050 }, { "epoch": 0.24375581478565805, "grad_norm": 0.0, "learning_rate": 7.563944750590425e-06, "loss": 0.0, "step": 34060 }, { "epoch": 0.24382738137837257, "grad_norm": 0.0, "learning_rate": 7.563229084663279e-06, "loss": 0.0002, "step": 34070 }, { "epoch": 0.2438989479710871, "grad_norm": 0.0, "learning_rate": 7.5625134187361345e-06, "loss": 0.3883, "step": 34080 }, { "epoch": 0.24397051456380162, "grad_norm": 0.0, "learning_rate": 7.561797752808989e-06, "loss": 0.0, "step": 34090 }, { "epoch": 0.24404208115651613, "grad_norm": 0.0, "learning_rate": 7.561082086881844e-06, "loss": 0.0, "step": 34100 }, { "epoch": 0.24411364774923067, "grad_norm": 0.0, "learning_rate": 7.560366420954699e-06, "loss": 0.0, "step": 34110 }, { "epoch": 0.24418521434194518, "grad_norm": 0.007458120584487915, "learning_rate": 7.559650755027553e-06, "loss": 0.0156, "step": 34120 }, { "epoch": 0.2442567809346597, "grad_norm": 0.0, "learning_rate": 7.5589350891004084e-06, "loss": 0.0, "step": 34130 }, { "epoch": 0.24432834752737423, "grad_norm": 0.0, "learning_rate": 7.558219423173263e-06, "loss": 0.0, "step": 34140 }, { "epoch": 0.24439991412008874, "grad_norm": 0.0, "learning_rate": 7.557503757246118e-06, "loss": 0.0, "step": 34150 }, { "epoch": 0.24447148071280325, "grad_norm": 0.0, "learning_rate": 7.556788091318973e-06, "loss": 0.0, "step": 34160 }, { "epoch": 0.2445430473055178, "grad_norm": 0.8730831742286682, "learning_rate": 7.556072425391827e-06, "loss": 0.0003, "step": 34170 }, { "epoch": 0.2446146138982323, "grad_norm": 0.6903591156005859, "learning_rate": 7.555356759464682e-06, "loss": 0.0313, "step": 34180 }, { "epoch": 0.24468618049094681, "grad_norm": 0.0, "learning_rate": 7.554641093537537e-06, "loss": 0.0, "step": 34190 }, { "epoch": 0.24475774708366135, "grad_norm": 0.0, "learning_rate": 7.553925427610392e-06, "loss": 0.0176, "step": 34200 }, { "epoch": 0.24482931367637586, "grad_norm": 0.0, "learning_rate": 7.5532097616832465e-06, "loss": 0.0, "step": 34210 }, { "epoch": 0.24490088026909038, "grad_norm": 2.866196169293289e-08, "learning_rate": 7.552494095756102e-06, "loss": 0.0, "step": 34220 }, { "epoch": 0.24497244686180492, "grad_norm": 0.0, "learning_rate": 7.551778429828956e-06, "loss": 0.0024, "step": 34230 }, { "epoch": 0.24504401345451943, "grad_norm": 0.0, "learning_rate": 7.551062763901811e-06, "loss": 0.0, "step": 34240 }, { "epoch": 0.24511558004723394, "grad_norm": 0.0, "learning_rate": 7.550347097974666e-06, "loss": 0.0, "step": 34250 }, { "epoch": 0.24518714663994848, "grad_norm": 0.0, "learning_rate": 7.5496314320475204e-06, "loss": 0.0232, "step": 34260 }, { "epoch": 0.245258713232663, "grad_norm": 0.0, "learning_rate": 7.548915766120376e-06, "loss": 0.0081, "step": 34270 }, { "epoch": 0.2453302798253775, "grad_norm": 0.0, "learning_rate": 7.54820010019323e-06, "loss": 0.0, "step": 34280 }, { "epoch": 0.24540184641809204, "grad_norm": 0.0, "learning_rate": 7.547484434266085e-06, "loss": 0.0004, "step": 34290 }, { "epoch": 0.24547341301080655, "grad_norm": 0.0, "learning_rate": 7.54676876833894e-06, "loss": 0.4574, "step": 34300 }, { "epoch": 0.24554497960352106, "grad_norm": 0.0, "learning_rate": 7.546053102411794e-06, "loss": 0.0, "step": 34310 }, { "epoch": 0.2456165461962356, "grad_norm": 0.0, "learning_rate": 7.54533743648465e-06, "loss": 0.0389, "step": 34320 }, { "epoch": 0.2456881127889501, "grad_norm": 3.420727580305538e-06, "learning_rate": 7.544621770557504e-06, "loss": 0.0, "step": 34330 }, { "epoch": 0.24575967938166463, "grad_norm": 0.0, "learning_rate": 7.543906104630359e-06, "loss": 0.0, "step": 34340 }, { "epoch": 0.24583124597437916, "grad_norm": 0.0, "learning_rate": 7.543190438703214e-06, "loss": 0.0, "step": 34350 }, { "epoch": 0.24590281256709368, "grad_norm": 0.0, "learning_rate": 7.542474772776068e-06, "loss": 0.0, "step": 34360 }, { "epoch": 0.24597437915980822, "grad_norm": 0.0, "learning_rate": 7.5417591068489236e-06, "loss": 0.0, "step": 34370 }, { "epoch": 0.24604594575252273, "grad_norm": 0.0001143149274867028, "learning_rate": 7.541043440921778e-06, "loss": 0.0, "step": 34380 }, { "epoch": 0.24611751234523724, "grad_norm": 0.0, "learning_rate": 7.540327774994633e-06, "loss": 0.0, "step": 34390 }, { "epoch": 0.24618907893795178, "grad_norm": 0.0, "learning_rate": 7.539612109067488e-06, "loss": 0.0, "step": 34400 }, { "epoch": 0.2462606455306663, "grad_norm": 0.0, "learning_rate": 7.538896443140342e-06, "loss": 0.0, "step": 34410 }, { "epoch": 0.2463322121233808, "grad_norm": 0.0, "learning_rate": 7.5381807772131975e-06, "loss": 0.0, "step": 34420 }, { "epoch": 0.24640377871609534, "grad_norm": 0.00034698835224844515, "learning_rate": 7.537465111286052e-06, "loss": 0.1964, "step": 34430 }, { "epoch": 0.24647534530880985, "grad_norm": 0.0, "learning_rate": 7.536749445358907e-06, "loss": 0.0, "step": 34440 }, { "epoch": 0.24654691190152436, "grad_norm": 0.0, "learning_rate": 7.536033779431762e-06, "loss": 0.0, "step": 34450 }, { "epoch": 0.2466184784942389, "grad_norm": 0.0, "learning_rate": 7.535318113504617e-06, "loss": 0.0, "step": 34460 }, { "epoch": 0.2466900450869534, "grad_norm": 0.0, "learning_rate": 7.534602447577471e-06, "loss": 0.0, "step": 34470 }, { "epoch": 0.24676161167966792, "grad_norm": 0.0, "learning_rate": 7.533886781650326e-06, "loss": 0.0, "step": 34480 }, { "epoch": 0.24683317827238246, "grad_norm": 0.0, "learning_rate": 7.533171115723181e-06, "loss": 0.0, "step": 34490 }, { "epoch": 0.24690474486509698, "grad_norm": 0.0, "learning_rate": 7.532455449796036e-06, "loss": 0.0, "step": 34500 }, { "epoch": 0.2469763114578115, "grad_norm": 0.0, "learning_rate": 7.531739783868891e-06, "loss": 0.0, "step": 34510 }, { "epoch": 0.24704787805052603, "grad_norm": 2.222907857785117e-09, "learning_rate": 7.531024117941745e-06, "loss": 0.0028, "step": 34520 }, { "epoch": 0.24711944464324054, "grad_norm": 8.846755550351304e-10, "learning_rate": 7.5303084520146e-06, "loss": 0.0, "step": 34530 }, { "epoch": 0.24719101123595505, "grad_norm": 4.0047169136414595e-08, "learning_rate": 7.529592786087455e-06, "loss": 0.0, "step": 34540 }, { "epoch": 0.2472625778286696, "grad_norm": 0.00936159212142229, "learning_rate": 7.5288771201603095e-06, "loss": 0.0003, "step": 34550 }, { "epoch": 0.2473341444213841, "grad_norm": 0.0, "learning_rate": 7.528161454233165e-06, "loss": 0.0, "step": 34560 }, { "epoch": 0.2474057110140986, "grad_norm": 0.0, "learning_rate": 7.527445788306019e-06, "loss": 0.0, "step": 34570 }, { "epoch": 0.24747727760681315, "grad_norm": 0.0, "learning_rate": 7.5267301223788745e-06, "loss": 0.0, "step": 34580 }, { "epoch": 0.24754884419952766, "grad_norm": 0.0, "learning_rate": 7.526014456451729e-06, "loss": 0.1359, "step": 34590 }, { "epoch": 0.24762041079224217, "grad_norm": 0.0, "learning_rate": 7.525298790524583e-06, "loss": 0.0, "step": 34600 }, { "epoch": 0.2476919773849567, "grad_norm": 0.0, "learning_rate": 7.524583124597439e-06, "loss": 0.0, "step": 34610 }, { "epoch": 0.24776354397767122, "grad_norm": 0.0, "learning_rate": 7.523867458670293e-06, "loss": 0.0001, "step": 34620 }, { "epoch": 0.24783511057038574, "grad_norm": 0.0, "learning_rate": 7.5231517927431484e-06, "loss": 0.0, "step": 34630 }, { "epoch": 0.24790667716310028, "grad_norm": 0.0, "learning_rate": 7.522436126816003e-06, "loss": 0.0, "step": 34640 }, { "epoch": 0.2479782437558148, "grad_norm": 1.7069092988967896, "learning_rate": 7.521720460888857e-06, "loss": 0.0002, "step": 34650 }, { "epoch": 0.2480498103485293, "grad_norm": 0.0, "learning_rate": 7.521004794961713e-06, "loss": 0.0, "step": 34660 }, { "epoch": 0.24812137694124384, "grad_norm": 8.990054084279109e-06, "learning_rate": 7.520289129034567e-06, "loss": 0.0, "step": 34670 }, { "epoch": 0.24819294353395835, "grad_norm": 0.0, "learning_rate": 7.519573463107422e-06, "loss": 1.325, "step": 34680 }, { "epoch": 0.24826451012667286, "grad_norm": 0.0, "learning_rate": 7.518857797180277e-06, "loss": 0.0103, "step": 34690 }, { "epoch": 0.2483360767193874, "grad_norm": 0.0, "learning_rate": 7.518142131253132e-06, "loss": 0.0, "step": 34700 }, { "epoch": 0.2484076433121019, "grad_norm": 0.0, "learning_rate": 7.5174264653259865e-06, "loss": 0.0, "step": 34710 }, { "epoch": 0.24847920990481642, "grad_norm": 0.0006796089000999928, "learning_rate": 7.516710799398841e-06, "loss": 0.0007, "step": 34720 }, { "epoch": 0.24855077649753096, "grad_norm": 0.0, "learning_rate": 7.515995133471696e-06, "loss": 0.0, "step": 34730 }, { "epoch": 0.24862234309024547, "grad_norm": 0.0009573515853844583, "learning_rate": 7.515279467544551e-06, "loss": 0.011, "step": 34740 }, { "epoch": 0.24869390968295998, "grad_norm": 6.139664649963379, "learning_rate": 7.514563801617406e-06, "loss": 0.0018, "step": 34750 }, { "epoch": 0.24876547627567452, "grad_norm": 1.2236006341481698e-06, "learning_rate": 7.5138481356902605e-06, "loss": 0.0001, "step": 34760 }, { "epoch": 0.24883704286838904, "grad_norm": 0.002196627901867032, "learning_rate": 7.513132469763115e-06, "loss": 0.4258, "step": 34770 }, { "epoch": 0.24890860946110355, "grad_norm": 0.18719744682312012, "learning_rate": 7.51241680383597e-06, "loss": 0.027, "step": 34780 }, { "epoch": 0.2489801760538181, "grad_norm": 2.1924417524132878e-05, "learning_rate": 7.511701137908825e-06, "loss": 0.0, "step": 34790 }, { "epoch": 0.2490517426465326, "grad_norm": 0.0, "learning_rate": 7.51098547198168e-06, "loss": 0.0, "step": 34800 }, { "epoch": 0.2491233092392471, "grad_norm": 0.0, "learning_rate": 7.510269806054534e-06, "loss": 0.0, "step": 34810 }, { "epoch": 0.24919487583196165, "grad_norm": 0.0008169662323780358, "learning_rate": 7.50955414012739e-06, "loss": 0.0, "step": 34820 }, { "epoch": 0.24926644242467616, "grad_norm": 0.0, "learning_rate": 7.508838474200244e-06, "loss": 0.0, "step": 34830 }, { "epoch": 0.24933800901739067, "grad_norm": 0.0, "learning_rate": 7.5081228082730986e-06, "loss": 0.0001, "step": 34840 }, { "epoch": 0.2494095756101052, "grad_norm": 0.0, "learning_rate": 7.507407142345954e-06, "loss": 0.0023, "step": 34850 }, { "epoch": 0.24948114220281972, "grad_norm": 0.0, "learning_rate": 7.506691476418808e-06, "loss": 0.0004, "step": 34860 }, { "epoch": 0.24955270879553423, "grad_norm": 0.005620930343866348, "learning_rate": 7.505975810491664e-06, "loss": 0.0, "step": 34870 }, { "epoch": 0.24962427538824877, "grad_norm": 0.0, "learning_rate": 7.505260144564518e-06, "loss": 0.0, "step": 34880 }, { "epoch": 0.24969584198096328, "grad_norm": 9.56789980577355e-10, "learning_rate": 7.5045444786373725e-06, "loss": 0.0, "step": 34890 }, { "epoch": 0.2497674085736778, "grad_norm": 0.0, "learning_rate": 7.503828812710228e-06, "loss": 0.0, "step": 34900 }, { "epoch": 0.24983897516639234, "grad_norm": 0.0, "learning_rate": 7.503113146783082e-06, "loss": 0.0, "step": 34910 }, { "epoch": 0.24991054175910685, "grad_norm": 0.0, "learning_rate": 7.5023974808559375e-06, "loss": 0.0002, "step": 34920 }, { "epoch": 0.24998210835182136, "grad_norm": 0.0, "learning_rate": 7.501681814928792e-06, "loss": 0.0, "step": 34930 }, { "epoch": 0.2500536749445359, "grad_norm": 0.0, "learning_rate": 7.500966149001646e-06, "loss": 0.0, "step": 34940 }, { "epoch": 0.2501252415372504, "grad_norm": 0.0, "learning_rate": 7.500250483074502e-06, "loss": 0.0, "step": 34950 }, { "epoch": 0.2501968081299649, "grad_norm": 0.0, "learning_rate": 7.499534817147356e-06, "loss": 0.0112, "step": 34960 }, { "epoch": 0.25026837472267943, "grad_norm": 6.892782211303711, "learning_rate": 7.498819151220211e-06, "loss": 0.001, "step": 34970 }, { "epoch": 0.250339941315394, "grad_norm": 1.8025264580501243e-05, "learning_rate": 7.498103485293066e-06, "loss": 0.0, "step": 34980 }, { "epoch": 0.2504115079081085, "grad_norm": 0.0, "learning_rate": 7.497387819365921e-06, "loss": 0.0, "step": 34990 }, { "epoch": 0.250483074500823, "grad_norm": 0.0, "learning_rate": 7.496672153438776e-06, "loss": 0.0, "step": 35000 }, { "epoch": 0.25055464109353753, "grad_norm": 0.0588691383600235, "learning_rate": 7.49595648751163e-06, "loss": 0.0, "step": 35010 }, { "epoch": 0.25062620768625204, "grad_norm": 0.0, "learning_rate": 7.495240821584485e-06, "loss": 0.0, "step": 35020 }, { "epoch": 0.25069777427896656, "grad_norm": 0.0013064929516986012, "learning_rate": 7.49452515565734e-06, "loss": 0.0, "step": 35030 }, { "epoch": 0.2507693408716811, "grad_norm": 0.08441948890686035, "learning_rate": 7.493809489730195e-06, "loss": 0.0, "step": 35040 }, { "epoch": 0.25084090746439563, "grad_norm": 0.0, "learning_rate": 7.4930938238030495e-06, "loss": 0.0001, "step": 35050 }, { "epoch": 0.25091247405711015, "grad_norm": 0.0, "learning_rate": 7.492378157875904e-06, "loss": 0.0001, "step": 35060 }, { "epoch": 0.25098404064982466, "grad_norm": 293.383056640625, "learning_rate": 7.491662491948759e-06, "loss": 0.0758, "step": 35070 }, { "epoch": 0.25105560724253917, "grad_norm": 6.747544034624298e-08, "learning_rate": 7.490946826021614e-06, "loss": 0.4379, "step": 35080 }, { "epoch": 0.2511271738352537, "grad_norm": 9.49084699719549e-10, "learning_rate": 7.490231160094469e-06, "loss": 0.0, "step": 35090 }, { "epoch": 0.25119874042796825, "grad_norm": 0.0, "learning_rate": 7.4895154941673234e-06, "loss": 0.0001, "step": 35100 }, { "epoch": 0.25127030702068276, "grad_norm": 0.0, "learning_rate": 7.488799828240179e-06, "loss": 0.0001, "step": 35110 }, { "epoch": 0.25134187361339727, "grad_norm": 3.489631490083411e-05, "learning_rate": 7.488084162313033e-06, "loss": 0.0, "step": 35120 }, { "epoch": 0.2514134402061118, "grad_norm": 0.027101578190922737, "learning_rate": 7.487368496385888e-06, "loss": 0.0, "step": 35130 }, { "epoch": 0.2514850067988263, "grad_norm": 0.0, "learning_rate": 7.486652830458743e-06, "loss": 0.0, "step": 35140 }, { "epoch": 0.2515565733915408, "grad_norm": 5.822503812424884e-09, "learning_rate": 7.485937164531597e-06, "loss": 0.0, "step": 35150 }, { "epoch": 0.2516281399842554, "grad_norm": 0.0, "learning_rate": 7.485221498604453e-06, "loss": 0.0, "step": 35160 }, { "epoch": 0.2516997065769699, "grad_norm": 0.0, "learning_rate": 7.484505832677307e-06, "loss": 0.0014, "step": 35170 }, { "epoch": 0.2517712731696844, "grad_norm": 0.0, "learning_rate": 7.4837901667501615e-06, "loss": 0.0028, "step": 35180 }, { "epoch": 0.2518428397623989, "grad_norm": 0.00011295374133624136, "learning_rate": 7.483074500823017e-06, "loss": 0.0, "step": 35190 }, { "epoch": 0.2519144063551134, "grad_norm": 0.0, "learning_rate": 7.482358834895871e-06, "loss": 0.0, "step": 35200 }, { "epoch": 0.25198597294782793, "grad_norm": 4.169101259776653e-07, "learning_rate": 7.4816431689687266e-06, "loss": 0.0, "step": 35210 }, { "epoch": 0.2520575395405425, "grad_norm": 0.0, "learning_rate": 7.480927503041581e-06, "loss": 0.0, "step": 35220 }, { "epoch": 0.252129106133257, "grad_norm": 0.010409394279122353, "learning_rate": 7.480211837114436e-06, "loss": 0.0005, "step": 35230 }, { "epoch": 0.2522006727259715, "grad_norm": 0.0, "learning_rate": 7.479496171187291e-06, "loss": 0.0, "step": 35240 }, { "epoch": 0.25227223931868603, "grad_norm": 0.0, "learning_rate": 7.478780505260144e-06, "loss": 0.0, "step": 35250 }, { "epoch": 0.25234380591140054, "grad_norm": 1.8347961372455757e-07, "learning_rate": 7.4780648393330005e-06, "loss": 1.0865, "step": 35260 }, { "epoch": 0.25241537250411505, "grad_norm": 4.63895616364951e-10, "learning_rate": 7.477349173405855e-06, "loss": 0.0003, "step": 35270 }, { "epoch": 0.2524869390968296, "grad_norm": 0.0, "learning_rate": 7.47663350747871e-06, "loss": 0.523, "step": 35280 }, { "epoch": 0.25255850568954413, "grad_norm": 0.0, "learning_rate": 7.475917841551565e-06, "loss": 0.0, "step": 35290 }, { "epoch": 0.25263007228225864, "grad_norm": 0.0, "learning_rate": 7.475202175624418e-06, "loss": 0.0, "step": 35300 }, { "epoch": 0.25270163887497316, "grad_norm": 0.0697610005736351, "learning_rate": 7.474486509697274e-06, "loss": 0.0, "step": 35310 }, { "epoch": 0.25277320546768767, "grad_norm": 0.0, "learning_rate": 7.473770843770128e-06, "loss": 0.1626, "step": 35320 }, { "epoch": 0.2528447720604022, "grad_norm": 0.0, "learning_rate": 7.473055177842984e-06, "loss": 0.0, "step": 35330 }, { "epoch": 0.25291633865311675, "grad_norm": 0.0, "learning_rate": 7.472339511915839e-06, "loss": 0.0, "step": 35340 }, { "epoch": 0.25298790524583126, "grad_norm": 23.77828598022461, "learning_rate": 7.471623845988694e-06, "loss": 0.005, "step": 35350 }, { "epoch": 0.25305947183854577, "grad_norm": 0.004619797691702843, "learning_rate": 7.470908180061548e-06, "loss": 0.0, "step": 35360 }, { "epoch": 0.2531310384312603, "grad_norm": 0.37820616364479065, "learning_rate": 7.470192514134402e-06, "loss": 0.0767, "step": 35370 }, { "epoch": 0.2532026050239748, "grad_norm": 0.0, "learning_rate": 7.469476848207258e-06, "loss": 0.0, "step": 35380 }, { "epoch": 0.2532741716166893, "grad_norm": 0.0, "learning_rate": 7.468761182280112e-06, "loss": 0.0, "step": 35390 }, { "epoch": 0.25334573820940387, "grad_norm": 4.826331556628816e-10, "learning_rate": 7.468045516352968e-06, "loss": 0.0, "step": 35400 }, { "epoch": 0.2534173048021184, "grad_norm": 0.0, "learning_rate": 7.467329850425822e-06, "loss": 0.0, "step": 35410 }, { "epoch": 0.2534888713948329, "grad_norm": 0.0, "learning_rate": 7.466614184498676e-06, "loss": 0.0, "step": 35420 }, { "epoch": 0.2535604379875474, "grad_norm": 0.0, "learning_rate": 7.465898518571532e-06, "loss": 0.0, "step": 35430 }, { "epoch": 0.2536320045802619, "grad_norm": 0.0, "learning_rate": 7.4651828526443856e-06, "loss": 0.0, "step": 35440 }, { "epoch": 0.2537035711729764, "grad_norm": 5.993135929107666, "learning_rate": 7.464467186717242e-06, "loss": 0.0011, "step": 35450 }, { "epoch": 0.253775137765691, "grad_norm": 0.014152539893984795, "learning_rate": 7.463751520790095e-06, "loss": 0.0057, "step": 35460 }, { "epoch": 0.2538467043584055, "grad_norm": 0.08699127286672592, "learning_rate": 7.4630358548629514e-06, "loss": 0.0021, "step": 35470 }, { "epoch": 0.25391827095112, "grad_norm": 0.0, "learning_rate": 7.462320188935805e-06, "loss": 0.0, "step": 35480 }, { "epoch": 0.25398983754383453, "grad_norm": 0.0006404116284102201, "learning_rate": 7.4616045230086595e-06, "loss": 0.0009, "step": 35490 }, { "epoch": 0.25406140413654904, "grad_norm": 793.0825805664062, "learning_rate": 7.460888857081516e-06, "loss": 0.7289, "step": 35500 }, { "epoch": 0.25413297072926355, "grad_norm": 8.427518127795963e-10, "learning_rate": 7.460173191154369e-06, "loss": 0.0, "step": 35510 }, { "epoch": 0.2542045373219781, "grad_norm": 0.054432980716228485, "learning_rate": 7.459457525227225e-06, "loss": 0.0, "step": 35520 }, { "epoch": 0.25427610391469263, "grad_norm": 0.0, "learning_rate": 7.458741859300079e-06, "loss": 0.0, "step": 35530 }, { "epoch": 0.25434767050740714, "grad_norm": 0.0, "learning_rate": 7.458026193372933e-06, "loss": 0.0001, "step": 35540 }, { "epoch": 0.25441923710012165, "grad_norm": 5.4542642402566344e-08, "learning_rate": 7.457310527445789e-06, "loss": 0.0, "step": 35550 }, { "epoch": 0.25449080369283616, "grad_norm": 0.0, "learning_rate": 7.456594861518643e-06, "loss": 0.0, "step": 35560 }, { "epoch": 0.25456237028555073, "grad_norm": 0.00010426014341646805, "learning_rate": 7.455879195591499e-06, "loss": 0.0, "step": 35570 }, { "epoch": 0.25463393687826524, "grad_norm": 0.0, "learning_rate": 7.455163529664353e-06, "loss": 0.0001, "step": 35580 }, { "epoch": 0.25470550347097975, "grad_norm": 0.0, "learning_rate": 7.454447863737209e-06, "loss": 0.0, "step": 35590 }, { "epoch": 0.25477707006369427, "grad_norm": 8.4280276496429e-05, "learning_rate": 7.453732197810063e-06, "loss": 0.0, "step": 35600 }, { "epoch": 0.2548486366564088, "grad_norm": 0.0, "learning_rate": 7.453016531882917e-06, "loss": 0.0232, "step": 35610 }, { "epoch": 0.2549202032491233, "grad_norm": 0.0034038564190268517, "learning_rate": 7.452300865955772e-06, "loss": 0.0013, "step": 35620 }, { "epoch": 0.25499176984183786, "grad_norm": 0.0, "learning_rate": 7.451585200028627e-06, "loss": 0.0, "step": 35630 }, { "epoch": 0.25506333643455237, "grad_norm": 1.0619931600786003e-07, "learning_rate": 7.450869534101482e-06, "loss": 0.0, "step": 35640 }, { "epoch": 0.2551349030272669, "grad_norm": 0.004339080769568682, "learning_rate": 7.4501538681743365e-06, "loss": 0.0055, "step": 35650 }, { "epoch": 0.2552064696199814, "grad_norm": 0.0, "learning_rate": 7.449438202247191e-06, "loss": 0.0, "step": 35660 }, { "epoch": 0.2552780362126959, "grad_norm": 0.0, "learning_rate": 7.448722536320046e-06, "loss": 0.0, "step": 35670 }, { "epoch": 0.2553496028054104, "grad_norm": 445.08038330078125, "learning_rate": 7.448006870392901e-06, "loss": 0.2492, "step": 35680 }, { "epoch": 0.255421169398125, "grad_norm": 0.0, "learning_rate": 7.447291204465756e-06, "loss": 0.0, "step": 35690 }, { "epoch": 0.2554927359908395, "grad_norm": 0.0, "learning_rate": 7.4465755385386104e-06, "loss": 0.0, "step": 35700 }, { "epoch": 0.255564302583554, "grad_norm": 0.003444256726652384, "learning_rate": 7.445859872611465e-06, "loss": 0.0, "step": 35710 }, { "epoch": 0.2556358691762685, "grad_norm": 0.002293471246957779, "learning_rate": 7.44514420668432e-06, "loss": 0.0, "step": 35720 }, { "epoch": 0.255707435768983, "grad_norm": 0.0, "learning_rate": 7.444428540757175e-06, "loss": 0.0001, "step": 35730 }, { "epoch": 0.25577900236169754, "grad_norm": 0.0, "learning_rate": 7.44371287483003e-06, "loss": 0.0, "step": 35740 }, { "epoch": 0.2558505689544121, "grad_norm": 0.0, "learning_rate": 7.442997208902884e-06, "loss": 0.0, "step": 35750 }, { "epoch": 0.2559221355471266, "grad_norm": 0.0, "learning_rate": 7.44228154297574e-06, "loss": 0.0, "step": 35760 }, { "epoch": 0.25599370213984113, "grad_norm": 0.0, "learning_rate": 7.441565877048594e-06, "loss": 0.0, "step": 35770 }, { "epoch": 0.25606526873255564, "grad_norm": 0.0, "learning_rate": 7.4408502111214485e-06, "loss": 0.0, "step": 35780 }, { "epoch": 0.25613683532527015, "grad_norm": 0.0, "learning_rate": 7.440134545194304e-06, "loss": 0.0003, "step": 35790 }, { "epoch": 0.25620840191798466, "grad_norm": 0.001112842932343483, "learning_rate": 7.439418879267158e-06, "loss": 0.0, "step": 35800 }, { "epoch": 0.25627996851069923, "grad_norm": 0.0, "learning_rate": 7.4387032133400136e-06, "loss": 0.0, "step": 35810 }, { "epoch": 0.25635153510341374, "grad_norm": 0.0, "learning_rate": 7.437987547412868e-06, "loss": 0.0, "step": 35820 }, { "epoch": 0.25642310169612825, "grad_norm": 0.0, "learning_rate": 7.4372718814857225e-06, "loss": 0.0, "step": 35830 }, { "epoch": 0.25649466828884276, "grad_norm": 0.0, "learning_rate": 7.436556215558578e-06, "loss": 0.165, "step": 35840 }, { "epoch": 0.2565662348815573, "grad_norm": 0.0, "learning_rate": 7.435840549631432e-06, "loss": 0.0, "step": 35850 }, { "epoch": 0.2566378014742718, "grad_norm": 0.0, "learning_rate": 7.4351248837042875e-06, "loss": 0.7598, "step": 35860 }, { "epoch": 0.25670936806698635, "grad_norm": 5.366465938294596e-09, "learning_rate": 7.434409217777142e-06, "loss": 0.0, "step": 35870 }, { "epoch": 0.25678093465970087, "grad_norm": 0.0, "learning_rate": 7.433693551849997e-06, "loss": 0.0, "step": 35880 }, { "epoch": 0.2568525012524154, "grad_norm": 0.0, "learning_rate": 7.432977885922852e-06, "loss": 0.0, "step": 35890 }, { "epoch": 0.2569240678451299, "grad_norm": 0.0010984598193317652, "learning_rate": 7.432262219995706e-06, "loss": 0.0, "step": 35900 }, { "epoch": 0.2569956344378444, "grad_norm": 0.0, "learning_rate": 7.431546554068561e-06, "loss": 0.0, "step": 35910 }, { "epoch": 0.2570672010305589, "grad_norm": 2.069885886157863e-05, "learning_rate": 7.430830888141416e-06, "loss": 0.2797, "step": 35920 }, { "epoch": 0.2571387676232735, "grad_norm": 0.0002294034929946065, "learning_rate": 7.430115222214271e-06, "loss": 0.0, "step": 35930 }, { "epoch": 0.257210334215988, "grad_norm": 0.00013421152834780514, "learning_rate": 7.429399556287126e-06, "loss": 0.001, "step": 35940 }, { "epoch": 0.2572819008087025, "grad_norm": 0.0, "learning_rate": 7.42868389035998e-06, "loss": 0.0086, "step": 35950 }, { "epoch": 0.257353467401417, "grad_norm": 0.005003618076443672, "learning_rate": 7.427968224432835e-06, "loss": 0.0, "step": 35960 }, { "epoch": 0.2574250339941315, "grad_norm": 0.0, "learning_rate": 7.42725255850569e-06, "loss": 0.0, "step": 35970 }, { "epoch": 0.25749660058684604, "grad_norm": 0.0, "learning_rate": 7.426536892578545e-06, "loss": 0.0, "step": 35980 }, { "epoch": 0.2575681671795606, "grad_norm": 0.0, "learning_rate": 7.4258212266513995e-06, "loss": 0.0, "step": 35990 }, { "epoch": 0.2576397337722751, "grad_norm": 0.0, "learning_rate": 7.425105560724255e-06, "loss": 0.0, "step": 36000 }, { "epoch": 0.2577113003649896, "grad_norm": 0.0, "learning_rate": 7.424389894797109e-06, "loss": 0.0, "step": 36010 }, { "epoch": 0.25778286695770414, "grad_norm": 11.836197853088379, "learning_rate": 7.423674228869964e-06, "loss": 0.0028, "step": 36020 }, { "epoch": 0.25785443355041865, "grad_norm": 0.0, "learning_rate": 7.422958562942819e-06, "loss": 0.0, "step": 36030 }, { "epoch": 0.25792600014313316, "grad_norm": 0.8516537547111511, "learning_rate": 7.422242897015673e-06, "loss": 1.1054, "step": 36040 }, { "epoch": 0.2579975667358477, "grad_norm": 0.0, "learning_rate": 7.421527231088529e-06, "loss": 0.0, "step": 36050 }, { "epoch": 0.25806913332856224, "grad_norm": 0.0, "learning_rate": 7.420811565161383e-06, "loss": 0.0, "step": 36060 }, { "epoch": 0.25814069992127675, "grad_norm": 1.0354825690228608e-06, "learning_rate": 7.420095899234238e-06, "loss": 0.0, "step": 36070 }, { "epoch": 0.25821226651399126, "grad_norm": 0.0006073574768379331, "learning_rate": 7.419380233307093e-06, "loss": 0.0038, "step": 36080 }, { "epoch": 0.2582838331067058, "grad_norm": 0.0, "learning_rate": 7.418736133972663e-06, "loss": 1.4953, "step": 36090 }, { "epoch": 0.2583553996994203, "grad_norm": 0.0, "learning_rate": 7.418020468045517e-06, "loss": 0.0, "step": 36100 }, { "epoch": 0.25842696629213485, "grad_norm": 0.0, "learning_rate": 7.4173048021183715e-06, "loss": 0.0, "step": 36110 }, { "epoch": 0.25849853288484936, "grad_norm": 0.0, "learning_rate": 7.416589136191227e-06, "loss": 0.0046, "step": 36120 }, { "epoch": 0.2585700994775639, "grad_norm": 0.0, "learning_rate": 7.415873470264081e-06, "loss": 0.0, "step": 36130 }, { "epoch": 0.2586416660702784, "grad_norm": 0.0, "learning_rate": 7.4151578043369366e-06, "loss": 0.0, "step": 36140 }, { "epoch": 0.2587132326629929, "grad_norm": 0.0, "learning_rate": 7.414442138409791e-06, "loss": 0.0172, "step": 36150 }, { "epoch": 0.2587847992557074, "grad_norm": 0.0, "learning_rate": 7.4137264724826455e-06, "loss": 0.1817, "step": 36160 }, { "epoch": 0.258856365848422, "grad_norm": 0.0, "learning_rate": 7.413010806555501e-06, "loss": 0.0, "step": 36170 }, { "epoch": 0.2589279324411365, "grad_norm": 0.09629223495721817, "learning_rate": 7.412295140628355e-06, "loss": 0.0, "step": 36180 }, { "epoch": 0.258999499033851, "grad_norm": 2.3444687968776634e-08, "learning_rate": 7.4115794747012105e-06, "loss": 0.0, "step": 36190 }, { "epoch": 0.2590710656265655, "grad_norm": 0.4183586537837982, "learning_rate": 7.410863808774065e-06, "loss": 0.0099, "step": 36200 }, { "epoch": 0.25914263221928, "grad_norm": 0.0, "learning_rate": 7.41014814284692e-06, "loss": 0.0, "step": 36210 }, { "epoch": 0.25921419881199453, "grad_norm": 0.0, "learning_rate": 7.409432476919775e-06, "loss": 0.9159, "step": 36220 }, { "epoch": 0.2592857654047091, "grad_norm": 0.0, "learning_rate": 7.408716810992629e-06, "loss": 0.0, "step": 36230 }, { "epoch": 0.2593573319974236, "grad_norm": 0.0, "learning_rate": 7.408001145065484e-06, "loss": 0.0001, "step": 36240 }, { "epoch": 0.2594288985901381, "grad_norm": 53.08433532714844, "learning_rate": 7.407285479138339e-06, "loss": 0.0054, "step": 36250 }, { "epoch": 0.25950046518285264, "grad_norm": 0.0, "learning_rate": 7.406569813211194e-06, "loss": 0.0, "step": 36260 }, { "epoch": 0.25957203177556715, "grad_norm": 59.9887809753418, "learning_rate": 7.405854147284049e-06, "loss": 0.0074, "step": 36270 }, { "epoch": 0.25964359836828166, "grad_norm": 0.030478108674287796, "learning_rate": 7.405138481356903e-06, "loss": 0.0, "step": 36280 }, { "epoch": 0.2597151649609962, "grad_norm": 0.004141657613217831, "learning_rate": 7.404422815429758e-06, "loss": 0.0, "step": 36290 }, { "epoch": 0.25978673155371074, "grad_norm": 0.0, "learning_rate": 7.403707149502613e-06, "loss": 0.0, "step": 36300 }, { "epoch": 0.25985829814642525, "grad_norm": 0.0, "learning_rate": 7.402991483575468e-06, "loss": 0.0, "step": 36310 }, { "epoch": 0.25992986473913976, "grad_norm": 0.0, "learning_rate": 7.4022758176483225e-06, "loss": 0.0, "step": 36320 }, { "epoch": 0.26000143133185427, "grad_norm": 0.0, "learning_rate": 7.401560151721178e-06, "loss": 0.0, "step": 36330 }, { "epoch": 0.26007299792456884, "grad_norm": 1.7195695978244885e-09, "learning_rate": 7.400844485794032e-06, "loss": 0.0693, "step": 36340 }, { "epoch": 0.26014456451728335, "grad_norm": 0.0, "learning_rate": 7.400128819866887e-06, "loss": 0.0297, "step": 36350 }, { "epoch": 0.26021613110999786, "grad_norm": 0.0, "learning_rate": 7.399413153939742e-06, "loss": 0.0001, "step": 36360 }, { "epoch": 0.2602876977027124, "grad_norm": 0.0, "learning_rate": 7.398697488012596e-06, "loss": 0.0, "step": 36370 }, { "epoch": 0.2603592642954269, "grad_norm": 0.0007064917590469122, "learning_rate": 7.397981822085452e-06, "loss": 0.0, "step": 36380 }, { "epoch": 0.2604308308881414, "grad_norm": 0.0, "learning_rate": 7.397266156158306e-06, "loss": 0.0, "step": 36390 }, { "epoch": 0.26050239748085596, "grad_norm": 0.0, "learning_rate": 7.396550490231161e-06, "loss": 0.0, "step": 36400 }, { "epoch": 0.2605739640735705, "grad_norm": 0.0, "learning_rate": 7.395834824304016e-06, "loss": 0.0, "step": 36410 }, { "epoch": 0.260645530666285, "grad_norm": 0.0, "learning_rate": 7.39511915837687e-06, "loss": 0.0, "step": 36420 }, { "epoch": 0.2607170972589995, "grad_norm": 0.0, "learning_rate": 7.394403492449726e-06, "loss": 0.0, "step": 36430 }, { "epoch": 0.260788663851714, "grad_norm": 0.0017470831517130136, "learning_rate": 7.39368782652258e-06, "loss": 0.0, "step": 36440 }, { "epoch": 0.2608602304444285, "grad_norm": 0.0, "learning_rate": 7.392972160595435e-06, "loss": 0.0, "step": 36450 }, { "epoch": 0.2609317970371431, "grad_norm": 0.0, "learning_rate": 7.39225649466829e-06, "loss": 0.0, "step": 36460 }, { "epoch": 0.2610033636298576, "grad_norm": 0.0, "learning_rate": 7.391540828741144e-06, "loss": 0.0, "step": 36470 }, { "epoch": 0.2610749302225721, "grad_norm": 0.0, "learning_rate": 7.3908251628139995e-06, "loss": 0.0, "step": 36480 }, { "epoch": 0.2611464968152866, "grad_norm": 8.64825210555864e-07, "learning_rate": 7.390109496886854e-06, "loss": 0.0, "step": 36490 }, { "epoch": 0.26121806340800113, "grad_norm": 0.0, "learning_rate": 7.389393830959709e-06, "loss": 0.0, "step": 36500 }, { "epoch": 0.26128963000071564, "grad_norm": 0.0, "learning_rate": 7.388678165032564e-06, "loss": 0.0, "step": 36510 }, { "epoch": 0.2613611965934302, "grad_norm": 0.0, "learning_rate": 7.387962499105417e-06, "loss": 0.0, "step": 36520 }, { "epoch": 0.2614327631861447, "grad_norm": 0.0, "learning_rate": 7.3872468331782735e-06, "loss": 0.0, "step": 36530 }, { "epoch": 0.26150432977885923, "grad_norm": 0.0, "learning_rate": 7.386531167251128e-06, "loss": 0.0, "step": 36540 }, { "epoch": 0.26157589637157375, "grad_norm": 0.0, "learning_rate": 7.385815501323983e-06, "loss": 0.0, "step": 36550 }, { "epoch": 0.26164746296428826, "grad_norm": 0.0, "learning_rate": 7.385099835396838e-06, "loss": 0.0, "step": 36560 }, { "epoch": 0.26171902955700277, "grad_norm": 4.480896677705459e-06, "learning_rate": 7.384384169469691e-06, "loss": 0.0, "step": 36570 }, { "epoch": 0.26179059614971734, "grad_norm": 0.0, "learning_rate": 7.383668503542547e-06, "loss": 0.0758, "step": 36580 }, { "epoch": 0.26186216274243185, "grad_norm": 0.0001082594390027225, "learning_rate": 7.382952837615401e-06, "loss": 0.0, "step": 36590 }, { "epoch": 0.26193372933514636, "grad_norm": 0.0, "learning_rate": 7.382237171688257e-06, "loss": 0.0, "step": 36600 }, { "epoch": 0.26200529592786087, "grad_norm": 0.0, "learning_rate": 7.3815215057611116e-06, "loss": 0.0, "step": 36610 }, { "epoch": 0.2620768625205754, "grad_norm": 0.0, "learning_rate": 7.380805839833967e-06, "loss": 0.0011, "step": 36620 }, { "epoch": 0.2621484291132899, "grad_norm": 0.0, "learning_rate": 7.380090173906821e-06, "loss": 0.0, "step": 36630 }, { "epoch": 0.26221999570600446, "grad_norm": 0.0, "learning_rate": 7.379374507979675e-06, "loss": 0.0, "step": 36640 }, { "epoch": 0.26229156229871897, "grad_norm": 0.0, "learning_rate": 7.378658842052531e-06, "loss": 0.0, "step": 36650 }, { "epoch": 0.2623631288914335, "grad_norm": 0.0, "learning_rate": 7.377943176125385e-06, "loss": 0.0, "step": 36660 }, { "epoch": 0.262434695484148, "grad_norm": 0.0, "learning_rate": 7.377227510198241e-06, "loss": 0.0, "step": 36670 }, { "epoch": 0.2625062620768625, "grad_norm": 0.0, "learning_rate": 7.376511844271094e-06, "loss": 0.0005, "step": 36680 }, { "epoch": 0.262577828669577, "grad_norm": 9.029204051103079e-10, "learning_rate": 7.375796178343949e-06, "loss": 0.0005, "step": 36690 }, { "epoch": 0.2626493952622916, "grad_norm": 0.0, "learning_rate": 7.375080512416805e-06, "loss": 0.0, "step": 36700 }, { "epoch": 0.2627209618550061, "grad_norm": 0.0, "learning_rate": 7.3743648464896586e-06, "loss": 0.0, "step": 36710 }, { "epoch": 0.2627925284477206, "grad_norm": 0.0, "learning_rate": 7.373649180562515e-06, "loss": 0.0, "step": 36720 }, { "epoch": 0.2628640950404351, "grad_norm": 0.0, "learning_rate": 7.372933514635368e-06, "loss": 0.0, "step": 36730 }, { "epoch": 0.26293566163314963, "grad_norm": 0.29159751534461975, "learning_rate": 7.3722178487082244e-06, "loss": 0.0001, "step": 36740 }, { "epoch": 0.26300722822586414, "grad_norm": 0.0, "learning_rate": 7.371502182781078e-06, "loss": 0.0, "step": 36750 }, { "epoch": 0.2630787948185787, "grad_norm": 0.0, "learning_rate": 7.3707865168539325e-06, "loss": 0.0005, "step": 36760 }, { "epoch": 0.2631503614112932, "grad_norm": 0.001710469601675868, "learning_rate": 7.370070850926789e-06, "loss": 0.0, "step": 36770 }, { "epoch": 0.26322192800400773, "grad_norm": 0.0, "learning_rate": 7.369355184999642e-06, "loss": 0.4109, "step": 36780 }, { "epoch": 0.26329349459672224, "grad_norm": 0.0, "learning_rate": 7.368639519072498e-06, "loss": 0.0, "step": 36790 }, { "epoch": 0.26336506118943676, "grad_norm": 0.0, "learning_rate": 7.367923853145352e-06, "loss": 0.0012, "step": 36800 }, { "epoch": 0.26343662778215127, "grad_norm": 4.9164752269348355e-09, "learning_rate": 7.367208187218206e-06, "loss": 1.8469, "step": 36810 }, { "epoch": 0.26350819437486583, "grad_norm": 9.147720336914062, "learning_rate": 7.366492521291062e-06, "loss": 0.002, "step": 36820 }, { "epoch": 0.26357976096758035, "grad_norm": 0.0, "learning_rate": 7.365776855363916e-06, "loss": 0.0, "step": 36830 }, { "epoch": 0.26365132756029486, "grad_norm": 2.841255991370417e-06, "learning_rate": 7.365061189436772e-06, "loss": 0.0, "step": 36840 }, { "epoch": 0.26372289415300937, "grad_norm": 0.0, "learning_rate": 7.364345523509626e-06, "loss": 0.0001, "step": 36850 }, { "epoch": 0.2637944607457239, "grad_norm": 2.906706981775642e-07, "learning_rate": 7.363629857582482e-06, "loss": 0.0, "step": 36860 }, { "epoch": 0.2638660273384384, "grad_norm": 0.0, "learning_rate": 7.362914191655336e-06, "loss": 0.0, "step": 36870 }, { "epoch": 0.26393759393115296, "grad_norm": 0.0, "learning_rate": 7.36219852572819e-06, "loss": 0.0018, "step": 36880 }, { "epoch": 0.26400916052386747, "grad_norm": 0.0, "learning_rate": 7.361482859801045e-06, "loss": 0.0, "step": 36890 }, { "epoch": 0.264080727116582, "grad_norm": 0.0, "learning_rate": 7.3607671938739e-06, "loss": 0.0001, "step": 36900 }, { "epoch": 0.2641522937092965, "grad_norm": 0.0, "learning_rate": 7.360051527946755e-06, "loss": 0.0, "step": 36910 }, { "epoch": 0.264223860302011, "grad_norm": 0.0, "learning_rate": 7.3593358620196095e-06, "loss": 0.0002, "step": 36920 }, { "epoch": 0.2642954268947255, "grad_norm": 0.0, "learning_rate": 7.358620196092464e-06, "loss": 0.0, "step": 36930 }, { "epoch": 0.2643669934874401, "grad_norm": 0.04181355610489845, "learning_rate": 7.357904530165319e-06, "loss": 0.0774, "step": 36940 }, { "epoch": 0.2644385600801546, "grad_norm": 8.124695360933742e-10, "learning_rate": 7.357188864238174e-06, "loss": 0.0, "step": 36950 }, { "epoch": 0.2645101266728691, "grad_norm": 0.0, "learning_rate": 7.356473198311029e-06, "loss": 0.001, "step": 36960 }, { "epoch": 0.2645816932655836, "grad_norm": 0.0, "learning_rate": 7.3557575323838834e-06, "loss": 0.0, "step": 36970 }, { "epoch": 0.26465325985829813, "grad_norm": 8.157812203535286e-10, "learning_rate": 7.355041866456739e-06, "loss": 0.0, "step": 36980 }, { "epoch": 0.26472482645101264, "grad_norm": 0.0, "learning_rate": 7.354326200529593e-06, "loss": 0.018, "step": 36990 }, { "epoch": 0.2647963930437272, "grad_norm": 1.94154225141574e-07, "learning_rate": 7.353610534602448e-06, "loss": 0.0001, "step": 37000 }, { "epoch": 0.2648679596364417, "grad_norm": 0.0, "learning_rate": 7.352894868675303e-06, "loss": 0.0092, "step": 37010 }, { "epoch": 0.26493952622915623, "grad_norm": 0.0, "learning_rate": 7.352179202748157e-06, "loss": 0.0, "step": 37020 }, { "epoch": 0.26501109282187074, "grad_norm": 0.0, "learning_rate": 7.351463536821013e-06, "loss": 0.0, "step": 37030 }, { "epoch": 0.26508265941458525, "grad_norm": 0.0, "learning_rate": 7.350747870893867e-06, "loss": 0.0278, "step": 37040 }, { "epoch": 0.26515422600729976, "grad_norm": 0.0, "learning_rate": 7.3500322049667215e-06, "loss": 0.0042, "step": 37050 }, { "epoch": 0.26522579260001433, "grad_norm": 0.0, "learning_rate": 7.349316539039577e-06, "loss": 0.4664, "step": 37060 }, { "epoch": 0.26529735919272884, "grad_norm": 0.0, "learning_rate": 7.348600873112431e-06, "loss": 0.0007, "step": 37070 }, { "epoch": 0.26536892578544335, "grad_norm": 0.0008400972001254559, "learning_rate": 7.3478852071852866e-06, "loss": 0.0014, "step": 37080 }, { "epoch": 0.26544049237815787, "grad_norm": 0.0, "learning_rate": 7.347169541258141e-06, "loss": 0.0, "step": 37090 }, { "epoch": 0.2655120589708724, "grad_norm": 430.360595703125, "learning_rate": 7.346453875330996e-06, "loss": 0.7047, "step": 37100 }, { "epoch": 0.26558362556358694, "grad_norm": 4.610706980656687e-07, "learning_rate": 7.345738209403851e-06, "loss": 0.0, "step": 37110 }, { "epoch": 0.26565519215630146, "grad_norm": 0.0, "learning_rate": 7.345022543476705e-06, "loss": 0.0, "step": 37120 }, { "epoch": 0.26572675874901597, "grad_norm": 0.0, "learning_rate": 7.3443068775495605e-06, "loss": 0.0, "step": 37130 }, { "epoch": 0.2657983253417305, "grad_norm": 0.0, "learning_rate": 7.343591211622415e-06, "loss": 0.0, "step": 37140 }, { "epoch": 0.265869891934445, "grad_norm": 7.39387542125769e-05, "learning_rate": 7.34287554569527e-06, "loss": 0.0, "step": 37150 }, { "epoch": 0.2659414585271595, "grad_norm": 0.0, "learning_rate": 7.342159879768125e-06, "loss": 0.0, "step": 37160 }, { "epoch": 0.26601302511987407, "grad_norm": 0.0, "learning_rate": 7.341444213840979e-06, "loss": 0.0, "step": 37170 }, { "epoch": 0.2660845917125886, "grad_norm": 0.0, "learning_rate": 7.340728547913834e-06, "loss": 0.0, "step": 37180 }, { "epoch": 0.2661561583053031, "grad_norm": 0.0, "learning_rate": 7.340012881986689e-06, "loss": 0.0006, "step": 37190 }, { "epoch": 0.2662277248980176, "grad_norm": 0.0, "learning_rate": 7.339297216059544e-06, "loss": 0.0, "step": 37200 }, { "epoch": 0.2662992914907321, "grad_norm": 0.0, "learning_rate": 7.3385815501323986e-06, "loss": 0.0, "step": 37210 }, { "epoch": 0.2663708580834466, "grad_norm": 4.1013236113940366e-06, "learning_rate": 7.337865884205254e-06, "loss": 0.0106, "step": 37220 }, { "epoch": 0.2664424246761612, "grad_norm": 444.7977294921875, "learning_rate": 7.337150218278108e-06, "loss": 0.1418, "step": 37230 }, { "epoch": 0.2665139912688757, "grad_norm": 8.334412768817856e-07, "learning_rate": 7.336434552350963e-06, "loss": 0.0, "step": 37240 }, { "epoch": 0.2665855578615902, "grad_norm": 0.0, "learning_rate": 7.335718886423818e-06, "loss": 0.0, "step": 37250 }, { "epoch": 0.26665712445430473, "grad_norm": 49.06147003173828, "learning_rate": 7.3350032204966725e-06, "loss": 0.011, "step": 37260 }, { "epoch": 0.26672869104701924, "grad_norm": 0.0, "learning_rate": 7.334287554569528e-06, "loss": 0.0, "step": 37270 }, { "epoch": 0.26680025763973375, "grad_norm": 3.787429037060974e-08, "learning_rate": 7.333571888642382e-06, "loss": 0.0424, "step": 37280 }, { "epoch": 0.2668718242324483, "grad_norm": 0.0, "learning_rate": 7.332856222715237e-06, "loss": 0.4453, "step": 37290 }, { "epoch": 0.26694339082516283, "grad_norm": 0.0, "learning_rate": 7.332140556788092e-06, "loss": 0.0, "step": 37300 }, { "epoch": 0.26701495741787734, "grad_norm": 0.0, "learning_rate": 7.331424890860946e-06, "loss": 0.0, "step": 37310 }, { "epoch": 0.26708652401059185, "grad_norm": 7.766184717183933e-05, "learning_rate": 7.330709224933802e-06, "loss": 0.0004, "step": 37320 }, { "epoch": 0.26715809060330636, "grad_norm": 22.464237213134766, "learning_rate": 7.329993559006656e-06, "loss": 0.0042, "step": 37330 }, { "epoch": 0.2672296571960209, "grad_norm": 0.0, "learning_rate": 7.329277893079511e-06, "loss": 0.0, "step": 37340 }, { "epoch": 0.26730122378873544, "grad_norm": 0.0013029095716774464, "learning_rate": 7.328562227152366e-06, "loss": 0.0, "step": 37350 }, { "epoch": 0.26737279038144995, "grad_norm": 0.0, "learning_rate": 7.32784656122522e-06, "loss": 0.0, "step": 37360 }, { "epoch": 0.26744435697416447, "grad_norm": 0.0004231824423186481, "learning_rate": 7.327130895298076e-06, "loss": 0.0, "step": 37370 }, { "epoch": 0.267515923566879, "grad_norm": 0.0, "learning_rate": 7.32641522937093e-06, "loss": 0.0418, "step": 37380 }, { "epoch": 0.2675874901595935, "grad_norm": 3.6147637274552835e-06, "learning_rate": 7.325699563443785e-06, "loss": 0.0001, "step": 37390 }, { "epoch": 0.267659056752308, "grad_norm": 0.0, "learning_rate": 7.32498389751664e-06, "loss": 0.0, "step": 37400 }, { "epoch": 0.26773062334502257, "grad_norm": 0.004232832230627537, "learning_rate": 7.324268231589494e-06, "loss": 0.0, "step": 37410 }, { "epoch": 0.2678021899377371, "grad_norm": 8.632537173980381e-08, "learning_rate": 7.3235525656623495e-06, "loss": 0.0001, "step": 37420 }, { "epoch": 0.2678737565304516, "grad_norm": 0.0, "learning_rate": 7.322836899735204e-06, "loss": 0.0, "step": 37430 }, { "epoch": 0.2679453231231661, "grad_norm": 0.0, "learning_rate": 7.322121233808059e-06, "loss": 0.0, "step": 37440 }, { "epoch": 0.2680168897158806, "grad_norm": 0.0, "learning_rate": 7.321405567880914e-06, "loss": 0.6082, "step": 37450 }, { "epoch": 0.2680884563085951, "grad_norm": 0.0, "learning_rate": 7.320689901953768e-06, "loss": 0.0002, "step": 37460 }, { "epoch": 0.2681600229013097, "grad_norm": 0.0, "learning_rate": 7.3199742360266235e-06, "loss": 0.0001, "step": 37470 }, { "epoch": 0.2682315894940242, "grad_norm": 1.0252522386622331e-08, "learning_rate": 7.319258570099478e-06, "loss": 0.0, "step": 37480 }, { "epoch": 0.2683031560867387, "grad_norm": 0.0, "learning_rate": 7.318542904172333e-06, "loss": 0.0001, "step": 37490 }, { "epoch": 0.2683747226794532, "grad_norm": 0.0, "learning_rate": 7.317827238245188e-06, "loss": 0.0002, "step": 37500 }, { "epoch": 0.26844628927216774, "grad_norm": 0.0, "learning_rate": 7.317111572318043e-06, "loss": 0.0, "step": 37510 }, { "epoch": 0.26851785586488225, "grad_norm": 0.0, "learning_rate": 7.316395906390897e-06, "loss": 0.0, "step": 37520 }, { "epoch": 0.2685894224575968, "grad_norm": 0.0, "learning_rate": 7.315680240463752e-06, "loss": 0.0064, "step": 37530 }, { "epoch": 0.2686609890503113, "grad_norm": 0.0, "learning_rate": 7.314964574536607e-06, "loss": 0.0, "step": 37540 }, { "epoch": 0.26873255564302584, "grad_norm": 0.0, "learning_rate": 7.3142489086094615e-06, "loss": 0.0, "step": 37550 }, { "epoch": 0.26880412223574035, "grad_norm": 0.0, "learning_rate": 7.313533242682317e-06, "loss": 0.0, "step": 37560 }, { "epoch": 0.26887568882845486, "grad_norm": 0.0, "learning_rate": 7.312817576755171e-06, "loss": 0.0, "step": 37570 }, { "epoch": 0.2689472554211694, "grad_norm": 134.01953125, "learning_rate": 7.312101910828026e-06, "loss": 0.0407, "step": 37580 }, { "epoch": 0.26901882201388394, "grad_norm": 0.0, "learning_rate": 7.311386244900881e-06, "loss": 0.4889, "step": 37590 }, { "epoch": 0.26909038860659845, "grad_norm": 0.0, "learning_rate": 7.3106705789737355e-06, "loss": 0.0, "step": 37600 }, { "epoch": 0.26916195519931296, "grad_norm": 0.0, "learning_rate": 7.309954913046591e-06, "loss": 0.0, "step": 37610 }, { "epoch": 0.2692335217920275, "grad_norm": 0.0, "learning_rate": 7.309239247119445e-06, "loss": 0.0146, "step": 37620 }, { "epoch": 0.269305088384742, "grad_norm": 0.0, "learning_rate": 7.3085235811923005e-06, "loss": 0.0003, "step": 37630 }, { "epoch": 0.2693766549774565, "grad_norm": 0.0, "learning_rate": 7.307807915265155e-06, "loss": 0.353, "step": 37640 }, { "epoch": 0.26944822157017106, "grad_norm": 0.14690810441970825, "learning_rate": 7.307092249338009e-06, "loss": 0.0, "step": 37650 }, { "epoch": 0.2695197881628856, "grad_norm": 5.0777000382140614e-08, "learning_rate": 7.306376583410865e-06, "loss": 0.0001, "step": 37660 }, { "epoch": 0.2695913547556001, "grad_norm": 0.0002470456238370389, "learning_rate": 7.305660917483719e-06, "loss": 0.0, "step": 37670 }, { "epoch": 0.2696629213483146, "grad_norm": 0.0, "learning_rate": 7.304945251556574e-06, "loss": 0.0, "step": 37680 }, { "epoch": 0.2697344879410291, "grad_norm": 0.0, "learning_rate": 7.304229585629429e-06, "loss": 0.0663, "step": 37690 }, { "epoch": 0.2698060545337436, "grad_norm": 2.284083535641912e-07, "learning_rate": 7.303513919702283e-06, "loss": 0.0, "step": 37700 }, { "epoch": 0.2698776211264582, "grad_norm": 96.29242706298828, "learning_rate": 7.302798253775139e-06, "loss": 0.0158, "step": 37710 }, { "epoch": 0.2699491877191727, "grad_norm": 0.0017020259983837605, "learning_rate": 7.302082587847993e-06, "loss": 0.0, "step": 37720 }, { "epoch": 0.2700207543118872, "grad_norm": 0.0, "learning_rate": 7.301366921920848e-06, "loss": 0.0, "step": 37730 }, { "epoch": 0.2700923209046017, "grad_norm": 0.02440420724451542, "learning_rate": 7.300651255993703e-06, "loss": 0.0, "step": 37740 }, { "epoch": 0.27016388749731624, "grad_norm": 0.0, "learning_rate": 7.299935590066558e-06, "loss": 0.0, "step": 37750 }, { "epoch": 0.27023545409003075, "grad_norm": 0.0, "learning_rate": 7.2992199241394125e-06, "loss": 0.0002, "step": 37760 }, { "epoch": 0.2703070206827453, "grad_norm": 0.0, "learning_rate": 7.298504258212267e-06, "loss": 0.0, "step": 37770 }, { "epoch": 0.2703785872754598, "grad_norm": 0.0, "learning_rate": 7.297788592285122e-06, "loss": 0.0, "step": 37780 }, { "epoch": 0.27045015386817434, "grad_norm": 0.0, "learning_rate": 7.297072926357977e-06, "loss": 0.0006, "step": 37790 }, { "epoch": 0.27052172046088885, "grad_norm": 0.0, "learning_rate": 7.296357260430832e-06, "loss": 0.0, "step": 37800 }, { "epoch": 0.27059328705360336, "grad_norm": 0.0, "learning_rate": 7.2956415945036864e-06, "loss": 0.0, "step": 37810 }, { "epoch": 0.27066485364631787, "grad_norm": 0.0, "learning_rate": 7.294925928576541e-06, "loss": 0.0, "step": 37820 }, { "epoch": 0.27073642023903244, "grad_norm": 0.0052345264703035355, "learning_rate": 7.294210262649396e-06, "loss": 0.0, "step": 37830 }, { "epoch": 0.27080798683174695, "grad_norm": 0.0, "learning_rate": 7.293494596722251e-06, "loss": 0.0046, "step": 37840 }, { "epoch": 0.27087955342446146, "grad_norm": 0.0001002460703602992, "learning_rate": 7.292778930795106e-06, "loss": 0.0125, "step": 37850 }, { "epoch": 0.270951120017176, "grad_norm": 0.0, "learning_rate": 7.29206326486796e-06, "loss": 0.6113, "step": 37860 }, { "epoch": 0.2710226866098905, "grad_norm": 2.9283064577612095e-05, "learning_rate": 7.291347598940816e-06, "loss": 0.0, "step": 37870 }, { "epoch": 0.27109425320260505, "grad_norm": 0.0, "learning_rate": 7.29063193301367e-06, "loss": 0.0, "step": 37880 }, { "epoch": 0.27116581979531956, "grad_norm": 0.0, "learning_rate": 7.2899162670865245e-06, "loss": 0.0002, "step": 37890 }, { "epoch": 0.2712373863880341, "grad_norm": 0.0, "learning_rate": 7.28920060115938e-06, "loss": 0.0, "step": 37900 }, { "epoch": 0.2713089529807486, "grad_norm": 0.016081832349300385, "learning_rate": 7.288484935232234e-06, "loss": 0.0, "step": 37910 }, { "epoch": 0.2713805195734631, "grad_norm": 3.0206286361078583e-09, "learning_rate": 7.2877692693050896e-06, "loss": 0.0, "step": 37920 }, { "epoch": 0.2714520861661776, "grad_norm": 3.3640921174082905e-05, "learning_rate": 7.287053603377944e-06, "loss": 0.0, "step": 37930 }, { "epoch": 0.2715236527588922, "grad_norm": 0.0, "learning_rate": 7.2863379374507984e-06, "loss": 0.0, "step": 37940 }, { "epoch": 0.2715952193516067, "grad_norm": 6.0852251237975e-08, "learning_rate": 7.285622271523654e-06, "loss": 0.0002, "step": 37950 }, { "epoch": 0.2716667859443212, "grad_norm": 0.0, "learning_rate": 7.284906605596508e-06, "loss": 0.0, "step": 37960 }, { "epoch": 0.2717383525370357, "grad_norm": 9.975902326431196e-10, "learning_rate": 7.2841909396693635e-06, "loss": 0.0, "step": 37970 }, { "epoch": 0.2718099191297502, "grad_norm": 0.0, "learning_rate": 7.283475273742218e-06, "loss": 0.0, "step": 37980 }, { "epoch": 0.27188148572246473, "grad_norm": 0.0, "learning_rate": 7.282759607815073e-06, "loss": 0.0, "step": 37990 }, { "epoch": 0.2719530523151793, "grad_norm": 0.0, "learning_rate": 7.282043941887928e-06, "loss": 0.0, "step": 38000 }, { "epoch": 0.2720246189078938, "grad_norm": 0.046086229383945465, "learning_rate": 7.281328275960782e-06, "loss": 0.0, "step": 38010 }, { "epoch": 0.2720961855006083, "grad_norm": 0.0, "learning_rate": 7.280612610033637e-06, "loss": 0.4063, "step": 38020 }, { "epoch": 0.27216775209332283, "grad_norm": 0.0, "learning_rate": 7.279896944106492e-06, "loss": 0.0655, "step": 38030 }, { "epoch": 0.27223931868603735, "grad_norm": 0.0021752871107310057, "learning_rate": 7.279181278179347e-06, "loss": 0.0, "step": 38040 }, { "epoch": 0.27231088527875186, "grad_norm": 1.3093923598717083e-06, "learning_rate": 7.2784656122522016e-06, "loss": 0.0, "step": 38050 }, { "epoch": 0.2723824518714664, "grad_norm": 0.0, "learning_rate": 7.277749946325056e-06, "loss": 0.0031, "step": 38060 }, { "epoch": 0.27245401846418094, "grad_norm": 0.0, "learning_rate": 7.277034280397911e-06, "loss": 0.0004, "step": 38070 }, { "epoch": 0.27252558505689545, "grad_norm": 0.0, "learning_rate": 7.276318614470766e-06, "loss": 0.0, "step": 38080 }, { "epoch": 0.27259715164960996, "grad_norm": 0.0, "learning_rate": 7.275602948543621e-06, "loss": 0.0, "step": 38090 }, { "epoch": 0.27266871824232447, "grad_norm": 0.0, "learning_rate": 7.2748872826164755e-06, "loss": 0.0, "step": 38100 }, { "epoch": 0.272740284835039, "grad_norm": 0.411204993724823, "learning_rate": 7.274171616689329e-06, "loss": 0.0001, "step": 38110 }, { "epoch": 0.27281185142775355, "grad_norm": 0.0, "learning_rate": 7.273455950762185e-06, "loss": 0.0, "step": 38120 }, { "epoch": 0.27288341802046806, "grad_norm": 0.0, "learning_rate": 7.27274028483504e-06, "loss": 0.0, "step": 38130 }, { "epoch": 0.27295498461318257, "grad_norm": 0.0010986292036250234, "learning_rate": 7.272024618907895e-06, "loss": 0.0, "step": 38140 }, { "epoch": 0.2730265512058971, "grad_norm": 0.0, "learning_rate": 7.271308952980749e-06, "loss": 0.0, "step": 38150 }, { "epoch": 0.2730981177986116, "grad_norm": 0.0, "learning_rate": 7.270593287053605e-06, "loss": 0.0, "step": 38160 }, { "epoch": 0.2731696843913261, "grad_norm": 2.461754888827272e-07, "learning_rate": 7.269877621126459e-06, "loss": 0.0, "step": 38170 }, { "epoch": 0.2732412509840407, "grad_norm": 0.0, "learning_rate": 7.269161955199313e-06, "loss": 0.0006, "step": 38180 }, { "epoch": 0.2733128175767552, "grad_norm": 0.0, "learning_rate": 7.268446289272169e-06, "loss": 0.0, "step": 38190 }, { "epoch": 0.2733843841694697, "grad_norm": 0.0, "learning_rate": 7.2677306233450225e-06, "loss": 0.3243, "step": 38200 }, { "epoch": 0.2734559507621842, "grad_norm": 0.0, "learning_rate": 7.267014957417879e-06, "loss": 0.0, "step": 38210 }, { "epoch": 0.2735275173548987, "grad_norm": 0.0, "learning_rate": 7.266299291490733e-06, "loss": 0.0001, "step": 38220 }, { "epoch": 0.27359908394761323, "grad_norm": 0.0, "learning_rate": 7.265583625563587e-06, "loss": 0.0, "step": 38230 }, { "epoch": 0.2736706505403278, "grad_norm": 0.12153123319149017, "learning_rate": 7.264867959636443e-06, "loss": 0.0, "step": 38240 }, { "epoch": 0.2737422171330423, "grad_norm": 0.0, "learning_rate": 7.264152293709296e-06, "loss": 0.0, "step": 38250 }, { "epoch": 0.2738137837257568, "grad_norm": 4.550774757028364e-10, "learning_rate": 7.2634366277821525e-06, "loss": 0.0, "step": 38260 }, { "epoch": 0.27388535031847133, "grad_norm": 0.0, "learning_rate": 7.262720961855006e-06, "loss": 0.0011, "step": 38270 }, { "epoch": 0.27395691691118584, "grad_norm": 0.0023139517288655043, "learning_rate": 7.262005295927862e-06, "loss": 0.0, "step": 38280 }, { "epoch": 0.27402848350390036, "grad_norm": 0.0, "learning_rate": 7.261289630000717e-06, "loss": 0.0, "step": 38290 }, { "epoch": 0.2741000500966149, "grad_norm": 0.0, "learning_rate": 7.26057396407357e-06, "loss": 0.0, "step": 38300 }, { "epoch": 0.27417161668932943, "grad_norm": 0.0, "learning_rate": 7.2598582981464264e-06, "loss": 0.0006, "step": 38310 }, { "epoch": 0.27424318328204395, "grad_norm": 0.0, "learning_rate": 7.25914263221928e-06, "loss": 0.3805, "step": 38320 }, { "epoch": 0.27431474987475846, "grad_norm": 6.345260143280029, "learning_rate": 7.258426966292136e-06, "loss": 0.002, "step": 38330 }, { "epoch": 0.27438631646747297, "grad_norm": 4.018901600399971e-10, "learning_rate": 7.25771130036499e-06, "loss": 0.0, "step": 38340 }, { "epoch": 0.2744578830601875, "grad_norm": 0.0, "learning_rate": 7.256995634437844e-06, "loss": 0.0019, "step": 38350 }, { "epoch": 0.27452944965290205, "grad_norm": 1.5192805528640747, "learning_rate": 7.2562799685106995e-06, "loss": 0.0055, "step": 38360 }, { "epoch": 0.27460101624561656, "grad_norm": 1.8273507151889135e-09, "learning_rate": 7.255564302583554e-06, "loss": 0.0, "step": 38370 }, { "epoch": 0.27467258283833107, "grad_norm": 0.0, "learning_rate": 7.25484863665641e-06, "loss": 0.0, "step": 38380 }, { "epoch": 0.2747441494310456, "grad_norm": 6.223752535561289e-08, "learning_rate": 7.254132970729264e-06, "loss": 0.0001, "step": 38390 }, { "epoch": 0.2748157160237601, "grad_norm": 0.0, "learning_rate": 7.25341730480212e-06, "loss": 0.0, "step": 38400 }, { "epoch": 0.2748872826164746, "grad_norm": 0.006242507137358189, "learning_rate": 7.2527016388749734e-06, "loss": 0.0, "step": 38410 }, { "epoch": 0.27495884920918917, "grad_norm": 0.0, "learning_rate": 7.251985972947828e-06, "loss": 0.0088, "step": 38420 }, { "epoch": 0.2750304158019037, "grad_norm": 3.858786357113786e-08, "learning_rate": 7.251270307020683e-06, "loss": 0.0, "step": 38430 }, { "epoch": 0.2751019823946182, "grad_norm": 9.351067831175897e-08, "learning_rate": 7.250554641093538e-06, "loss": 0.0, "step": 38440 }, { "epoch": 0.2751735489873327, "grad_norm": 0.0, "learning_rate": 7.249838975166394e-06, "loss": 0.0, "step": 38450 }, { "epoch": 0.2752451155800472, "grad_norm": 0.0, "learning_rate": 7.249123309239247e-06, "loss": 0.0025, "step": 38460 }, { "epoch": 0.27531668217276173, "grad_norm": 0.0, "learning_rate": 7.248407643312102e-06, "loss": 0.006, "step": 38470 }, { "epoch": 0.2753882487654763, "grad_norm": 0.0, "learning_rate": 7.247691977384957e-06, "loss": 0.0, "step": 38480 }, { "epoch": 0.2754598153581908, "grad_norm": 0.0, "learning_rate": 7.2469763114578115e-06, "loss": 0.0, "step": 38490 }, { "epoch": 0.2755313819509053, "grad_norm": 0.0, "learning_rate": 7.246260645530667e-06, "loss": 0.0, "step": 38500 }, { "epoch": 0.27560294854361983, "grad_norm": 0.0001957918721018359, "learning_rate": 7.245544979603521e-06, "loss": 0.0016, "step": 38510 }, { "epoch": 0.27567451513633434, "grad_norm": 0.0, "learning_rate": 7.244829313676377e-06, "loss": 0.0, "step": 38520 }, { "epoch": 0.27574608172904885, "grad_norm": 2.2231629870361758e-09, "learning_rate": 7.244113647749231e-06, "loss": 0.0, "step": 38530 }, { "epoch": 0.2758176483217634, "grad_norm": 0.0, "learning_rate": 7.2433979818220854e-06, "loss": 0.0002, "step": 38540 }, { "epoch": 0.27588921491447793, "grad_norm": 0.023176860064268112, "learning_rate": 7.242682315894941e-06, "loss": 0.0, "step": 38550 }, { "epoch": 0.27596078150719244, "grad_norm": 0.0, "learning_rate": 7.241966649967795e-06, "loss": 0.0052, "step": 38560 }, { "epoch": 0.27603234809990695, "grad_norm": 0.0, "learning_rate": 7.2412509840406505e-06, "loss": 0.0, "step": 38570 }, { "epoch": 0.27610391469262147, "grad_norm": 0.0, "learning_rate": 7.240535318113505e-06, "loss": 0.0121, "step": 38580 }, { "epoch": 0.276175481285336, "grad_norm": 0.0, "learning_rate": 7.239819652186359e-06, "loss": 0.0, "step": 38590 }, { "epoch": 0.27624704787805054, "grad_norm": 0.00021749353618361056, "learning_rate": 7.239103986259215e-06, "loss": 0.0052, "step": 38600 }, { "epoch": 0.27631861447076506, "grad_norm": 0.0, "learning_rate": 7.238388320332069e-06, "loss": 0.0, "step": 38610 }, { "epoch": 0.27639018106347957, "grad_norm": 0.0, "learning_rate": 7.237672654404924e-06, "loss": 0.0, "step": 38620 }, { "epoch": 0.2764617476561941, "grad_norm": 0.0, "learning_rate": 7.236956988477779e-06, "loss": 0.0023, "step": 38630 }, { "epoch": 0.2765333142489086, "grad_norm": 9.599582284636199e-08, "learning_rate": 7.236241322550634e-06, "loss": 0.0013, "step": 38640 }, { "epoch": 0.27660488084162316, "grad_norm": 0.0, "learning_rate": 7.2355256566234886e-06, "loss": 0.0001, "step": 38650 }, { "epoch": 0.27667644743433767, "grad_norm": 0.0, "learning_rate": 7.234809990696343e-06, "loss": 0.0, "step": 38660 }, { "epoch": 0.2767480140270522, "grad_norm": 0.0, "learning_rate": 7.234094324769198e-06, "loss": 0.0, "step": 38670 }, { "epoch": 0.2768195806197667, "grad_norm": 0.0, "learning_rate": 7.233378658842053e-06, "loss": 0.353, "step": 38680 }, { "epoch": 0.2768911472124812, "grad_norm": 0.0, "learning_rate": 7.232662992914908e-06, "loss": 0.0, "step": 38690 }, { "epoch": 0.2769627138051957, "grad_norm": 2.862911787815392e-05, "learning_rate": 7.2319473269877625e-06, "loss": 0.0007, "step": 38700 }, { "epoch": 0.2770342803979103, "grad_norm": 0.0, "learning_rate": 7.231231661060617e-06, "loss": 0.0, "step": 38710 }, { "epoch": 0.2771058469906248, "grad_norm": 0.0007398281013593078, "learning_rate": 7.230515995133472e-06, "loss": 0.0001, "step": 38720 }, { "epoch": 0.2771774135833393, "grad_norm": 0.0, "learning_rate": 7.229800329206327e-06, "loss": 0.0, "step": 38730 }, { "epoch": 0.2772489801760538, "grad_norm": 0.0, "learning_rate": 7.229084663279182e-06, "loss": 0.165, "step": 38740 }, { "epoch": 0.27732054676876833, "grad_norm": 0.0, "learning_rate": 7.228368997352036e-06, "loss": 0.0, "step": 38750 }, { "epoch": 0.27739211336148284, "grad_norm": 0.0002896845107898116, "learning_rate": 7.227653331424892e-06, "loss": 0.0, "step": 38760 }, { "epoch": 0.2774636799541974, "grad_norm": 0.4894784688949585, "learning_rate": 7.226937665497746e-06, "loss": 0.0007, "step": 38770 }, { "epoch": 0.2775352465469119, "grad_norm": 0.0, "learning_rate": 7.226221999570601e-06, "loss": 0.0, "step": 38780 }, { "epoch": 0.27760681313962643, "grad_norm": 1.5952691683196463e-06, "learning_rate": 7.225506333643456e-06, "loss": 0.1245, "step": 38790 }, { "epoch": 0.27767837973234094, "grad_norm": 4.4746670258000165e-10, "learning_rate": 7.22479066771631e-06, "loss": 0.0, "step": 38800 }, { "epoch": 0.27774994632505545, "grad_norm": 0.0011674963170662522, "learning_rate": 7.224075001789166e-06, "loss": 0.0, "step": 38810 }, { "epoch": 0.27782151291776996, "grad_norm": 0.0003465099725872278, "learning_rate": 7.22335933586202e-06, "loss": 0.0, "step": 38820 }, { "epoch": 0.27789307951048453, "grad_norm": 5.517922401428223, "learning_rate": 7.2226436699348745e-06, "loss": 0.0022, "step": 38830 }, { "epoch": 0.27796464610319904, "grad_norm": 0.0, "learning_rate": 7.22192800400773e-06, "loss": 0.0, "step": 38840 }, { "epoch": 0.27803621269591355, "grad_norm": 0.005087944678962231, "learning_rate": 7.221212338080584e-06, "loss": 0.0, "step": 38850 }, { "epoch": 0.27810777928862807, "grad_norm": 0.0063378759659826756, "learning_rate": 7.2204966721534395e-06, "loss": 0.0, "step": 38860 }, { "epoch": 0.2781793458813426, "grad_norm": 1.9480014088912867e-05, "learning_rate": 7.219781006226294e-06, "loss": 0.0, "step": 38870 }, { "epoch": 0.2782509124740571, "grad_norm": 0.0, "learning_rate": 7.2190653402991484e-06, "loss": 0.0, "step": 38880 }, { "epoch": 0.27832247906677166, "grad_norm": 0.0, "learning_rate": 7.218349674372004e-06, "loss": 0.0, "step": 38890 }, { "epoch": 0.27839404565948617, "grad_norm": 5.106241587782279e-06, "learning_rate": 7.217634008444858e-06, "loss": 0.0, "step": 38900 }, { "epoch": 0.2784656122522007, "grad_norm": 4.309170242855487e-10, "learning_rate": 7.2169183425177135e-06, "loss": 0.0002, "step": 38910 }, { "epoch": 0.2785371788449152, "grad_norm": 0.0, "learning_rate": 7.216202676590568e-06, "loss": 0.0, "step": 38920 }, { "epoch": 0.2786087454376297, "grad_norm": 0.0, "learning_rate": 7.215487010663423e-06, "loss": 0.0, "step": 38930 }, { "epoch": 0.2786803120303442, "grad_norm": 0.0, "learning_rate": 7.214771344736278e-06, "loss": 0.0102, "step": 38940 }, { "epoch": 0.2787518786230588, "grad_norm": 0.0, "learning_rate": 7.214055678809132e-06, "loss": 0.0003, "step": 38950 }, { "epoch": 0.2788234452157733, "grad_norm": 0.0, "learning_rate": 7.213340012881987e-06, "loss": 0.0019, "step": 38960 }, { "epoch": 0.2788950118084878, "grad_norm": 0.06026674434542656, "learning_rate": 7.212624346954842e-06, "loss": 0.0009, "step": 38970 }, { "epoch": 0.2789665784012023, "grad_norm": 0.0, "learning_rate": 7.211908681027697e-06, "loss": 0.0, "step": 38980 }, { "epoch": 0.2790381449939168, "grad_norm": 8.499247972082458e-10, "learning_rate": 7.2111930151005516e-06, "loss": 0.0, "step": 38990 }, { "epoch": 0.27910971158663134, "grad_norm": 0.010343912057578564, "learning_rate": 7.210477349173406e-06, "loss": 0.0, "step": 39000 }, { "epoch": 0.2791812781793459, "grad_norm": 3.0565868655685335e-05, "learning_rate": 7.209761683246261e-06, "loss": 0.0, "step": 39010 }, { "epoch": 0.2792528447720604, "grad_norm": 8.825175590310153e-10, "learning_rate": 7.209046017319116e-06, "loss": 0.4605, "step": 39020 }, { "epoch": 0.2793244113647749, "grad_norm": 1.570877194404602, "learning_rate": 7.208330351391971e-06, "loss": 0.0006, "step": 39030 }, { "epoch": 0.27939597795748944, "grad_norm": 0.0, "learning_rate": 7.2076146854648255e-06, "loss": 0.0, "step": 39040 }, { "epoch": 0.27946754455020395, "grad_norm": 0.00403223279863596, "learning_rate": 7.206899019537681e-06, "loss": 0.0004, "step": 39050 }, { "epoch": 0.27953911114291846, "grad_norm": 0.0, "learning_rate": 7.206183353610535e-06, "loss": 0.0008, "step": 39060 }, { "epoch": 0.27961067773563303, "grad_norm": 7.348970143539191e-08, "learning_rate": 7.20546768768339e-06, "loss": 0.0001, "step": 39070 }, { "epoch": 0.27968224432834754, "grad_norm": 0.0, "learning_rate": 7.204752021756245e-06, "loss": 0.0, "step": 39080 }, { "epoch": 0.27975381092106205, "grad_norm": 0.0, "learning_rate": 7.204036355829099e-06, "loss": 0.0, "step": 39090 }, { "epoch": 0.27982537751377656, "grad_norm": 0.0, "learning_rate": 7.203320689901955e-06, "loss": 0.0, "step": 39100 }, { "epoch": 0.2798969441064911, "grad_norm": 0.0, "learning_rate": 7.202605023974809e-06, "loss": 0.0001, "step": 39110 }, { "epoch": 0.2799685106992056, "grad_norm": 1.9609978085810553e-09, "learning_rate": 7.2018893580476636e-06, "loss": 0.0, "step": 39120 }, { "epoch": 0.28004007729192015, "grad_norm": 0.038008734583854675, "learning_rate": 7.201173692120519e-06, "loss": 0.0094, "step": 39130 }, { "epoch": 0.28011164388463466, "grad_norm": 0.0, "learning_rate": 7.200458026193373e-06, "loss": 0.0025, "step": 39140 }, { "epoch": 0.2801832104773492, "grad_norm": 0.0005902511184103787, "learning_rate": 7.199742360266229e-06, "loss": 0.0, "step": 39150 }, { "epoch": 0.2802547770700637, "grad_norm": 0.0, "learning_rate": 7.199026694339083e-06, "loss": 0.0004, "step": 39160 }, { "epoch": 0.2803263436627782, "grad_norm": 0.00856179092079401, "learning_rate": 7.198311028411938e-06, "loss": 0.0, "step": 39170 }, { "epoch": 0.2803979102554927, "grad_norm": 4.56391444458859e-06, "learning_rate": 7.197595362484793e-06, "loss": 0.0, "step": 39180 }, { "epoch": 0.2804694768482073, "grad_norm": 0.0, "learning_rate": 7.196879696557647e-06, "loss": 0.0009, "step": 39190 }, { "epoch": 0.2805410434409218, "grad_norm": 0.0, "learning_rate": 7.1961640306305025e-06, "loss": 0.0073, "step": 39200 }, { "epoch": 0.2806126100336363, "grad_norm": 0.0, "learning_rate": 7.195448364703357e-06, "loss": 0.0001, "step": 39210 }, { "epoch": 0.2806841766263508, "grad_norm": 0.0, "learning_rate": 7.194732698776212e-06, "loss": 0.0, "step": 39220 }, { "epoch": 0.2807557432190653, "grad_norm": 0.14815428853034973, "learning_rate": 7.194017032849067e-06, "loss": 0.0, "step": 39230 }, { "epoch": 0.28082730981177983, "grad_norm": 0.0, "learning_rate": 7.193301366921921e-06, "loss": 0.0002, "step": 39240 }, { "epoch": 0.2808988764044944, "grad_norm": 0.0, "learning_rate": 7.1925857009947764e-06, "loss": 0.0, "step": 39250 }, { "epoch": 0.2809704429972089, "grad_norm": 0.0, "learning_rate": 7.191870035067631e-06, "loss": 0.0033, "step": 39260 }, { "epoch": 0.2810420095899234, "grad_norm": 0.34387731552124023, "learning_rate": 7.191154369140486e-06, "loss": 0.0001, "step": 39270 }, { "epoch": 0.28111357618263794, "grad_norm": 0.0, "learning_rate": 7.190438703213341e-06, "loss": 0.0, "step": 39280 }, { "epoch": 0.28118514277535245, "grad_norm": 0.0, "learning_rate": 7.189723037286196e-06, "loss": 0.0018, "step": 39290 }, { "epoch": 0.28125670936806696, "grad_norm": 0.0, "learning_rate": 7.18900737135905e-06, "loss": 0.0, "step": 39300 }, { "epoch": 0.2813282759607815, "grad_norm": 0.0, "learning_rate": 7.188291705431905e-06, "loss": 0.0, "step": 39310 }, { "epoch": 0.28139984255349604, "grad_norm": 0.0, "learning_rate": 7.18757603950476e-06, "loss": 0.0, "step": 39320 }, { "epoch": 0.28147140914621055, "grad_norm": 36.3200798034668, "learning_rate": 7.1868603735776145e-06, "loss": 0.0037, "step": 39330 }, { "epoch": 0.28154297573892506, "grad_norm": 0.0, "learning_rate": 7.18614470765047e-06, "loss": 0.0, "step": 39340 }, { "epoch": 0.2816145423316396, "grad_norm": 0.0, "learning_rate": 7.185429041723324e-06, "loss": 0.0022, "step": 39350 }, { "epoch": 0.2816861089243541, "grad_norm": 0.0, "learning_rate": 7.184713375796179e-06, "loss": 0.0, "step": 39360 }, { "epoch": 0.28175767551706865, "grad_norm": 4.563980582350524e-10, "learning_rate": 7.183997709869034e-06, "loss": 0.0, "step": 39370 }, { "epoch": 0.28182924210978316, "grad_norm": 0.0, "learning_rate": 7.1832820439418884e-06, "loss": 0.0, "step": 39380 }, { "epoch": 0.2819008087024977, "grad_norm": 0.0, "learning_rate": 7.182566378014744e-06, "loss": 0.0, "step": 39390 }, { "epoch": 0.2819723752952122, "grad_norm": 7.483745845249246e-10, "learning_rate": 7.181850712087598e-06, "loss": 0.0125, "step": 39400 }, { "epoch": 0.2820439418879267, "grad_norm": 0.0, "learning_rate": 7.1811350461604535e-06, "loss": 0.0001, "step": 39410 }, { "epoch": 0.28211550848064126, "grad_norm": 0.0, "learning_rate": 7.180419380233308e-06, "loss": 0.0002, "step": 39420 }, { "epoch": 0.2821870750733558, "grad_norm": 0.0, "learning_rate": 7.179703714306162e-06, "loss": 0.0, "step": 39430 }, { "epoch": 0.2822586416660703, "grad_norm": 0.0, "learning_rate": 7.178988048379018e-06, "loss": 0.0, "step": 39440 }, { "epoch": 0.2823302082587848, "grad_norm": 0.00012573179265018553, "learning_rate": 7.178272382451872e-06, "loss": 0.0, "step": 39450 }, { "epoch": 0.2824017748514993, "grad_norm": 0.0, "learning_rate": 7.177556716524727e-06, "loss": 0.0, "step": 39460 }, { "epoch": 0.2824733414442138, "grad_norm": 0.0, "learning_rate": 7.176841050597582e-06, "loss": 0.0, "step": 39470 }, { "epoch": 0.2825449080369284, "grad_norm": 0.0, "learning_rate": 7.176125384670436e-06, "loss": 0.0, "step": 39480 }, { "epoch": 0.2826164746296429, "grad_norm": 0.0, "learning_rate": 7.1754097187432916e-06, "loss": 0.0, "step": 39490 }, { "epoch": 0.2826880412223574, "grad_norm": 212.94924926757812, "learning_rate": 7.174694052816146e-06, "loss": 0.6887, "step": 39500 }, { "epoch": 0.2827596078150719, "grad_norm": 0.0, "learning_rate": 7.173978386889001e-06, "loss": 0.0, "step": 39510 }, { "epoch": 0.28283117440778643, "grad_norm": 0.0, "learning_rate": 7.173262720961856e-06, "loss": 0.0, "step": 39520 }, { "epoch": 0.28290274100050095, "grad_norm": 0.0, "learning_rate": 7.172547055034711e-06, "loss": 0.0, "step": 39530 }, { "epoch": 0.2829743075932155, "grad_norm": 0.0, "learning_rate": 7.1718313891075655e-06, "loss": 0.0, "step": 39540 }, { "epoch": 0.28304587418593, "grad_norm": 0.0, "learning_rate": 7.17111572318042e-06, "loss": 0.0, "step": 39550 }, { "epoch": 0.28311744077864454, "grad_norm": 0.00025209420709870756, "learning_rate": 7.170400057253275e-06, "loss": 0.0, "step": 39560 }, { "epoch": 0.28318900737135905, "grad_norm": 0.00011538553371792659, "learning_rate": 7.16968439132613e-06, "loss": 0.0, "step": 39570 }, { "epoch": 0.28326057396407356, "grad_norm": 0.0, "learning_rate": 7.168968725398985e-06, "loss": 0.0, "step": 39580 }, { "epoch": 0.28333214055678807, "grad_norm": 0.0, "learning_rate": 7.168253059471839e-06, "loss": 0.0, "step": 39590 }, { "epoch": 0.28340370714950264, "grad_norm": 0.0, "learning_rate": 7.167537393544694e-06, "loss": 0.0, "step": 39600 }, { "epoch": 0.28347527374221715, "grad_norm": 0.0, "learning_rate": 7.166821727617549e-06, "loss": 0.0281, "step": 39610 }, { "epoch": 0.28354684033493166, "grad_norm": 0.0, "learning_rate": 7.166106061690404e-06, "loss": 0.0, "step": 39620 }, { "epoch": 0.28361840692764617, "grad_norm": 0.0, "learning_rate": 7.165390395763259e-06, "loss": 0.0, "step": 39630 }, { "epoch": 0.2836899735203607, "grad_norm": 4.7550554275512695, "learning_rate": 7.164674729836113e-06, "loss": 0.001, "step": 39640 }, { "epoch": 0.2837615401130752, "grad_norm": 0.0, "learning_rate": 7.163959063908967e-06, "loss": 0.0, "step": 39650 }, { "epoch": 0.28383310670578976, "grad_norm": 0.013711754232645035, "learning_rate": 7.163243397981823e-06, "loss": 0.0232, "step": 39660 }, { "epoch": 0.2839046732985043, "grad_norm": 0.0, "learning_rate": 7.1625277320546775e-06, "loss": 0.0002, "step": 39670 }, { "epoch": 0.2839762398912188, "grad_norm": 0.0, "learning_rate": 7.161812066127533e-06, "loss": 0.0, "step": 39680 }, { "epoch": 0.2840478064839333, "grad_norm": 2.701285382045171e-07, "learning_rate": 7.161096400200387e-06, "loss": 0.0084, "step": 39690 }, { "epoch": 0.2841193730766478, "grad_norm": 0.0, "learning_rate": 7.1603807342732425e-06, "loss": 0.0, "step": 39700 }, { "epoch": 0.2841909396693623, "grad_norm": 1.564362605677161e-06, "learning_rate": 7.159665068346097e-06, "loss": 0.0, "step": 39710 }, { "epoch": 0.2842625062620769, "grad_norm": 0.0, "learning_rate": 7.1589494024189506e-06, "loss": 0.0, "step": 39720 }, { "epoch": 0.2843340728547914, "grad_norm": 0.0, "learning_rate": 7.158233736491807e-06, "loss": 0.0156, "step": 39730 }, { "epoch": 0.2844056394475059, "grad_norm": 0.0, "learning_rate": 7.157518070564661e-06, "loss": 0.0, "step": 39740 }, { "epoch": 0.2844772060402204, "grad_norm": 0.0, "learning_rate": 7.1568024046375165e-06, "loss": 0.0252, "step": 39750 }, { "epoch": 0.28454877263293493, "grad_norm": 4.594153391046518e-10, "learning_rate": 7.156086738710371e-06, "loss": 0.0, "step": 39760 }, { "epoch": 0.28462033922564944, "grad_norm": 1.1243418157391716e-05, "learning_rate": 7.1553710727832245e-06, "loss": 0.0, "step": 39770 }, { "epoch": 0.284691905818364, "grad_norm": 4.2286538159963527e-10, "learning_rate": 7.154655406856081e-06, "loss": 0.0, "step": 39780 }, { "epoch": 0.2847634724110785, "grad_norm": 78.16729736328125, "learning_rate": 7.153939740928934e-06, "loss": 0.0185, "step": 39790 }, { "epoch": 0.28483503900379303, "grad_norm": 0.0009932841639965773, "learning_rate": 7.15322407500179e-06, "loss": 0.0, "step": 39800 }, { "epoch": 0.28490660559650755, "grad_norm": 3.22381993100862e-07, "learning_rate": 7.152508409074644e-06, "loss": 0.0054, "step": 39810 }, { "epoch": 0.28497817218922206, "grad_norm": 8.377970539541479e-10, "learning_rate": 7.1517927431475e-06, "loss": 0.0, "step": 39820 }, { "epoch": 0.28504973878193657, "grad_norm": 8.58723037122644e-10, "learning_rate": 7.1510770772203545e-06, "loss": 0.0, "step": 39830 }, { "epoch": 0.28512130537465114, "grad_norm": 3.086747142333479e-07, "learning_rate": 7.150361411293208e-06, "loss": 0.0, "step": 39840 }, { "epoch": 0.28519287196736565, "grad_norm": 0.0, "learning_rate": 7.149645745366064e-06, "loss": 0.0005, "step": 39850 }, { "epoch": 0.28526443856008016, "grad_norm": 0.0, "learning_rate": 7.148930079438918e-06, "loss": 0.0, "step": 39860 }, { "epoch": 0.28533600515279467, "grad_norm": 0.0, "learning_rate": 7.148214413511774e-06, "loss": 0.0, "step": 39870 }, { "epoch": 0.2854075717455092, "grad_norm": 0.0, "learning_rate": 7.147498747584628e-06, "loss": 0.0, "step": 39880 }, { "epoch": 0.2854791383382237, "grad_norm": 8.702404556970578e-06, "learning_rate": 7.146783081657482e-06, "loss": 0.0, "step": 39890 }, { "epoch": 0.28555070493093826, "grad_norm": 0.0, "learning_rate": 7.146067415730338e-06, "loss": 0.0, "step": 39900 }, { "epoch": 0.28562227152365277, "grad_norm": 0.0, "learning_rate": 7.145351749803192e-06, "loss": 0.0024, "step": 39910 }, { "epoch": 0.2856938381163673, "grad_norm": 0.0, "learning_rate": 7.144636083876048e-06, "loss": 0.1335, "step": 39920 }, { "epoch": 0.2857654047090818, "grad_norm": 0.0, "learning_rate": 7.1439204179489015e-06, "loss": 0.0, "step": 39930 }, { "epoch": 0.2858369713017963, "grad_norm": 0.0, "learning_rate": 7.143204752021758e-06, "loss": 0.0003, "step": 39940 }, { "epoch": 0.2859085378945108, "grad_norm": 0.0, "learning_rate": 7.142489086094611e-06, "loss": 0.0, "step": 39950 }, { "epoch": 0.2859801044872254, "grad_norm": 0.0, "learning_rate": 7.141773420167466e-06, "loss": 0.0, "step": 39960 }, { "epoch": 0.2860516710799399, "grad_norm": 1.6722715434625002e-09, "learning_rate": 7.141057754240322e-06, "loss": 0.0, "step": 39970 }, { "epoch": 0.2861232376726544, "grad_norm": 0.0, "learning_rate": 7.1403420883131755e-06, "loss": 0.0, "step": 39980 }, { "epoch": 0.2861948042653689, "grad_norm": 0.0, "learning_rate": 7.139626422386032e-06, "loss": 0.0, "step": 39990 }, { "epoch": 0.28626637085808343, "grad_norm": 0.0, "learning_rate": 7.138910756458885e-06, "loss": 0.0153, "step": 40000 }, { "epoch": 0.28633793745079794, "grad_norm": 0.0, "learning_rate": 7.13819509053174e-06, "loss": 0.0, "step": 40010 }, { "epoch": 0.2864095040435125, "grad_norm": 0.0, "learning_rate": 7.137479424604595e-06, "loss": 0.0, "step": 40020 }, { "epoch": 0.286481070636227, "grad_norm": 0.0, "learning_rate": 7.136763758677449e-06, "loss": 0.0, "step": 40030 }, { "epoch": 0.28655263722894153, "grad_norm": 0.0, "learning_rate": 7.136048092750305e-06, "loss": 0.0, "step": 40040 }, { "epoch": 0.28662420382165604, "grad_norm": 0.0, "learning_rate": 7.135332426823159e-06, "loss": 0.6027, "step": 40050 }, { "epoch": 0.28669577041437055, "grad_norm": 0.0, "learning_rate": 7.134616760896015e-06, "loss": 0.2105, "step": 40060 }, { "epoch": 0.28676733700708507, "grad_norm": 0.0, "learning_rate": 7.133901094968869e-06, "loss": 0.0, "step": 40070 }, { "epoch": 0.28683890359979963, "grad_norm": 0.0, "learning_rate": 7.133185429041723e-06, "loss": 0.0001, "step": 40080 }, { "epoch": 0.28691047019251414, "grad_norm": 0.03768864646553993, "learning_rate": 7.132469763114579e-06, "loss": 0.0011, "step": 40090 }, { "epoch": 0.28698203678522866, "grad_norm": 0.0, "learning_rate": 7.131754097187433e-06, "loss": 0.0, "step": 40100 }, { "epoch": 0.28705360337794317, "grad_norm": 0.0, "learning_rate": 7.131038431260288e-06, "loss": 0.0, "step": 40110 }, { "epoch": 0.2871251699706577, "grad_norm": 2.1397897342900762e-10, "learning_rate": 7.130322765333143e-06, "loss": 0.0, "step": 40120 }, { "epoch": 0.2871967365633722, "grad_norm": 0.0, "learning_rate": 7.129607099405997e-06, "loss": 0.0, "step": 40130 }, { "epoch": 0.28726830315608676, "grad_norm": 3.944300260627642e-05, "learning_rate": 7.1288914334788525e-06, "loss": 0.0, "step": 40140 }, { "epoch": 0.28733986974880127, "grad_norm": 0.0, "learning_rate": 7.128247334144422e-06, "loss": 0.2064, "step": 40150 }, { "epoch": 0.2874114363415158, "grad_norm": 0.0, "learning_rate": 7.127531668217277e-06, "loss": 0.0, "step": 40160 }, { "epoch": 0.2874830029342303, "grad_norm": 0.0, "learning_rate": 7.126816002290131e-06, "loss": 0.0, "step": 40170 }, { "epoch": 0.2875545695269448, "grad_norm": 0.0, "learning_rate": 7.1261003363629864e-06, "loss": 0.0, "step": 40180 }, { "epoch": 0.28762613611965937, "grad_norm": 0.0, "learning_rate": 7.125384670435841e-06, "loss": 0.0, "step": 40190 }, { "epoch": 0.2876977027123739, "grad_norm": 0.0, "learning_rate": 7.124669004508696e-06, "loss": 0.0, "step": 40200 }, { "epoch": 0.2877692693050884, "grad_norm": 0.0, "learning_rate": 7.123953338581551e-06, "loss": 0.0, "step": 40210 }, { "epoch": 0.2878408358978029, "grad_norm": 0.0, "learning_rate": 7.123237672654405e-06, "loss": 0.0445, "step": 40220 }, { "epoch": 0.2879124024905174, "grad_norm": 0.0, "learning_rate": 7.12252200672726e-06, "loss": 0.0011, "step": 40230 }, { "epoch": 0.2879839690832319, "grad_norm": 0.0, "learning_rate": 7.121806340800115e-06, "loss": 0.0, "step": 40240 }, { "epoch": 0.2880555356759465, "grad_norm": 0.0, "learning_rate": 7.12109067487297e-06, "loss": 0.0, "step": 40250 }, { "epoch": 0.288127102268661, "grad_norm": 0.04138386994600296, "learning_rate": 7.1203750089458245e-06, "loss": 0.0, "step": 40260 }, { "epoch": 0.2881986688613755, "grad_norm": 0.0, "learning_rate": 7.11965934301868e-06, "loss": 0.0, "step": 40270 }, { "epoch": 0.28827023545409003, "grad_norm": 0.0, "learning_rate": 7.118943677091534e-06, "loss": 0.0, "step": 40280 }, { "epoch": 0.28834180204680454, "grad_norm": 0.0013392925029620528, "learning_rate": 7.118228011164389e-06, "loss": 0.0248, "step": 40290 }, { "epoch": 0.28841336863951905, "grad_norm": 0.10537329316139221, "learning_rate": 7.117512345237244e-06, "loss": 0.0063, "step": 40300 }, { "epoch": 0.2884849352322336, "grad_norm": 0.00011387152335373685, "learning_rate": 7.1167966793100985e-06, "loss": 0.0, "step": 40310 }, { "epoch": 0.28855650182494813, "grad_norm": 0.0, "learning_rate": 7.116081013382954e-06, "loss": 0.0, "step": 40320 }, { "epoch": 0.28862806841766264, "grad_norm": 0.000360243022441864, "learning_rate": 7.115365347455808e-06, "loss": 0.0, "step": 40330 }, { "epoch": 0.28869963501037715, "grad_norm": 4.520174234912133e-10, "learning_rate": 7.114649681528663e-06, "loss": 0.0, "step": 40340 }, { "epoch": 0.28877120160309167, "grad_norm": 8.180622100830078, "learning_rate": 7.113934015601518e-06, "loss": 0.0021, "step": 40350 }, { "epoch": 0.2888427681958062, "grad_norm": 0.0, "learning_rate": 7.113218349674372e-06, "loss": 0.0, "step": 40360 }, { "epoch": 0.28891433478852074, "grad_norm": 0.0, "learning_rate": 7.112502683747228e-06, "loss": 0.0001, "step": 40370 }, { "epoch": 0.28898590138123526, "grad_norm": 0.00221831351518631, "learning_rate": 7.111787017820082e-06, "loss": 0.0783, "step": 40380 }, { "epoch": 0.28905746797394977, "grad_norm": 0.0, "learning_rate": 7.111071351892937e-06, "loss": 0.3852, "step": 40390 }, { "epoch": 0.2891290345666643, "grad_norm": 0.0, "learning_rate": 7.110355685965792e-06, "loss": 0.0326, "step": 40400 }, { "epoch": 0.2892006011593788, "grad_norm": 0.0, "learning_rate": 7.109640020038646e-06, "loss": 0.0, "step": 40410 }, { "epoch": 0.2892721677520933, "grad_norm": 0.0, "learning_rate": 7.108924354111502e-06, "loss": 0.0, "step": 40420 }, { "epoch": 0.28934373434480787, "grad_norm": 4.701204306911677e-05, "learning_rate": 7.108208688184356e-06, "loss": 0.0003, "step": 40430 }, { "epoch": 0.2894153009375224, "grad_norm": 0.0, "learning_rate": 7.107493022257211e-06, "loss": 0.0, "step": 40440 }, { "epoch": 0.2894868675302369, "grad_norm": 0.0, "learning_rate": 7.106777356330066e-06, "loss": 0.0, "step": 40450 }, { "epoch": 0.2895584341229514, "grad_norm": 9.628816632911708e-10, "learning_rate": 7.10606169040292e-06, "loss": 0.0, "step": 40460 }, { "epoch": 0.2896300007156659, "grad_norm": 0.026274356991052628, "learning_rate": 7.1053460244757755e-06, "loss": 0.0, "step": 40470 }, { "epoch": 0.2897015673083804, "grad_norm": 2.193805954675554e-07, "learning_rate": 7.10463035854863e-06, "loss": 0.0, "step": 40480 }, { "epoch": 0.289773133901095, "grad_norm": 0.0, "learning_rate": 7.103914692621485e-06, "loss": 0.0, "step": 40490 }, { "epoch": 0.2898447004938095, "grad_norm": 0.0, "learning_rate": 7.10319902669434e-06, "loss": 0.0, "step": 40500 }, { "epoch": 0.289916267086524, "grad_norm": 0.0, "learning_rate": 7.102483360767194e-06, "loss": 0.0536, "step": 40510 }, { "epoch": 0.2899878336792385, "grad_norm": 0.0, "learning_rate": 7.101767694840049e-06, "loss": 0.0, "step": 40520 }, { "epoch": 0.29005940027195304, "grad_norm": 0.0, "learning_rate": 7.101052028912904e-06, "loss": 0.0, "step": 40530 }, { "epoch": 0.29013096686466755, "grad_norm": 0.00012819051335100085, "learning_rate": 7.100336362985759e-06, "loss": 0.0, "step": 40540 }, { "epoch": 0.2902025334573821, "grad_norm": 0.0, "learning_rate": 7.099620697058614e-06, "loss": 0.0, "step": 40550 }, { "epoch": 0.29027410005009663, "grad_norm": 0.0, "learning_rate": 7.098905031131469e-06, "loss": 0.0151, "step": 40560 }, { "epoch": 0.29034566664281114, "grad_norm": 0.0, "learning_rate": 7.098189365204323e-06, "loss": 0.0, "step": 40570 }, { "epoch": 0.29041723323552565, "grad_norm": 0.0, "learning_rate": 7.097473699277178e-06, "loss": 0.0, "step": 40580 }, { "epoch": 0.29048879982824016, "grad_norm": 0.0010914659360423684, "learning_rate": 7.096758033350033e-06, "loss": 0.0003, "step": 40590 }, { "epoch": 0.2905603664209547, "grad_norm": 0.0, "learning_rate": 7.0960423674228875e-06, "loss": 0.0, "step": 40600 }, { "epoch": 0.29063193301366924, "grad_norm": 8.828404673977275e-10, "learning_rate": 7.095326701495743e-06, "loss": 0.0, "step": 40610 }, { "epoch": 0.29070349960638375, "grad_norm": 0.0, "learning_rate": 7.094611035568597e-06, "loss": 0.0006, "step": 40620 }, { "epoch": 0.29077506619909826, "grad_norm": 55.83233642578125, "learning_rate": 7.093895369641452e-06, "loss": 0.0073, "step": 40630 }, { "epoch": 0.2908466327918128, "grad_norm": 0.15504154562950134, "learning_rate": 7.093179703714307e-06, "loss": 0.0, "step": 40640 }, { "epoch": 0.2909181993845273, "grad_norm": 4.007919051218778e-05, "learning_rate": 7.0924640377871614e-06, "loss": 0.0, "step": 40650 }, { "epoch": 0.2909897659772418, "grad_norm": 0.0, "learning_rate": 7.091748371860017e-06, "loss": 0.0, "step": 40660 }, { "epoch": 0.29106133256995637, "grad_norm": 0.0, "learning_rate": 7.091032705932871e-06, "loss": 0.0, "step": 40670 }, { "epoch": 0.2911328991626709, "grad_norm": 4.612238924117662e-10, "learning_rate": 7.0903170400057265e-06, "loss": 0.0034, "step": 40680 }, { "epoch": 0.2912044657553854, "grad_norm": 0.0, "learning_rate": 7.089601374078581e-06, "loss": 0.0, "step": 40690 }, { "epoch": 0.2912760323480999, "grad_norm": 0.00013138577924109995, "learning_rate": 7.088885708151435e-06, "loss": 0.0, "step": 40700 }, { "epoch": 0.2913475989408144, "grad_norm": 0.0, "learning_rate": 7.088170042224291e-06, "loss": 0.0, "step": 40710 }, { "epoch": 0.2914191655335289, "grad_norm": 5.56419354325044e-06, "learning_rate": 7.087454376297145e-06, "loss": 0.0, "step": 40720 }, { "epoch": 0.2914907321262435, "grad_norm": 0.0, "learning_rate": 7.08673871037e-06, "loss": 0.0004, "step": 40730 }, { "epoch": 0.291562298718958, "grad_norm": 0.0, "learning_rate": 7.086023044442855e-06, "loss": 0.0, "step": 40740 }, { "epoch": 0.2916338653116725, "grad_norm": 4.150444987693902e-10, "learning_rate": 7.085307378515709e-06, "loss": 0.0, "step": 40750 }, { "epoch": 0.291705431904387, "grad_norm": 0.0, "learning_rate": 7.0845917125885646e-06, "loss": 0.0, "step": 40760 }, { "epoch": 0.29177699849710154, "grad_norm": 0.0, "learning_rate": 7.083876046661419e-06, "loss": 0.0, "step": 40770 }, { "epoch": 0.29184856508981605, "grad_norm": 0.0, "learning_rate": 7.083160380734274e-06, "loss": 0.0, "step": 40780 }, { "epoch": 0.2919201316825306, "grad_norm": 0.0, "learning_rate": 7.082444714807129e-06, "loss": 0.0, "step": 40790 }, { "epoch": 0.2919916982752451, "grad_norm": 0.0001538655924377963, "learning_rate": 7.081729048879984e-06, "loss": 0.0, "step": 40800 }, { "epoch": 0.29206326486795964, "grad_norm": 0.196880042552948, "learning_rate": 7.0810133829528385e-06, "loss": 0.0001, "step": 40810 }, { "epoch": 0.29213483146067415, "grad_norm": 0.002322258660569787, "learning_rate": 7.080297717025693e-06, "loss": 0.0, "step": 40820 }, { "epoch": 0.29220639805338866, "grad_norm": 1.6603896035860544e-09, "learning_rate": 7.079582051098548e-06, "loss": 0.0, "step": 40830 }, { "epoch": 0.29227796464610317, "grad_norm": 0.0, "learning_rate": 7.078866385171403e-06, "loss": 0.0, "step": 40840 }, { "epoch": 0.29234953123881774, "grad_norm": 0.0, "learning_rate": 7.078150719244258e-06, "loss": 0.0, "step": 40850 }, { "epoch": 0.29242109783153225, "grad_norm": 0.0, "learning_rate": 7.077435053317112e-06, "loss": 0.0, "step": 40860 }, { "epoch": 0.29249266442424676, "grad_norm": 0.004602829460054636, "learning_rate": 7.076719387389967e-06, "loss": 0.0139, "step": 40870 }, { "epoch": 0.2925642310169613, "grad_norm": 0.0, "learning_rate": 7.076003721462822e-06, "loss": 0.0, "step": 40880 }, { "epoch": 0.2926357976096758, "grad_norm": 0.0005272258422337472, "learning_rate": 7.0752880555356766e-06, "loss": 0.0, "step": 40890 }, { "epoch": 0.2927073642023903, "grad_norm": 0.0, "learning_rate": 7.074572389608532e-06, "loss": 0.0, "step": 40900 }, { "epoch": 0.29277893079510486, "grad_norm": 0.0, "learning_rate": 7.073856723681386e-06, "loss": 0.0, "step": 40910 }, { "epoch": 0.2928504973878194, "grad_norm": 0.0, "learning_rate": 7.073141057754242e-06, "loss": 0.0, "step": 40920 }, { "epoch": 0.2929220639805339, "grad_norm": 0.0, "learning_rate": 7.072425391827096e-06, "loss": 0.0, "step": 40930 }, { "epoch": 0.2929936305732484, "grad_norm": 0.0, "learning_rate": 7.0717097258999505e-06, "loss": 0.0, "step": 40940 }, { "epoch": 0.2930651971659629, "grad_norm": 0.0, "learning_rate": 7.070994059972806e-06, "loss": 0.0108, "step": 40950 }, { "epoch": 0.2931367637586775, "grad_norm": 0.0, "learning_rate": 7.07027839404566e-06, "loss": 0.0, "step": 40960 }, { "epoch": 0.293208330351392, "grad_norm": 0.0, "learning_rate": 7.0695627281185155e-06, "loss": 0.0, "step": 40970 }, { "epoch": 0.2932798969441065, "grad_norm": 0.0, "learning_rate": 7.06884706219137e-06, "loss": 0.0, "step": 40980 }, { "epoch": 0.293351463536821, "grad_norm": 0.0, "learning_rate": 7.0681313962642236e-06, "loss": 0.0, "step": 40990 }, { "epoch": 0.2934230301295355, "grad_norm": 0.08601561933755875, "learning_rate": 7.06741573033708e-06, "loss": 0.0002, "step": 41000 }, { "epoch": 0.29349459672225003, "grad_norm": 6.639542107222951e-07, "learning_rate": 7.066700064409934e-06, "loss": 0.0, "step": 41010 }, { "epoch": 0.2935661633149646, "grad_norm": 0.0, "learning_rate": 7.0659843984827894e-06, "loss": 0.0, "step": 41020 }, { "epoch": 0.2936377299076791, "grad_norm": 0.0, "learning_rate": 7.065268732555644e-06, "loss": 0.0, "step": 41030 }, { "epoch": 0.2937092965003936, "grad_norm": 0.0, "learning_rate": 7.064553066628499e-06, "loss": 0.0, "step": 41040 }, { "epoch": 0.29378086309310814, "grad_norm": 0.0, "learning_rate": 7.063837400701354e-06, "loss": 0.0, "step": 41050 }, { "epoch": 0.29385242968582265, "grad_norm": 0.0, "learning_rate": 7.063121734774207e-06, "loss": 0.0, "step": 41060 }, { "epoch": 0.29392399627853716, "grad_norm": 0.0, "learning_rate": 7.062406068847063e-06, "loss": 0.0, "step": 41070 }, { "epoch": 0.2939955628712517, "grad_norm": 2.107651198457461e-06, "learning_rate": 7.061690402919917e-06, "loss": 0.0, "step": 41080 }, { "epoch": 0.29406712946396624, "grad_norm": 0.0, "learning_rate": 7.060974736992773e-06, "loss": 0.0, "step": 41090 }, { "epoch": 0.29413869605668075, "grad_norm": 0.0, "learning_rate": 7.0602590710656275e-06, "loss": 0.0, "step": 41100 }, { "epoch": 0.29421026264939526, "grad_norm": 0.0, "learning_rate": 7.059543405138481e-06, "loss": 0.0, "step": 41110 }, { "epoch": 0.29428182924210977, "grad_norm": 0.0, "learning_rate": 7.058827739211337e-06, "loss": 0.0, "step": 41120 }, { "epoch": 0.2943533958348243, "grad_norm": 0.0, "learning_rate": 7.058112073284191e-06, "loss": 0.0, "step": 41130 }, { "epoch": 0.29442496242753885, "grad_norm": 0.0, "learning_rate": 7.057396407357047e-06, "loss": 0.0, "step": 41140 }, { "epoch": 0.29449652902025336, "grad_norm": 0.0, "learning_rate": 7.056680741429901e-06, "loss": 0.0, "step": 41150 }, { "epoch": 0.2945680956129679, "grad_norm": 857.9107055664062, "learning_rate": 7.055965075502757e-06, "loss": 0.4051, "step": 41160 }, { "epoch": 0.2946396622056824, "grad_norm": 0.0, "learning_rate": 7.055249409575611e-06, "loss": 0.0, "step": 41170 }, { "epoch": 0.2947112287983969, "grad_norm": 0.002422154415398836, "learning_rate": 7.054533743648465e-06, "loss": 0.0, "step": 41180 }, { "epoch": 0.2947827953911114, "grad_norm": 0.07973966002464294, "learning_rate": 7.053818077721321e-06, "loss": 0.0, "step": 41190 }, { "epoch": 0.294854361983826, "grad_norm": 0.00017220609879586846, "learning_rate": 7.0531024117941745e-06, "loss": 0.0, "step": 41200 }, { "epoch": 0.2949259285765405, "grad_norm": 0.0, "learning_rate": 7.052386745867031e-06, "loss": 0.0, "step": 41210 }, { "epoch": 0.294997495169255, "grad_norm": 0.0, "learning_rate": 7.051671079939884e-06, "loss": 0.0, "step": 41220 }, { "epoch": 0.2950690617619695, "grad_norm": 1.32111381390132e-07, "learning_rate": 7.050955414012739e-06, "loss": 0.0, "step": 41230 }, { "epoch": 0.295140628354684, "grad_norm": 1.0933861593898087e-09, "learning_rate": 7.050239748085594e-06, "loss": 0.0018, "step": 41240 }, { "epoch": 0.29521219494739853, "grad_norm": 0.0, "learning_rate": 7.0495240821584484e-06, "loss": 0.0199, "step": 41250 }, { "epoch": 0.2952837615401131, "grad_norm": 0.0003247082931920886, "learning_rate": 7.048808416231305e-06, "loss": 0.0, "step": 41260 }, { "epoch": 0.2953553281328276, "grad_norm": 0.0, "learning_rate": 7.048092750304158e-06, "loss": 0.0, "step": 41270 }, { "epoch": 0.2954268947255421, "grad_norm": 0.0, "learning_rate": 7.047377084377013e-06, "loss": 0.0003, "step": 41280 }, { "epoch": 0.29549846131825663, "grad_norm": 0.0, "learning_rate": 7.046661418449868e-06, "loss": 0.0, "step": 41290 }, { "epoch": 0.29557002791097114, "grad_norm": 0.0, "learning_rate": 7.045945752522722e-06, "loss": 0.0005, "step": 41300 }, { "epoch": 0.29564159450368566, "grad_norm": 0.0, "learning_rate": 7.045230086595578e-06, "loss": 0.0002, "step": 41310 }, { "epoch": 0.2957131610964002, "grad_norm": 0.14301422238349915, "learning_rate": 7.044514420668432e-06, "loss": 0.0, "step": 41320 }, { "epoch": 0.29578472768911473, "grad_norm": 4.2173819991830896e-10, "learning_rate": 7.043798754741288e-06, "loss": 0.0, "step": 41330 }, { "epoch": 0.29585629428182925, "grad_norm": 1.6737082830786676e-09, "learning_rate": 7.043083088814142e-06, "loss": 0.0, "step": 41340 }, { "epoch": 0.29592786087454376, "grad_norm": 0.0, "learning_rate": 7.042367422886996e-06, "loss": 0.0064, "step": 41350 }, { "epoch": 0.29599942746725827, "grad_norm": 0.0, "learning_rate": 7.0416517569598516e-06, "loss": 0.0, "step": 41360 }, { "epoch": 0.2960709940599728, "grad_norm": 0.0, "learning_rate": 7.040936091032706e-06, "loss": 0.0445, "step": 41370 }, { "epoch": 0.29614256065268735, "grad_norm": 0.0, "learning_rate": 7.040220425105561e-06, "loss": 0.0, "step": 41380 }, { "epoch": 0.29621412724540186, "grad_norm": 0.0, "learning_rate": 7.039504759178416e-06, "loss": 0.0, "step": 41390 }, { "epoch": 0.29628569383811637, "grad_norm": 0.0, "learning_rate": 7.03878909325127e-06, "loss": 0.0, "step": 41400 }, { "epoch": 0.2963572604308309, "grad_norm": 0.0, "learning_rate": 7.0380734273241255e-06, "loss": 0.0, "step": 41410 }, { "epoch": 0.2964288270235454, "grad_norm": 0.0, "learning_rate": 7.03735776139698e-06, "loss": 0.0, "step": 41420 }, { "epoch": 0.2965003936162599, "grad_norm": 7.401744142043754e-07, "learning_rate": 7.036642095469835e-06, "loss": 0.0703, "step": 41430 }, { "epoch": 0.29657196020897447, "grad_norm": 0.0, "learning_rate": 7.03592642954269e-06, "loss": 0.0, "step": 41440 }, { "epoch": 0.296643526801689, "grad_norm": 0.0, "learning_rate": 7.035210763615545e-06, "loss": 0.0, "step": 41450 }, { "epoch": 0.2967150933944035, "grad_norm": 0.0, "learning_rate": 7.034495097688399e-06, "loss": 0.0091, "step": 41460 }, { "epoch": 0.296786659987118, "grad_norm": 0.0, "learning_rate": 7.033779431761254e-06, "loss": 0.0, "step": 41470 }, { "epoch": 0.2968582265798325, "grad_norm": 0.0020419657230377197, "learning_rate": 7.033063765834109e-06, "loss": 0.0, "step": 41480 }, { "epoch": 0.29692979317254703, "grad_norm": 9.83082009042846e-06, "learning_rate": 7.032348099906964e-06, "loss": 0.0, "step": 41490 }, { "epoch": 0.2970013597652616, "grad_norm": 0.0, "learning_rate": 7.031632433979819e-06, "loss": 0.0, "step": 41500 }, { "epoch": 0.2970729263579761, "grad_norm": 0.0, "learning_rate": 7.030916768052673e-06, "loss": 0.0071, "step": 41510 }, { "epoch": 0.2971444929506906, "grad_norm": 7.111041782081884e-07, "learning_rate": 7.030201102125528e-06, "loss": 0.0, "step": 41520 }, { "epoch": 0.29721605954340513, "grad_norm": 0.0, "learning_rate": 7.029485436198383e-06, "loss": 0.0, "step": 41530 }, { "epoch": 0.29728762613611964, "grad_norm": 0.0, "learning_rate": 7.0287697702712375e-06, "loss": 0.0, "step": 41540 }, { "epoch": 0.29735919272883415, "grad_norm": 0.0, "learning_rate": 7.028054104344093e-06, "loss": 0.0, "step": 41550 }, { "epoch": 0.2974307593215487, "grad_norm": 675.4979248046875, "learning_rate": 7.027338438416947e-06, "loss": 0.7109, "step": 41560 }, { "epoch": 0.29750232591426323, "grad_norm": 0.0, "learning_rate": 7.0266227724898025e-06, "loss": 0.0008, "step": 41570 }, { "epoch": 0.29757389250697774, "grad_norm": 0.0, "learning_rate": 7.025907106562657e-06, "loss": 0.0003, "step": 41580 }, { "epoch": 0.29764545909969226, "grad_norm": 0.0028481052722781897, "learning_rate": 7.025191440635511e-06, "loss": 0.0, "step": 41590 }, { "epoch": 0.29771702569240677, "grad_norm": 4.3416675810092897e-10, "learning_rate": 7.024475774708367e-06, "loss": 0.0, "step": 41600 }, { "epoch": 0.2977885922851213, "grad_norm": 0.0021818235982209444, "learning_rate": 7.023760108781221e-06, "loss": 0.0007, "step": 41610 }, { "epoch": 0.29786015887783585, "grad_norm": 0.0, "learning_rate": 7.0230444428540764e-06, "loss": 0.0536, "step": 41620 }, { "epoch": 0.29793172547055036, "grad_norm": 0.010679362341761589, "learning_rate": 7.022328776926931e-06, "loss": 0.0, "step": 41630 }, { "epoch": 0.29800329206326487, "grad_norm": 0.001858349540270865, "learning_rate": 7.021613110999785e-06, "loss": 0.0, "step": 41640 }, { "epoch": 0.2980748586559794, "grad_norm": 0.0, "learning_rate": 7.020897445072641e-06, "loss": 0.0, "step": 41650 }, { "epoch": 0.2981464252486939, "grad_norm": 0.0, "learning_rate": 7.020181779145495e-06, "loss": 0.0001, "step": 41660 }, { "epoch": 0.2982179918414084, "grad_norm": 0.0, "learning_rate": 7.01946611321835e-06, "loss": 0.0, "step": 41670 }, { "epoch": 0.29828955843412297, "grad_norm": 0.0, "learning_rate": 7.018750447291205e-06, "loss": 0.0006, "step": 41680 }, { "epoch": 0.2983611250268375, "grad_norm": 0.0006646016263402998, "learning_rate": 7.01803478136406e-06, "loss": 0.0, "step": 41690 }, { "epoch": 0.298432691619552, "grad_norm": 2.777968290956778e-07, "learning_rate": 7.0173191154369145e-06, "loss": 0.0, "step": 41700 }, { "epoch": 0.2985042582122665, "grad_norm": 0.0, "learning_rate": 7.016603449509769e-06, "loss": 0.0, "step": 41710 }, { "epoch": 0.298575824804981, "grad_norm": 0.0, "learning_rate": 7.015887783582624e-06, "loss": 0.0, "step": 41720 }, { "epoch": 0.2986473913976956, "grad_norm": 1.2714627700916026e-05, "learning_rate": 7.015172117655479e-06, "loss": 0.0, "step": 41730 }, { "epoch": 0.2987189579904101, "grad_norm": 0.0, "learning_rate": 7.014456451728334e-06, "loss": 0.0, "step": 41740 }, { "epoch": 0.2987905245831246, "grad_norm": 0.0, "learning_rate": 7.0137407858011885e-06, "loss": 0.3242, "step": 41750 }, { "epoch": 0.2988620911758391, "grad_norm": 0.0, "learning_rate": 7.013025119874043e-06, "loss": 0.0008, "step": 41760 }, { "epoch": 0.29893365776855363, "grad_norm": 0.0, "learning_rate": 7.012309453946898e-06, "loss": 0.0285, "step": 41770 }, { "epoch": 0.29900522436126814, "grad_norm": 0.0, "learning_rate": 7.011593788019753e-06, "loss": 0.0, "step": 41780 }, { "epoch": 0.2990767909539827, "grad_norm": 0.0, "learning_rate": 7.010878122092608e-06, "loss": 0.0, "step": 41790 }, { "epoch": 0.2991483575466972, "grad_norm": 0.0, "learning_rate": 7.010162456165462e-06, "loss": 0.0005, "step": 41800 }, { "epoch": 0.29921992413941173, "grad_norm": 0.0, "learning_rate": 7.009446790238318e-06, "loss": 0.0, "step": 41810 }, { "epoch": 0.29929149073212624, "grad_norm": 0.0, "learning_rate": 7.008731124311172e-06, "loss": 0.0, "step": 41820 }, { "epoch": 0.29936305732484075, "grad_norm": 0.0, "learning_rate": 7.0080154583840266e-06, "loss": 0.0, "step": 41830 }, { "epoch": 0.29943462391755526, "grad_norm": 0.0, "learning_rate": 7.007299792456882e-06, "loss": 0.0, "step": 41840 }, { "epoch": 0.29950619051026983, "grad_norm": 0.0, "learning_rate": 7.006584126529736e-06, "loss": 0.0, "step": 41850 }, { "epoch": 0.29957775710298434, "grad_norm": 0.001177020836621523, "learning_rate": 7.005868460602592e-06, "loss": 0.0, "step": 41860 }, { "epoch": 0.29964932369569885, "grad_norm": 0.0, "learning_rate": 7.005152794675446e-06, "loss": 0.0003, "step": 41870 }, { "epoch": 0.29972089028841337, "grad_norm": 0.0, "learning_rate": 7.0044371287483005e-06, "loss": 0.0, "step": 41880 }, { "epoch": 0.2997924568811279, "grad_norm": 0.0007166175055317581, "learning_rate": 7.003721462821156e-06, "loss": 0.0289, "step": 41890 }, { "epoch": 0.2998640234738424, "grad_norm": 0.0, "learning_rate": 7.00300579689401e-06, "loss": 0.0, "step": 41900 }, { "epoch": 0.29993559006655696, "grad_norm": 0.0, "learning_rate": 7.0022901309668655e-06, "loss": 0.0, "step": 41910 }, { "epoch": 0.30000715665927147, "grad_norm": 0.0, "learning_rate": 7.00157446503972e-06, "loss": 0.0313, "step": 41920 }, { "epoch": 0.300078723251986, "grad_norm": 0.0, "learning_rate": 7.000858799112575e-06, "loss": 0.0, "step": 41930 }, { "epoch": 0.3001502898447005, "grad_norm": 0.0, "learning_rate": 7.00014313318543e-06, "loss": 0.0331, "step": 41940 }, { "epoch": 0.300221856437415, "grad_norm": 0.0, "learning_rate": 6.999427467258284e-06, "loss": 0.0001, "step": 41950 }, { "epoch": 0.3002934230301295, "grad_norm": 0.0, "learning_rate": 6.998711801331139e-06, "loss": 0.0076, "step": 41960 }, { "epoch": 0.3003649896228441, "grad_norm": 0.0, "learning_rate": 6.997996135403994e-06, "loss": 0.0002, "step": 41970 }, { "epoch": 0.3004365562155586, "grad_norm": 1.8265721379862043e-08, "learning_rate": 6.997280469476849e-06, "loss": 0.0001, "step": 41980 }, { "epoch": 0.3005081228082731, "grad_norm": 0.0, "learning_rate": 6.996564803549704e-06, "loss": 0.0028, "step": 41990 }, { "epoch": 0.3005796894009876, "grad_norm": 0.0, "learning_rate": 6.995849137622558e-06, "loss": 0.0, "step": 42000 }, { "epoch": 0.3006512559937021, "grad_norm": 8.068660122262372e-07, "learning_rate": 6.995133471695413e-06, "loss": 0.0554, "step": 42010 }, { "epoch": 0.30072282258641664, "grad_norm": 0.0, "learning_rate": 6.994417805768268e-06, "loss": 0.0, "step": 42020 }, { "epoch": 0.3007943891791312, "grad_norm": 0.0, "learning_rate": 6.993702139841123e-06, "loss": 0.0052, "step": 42030 }, { "epoch": 0.3008659557718457, "grad_norm": 0.0, "learning_rate": 6.9929864739139775e-06, "loss": 0.001, "step": 42040 }, { "epoch": 0.30093752236456023, "grad_norm": 0.0, "learning_rate": 6.992270807986832e-06, "loss": 0.0, "step": 42050 }, { "epoch": 0.30100908895727474, "grad_norm": 0.0, "learning_rate": 6.991555142059687e-06, "loss": 0.0, "step": 42060 }, { "epoch": 0.30108065554998925, "grad_norm": 0.0, "learning_rate": 6.990839476132542e-06, "loss": 0.0, "step": 42070 }, { "epoch": 0.30115222214270376, "grad_norm": 0.0, "learning_rate": 6.990123810205397e-06, "loss": 0.0053, "step": 42080 }, { "epoch": 0.30122378873541833, "grad_norm": 0.0, "learning_rate": 6.9894081442782514e-06, "loss": 0.0, "step": 42090 }, { "epoch": 0.30129535532813284, "grad_norm": 0.0, "learning_rate": 6.988692478351107e-06, "loss": 0.0, "step": 42100 }, { "epoch": 0.30136692192084735, "grad_norm": 0.0, "learning_rate": 6.987976812423961e-06, "loss": 0.0, "step": 42110 }, { "epoch": 0.30143848851356186, "grad_norm": 0.0, "learning_rate": 6.987261146496816e-06, "loss": 0.0, "step": 42120 }, { "epoch": 0.3015100551062764, "grad_norm": 2.1059223254127346e-09, "learning_rate": 6.986545480569671e-06, "loss": 0.0, "step": 42130 }, { "epoch": 0.3015816216989909, "grad_norm": 0.0, "learning_rate": 6.985829814642525e-06, "loss": 0.0215, "step": 42140 }, { "epoch": 0.30165318829170545, "grad_norm": 0.0, "learning_rate": 6.985114148715381e-06, "loss": 0.0, "step": 42150 }, { "epoch": 0.30172475488441997, "grad_norm": 0.0, "learning_rate": 6.984398482788235e-06, "loss": 0.0, "step": 42160 }, { "epoch": 0.3017963214771345, "grad_norm": 0.0, "learning_rate": 6.9836828168610895e-06, "loss": 0.0001, "step": 42170 }, { "epoch": 0.301867888069849, "grad_norm": 0.0, "learning_rate": 6.982967150933945e-06, "loss": 0.007, "step": 42180 }, { "epoch": 0.3019394546625635, "grad_norm": 0.0, "learning_rate": 6.982251485006799e-06, "loss": 0.003, "step": 42190 }, { "epoch": 0.302011021255278, "grad_norm": 0.0, "learning_rate": 6.9815358190796546e-06, "loss": 0.0, "step": 42200 }, { "epoch": 0.3020825878479926, "grad_norm": 4.2808574107766617e-07, "learning_rate": 6.980820153152509e-06, "loss": 0.0001, "step": 42210 }, { "epoch": 0.3021541544407071, "grad_norm": 0.0, "learning_rate": 6.980104487225364e-06, "loss": 0.0, "step": 42220 }, { "epoch": 0.3022257210334216, "grad_norm": 2.5469682441325858e-05, "learning_rate": 6.979388821298219e-06, "loss": 0.0011, "step": 42230 }, { "epoch": 0.3022972876261361, "grad_norm": 0.0, "learning_rate": 6.978673155371073e-06, "loss": 0.0, "step": 42240 }, { "epoch": 0.3023688542188506, "grad_norm": 0.002971747424453497, "learning_rate": 6.978029056036643e-06, "loss": 1.6629, "step": 42250 }, { "epoch": 0.30244042081156514, "grad_norm": 0.0, "learning_rate": 6.9773133901094965e-06, "loss": 0.0, "step": 42260 }, { "epoch": 0.3025119874042797, "grad_norm": 0.0, "learning_rate": 6.976597724182353e-06, "loss": 0.0014, "step": 42270 }, { "epoch": 0.3025835539969942, "grad_norm": 0.0, "learning_rate": 6.975882058255206e-06, "loss": 0.0, "step": 42280 }, { "epoch": 0.3026551205897087, "grad_norm": 0.0, "learning_rate": 6.975166392328062e-06, "loss": 0.2216, "step": 42290 }, { "epoch": 0.30272668718242324, "grad_norm": 0.0, "learning_rate": 6.974450726400917e-06, "loss": 0.0, "step": 42300 }, { "epoch": 0.30279825377513775, "grad_norm": 8.002389907836914, "learning_rate": 6.973735060473772e-06, "loss": 0.002, "step": 42310 }, { "epoch": 0.30286982036785226, "grad_norm": 0.0, "learning_rate": 6.973019394546627e-06, "loss": 0.0, "step": 42320 }, { "epoch": 0.3029413869605668, "grad_norm": 0.0, "learning_rate": 6.97230372861948e-06, "loss": 0.0004, "step": 42330 }, { "epoch": 0.30301295355328134, "grad_norm": 4.491366722980672e-10, "learning_rate": 6.971588062692336e-06, "loss": 0.0, "step": 42340 }, { "epoch": 0.30308452014599585, "grad_norm": 0.0, "learning_rate": 6.97087239676519e-06, "loss": 0.0, "step": 42350 }, { "epoch": 0.30315608673871036, "grad_norm": 0.0, "learning_rate": 6.970156730838046e-06, "loss": 0.0, "step": 42360 }, { "epoch": 0.3032276533314249, "grad_norm": 4.021121213781953e-10, "learning_rate": 6.9694410649109005e-06, "loss": 0.0, "step": 42370 }, { "epoch": 0.3032992199241394, "grad_norm": 0.0, "learning_rate": 6.968725398983754e-06, "loss": 0.0, "step": 42380 }, { "epoch": 0.30337078651685395, "grad_norm": 0.0, "learning_rate": 6.96800973305661e-06, "loss": 0.0, "step": 42390 }, { "epoch": 0.30344235310956846, "grad_norm": 0.0, "learning_rate": 6.967294067129464e-06, "loss": 0.0, "step": 42400 }, { "epoch": 0.303513919702283, "grad_norm": 0.0, "learning_rate": 6.96657840120232e-06, "loss": 0.0, "step": 42410 }, { "epoch": 0.3035854862949975, "grad_norm": 0.0, "learning_rate": 6.965862735275174e-06, "loss": 0.178, "step": 42420 }, { "epoch": 0.303657052887712, "grad_norm": 0.0, "learning_rate": 6.96514706934803e-06, "loss": 0.0001, "step": 42430 }, { "epoch": 0.3037286194804265, "grad_norm": 4.398694954943494e-07, "learning_rate": 6.964431403420884e-06, "loss": 0.0033, "step": 42440 }, { "epoch": 0.3038001860731411, "grad_norm": 0.0, "learning_rate": 6.963715737493738e-06, "loss": 0.0, "step": 42450 }, { "epoch": 0.3038717526658556, "grad_norm": 0.0, "learning_rate": 6.963000071566594e-06, "loss": 0.0, "step": 42460 }, { "epoch": 0.3039433192585701, "grad_norm": 0.0, "learning_rate": 6.9622844056394475e-06, "loss": 0.0, "step": 42470 }, { "epoch": 0.3040148858512846, "grad_norm": 0.0, "learning_rate": 6.961568739712304e-06, "loss": 0.0, "step": 42480 }, { "epoch": 0.3040864524439991, "grad_norm": 0.0, "learning_rate": 6.960853073785157e-06, "loss": 0.0007, "step": 42490 }, { "epoch": 0.3041580190367137, "grad_norm": 0.0, "learning_rate": 6.960137407858012e-06, "loss": 0.0001, "step": 42500 }, { "epoch": 0.3042295856294282, "grad_norm": 0.0, "learning_rate": 6.959421741930867e-06, "loss": 0.0, "step": 42510 }, { "epoch": 0.3043011522221427, "grad_norm": 0.0011083041317760944, "learning_rate": 6.9587060760037214e-06, "loss": 0.0, "step": 42520 }, { "epoch": 0.3043727188148572, "grad_norm": 0.0, "learning_rate": 6.9579904100765776e-06, "loss": 0.0, "step": 42530 }, { "epoch": 0.30444428540757174, "grad_norm": 0.0, "learning_rate": 6.957274744149431e-06, "loss": 0.0, "step": 42540 }, { "epoch": 0.30451585200028625, "grad_norm": 0.0, "learning_rate": 6.956559078222287e-06, "loss": 0.0, "step": 42550 }, { "epoch": 0.3045874185930008, "grad_norm": 0.0, "learning_rate": 6.955843412295141e-06, "loss": 0.0, "step": 42560 }, { "epoch": 0.3046589851857153, "grad_norm": 0.0, "learning_rate": 6.955127746367995e-06, "loss": 0.0, "step": 42570 }, { "epoch": 0.30473055177842984, "grad_norm": 0.0, "learning_rate": 6.954412080440851e-06, "loss": 0.0844, "step": 42580 }, { "epoch": 0.30480211837114435, "grad_norm": 0.0, "learning_rate": 6.953696414513705e-06, "loss": 0.0, "step": 42590 }, { "epoch": 0.30487368496385886, "grad_norm": 0.0, "learning_rate": 6.952980748586561e-06, "loss": 0.0, "step": 42600 }, { "epoch": 0.30494525155657337, "grad_norm": 0.0, "learning_rate": 6.952265082659415e-06, "loss": 0.0, "step": 42610 }, { "epoch": 0.30501681814928794, "grad_norm": 0.0, "learning_rate": 6.951549416732269e-06, "loss": 0.0, "step": 42620 }, { "epoch": 0.30508838474200245, "grad_norm": 0.0, "learning_rate": 6.9508337508051246e-06, "loss": 0.0, "step": 42630 }, { "epoch": 0.30515995133471696, "grad_norm": 0.0, "learning_rate": 6.950118084877979e-06, "loss": 0.0, "step": 42640 }, { "epoch": 0.3052315179274315, "grad_norm": 0.0, "learning_rate": 6.949402418950834e-06, "loss": 0.0, "step": 42650 }, { "epoch": 0.305303084520146, "grad_norm": 4.55957933809259e-06, "learning_rate": 6.948686753023689e-06, "loss": 0.0, "step": 42660 }, { "epoch": 0.3053746511128605, "grad_norm": 0.0, "learning_rate": 6.947971087096544e-06, "loss": 0.0, "step": 42670 }, { "epoch": 0.30544621770557506, "grad_norm": 0.0, "learning_rate": 6.9472554211693985e-06, "loss": 0.0404, "step": 42680 }, { "epoch": 0.3055177842982896, "grad_norm": 2.259043867525179e-05, "learning_rate": 6.946539755242253e-06, "loss": 0.0, "step": 42690 }, { "epoch": 0.3055893508910041, "grad_norm": 0.0, "learning_rate": 6.945824089315108e-06, "loss": 0.0005, "step": 42700 }, { "epoch": 0.3056609174837186, "grad_norm": 0.0, "learning_rate": 6.945108423387963e-06, "loss": 0.0, "step": 42710 }, { "epoch": 0.3057324840764331, "grad_norm": 781.324462890625, "learning_rate": 6.944392757460818e-06, "loss": 1.7191, "step": 42720 }, { "epoch": 0.3058040506691476, "grad_norm": 0.0, "learning_rate": 6.943677091533672e-06, "loss": 0.0001, "step": 42730 }, { "epoch": 0.3058756172618622, "grad_norm": 0.0, "learning_rate": 6.942961425606527e-06, "loss": 0.0, "step": 42740 }, { "epoch": 0.3059471838545767, "grad_norm": 0.0, "learning_rate": 6.942245759679382e-06, "loss": 0.0, "step": 42750 }, { "epoch": 0.3060187504472912, "grad_norm": 1.4576976292701715e-09, "learning_rate": 6.9415300937522366e-06, "loss": 0.0, "step": 42760 }, { "epoch": 0.3060903170400057, "grad_norm": 7.74261099678597e-10, "learning_rate": 6.940814427825092e-06, "loss": 0.0, "step": 42770 }, { "epoch": 0.30616188363272023, "grad_norm": 0.0, "learning_rate": 6.940098761897946e-06, "loss": 0.0, "step": 42780 }, { "epoch": 0.30623345022543474, "grad_norm": 0.0, "learning_rate": 6.939383095970802e-06, "loss": 0.0, "step": 42790 }, { "epoch": 0.3063050168181493, "grad_norm": 6.767818661046476e-08, "learning_rate": 6.938667430043656e-06, "loss": 0.0, "step": 42800 }, { "epoch": 0.3063765834108638, "grad_norm": 0.0, "learning_rate": 6.9379517641165105e-06, "loss": 0.0, "step": 42810 }, { "epoch": 0.30644815000357833, "grad_norm": 0.0, "learning_rate": 6.937236098189366e-06, "loss": 0.0, "step": 42820 }, { "epoch": 0.30651971659629285, "grad_norm": 0.0, "learning_rate": 6.93652043226222e-06, "loss": 0.0, "step": 42830 }, { "epoch": 0.30659128318900736, "grad_norm": 18.777917861938477, "learning_rate": 6.9358047663350755e-06, "loss": 0.0029, "step": 42840 }, { "epoch": 0.30666284978172187, "grad_norm": 0.0, "learning_rate": 6.93508910040793e-06, "loss": 0.0, "step": 42850 }, { "epoch": 0.30673441637443644, "grad_norm": 786.6710205078125, "learning_rate": 6.934373434480784e-06, "loss": 0.7682, "step": 42860 }, { "epoch": 0.30680598296715095, "grad_norm": 0.00110004807356745, "learning_rate": 6.93365776855364e-06, "loss": 0.21, "step": 42870 }, { "epoch": 0.30687754955986546, "grad_norm": 0.010639740154147148, "learning_rate": 6.932942102626494e-06, "loss": 0.0326, "step": 42880 }, { "epoch": 0.30694911615257997, "grad_norm": 0.00299554574303329, "learning_rate": 6.9322264366993494e-06, "loss": 0.0, "step": 42890 }, { "epoch": 0.3070206827452945, "grad_norm": 0.004174856934696436, "learning_rate": 6.931510770772204e-06, "loss": 0.0, "step": 42900 }, { "epoch": 0.307092249338009, "grad_norm": 1.5977768441644002e-07, "learning_rate": 6.930795104845058e-06, "loss": 0.1638, "step": 42910 }, { "epoch": 0.30716381593072356, "grad_norm": 0.0, "learning_rate": 6.930079438917914e-06, "loss": 0.0, "step": 42920 }, { "epoch": 0.30723538252343807, "grad_norm": 5.501211717273691e-07, "learning_rate": 6.929363772990768e-06, "loss": 0.0, "step": 42930 }, { "epoch": 0.3073069491161526, "grad_norm": 0.0, "learning_rate": 6.928648107063623e-06, "loss": 0.0229, "step": 42940 }, { "epoch": 0.3073785157088671, "grad_norm": 0.0, "learning_rate": 6.927932441136478e-06, "loss": 0.0011, "step": 42950 }, { "epoch": 0.3074500823015816, "grad_norm": 4.2537379174589773e-10, "learning_rate": 6.927216775209333e-06, "loss": 0.0, "step": 42960 }, { "epoch": 0.3075216488942961, "grad_norm": 0.07105091214179993, "learning_rate": 6.9265011092821875e-06, "loss": 0.0, "step": 42970 }, { "epoch": 0.3075932154870107, "grad_norm": 0.0, "learning_rate": 6.925785443355042e-06, "loss": 0.0, "step": 42980 }, { "epoch": 0.3076647820797252, "grad_norm": 0.0, "learning_rate": 6.925069777427897e-06, "loss": 0.0048, "step": 42990 }, { "epoch": 0.3077363486724397, "grad_norm": 0.0, "learning_rate": 6.924354111500752e-06, "loss": 0.0, "step": 43000 }, { "epoch": 0.3078079152651542, "grad_norm": 0.0, "learning_rate": 6.923638445573607e-06, "loss": 0.0, "step": 43010 }, { "epoch": 0.30787948185786873, "grad_norm": 0.0002413699694443494, "learning_rate": 6.9229227796464614e-06, "loss": 0.0, "step": 43020 }, { "epoch": 0.30795104845058324, "grad_norm": 0.0, "learning_rate": 6.922207113719316e-06, "loss": 0.0, "step": 43030 }, { "epoch": 0.3080226150432978, "grad_norm": 8.871720638126135e-05, "learning_rate": 6.921491447792171e-06, "loss": 0.0001, "step": 43040 }, { "epoch": 0.3080941816360123, "grad_norm": 9.417343790119048e-06, "learning_rate": 6.920775781865026e-06, "loss": 0.0007, "step": 43050 }, { "epoch": 0.30816574822872683, "grad_norm": 6.622679507017892e-07, "learning_rate": 6.920060115937881e-06, "loss": 0.0, "step": 43060 }, { "epoch": 0.30823731482144134, "grad_norm": 4.119943552982619e-10, "learning_rate": 6.919344450010735e-06, "loss": 0.0, "step": 43070 }, { "epoch": 0.30830888141415586, "grad_norm": 0.0, "learning_rate": 6.918628784083591e-06, "loss": 0.0, "step": 43080 }, { "epoch": 0.30838044800687037, "grad_norm": 0.0, "learning_rate": 6.917913118156445e-06, "loss": 0.0, "step": 43090 }, { "epoch": 0.30845201459958493, "grad_norm": 7.107190413080389e-06, "learning_rate": 6.9171974522292995e-06, "loss": 0.0, "step": 43100 }, { "epoch": 0.30852358119229945, "grad_norm": 0.04072333127260208, "learning_rate": 6.916481786302155e-06, "loss": 0.0, "step": 43110 }, { "epoch": 0.30859514778501396, "grad_norm": 0.0, "learning_rate": 6.915766120375009e-06, "loss": 0.0005, "step": 43120 }, { "epoch": 0.30866671437772847, "grad_norm": 0.0, "learning_rate": 6.9150504544478646e-06, "loss": 0.0, "step": 43130 }, { "epoch": 0.308738280970443, "grad_norm": 0.0, "learning_rate": 6.914334788520719e-06, "loss": 0.0, "step": 43140 }, { "epoch": 0.3088098475631575, "grad_norm": 1.771106690284796e-05, "learning_rate": 6.9136191225935735e-06, "loss": 0.0001, "step": 43150 }, { "epoch": 0.30888141415587206, "grad_norm": 0.8281849026679993, "learning_rate": 6.912903456666429e-06, "loss": 0.0002, "step": 43160 }, { "epoch": 0.30895298074858657, "grad_norm": 0.0, "learning_rate": 6.912187790739283e-06, "loss": 0.0, "step": 43170 }, { "epoch": 0.3090245473413011, "grad_norm": 0.0, "learning_rate": 6.9114721248121385e-06, "loss": 0.0035, "step": 43180 }, { "epoch": 0.3090961139340156, "grad_norm": 0.0, "learning_rate": 6.910756458884993e-06, "loss": 0.0074, "step": 43190 }, { "epoch": 0.3091676805267301, "grad_norm": 1.2859395742416382, "learning_rate": 6.910040792957848e-06, "loss": 0.0081, "step": 43200 }, { "epoch": 0.3092392471194446, "grad_norm": 0.0021693790331482887, "learning_rate": 6.909325127030703e-06, "loss": 0.0, "step": 43210 }, { "epoch": 0.3093108137121592, "grad_norm": 0.0, "learning_rate": 6.908609461103557e-06, "loss": 0.0, "step": 43220 }, { "epoch": 0.3093823803048737, "grad_norm": 0.0, "learning_rate": 6.907893795176412e-06, "loss": 0.0, "step": 43230 }, { "epoch": 0.3094539468975882, "grad_norm": 4.841784750908573e-09, "learning_rate": 6.907178129249267e-06, "loss": 0.0031, "step": 43240 }, { "epoch": 0.3095255134903027, "grad_norm": 4.318639639677713e-06, "learning_rate": 6.906462463322122e-06, "loss": 0.0002, "step": 43250 }, { "epoch": 0.30959708008301723, "grad_norm": 0.0, "learning_rate": 6.905746797394977e-06, "loss": 0.5265, "step": 43260 }, { "epoch": 0.3096686466757318, "grad_norm": 0.0, "learning_rate": 6.905031131467831e-06, "loss": 0.0, "step": 43270 }, { "epoch": 0.3097402132684463, "grad_norm": 0.0, "learning_rate": 6.904315465540686e-06, "loss": 0.0363, "step": 43280 }, { "epoch": 0.3098117798611608, "grad_norm": 0.0, "learning_rate": 6.903599799613541e-06, "loss": 0.0, "step": 43290 }, { "epoch": 0.30988334645387533, "grad_norm": 2.0630521646580746e-07, "learning_rate": 6.902884133686396e-06, "loss": 0.0, "step": 43300 }, { "epoch": 0.30995491304658984, "grad_norm": 1.9755318135139532e-05, "learning_rate": 6.9021684677592505e-06, "loss": 0.0002, "step": 43310 }, { "epoch": 0.31002647963930435, "grad_norm": 0.0, "learning_rate": 6.901452801832106e-06, "loss": 0.4125, "step": 43320 }, { "epoch": 0.3100980462320189, "grad_norm": 0.0, "learning_rate": 6.90073713590496e-06, "loss": 0.0, "step": 43330 }, { "epoch": 0.31016961282473343, "grad_norm": 0.0, "learning_rate": 6.900021469977815e-06, "loss": 0.0663, "step": 43340 }, { "epoch": 0.31024117941744794, "grad_norm": 1.6582671946707706e-07, "learning_rate": 6.89930580405067e-06, "loss": 0.0, "step": 43350 }, { "epoch": 0.31031274601016245, "grad_norm": 0.0, "learning_rate": 6.898590138123524e-06, "loss": 0.0, "step": 43360 }, { "epoch": 0.31038431260287697, "grad_norm": 0.0, "learning_rate": 6.89787447219638e-06, "loss": 0.0, "step": 43370 }, { "epoch": 0.3104558791955915, "grad_norm": 0.00011582143633859232, "learning_rate": 6.897158806269234e-06, "loss": 0.0, "step": 43380 }, { "epoch": 0.31052744578830604, "grad_norm": 0.6066110730171204, "learning_rate": 6.896443140342089e-06, "loss": 0.0053, "step": 43390 }, { "epoch": 0.31059901238102056, "grad_norm": 0.0, "learning_rate": 6.895727474414944e-06, "loss": 0.0001, "step": 43400 }, { "epoch": 0.31067057897373507, "grad_norm": 0.0, "learning_rate": 6.895011808487798e-06, "loss": 0.2768, "step": 43410 }, { "epoch": 0.3107421455664496, "grad_norm": 0.0, "learning_rate": 6.894296142560654e-06, "loss": 0.4332, "step": 43420 }, { "epoch": 0.3108137121591641, "grad_norm": 0.0, "learning_rate": 6.893580476633508e-06, "loss": 0.0, "step": 43430 }, { "epoch": 0.3108852787518786, "grad_norm": 0.0, "learning_rate": 6.892864810706363e-06, "loss": 0.0, "step": 43440 }, { "epoch": 0.31095684534459317, "grad_norm": 0.0, "learning_rate": 6.892149144779218e-06, "loss": 0.0, "step": 43450 }, { "epoch": 0.3110284119373077, "grad_norm": 0.021763421595096588, "learning_rate": 6.891433478852072e-06, "loss": 0.0005, "step": 43460 }, { "epoch": 0.3110999785300222, "grad_norm": 0.0, "learning_rate": 6.8907178129249275e-06, "loss": 0.0, "step": 43470 }, { "epoch": 0.3111715451227367, "grad_norm": 0.0, "learning_rate": 6.890002146997782e-06, "loss": 0.0, "step": 43480 }, { "epoch": 0.3112431117154512, "grad_norm": 0.0006398724508471787, "learning_rate": 6.889286481070637e-06, "loss": 0.0, "step": 43490 }, { "epoch": 0.3113146783081657, "grad_norm": 2.777509999773997e-09, "learning_rate": 6.888570815143492e-06, "loss": 0.0, "step": 43500 }, { "epoch": 0.3113862449008803, "grad_norm": 4.325839131347209e-10, "learning_rate": 6.887855149216346e-06, "loss": 0.0001, "step": 43510 }, { "epoch": 0.3114578114935948, "grad_norm": 1.4792738511459902e-05, "learning_rate": 6.8871394832892015e-06, "loss": 0.0, "step": 43520 }, { "epoch": 0.3115293780863093, "grad_norm": 0.0, "learning_rate": 6.886423817362056e-06, "loss": 0.0001, "step": 43530 }, { "epoch": 0.31160094467902383, "grad_norm": 9.550441973260604e-06, "learning_rate": 6.885708151434911e-06, "loss": 0.0002, "step": 43540 }, { "epoch": 0.31167251127173834, "grad_norm": 0.0, "learning_rate": 6.884992485507766e-06, "loss": 0.0003, "step": 43550 }, { "epoch": 0.31174407786445285, "grad_norm": 4.079640802956419e-07, "learning_rate": 6.884276819580621e-06, "loss": 0.0, "step": 43560 }, { "epoch": 0.3118156444571674, "grad_norm": 0.0, "learning_rate": 6.883561153653475e-06, "loss": 0.0, "step": 43570 }, { "epoch": 0.31188721104988193, "grad_norm": 3.1839217626838945e-06, "learning_rate": 6.88284548772633e-06, "loss": 0.0, "step": 43580 }, { "epoch": 0.31195877764259644, "grad_norm": 0.0, "learning_rate": 6.882129821799185e-06, "loss": 0.0, "step": 43590 }, { "epoch": 0.31203034423531095, "grad_norm": 0.0, "learning_rate": 6.8814141558720396e-06, "loss": 0.0, "step": 43600 }, { "epoch": 0.31210191082802546, "grad_norm": 0.0, "learning_rate": 6.880698489944895e-06, "loss": 0.0517, "step": 43610 }, { "epoch": 0.31217347742074, "grad_norm": 0.0, "learning_rate": 6.879982824017749e-06, "loss": 0.0, "step": 43620 }, { "epoch": 0.31224504401345454, "grad_norm": 6.114641564636258e-06, "learning_rate": 6.879267158090604e-06, "loss": 0.0, "step": 43630 }, { "epoch": 0.31231661060616905, "grad_norm": 0.0, "learning_rate": 6.878551492163459e-06, "loss": 0.1604, "step": 43640 }, { "epoch": 0.31238817719888357, "grad_norm": 0.0, "learning_rate": 6.8778358262363135e-06, "loss": 0.0, "step": 43650 }, { "epoch": 0.3124597437915981, "grad_norm": 0.0, "learning_rate": 6.877120160309169e-06, "loss": 0.0, "step": 43660 }, { "epoch": 0.3125313103843126, "grad_norm": 0.0, "learning_rate": 6.876404494382023e-06, "loss": 0.693, "step": 43670 }, { "epoch": 0.3126028769770271, "grad_norm": 0.0, "learning_rate": 6.875688828454878e-06, "loss": 0.0, "step": 43680 }, { "epoch": 0.31267444356974167, "grad_norm": 0.02217119373381138, "learning_rate": 6.874973162527733e-06, "loss": 0.0, "step": 43690 }, { "epoch": 0.3127460101624562, "grad_norm": 1.2521967391876387e-06, "learning_rate": 6.874257496600587e-06, "loss": 0.0, "step": 43700 }, { "epoch": 0.3128175767551707, "grad_norm": 0.0, "learning_rate": 6.873541830673443e-06, "loss": 0.0, "step": 43710 }, { "epoch": 0.3128891433478852, "grad_norm": 0.0, "learning_rate": 6.872826164746297e-06, "loss": 0.0003, "step": 43720 }, { "epoch": 0.3129607099405997, "grad_norm": 0.0, "learning_rate": 6.8721104988191524e-06, "loss": 0.0, "step": 43730 }, { "epoch": 0.3130322765333142, "grad_norm": 0.0, "learning_rate": 6.871394832892007e-06, "loss": 0.0, "step": 43740 }, { "epoch": 0.3131038431260288, "grad_norm": 8.024219937396992e-07, "learning_rate": 6.870679166964861e-06, "loss": 0.0, "step": 43750 }, { "epoch": 0.3131754097187433, "grad_norm": 0.0, "learning_rate": 6.869963501037717e-06, "loss": 0.0, "step": 43760 }, { "epoch": 0.3132469763114578, "grad_norm": 0.0, "learning_rate": 6.869247835110571e-06, "loss": 0.0008, "step": 43770 }, { "epoch": 0.3133185429041723, "grad_norm": 0.0, "learning_rate": 6.868532169183426e-06, "loss": 0.0, "step": 43780 }, { "epoch": 0.31339010949688684, "grad_norm": 0.0, "learning_rate": 6.867816503256281e-06, "loss": 1.0906, "step": 43790 }, { "epoch": 0.31346167608960135, "grad_norm": 0.0, "learning_rate": 6.867100837329134e-06, "loss": 0.0, "step": 43800 }, { "epoch": 0.3135332426823159, "grad_norm": 0.0, "learning_rate": 6.8663851714019905e-06, "loss": 0.0025, "step": 43810 }, { "epoch": 0.3136048092750304, "grad_norm": 0.010194140486419201, "learning_rate": 6.865669505474845e-06, "loss": 0.0, "step": 43820 }, { "epoch": 0.31367637586774494, "grad_norm": 8.47177605844962e-10, "learning_rate": 6.8649538395477e-06, "loss": 0.0, "step": 43830 }, { "epoch": 0.31374794246045945, "grad_norm": 0.0, "learning_rate": 6.864238173620555e-06, "loss": 0.0, "step": 43840 }, { "epoch": 0.31381950905317396, "grad_norm": 0.0, "learning_rate": 6.86352250769341e-06, "loss": 0.0205, "step": 43850 }, { "epoch": 0.3138910756458885, "grad_norm": 0.0, "learning_rate": 6.8628068417662644e-06, "loss": 0.0, "step": 43860 }, { "epoch": 0.31396264223860304, "grad_norm": 0.0, "learning_rate": 6.862091175839118e-06, "loss": 0.0, "step": 43870 }, { "epoch": 0.31403420883131755, "grad_norm": 0.00031792206573300064, "learning_rate": 6.861375509911974e-06, "loss": 0.0, "step": 43880 }, { "epoch": 0.31410577542403206, "grad_norm": 0.0, "learning_rate": 6.860659843984829e-06, "loss": 0.0, "step": 43890 }, { "epoch": 0.3141773420167466, "grad_norm": 0.0, "learning_rate": 6.859944178057684e-06, "loss": 0.0, "step": 43900 }, { "epoch": 0.3142489086094611, "grad_norm": 0.0, "learning_rate": 6.859228512130538e-06, "loss": 0.0, "step": 43910 }, { "epoch": 0.3143204752021756, "grad_norm": 0.0003518734883982688, "learning_rate": 6.858512846203392e-06, "loss": 0.0, "step": 43920 }, { "epoch": 0.31439204179489016, "grad_norm": 0.0, "learning_rate": 6.857797180276248e-06, "loss": 0.0, "step": 43930 }, { "epoch": 0.3144636083876047, "grad_norm": 0.0, "learning_rate": 6.857081514349102e-06, "loss": 0.0, "step": 43940 }, { "epoch": 0.3145351749803192, "grad_norm": 0.0, "learning_rate": 6.856365848421958e-06, "loss": 0.0, "step": 43950 }, { "epoch": 0.3146067415730337, "grad_norm": 0.0, "learning_rate": 6.8556501824948114e-06, "loss": 0.0, "step": 43960 }, { "epoch": 0.3146783081657482, "grad_norm": 0.0, "learning_rate": 6.8549345165676676e-06, "loss": 0.0, "step": 43970 }, { "epoch": 0.3147498747584627, "grad_norm": 0.0, "learning_rate": 6.854218850640522e-06, "loss": 0.0005, "step": 43980 }, { "epoch": 0.3148214413511773, "grad_norm": 0.0, "learning_rate": 6.853503184713376e-06, "loss": 0.0025, "step": 43990 }, { "epoch": 0.3148930079438918, "grad_norm": 0.0, "learning_rate": 6.852787518786232e-06, "loss": 0.0, "step": 44000 }, { "epoch": 0.3149645745366063, "grad_norm": 0.006730763241648674, "learning_rate": 6.852071852859085e-06, "loss": 0.0, "step": 44010 }, { "epoch": 0.3150361411293208, "grad_norm": 0.0, "learning_rate": 6.8513561869319415e-06, "loss": 0.0003, "step": 44020 }, { "epoch": 0.31510770772203534, "grad_norm": 0.0, "learning_rate": 6.850640521004795e-06, "loss": 0.0, "step": 44030 }, { "epoch": 0.3151792743147499, "grad_norm": 0.0, "learning_rate": 6.8499248550776495e-06, "loss": 0.0, "step": 44040 }, { "epoch": 0.3152508409074644, "grad_norm": 0.0, "learning_rate": 6.849209189150506e-06, "loss": 0.0, "step": 44050 }, { "epoch": 0.3153224075001789, "grad_norm": 0.0, "learning_rate": 6.848493523223359e-06, "loss": 0.0, "step": 44060 }, { "epoch": 0.31539397409289344, "grad_norm": 0.0, "learning_rate": 6.847777857296215e-06, "loss": 0.0, "step": 44070 }, { "epoch": 0.31546554068560795, "grad_norm": 0.0, "learning_rate": 6.847062191369069e-06, "loss": 0.0, "step": 44080 }, { "epoch": 0.31553710727832246, "grad_norm": 1.9528525463385904e-09, "learning_rate": 6.846346525441925e-06, "loss": 0.0, "step": 44090 }, { "epoch": 0.315608673871037, "grad_norm": 0.0, "learning_rate": 6.845630859514779e-06, "loss": 0.0001, "step": 44100 }, { "epoch": 0.31568024046375154, "grad_norm": 3.6688179534394294e-05, "learning_rate": 6.844915193587633e-06, "loss": 0.0, "step": 44110 }, { "epoch": 0.31575180705646605, "grad_norm": 0.0, "learning_rate": 6.8441995276604885e-06, "loss": 0.0, "step": 44120 }, { "epoch": 0.31582337364918056, "grad_norm": 4.0666722767035424e-10, "learning_rate": 6.843483861733343e-06, "loss": 0.0, "step": 44130 }, { "epoch": 0.3158949402418951, "grad_norm": 0.0, "learning_rate": 6.842768195806199e-06, "loss": 0.0, "step": 44140 }, { "epoch": 0.3159665068346096, "grad_norm": 0.0, "learning_rate": 6.842052529879053e-06, "loss": 0.0086, "step": 44150 }, { "epoch": 0.31603807342732415, "grad_norm": 0.0746806263923645, "learning_rate": 6.841336863951907e-06, "loss": 0.0, "step": 44160 }, { "epoch": 0.31610964002003866, "grad_norm": 0.0, "learning_rate": 6.840621198024762e-06, "loss": 0.0006, "step": 44170 }, { "epoch": 0.3161812066127532, "grad_norm": 2.0648701593017904e-06, "learning_rate": 6.839905532097617e-06, "loss": 0.0, "step": 44180 }, { "epoch": 0.3162527732054677, "grad_norm": 0.0, "learning_rate": 6.839189866170472e-06, "loss": 0.0001, "step": 44190 }, { "epoch": 0.3163243397981822, "grad_norm": 0.0, "learning_rate": 6.8384742002433266e-06, "loss": 0.0002, "step": 44200 }, { "epoch": 0.3163959063908967, "grad_norm": 0.0, "learning_rate": 6.837758534316183e-06, "loss": 0.0, "step": 44210 }, { "epoch": 0.3164674729836113, "grad_norm": 0.0, "learning_rate": 6.837042868389036e-06, "loss": 0.0001, "step": 44220 }, { "epoch": 0.3165390395763258, "grad_norm": 0.0, "learning_rate": 6.836327202461891e-06, "loss": 0.0, "step": 44230 }, { "epoch": 0.3166106061690403, "grad_norm": 0.0, "learning_rate": 6.835611536534746e-06, "loss": 0.0001, "step": 44240 }, { "epoch": 0.3166821727617548, "grad_norm": 0.0, "learning_rate": 6.8348958706076005e-06, "loss": 0.0, "step": 44250 }, { "epoch": 0.3167537393544693, "grad_norm": 0.0, "learning_rate": 6.834180204680456e-06, "loss": 0.0, "step": 44260 }, { "epoch": 0.31682530594718383, "grad_norm": 0.0002603458415251225, "learning_rate": 6.83346453875331e-06, "loss": 0.0, "step": 44270 }, { "epoch": 0.3168968725398984, "grad_norm": 0.0, "learning_rate": 6.832748872826165e-06, "loss": 0.0008, "step": 44280 }, { "epoch": 0.3169684391326129, "grad_norm": 0.0, "learning_rate": 6.83203320689902e-06, "loss": 0.0, "step": 44290 }, { "epoch": 0.3170400057253274, "grad_norm": 0.0, "learning_rate": 6.831317540971874e-06, "loss": 0.0, "step": 44300 }, { "epoch": 0.31711157231804193, "grad_norm": 0.06141766160726547, "learning_rate": 6.83060187504473e-06, "loss": 0.0029, "step": 44310 }, { "epoch": 0.31718313891075645, "grad_norm": 0.0, "learning_rate": 6.829886209117584e-06, "loss": 0.0, "step": 44320 }, { "epoch": 0.31725470550347096, "grad_norm": 0.0, "learning_rate": 6.8291705431904394e-06, "loss": 0.0, "step": 44330 }, { "epoch": 0.3173262720961855, "grad_norm": 0.0, "learning_rate": 6.828454877263294e-06, "loss": 0.0, "step": 44340 }, { "epoch": 0.31739783868890004, "grad_norm": 0.2565808892250061, "learning_rate": 6.827739211336148e-06, "loss": 0.0001, "step": 44350 }, { "epoch": 0.31746940528161455, "grad_norm": 0.0, "learning_rate": 6.827095112001718e-06, "loss": 0.759, "step": 44360 }, { "epoch": 0.31754097187432906, "grad_norm": 7.972300954861566e-05, "learning_rate": 6.8263794460745725e-06, "loss": 0.0, "step": 44370 }, { "epoch": 0.31761253846704357, "grad_norm": 0.0, "learning_rate": 6.825663780147428e-06, "loss": 0.001, "step": 44380 }, { "epoch": 0.3176841050597581, "grad_norm": 0.0, "learning_rate": 6.824948114220282e-06, "loss": 0.0, "step": 44390 }, { "epoch": 0.31775567165247265, "grad_norm": 0.0, "learning_rate": 6.8242324482931376e-06, "loss": 0.0, "step": 44400 }, { "epoch": 0.31782723824518716, "grad_norm": 2.2231161356245366e-09, "learning_rate": 6.823516782365992e-06, "loss": 0.0, "step": 44410 }, { "epoch": 0.31789880483790167, "grad_norm": 0.0, "learning_rate": 6.8228011164388464e-06, "loss": 0.0, "step": 44420 }, { "epoch": 0.3179703714306162, "grad_norm": 0.0, "learning_rate": 6.822085450511702e-06, "loss": 0.0, "step": 44430 }, { "epoch": 0.3180419380233307, "grad_norm": 0.00014501187251880765, "learning_rate": 6.821369784584556e-06, "loss": 0.0, "step": 44440 }, { "epoch": 0.3181135046160452, "grad_norm": 9.258849604520947e-05, "learning_rate": 6.8206541186574115e-06, "loss": 0.0, "step": 44450 }, { "epoch": 0.3181850712087598, "grad_norm": 0.0, "learning_rate": 6.819938452730266e-06, "loss": 0.0, "step": 44460 }, { "epoch": 0.3182566378014743, "grad_norm": 0.0, "learning_rate": 6.819222786803121e-06, "loss": 0.0, "step": 44470 }, { "epoch": 0.3183282043941888, "grad_norm": 0.0, "learning_rate": 6.818507120875976e-06, "loss": 0.0, "step": 44480 }, { "epoch": 0.3183997709869033, "grad_norm": 31.27703094482422, "learning_rate": 6.81779145494883e-06, "loss": 0.0068, "step": 44490 }, { "epoch": 0.3184713375796178, "grad_norm": 9.119366586674005e-05, "learning_rate": 6.817075789021685e-06, "loss": 0.0007, "step": 44500 }, { "epoch": 0.31854290417233233, "grad_norm": 0.0, "learning_rate": 6.81636012309454e-06, "loss": 0.0, "step": 44510 }, { "epoch": 0.3186144707650469, "grad_norm": 0.0, "learning_rate": 6.815644457167395e-06, "loss": 0.0, "step": 44520 }, { "epoch": 0.3186860373577614, "grad_norm": 0.0, "learning_rate": 6.8149287912402496e-06, "loss": 0.0, "step": 44530 }, { "epoch": 0.3187576039504759, "grad_norm": 0.0, "learning_rate": 6.814213125313104e-06, "loss": 0.0, "step": 44540 }, { "epoch": 0.31882917054319043, "grad_norm": 4.276213272369489e-10, "learning_rate": 6.813497459385959e-06, "loss": 0.0, "step": 44550 }, { "epoch": 0.31890073713590494, "grad_norm": 0.0, "learning_rate": 6.812781793458814e-06, "loss": 0.0, "step": 44560 }, { "epoch": 0.31897230372861946, "grad_norm": 0.0, "learning_rate": 6.812066127531669e-06, "loss": 0.0, "step": 44570 }, { "epoch": 0.319043870321334, "grad_norm": 9.721698113196453e-09, "learning_rate": 6.8113504616045235e-06, "loss": 0.0, "step": 44580 }, { "epoch": 0.31911543691404853, "grad_norm": 0.0, "learning_rate": 6.810634795677379e-06, "loss": 0.0, "step": 44590 }, { "epoch": 0.31918700350676305, "grad_norm": 0.0, "learning_rate": 6.809919129750233e-06, "loss": 0.0, "step": 44600 }, { "epoch": 0.31925857009947756, "grad_norm": 0.0, "learning_rate": 6.809203463823088e-06, "loss": 0.0, "step": 44610 }, { "epoch": 0.31933013669219207, "grad_norm": 0.0, "learning_rate": 6.808487797895943e-06, "loss": 0.0, "step": 44620 }, { "epoch": 0.3194017032849066, "grad_norm": 0.0, "learning_rate": 6.807772131968797e-06, "loss": 0.0, "step": 44630 }, { "epoch": 0.31947326987762115, "grad_norm": 0.0, "learning_rate": 6.807056466041653e-06, "loss": 0.0067, "step": 44640 }, { "epoch": 0.31954483647033566, "grad_norm": 2.719423548569466e-07, "learning_rate": 6.806340800114507e-06, "loss": 0.6687, "step": 44650 }, { "epoch": 0.31961640306305017, "grad_norm": 0.0, "learning_rate": 6.805625134187362e-06, "loss": 0.0, "step": 44660 }, { "epoch": 0.3196879696557647, "grad_norm": Infinity, "learning_rate": 6.804981034852931e-06, "loss": 0.3553, "step": 44670 }, { "epoch": 0.3197595362484792, "grad_norm": 0.0, "learning_rate": 6.804265368925787e-06, "loss": 0.0, "step": 44680 }, { "epoch": 0.3198311028411937, "grad_norm": 0.0, "learning_rate": 6.803549702998641e-06, "loss": 0.0, "step": 44690 }, { "epoch": 0.31990266943390827, "grad_norm": 1.7290644645690918, "learning_rate": 6.8028340370714955e-06, "loss": 0.0004, "step": 44700 }, { "epoch": 0.3199742360266228, "grad_norm": 0.0, "learning_rate": 6.802118371144351e-06, "loss": 0.0, "step": 44710 }, { "epoch": 0.3200458026193373, "grad_norm": 0.0, "learning_rate": 6.801402705217205e-06, "loss": 0.0, "step": 44720 }, { "epoch": 0.3201173692120518, "grad_norm": 0.0, "learning_rate": 6.8006870392900606e-06, "loss": 0.0, "step": 44730 }, { "epoch": 0.3201889358047663, "grad_norm": 0.0, "learning_rate": 6.799971373362915e-06, "loss": 0.0258, "step": 44740 }, { "epoch": 0.32026050239748083, "grad_norm": 146.4700469970703, "learning_rate": 6.799255707435769e-06, "loss": 0.0407, "step": 44750 }, { "epoch": 0.3203320689901954, "grad_norm": 0.0, "learning_rate": 6.798540041508625e-06, "loss": 0.0, "step": 44760 }, { "epoch": 0.3204036355829099, "grad_norm": 0.0, "learning_rate": 6.797824375581479e-06, "loss": 0.0, "step": 44770 }, { "epoch": 0.3204752021756244, "grad_norm": 2.5766948965610936e-06, "learning_rate": 6.7971087096543345e-06, "loss": 0.0, "step": 44780 }, { "epoch": 0.32054676876833893, "grad_norm": 0.0, "learning_rate": 6.796393043727189e-06, "loss": 0.0, "step": 44790 }, { "epoch": 0.32061833536105344, "grad_norm": 0.0, "learning_rate": 6.795677377800044e-06, "loss": 0.0, "step": 44800 }, { "epoch": 0.320689901953768, "grad_norm": 0.0, "learning_rate": 6.794961711872899e-06, "loss": 0.0741, "step": 44810 }, { "epoch": 0.3207614685464825, "grad_norm": 0.0007388858939521015, "learning_rate": 6.794246045945752e-06, "loss": 0.0, "step": 44820 }, { "epoch": 0.32083303513919703, "grad_norm": 1.2871248600276886e-06, "learning_rate": 6.793530380018608e-06, "loss": 0.0, "step": 44830 }, { "epoch": 0.32090460173191154, "grad_norm": 0.0, "learning_rate": 6.792814714091463e-06, "loss": 0.0007, "step": 44840 }, { "epoch": 0.32097616832462605, "grad_norm": 1.1512036323547363, "learning_rate": 6.792099048164318e-06, "loss": 0.0001, "step": 44850 }, { "epoch": 0.32104773491734057, "grad_norm": 0.0, "learning_rate": 6.7913833822371726e-06, "loss": 0.0005, "step": 44860 }, { "epoch": 0.32111930151005513, "grad_norm": 0.0, "learning_rate": 6.790667716310026e-06, "loss": 0.0, "step": 44870 }, { "epoch": 0.32119086810276964, "grad_norm": 0.0, "learning_rate": 6.789952050382882e-06, "loss": 0.7285, "step": 44880 }, { "epoch": 0.32126243469548416, "grad_norm": 2.2568844997294946e-06, "learning_rate": 6.789236384455736e-06, "loss": 0.0, "step": 44890 }, { "epoch": 0.32133400128819867, "grad_norm": 0.0, "learning_rate": 6.788520718528592e-06, "loss": 0.0002, "step": 44900 }, { "epoch": 0.3214055678809132, "grad_norm": 3.0609479395593553e-09, "learning_rate": 6.787805052601446e-06, "loss": 0.0, "step": 44910 }, { "epoch": 0.3214771344736277, "grad_norm": 0.0, "learning_rate": 6.787089386674302e-06, "loss": 0.0, "step": 44920 }, { "epoch": 0.32154870106634226, "grad_norm": 0.0, "learning_rate": 6.786373720747156e-06, "loss": 0.0641, "step": 44930 }, { "epoch": 0.32162026765905677, "grad_norm": 0.0, "learning_rate": 6.78565805482001e-06, "loss": 0.0305, "step": 44940 }, { "epoch": 0.3216918342517713, "grad_norm": 0.0, "learning_rate": 6.784942388892866e-06, "loss": 0.0, "step": 44950 }, { "epoch": 0.3217634008444858, "grad_norm": 0.0037614840548485518, "learning_rate": 6.7842267229657196e-06, "loss": 0.0009, "step": 44960 }, { "epoch": 0.3218349674372003, "grad_norm": 0.0, "learning_rate": 6.783511057038576e-06, "loss": 0.0008, "step": 44970 }, { "epoch": 0.3219065340299148, "grad_norm": 0.0, "learning_rate": 6.782795391111429e-06, "loss": 0.0, "step": 44980 }, { "epoch": 0.3219781006226294, "grad_norm": 0.0, "learning_rate": 6.782079725184284e-06, "loss": 0.0, "step": 44990 }, { "epoch": 0.3220496672153439, "grad_norm": 0.0, "learning_rate": 6.78136405925714e-06, "loss": 0.0, "step": 45000 }, { "epoch": 0.3221212338080584, "grad_norm": 0.0, "learning_rate": 6.7806483933299935e-06, "loss": 0.0, "step": 45010 }, { "epoch": 0.3221928004007729, "grad_norm": 0.0, "learning_rate": 6.77993272740285e-06, "loss": 0.0, "step": 45020 }, { "epoch": 0.32226436699348743, "grad_norm": 0.0, "learning_rate": 6.779217061475703e-06, "loss": 0.0, "step": 45030 }, { "epoch": 0.32233593358620194, "grad_norm": 41.99375534057617, "learning_rate": 6.778501395548559e-06, "loss": 0.0075, "step": 45040 }, { "epoch": 0.3224075001789165, "grad_norm": 0.0, "learning_rate": 6.777785729621413e-06, "loss": 0.0, "step": 45050 }, { "epoch": 0.322479066771631, "grad_norm": 0.0, "learning_rate": 6.777070063694267e-06, "loss": 0.0, "step": 45060 }, { "epoch": 0.32255063336434553, "grad_norm": 0.0, "learning_rate": 6.7763543977671235e-06, "loss": 0.0, "step": 45070 }, { "epoch": 0.32262219995706004, "grad_norm": 0.0, "learning_rate": 6.775638731839977e-06, "loss": 0.0, "step": 45080 }, { "epoch": 0.32269376654977455, "grad_norm": 0.0, "learning_rate": 6.774923065912833e-06, "loss": 0.0, "step": 45090 }, { "epoch": 0.32276533314248906, "grad_norm": 7.838716342689622e-10, "learning_rate": 6.774207399985687e-06, "loss": 0.0, "step": 45100 }, { "epoch": 0.32283689973520363, "grad_norm": 0.0, "learning_rate": 6.773491734058541e-06, "loss": 0.0, "step": 45110 }, { "epoch": 0.32290846632791814, "grad_norm": 0.0, "learning_rate": 6.772776068131397e-06, "loss": 0.0, "step": 45120 }, { "epoch": 0.32298003292063265, "grad_norm": 0.0, "learning_rate": 6.772060402204251e-06, "loss": 0.0, "step": 45130 }, { "epoch": 0.32305159951334717, "grad_norm": 0.0, "learning_rate": 6.771344736277106e-06, "loss": 0.0, "step": 45140 }, { "epoch": 0.3231231661060617, "grad_norm": 0.0, "learning_rate": 6.770629070349961e-06, "loss": 0.0, "step": 45150 }, { "epoch": 0.3231947326987762, "grad_norm": 2.7521255105966702e-05, "learning_rate": 6.769913404422817e-06, "loss": 0.0, "step": 45160 }, { "epoch": 0.32326629929149076, "grad_norm": 0.0, "learning_rate": 6.7691977384956705e-06, "loss": 0.0, "step": 45170 }, { "epoch": 0.32333786588420527, "grad_norm": 0.0, "learning_rate": 6.768482072568525e-06, "loss": 0.0, "step": 45180 }, { "epoch": 0.3234094324769198, "grad_norm": 0.0, "learning_rate": 6.76776640664138e-06, "loss": 0.0, "step": 45190 }, { "epoch": 0.3234809990696343, "grad_norm": 0.0, "learning_rate": 6.767050740714235e-06, "loss": 0.0, "step": 45200 }, { "epoch": 0.3235525656623488, "grad_norm": 0.0, "learning_rate": 6.76633507478709e-06, "loss": 0.0, "step": 45210 }, { "epoch": 0.3236241322550633, "grad_norm": 0.0, "learning_rate": 6.7656194088599444e-06, "loss": 0.0, "step": 45220 }, { "epoch": 0.3236956988477779, "grad_norm": 6.23957066636649e-06, "learning_rate": 6.764903742932799e-06, "loss": 0.1483, "step": 45230 }, { "epoch": 0.3237672654404924, "grad_norm": 0.0, "learning_rate": 6.764188077005654e-06, "loss": 0.0, "step": 45240 }, { "epoch": 0.3238388320332069, "grad_norm": 0.0, "learning_rate": 6.763472411078509e-06, "loss": 0.0, "step": 45250 }, { "epoch": 0.3239103986259214, "grad_norm": 0.0, "learning_rate": 6.762756745151364e-06, "loss": 0.0, "step": 45260 }, { "epoch": 0.3239819652186359, "grad_norm": 0.0, "learning_rate": 6.762041079224218e-06, "loss": 0.0, "step": 45270 }, { "epoch": 0.32405353181135044, "grad_norm": 0.0, "learning_rate": 6.761325413297073e-06, "loss": 0.0, "step": 45280 }, { "epoch": 0.324125098404065, "grad_norm": 0.0, "learning_rate": 6.760609747369928e-06, "loss": 0.0001, "step": 45290 }, { "epoch": 0.3241966649967795, "grad_norm": 4.3483098124852404e-05, "learning_rate": 6.7598940814427825e-06, "loss": 0.0, "step": 45300 }, { "epoch": 0.324268231589494, "grad_norm": 0.0, "learning_rate": 6.759178415515638e-06, "loss": 0.0, "step": 45310 }, { "epoch": 0.32433979818220854, "grad_norm": 0.0, "learning_rate": 6.758462749588492e-06, "loss": 0.0, "step": 45320 }, { "epoch": 0.32441136477492305, "grad_norm": 5.439916321847704e-07, "learning_rate": 6.7577470836613476e-06, "loss": 0.0001, "step": 45330 }, { "epoch": 0.32448293136763756, "grad_norm": 0.0, "learning_rate": 6.757031417734202e-06, "loss": 0.0, "step": 45340 }, { "epoch": 0.32455449796035213, "grad_norm": 47.62110900878906, "learning_rate": 6.7563157518070565e-06, "loss": 0.0035, "step": 45350 }, { "epoch": 0.32462606455306664, "grad_norm": 0.0, "learning_rate": 6.755600085879912e-06, "loss": 0.0055, "step": 45360 }, { "epoch": 0.32469763114578115, "grad_norm": 0.0, "learning_rate": 6.754884419952766e-06, "loss": 0.0001, "step": 45370 }, { "epoch": 0.32476919773849566, "grad_norm": 6.453285550378496e-07, "learning_rate": 6.7541687540256215e-06, "loss": 0.0, "step": 45380 }, { "epoch": 0.3248407643312102, "grad_norm": 0.1532488763332367, "learning_rate": 6.753453088098476e-06, "loss": 0.0, "step": 45390 }, { "epoch": 0.3249123309239247, "grad_norm": 0.0, "learning_rate": 6.75273742217133e-06, "loss": 0.0006, "step": 45400 }, { "epoch": 0.32498389751663925, "grad_norm": 3.253328395658173e-05, "learning_rate": 6.752021756244186e-06, "loss": 0.0, "step": 45410 }, { "epoch": 0.32505546410935376, "grad_norm": 0.0, "learning_rate": 6.75130609031704e-06, "loss": 0.0, "step": 45420 }, { "epoch": 0.3251270307020683, "grad_norm": 0.0, "learning_rate": 6.750590424389895e-06, "loss": 0.0, "step": 45430 }, { "epoch": 0.3251985972947828, "grad_norm": 0.0, "learning_rate": 6.74987475846275e-06, "loss": 0.0, "step": 45440 }, { "epoch": 0.3252701638874973, "grad_norm": 0.0, "learning_rate": 6.749159092535605e-06, "loss": 0.0, "step": 45450 }, { "epoch": 0.3253417304802118, "grad_norm": 0.0, "learning_rate": 6.74844342660846e-06, "loss": 0.0, "step": 45460 }, { "epoch": 0.3254132970729264, "grad_norm": 0.0, "learning_rate": 6.747727760681314e-06, "loss": 0.0, "step": 45470 }, { "epoch": 0.3254848636656409, "grad_norm": 0.00029297627042979, "learning_rate": 6.747012094754169e-06, "loss": 0.0, "step": 45480 }, { "epoch": 0.3255564302583554, "grad_norm": 8.95357588888146e-05, "learning_rate": 6.746296428827024e-06, "loss": 0.0, "step": 45490 }, { "epoch": 0.3256279968510699, "grad_norm": 0.0, "learning_rate": 6.745580762899879e-06, "loss": 0.0003, "step": 45500 }, { "epoch": 0.3256995634437844, "grad_norm": 0.0, "learning_rate": 6.7448650969727335e-06, "loss": 0.0, "step": 45510 }, { "epoch": 0.32577113003649893, "grad_norm": 0.0, "learning_rate": 6.744149431045588e-06, "loss": 0.0, "step": 45520 }, { "epoch": 0.3258426966292135, "grad_norm": 0.0, "learning_rate": 6.743433765118443e-06, "loss": 0.0, "step": 45530 }, { "epoch": 0.325914263221928, "grad_norm": 0.0, "learning_rate": 6.742718099191298e-06, "loss": 0.0, "step": 45540 }, { "epoch": 0.3259858298146425, "grad_norm": 0.0, "learning_rate": 6.742002433264153e-06, "loss": 0.0, "step": 45550 }, { "epoch": 0.32605739640735704, "grad_norm": 0.0, "learning_rate": 6.741286767337007e-06, "loss": 0.0, "step": 45560 }, { "epoch": 0.32612896300007155, "grad_norm": 0.5721230506896973, "learning_rate": 6.740571101409863e-06, "loss": 0.8681, "step": 45570 }, { "epoch": 0.3262005295927861, "grad_norm": 0.0, "learning_rate": 6.739855435482717e-06, "loss": 0.0, "step": 45580 }, { "epoch": 0.3262720961855006, "grad_norm": 0.0, "learning_rate": 6.739139769555572e-06, "loss": 0.0, "step": 45590 }, { "epoch": 0.32634366277821514, "grad_norm": 0.0, "learning_rate": 6.738424103628427e-06, "loss": 0.0, "step": 45600 }, { "epoch": 0.32641522937092965, "grad_norm": 0.025041628628969193, "learning_rate": 6.737708437701281e-06, "loss": 0.0047, "step": 45610 }, { "epoch": 0.32648679596364416, "grad_norm": 0.0, "learning_rate": 6.736992771774137e-06, "loss": 0.0, "step": 45620 }, { "epoch": 0.3265583625563587, "grad_norm": 0.0, "learning_rate": 6.736277105846991e-06, "loss": 0.0, "step": 45630 }, { "epoch": 0.32662992914907324, "grad_norm": 0.0004125014820601791, "learning_rate": 6.7355614399198455e-06, "loss": 0.0, "step": 45640 }, { "epoch": 0.32670149574178775, "grad_norm": 0.0, "learning_rate": 6.734845773992701e-06, "loss": 0.0008, "step": 45650 }, { "epoch": 0.32677306233450226, "grad_norm": 0.0, "learning_rate": 6.734130108065555e-06, "loss": 0.0, "step": 45660 }, { "epoch": 0.3268446289272168, "grad_norm": 0.0, "learning_rate": 6.7334144421384105e-06, "loss": 0.0, "step": 45670 }, { "epoch": 0.3269161955199313, "grad_norm": 3.092770202783868e-05, "learning_rate": 6.732698776211265e-06, "loss": 0.018, "step": 45680 }, { "epoch": 0.3269877621126458, "grad_norm": 0.0, "learning_rate": 6.73198311028412e-06, "loss": 0.0, "step": 45690 }, { "epoch": 0.32705932870536036, "grad_norm": 0.0, "learning_rate": 6.731267444356975e-06, "loss": 0.0, "step": 45700 }, { "epoch": 0.3271308952980749, "grad_norm": 0.0, "learning_rate": 6.730551778429829e-06, "loss": 0.0, "step": 45710 }, { "epoch": 0.3272024618907894, "grad_norm": 0.0, "learning_rate": 6.7298361125026845e-06, "loss": 0.0, "step": 45720 }, { "epoch": 0.3272740284835039, "grad_norm": 0.0, "learning_rate": 6.729120446575539e-06, "loss": 0.0, "step": 45730 }, { "epoch": 0.3273455950762184, "grad_norm": 0.0, "learning_rate": 6.728404780648394e-06, "loss": 0.0, "step": 45740 }, { "epoch": 0.3274171616689329, "grad_norm": 0.0, "learning_rate": 6.727689114721249e-06, "loss": 0.0014, "step": 45750 }, { "epoch": 0.3274887282616475, "grad_norm": 0.0, "learning_rate": 6.726973448794103e-06, "loss": 0.0007, "step": 45760 }, { "epoch": 0.327560294854362, "grad_norm": 0.0, "learning_rate": 6.726257782866958e-06, "loss": 0.0, "step": 45770 }, { "epoch": 0.3276318614470765, "grad_norm": 0.0, "learning_rate": 6.725542116939813e-06, "loss": 0.0, "step": 45780 }, { "epoch": 0.327703428039791, "grad_norm": 0.0, "learning_rate": 6.724826451012668e-06, "loss": 0.0185, "step": 45790 }, { "epoch": 0.32777499463250553, "grad_norm": 0.0, "learning_rate": 6.7241107850855226e-06, "loss": 0.0, "step": 45800 }, { "epoch": 0.32784656122522005, "grad_norm": 1.348790448219006e-07, "learning_rate": 6.723395119158378e-06, "loss": 0.0, "step": 45810 }, { "epoch": 0.3279181278179346, "grad_norm": 0.0, "learning_rate": 6.722679453231232e-06, "loss": 0.0, "step": 45820 }, { "epoch": 0.3279896944106491, "grad_norm": 0.000535456114448607, "learning_rate": 6.721963787304087e-06, "loss": 0.0, "step": 45830 }, { "epoch": 0.32806126100336364, "grad_norm": 0.0036264171358197927, "learning_rate": 6.721248121376942e-06, "loss": 0.0, "step": 45840 }, { "epoch": 0.32813282759607815, "grad_norm": 0.0, "learning_rate": 6.7205324554497965e-06, "loss": 0.0, "step": 45850 }, { "epoch": 0.32820439418879266, "grad_norm": 0.0, "learning_rate": 6.719816789522652e-06, "loss": 0.0004, "step": 45860 }, { "epoch": 0.32827596078150717, "grad_norm": 0.0, "learning_rate": 6.719101123595506e-06, "loss": 0.012, "step": 45870 }, { "epoch": 0.32834752737422174, "grad_norm": 0.0, "learning_rate": 6.718385457668361e-06, "loss": 0.0012, "step": 45880 }, { "epoch": 0.32841909396693625, "grad_norm": 0.0, "learning_rate": 6.717669791741216e-06, "loss": 0.0, "step": 45890 }, { "epoch": 0.32849066055965076, "grad_norm": 0.00015513574180658907, "learning_rate": 6.71695412581407e-06, "loss": 0.0, "step": 45900 }, { "epoch": 0.32856222715236527, "grad_norm": 0.0, "learning_rate": 6.716238459886926e-06, "loss": 0.0, "step": 45910 }, { "epoch": 0.3286337937450798, "grad_norm": 0.0, "learning_rate": 6.71552279395978e-06, "loss": 0.0001, "step": 45920 }, { "epoch": 0.3287053603377943, "grad_norm": 2.9538227863668e-06, "learning_rate": 6.714807128032635e-06, "loss": 0.0, "step": 45930 }, { "epoch": 0.32877692693050886, "grad_norm": 0.0, "learning_rate": 6.71409146210549e-06, "loss": 0.0, "step": 45940 }, { "epoch": 0.3288484935232234, "grad_norm": 0.0, "learning_rate": 6.713375796178344e-06, "loss": 0.0, "step": 45950 }, { "epoch": 0.3289200601159379, "grad_norm": 0.0, "learning_rate": 6.7126601302512e-06, "loss": 0.0, "step": 45960 }, { "epoch": 0.3289916267086524, "grad_norm": 0.0, "learning_rate": 6.711944464324054e-06, "loss": 0.0, "step": 45970 }, { "epoch": 0.3290631933013669, "grad_norm": 0.0, "learning_rate": 6.711228798396909e-06, "loss": 0.0015, "step": 45980 }, { "epoch": 0.3291347598940814, "grad_norm": 8.549152698833495e-05, "learning_rate": 6.710513132469764e-06, "loss": 0.0, "step": 45990 }, { "epoch": 0.329206326486796, "grad_norm": 0.0, "learning_rate": 6.709797466542618e-06, "loss": 0.0, "step": 46000 }, { "epoch": 0.3292778930795105, "grad_norm": 1.7422285964130424e-05, "learning_rate": 6.7090818006154735e-06, "loss": 0.002, "step": 46010 }, { "epoch": 0.329349459672225, "grad_norm": 0.0, "learning_rate": 6.708366134688328e-06, "loss": 0.0, "step": 46020 }, { "epoch": 0.3294210262649395, "grad_norm": 334.66082763671875, "learning_rate": 6.707650468761183e-06, "loss": 0.1625, "step": 46030 }, { "epoch": 0.32949259285765403, "grad_norm": 0.0, "learning_rate": 6.706934802834038e-06, "loss": 0.0, "step": 46040 }, { "epoch": 0.32956415945036854, "grad_norm": 0.0, "learning_rate": 6.706219136906892e-06, "loss": 0.0, "step": 46050 }, { "epoch": 0.3296357260430831, "grad_norm": 0.0, "learning_rate": 6.7055034709797474e-06, "loss": 0.0, "step": 46060 }, { "epoch": 0.3297072926357976, "grad_norm": 0.0, "learning_rate": 6.704787805052602e-06, "loss": 0.0, "step": 46070 }, { "epoch": 0.32977885922851213, "grad_norm": 0.0, "learning_rate": 6.704072139125457e-06, "loss": 0.0, "step": 46080 }, { "epoch": 0.32985042582122664, "grad_norm": 0.0, "learning_rate": 6.703356473198312e-06, "loss": 0.0, "step": 46090 }, { "epoch": 0.32992199241394116, "grad_norm": 0.0, "learning_rate": 6.702640807271167e-06, "loss": 0.0, "step": 46100 }, { "epoch": 0.32999355900665567, "grad_norm": 0.0, "learning_rate": 6.701925141344021e-06, "loss": 0.0, "step": 46110 }, { "epoch": 0.33006512559937023, "grad_norm": 0.0, "learning_rate": 6.701209475416876e-06, "loss": 0.0, "step": 46120 }, { "epoch": 0.33013669219208475, "grad_norm": 0.00013665917504113168, "learning_rate": 6.700493809489731e-06, "loss": 0.0, "step": 46130 }, { "epoch": 0.33020825878479926, "grad_norm": 0.0, "learning_rate": 6.6997781435625855e-06, "loss": 0.0, "step": 46140 }, { "epoch": 0.33027982537751377, "grad_norm": 0.0, "learning_rate": 6.699062477635441e-06, "loss": 0.0, "step": 46150 }, { "epoch": 0.3303513919702283, "grad_norm": 0.0, "learning_rate": 6.698346811708295e-06, "loss": 0.0011, "step": 46160 }, { "epoch": 0.3304229585629428, "grad_norm": 0.0, "learning_rate": 6.69763114578115e-06, "loss": 0.0, "step": 46170 }, { "epoch": 0.33049452515565736, "grad_norm": 3.1816532611846924, "learning_rate": 6.696915479854005e-06, "loss": 0.0004, "step": 46180 }, { "epoch": 0.33056609174837187, "grad_norm": 0.0, "learning_rate": 6.6961998139268595e-06, "loss": 0.0, "step": 46190 }, { "epoch": 0.3306376583410864, "grad_norm": 0.0, "learning_rate": 6.695484147999715e-06, "loss": 0.0, "step": 46200 }, { "epoch": 0.3307092249338009, "grad_norm": 0.0, "learning_rate": 6.694768482072569e-06, "loss": 0.0, "step": 46210 }, { "epoch": 0.3307807915265154, "grad_norm": 0.0, "learning_rate": 6.6940528161454245e-06, "loss": 0.0, "step": 46220 }, { "epoch": 0.3308523581192299, "grad_norm": 0.0, "learning_rate": 6.693337150218279e-06, "loss": 0.0, "step": 46230 }, { "epoch": 0.3309239247119445, "grad_norm": 0.0, "learning_rate": 6.692621484291133e-06, "loss": 0.0, "step": 46240 }, { "epoch": 0.330995491304659, "grad_norm": 0.0, "learning_rate": 6.691905818363989e-06, "loss": 0.0, "step": 46250 }, { "epoch": 0.3310670578973735, "grad_norm": 2.885686159133911, "learning_rate": 6.691190152436843e-06, "loss": 0.0006, "step": 46260 }, { "epoch": 0.331138624490088, "grad_norm": 0.0, "learning_rate": 6.690474486509698e-06, "loss": 0.0027, "step": 46270 }, { "epoch": 0.33121019108280253, "grad_norm": 0.0, "learning_rate": 6.689758820582553e-06, "loss": 0.0, "step": 46280 }, { "epoch": 0.33128175767551704, "grad_norm": 2.5198139042004186e-07, "learning_rate": 6.689043154655407e-06, "loss": 0.0, "step": 46290 }, { "epoch": 0.3313533242682316, "grad_norm": 0.0, "learning_rate": 6.688327488728263e-06, "loss": 0.0, "step": 46300 }, { "epoch": 0.3314248908609461, "grad_norm": 0.0, "learning_rate": 6.687611822801117e-06, "loss": 0.0, "step": 46310 }, { "epoch": 0.33149645745366063, "grad_norm": 1.6377588174520952e-09, "learning_rate": 6.686896156873972e-06, "loss": 0.0004, "step": 46320 }, { "epoch": 0.33156802404637514, "grad_norm": 0.0, "learning_rate": 6.686180490946827e-06, "loss": 0.0, "step": 46330 }, { "epoch": 0.33163959063908965, "grad_norm": 0.0, "learning_rate": 6.685464825019682e-06, "loss": 0.0, "step": 46340 }, { "epoch": 0.3317111572318042, "grad_norm": 0.0, "learning_rate": 6.6847491590925365e-06, "loss": 0.0, "step": 46350 }, { "epoch": 0.33178272382451873, "grad_norm": 0.0, "learning_rate": 6.684033493165391e-06, "loss": 0.0, "step": 46360 }, { "epoch": 0.33185429041723324, "grad_norm": 0.0, "learning_rate": 6.683317827238246e-06, "loss": 0.2666, "step": 46370 }, { "epoch": 0.33192585700994776, "grad_norm": 9.697772475192323e-05, "learning_rate": 6.682602161311101e-06, "loss": 0.001, "step": 46380 }, { "epoch": 0.33199742360266227, "grad_norm": 8.67354410516441e-10, "learning_rate": 6.681886495383956e-06, "loss": 0.0, "step": 46390 }, { "epoch": 0.3320689901953768, "grad_norm": 0.0, "learning_rate": 6.68117082945681e-06, "loss": 0.0, "step": 46400 }, { "epoch": 0.33214055678809135, "grad_norm": 0.0, "learning_rate": 6.680455163529664e-06, "loss": 0.0, "step": 46410 }, { "epoch": 0.33221212338080586, "grad_norm": 0.0, "learning_rate": 6.67973949760252e-06, "loss": 0.0007, "step": 46420 }, { "epoch": 0.33228368997352037, "grad_norm": 27.495389938354492, "learning_rate": 6.679023831675374e-06, "loss": 0.003, "step": 46430 }, { "epoch": 0.3323552565662349, "grad_norm": 0.0, "learning_rate": 6.67830816574823e-06, "loss": 0.0, "step": 46440 }, { "epoch": 0.3324268231589494, "grad_norm": 0.0, "learning_rate": 6.677592499821084e-06, "loss": 0.0003, "step": 46450 }, { "epoch": 0.3324983897516639, "grad_norm": 0.0, "learning_rate": 6.67687683389394e-06, "loss": 0.0, "step": 46460 }, { "epoch": 0.33256995634437847, "grad_norm": 0.0, "learning_rate": 6.676161167966794e-06, "loss": 0.0733, "step": 46470 }, { "epoch": 0.332641522937093, "grad_norm": 0.0, "learning_rate": 6.675445502039648e-06, "loss": 0.0, "step": 46480 }, { "epoch": 0.3327130895298075, "grad_norm": 0.0, "learning_rate": 6.674729836112504e-06, "loss": 0.0003, "step": 46490 }, { "epoch": 0.332784656122522, "grad_norm": 0.0, "learning_rate": 6.674014170185357e-06, "loss": 0.0001, "step": 46500 }, { "epoch": 0.3328562227152365, "grad_norm": 6.10848758242355e-08, "learning_rate": 6.6732985042582135e-06, "loss": 0.0, "step": 46510 }, { "epoch": 0.332927789307951, "grad_norm": 0.0, "learning_rate": 6.672582838331068e-06, "loss": 0.0, "step": 46520 }, { "epoch": 0.3329993559006656, "grad_norm": 4.225364023113798e-07, "learning_rate": 6.671867172403922e-06, "loss": 0.0, "step": 46530 }, { "epoch": 0.3330709224933801, "grad_norm": 0.0, "learning_rate": 6.671151506476778e-06, "loss": 0.0, "step": 46540 }, { "epoch": 0.3331424890860946, "grad_norm": 0.0, "learning_rate": 6.670435840549631e-06, "loss": 0.0, "step": 46550 }, { "epoch": 0.33321405567880913, "grad_norm": 0.0, "learning_rate": 6.6697201746224875e-06, "loss": 0.0, "step": 46560 }, { "epoch": 0.33328562227152364, "grad_norm": 0.0, "learning_rate": 6.669004508695341e-06, "loss": 0.0235, "step": 46570 }, { "epoch": 0.33335718886423815, "grad_norm": 0.0, "learning_rate": 6.668288842768197e-06, "loss": 0.7406, "step": 46580 }, { "epoch": 0.3334287554569527, "grad_norm": 0.0, "learning_rate": 6.667573176841051e-06, "loss": 0.0, "step": 46590 }, { "epoch": 0.33350032204966723, "grad_norm": 0.0, "learning_rate": 6.666857510913905e-06, "loss": 0.0, "step": 46600 }, { "epoch": 0.33357188864238174, "grad_norm": 0.0, "learning_rate": 6.666141844986761e-06, "loss": 0.0, "step": 46610 }, { "epoch": 0.33364345523509625, "grad_norm": 0.0, "learning_rate": 6.665426179059615e-06, "loss": 0.1075, "step": 46620 }, { "epoch": 0.33371502182781076, "grad_norm": 0.0, "learning_rate": 6.664710513132471e-06, "loss": 0.0, "step": 46630 }, { "epoch": 0.3337865884205253, "grad_norm": 0.0, "learning_rate": 6.663994847205325e-06, "loss": 0.0002, "step": 46640 }, { "epoch": 0.33385815501323984, "grad_norm": 0.0, "learning_rate": 6.663279181278179e-06, "loss": 0.0, "step": 46650 }, { "epoch": 0.33392972160595435, "grad_norm": 0.0, "learning_rate": 6.6625635153510344e-06, "loss": 0.0, "step": 46660 }, { "epoch": 0.33400128819866887, "grad_norm": 0.0, "learning_rate": 6.661847849423889e-06, "loss": 0.0, "step": 46670 }, { "epoch": 0.3340728547913834, "grad_norm": 0.0, "learning_rate": 6.661132183496745e-06, "loss": 0.0, "step": 46680 }, { "epoch": 0.3341444213840979, "grad_norm": 0.0, "learning_rate": 6.660416517569599e-06, "loss": 0.0, "step": 46690 }, { "epoch": 0.3342159879768124, "grad_norm": 0.0, "learning_rate": 6.659700851642455e-06, "loss": 0.0, "step": 46700 }, { "epoch": 0.33428755456952697, "grad_norm": 0.0, "learning_rate": 6.658985185715308e-06, "loss": 0.0, "step": 46710 }, { "epoch": 0.3343591211622415, "grad_norm": 0.0, "learning_rate": 6.658269519788163e-06, "loss": 0.0, "step": 46720 }, { "epoch": 0.334430687754956, "grad_norm": 0.0, "learning_rate": 6.657553853861018e-06, "loss": 0.0, "step": 46730 }, { "epoch": 0.3345022543476705, "grad_norm": 0.0, "learning_rate": 6.6568381879338725e-06, "loss": 0.0, "step": 46740 }, { "epoch": 0.334573820940385, "grad_norm": 0.0, "learning_rate": 6.656122522006729e-06, "loss": 0.0001, "step": 46750 }, { "epoch": 0.3346453875330995, "grad_norm": 0.0, "learning_rate": 6.655406856079582e-06, "loss": 0.0, "step": 46760 }, { "epoch": 0.3347169541258141, "grad_norm": 0.0, "learning_rate": 6.654691190152437e-06, "loss": 0.0137, "step": 46770 }, { "epoch": 0.3347885207185286, "grad_norm": 0.0, "learning_rate": 6.653975524225292e-06, "loss": 0.0, "step": 46780 }, { "epoch": 0.3348600873112431, "grad_norm": 2.1478135749930516e-05, "learning_rate": 6.6532598582981465e-06, "loss": 0.0, "step": 46790 }, { "epoch": 0.3349316539039576, "grad_norm": 4.334976821951386e-10, "learning_rate": 6.652544192371002e-06, "loss": 0.0, "step": 46800 }, { "epoch": 0.33500322049667214, "grad_norm": 0.0, "learning_rate": 6.651828526443856e-06, "loss": 0.0, "step": 46810 }, { "epoch": 0.33507478708938665, "grad_norm": 0.18271666765213013, "learning_rate": 6.651112860516711e-06, "loss": 0.0, "step": 46820 }, { "epoch": 0.3351463536821012, "grad_norm": 0.0, "learning_rate": 6.650397194589566e-06, "loss": 0.0, "step": 46830 }, { "epoch": 0.33521792027481573, "grad_norm": 0.0, "learning_rate": 6.64968152866242e-06, "loss": 0.0, "step": 46840 }, { "epoch": 0.33528948686753024, "grad_norm": 4.125526587017703e-08, "learning_rate": 6.648965862735276e-06, "loss": 0.0011, "step": 46850 }, { "epoch": 0.33536105346024475, "grad_norm": 0.0, "learning_rate": 6.64825019680813e-06, "loss": 0.1753, "step": 46860 }, { "epoch": 0.33543262005295926, "grad_norm": 0.00017729608225636184, "learning_rate": 6.647534530880985e-06, "loss": 0.0, "step": 46870 }, { "epoch": 0.3355041866456738, "grad_norm": 0.0, "learning_rate": 6.64681886495384e-06, "loss": 0.0001, "step": 46880 }, { "epoch": 0.33557575323838834, "grad_norm": 0.0, "learning_rate": 6.646103199026694e-06, "loss": 0.0025, "step": 46890 }, { "epoch": 0.33564731983110285, "grad_norm": 0.0, "learning_rate": 6.64538753309955e-06, "loss": 0.0004, "step": 46900 }, { "epoch": 0.33571888642381736, "grad_norm": 0.0, "learning_rate": 6.644671867172404e-06, "loss": 0.0, "step": 46910 }, { "epoch": 0.3357904530165319, "grad_norm": 1.9513545036315918, "learning_rate": 6.643956201245259e-06, "loss": 0.0003, "step": 46920 }, { "epoch": 0.3358620196092464, "grad_norm": 0.0, "learning_rate": 6.643240535318114e-06, "loss": 0.0036, "step": 46930 }, { "epoch": 0.3359335862019609, "grad_norm": 0.0, "learning_rate": 6.642524869390968e-06, "loss": 0.0, "step": 46940 }, { "epoch": 0.33600515279467547, "grad_norm": 0.0, "learning_rate": 6.6418092034638235e-06, "loss": 0.0, "step": 46950 }, { "epoch": 0.33607671938739, "grad_norm": 0.0, "learning_rate": 6.641093537536678e-06, "loss": 0.0, "step": 46960 }, { "epoch": 0.3361482859801045, "grad_norm": 4.8558195686609906e-08, "learning_rate": 6.640377871609533e-06, "loss": 0.0, "step": 46970 }, { "epoch": 0.336219852572819, "grad_norm": 0.0, "learning_rate": 6.639662205682388e-06, "loss": 0.0, "step": 46980 }, { "epoch": 0.3362914191655335, "grad_norm": 1.9235933734762511e-07, "learning_rate": 6.638946539755243e-06, "loss": 0.0, "step": 46990 }, { "epoch": 0.336362985758248, "grad_norm": 0.0, "learning_rate": 6.638230873828097e-06, "loss": 0.0, "step": 47000 }, { "epoch": 0.3364345523509626, "grad_norm": 0.00042553848470561206, "learning_rate": 6.637515207900952e-06, "loss": 0.0, "step": 47010 }, { "epoch": 0.3365061189436771, "grad_norm": 0.0, "learning_rate": 6.636799541973807e-06, "loss": 0.0, "step": 47020 }, { "epoch": 0.3365776855363916, "grad_norm": 4.5524237157756886e-10, "learning_rate": 6.636083876046662e-06, "loss": 0.0, "step": 47030 }, { "epoch": 0.3366492521291061, "grad_norm": 0.0, "learning_rate": 6.635368210119517e-06, "loss": 0.0026, "step": 47040 }, { "epoch": 0.33672081872182064, "grad_norm": 0.0, "learning_rate": 6.634652544192371e-06, "loss": 0.0, "step": 47050 }, { "epoch": 0.33679238531453515, "grad_norm": 4.9014583964890335e-06, "learning_rate": 6.633936878265226e-06, "loss": 0.0, "step": 47060 }, { "epoch": 0.3368639519072497, "grad_norm": 0.0, "learning_rate": 6.633221212338081e-06, "loss": 0.0, "step": 47070 }, { "epoch": 0.3369355184999642, "grad_norm": 0.0, "learning_rate": 6.6325055464109355e-06, "loss": 0.0, "step": 47080 }, { "epoch": 0.33700708509267874, "grad_norm": 0.0, "learning_rate": 6.631789880483791e-06, "loss": 0.0001, "step": 47090 }, { "epoch": 0.33707865168539325, "grad_norm": 0.0, "learning_rate": 6.631074214556645e-06, "loss": 0.0, "step": 47100 }, { "epoch": 0.33715021827810776, "grad_norm": 0.0, "learning_rate": 6.6303585486295005e-06, "loss": 0.0, "step": 47110 }, { "epoch": 0.3372217848708223, "grad_norm": 0.0, "learning_rate": 6.629642882702355e-06, "loss": 0.0, "step": 47120 }, { "epoch": 0.33729335146353684, "grad_norm": 0.0, "learning_rate": 6.6289272167752094e-06, "loss": 0.0, "step": 47130 }, { "epoch": 0.33736491805625135, "grad_norm": 0.0, "learning_rate": 6.628211550848065e-06, "loss": 0.0, "step": 47140 }, { "epoch": 0.33743648464896586, "grad_norm": 0.0, "learning_rate": 6.627495884920919e-06, "loss": 0.0, "step": 47150 }, { "epoch": 0.3375080512416804, "grad_norm": 0.0, "learning_rate": 6.6267802189937745e-06, "loss": 0.0065, "step": 47160 }, { "epoch": 0.3375796178343949, "grad_norm": 0.0, "learning_rate": 6.626064553066629e-06, "loss": 1.8609, "step": 47170 }, { "epoch": 0.33765118442710945, "grad_norm": 0.0, "learning_rate": 6.625348887139483e-06, "loss": 0.0, "step": 47180 }, { "epoch": 0.33772275101982396, "grad_norm": 0.0, "learning_rate": 6.624633221212339e-06, "loss": 0.0, "step": 47190 }, { "epoch": 0.3377943176125385, "grad_norm": 0.0, "learning_rate": 6.623917555285193e-06, "loss": 0.0, "step": 47200 }, { "epoch": 0.337865884205253, "grad_norm": 0.0, "learning_rate": 6.623201889358048e-06, "loss": 0.0092, "step": 47210 }, { "epoch": 0.3379374507979675, "grad_norm": 0.0, "learning_rate": 6.622486223430903e-06, "loss": 0.0001, "step": 47220 }, { "epoch": 0.338009017390682, "grad_norm": 0.0, "learning_rate": 6.621770557503758e-06, "loss": 0.0, "step": 47230 }, { "epoch": 0.3380805839833966, "grad_norm": 0.0, "learning_rate": 6.6210548915766126e-06, "loss": 0.0, "step": 47240 }, { "epoch": 0.3381521505761111, "grad_norm": 0.0, "learning_rate": 6.620339225649467e-06, "loss": 0.1158, "step": 47250 }, { "epoch": 0.3382237171688256, "grad_norm": 682.61279296875, "learning_rate": 6.619623559722322e-06, "loss": 0.1395, "step": 47260 }, { "epoch": 0.3382952837615401, "grad_norm": 0.0, "learning_rate": 6.618907893795177e-06, "loss": 0.0001, "step": 47270 }, { "epoch": 0.3383668503542546, "grad_norm": 0.0, "learning_rate": 6.618192227868032e-06, "loss": 0.0, "step": 47280 }, { "epoch": 0.33843841694696913, "grad_norm": 3.169129968227935e-06, "learning_rate": 6.6174765619408865e-06, "loss": 0.0, "step": 47290 }, { "epoch": 0.3385099835396837, "grad_norm": 4.2800300548151426e-07, "learning_rate": 6.616760896013741e-06, "loss": 0.0, "step": 47300 }, { "epoch": 0.3385815501323982, "grad_norm": 0.0, "learning_rate": 6.616045230086596e-06, "loss": 0.0, "step": 47310 }, { "epoch": 0.3386531167251127, "grad_norm": 0.0, "learning_rate": 6.615329564159451e-06, "loss": 0.0, "step": 47320 }, { "epoch": 0.33872468331782724, "grad_norm": 0.0, "learning_rate": 6.614613898232306e-06, "loss": 0.0, "step": 47330 }, { "epoch": 0.33879624991054175, "grad_norm": 0.0, "learning_rate": 6.61389823230516e-06, "loss": 0.0, "step": 47340 }, { "epoch": 0.33886781650325626, "grad_norm": 0.0, "learning_rate": 6.613182566378016e-06, "loss": 0.0003, "step": 47350 }, { "epoch": 0.3389393830959708, "grad_norm": 0.0, "learning_rate": 6.61246690045087e-06, "loss": 0.0, "step": 47360 }, { "epoch": 0.33901094968868534, "grad_norm": 0.0, "learning_rate": 6.611751234523725e-06, "loss": 0.0, "step": 47370 }, { "epoch": 0.33908251628139985, "grad_norm": 0.0, "learning_rate": 6.61103556859658e-06, "loss": 0.0038, "step": 47380 }, { "epoch": 0.33915408287411436, "grad_norm": 0.0, "learning_rate": 6.610319902669434e-06, "loss": 0.5382, "step": 47390 }, { "epoch": 0.33922564946682887, "grad_norm": 0.0, "learning_rate": 6.60960423674229e-06, "loss": 0.0, "step": 47400 }, { "epoch": 0.3392972160595434, "grad_norm": 1.934120597013589e-08, "learning_rate": 6.608888570815144e-06, "loss": 0.0663, "step": 47410 }, { "epoch": 0.33936878265225795, "grad_norm": 0.0, "learning_rate": 6.6081729048879985e-06, "loss": 0.0, "step": 47420 }, { "epoch": 0.33944034924497246, "grad_norm": 5.340770048434251e-10, "learning_rate": 6.607457238960854e-06, "loss": 0.0, "step": 47430 }, { "epoch": 0.339511915837687, "grad_norm": 0.0, "learning_rate": 6.606741573033708e-06, "loss": 0.0, "step": 47440 }, { "epoch": 0.3395834824304015, "grad_norm": 0.001198985381051898, "learning_rate": 6.6060259071065635e-06, "loss": 0.0001, "step": 47450 }, { "epoch": 0.339655049023116, "grad_norm": 14.999014854431152, "learning_rate": 6.605310241179418e-06, "loss": 0.0034, "step": 47460 }, { "epoch": 0.3397266156158305, "grad_norm": 0.0, "learning_rate": 6.604594575252273e-06, "loss": 0.0, "step": 47470 }, { "epoch": 0.3397981822085451, "grad_norm": 0.0, "learning_rate": 6.603878909325128e-06, "loss": 0.0002, "step": 47480 }, { "epoch": 0.3398697488012596, "grad_norm": 5.825281164106855e-07, "learning_rate": 6.603163243397982e-06, "loss": 0.0, "step": 47490 }, { "epoch": 0.3399413153939741, "grad_norm": 0.0, "learning_rate": 6.6024475774708374e-06, "loss": 0.0, "step": 47500 }, { "epoch": 0.3400128819866886, "grad_norm": 5.146715276183045e-10, "learning_rate": 6.601731911543692e-06, "loss": 0.0, "step": 47510 }, { "epoch": 0.3400844485794031, "grad_norm": 1.5096174138307106e-05, "learning_rate": 6.601016245616547e-06, "loss": 0.0, "step": 47520 }, { "epoch": 0.34015601517211763, "grad_norm": 0.0, "learning_rate": 6.600300579689402e-06, "loss": 0.0, "step": 47530 }, { "epoch": 0.3402275817648322, "grad_norm": 0.0, "learning_rate": 6.599584913762256e-06, "loss": 0.0, "step": 47540 }, { "epoch": 0.3402991483575467, "grad_norm": 0.0, "learning_rate": 6.598869247835111e-06, "loss": 0.0, "step": 47550 }, { "epoch": 0.3403707149502612, "grad_norm": 4.1801351269299403e-10, "learning_rate": 6.598153581907966e-06, "loss": 0.0, "step": 47560 }, { "epoch": 0.34044228154297573, "grad_norm": 0.0, "learning_rate": 6.597437915980821e-06, "loss": 0.0, "step": 47570 }, { "epoch": 0.34051384813569024, "grad_norm": 0.0, "learning_rate": 6.5967222500536755e-06, "loss": 0.0, "step": 47580 }, { "epoch": 0.34058541472840476, "grad_norm": 8.630109959995025e-10, "learning_rate": 6.59600658412653e-06, "loss": 0.0, "step": 47590 }, { "epoch": 0.3406569813211193, "grad_norm": 0.0, "learning_rate": 6.595290918199385e-06, "loss": 0.0, "step": 47600 }, { "epoch": 0.34072854791383383, "grad_norm": 0.0, "learning_rate": 6.59457525227224e-06, "loss": 0.0, "step": 47610 }, { "epoch": 0.34080011450654835, "grad_norm": 0.0, "learning_rate": 6.593859586345095e-06, "loss": 0.0, "step": 47620 }, { "epoch": 0.34087168109926286, "grad_norm": 0.0, "learning_rate": 6.5931439204179495e-06, "loss": 0.0, "step": 47630 }, { "epoch": 0.34094324769197737, "grad_norm": 2.7827594450968718e-08, "learning_rate": 6.592428254490805e-06, "loss": 0.0, "step": 47640 }, { "epoch": 0.3410148142846919, "grad_norm": 0.0, "learning_rate": 6.591712588563659e-06, "loss": 0.0183, "step": 47650 }, { "epoch": 0.34108638087740645, "grad_norm": 0.0, "learning_rate": 6.590996922636514e-06, "loss": 0.0002, "step": 47660 }, { "epoch": 0.34115794747012096, "grad_norm": 0.0, "learning_rate": 6.590281256709369e-06, "loss": 0.0, "step": 47670 }, { "epoch": 0.34122951406283547, "grad_norm": 0.0, "learning_rate": 6.589565590782223e-06, "loss": 0.0, "step": 47680 }, { "epoch": 0.34130108065555, "grad_norm": 0.0, "learning_rate": 6.588849924855079e-06, "loss": 0.0003, "step": 47690 }, { "epoch": 0.3413726472482645, "grad_norm": 0.028949685394763947, "learning_rate": 6.588134258927933e-06, "loss": 0.0, "step": 47700 }, { "epoch": 0.341444213840979, "grad_norm": 0.0, "learning_rate": 6.5874185930007876e-06, "loss": 0.165, "step": 47710 }, { "epoch": 0.34151578043369357, "grad_norm": 0.0, "learning_rate": 6.586702927073643e-06, "loss": 0.0, "step": 47720 }, { "epoch": 0.3415873470264081, "grad_norm": 3.38466918492486e-07, "learning_rate": 6.585987261146497e-06, "loss": 0.0, "step": 47730 }, { "epoch": 0.3416589136191226, "grad_norm": 0.0, "learning_rate": 6.585271595219353e-06, "loss": 0.0, "step": 47740 }, { "epoch": 0.3417304802118371, "grad_norm": 0.0, "learning_rate": 6.584555929292207e-06, "loss": 0.0, "step": 47750 }, { "epoch": 0.3418020468045516, "grad_norm": 0.0, "learning_rate": 6.583840263365062e-06, "loss": 0.0, "step": 47760 }, { "epoch": 0.34187361339726613, "grad_norm": 0.0, "learning_rate": 6.583124597437917e-06, "loss": 0.0, "step": 47770 }, { "epoch": 0.3419451799899807, "grad_norm": 0.00011598036508075893, "learning_rate": 6.582408931510771e-06, "loss": 0.0, "step": 47780 }, { "epoch": 0.3420167465826952, "grad_norm": 697.0700073242188, "learning_rate": 6.5816932655836265e-06, "loss": 0.1746, "step": 47790 }, { "epoch": 0.3420883131754097, "grad_norm": 0.0, "learning_rate": 6.580977599656481e-06, "loss": 0.3131, "step": 47800 }, { "epoch": 0.34215987976812423, "grad_norm": 0.0, "learning_rate": 6.580261933729336e-06, "loss": 0.0, "step": 47810 }, { "epoch": 0.34223144636083874, "grad_norm": 0.0, "learning_rate": 6.579546267802191e-06, "loss": 0.0001, "step": 47820 }, { "epoch": 0.34230301295355325, "grad_norm": 0.0, "learning_rate": 6.578830601875045e-06, "loss": 0.0018, "step": 47830 }, { "epoch": 0.3423745795462678, "grad_norm": 0.0, "learning_rate": 6.5781149359479e-06, "loss": 0.0, "step": 47840 }, { "epoch": 0.34244614613898233, "grad_norm": 0.0, "learning_rate": 6.577399270020755e-06, "loss": 0.0, "step": 47850 }, { "epoch": 0.34251771273169684, "grad_norm": 0.0, "learning_rate": 6.57668360409361e-06, "loss": 0.0, "step": 47860 }, { "epoch": 0.34258927932441136, "grad_norm": 0.0, "learning_rate": 6.575967938166465e-06, "loss": 0.0022, "step": 47870 }, { "epoch": 0.34266084591712587, "grad_norm": 2.796934127807617, "learning_rate": 6.57525227223932e-06, "loss": 0.0002, "step": 47880 }, { "epoch": 0.34273241250984043, "grad_norm": 0.0, "learning_rate": 6.574536606312174e-06, "loss": 0.0, "step": 47890 }, { "epoch": 0.34280397910255495, "grad_norm": 7.161511383912966e-09, "learning_rate": 6.573820940385029e-06, "loss": 0.0, "step": 47900 }, { "epoch": 0.34287554569526946, "grad_norm": 9.046065008178061e-10, "learning_rate": 6.573105274457884e-06, "loss": 0.0, "step": 47910 }, { "epoch": 0.34294711228798397, "grad_norm": 0.0, "learning_rate": 6.5723896085307385e-06, "loss": 0.0, "step": 47920 }, { "epoch": 0.3430186788806985, "grad_norm": 0.0, "learning_rate": 6.571673942603594e-06, "loss": 0.0, "step": 47930 }, { "epoch": 0.343090245473413, "grad_norm": 0.0002633284602779895, "learning_rate": 6.570958276676448e-06, "loss": 0.0, "step": 47940 }, { "epoch": 0.34316181206612756, "grad_norm": 0.00020533709903247654, "learning_rate": 6.570242610749302e-06, "loss": 0.1406, "step": 47950 }, { "epoch": 0.34323337865884207, "grad_norm": 0.0, "learning_rate": 6.569526944822158e-06, "loss": 0.0, "step": 47960 }, { "epoch": 0.3433049452515566, "grad_norm": 0.04289399832487106, "learning_rate": 6.5688112788950124e-06, "loss": 0.0022, "step": 47970 }, { "epoch": 0.3433765118442711, "grad_norm": 89.14783477783203, "learning_rate": 6.568095612967868e-06, "loss": 0.0196, "step": 47980 }, { "epoch": 0.3434480784369856, "grad_norm": 0.0, "learning_rate": 6.567379947040722e-06, "loss": 0.0, "step": 47990 }, { "epoch": 0.3435196450297001, "grad_norm": 7.118992016330594e-06, "learning_rate": 6.5666642811135775e-06, "loss": 0.0633, "step": 48000 }, { "epoch": 0.3435912116224147, "grad_norm": 0.0019269491313025355, "learning_rate": 6.565948615186432e-06, "loss": 0.0, "step": 48010 }, { "epoch": 0.3436627782151292, "grad_norm": 2.8189484169161005e-07, "learning_rate": 6.5652329492592855e-06, "loss": 0.0, "step": 48020 }, { "epoch": 0.3437343448078437, "grad_norm": 0.00010755359835457057, "learning_rate": 6.564517283332142e-06, "loss": 0.0, "step": 48030 }, { "epoch": 0.3438059114005582, "grad_norm": 0.0, "learning_rate": 6.563801617404995e-06, "loss": 0.0, "step": 48040 }, { "epoch": 0.34387747799327273, "grad_norm": 0.0, "learning_rate": 6.563085951477851e-06, "loss": 0.0, "step": 48050 }, { "epoch": 0.34394904458598724, "grad_norm": 0.0034708091989159584, "learning_rate": 6.562370285550706e-06, "loss": 0.0, "step": 48060 }, { "epoch": 0.3440206111787018, "grad_norm": 0.0, "learning_rate": 6.561654619623559e-06, "loss": 0.0001, "step": 48070 }, { "epoch": 0.3440921777714163, "grad_norm": 1.8320116534908948e-09, "learning_rate": 6.5609389536964156e-06, "loss": 0.0, "step": 48080 }, { "epoch": 0.34416374436413083, "grad_norm": 0.0, "learning_rate": 6.560223287769269e-06, "loss": 0.0, "step": 48090 }, { "epoch": 0.34423531095684534, "grad_norm": 0.0, "learning_rate": 6.559507621842125e-06, "loss": 0.0, "step": 48100 }, { "epoch": 0.34430687754955985, "grad_norm": 0.0, "learning_rate": 6.558791955914979e-06, "loss": 0.0, "step": 48110 }, { "epoch": 0.34437844414227436, "grad_norm": 0.0, "learning_rate": 6.558076289987835e-06, "loss": 0.0, "step": 48120 }, { "epoch": 0.34445001073498893, "grad_norm": 0.0, "learning_rate": 6.5573606240606895e-06, "loss": 0.0, "step": 48130 }, { "epoch": 0.34452157732770344, "grad_norm": 0.0, "learning_rate": 6.556644958133543e-06, "loss": 0.804, "step": 48140 }, { "epoch": 0.34459314392041795, "grad_norm": 0.0, "learning_rate": 6.555929292206399e-06, "loss": 0.0079, "step": 48150 }, { "epoch": 0.34466471051313247, "grad_norm": 2.3796660900115967, "learning_rate": 6.555213626279253e-06, "loss": 0.0004, "step": 48160 }, { "epoch": 0.344736277105847, "grad_norm": 9.704560943646356e-05, "learning_rate": 6.554497960352109e-06, "loss": 0.0, "step": 48170 }, { "epoch": 0.3448078436985615, "grad_norm": 0.0, "learning_rate": 6.5537822944249625e-06, "loss": 0.0, "step": 48180 }, { "epoch": 0.34487941029127606, "grad_norm": 0.0, "learning_rate": 6.553066628497817e-06, "loss": 0.0, "step": 48190 }, { "epoch": 0.34495097688399057, "grad_norm": 0.0014638678403571248, "learning_rate": 6.552350962570673e-06, "loss": 0.0004, "step": 48200 }, { "epoch": 0.3450225434767051, "grad_norm": 1.2412629985192325e-05, "learning_rate": 6.551635296643527e-06, "loss": 0.0, "step": 48210 }, { "epoch": 0.3450941100694196, "grad_norm": 0.0, "learning_rate": 6.550919630716383e-06, "loss": 0.0017, "step": 48220 }, { "epoch": 0.3451656766621341, "grad_norm": 0.0, "learning_rate": 6.5502039647892365e-06, "loss": 0.0, "step": 48230 }, { "epoch": 0.3452372432548486, "grad_norm": 0.0, "learning_rate": 6.549488298862093e-06, "loss": 0.0, "step": 48240 }, { "epoch": 0.3453088098475632, "grad_norm": 0.0, "learning_rate": 6.548772632934946e-06, "loss": 0.0, "step": 48250 }, { "epoch": 0.3453803764402777, "grad_norm": 0.0, "learning_rate": 6.548056967007801e-06, "loss": 0.0, "step": 48260 }, { "epoch": 0.3454519430329922, "grad_norm": 0.0, "learning_rate": 6.547341301080656e-06, "loss": 0.0004, "step": 48270 }, { "epoch": 0.3455235096257067, "grad_norm": 0.0, "learning_rate": 6.54662563515351e-06, "loss": 0.0002, "step": 48280 }, { "epoch": 0.3455950762184212, "grad_norm": 0.0, "learning_rate": 6.5459099692263665e-06, "loss": 0.0, "step": 48290 }, { "epoch": 0.34566664281113574, "grad_norm": 0.0, "learning_rate": 6.54519430329922e-06, "loss": 0.0, "step": 48300 }, { "epoch": 0.3457382094038503, "grad_norm": 0.0, "learning_rate": 6.5444786373720746e-06, "loss": 0.0, "step": 48310 }, { "epoch": 0.3458097759965648, "grad_norm": 0.0, "learning_rate": 6.54376297144493e-06, "loss": 0.0, "step": 48320 }, { "epoch": 0.34588134258927933, "grad_norm": 0.001595481182448566, "learning_rate": 6.543047305517784e-06, "loss": 0.0, "step": 48330 }, { "epoch": 0.34595290918199384, "grad_norm": 0.004977014381438494, "learning_rate": 6.54233163959064e-06, "loss": 0.0, "step": 48340 }, { "epoch": 0.34602447577470835, "grad_norm": 0.26746857166290283, "learning_rate": 6.541615973663494e-06, "loss": 0.0, "step": 48350 }, { "epoch": 0.34609604236742286, "grad_norm": 0.007114137522876263, "learning_rate": 6.5409003077363485e-06, "loss": 0.0, "step": 48360 }, { "epoch": 0.34616760896013743, "grad_norm": 0.0, "learning_rate": 6.540184641809204e-06, "loss": 0.0007, "step": 48370 }, { "epoch": 0.34623917555285194, "grad_norm": 0.43658214807510376, "learning_rate": 6.539468975882058e-06, "loss": 0.0001, "step": 48380 }, { "epoch": 0.34631074214556645, "grad_norm": 0.0, "learning_rate": 6.5387533099549135e-06, "loss": 0.0, "step": 48390 }, { "epoch": 0.34638230873828096, "grad_norm": 0.0, "learning_rate": 6.538037644027768e-06, "loss": 0.0, "step": 48400 }, { "epoch": 0.3464538753309955, "grad_norm": 0.0, "learning_rate": 6.537321978100623e-06, "loss": 0.0, "step": 48410 }, { "epoch": 0.34652544192371, "grad_norm": 0.0, "learning_rate": 6.536606312173478e-06, "loss": 0.0, "step": 48420 }, { "epoch": 0.34659700851642455, "grad_norm": 0.0, "learning_rate": 6.535890646246332e-06, "loss": 0.0263, "step": 48430 }, { "epoch": 0.34666857510913907, "grad_norm": 0.0, "learning_rate": 6.5351749803191874e-06, "loss": 0.0, "step": 48440 }, { "epoch": 0.3467401417018536, "grad_norm": 0.0, "learning_rate": 6.534459314392042e-06, "loss": 0.0, "step": 48450 }, { "epoch": 0.3468117082945681, "grad_norm": 1.7773070792870271e-09, "learning_rate": 6.533743648464897e-06, "loss": 0.0001, "step": 48460 }, { "epoch": 0.3468832748872826, "grad_norm": 0.0, "learning_rate": 6.533027982537752e-06, "loss": 0.0013, "step": 48470 }, { "epoch": 0.3469548414799971, "grad_norm": 0.0, "learning_rate": 6.532312316610606e-06, "loss": 0.0, "step": 48480 }, { "epoch": 0.3470264080727117, "grad_norm": 0.0, "learning_rate": 6.531596650683461e-06, "loss": 0.0, "step": 48490 }, { "epoch": 0.3470979746654262, "grad_norm": 0.0, "learning_rate": 6.530880984756316e-06, "loss": 0.0, "step": 48500 }, { "epoch": 0.3471695412581407, "grad_norm": 0.0, "learning_rate": 6.530165318829171e-06, "loss": 0.0, "step": 48510 }, { "epoch": 0.3472411078508552, "grad_norm": 0.0, "learning_rate": 6.5294496529020255e-06, "loss": 0.0, "step": 48520 }, { "epoch": 0.3473126744435697, "grad_norm": 7.610831858073652e-07, "learning_rate": 6.528733986974881e-06, "loss": 0.0, "step": 48530 }, { "epoch": 0.34738424103628424, "grad_norm": 0.0, "learning_rate": 6.528018321047735e-06, "loss": 0.0003, "step": 48540 }, { "epoch": 0.3474558076289988, "grad_norm": 3.6162969081487972e-06, "learning_rate": 6.52730265512059e-06, "loss": 0.0002, "step": 48550 }, { "epoch": 0.3475273742217133, "grad_norm": 0.001623978023417294, "learning_rate": 6.526586989193445e-06, "loss": 0.4582, "step": 48560 }, { "epoch": 0.3475989408144278, "grad_norm": 0.12028279900550842, "learning_rate": 6.5258713232662994e-06, "loss": 0.0012, "step": 48570 }, { "epoch": 0.34767050740714234, "grad_norm": 0.0, "learning_rate": 6.525155657339155e-06, "loss": 0.0, "step": 48580 }, { "epoch": 0.34774207399985685, "grad_norm": 0.0, "learning_rate": 6.524439991412009e-06, "loss": 0.0, "step": 48590 }, { "epoch": 0.34781364059257136, "grad_norm": 0.0, "learning_rate": 6.523724325484864e-06, "loss": 0.0, "step": 48600 }, { "epoch": 0.3478852071852859, "grad_norm": 0.0, "learning_rate": 6.523008659557719e-06, "loss": 0.0196, "step": 48610 }, { "epoch": 0.34795677377800044, "grad_norm": 0.0, "learning_rate": 6.522292993630573e-06, "loss": 0.0, "step": 48620 }, { "epoch": 0.34802834037071495, "grad_norm": 0.0, "learning_rate": 6.521577327703429e-06, "loss": 0.0161, "step": 48630 }, { "epoch": 0.34809990696342946, "grad_norm": 0.0, "learning_rate": 6.520861661776283e-06, "loss": 0.0, "step": 48640 }, { "epoch": 0.348171473556144, "grad_norm": 0.0, "learning_rate": 6.520145995849138e-06, "loss": 0.0, "step": 48650 }, { "epoch": 0.34824304014885854, "grad_norm": 0.0, "learning_rate": 6.519430329921993e-06, "loss": 0.0, "step": 48660 }, { "epoch": 0.34831460674157305, "grad_norm": 0.0, "learning_rate": 6.518714663994847e-06, "loss": 0.0, "step": 48670 }, { "epoch": 0.34838617333428756, "grad_norm": 0.0, "learning_rate": 6.5179989980677026e-06, "loss": 0.0, "step": 48680 }, { "epoch": 0.3484577399270021, "grad_norm": 0.0, "learning_rate": 6.517283332140557e-06, "loss": 0.0, "step": 48690 }, { "epoch": 0.3485293065197166, "grad_norm": 2.619555905525317e-09, "learning_rate": 6.516567666213412e-06, "loss": 0.0, "step": 48700 }, { "epoch": 0.3486008731124311, "grad_norm": 0.0, "learning_rate": 6.515852000286267e-06, "loss": 0.0, "step": 48710 }, { "epoch": 0.34867243970514566, "grad_norm": 0.0, "learning_rate": 6.515136334359121e-06, "loss": 0.0, "step": 48720 }, { "epoch": 0.3487440062978602, "grad_norm": 0.0, "learning_rate": 6.5144206684319765e-06, "loss": 0.0, "step": 48730 }, { "epoch": 0.3488155728905747, "grad_norm": 0.0, "learning_rate": 6.513705002504831e-06, "loss": 0.0, "step": 48740 }, { "epoch": 0.3488871394832892, "grad_norm": 0.00041563838021829724, "learning_rate": 6.512989336577686e-06, "loss": 0.0, "step": 48750 }, { "epoch": 0.3489587060760037, "grad_norm": 0.0, "learning_rate": 6.512273670650541e-06, "loss": 0.0, "step": 48760 }, { "epoch": 0.3490302726687182, "grad_norm": 0.0, "learning_rate": 6.511558004723396e-06, "loss": 0.0007, "step": 48770 }, { "epoch": 0.3491018392614328, "grad_norm": 0.0, "learning_rate": 6.51084233879625e-06, "loss": 0.0, "step": 48780 }, { "epoch": 0.3491734058541473, "grad_norm": 0.03355172276496887, "learning_rate": 6.510126672869105e-06, "loss": 0.0, "step": 48790 }, { "epoch": 0.3492449724468618, "grad_norm": 0.0, "learning_rate": 6.50941100694196e-06, "loss": 0.0, "step": 48800 }, { "epoch": 0.3493165390395763, "grad_norm": 0.0, "learning_rate": 6.508695341014815e-06, "loss": 0.0, "step": 48810 }, { "epoch": 0.34938810563229084, "grad_norm": 1.9476013335406606e-07, "learning_rate": 6.50797967508767e-06, "loss": 0.0, "step": 48820 }, { "epoch": 0.34945967222500535, "grad_norm": 0.0, "learning_rate": 6.507264009160524e-06, "loss": 0.0, "step": 48830 }, { "epoch": 0.3495312388177199, "grad_norm": 0.0, "learning_rate": 6.506548343233379e-06, "loss": 0.0, "step": 48840 }, { "epoch": 0.3496028054104344, "grad_norm": 0.0, "learning_rate": 6.505832677306234e-06, "loss": 0.0001, "step": 48850 }, { "epoch": 0.34967437200314894, "grad_norm": 0.0, "learning_rate": 6.505188577971804e-06, "loss": 1.0013, "step": 48860 }, { "epoch": 0.34974593859586345, "grad_norm": 0.0, "learning_rate": 6.504472912044658e-06, "loss": 0.0, "step": 48870 }, { "epoch": 0.34981750518857796, "grad_norm": 5.2782603177092824e-08, "learning_rate": 6.503757246117513e-06, "loss": 0.0, "step": 48880 }, { "epoch": 0.34988907178129247, "grad_norm": 0.0, "learning_rate": 6.503041580190368e-06, "loss": 0.0001, "step": 48890 }, { "epoch": 0.34996063837400704, "grad_norm": 1.6133281377506137e-08, "learning_rate": 6.5023259142632224e-06, "loss": 0.0213, "step": 48900 }, { "epoch": 0.35003220496672155, "grad_norm": 0.0, "learning_rate": 6.501610248336078e-06, "loss": 0.0, "step": 48910 }, { "epoch": 0.35010377155943606, "grad_norm": 0.0, "learning_rate": 6.500894582408932e-06, "loss": 0.0, "step": 48920 }, { "epoch": 0.3501753381521506, "grad_norm": 0.0, "learning_rate": 6.500178916481787e-06, "loss": 0.0, "step": 48930 }, { "epoch": 0.3502469047448651, "grad_norm": 0.0, "learning_rate": 6.499463250554642e-06, "loss": 0.0, "step": 48940 }, { "epoch": 0.3503184713375796, "grad_norm": 0.0, "learning_rate": 6.498747584627496e-06, "loss": 0.0, "step": 48950 }, { "epoch": 0.35039003793029416, "grad_norm": 0.0, "learning_rate": 6.498031918700352e-06, "loss": 0.0, "step": 48960 }, { "epoch": 0.3504616045230087, "grad_norm": 0.0, "learning_rate": 6.497316252773206e-06, "loss": 0.0, "step": 48970 }, { "epoch": 0.3505331711157232, "grad_norm": 0.14222192764282227, "learning_rate": 6.496600586846061e-06, "loss": 0.0492, "step": 48980 }, { "epoch": 0.3506047377084377, "grad_norm": 0.0, "learning_rate": 6.495884920918916e-06, "loss": 0.0036, "step": 48990 }, { "epoch": 0.3506763043011522, "grad_norm": 0.0, "learning_rate": 6.49516925499177e-06, "loss": 0.0, "step": 49000 }, { "epoch": 0.3507478708938667, "grad_norm": 0.0, "learning_rate": 6.4944535890646256e-06, "loss": 0.0, "step": 49010 }, { "epoch": 0.3508194374865813, "grad_norm": 0.0, "learning_rate": 6.49373792313748e-06, "loss": 0.0, "step": 49020 }, { "epoch": 0.3508910040792958, "grad_norm": 3.3726621495588915e-08, "learning_rate": 6.493022257210335e-06, "loss": 0.0, "step": 49030 }, { "epoch": 0.3509625706720103, "grad_norm": 0.0019275303930044174, "learning_rate": 6.49230659128319e-06, "loss": 0.0, "step": 49040 }, { "epoch": 0.3510341372647248, "grad_norm": 0.0, "learning_rate": 6.491590925356044e-06, "loss": 0.0026, "step": 49050 }, { "epoch": 0.35110570385743933, "grad_norm": 0.0, "learning_rate": 6.4908752594288995e-06, "loss": 0.0, "step": 49060 }, { "epoch": 0.35117727045015384, "grad_norm": 0.0, "learning_rate": 6.490159593501754e-06, "loss": 0.2783, "step": 49070 }, { "epoch": 0.3512488370428684, "grad_norm": 0.0, "learning_rate": 6.489443927574609e-06, "loss": 0.0, "step": 49080 }, { "epoch": 0.3513204036355829, "grad_norm": 7.005058932918473e-08, "learning_rate": 6.488728261647464e-06, "loss": 0.0, "step": 49090 }, { "epoch": 0.35139197022829743, "grad_norm": 0.0017364476807415485, "learning_rate": 6.488012595720319e-06, "loss": 0.0, "step": 49100 }, { "epoch": 0.35146353682101195, "grad_norm": 1.3627525277115637e-06, "learning_rate": 6.487296929793173e-06, "loss": 0.045, "step": 49110 }, { "epoch": 0.35153510341372646, "grad_norm": 0.0, "learning_rate": 6.486581263866028e-06, "loss": 0.0603, "step": 49120 }, { "epoch": 0.35160667000644097, "grad_norm": 0.0, "learning_rate": 6.485865597938883e-06, "loss": 0.0115, "step": 49130 }, { "epoch": 0.35167823659915554, "grad_norm": 0.0, "learning_rate": 6.485149932011738e-06, "loss": 0.0001, "step": 49140 }, { "epoch": 0.35174980319187005, "grad_norm": 0.002496599918231368, "learning_rate": 6.484434266084593e-06, "loss": 0.0, "step": 49150 }, { "epoch": 0.35182136978458456, "grad_norm": 0.0, "learning_rate": 6.483718600157447e-06, "loss": 0.0326, "step": 49160 }, { "epoch": 0.35189293637729907, "grad_norm": 0.0, "learning_rate": 6.483002934230302e-06, "loss": 0.0, "step": 49170 }, { "epoch": 0.3519645029700136, "grad_norm": 0.0, "learning_rate": 6.482287268303157e-06, "loss": 0.0, "step": 49180 }, { "epoch": 0.3520360695627281, "grad_norm": 0.0, "learning_rate": 6.4815716023760115e-06, "loss": 0.0, "step": 49190 }, { "epoch": 0.35210763615544266, "grad_norm": 0.0, "learning_rate": 6.480855936448867e-06, "loss": 0.0, "step": 49200 }, { "epoch": 0.35217920274815717, "grad_norm": 0.0, "learning_rate": 6.480140270521721e-06, "loss": 0.0, "step": 49210 }, { "epoch": 0.3522507693408717, "grad_norm": 0.0, "learning_rate": 6.479424604594575e-06, "loss": 0.0, "step": 49220 }, { "epoch": 0.3523223359335862, "grad_norm": 0.0, "learning_rate": 6.478708938667431e-06, "loss": 0.0014, "step": 49230 }, { "epoch": 0.3523939025263007, "grad_norm": 0.2576812505722046, "learning_rate": 6.477993272740285e-06, "loss": 0.0001, "step": 49240 }, { "epoch": 0.3524654691190152, "grad_norm": 2.565786019204097e-07, "learning_rate": 6.477277606813141e-06, "loss": 0.0259, "step": 49250 }, { "epoch": 0.3525370357117298, "grad_norm": 0.0, "learning_rate": 6.476561940885995e-06, "loss": 0.0074, "step": 49260 }, { "epoch": 0.3526086023044443, "grad_norm": 0.0, "learning_rate": 6.4758462749588504e-06, "loss": 0.0, "step": 49270 }, { "epoch": 0.3526801688971588, "grad_norm": 0.0, "learning_rate": 6.475130609031705e-06, "loss": 0.0259, "step": 49280 }, { "epoch": 0.3527517354898733, "grad_norm": 0.008081638254225254, "learning_rate": 6.4744149431045585e-06, "loss": 0.0036, "step": 49290 }, { "epoch": 0.35282330208258783, "grad_norm": 0.0, "learning_rate": 6.473699277177415e-06, "loss": 0.0002, "step": 49300 }, { "epoch": 0.35289486867530234, "grad_norm": 0.0, "learning_rate": 6.472983611250268e-06, "loss": 0.0, "step": 49310 }, { "epoch": 0.3529664352680169, "grad_norm": 2.559859275817871, "learning_rate": 6.472267945323124e-06, "loss": 0.0004, "step": 49320 }, { "epoch": 0.3530380018607314, "grad_norm": 0.0, "learning_rate": 6.471552279395979e-06, "loss": 0.3221, "step": 49330 }, { "epoch": 0.35310956845344593, "grad_norm": 0.0, "learning_rate": 6.470836613468832e-06, "loss": 0.0, "step": 49340 }, { "epoch": 0.35318113504616044, "grad_norm": 0.0, "learning_rate": 6.4701209475416885e-06, "loss": 0.0001, "step": 49350 }, { "epoch": 0.35325270163887496, "grad_norm": 5.4484100341796875, "learning_rate": 6.469405281614542e-06, "loss": 0.0692, "step": 49360 }, { "epoch": 0.35332426823158947, "grad_norm": 0.0, "learning_rate": 6.468689615687398e-06, "loss": 0.0, "step": 49370 }, { "epoch": 0.35339583482430403, "grad_norm": 0.0, "learning_rate": 6.467973949760252e-06, "loss": 0.0, "step": 49380 }, { "epoch": 0.35346740141701855, "grad_norm": 0.0, "learning_rate": 6.467258283833108e-06, "loss": 0.0, "step": 49390 }, { "epoch": 0.35353896800973306, "grad_norm": 0.0006761021795682609, "learning_rate": 6.4665426179059625e-06, "loss": 0.0, "step": 49400 }, { "epoch": 0.35361053460244757, "grad_norm": 0.0, "learning_rate": 6.465826951978816e-06, "loss": 0.0, "step": 49410 }, { "epoch": 0.3536821011951621, "grad_norm": 0.0, "learning_rate": 6.465111286051672e-06, "loss": 0.0, "step": 49420 }, { "epoch": 0.35375366778787665, "grad_norm": 0.0, "learning_rate": 6.464395620124526e-06, "loss": 0.0, "step": 49430 }, { "epoch": 0.35382523438059116, "grad_norm": 0.0, "learning_rate": 6.463679954197382e-06, "loss": 0.0, "step": 49440 }, { "epoch": 0.35389680097330567, "grad_norm": 0.0, "learning_rate": 6.4629642882702355e-06, "loss": 0.0003, "step": 49450 }, { "epoch": 0.3539683675660202, "grad_norm": 2.436284557916224e-05, "learning_rate": 6.46224862234309e-06, "loss": 0.0003, "step": 49460 }, { "epoch": 0.3540399341587347, "grad_norm": 0.0, "learning_rate": 6.461532956415945e-06, "loss": 0.0, "step": 49470 }, { "epoch": 0.3541115007514492, "grad_norm": 0.0, "learning_rate": 6.4608172904888e-06, "loss": 0.0097, "step": 49480 }, { "epoch": 0.35418306734416377, "grad_norm": 0.0, "learning_rate": 6.460101624561656e-06, "loss": 0.0, "step": 49490 }, { "epoch": 0.3542546339368783, "grad_norm": 0.0013569046277552843, "learning_rate": 6.4593859586345095e-06, "loss": 0.0003, "step": 49500 }, { "epoch": 0.3543262005295928, "grad_norm": 0.0, "learning_rate": 6.458670292707366e-06, "loss": 0.0003, "step": 49510 }, { "epoch": 0.3543977671223073, "grad_norm": 0.0, "learning_rate": 6.457954626780219e-06, "loss": 0.0007, "step": 49520 }, { "epoch": 0.3544693337150218, "grad_norm": 0.0, "learning_rate": 6.457238960853074e-06, "loss": 0.0, "step": 49530 }, { "epoch": 0.35454090030773633, "grad_norm": 0.0, "learning_rate": 6.456523294925929e-06, "loss": 0.0, "step": 49540 }, { "epoch": 0.3546124669004509, "grad_norm": 0.0, "learning_rate": 6.455807628998783e-06, "loss": 0.0001, "step": 49550 }, { "epoch": 0.3546840334931654, "grad_norm": 0.0, "learning_rate": 6.4550919630716395e-06, "loss": 0.0001, "step": 49560 }, { "epoch": 0.3547556000858799, "grad_norm": 0.0, "learning_rate": 6.454376297144493e-06, "loss": 0.0, "step": 49570 }, { "epoch": 0.35482716667859443, "grad_norm": 3.319082679809071e-06, "learning_rate": 6.4536606312173475e-06, "loss": 0.0, "step": 49580 }, { "epoch": 0.35489873327130894, "grad_norm": 0.0, "learning_rate": 6.452944965290203e-06, "loss": 0.0, "step": 49590 }, { "epoch": 0.35497029986402345, "grad_norm": 0.0, "learning_rate": 6.452229299363057e-06, "loss": 0.0, "step": 49600 }, { "epoch": 0.355041866456738, "grad_norm": 0.0, "learning_rate": 6.451513633435913e-06, "loss": 0.0, "step": 49610 }, { "epoch": 0.35511343304945253, "grad_norm": 0.0, "learning_rate": 6.450797967508767e-06, "loss": 0.3965, "step": 49620 }, { "epoch": 0.35518499964216704, "grad_norm": 0.0, "learning_rate": 6.450082301581623e-06, "loss": 0.0, "step": 49630 }, { "epoch": 0.35525656623488155, "grad_norm": 0.0, "learning_rate": 6.449366635654477e-06, "loss": 0.0, "step": 49640 }, { "epoch": 0.35532813282759607, "grad_norm": 1.0066500522043498e-07, "learning_rate": 6.448650969727331e-06, "loss": 0.0004, "step": 49650 }, { "epoch": 0.3553996994203106, "grad_norm": 0.0, "learning_rate": 6.4479353038001865e-06, "loss": 0.0, "step": 49660 }, { "epoch": 0.35547126601302514, "grad_norm": 0.0, "learning_rate": 6.447219637873041e-06, "loss": 0.0106, "step": 49670 }, { "epoch": 0.35554283260573966, "grad_norm": 0.0, "learning_rate": 6.446503971945896e-06, "loss": 0.21, "step": 49680 }, { "epoch": 0.35561439919845417, "grad_norm": 0.0, "learning_rate": 6.445788306018751e-06, "loss": 0.0, "step": 49690 }, { "epoch": 0.3556859657911687, "grad_norm": 0.0, "learning_rate": 6.445072640091605e-06, "loss": 0.0, "step": 49700 }, { "epoch": 0.3557575323838832, "grad_norm": 0.0, "learning_rate": 6.44435697416446e-06, "loss": 0.0, "step": 49710 }, { "epoch": 0.3558290989765977, "grad_norm": 0.0, "learning_rate": 6.443641308237315e-06, "loss": 0.0029, "step": 49720 }, { "epoch": 0.35590066556931227, "grad_norm": 0.0, "learning_rate": 6.44292564231017e-06, "loss": 0.0037, "step": 49730 }, { "epoch": 0.3559722321620268, "grad_norm": 0.0, "learning_rate": 6.442209976383025e-06, "loss": 0.0, "step": 49740 }, { "epoch": 0.3560437987547413, "grad_norm": 0.0, "learning_rate": 6.44149431045588e-06, "loss": 0.0, "step": 49750 }, { "epoch": 0.3561153653474558, "grad_norm": 0.0006813131039962173, "learning_rate": 6.440778644528734e-06, "loss": 0.0, "step": 49760 }, { "epoch": 0.3561869319401703, "grad_norm": 0.0, "learning_rate": 6.440062978601589e-06, "loss": 0.0004, "step": 49770 }, { "epoch": 0.3562584985328848, "grad_norm": 6.272060090850573e-06, "learning_rate": 6.439347312674444e-06, "loss": 0.0004, "step": 49780 }, { "epoch": 0.3563300651255994, "grad_norm": 0.0, "learning_rate": 6.4386316467472985e-06, "loss": 0.0, "step": 49790 }, { "epoch": 0.3564016317183139, "grad_norm": 0.0, "learning_rate": 6.437915980820154e-06, "loss": 0.0, "step": 49800 }, { "epoch": 0.3564731983110284, "grad_norm": 0.0, "learning_rate": 6.437200314893008e-06, "loss": 0.0, "step": 49810 }, { "epoch": 0.35654476490374293, "grad_norm": 0.0, "learning_rate": 6.436484648965863e-06, "loss": 0.0001, "step": 49820 }, { "epoch": 0.35661633149645744, "grad_norm": 0.00012680364307016134, "learning_rate": 6.435768983038718e-06, "loss": 0.0, "step": 49830 }, { "epoch": 0.35668789808917195, "grad_norm": 0.0, "learning_rate": 6.4350533171115724e-06, "loss": 0.0, "step": 49840 }, { "epoch": 0.3567594646818865, "grad_norm": 0.0, "learning_rate": 6.434337651184428e-06, "loss": 0.0002, "step": 49850 }, { "epoch": 0.35683103127460103, "grad_norm": 0.00226172199472785, "learning_rate": 6.433621985257282e-06, "loss": 0.0, "step": 49860 }, { "epoch": 0.35690259786731554, "grad_norm": 0.00013490965648088604, "learning_rate": 6.4329063193301375e-06, "loss": 0.0, "step": 49870 }, { "epoch": 0.35697416446003005, "grad_norm": 0.0, "learning_rate": 6.432190653402992e-06, "loss": 0.0, "step": 49880 }, { "epoch": 0.35704573105274456, "grad_norm": 0.0, "learning_rate": 6.431474987475846e-06, "loss": 0.0, "step": 49890 }, { "epoch": 0.3571172976454591, "grad_norm": 0.0, "learning_rate": 6.430759321548702e-06, "loss": 0.3221, "step": 49900 }, { "epoch": 0.35718886423817364, "grad_norm": 0.0, "learning_rate": 6.430043655621556e-06, "loss": 0.3996, "step": 49910 }, { "epoch": 0.35726043083088815, "grad_norm": 0.0, "learning_rate": 6.429327989694411e-06, "loss": 0.0, "step": 49920 }, { "epoch": 0.35733199742360267, "grad_norm": 0.0, "learning_rate": 6.428612323767266e-06, "loss": 0.0, "step": 49930 }, { "epoch": 0.3574035640163172, "grad_norm": 0.0, "learning_rate": 6.42789665784012e-06, "loss": 0.0, "step": 49940 }, { "epoch": 0.3574751306090317, "grad_norm": 0.0, "learning_rate": 6.4271809919129756e-06, "loss": 0.0, "step": 49950 }, { "epoch": 0.3575466972017462, "grad_norm": 0.0, "learning_rate": 6.42646532598583e-06, "loss": 0.0, "step": 49960 }, { "epoch": 0.35761826379446077, "grad_norm": 0.0, "learning_rate": 6.425749660058685e-06, "loss": 0.0, "step": 49970 }, { "epoch": 0.3576898303871753, "grad_norm": 0.0, "learning_rate": 6.42503399413154e-06, "loss": 0.0005, "step": 49980 }, { "epoch": 0.3577613969798898, "grad_norm": 0.0, "learning_rate": 6.424318328204394e-06, "loss": 0.0, "step": 49990 }, { "epoch": 0.3578329635726043, "grad_norm": 0.0, "learning_rate": 6.4236026622772495e-06, "loss": 0.0468, "step": 50000 }, { "epoch": 0.3579045301653188, "grad_norm": 0.0, "learning_rate": 6.422886996350104e-06, "loss": 0.0005, "step": 50010 }, { "epoch": 0.3579760967580333, "grad_norm": 0.0, "learning_rate": 6.422171330422959e-06, "loss": 0.702, "step": 50020 }, { "epoch": 0.3580476633507479, "grad_norm": 0.0, "learning_rate": 6.421455664495814e-06, "loss": 0.0001, "step": 50030 }, { "epoch": 0.3581192299434624, "grad_norm": 0.0, "learning_rate": 6.420739998568669e-06, "loss": 0.0, "step": 50040 }, { "epoch": 0.3581907965361769, "grad_norm": 0.0, "learning_rate": 6.420024332641523e-06, "loss": 0.0, "step": 50050 }, { "epoch": 0.3582623631288914, "grad_norm": 0.0, "learning_rate": 6.419308666714378e-06, "loss": 0.1999, "step": 50060 }, { "epoch": 0.35833392972160594, "grad_norm": 0.0, "learning_rate": 6.418593000787233e-06, "loss": 0.0018, "step": 50070 }, { "epoch": 0.35840549631432045, "grad_norm": 0.00029687603819184005, "learning_rate": 6.4178773348600876e-06, "loss": 0.0, "step": 50080 }, { "epoch": 0.358477062907035, "grad_norm": 0.0, "learning_rate": 6.417161668932943e-06, "loss": 0.0, "step": 50090 }, { "epoch": 0.3585486294997495, "grad_norm": 0.0, "learning_rate": 6.416446003005797e-06, "loss": 0.0, "step": 50100 }, { "epoch": 0.35862019609246404, "grad_norm": 0.0, "learning_rate": 6.415730337078652e-06, "loss": 0.0, "step": 50110 }, { "epoch": 0.35869176268517855, "grad_norm": 0.0, "learning_rate": 6.415014671151507e-06, "loss": 0.0, "step": 50120 }, { "epoch": 0.35876332927789306, "grad_norm": 0.0, "learning_rate": 6.4142990052243615e-06, "loss": 0.0007, "step": 50130 }, { "epoch": 0.3588348958706076, "grad_norm": 0.0, "learning_rate": 6.413583339297217e-06, "loss": 0.0, "step": 50140 }, { "epoch": 0.35890646246332214, "grad_norm": 0.0, "learning_rate": 6.412867673370071e-06, "loss": 0.0, "step": 50150 }, { "epoch": 0.35897802905603665, "grad_norm": 0.0, "learning_rate": 6.4121520074429265e-06, "loss": 0.0, "step": 50160 }, { "epoch": 0.35904959564875116, "grad_norm": 0.0, "learning_rate": 6.411436341515781e-06, "loss": 0.0, "step": 50170 }, { "epoch": 0.3591211622414657, "grad_norm": 0.0, "learning_rate": 6.410720675588635e-06, "loss": 0.0, "step": 50180 }, { "epoch": 0.3591927288341802, "grad_norm": 0.0, "learning_rate": 6.410005009661491e-06, "loss": 0.0004, "step": 50190 }, { "epoch": 0.35926429542689475, "grad_norm": 0.0, "learning_rate": 6.409289343734345e-06, "loss": 0.0, "step": 50200 }, { "epoch": 0.35933586201960926, "grad_norm": 4.518419154919684e-05, "learning_rate": 6.4085736778072004e-06, "loss": 0.0, "step": 50210 }, { "epoch": 0.3594074286123238, "grad_norm": 0.0, "learning_rate": 6.407858011880055e-06, "loss": 0.0, "step": 50220 }, { "epoch": 0.3594789952050383, "grad_norm": 0.0, "learning_rate": 6.407142345952909e-06, "loss": 0.0009, "step": 50230 }, { "epoch": 0.3595505617977528, "grad_norm": 0.0, "learning_rate": 6.406426680025765e-06, "loss": 0.0, "step": 50240 }, { "epoch": 0.3596221283904673, "grad_norm": 4.967680524714524e-06, "learning_rate": 6.405711014098619e-06, "loss": 0.0, "step": 50250 }, { "epoch": 0.3596936949831819, "grad_norm": 0.0, "learning_rate": 6.404995348171474e-06, "loss": 0.029, "step": 50260 }, { "epoch": 0.3597652615758964, "grad_norm": 0.0, "learning_rate": 6.404279682244329e-06, "loss": 0.0, "step": 50270 }, { "epoch": 0.3598368281686109, "grad_norm": 0.0, "learning_rate": 6.403564016317184e-06, "loss": 0.2709, "step": 50280 }, { "epoch": 0.3599083947613254, "grad_norm": 0.0007142903050407767, "learning_rate": 6.4028483503900385e-06, "loss": 0.0, "step": 50290 }, { "epoch": 0.3599799613540399, "grad_norm": 0.0, "learning_rate": 6.402132684462893e-06, "loss": 0.0, "step": 50300 }, { "epoch": 0.36005152794675443, "grad_norm": 2.3201151634566486e-05, "learning_rate": 6.401417018535748e-06, "loss": 0.0029, "step": 50310 }, { "epoch": 0.360123094539469, "grad_norm": 0.0, "learning_rate": 6.400701352608603e-06, "loss": 0.0, "step": 50320 }, { "epoch": 0.3601946611321835, "grad_norm": 0.0, "learning_rate": 6.399985686681458e-06, "loss": 0.0, "step": 50330 }, { "epoch": 0.360266227724898, "grad_norm": 0.0, "learning_rate": 6.3992700207543124e-06, "loss": 0.0, "step": 50340 }, { "epoch": 0.36033779431761254, "grad_norm": 0.0, "learning_rate": 6.398554354827167e-06, "loss": 0.0, "step": 50350 }, { "epoch": 0.36040936091032705, "grad_norm": 4.2927618437715864e-07, "learning_rate": 6.397838688900022e-06, "loss": 0.0, "step": 50360 }, { "epoch": 0.36048092750304156, "grad_norm": 0.0, "learning_rate": 6.397123022972877e-06, "loss": 0.0, "step": 50370 }, { "epoch": 0.3605524940957561, "grad_norm": 0.0, "learning_rate": 6.396478923638446e-06, "loss": 0.9172, "step": 50380 }, { "epoch": 0.36062406068847064, "grad_norm": 9.41231537154863e-10, "learning_rate": 6.395763257711301e-06, "loss": 0.0, "step": 50390 }, { "epoch": 0.36069562728118515, "grad_norm": 0.011773820035159588, "learning_rate": 6.395047591784156e-06, "loss": 0.0052, "step": 50400 }, { "epoch": 0.36076719387389966, "grad_norm": 0.0, "learning_rate": 6.3943319258570106e-06, "loss": 0.0, "step": 50410 }, { "epoch": 0.3608387604666142, "grad_norm": 0.0, "learning_rate": 6.393616259929866e-06, "loss": 0.0, "step": 50420 }, { "epoch": 0.3609103270593287, "grad_norm": 0.0, "learning_rate": 6.39290059400272e-06, "loss": 0.0002, "step": 50430 }, { "epoch": 0.36098189365204325, "grad_norm": 0.0, "learning_rate": 6.392184928075575e-06, "loss": 0.0014, "step": 50440 }, { "epoch": 0.36105346024475776, "grad_norm": 0.0, "learning_rate": 6.39146926214843e-06, "loss": 0.4234, "step": 50450 }, { "epoch": 0.3611250268374723, "grad_norm": 0.0, "learning_rate": 6.3907535962212845e-06, "loss": 0.0, "step": 50460 }, { "epoch": 0.3611965934301868, "grad_norm": 0.28449830412864685, "learning_rate": 6.39003793029414e-06, "loss": 0.0051, "step": 50470 }, { "epoch": 0.3612681600229013, "grad_norm": 0.0, "learning_rate": 6.389322264366994e-06, "loss": 0.0, "step": 50480 }, { "epoch": 0.3613397266156158, "grad_norm": 0.0, "learning_rate": 6.3886065984398495e-06, "loss": 0.0, "step": 50490 }, { "epoch": 0.3614112932083304, "grad_norm": 0.0, "learning_rate": 6.387890932512704e-06, "loss": 0.0045, "step": 50500 }, { "epoch": 0.3614828598010449, "grad_norm": 0.0, "learning_rate": 6.3871752665855576e-06, "loss": 0.0, "step": 50510 }, { "epoch": 0.3615544263937594, "grad_norm": 0.0, "learning_rate": 6.386459600658414e-06, "loss": 0.0004, "step": 50520 }, { "epoch": 0.3616259929864739, "grad_norm": 7.886332142881258e-10, "learning_rate": 6.385743934731268e-06, "loss": 0.0086, "step": 50530 }, { "epoch": 0.3616975595791884, "grad_norm": 0.0, "learning_rate": 6.3850282688041234e-06, "loss": 0.0, "step": 50540 }, { "epoch": 0.36176912617190293, "grad_norm": 0.0, "learning_rate": 6.384312602876978e-06, "loss": 0.5055, "step": 50550 }, { "epoch": 0.3618406927646175, "grad_norm": 0.0, "learning_rate": 6.3835969369498315e-06, "loss": 0.0, "step": 50560 }, { "epoch": 0.361912259357332, "grad_norm": 0.0, "learning_rate": 6.382881271022688e-06, "loss": 0.0, "step": 50570 }, { "epoch": 0.3619838259500465, "grad_norm": 0.0, "learning_rate": 6.382165605095541e-06, "loss": 0.0001, "step": 50580 }, { "epoch": 0.36205539254276103, "grad_norm": 4.540194640867412e-05, "learning_rate": 6.381449939168397e-06, "loss": 0.1502, "step": 50590 }, { "epoch": 0.36212695913547555, "grad_norm": 0.0, "learning_rate": 6.380734273241252e-06, "loss": 0.0, "step": 50600 }, { "epoch": 0.36219852572819006, "grad_norm": 0.0, "learning_rate": 6.380018607314107e-06, "loss": 0.0, "step": 50610 }, { "epoch": 0.3622700923209046, "grad_norm": 0.0, "learning_rate": 6.3793029413869615e-06, "loss": 0.0, "step": 50620 }, { "epoch": 0.36234165891361914, "grad_norm": 0.0, "learning_rate": 6.378587275459815e-06, "loss": 0.0, "step": 50630 }, { "epoch": 0.36241322550633365, "grad_norm": 0.0, "learning_rate": 6.377871609532671e-06, "loss": 0.0, "step": 50640 }, { "epoch": 0.36248479209904816, "grad_norm": 0.0, "learning_rate": 6.377155943605525e-06, "loss": 0.0, "step": 50650 }, { "epoch": 0.36255635869176267, "grad_norm": 0.0, "learning_rate": 6.376440277678381e-06, "loss": 0.0, "step": 50660 }, { "epoch": 0.3626279252844772, "grad_norm": 0.0, "learning_rate": 6.3757246117512354e-06, "loss": 0.0225, "step": 50670 }, { "epoch": 0.36269949187719175, "grad_norm": 0.0, "learning_rate": 6.375008945824089e-06, "loss": 0.0, "step": 50680 }, { "epoch": 0.36277105846990626, "grad_norm": 1.723316489510296e-09, "learning_rate": 6.374293279896945e-06, "loss": 0.0, "step": 50690 }, { "epoch": 0.36284262506262077, "grad_norm": 0.0, "learning_rate": 6.373577613969799e-06, "loss": 0.0, "step": 50700 }, { "epoch": 0.3629141916553353, "grad_norm": 0.0, "learning_rate": 6.372861948042655e-06, "loss": 0.0004, "step": 50710 }, { "epoch": 0.3629857582480498, "grad_norm": 0.0, "learning_rate": 6.3721462821155085e-06, "loss": 0.0, "step": 50720 }, { "epoch": 0.3630573248407643, "grad_norm": 0.0, "learning_rate": 6.371430616188365e-06, "loss": 0.0145, "step": 50730 }, { "epoch": 0.3631288914334789, "grad_norm": 0.0, "learning_rate": 6.370714950261218e-06, "loss": 0.0, "step": 50740 }, { "epoch": 0.3632004580261934, "grad_norm": 3.425022399028421e-08, "learning_rate": 6.369999284334073e-06, "loss": 0.0, "step": 50750 }, { "epoch": 0.3632720246189079, "grad_norm": 0.0, "learning_rate": 6.369283618406929e-06, "loss": 0.0, "step": 50760 }, { "epoch": 0.3633435912116224, "grad_norm": 4.112242698669434, "learning_rate": 6.3685679524797824e-06, "loss": 0.0009, "step": 50770 }, { "epoch": 0.3634151578043369, "grad_norm": 8.802372164495864e-10, "learning_rate": 6.3678522865526386e-06, "loss": 0.0, "step": 50780 }, { "epoch": 0.36348672439705143, "grad_norm": 0.0, "learning_rate": 6.367136620625492e-06, "loss": 0.0474, "step": 50790 }, { "epoch": 0.363558290989766, "grad_norm": 0.0, "learning_rate": 6.366420954698347e-06, "loss": 0.0, "step": 50800 }, { "epoch": 0.3636298575824805, "grad_norm": 2.4804746772133512e-06, "learning_rate": 6.365705288771202e-06, "loss": 0.0, "step": 50810 }, { "epoch": 0.363701424175195, "grad_norm": 1.5228969232339296e-06, "learning_rate": 6.364989622844056e-06, "loss": 0.0, "step": 50820 }, { "epoch": 0.36377299076790953, "grad_norm": 0.0, "learning_rate": 6.3642739569169125e-06, "loss": 0.0, "step": 50830 }, { "epoch": 0.36384455736062404, "grad_norm": 0.0, "learning_rate": 6.363558290989766e-06, "loss": 0.0, "step": 50840 }, { "epoch": 0.36391612395333856, "grad_norm": 4.174726786487781e-09, "learning_rate": 6.3628426250626205e-06, "loss": 0.0, "step": 50850 }, { "epoch": 0.3639876905460531, "grad_norm": 24.46541976928711, "learning_rate": 6.362126959135476e-06, "loss": 0.0041, "step": 50860 }, { "epoch": 0.36405925713876763, "grad_norm": 0.0, "learning_rate": 6.36141129320833e-06, "loss": 0.0001, "step": 50870 }, { "epoch": 0.36413082373148214, "grad_norm": 0.0, "learning_rate": 6.3606956272811856e-06, "loss": 0.0, "step": 50880 }, { "epoch": 0.36420239032419666, "grad_norm": 0.0, "learning_rate": 6.35997996135404e-06, "loss": 0.0, "step": 50890 }, { "epoch": 0.36427395691691117, "grad_norm": 0.0, "learning_rate": 6.359264295426895e-06, "loss": 0.0, "step": 50900 }, { "epoch": 0.3643455235096257, "grad_norm": 0.0, "learning_rate": 6.35854862949975e-06, "loss": 0.0, "step": 50910 }, { "epoch": 0.36441709010234025, "grad_norm": 0.0, "learning_rate": 6.357832963572604e-06, "loss": 0.0, "step": 50920 }, { "epoch": 0.36448865669505476, "grad_norm": 4.386601631267695e-06, "learning_rate": 6.3571172976454595e-06, "loss": 0.0, "step": 50930 }, { "epoch": 0.36456022328776927, "grad_norm": 0.004620238207280636, "learning_rate": 6.356401631718314e-06, "loss": 0.0, "step": 50940 }, { "epoch": 0.3646317898804838, "grad_norm": 1.8382080213541485e-07, "learning_rate": 6.355685965791169e-06, "loss": 0.0, "step": 50950 }, { "epoch": 0.3647033564731983, "grad_norm": 0.011535807512700558, "learning_rate": 6.354970299864024e-06, "loss": 0.0, "step": 50960 }, { "epoch": 0.36477492306591286, "grad_norm": 0.0, "learning_rate": 6.354254633936878e-06, "loss": 0.0, "step": 50970 }, { "epoch": 0.36484648965862737, "grad_norm": 0.0, "learning_rate": 6.353538968009733e-06, "loss": 0.0, "step": 50980 }, { "epoch": 0.3649180562513419, "grad_norm": 1.69397424087947e-07, "learning_rate": 6.352823302082588e-06, "loss": 0.0, "step": 50990 }, { "epoch": 0.3649896228440564, "grad_norm": 0.0, "learning_rate": 6.352107636155443e-06, "loss": 0.0005, "step": 51000 }, { "epoch": 0.3650611894367709, "grad_norm": 9.893754759104922e-05, "learning_rate": 6.351391970228298e-06, "loss": 0.0, "step": 51010 }, { "epoch": 0.3651327560294854, "grad_norm": 0.0, "learning_rate": 6.350676304301153e-06, "loss": 0.0001, "step": 51020 }, { "epoch": 0.3652043226222, "grad_norm": 0.0, "learning_rate": 6.349960638374007e-06, "loss": 0.0, "step": 51030 }, { "epoch": 0.3652758892149145, "grad_norm": 0.0, "learning_rate": 6.349244972446862e-06, "loss": 0.0, "step": 51040 }, { "epoch": 0.365347455807629, "grad_norm": 0.0, "learning_rate": 6.348529306519717e-06, "loss": 0.0001, "step": 51050 }, { "epoch": 0.3654190224003435, "grad_norm": 0.0, "learning_rate": 6.3478136405925715e-06, "loss": 0.0, "step": 51060 }, { "epoch": 0.36549058899305803, "grad_norm": 0.0, "learning_rate": 6.347097974665427e-06, "loss": 0.0123, "step": 51070 }, { "epoch": 0.36556215558577254, "grad_norm": 1.0533292293548584, "learning_rate": 6.346382308738281e-06, "loss": 0.0006, "step": 51080 }, { "epoch": 0.3656337221784871, "grad_norm": 0.0, "learning_rate": 6.345666642811136e-06, "loss": 0.0, "step": 51090 }, { "epoch": 0.3657052887712016, "grad_norm": 0.0, "learning_rate": 6.344950976883991e-06, "loss": 0.0, "step": 51100 }, { "epoch": 0.36577685536391613, "grad_norm": 0.0, "learning_rate": 6.344235310956845e-06, "loss": 0.0, "step": 51110 }, { "epoch": 0.36584842195663064, "grad_norm": 0.0, "learning_rate": 6.343519645029701e-06, "loss": 0.0, "step": 51120 }, { "epoch": 0.36591998854934515, "grad_norm": 1.6562788118790195e-07, "learning_rate": 6.342803979102555e-06, "loss": 0.0, "step": 51130 }, { "epoch": 0.36599155514205967, "grad_norm": 0.0, "learning_rate": 6.3420883131754104e-06, "loss": 0.0001, "step": 51140 }, { "epoch": 0.36606312173477423, "grad_norm": 0.0, "learning_rate": 6.341372647248265e-06, "loss": 0.0, "step": 51150 }, { "epoch": 0.36613468832748874, "grad_norm": 0.0, "learning_rate": 6.340656981321119e-06, "loss": 0.0, "step": 51160 }, { "epoch": 0.36620625492020326, "grad_norm": 0.0, "learning_rate": 6.339941315393975e-06, "loss": 0.0, "step": 51170 }, { "epoch": 0.36627782151291777, "grad_norm": 0.001578154624439776, "learning_rate": 6.339225649466829e-06, "loss": 0.0, "step": 51180 }, { "epoch": 0.3663493881056323, "grad_norm": 0.0, "learning_rate": 6.338509983539684e-06, "loss": 0.0, "step": 51190 }, { "epoch": 0.3664209546983468, "grad_norm": 0.0, "learning_rate": 6.337794317612539e-06, "loss": 0.0, "step": 51200 }, { "epoch": 0.36649252129106136, "grad_norm": 0.0, "learning_rate": 6.337078651685393e-06, "loss": 0.0004, "step": 51210 }, { "epoch": 0.36656408788377587, "grad_norm": 0.0, "learning_rate": 6.3363629857582485e-06, "loss": 0.0, "step": 51220 }, { "epoch": 0.3666356544764904, "grad_norm": 0.0, "learning_rate": 6.335647319831103e-06, "loss": 0.0, "step": 51230 }, { "epoch": 0.3667072210692049, "grad_norm": 0.2902023196220398, "learning_rate": 6.334931653903958e-06, "loss": 0.0004, "step": 51240 }, { "epoch": 0.3667787876619194, "grad_norm": 0.0, "learning_rate": 6.334215987976813e-06, "loss": 0.1397, "step": 51250 }, { "epoch": 0.3668503542546339, "grad_norm": 0.0, "learning_rate": 6.333500322049668e-06, "loss": 0.0, "step": 51260 }, { "epoch": 0.3669219208473485, "grad_norm": 0.0014147914480417967, "learning_rate": 6.3327846561225225e-06, "loss": 0.0, "step": 51270 }, { "epoch": 0.366993487440063, "grad_norm": 0.0, "learning_rate": 6.332068990195377e-06, "loss": 0.0, "step": 51280 }, { "epoch": 0.3670650540327775, "grad_norm": 0.0, "learning_rate": 6.331353324268232e-06, "loss": 0.0678, "step": 51290 }, { "epoch": 0.367136620625492, "grad_norm": 0.0, "learning_rate": 6.330637658341087e-06, "loss": 0.0, "step": 51300 }, { "epoch": 0.3672081872182065, "grad_norm": 0.0, "learning_rate": 6.329921992413942e-06, "loss": 0.0, "step": 51310 }, { "epoch": 0.36727975381092104, "grad_norm": 0.0, "learning_rate": 6.329206326486796e-06, "loss": 0.0, "step": 51320 }, { "epoch": 0.3673513204036356, "grad_norm": 0.0, "learning_rate": 6.328490660559651e-06, "loss": 0.0, "step": 51330 }, { "epoch": 0.3674228869963501, "grad_norm": 0.0, "learning_rate": 6.327774994632506e-06, "loss": 0.0, "step": 51340 }, { "epoch": 0.36749445358906463, "grad_norm": 0.0, "learning_rate": 6.3270593287053606e-06, "loss": 0.0, "step": 51350 }, { "epoch": 0.36756602018177914, "grad_norm": 0.0, "learning_rate": 6.326343662778216e-06, "loss": 0.0, "step": 51360 }, { "epoch": 0.36763758677449365, "grad_norm": 0.0, "learning_rate": 6.32562799685107e-06, "loss": 0.0, "step": 51370 }, { "epoch": 0.36770915336720816, "grad_norm": 0.0, "learning_rate": 6.324912330923926e-06, "loss": 0.0, "step": 51380 }, { "epoch": 0.36778071995992273, "grad_norm": 0.72018963098526, "learning_rate": 6.32419666499678e-06, "loss": 0.0001, "step": 51390 }, { "epoch": 0.36785228655263724, "grad_norm": 0.0, "learning_rate": 6.3234809990696345e-06, "loss": 0.0, "step": 51400 }, { "epoch": 0.36792385314535175, "grad_norm": 0.0, "learning_rate": 6.32276533314249e-06, "loss": 0.0001, "step": 51410 }, { "epoch": 0.36799541973806627, "grad_norm": 0.0, "learning_rate": 6.322049667215344e-06, "loss": 0.0, "step": 51420 }, { "epoch": 0.3680669863307808, "grad_norm": 0.00016949993732850999, "learning_rate": 6.3213340012881995e-06, "loss": 0.0, "step": 51430 }, { "epoch": 0.3681385529234953, "grad_norm": 9.41971492767334, "learning_rate": 6.320618335361054e-06, "loss": 0.0013, "step": 51440 }, { "epoch": 0.36821011951620986, "grad_norm": 0.0, "learning_rate": 6.319902669433908e-06, "loss": 0.0, "step": 51450 }, { "epoch": 0.36828168610892437, "grad_norm": 0.0, "learning_rate": 6.319187003506764e-06, "loss": 0.516, "step": 51460 }, { "epoch": 0.3683532527016389, "grad_norm": 0.0, "learning_rate": 6.318471337579618e-06, "loss": 0.0, "step": 51470 }, { "epoch": 0.3684248192943534, "grad_norm": 7.338267096201889e-07, "learning_rate": 6.317755671652473e-06, "loss": 0.0, "step": 51480 }, { "epoch": 0.3684963858870679, "grad_norm": 0.0, "learning_rate": 6.317040005725328e-06, "loss": 0.0106, "step": 51490 }, { "epoch": 0.3685679524797824, "grad_norm": 0.0, "learning_rate": 6.316324339798183e-06, "loss": 0.0, "step": 51500 }, { "epoch": 0.368639519072497, "grad_norm": 0.0, "learning_rate": 6.315608673871038e-06, "loss": 0.0, "step": 51510 }, { "epoch": 0.3687110856652115, "grad_norm": 0.0, "learning_rate": 6.314893007943892e-06, "loss": 0.0, "step": 51520 }, { "epoch": 0.368782652257926, "grad_norm": 0.0, "learning_rate": 6.314177342016747e-06, "loss": 0.0025, "step": 51530 }, { "epoch": 0.3688542188506405, "grad_norm": 0.0, "learning_rate": 6.313461676089602e-06, "loss": 0.0, "step": 51540 }, { "epoch": 0.368925785443355, "grad_norm": 0.012345283292233944, "learning_rate": 6.312746010162457e-06, "loss": 0.0, "step": 51550 }, { "epoch": 0.36899735203606954, "grad_norm": 0.0, "learning_rate": 6.3120303442353115e-06, "loss": 0.0, "step": 51560 }, { "epoch": 0.3690689186287841, "grad_norm": 0.0, "learning_rate": 6.311314678308166e-06, "loss": 0.0, "step": 51570 }, { "epoch": 0.3691404852214986, "grad_norm": 0.0, "learning_rate": 6.310599012381021e-06, "loss": 0.0, "step": 51580 }, { "epoch": 0.3692120518142131, "grad_norm": 0.0, "learning_rate": 6.309883346453876e-06, "loss": 0.0001, "step": 51590 }, { "epoch": 0.36928361840692764, "grad_norm": 0.0, "learning_rate": 6.309167680526731e-06, "loss": 0.0023, "step": 51600 }, { "epoch": 0.36935518499964215, "grad_norm": 0.006464601028710604, "learning_rate": 6.3084520145995854e-06, "loss": 0.0, "step": 51610 }, { "epoch": 0.36942675159235666, "grad_norm": 0.0, "learning_rate": 6.30773634867244e-06, "loss": 0.0045, "step": 51620 }, { "epoch": 0.36949831818507123, "grad_norm": 0.0, "learning_rate": 6.307020682745295e-06, "loss": 0.0, "step": 51630 }, { "epoch": 0.36956988477778574, "grad_norm": 0.0, "learning_rate": 6.30630501681815e-06, "loss": 0.0, "step": 51640 }, { "epoch": 0.36964145137050025, "grad_norm": 0.0032132696360349655, "learning_rate": 6.305589350891005e-06, "loss": 0.0, "step": 51650 }, { "epoch": 0.36971301796321476, "grad_norm": 0.0, "learning_rate": 6.304873684963859e-06, "loss": 0.0, "step": 51660 }, { "epoch": 0.3697845845559293, "grad_norm": 0.0, "learning_rate": 6.304158019036715e-06, "loss": 0.0, "step": 51670 }, { "epoch": 0.3698561511486438, "grad_norm": 7.639245902524294e-10, "learning_rate": 6.303442353109569e-06, "loss": 0.0678, "step": 51680 }, { "epoch": 0.36992771774135835, "grad_norm": 0.0, "learning_rate": 6.3027266871824235e-06, "loss": 0.0, "step": 51690 }, { "epoch": 0.36999928433407286, "grad_norm": 0.0, "learning_rate": 6.302011021255279e-06, "loss": 0.0, "step": 51700 }, { "epoch": 0.3700708509267874, "grad_norm": 0.0, "learning_rate": 6.301295355328133e-06, "loss": 0.0004, "step": 51710 }, { "epoch": 0.3701424175195019, "grad_norm": 0.0002724216610658914, "learning_rate": 6.3005796894009886e-06, "loss": 0.0037, "step": 51720 }, { "epoch": 0.3702139841122164, "grad_norm": 0.0, "learning_rate": 6.299864023473843e-06, "loss": 0.0, "step": 51730 }, { "epoch": 0.37028555070493097, "grad_norm": 0.0, "learning_rate": 6.2991483575466974e-06, "loss": 0.0, "step": 51740 }, { "epoch": 0.3703571172976455, "grad_norm": 0.0, "learning_rate": 6.298432691619553e-06, "loss": 0.0, "step": 51750 }, { "epoch": 0.37042868389036, "grad_norm": 0.0, "learning_rate": 6.297717025692407e-06, "loss": 0.0, "step": 51760 }, { "epoch": 0.3705002504830745, "grad_norm": 3.010871478181798e-05, "learning_rate": 6.2970013597652625e-06, "loss": 0.0003, "step": 51770 }, { "epoch": 0.370571817075789, "grad_norm": 0.0, "learning_rate": 6.296285693838117e-06, "loss": 0.0, "step": 51780 }, { "epoch": 0.3706433836685035, "grad_norm": 0.07009205222129822, "learning_rate": 6.295570027910972e-06, "loss": 0.0, "step": 51790 }, { "epoch": 0.3707149502612181, "grad_norm": 0.0, "learning_rate": 6.294854361983827e-06, "loss": 0.0, "step": 51800 }, { "epoch": 0.3707865168539326, "grad_norm": 0.0, "learning_rate": 6.294138696056681e-06, "loss": 0.0, "step": 51810 }, { "epoch": 0.3708580834466471, "grad_norm": 0.0, "learning_rate": 6.293423030129536e-06, "loss": 0.0, "step": 51820 }, { "epoch": 0.3709296500393616, "grad_norm": 0.0, "learning_rate": 6.292707364202391e-06, "loss": 0.0, "step": 51830 }, { "epoch": 0.37100121663207614, "grad_norm": 0.0003056660352740437, "learning_rate": 6.291991698275246e-06, "loss": 0.0, "step": 51840 }, { "epoch": 0.37107278322479065, "grad_norm": 0.0, "learning_rate": 6.2912760323481006e-06, "loss": 0.0, "step": 51850 }, { "epoch": 0.3711443498175052, "grad_norm": 1.1152737400266233e-08, "learning_rate": 6.290560366420955e-06, "loss": 0.0, "step": 51860 }, { "epoch": 0.3712159164102197, "grad_norm": 0.0, "learning_rate": 6.28984470049381e-06, "loss": 0.0011, "step": 51870 }, { "epoch": 0.37128748300293424, "grad_norm": 0.0, "learning_rate": 6.289129034566665e-06, "loss": 0.0, "step": 51880 }, { "epoch": 0.37135904959564875, "grad_norm": 0.0, "learning_rate": 6.28841336863952e-06, "loss": 0.0, "step": 51890 }, { "epoch": 0.37143061618836326, "grad_norm": 0.001458301325328648, "learning_rate": 6.2876977027123745e-06, "loss": 0.0, "step": 51900 }, { "epoch": 0.37150218278107777, "grad_norm": 305.93414306640625, "learning_rate": 6.28698203678523e-06, "loss": 0.0498, "step": 51910 }, { "epoch": 0.37157374937379234, "grad_norm": 9.640427833801368e-08, "learning_rate": 6.286266370858084e-06, "loss": 0.0177, "step": 51920 }, { "epoch": 0.37164531596650685, "grad_norm": 7.525789260398597e-05, "learning_rate": 6.285550704930939e-06, "loss": 0.0, "step": 51930 }, { "epoch": 0.37171688255922136, "grad_norm": 0.0, "learning_rate": 6.284835039003794e-06, "loss": 0.0004, "step": 51940 }, { "epoch": 0.3717884491519359, "grad_norm": 11.127408027648926, "learning_rate": 6.284119373076648e-06, "loss": 0.0009, "step": 51950 }, { "epoch": 0.3718600157446504, "grad_norm": 0.0, "learning_rate": 6.283403707149504e-06, "loss": 0.0, "step": 51960 }, { "epoch": 0.3719315823373649, "grad_norm": 0.00018583651399239898, "learning_rate": 6.282688041222358e-06, "loss": 0.0, "step": 51970 }, { "epoch": 0.37200314893007946, "grad_norm": 9.917490828570408e-09, "learning_rate": 6.281972375295213e-06, "loss": 0.0, "step": 51980 }, { "epoch": 0.372074715522794, "grad_norm": 0.0, "learning_rate": 6.281256709368068e-06, "loss": 0.4203, "step": 51990 }, { "epoch": 0.3721462821155085, "grad_norm": 0.0, "learning_rate": 6.280541043440922e-06, "loss": 0.0468, "step": 52000 }, { "epoch": 0.372217848708223, "grad_norm": 0.0, "learning_rate": 6.279825377513778e-06, "loss": 0.0002, "step": 52010 }, { "epoch": 0.3722894153009375, "grad_norm": 0.0, "learning_rate": 6.279109711586632e-06, "loss": 0.0, "step": 52020 }, { "epoch": 0.372360981893652, "grad_norm": 1.602762589314466e-09, "learning_rate": 6.278394045659487e-06, "loss": 0.0132, "step": 52030 }, { "epoch": 0.3724325484863666, "grad_norm": 0.0033784022089093924, "learning_rate": 6.277678379732342e-06, "loss": 0.0, "step": 52040 }, { "epoch": 0.3725041150790811, "grad_norm": 0.0, "learning_rate": 6.276962713805196e-06, "loss": 0.0003, "step": 52050 }, { "epoch": 0.3725756816717956, "grad_norm": 0.0, "learning_rate": 6.2762470478780515e-06, "loss": 0.0, "step": 52060 }, { "epoch": 0.3726472482645101, "grad_norm": 0.0, "learning_rate": 6.275531381950906e-06, "loss": 0.9336, "step": 52070 }, { "epoch": 0.37271881485722463, "grad_norm": 0.0, "learning_rate": 6.274815716023761e-06, "loss": 0.0, "step": 52080 }, { "epoch": 0.37279038144993915, "grad_norm": 0.0, "learning_rate": 6.274100050096616e-06, "loss": 0.0, "step": 52090 }, { "epoch": 0.3728619480426537, "grad_norm": 0.0, "learning_rate": 6.273384384169469e-06, "loss": 0.0001, "step": 52100 }, { "epoch": 0.3729335146353682, "grad_norm": 0.0, "learning_rate": 6.2726687182423255e-06, "loss": 0.0, "step": 52110 }, { "epoch": 0.37300508122808274, "grad_norm": 0.0, "learning_rate": 6.27195305231518e-06, "loss": 0.0, "step": 52120 }, { "epoch": 0.37307664782079725, "grad_norm": 0.0, "learning_rate": 6.271237386388035e-06, "loss": 0.0, "step": 52130 }, { "epoch": 0.37314821441351176, "grad_norm": 0.0, "learning_rate": 6.27052172046089e-06, "loss": 0.0, "step": 52140 }, { "epoch": 0.37321978100622627, "grad_norm": 0.0017127979081124067, "learning_rate": 6.269806054533745e-06, "loss": 0.0, "step": 52150 }, { "epoch": 0.37329134759894084, "grad_norm": 0.0, "learning_rate": 6.269090388606599e-06, "loss": 0.0, "step": 52160 }, { "epoch": 0.37336291419165535, "grad_norm": 0.0, "learning_rate": 6.268374722679453e-06, "loss": 0.0, "step": 52170 }, { "epoch": 0.37343448078436986, "grad_norm": 26.195308685302734, "learning_rate": 6.267659056752309e-06, "loss": 0.0035, "step": 52180 }, { "epoch": 0.37350604737708437, "grad_norm": 3.076905704801902e-05, "learning_rate": 6.266943390825163e-06, "loss": 0.01, "step": 52190 }, { "epoch": 0.3735776139697989, "grad_norm": 0.0, "learning_rate": 6.266227724898019e-06, "loss": 0.0, "step": 52200 }, { "epoch": 0.3736491805625134, "grad_norm": 0.0, "learning_rate": 6.265512058970873e-06, "loss": 0.0, "step": 52210 }, { "epoch": 0.37372074715522796, "grad_norm": 0.0, "learning_rate": 6.264796393043727e-06, "loss": 0.0115, "step": 52220 }, { "epoch": 0.3737923137479425, "grad_norm": 0.0, "learning_rate": 6.264080727116583e-06, "loss": 0.0, "step": 52230 }, { "epoch": 0.373863880340657, "grad_norm": 0.0, "learning_rate": 6.263365061189437e-06, "loss": 0.0, "step": 52240 }, { "epoch": 0.3739354469333715, "grad_norm": 0.0, "learning_rate": 6.262649395262293e-06, "loss": 0.0, "step": 52250 }, { "epoch": 0.374007013526086, "grad_norm": 0.00082258484326303, "learning_rate": 6.261933729335146e-06, "loss": 0.6086, "step": 52260 }, { "epoch": 0.3740785801188005, "grad_norm": 0.0, "learning_rate": 6.2612180634080025e-06, "loss": 0.0, "step": 52270 }, { "epoch": 0.3741501467115151, "grad_norm": 0.0, "learning_rate": 6.260502397480857e-06, "loss": 0.0, "step": 52280 }, { "epoch": 0.3742217133042296, "grad_norm": 0.00010117286728927866, "learning_rate": 6.2597867315537105e-06, "loss": 0.0, "step": 52290 }, { "epoch": 0.3742932798969441, "grad_norm": 0.0, "learning_rate": 6.259071065626567e-06, "loss": 0.0, "step": 52300 }, { "epoch": 0.3743648464896586, "grad_norm": 0.0, "learning_rate": 6.25835539969942e-06, "loss": 0.0, "step": 52310 }, { "epoch": 0.37443641308237313, "grad_norm": 0.0, "learning_rate": 6.257639733772276e-06, "loss": 0.0, "step": 52320 }, { "epoch": 0.37450797967508764, "grad_norm": 0.00015754300693515688, "learning_rate": 6.25692406784513e-06, "loss": 0.0, "step": 52330 }, { "epoch": 0.3745795462678022, "grad_norm": 0.0, "learning_rate": 6.2562084019179845e-06, "loss": 0.0, "step": 52340 }, { "epoch": 0.3746511128605167, "grad_norm": 0.0, "learning_rate": 6.25549273599084e-06, "loss": 0.0, "step": 52350 }, { "epoch": 0.37472267945323123, "grad_norm": 7.186151407267971e-08, "learning_rate": 6.254777070063694e-06, "loss": 0.0, "step": 52360 }, { "epoch": 0.37479424604594574, "grad_norm": 0.0, "learning_rate": 6.25406140413655e-06, "loss": 0.0, "step": 52370 }, { "epoch": 0.37486581263866026, "grad_norm": 0.0, "learning_rate": 6.253345738209404e-06, "loss": 0.0, "step": 52380 }, { "epoch": 0.37493737923137477, "grad_norm": 0.0, "learning_rate": 6.252630072282258e-06, "loss": 0.0, "step": 52390 }, { "epoch": 0.37500894582408933, "grad_norm": 0.0, "learning_rate": 6.251914406355114e-06, "loss": 0.0, "step": 52400 }, { "epoch": 0.37508051241680385, "grad_norm": 0.003177225822582841, "learning_rate": 6.251198740427968e-06, "loss": 0.0, "step": 52410 }, { "epoch": 0.37515207900951836, "grad_norm": 0.0, "learning_rate": 6.250483074500823e-06, "loss": 0.0, "step": 52420 }, { "epoch": 0.37522364560223287, "grad_norm": 0.0, "learning_rate": 6.249767408573678e-06, "loss": 0.0, "step": 52430 }, { "epoch": 0.3752952121949474, "grad_norm": 0.0, "learning_rate": 6.249051742646534e-06, "loss": 0.0, "step": 52440 }, { "epoch": 0.3753667787876619, "grad_norm": 0.0, "learning_rate": 6.248336076719388e-06, "loss": 0.0, "step": 52450 }, { "epoch": 0.37543834538037646, "grad_norm": 0.0, "learning_rate": 6.247620410792242e-06, "loss": 0.0145, "step": 52460 }, { "epoch": 0.37550991197309097, "grad_norm": 6.144927465356886e-05, "learning_rate": 6.246904744865097e-06, "loss": 0.0, "step": 52470 }, { "epoch": 0.3755814785658055, "grad_norm": 0.0, "learning_rate": 6.246189078937952e-06, "loss": 0.0, "step": 52480 }, { "epoch": 0.37565304515852, "grad_norm": 0.08306658267974854, "learning_rate": 6.245473413010807e-06, "loss": 0.0, "step": 52490 }, { "epoch": 0.3757246117512345, "grad_norm": 5.786457677459111e-07, "learning_rate": 6.2447577470836615e-06, "loss": 0.0002, "step": 52500 }, { "epoch": 0.37579617834394907, "grad_norm": 0.00012450384383555502, "learning_rate": 6.244042081156516e-06, "loss": 0.0, "step": 52510 }, { "epoch": 0.3758677449366636, "grad_norm": 0.0, "learning_rate": 6.243326415229371e-06, "loss": 0.1738, "step": 52520 }, { "epoch": 0.3759393115293781, "grad_norm": 0.0, "learning_rate": 6.242610749302226e-06, "loss": 0.0057, "step": 52530 }, { "epoch": 0.3760108781220926, "grad_norm": 0.0, "learning_rate": 6.241895083375081e-06, "loss": 0.0, "step": 52540 }, { "epoch": 0.3760824447148071, "grad_norm": 7.747545396341593e-07, "learning_rate": 6.241179417447935e-06, "loss": 0.0002, "step": 52550 }, { "epoch": 0.37615401130752163, "grad_norm": 0.0, "learning_rate": 6.240463751520791e-06, "loss": 0.2522, "step": 52560 }, { "epoch": 0.3762255779002362, "grad_norm": 0.0, "learning_rate": 6.239748085593645e-06, "loss": 0.0, "step": 52570 }, { "epoch": 0.3762971444929507, "grad_norm": 0.0, "learning_rate": 6.2390324196665e-06, "loss": 0.0, "step": 52580 }, { "epoch": 0.3763687110856652, "grad_norm": 0.0, "learning_rate": 6.238316753739355e-06, "loss": 0.0, "step": 52590 }, { "epoch": 0.37644027767837973, "grad_norm": 0.0, "learning_rate": 6.237601087812209e-06, "loss": 0.0, "step": 52600 }, { "epoch": 0.37651184427109424, "grad_norm": 0.0, "learning_rate": 6.236885421885065e-06, "loss": 0.0, "step": 52610 }, { "epoch": 0.37658341086380875, "grad_norm": 0.0, "learning_rate": 6.236169755957919e-06, "loss": 1.4689, "step": 52620 }, { "epoch": 0.3766549774565233, "grad_norm": 0.0, "learning_rate": 6.2354540900307735e-06, "loss": 0.0001, "step": 52630 }, { "epoch": 0.37672654404923783, "grad_norm": 8.062170309131034e-08, "learning_rate": 6.234738424103629e-06, "loss": 0.0002, "step": 52640 }, { "epoch": 0.37679811064195234, "grad_norm": 0.0, "learning_rate": 6.234022758176483e-06, "loss": 0.0, "step": 52650 }, { "epoch": 0.37686967723466686, "grad_norm": 0.0, "learning_rate": 6.2333070922493385e-06, "loss": 0.1753, "step": 52660 }, { "epoch": 0.37694124382738137, "grad_norm": 0.0, "learning_rate": 6.232591426322193e-06, "loss": 0.0, "step": 52670 }, { "epoch": 0.3770128104200959, "grad_norm": 0.0, "learning_rate": 6.231875760395048e-06, "loss": 0.0, "step": 52680 }, { "epoch": 0.37708437701281045, "grad_norm": 0.0, "learning_rate": 6.231160094467903e-06, "loss": 0.0, "step": 52690 }, { "epoch": 0.37715594360552496, "grad_norm": 0.0, "learning_rate": 6.230444428540757e-06, "loss": 0.0001, "step": 52700 }, { "epoch": 0.37722751019823947, "grad_norm": 4.392075592107858e-10, "learning_rate": 6.2297287626136125e-06, "loss": 0.0094, "step": 52710 }, { "epoch": 0.377299076790954, "grad_norm": 2.2430871467804536e-05, "learning_rate": 6.229013096686467e-06, "loss": 0.0035, "step": 52720 }, { "epoch": 0.3773706433836685, "grad_norm": 2.2688586565067226e-09, "learning_rate": 6.228297430759322e-06, "loss": 0.0, "step": 52730 }, { "epoch": 0.377442209976383, "grad_norm": 4.85966097585333e-07, "learning_rate": 6.227581764832177e-06, "loss": 0.0, "step": 52740 }, { "epoch": 0.37751377656909757, "grad_norm": 0.00017994250811170787, "learning_rate": 6.226866098905031e-06, "loss": 0.0, "step": 52750 }, { "epoch": 0.3775853431618121, "grad_norm": 0.0, "learning_rate": 6.226150432977886e-06, "loss": 0.0, "step": 52760 }, { "epoch": 0.3776569097545266, "grad_norm": 0.8291385173797607, "learning_rate": 6.225434767050741e-06, "loss": 0.0001, "step": 52770 }, { "epoch": 0.3777284763472411, "grad_norm": 0.0, "learning_rate": 6.224719101123596e-06, "loss": 0.0, "step": 52780 }, { "epoch": 0.3778000429399556, "grad_norm": 0.0, "learning_rate": 6.2240034351964506e-06, "loss": 0.0, "step": 52790 }, { "epoch": 0.3778716095326701, "grad_norm": 7.705863936280366e-06, "learning_rate": 6.223287769269306e-06, "loss": 0.0, "step": 52800 }, { "epoch": 0.3779431761253847, "grad_norm": 0.0, "learning_rate": 6.22257210334216e-06, "loss": 0.0, "step": 52810 }, { "epoch": 0.3780147427180992, "grad_norm": 0.0, "learning_rate": 6.221856437415015e-06, "loss": 0.0, "step": 52820 }, { "epoch": 0.3780863093108137, "grad_norm": 0.0, "learning_rate": 6.22114077148787e-06, "loss": 0.0, "step": 52830 }, { "epoch": 0.37815787590352823, "grad_norm": 0.0, "learning_rate": 6.2204251055607245e-06, "loss": 0.0, "step": 52840 }, { "epoch": 0.37822944249624274, "grad_norm": 0.0, "learning_rate": 6.21970943963358e-06, "loss": 0.0, "step": 52850 }, { "epoch": 0.37830100908895725, "grad_norm": 0.0, "learning_rate": 6.218993773706434e-06, "loss": 0.0, "step": 52860 }, { "epoch": 0.3783725756816718, "grad_norm": 0.0, "learning_rate": 6.218278107779289e-06, "loss": 0.0, "step": 52870 }, { "epoch": 0.37844414227438633, "grad_norm": 0.0, "learning_rate": 6.217562441852144e-06, "loss": 0.0, "step": 52880 }, { "epoch": 0.37851570886710084, "grad_norm": 1.216021610161988e-05, "learning_rate": 6.216846775924998e-06, "loss": 0.0, "step": 52890 }, { "epoch": 0.37858727545981535, "grad_norm": 0.0, "learning_rate": 6.216131109997854e-06, "loss": 0.0, "step": 52900 }, { "epoch": 0.37865884205252986, "grad_norm": 0.0, "learning_rate": 6.215415444070708e-06, "loss": 0.0, "step": 52910 }, { "epoch": 0.3787304086452444, "grad_norm": 0.0, "learning_rate": 6.214699778143563e-06, "loss": 0.0, "step": 52920 }, { "epoch": 0.37880197523795894, "grad_norm": 4.3131620497405265e-10, "learning_rate": 6.213984112216418e-06, "loss": 0.0001, "step": 52930 }, { "epoch": 0.37887354183067345, "grad_norm": 9.257171140752973e-10, "learning_rate": 6.213268446289272e-06, "loss": 0.0, "step": 52940 }, { "epoch": 0.37894510842338797, "grad_norm": 0.0, "learning_rate": 6.212552780362128e-06, "loss": 0.0, "step": 52950 }, { "epoch": 0.3790166750161025, "grad_norm": 0.0, "learning_rate": 6.211837114434982e-06, "loss": 0.0, "step": 52960 }, { "epoch": 0.379088241608817, "grad_norm": 0.0, "learning_rate": 6.211121448507837e-06, "loss": 0.0003, "step": 52970 }, { "epoch": 0.3791598082015315, "grad_norm": 1.609459638595581, "learning_rate": 6.210405782580692e-06, "loss": 0.0004, "step": 52980 }, { "epoch": 0.37923137479424607, "grad_norm": 0.0, "learning_rate": 6.209690116653546e-06, "loss": 0.0, "step": 52990 }, { "epoch": 0.3793029413869606, "grad_norm": 0.00017271676915697753, "learning_rate": 6.2089744507264015e-06, "loss": 0.0, "step": 53000 }, { "epoch": 0.3793745079796751, "grad_norm": 0.0, "learning_rate": 6.208258784799256e-06, "loss": 0.0, "step": 53010 }, { "epoch": 0.3794460745723896, "grad_norm": 0.0, "learning_rate": 6.207543118872111e-06, "loss": 0.0, "step": 53020 }, { "epoch": 0.3795176411651041, "grad_norm": 0.0, "learning_rate": 6.206827452944966e-06, "loss": 0.0039, "step": 53030 }, { "epoch": 0.3795892077578186, "grad_norm": 0.0, "learning_rate": 6.206111787017821e-06, "loss": 0.0, "step": 53040 }, { "epoch": 0.3796607743505332, "grad_norm": 0.18534378707408905, "learning_rate": 6.2053961210906754e-06, "loss": 0.0358, "step": 53050 }, { "epoch": 0.3797323409432477, "grad_norm": 0.0066243126057088375, "learning_rate": 6.20468045516353e-06, "loss": 0.0019, "step": 53060 }, { "epoch": 0.3798039075359622, "grad_norm": 0.0, "learning_rate": 6.203964789236385e-06, "loss": 0.0, "step": 53070 }, { "epoch": 0.3798754741286767, "grad_norm": 0.0, "learning_rate": 6.20324912330924e-06, "loss": 0.0, "step": 53080 }, { "epoch": 0.37994704072139124, "grad_norm": 9.574854630045593e-05, "learning_rate": 6.202533457382095e-06, "loss": 0.0, "step": 53090 }, { "epoch": 0.38001860731410575, "grad_norm": 0.0, "learning_rate": 6.201817791454949e-06, "loss": 0.0, "step": 53100 }, { "epoch": 0.3800901739068203, "grad_norm": 0.0, "learning_rate": 6.201102125527804e-06, "loss": 0.0, "step": 53110 }, { "epoch": 0.38016174049953483, "grad_norm": 0.0, "learning_rate": 6.200386459600659e-06, "loss": 0.0, "step": 53120 }, { "epoch": 0.38023330709224934, "grad_norm": 0.0, "learning_rate": 6.1996707936735135e-06, "loss": 0.0, "step": 53130 }, { "epoch": 0.38030487368496385, "grad_norm": 0.0, "learning_rate": 6.198955127746369e-06, "loss": 0.0002, "step": 53140 }, { "epoch": 0.38037644027767836, "grad_norm": 7.067951810313389e-05, "learning_rate": 6.198239461819223e-06, "loss": 0.0006, "step": 53150 }, { "epoch": 0.3804480068703929, "grad_norm": 0.0, "learning_rate": 6.197523795892078e-06, "loss": 0.0, "step": 53160 }, { "epoch": 0.38051957346310744, "grad_norm": 2.67588035285371e-07, "learning_rate": 6.196808129964933e-06, "loss": 0.0, "step": 53170 }, { "epoch": 0.38059114005582195, "grad_norm": 0.0, "learning_rate": 6.1960924640377875e-06, "loss": 0.0, "step": 53180 }, { "epoch": 0.38066270664853646, "grad_norm": 1.1518401343835194e-08, "learning_rate": 6.195376798110643e-06, "loss": 0.0, "step": 53190 }, { "epoch": 0.380734273241251, "grad_norm": 0.0, "learning_rate": 6.194661132183497e-06, "loss": 0.0, "step": 53200 }, { "epoch": 0.3808058398339655, "grad_norm": 0.0, "learning_rate": 6.1939454662563525e-06, "loss": 0.0, "step": 53210 }, { "epoch": 0.38087740642668, "grad_norm": 0.0009386945166625082, "learning_rate": 6.193229800329207e-06, "loss": 0.0, "step": 53220 }, { "epoch": 0.38094897301939457, "grad_norm": 0.0, "learning_rate": 6.192514134402061e-06, "loss": 0.0049, "step": 53230 }, { "epoch": 0.3810205396121091, "grad_norm": 0.0, "learning_rate": 6.191798468474917e-06, "loss": 0.0002, "step": 53240 }, { "epoch": 0.3810921062048236, "grad_norm": 0.0, "learning_rate": 6.191082802547771e-06, "loss": 0.0, "step": 53250 }, { "epoch": 0.3811636727975381, "grad_norm": 0.0, "learning_rate": 6.190367136620626e-06, "loss": 0.0, "step": 53260 }, { "epoch": 0.3812352393902526, "grad_norm": 0.0, "learning_rate": 6.189651470693481e-06, "loss": 0.0, "step": 53270 }, { "epoch": 0.3813068059829672, "grad_norm": 0.0, "learning_rate": 6.188935804766335e-06, "loss": 0.0, "step": 53280 }, { "epoch": 0.3813783725756817, "grad_norm": 0.0, "learning_rate": 6.188220138839191e-06, "loss": 0.0, "step": 53290 }, { "epoch": 0.3814499391683962, "grad_norm": 0.0, "learning_rate": 6.187504472912045e-06, "loss": 0.0, "step": 53300 }, { "epoch": 0.3815215057611107, "grad_norm": 2.592138571344549e-06, "learning_rate": 6.1867888069849e-06, "loss": 0.0, "step": 53310 }, { "epoch": 0.3815930723538252, "grad_norm": 0.0, "learning_rate": 6.186073141057755e-06, "loss": 0.0, "step": 53320 }, { "epoch": 0.38166463894653974, "grad_norm": 0.0, "learning_rate": 6.18535747513061e-06, "loss": 0.0, "step": 53330 }, { "epoch": 0.3817362055392543, "grad_norm": 0.0, "learning_rate": 6.1846418092034645e-06, "loss": 0.0, "step": 53340 }, { "epoch": 0.3818077721319688, "grad_norm": 0.0, "learning_rate": 6.183926143276319e-06, "loss": 0.0, "step": 53350 }, { "epoch": 0.3818793387246833, "grad_norm": 0.0, "learning_rate": 6.183210477349174e-06, "loss": 0.0, "step": 53360 }, { "epoch": 0.38195090531739784, "grad_norm": 0.0, "learning_rate": 6.182494811422029e-06, "loss": 0.0, "step": 53370 }, { "epoch": 0.38202247191011235, "grad_norm": 0.0, "learning_rate": 6.181779145494884e-06, "loss": 0.0069, "step": 53380 }, { "epoch": 0.38209403850282686, "grad_norm": 0.2859998941421509, "learning_rate": 6.181063479567738e-06, "loss": 0.0001, "step": 53390 }, { "epoch": 0.3821656050955414, "grad_norm": 6.976591110229492, "learning_rate": 6.180347813640593e-06, "loss": 0.0022, "step": 53400 }, { "epoch": 0.38223717168825594, "grad_norm": 0.0002571938093751669, "learning_rate": 6.179632147713448e-06, "loss": 0.0, "step": 53410 }, { "epoch": 0.38230873828097045, "grad_norm": 6.742837399542623e-07, "learning_rate": 6.178916481786303e-06, "loss": 0.0009, "step": 53420 }, { "epoch": 0.38238030487368496, "grad_norm": 0.0, "learning_rate": 6.178200815859158e-06, "loss": 0.0, "step": 53430 }, { "epoch": 0.3824518714663995, "grad_norm": 455.3139343261719, "learning_rate": 6.177485149932012e-06, "loss": 0.1979, "step": 53440 }, { "epoch": 0.382523438059114, "grad_norm": 0.0, "learning_rate": 6.176769484004868e-06, "loss": 0.0, "step": 53450 }, { "epoch": 0.38259500465182855, "grad_norm": 3.0702193498655106e-07, "learning_rate": 6.176053818077722e-06, "loss": 0.0001, "step": 53460 }, { "epoch": 0.38266657124454306, "grad_norm": 0.0, "learning_rate": 6.1753381521505765e-06, "loss": 0.0001, "step": 53470 }, { "epoch": 0.3827381378372576, "grad_norm": 2.08102665055776e-05, "learning_rate": 6.174622486223432e-06, "loss": 0.0, "step": 53480 }, { "epoch": 0.3828097044299721, "grad_norm": 0.0, "learning_rate": 6.173906820296286e-06, "loss": 0.0, "step": 53490 }, { "epoch": 0.3828812710226866, "grad_norm": 0.039362505078315735, "learning_rate": 6.1731911543691415e-06, "loss": 0.0, "step": 53500 }, { "epoch": 0.3829528376154011, "grad_norm": 0.0, "learning_rate": 6.172475488441996e-06, "loss": 0.0, "step": 53510 }, { "epoch": 0.3830244042081157, "grad_norm": 0.0, "learning_rate": 6.1717598225148504e-06, "loss": 0.0, "step": 53520 }, { "epoch": 0.3830959708008302, "grad_norm": 9.024116297950968e-05, "learning_rate": 6.171044156587706e-06, "loss": 0.0, "step": 53530 }, { "epoch": 0.3831675373935447, "grad_norm": 0.0, "learning_rate": 6.17032849066056e-06, "loss": 0.0, "step": 53540 }, { "epoch": 0.3832391039862592, "grad_norm": 0.0, "learning_rate": 6.1696128247334155e-06, "loss": 0.0032, "step": 53550 }, { "epoch": 0.3833106705789737, "grad_norm": 0.0, "learning_rate": 6.16889715880627e-06, "loss": 0.0, "step": 53560 }, { "epoch": 0.38338223717168823, "grad_norm": 0.0, "learning_rate": 6.168181492879125e-06, "loss": 0.0, "step": 53570 }, { "epoch": 0.3834538037644028, "grad_norm": 0.00014521800039801747, "learning_rate": 6.16746582695198e-06, "loss": 0.0, "step": 53580 }, { "epoch": 0.3835253703571173, "grad_norm": 0.0, "learning_rate": 6.166750161024834e-06, "loss": 0.0, "step": 53590 }, { "epoch": 0.3835969369498318, "grad_norm": 0.0006872057565487921, "learning_rate": 6.166034495097689e-06, "loss": 0.0, "step": 53600 }, { "epoch": 0.38366850354254634, "grad_norm": 0.0, "learning_rate": 6.165318829170544e-06, "loss": 0.0, "step": 53610 }, { "epoch": 0.38374007013526085, "grad_norm": 0.0, "learning_rate": 6.164603163243399e-06, "loss": 0.0, "step": 53620 }, { "epoch": 0.38381163672797536, "grad_norm": 0.0, "learning_rate": 6.1638874973162536e-06, "loss": 0.0, "step": 53630 }, { "epoch": 0.3838832033206899, "grad_norm": 0.0, "learning_rate": 6.163171831389107e-06, "loss": 0.0002, "step": 53640 }, { "epoch": 0.38395476991340444, "grad_norm": 0.0, "learning_rate": 6.162456165461963e-06, "loss": 0.0, "step": 53650 }, { "epoch": 0.38402633650611895, "grad_norm": 0.0, "learning_rate": 6.161740499534818e-06, "loss": 0.0007, "step": 53660 }, { "epoch": 0.38409790309883346, "grad_norm": 0.0, "learning_rate": 6.161024833607673e-06, "loss": 0.0001, "step": 53670 }, { "epoch": 0.38416946969154797, "grad_norm": 7.413425009872299e-06, "learning_rate": 6.1603091676805275e-06, "loss": 0.0002, "step": 53680 }, { "epoch": 0.3842410362842625, "grad_norm": 0.0, "learning_rate": 6.159593501753383e-06, "loss": 0.0, "step": 53690 }, { "epoch": 0.38431260287697705, "grad_norm": 1.0116231123902253e-06, "learning_rate": 6.158949402418951e-06, "loss": 0.0382, "step": 53700 }, { "epoch": 0.38438416946969156, "grad_norm": 0.0, "learning_rate": 6.158233736491807e-06, "loss": 0.0, "step": 53710 }, { "epoch": 0.3844557360624061, "grad_norm": 448.7454833984375, "learning_rate": 6.1575180705646606e-06, "loss": 0.4109, "step": 53720 }, { "epoch": 0.3845273026551206, "grad_norm": 0.0, "learning_rate": 6.156802404637515e-06, "loss": 0.0027, "step": 53730 }, { "epoch": 0.3845988692478351, "grad_norm": 0.0, "learning_rate": 6.15608673871037e-06, "loss": 0.0, "step": 53740 }, { "epoch": 0.3846704358405496, "grad_norm": 0.0, "learning_rate": 6.155371072783225e-06, "loss": 0.0, "step": 53750 }, { "epoch": 0.3847420024332642, "grad_norm": 0.0, "learning_rate": 6.15465540685608e-06, "loss": 0.0, "step": 53760 }, { "epoch": 0.3848135690259787, "grad_norm": 0.0, "learning_rate": 6.1539397409289345e-06, "loss": 0.0, "step": 53770 }, { "epoch": 0.3848851356186932, "grad_norm": 0.0, "learning_rate": 6.15322407500179e-06, "loss": 0.0006, "step": 53780 }, { "epoch": 0.3849567022114077, "grad_norm": 1.519543602057638e-09, "learning_rate": 6.152508409074644e-06, "loss": 0.0598, "step": 53790 }, { "epoch": 0.3850282688041222, "grad_norm": 0.0, "learning_rate": 6.151792743147499e-06, "loss": 0.0, "step": 53800 }, { "epoch": 0.38509983539683673, "grad_norm": 0.001336534391157329, "learning_rate": 6.151077077220354e-06, "loss": 0.0, "step": 53810 }, { "epoch": 0.3851714019895513, "grad_norm": 0.0, "learning_rate": 6.150361411293208e-06, "loss": 0.0001, "step": 53820 }, { "epoch": 0.3852429685822658, "grad_norm": 0.0, "learning_rate": 6.149645745366064e-06, "loss": 0.0, "step": 53830 }, { "epoch": 0.3853145351749803, "grad_norm": 7.564635211565474e-07, "learning_rate": 6.148930079438918e-06, "loss": 0.0002, "step": 53840 }, { "epoch": 0.38538610176769483, "grad_norm": 0.0, "learning_rate": 6.148214413511773e-06, "loss": 0.0, "step": 53850 }, { "epoch": 0.38545766836040934, "grad_norm": 0.0, "learning_rate": 6.147498747584628e-06, "loss": 0.0001, "step": 53860 }, { "epoch": 0.38552923495312386, "grad_norm": 0.0, "learning_rate": 6.146783081657482e-06, "loss": 0.0, "step": 53870 }, { "epoch": 0.3856008015458384, "grad_norm": 0.0, "learning_rate": 6.146067415730338e-06, "loss": 0.0, "step": 53880 }, { "epoch": 0.38567236813855293, "grad_norm": 0.0, "learning_rate": 6.145351749803192e-06, "loss": 0.1169, "step": 53890 }, { "epoch": 0.38574393473126745, "grad_norm": 0.0, "learning_rate": 6.144636083876047e-06, "loss": 0.0, "step": 53900 }, { "epoch": 0.38581550132398196, "grad_norm": 0.0, "learning_rate": 6.143920417948902e-06, "loss": 0.0, "step": 53910 }, { "epoch": 0.38588706791669647, "grad_norm": 0.0, "learning_rate": 6.143204752021756e-06, "loss": 0.0, "step": 53920 }, { "epoch": 0.385958634509411, "grad_norm": 0.0, "learning_rate": 6.1424890860946115e-06, "loss": 0.0, "step": 53930 }, { "epoch": 0.38603020110212555, "grad_norm": 0.0, "learning_rate": 6.141773420167466e-06, "loss": 0.0, "step": 53940 }, { "epoch": 0.38610176769484006, "grad_norm": 0.0, "learning_rate": 6.141057754240321e-06, "loss": 0.0429, "step": 53950 }, { "epoch": 0.38617333428755457, "grad_norm": 0.0, "learning_rate": 6.140342088313176e-06, "loss": 0.0001, "step": 53960 }, { "epoch": 0.3862449008802691, "grad_norm": 0.0, "learning_rate": 6.13962642238603e-06, "loss": 0.0094, "step": 53970 }, { "epoch": 0.3863164674729836, "grad_norm": 0.012723824009299278, "learning_rate": 6.1389107564588854e-06, "loss": 0.0, "step": 53980 }, { "epoch": 0.3863880340656981, "grad_norm": 0.2311169058084488, "learning_rate": 6.13819509053174e-06, "loss": 0.0, "step": 53990 }, { "epoch": 0.38645960065841267, "grad_norm": 0.0, "learning_rate": 6.137479424604595e-06, "loss": 0.0, "step": 54000 }, { "epoch": 0.3865311672511272, "grad_norm": 9.177229531864839e-10, "learning_rate": 6.13676375867745e-06, "loss": 0.0, "step": 54010 }, { "epoch": 0.3866027338438417, "grad_norm": 0.0, "learning_rate": 6.136048092750304e-06, "loss": 0.0015, "step": 54020 }, { "epoch": 0.3866743004365562, "grad_norm": 1.048627291311277e-05, "learning_rate": 6.135332426823159e-06, "loss": 0.0, "step": 54030 }, { "epoch": 0.3867458670292707, "grad_norm": 0.0, "learning_rate": 6.134616760896014e-06, "loss": 0.0, "step": 54040 }, { "epoch": 0.3868174336219853, "grad_norm": 0.0, "learning_rate": 6.133901094968869e-06, "loss": 0.0, "step": 54050 }, { "epoch": 0.3868890002146998, "grad_norm": 0.0, "learning_rate": 6.1331854290417235e-06, "loss": 0.0005, "step": 54060 }, { "epoch": 0.3869605668074143, "grad_norm": 0.0, "learning_rate": 6.132469763114579e-06, "loss": 0.0, "step": 54070 }, { "epoch": 0.3870321334001288, "grad_norm": 8.723272912902758e-05, "learning_rate": 6.131754097187433e-06, "loss": 0.0, "step": 54080 }, { "epoch": 0.38710369999284333, "grad_norm": 0.0, "learning_rate": 6.131038431260288e-06, "loss": 0.0002, "step": 54090 }, { "epoch": 0.38717526658555784, "grad_norm": 2.194707882097191e-08, "learning_rate": 6.130322765333143e-06, "loss": 0.048, "step": 54100 }, { "epoch": 0.3872468331782724, "grad_norm": 9.751610150487977e-07, "learning_rate": 6.1296070994059975e-06, "loss": 0.0859, "step": 54110 }, { "epoch": 0.3873183997709869, "grad_norm": 0.0, "learning_rate": 6.128891433478853e-06, "loss": 0.0, "step": 54120 }, { "epoch": 0.38738996636370143, "grad_norm": 0.04281797260046005, "learning_rate": 6.128175767551707e-06, "loss": 0.0, "step": 54130 }, { "epoch": 0.38746153295641594, "grad_norm": 8.068025181273697e-07, "learning_rate": 6.127460101624562e-06, "loss": 0.0, "step": 54140 }, { "epoch": 0.38753309954913046, "grad_norm": 0.0, "learning_rate": 6.126744435697417e-06, "loss": 0.0, "step": 54150 }, { "epoch": 0.38760466614184497, "grad_norm": 0.0, "learning_rate": 6.126028769770271e-06, "loss": 0.0001, "step": 54160 }, { "epoch": 0.38767623273455953, "grad_norm": 0.0, "learning_rate": 6.125313103843127e-06, "loss": 0.0, "step": 54170 }, { "epoch": 0.38774779932727405, "grad_norm": 0.0, "learning_rate": 6.124597437915981e-06, "loss": 0.0013, "step": 54180 }, { "epoch": 0.38781936591998856, "grad_norm": 0.0, "learning_rate": 6.123881771988836e-06, "loss": 0.0, "step": 54190 }, { "epoch": 0.38789093251270307, "grad_norm": 0.0, "learning_rate": 6.123166106061691e-06, "loss": 0.0, "step": 54200 }, { "epoch": 0.3879624991054176, "grad_norm": 0.0, "learning_rate": 6.122450440134545e-06, "loss": 0.0, "step": 54210 }, { "epoch": 0.3880340656981321, "grad_norm": 0.0, "learning_rate": 6.121734774207401e-06, "loss": 0.0037, "step": 54220 }, { "epoch": 0.38810563229084666, "grad_norm": 0.008791107684373856, "learning_rate": 6.121019108280255e-06, "loss": 0.0, "step": 54230 }, { "epoch": 0.38817719888356117, "grad_norm": 8.730867695483369e-10, "learning_rate": 6.12030344235311e-06, "loss": 0.0001, "step": 54240 }, { "epoch": 0.3882487654762757, "grad_norm": 0.0, "learning_rate": 6.119587776425965e-06, "loss": 0.0239, "step": 54250 }, { "epoch": 0.3883203320689902, "grad_norm": 0.0, "learning_rate": 6.118872110498819e-06, "loss": 0.0003, "step": 54260 }, { "epoch": 0.3883918986617047, "grad_norm": 28.066984176635742, "learning_rate": 6.1181564445716745e-06, "loss": 0.0061, "step": 54270 }, { "epoch": 0.3884634652544192, "grad_norm": 2.6507251504881424e-07, "learning_rate": 6.117440778644529e-06, "loss": 0.0, "step": 54280 }, { "epoch": 0.3885350318471338, "grad_norm": 0.0, "learning_rate": 6.116725112717384e-06, "loss": 0.0005, "step": 54290 }, { "epoch": 0.3886065984398483, "grad_norm": 0.0, "learning_rate": 6.116009446790239e-06, "loss": 0.0, "step": 54300 }, { "epoch": 0.3886781650325628, "grad_norm": 0.0, "learning_rate": 6.115293780863094e-06, "loss": 0.0, "step": 54310 }, { "epoch": 0.3887497316252773, "grad_norm": 0.0, "learning_rate": 6.114578114935948e-06, "loss": 0.0, "step": 54320 }, { "epoch": 0.38882129821799183, "grad_norm": 0.0, "learning_rate": 6.113862449008803e-06, "loss": 0.0001, "step": 54330 }, { "epoch": 0.38889286481070634, "grad_norm": 0.0, "learning_rate": 6.113146783081658e-06, "loss": 0.1224, "step": 54340 }, { "epoch": 0.3889644314034209, "grad_norm": 0.0, "learning_rate": 6.112431117154513e-06, "loss": 0.0, "step": 54350 }, { "epoch": 0.3890359979961354, "grad_norm": 0.0, "learning_rate": 6.111715451227368e-06, "loss": 0.0, "step": 54360 }, { "epoch": 0.38910756458884993, "grad_norm": 0.0, "learning_rate": 6.110999785300222e-06, "loss": 0.0, "step": 54370 }, { "epoch": 0.38917913118156444, "grad_norm": 0.0, "learning_rate": 6.110284119373077e-06, "loss": 0.0, "step": 54380 }, { "epoch": 0.38925069777427895, "grad_norm": 0.0, "learning_rate": 6.109568453445932e-06, "loss": 0.0, "step": 54390 }, { "epoch": 0.38932226436699346, "grad_norm": 0.0, "learning_rate": 6.1088527875187865e-06, "loss": 0.0, "step": 54400 }, { "epoch": 0.38939383095970803, "grad_norm": 0.0, "learning_rate": 6.108137121591642e-06, "loss": 0.0, "step": 54410 }, { "epoch": 0.38946539755242254, "grad_norm": 0.0, "learning_rate": 6.107421455664496e-06, "loss": 0.2182, "step": 54420 }, { "epoch": 0.38953696414513705, "grad_norm": 0.0, "learning_rate": 6.1067057897373515e-06, "loss": 0.0, "step": 54430 }, { "epoch": 0.38960853073785157, "grad_norm": 0.0, "learning_rate": 6.105990123810206e-06, "loss": 0.0, "step": 54440 }, { "epoch": 0.3896800973305661, "grad_norm": 9.032417591647857e-10, "learning_rate": 6.1052744578830604e-06, "loss": 0.0, "step": 54450 }, { "epoch": 0.3897516639232806, "grad_norm": 0.0, "learning_rate": 6.104558791955916e-06, "loss": 0.0225, "step": 54460 }, { "epoch": 0.38982323051599516, "grad_norm": 0.0, "learning_rate": 6.10384312602877e-06, "loss": 0.0, "step": 54470 }, { "epoch": 0.38989479710870967, "grad_norm": 0.0, "learning_rate": 6.1031274601016255e-06, "loss": 0.0, "step": 54480 }, { "epoch": 0.3899663637014242, "grad_norm": 0.0, "learning_rate": 6.10241179417448e-06, "loss": 0.0, "step": 54490 }, { "epoch": 0.3900379302941387, "grad_norm": 0.0, "learning_rate": 6.101696128247334e-06, "loss": 0.0, "step": 54500 }, { "epoch": 0.3901094968868532, "grad_norm": 0.0, "learning_rate": 6.10098046232019e-06, "loss": 0.0007, "step": 54510 }, { "epoch": 0.3901810634795677, "grad_norm": 0.0, "learning_rate": 6.100264796393044e-06, "loss": 0.0, "step": 54520 }, { "epoch": 0.3902526300722823, "grad_norm": 0.0, "learning_rate": 6.099549130465899e-06, "loss": 0.0, "step": 54530 }, { "epoch": 0.3903241966649968, "grad_norm": 0.00010455969459144399, "learning_rate": 6.098833464538754e-06, "loss": 0.0, "step": 54540 }, { "epoch": 0.3903957632577113, "grad_norm": 1.3048003211224568e-06, "learning_rate": 6.098117798611609e-06, "loss": 0.0, "step": 54550 }, { "epoch": 0.3904673298504258, "grad_norm": 0.0, "learning_rate": 6.0974021326844636e-06, "loss": 0.0, "step": 54560 }, { "epoch": 0.3905388964431403, "grad_norm": 0.0, "learning_rate": 6.096686466757318e-06, "loss": 0.1227, "step": 54570 }, { "epoch": 0.39061046303585484, "grad_norm": 0.0, "learning_rate": 6.095970800830173e-06, "loss": 0.0, "step": 54580 }, { "epoch": 0.3906820296285694, "grad_norm": 0.0, "learning_rate": 6.095255134903028e-06, "loss": 0.0, "step": 54590 }, { "epoch": 0.3907535962212839, "grad_norm": 0.0, "learning_rate": 6.094539468975883e-06, "loss": 0.0, "step": 54600 }, { "epoch": 0.39082516281399843, "grad_norm": 0.0, "learning_rate": 6.0938238030487375e-06, "loss": 0.0, "step": 54610 }, { "epoch": 0.39089672940671294, "grad_norm": 1.0901178626454566e-08, "learning_rate": 6.093108137121592e-06, "loss": 0.0034, "step": 54620 }, { "epoch": 0.39096829599942745, "grad_norm": 0.06056651845574379, "learning_rate": 6.092392471194447e-06, "loss": 0.0, "step": 54630 }, { "epoch": 0.39103986259214196, "grad_norm": 0.0, "learning_rate": 6.091676805267302e-06, "loss": 0.0, "step": 54640 }, { "epoch": 0.39111142918485653, "grad_norm": 0.0, "learning_rate": 6.090961139340157e-06, "loss": 0.0018, "step": 54650 }, { "epoch": 0.39118299577757104, "grad_norm": 0.0, "learning_rate": 6.090245473413011e-06, "loss": 0.0, "step": 54660 }, { "epoch": 0.39125456237028555, "grad_norm": 0.0, "learning_rate": 6.089529807485867e-06, "loss": 0.0, "step": 54670 }, { "epoch": 0.39132612896300006, "grad_norm": 2.109063098032493e-05, "learning_rate": 6.088814141558721e-06, "loss": 0.0, "step": 54680 }, { "epoch": 0.3913976955557146, "grad_norm": 0.0, "learning_rate": 6.088098475631576e-06, "loss": 0.0, "step": 54690 }, { "epoch": 0.3914692621484291, "grad_norm": 1.6344895362854004, "learning_rate": 6.087382809704431e-06, "loss": 0.8182, "step": 54700 }, { "epoch": 0.39154082874114365, "grad_norm": 0.0, "learning_rate": 6.086667143777285e-06, "loss": 0.0, "step": 54710 }, { "epoch": 0.39161239533385817, "grad_norm": 9.347109752866345e-10, "learning_rate": 6.085951477850141e-06, "loss": 0.0015, "step": 54720 }, { "epoch": 0.3916839619265727, "grad_norm": 0.0, "learning_rate": 6.085235811922995e-06, "loss": 0.0, "step": 54730 }, { "epoch": 0.3917555285192872, "grad_norm": 0.0, "learning_rate": 6.0845201459958495e-06, "loss": 0.0002, "step": 54740 }, { "epoch": 0.3918270951120017, "grad_norm": 0.0, "learning_rate": 6.083804480068705e-06, "loss": 0.0, "step": 54750 }, { "epoch": 0.3918986617047162, "grad_norm": 0.0, "learning_rate": 6.083088814141559e-06, "loss": 0.0, "step": 54760 }, { "epoch": 0.3919702282974308, "grad_norm": 0.0, "learning_rate": 6.0823731482144145e-06, "loss": 0.019, "step": 54770 }, { "epoch": 0.3920417948901453, "grad_norm": 0.0, "learning_rate": 6.081657482287269e-06, "loss": 0.0, "step": 54780 }, { "epoch": 0.3921133614828598, "grad_norm": 0.0, "learning_rate": 6.080941816360123e-06, "loss": 0.1964, "step": 54790 }, { "epoch": 0.3921849280755743, "grad_norm": 0.0, "learning_rate": 6.080226150432979e-06, "loss": 0.0, "step": 54800 }, { "epoch": 0.3922564946682888, "grad_norm": 0.0, "learning_rate": 6.079510484505833e-06, "loss": 0.0, "step": 54810 }, { "epoch": 0.3923280612610034, "grad_norm": 0.0, "learning_rate": 6.0787948185786884e-06, "loss": 0.0, "step": 54820 }, { "epoch": 0.3923996278537179, "grad_norm": 0.0, "learning_rate": 6.078079152651543e-06, "loss": 0.0, "step": 54830 }, { "epoch": 0.3924711944464324, "grad_norm": 0.0, "learning_rate": 6.077363486724398e-06, "loss": 0.0, "step": 54840 }, { "epoch": 0.3925427610391469, "grad_norm": 0.0, "learning_rate": 6.076647820797253e-06, "loss": 0.0, "step": 54850 }, { "epoch": 0.39261432763186144, "grad_norm": 1.3307055723998928e-07, "learning_rate": 6.075932154870107e-06, "loss": 0.0, "step": 54860 }, { "epoch": 0.39268589422457595, "grad_norm": 2.2132427091037243e-07, "learning_rate": 6.075216488942962e-06, "loss": 0.0, "step": 54870 }, { "epoch": 0.3927574608172905, "grad_norm": 8.829983966229804e-10, "learning_rate": 6.074500823015817e-06, "loss": 0.0, "step": 54880 }, { "epoch": 0.392829027410005, "grad_norm": 0.0, "learning_rate": 6.073785157088672e-06, "loss": 0.0, "step": 54890 }, { "epoch": 0.39290059400271954, "grad_norm": 0.0, "learning_rate": 6.0730694911615265e-06, "loss": 0.0, "step": 54900 }, { "epoch": 0.39297216059543405, "grad_norm": 0.003346778219565749, "learning_rate": 6.07235382523438e-06, "loss": 0.048, "step": 54910 }, { "epoch": 0.39304372718814856, "grad_norm": 2.3282989786821418e-05, "learning_rate": 6.071638159307236e-06, "loss": 0.1011, "step": 54920 }, { "epoch": 0.3931152937808631, "grad_norm": 0.0, "learning_rate": 6.070922493380091e-06, "loss": 0.0, "step": 54930 }, { "epoch": 0.39318686037357764, "grad_norm": 0.0, "learning_rate": 6.070206827452946e-06, "loss": 0.0, "step": 54940 }, { "epoch": 0.39325842696629215, "grad_norm": 0.0, "learning_rate": 6.0694911615258005e-06, "loss": 0.0, "step": 54950 }, { "epoch": 0.39332999355900666, "grad_norm": 68.23875427246094, "learning_rate": 6.068775495598656e-06, "loss": 0.0172, "step": 54960 }, { "epoch": 0.3934015601517212, "grad_norm": 0.04149175435304642, "learning_rate": 6.06805982967151e-06, "loss": 0.0, "step": 54970 }, { "epoch": 0.3934731267444357, "grad_norm": 0.0001751487870933488, "learning_rate": 6.067344163744364e-06, "loss": 0.0, "step": 54980 }, { "epoch": 0.3935446933371502, "grad_norm": 0.0, "learning_rate": 6.06662849781722e-06, "loss": 0.0001, "step": 54990 }, { "epoch": 0.39361625992986476, "grad_norm": 1.842909296101425e-05, "learning_rate": 6.065912831890074e-06, "loss": 0.0, "step": 55000 }, { "epoch": 0.3936878265225793, "grad_norm": 0.0, "learning_rate": 6.06519716596293e-06, "loss": 0.0, "step": 55010 }, { "epoch": 0.3937593931152938, "grad_norm": 0.0, "learning_rate": 6.064481500035784e-06, "loss": 0.0, "step": 55020 }, { "epoch": 0.3938309597080083, "grad_norm": 0.0, "learning_rate": 6.063765834108638e-06, "loss": 0.0, "step": 55030 }, { "epoch": 0.3939025263007228, "grad_norm": 0.0, "learning_rate": 6.063050168181494e-06, "loss": 0.0, "step": 55040 }, { "epoch": 0.3939740928934373, "grad_norm": 0.0, "learning_rate": 6.0623345022543474e-06, "loss": 0.5102, "step": 55050 }, { "epoch": 0.3940456594861519, "grad_norm": 0.0, "learning_rate": 6.061618836327204e-06, "loss": 0.3348, "step": 55060 }, { "epoch": 0.3941172260788664, "grad_norm": 0.0, "learning_rate": 6.060903170400057e-06, "loss": 0.365, "step": 55070 }, { "epoch": 0.3941887926715809, "grad_norm": 0.0, "learning_rate": 6.060187504472913e-06, "loss": 0.0, "step": 55080 }, { "epoch": 0.3942603592642954, "grad_norm": 0.0, "learning_rate": 6.059471838545768e-06, "loss": 0.0, "step": 55090 }, { "epoch": 0.39433192585700994, "grad_norm": 37.696990966796875, "learning_rate": 6.058756172618621e-06, "loss": 0.0625, "step": 55100 }, { "epoch": 0.39440349244972445, "grad_norm": 0.0, "learning_rate": 6.0580405066914775e-06, "loss": 0.0, "step": 55110 }, { "epoch": 0.394475059042439, "grad_norm": 0.0, "learning_rate": 6.057324840764331e-06, "loss": 0.1044, "step": 55120 }, { "epoch": 0.3945466256351535, "grad_norm": 0.0, "learning_rate": 6.056609174837187e-06, "loss": 0.0, "step": 55130 }, { "epoch": 0.39461819222786804, "grad_norm": 0.0, "learning_rate": 6.055893508910041e-06, "loss": 0.0002, "step": 55140 }, { "epoch": 0.39468975882058255, "grad_norm": 0.0, "learning_rate": 6.055177842982895e-06, "loss": 0.0, "step": 55150 }, { "epoch": 0.39476132541329706, "grad_norm": 0.0, "learning_rate": 6.054462177055751e-06, "loss": 0.0, "step": 55160 }, { "epoch": 0.39483289200601157, "grad_norm": 0.0, "learning_rate": 6.053746511128605e-06, "loss": 0.0, "step": 55170 }, { "epoch": 0.39490445859872614, "grad_norm": 0.0, "learning_rate": 6.053030845201461e-06, "loss": 0.0002, "step": 55180 }, { "epoch": 0.39497602519144065, "grad_norm": 0.0, "learning_rate": 6.052315179274315e-06, "loss": 0.0, "step": 55190 }, { "epoch": 0.39504759178415516, "grad_norm": 0.0, "learning_rate": 6.051599513347171e-06, "loss": 0.0, "step": 55200 }, { "epoch": 0.3951191583768697, "grad_norm": 0.0, "learning_rate": 6.0508838474200245e-06, "loss": 0.0, "step": 55210 }, { "epoch": 0.3951907249695842, "grad_norm": 0.0, "learning_rate": 6.050168181492879e-06, "loss": 0.0002, "step": 55220 }, { "epoch": 0.3952622915622987, "grad_norm": 0.0, "learning_rate": 6.049452515565735e-06, "loss": 0.0, "step": 55230 }, { "epoch": 0.39533385815501326, "grad_norm": 0.0, "learning_rate": 6.048736849638589e-06, "loss": 0.0, "step": 55240 }, { "epoch": 0.3954054247477278, "grad_norm": 0.0, "learning_rate": 6.048021183711445e-06, "loss": 0.0159, "step": 55250 }, { "epoch": 0.3954769913404423, "grad_norm": 0.000283490342553705, "learning_rate": 6.047305517784298e-06, "loss": 0.0, "step": 55260 }, { "epoch": 0.3955485579331568, "grad_norm": 0.0, "learning_rate": 6.046589851857153e-06, "loss": 0.0, "step": 55270 }, { "epoch": 0.3956201245258713, "grad_norm": 0.0014504382852464914, "learning_rate": 6.045874185930008e-06, "loss": 0.0, "step": 55280 }, { "epoch": 0.3956916911185858, "grad_norm": 0.0, "learning_rate": 6.045158520002863e-06, "loss": 0.0, "step": 55290 }, { "epoch": 0.3957632577113004, "grad_norm": 0.0, "learning_rate": 6.044442854075718e-06, "loss": 0.0, "step": 55300 }, { "epoch": 0.3958348243040149, "grad_norm": 0.0, "learning_rate": 6.043727188148572e-06, "loss": 0.0, "step": 55310 }, { "epoch": 0.3959063908967294, "grad_norm": 0.0, "learning_rate": 6.0430115222214285e-06, "loss": 0.3034, "step": 55320 }, { "epoch": 0.3959779574894439, "grad_norm": 8.788522245595232e-05, "learning_rate": 6.042295856294282e-06, "loss": 0.0022, "step": 55330 }, { "epoch": 0.39604952408215843, "grad_norm": 2.0845434846705757e-05, "learning_rate": 6.0415801903671365e-06, "loss": 0.003, "step": 55340 }, { "epoch": 0.39612109067487294, "grad_norm": 0.0, "learning_rate": 6.040864524439992e-06, "loss": 0.0001, "step": 55350 }, { "epoch": 0.3961926572675875, "grad_norm": 0.0, "learning_rate": 6.040148858512846e-06, "loss": 0.0, "step": 55360 }, { "epoch": 0.396264223860302, "grad_norm": 0.0, "learning_rate": 6.0394331925857015e-06, "loss": 0.0, "step": 55370 }, { "epoch": 0.39633579045301653, "grad_norm": 8.629476724308915e-06, "learning_rate": 6.038717526658556e-06, "loss": 0.0293, "step": 55380 }, { "epoch": 0.39640735704573105, "grad_norm": 0.0, "learning_rate": 6.03800186073141e-06, "loss": 0.0, "step": 55390 }, { "epoch": 0.39647892363844556, "grad_norm": 0.0, "learning_rate": 6.037286194804266e-06, "loss": 0.0, "step": 55400 }, { "epoch": 0.39655049023116007, "grad_norm": 0.0, "learning_rate": 6.03657052887712e-06, "loss": 0.0, "step": 55410 }, { "epoch": 0.39662205682387464, "grad_norm": 0.0, "learning_rate": 6.0358548629499755e-06, "loss": 0.0, "step": 55420 }, { "epoch": 0.39669362341658915, "grad_norm": 0.0, "learning_rate": 6.03513919702283e-06, "loss": 0.0, "step": 55430 }, { "epoch": 0.39676519000930366, "grad_norm": 0.0, "learning_rate": 6.034423531095685e-06, "loss": 0.0, "step": 55440 }, { "epoch": 0.39683675660201817, "grad_norm": 0.0, "learning_rate": 6.03370786516854e-06, "loss": 0.0, "step": 55450 }, { "epoch": 0.3969083231947327, "grad_norm": 0.0, "learning_rate": 6.032992199241394e-06, "loss": 0.0, "step": 55460 }, { "epoch": 0.3969798897874472, "grad_norm": 0.0, "learning_rate": 6.032276533314249e-06, "loss": 0.0, "step": 55470 }, { "epoch": 0.39705145638016176, "grad_norm": 0.0, "learning_rate": 6.031560867387104e-06, "loss": 0.0, "step": 55480 }, { "epoch": 0.39712302297287627, "grad_norm": 0.0, "learning_rate": 6.030845201459959e-06, "loss": 0.0, "step": 55490 }, { "epoch": 0.3971945895655908, "grad_norm": 0.0, "learning_rate": 6.0301295355328135e-06, "loss": 0.0, "step": 55500 }, { "epoch": 0.3972661561583053, "grad_norm": 0.0, "learning_rate": 6.029413869605668e-06, "loss": 0.0, "step": 55510 }, { "epoch": 0.3973377227510198, "grad_norm": 0.0, "learning_rate": 6.028698203678523e-06, "loss": 0.0073, "step": 55520 }, { "epoch": 0.3974092893437343, "grad_norm": 0.0, "learning_rate": 6.027982537751378e-06, "loss": 0.0, "step": 55530 }, { "epoch": 0.3974808559364489, "grad_norm": 0.0, "learning_rate": 6.027266871824233e-06, "loss": 0.0001, "step": 55540 }, { "epoch": 0.3975524225291634, "grad_norm": 0.0, "learning_rate": 6.0265512058970875e-06, "loss": 0.0, "step": 55550 }, { "epoch": 0.3976239891218779, "grad_norm": 0.0, "learning_rate": 6.025835539969942e-06, "loss": 0.0017, "step": 55560 }, { "epoch": 0.3976955557145924, "grad_norm": 9.108974130533909e-10, "learning_rate": 6.025119874042797e-06, "loss": 0.0, "step": 55570 }, { "epoch": 0.39776712230730693, "grad_norm": 0.2046077698469162, "learning_rate": 6.024404208115652e-06, "loss": 0.0, "step": 55580 }, { "epoch": 0.3978386889000215, "grad_norm": 0.00016274784866254777, "learning_rate": 6.023688542188507e-06, "loss": 0.0, "step": 55590 }, { "epoch": 0.397910255492736, "grad_norm": 3.25396728515625, "learning_rate": 6.022972876261361e-06, "loss": 0.0008, "step": 55600 }, { "epoch": 0.3979818220854505, "grad_norm": 0.0, "learning_rate": 6.022257210334217e-06, "loss": 0.0122, "step": 55610 }, { "epoch": 0.39805338867816503, "grad_norm": 0.0010795490816235542, "learning_rate": 6.021541544407071e-06, "loss": 0.0, "step": 55620 }, { "epoch": 0.39812495527087954, "grad_norm": 0.000939473626203835, "learning_rate": 6.0208258784799256e-06, "loss": 0.0, "step": 55630 }, { "epoch": 0.39819652186359406, "grad_norm": 0.0, "learning_rate": 6.020110212552781e-06, "loss": 0.0, "step": 55640 }, { "epoch": 0.3982680884563086, "grad_norm": 0.0, "learning_rate": 6.019394546625635e-06, "loss": 0.0012, "step": 55650 }, { "epoch": 0.39833965504902313, "grad_norm": 0.0, "learning_rate": 6.018678880698491e-06, "loss": 0.0, "step": 55660 }, { "epoch": 0.39841122164173765, "grad_norm": 0.0, "learning_rate": 6.017963214771345e-06, "loss": 0.0, "step": 55670 }, { "epoch": 0.39848278823445216, "grad_norm": 0.0, "learning_rate": 6.0172475488441995e-06, "loss": 0.0, "step": 55680 }, { "epoch": 0.39855435482716667, "grad_norm": 0.0, "learning_rate": 6.016531882917055e-06, "loss": 0.0, "step": 55690 }, { "epoch": 0.3986259214198812, "grad_norm": 0.0, "learning_rate": 6.015816216989909e-06, "loss": 0.0, "step": 55700 }, { "epoch": 0.39869748801259575, "grad_norm": 0.0, "learning_rate": 6.0151005510627645e-06, "loss": 0.0, "step": 55710 }, { "epoch": 0.39876905460531026, "grad_norm": 0.0, "learning_rate": 6.014384885135619e-06, "loss": 0.0, "step": 55720 }, { "epoch": 0.39884062119802477, "grad_norm": 1.819418503146153e-05, "learning_rate": 6.013669219208474e-06, "loss": 0.044, "step": 55730 }, { "epoch": 0.3989121877907393, "grad_norm": 0.0, "learning_rate": 6.012953553281329e-06, "loss": 0.0, "step": 55740 }, { "epoch": 0.3989837543834538, "grad_norm": 0.0, "learning_rate": 6.012237887354183e-06, "loss": 0.009, "step": 55750 }, { "epoch": 0.3990553209761683, "grad_norm": 0.0, "learning_rate": 6.0115222214270384e-06, "loss": 0.0, "step": 55760 }, { "epoch": 0.39912688756888287, "grad_norm": 0.0, "learning_rate": 6.010806555499893e-06, "loss": 0.0, "step": 55770 }, { "epoch": 0.3991984541615974, "grad_norm": 0.0, "learning_rate": 6.010090889572748e-06, "loss": 0.0002, "step": 55780 }, { "epoch": 0.3992700207543119, "grad_norm": 0.0, "learning_rate": 6.009375223645603e-06, "loss": 0.0, "step": 55790 }, { "epoch": 0.3993415873470264, "grad_norm": 0.0, "learning_rate": 6.008659557718457e-06, "loss": 0.0, "step": 55800 }, { "epoch": 0.3994131539397409, "grad_norm": 2.0738019301802524e-08, "learning_rate": 6.007943891791312e-06, "loss": 0.0252, "step": 55810 }, { "epoch": 0.39948472053245543, "grad_norm": 0.0, "learning_rate": 6.007228225864167e-06, "loss": 0.0087, "step": 55820 }, { "epoch": 0.39955628712517, "grad_norm": 0.0, "learning_rate": 6.006512559937022e-06, "loss": 0.0, "step": 55830 }, { "epoch": 0.3996278537178845, "grad_norm": 0.0, "learning_rate": 6.0057968940098765e-06, "loss": 0.0, "step": 55840 }, { "epoch": 0.399699420310599, "grad_norm": 0.0, "learning_rate": 6.005081228082732e-06, "loss": 0.0, "step": 55850 }, { "epoch": 0.39977098690331353, "grad_norm": 0.0, "learning_rate": 6.004365562155586e-06, "loss": 0.0, "step": 55860 }, { "epoch": 0.39984255349602804, "grad_norm": 0.0, "learning_rate": 6.003649896228441e-06, "loss": 0.0, "step": 55870 }, { "epoch": 0.39991412008874255, "grad_norm": 5.208087259234162e-07, "learning_rate": 6.002934230301296e-06, "loss": 0.0, "step": 55880 }, { "epoch": 0.3999856866814571, "grad_norm": 0.0, "learning_rate": 6.0022185643741504e-06, "loss": 0.0, "step": 55890 }, { "epoch": 0.40005725327417163, "grad_norm": 0.0, "learning_rate": 6.001502898447006e-06, "loss": 0.0, "step": 55900 }, { "epoch": 0.40012881986688614, "grad_norm": 2.6979117606629188e-08, "learning_rate": 6.00078723251986e-06, "loss": 0.0, "step": 55910 }, { "epoch": 0.40020038645960065, "grad_norm": 0.0, "learning_rate": 6.000071566592715e-06, "loss": 0.0, "step": 55920 }, { "epoch": 0.40027195305231517, "grad_norm": 0.0, "learning_rate": 5.99935590066557e-06, "loss": 0.0, "step": 55930 }, { "epoch": 0.4003435196450297, "grad_norm": 0.13627761602401733, "learning_rate": 5.998640234738424e-06, "loss": 0.0001, "step": 55940 }, { "epoch": 0.40041508623774424, "grad_norm": 0.0, "learning_rate": 5.99792456881128e-06, "loss": 0.0, "step": 55950 }, { "epoch": 0.40048665283045876, "grad_norm": 6.694575560572957e-09, "learning_rate": 5.997208902884134e-06, "loss": 0.0, "step": 55960 }, { "epoch": 0.40055821942317327, "grad_norm": 0.0, "learning_rate": 5.996493236956989e-06, "loss": 0.0, "step": 55970 }, { "epoch": 0.4006297860158878, "grad_norm": 0.0, "learning_rate": 5.995777571029844e-06, "loss": 0.0, "step": 55980 }, { "epoch": 0.4007013526086023, "grad_norm": 0.0, "learning_rate": 5.995061905102698e-06, "loss": 0.0, "step": 55990 }, { "epoch": 0.4007729192013168, "grad_norm": 0.0, "learning_rate": 5.9943462391755536e-06, "loss": 0.0, "step": 56000 }, { "epoch": 0.40084448579403137, "grad_norm": 0.0, "learning_rate": 5.993630573248408e-06, "loss": 0.0, "step": 56010 }, { "epoch": 0.4009160523867459, "grad_norm": 0.0, "learning_rate": 5.992914907321263e-06, "loss": 0.0029, "step": 56020 }, { "epoch": 0.4009876189794604, "grad_norm": 2.0041434254380874e-05, "learning_rate": 5.992199241394118e-06, "loss": 0.6391, "step": 56030 }, { "epoch": 0.4010591855721749, "grad_norm": 4.6525014396614495e-10, "learning_rate": 5.991483575466972e-06, "loss": 0.0, "step": 56040 }, { "epoch": 0.4011307521648894, "grad_norm": 0.0, "learning_rate": 5.9907679095398275e-06, "loss": 0.1518, "step": 56050 }, { "epoch": 0.4012023187576039, "grad_norm": 0.0003644042881205678, "learning_rate": 5.990052243612682e-06, "loss": 0.0004, "step": 56060 }, { "epoch": 0.4012738853503185, "grad_norm": 0.0, "learning_rate": 5.989336577685537e-06, "loss": 0.0004, "step": 56070 }, { "epoch": 0.401345451943033, "grad_norm": 0.0, "learning_rate": 5.988620911758392e-06, "loss": 0.0, "step": 56080 }, { "epoch": 0.4014170185357475, "grad_norm": 0.0, "learning_rate": 5.987905245831247e-06, "loss": 0.0, "step": 56090 }, { "epoch": 0.401488585128462, "grad_norm": 0.0, "learning_rate": 5.987189579904101e-06, "loss": 0.0, "step": 56100 }, { "epoch": 0.40156015172117654, "grad_norm": 0.0, "learning_rate": 5.986473913976956e-06, "loss": 0.0, "step": 56110 }, { "epoch": 0.40163171831389105, "grad_norm": 0.0, "learning_rate": 5.985758248049811e-06, "loss": 0.0, "step": 56120 }, { "epoch": 0.4017032849066056, "grad_norm": 3.79410636242028e-08, "learning_rate": 5.985042582122666e-06, "loss": 0.0, "step": 56130 }, { "epoch": 0.40177485149932013, "grad_norm": 3.873175202784296e-08, "learning_rate": 5.984326916195521e-06, "loss": 0.0, "step": 56140 }, { "epoch": 0.40184641809203464, "grad_norm": 0.0, "learning_rate": 5.983611250268375e-06, "loss": 0.0001, "step": 56150 }, { "epoch": 0.40191798468474915, "grad_norm": 0.0, "learning_rate": 5.98289558434123e-06, "loss": 0.0, "step": 56160 }, { "epoch": 0.40198955127746366, "grad_norm": 0.0, "learning_rate": 5.982179918414085e-06, "loss": 0.0054, "step": 56170 }, { "epoch": 0.4020611178701782, "grad_norm": 0.06946753710508347, "learning_rate": 5.9814642524869395e-06, "loss": 0.129, "step": 56180 }, { "epoch": 0.40213268446289274, "grad_norm": 0.0, "learning_rate": 5.980748586559795e-06, "loss": 0.0001, "step": 56190 }, { "epoch": 0.40220425105560725, "grad_norm": 0.0, "learning_rate": 5.980032920632649e-06, "loss": 0.0005, "step": 56200 }, { "epoch": 0.40227581764832177, "grad_norm": 0.004199676215648651, "learning_rate": 5.9793172547055045e-06, "loss": 0.0, "step": 56210 }, { "epoch": 0.4023473842410363, "grad_norm": 0.0, "learning_rate": 5.978601588778359e-06, "loss": 0.0, "step": 56220 }, { "epoch": 0.4024189508337508, "grad_norm": 0.0, "learning_rate": 5.977885922851213e-06, "loss": 0.0002, "step": 56230 }, { "epoch": 0.4024905174264653, "grad_norm": 0.0, "learning_rate": 5.977170256924069e-06, "loss": 0.0, "step": 56240 }, { "epoch": 0.40256208401917987, "grad_norm": 0.0, "learning_rate": 5.976454590996923e-06, "loss": 0.0008, "step": 56250 }, { "epoch": 0.4026336506118944, "grad_norm": 0.45615652203559875, "learning_rate": 5.9757389250697784e-06, "loss": 0.0001, "step": 56260 }, { "epoch": 0.4027052172046089, "grad_norm": 4.703668010641593e-10, "learning_rate": 5.975023259142633e-06, "loss": 0.0, "step": 56270 }, { "epoch": 0.4027767837973234, "grad_norm": 0.0, "learning_rate": 5.974307593215487e-06, "loss": 0.0, "step": 56280 }, { "epoch": 0.4028483503900379, "grad_norm": 0.0, "learning_rate": 5.973591927288343e-06, "loss": 0.2551, "step": 56290 }, { "epoch": 0.4029199169827524, "grad_norm": 0.0, "learning_rate": 5.972876261361197e-06, "loss": 0.0, "step": 56300 }, { "epoch": 0.402991483575467, "grad_norm": 0.004094849340617657, "learning_rate": 5.972160595434052e-06, "loss": 0.0, "step": 56310 }, { "epoch": 0.4030630501681815, "grad_norm": 0.0, "learning_rate": 5.971444929506907e-06, "loss": 0.0, "step": 56320 }, { "epoch": 0.403134616760896, "grad_norm": 0.0, "learning_rate": 5.970729263579761e-06, "loss": 0.0, "step": 56330 }, { "epoch": 0.4032061833536105, "grad_norm": 0.0, "learning_rate": 5.9700135976526165e-06, "loss": 0.0451, "step": 56340 }, { "epoch": 0.40327774994632504, "grad_norm": 7.303240678879774e-09, "learning_rate": 5.969297931725471e-06, "loss": 0.0, "step": 56350 }, { "epoch": 0.4033493165390396, "grad_norm": 0.0, "learning_rate": 5.968582265798326e-06, "loss": 0.0, "step": 56360 }, { "epoch": 0.4034208831317541, "grad_norm": 0.0, "learning_rate": 5.967866599871181e-06, "loss": 0.0, "step": 56370 }, { "epoch": 0.4034924497244686, "grad_norm": 0.42192044854164124, "learning_rate": 5.967150933944036e-06, "loss": 0.0001, "step": 56380 }, { "epoch": 0.40356401631718314, "grad_norm": 0.0, "learning_rate": 5.9664352680168905e-06, "loss": 0.0717, "step": 56390 }, { "epoch": 0.40363558290989765, "grad_norm": 0.0, "learning_rate": 5.965719602089745e-06, "loss": 0.0057, "step": 56400 }, { "epoch": 0.40370714950261216, "grad_norm": 0.0, "learning_rate": 5.9650039361626e-06, "loss": 0.0, "step": 56410 }, { "epoch": 0.40377871609532673, "grad_norm": 0.0, "learning_rate": 5.964288270235455e-06, "loss": 0.0, "step": 56420 }, { "epoch": 0.40385028268804124, "grad_norm": 0.0, "learning_rate": 5.96357260430831e-06, "loss": 0.0, "step": 56430 }, { "epoch": 0.40392184928075575, "grad_norm": 0.0, "learning_rate": 5.962856938381164e-06, "loss": 0.0, "step": 56440 }, { "epoch": 0.40399341587347026, "grad_norm": 0.0, "learning_rate": 5.962141272454019e-06, "loss": 0.0, "step": 56450 }, { "epoch": 0.4040649824661848, "grad_norm": 0.0, "learning_rate": 5.961425606526874e-06, "loss": 0.0, "step": 56460 }, { "epoch": 0.4041365490588993, "grad_norm": 0.0, "learning_rate": 5.9607099405997286e-06, "loss": 0.0, "step": 56470 }, { "epoch": 0.40420811565161385, "grad_norm": 0.27569377422332764, "learning_rate": 5.959994274672584e-06, "loss": 0.0001, "step": 56480 }, { "epoch": 0.40427968224432836, "grad_norm": 2.8153264164387792e-09, "learning_rate": 5.959278608745438e-06, "loss": 0.0, "step": 56490 }, { "epoch": 0.4043512488370429, "grad_norm": 0.0, "learning_rate": 5.958562942818294e-06, "loss": 0.0009, "step": 56500 }, { "epoch": 0.4044228154297574, "grad_norm": 7.86501641414361e-06, "learning_rate": 5.957847276891148e-06, "loss": 0.0, "step": 56510 }, { "epoch": 0.4044943820224719, "grad_norm": 0.0, "learning_rate": 5.9571316109640025e-06, "loss": 0.0297, "step": 56520 }, { "epoch": 0.4045659486151864, "grad_norm": 3.1102526776294326e-08, "learning_rate": 5.956415945036858e-06, "loss": 0.0, "step": 56530 }, { "epoch": 0.404637515207901, "grad_norm": 0.0, "learning_rate": 5.955700279109712e-06, "loss": 0.0, "step": 56540 }, { "epoch": 0.4047090818006155, "grad_norm": 0.00016307491750922054, "learning_rate": 5.9549846131825675e-06, "loss": 0.0, "step": 56550 }, { "epoch": 0.40478064839333, "grad_norm": 0.0, "learning_rate": 5.954268947255422e-06, "loss": 0.0, "step": 56560 }, { "epoch": 0.4048522149860445, "grad_norm": 0.0, "learning_rate": 5.9535532813282755e-06, "loss": 0.0, "step": 56570 }, { "epoch": 0.404923781578759, "grad_norm": 3.7125353458122845e-08, "learning_rate": 5.952837615401132e-06, "loss": 0.0, "step": 56580 }, { "epoch": 0.40499534817147353, "grad_norm": 0.0, "learning_rate": 5.952121949473985e-06, "loss": 0.0001, "step": 56590 }, { "epoch": 0.4050669147641881, "grad_norm": 0.0, "learning_rate": 5.951406283546841e-06, "loss": 0.0001, "step": 56600 }, { "epoch": 0.4051384813569026, "grad_norm": 0.0, "learning_rate": 5.950690617619696e-06, "loss": 0.0, "step": 56610 }, { "epoch": 0.4052100479496171, "grad_norm": 0.9107571840286255, "learning_rate": 5.949974951692551e-06, "loss": 0.0001, "step": 56620 }, { "epoch": 0.40528161454233164, "grad_norm": 0.0, "learning_rate": 5.949259285765406e-06, "loss": 0.0003, "step": 56630 }, { "epoch": 0.40535318113504615, "grad_norm": 0.0, "learning_rate": 5.948543619838259e-06, "loss": 0.0, "step": 56640 }, { "epoch": 0.40542474772776066, "grad_norm": 2.3498471364291618e-06, "learning_rate": 5.947827953911115e-06, "loss": 0.0, "step": 56650 }, { "epoch": 0.4054963143204752, "grad_norm": 0.0, "learning_rate": 5.947112287983969e-06, "loss": 0.0, "step": 56660 }, { "epoch": 0.40556788091318974, "grad_norm": 0.0, "learning_rate": 5.946396622056825e-06, "loss": 0.0016, "step": 56670 }, { "epoch": 0.40563944750590425, "grad_norm": 6.60484360537339e-08, "learning_rate": 5.9456809561296795e-06, "loss": 0.0002, "step": 56680 }, { "epoch": 0.40571101409861876, "grad_norm": 0.0, "learning_rate": 5.944965290202533e-06, "loss": 0.0, "step": 56690 }, { "epoch": 0.40578258069133327, "grad_norm": 0.00034770864294841886, "learning_rate": 5.944249624275389e-06, "loss": 0.0, "step": 56700 }, { "epoch": 0.4058541472840478, "grad_norm": 4.986860062672349e-07, "learning_rate": 5.943533958348243e-06, "loss": 0.0, "step": 56710 }, { "epoch": 0.40592571387676235, "grad_norm": 0.0, "learning_rate": 5.942818292421099e-06, "loss": 0.0, "step": 56720 }, { "epoch": 0.40599728046947686, "grad_norm": 0.0, "learning_rate": 5.942102626493953e-06, "loss": 0.0, "step": 56730 }, { "epoch": 0.4060688470621914, "grad_norm": 0.0, "learning_rate": 5.941386960566809e-06, "loss": 0.0, "step": 56740 }, { "epoch": 0.4061404136549059, "grad_norm": 0.07679369300603867, "learning_rate": 5.940671294639662e-06, "loss": 0.0, "step": 56750 }, { "epoch": 0.4062119802476204, "grad_norm": 0.0, "learning_rate": 5.939955628712517e-06, "loss": 0.0, "step": 56760 }, { "epoch": 0.4062835468403349, "grad_norm": 0.0, "learning_rate": 5.939239962785373e-06, "loss": 0.0451, "step": 56770 }, { "epoch": 0.4063551134330495, "grad_norm": 0.0, "learning_rate": 5.9385242968582265e-06, "loss": 0.0, "step": 56780 }, { "epoch": 0.406426680025764, "grad_norm": 0.0, "learning_rate": 5.937808630931083e-06, "loss": 0.0, "step": 56790 }, { "epoch": 0.4064982466184785, "grad_norm": 0.0, "learning_rate": 5.937092965003936e-06, "loss": 0.0106, "step": 56800 }, { "epoch": 0.406569813211193, "grad_norm": 0.0, "learning_rate": 5.936377299076791e-06, "loss": 0.0, "step": 56810 }, { "epoch": 0.4066413798039075, "grad_norm": 0.0047892010770738125, "learning_rate": 5.935661633149646e-06, "loss": 0.0, "step": 56820 }, { "epoch": 0.40671294639662203, "grad_norm": 0.0, "learning_rate": 5.9349459672225004e-06, "loss": 0.0, "step": 56830 }, { "epoch": 0.4067845129893366, "grad_norm": 0.0, "learning_rate": 5.9342303012953566e-06, "loss": 0.7996, "step": 56840 }, { "epoch": 0.4068560795820511, "grad_norm": 0.0, "learning_rate": 5.93351463536821e-06, "loss": 0.0, "step": 56850 }, { "epoch": 0.4069276461747656, "grad_norm": 0.0, "learning_rate": 5.932798969441066e-06, "loss": 0.0, "step": 56860 }, { "epoch": 0.40699921276748013, "grad_norm": 0.0, "learning_rate": 5.93208330351392e-06, "loss": 0.0, "step": 56870 }, { "epoch": 0.40707077936019465, "grad_norm": 0.0, "learning_rate": 5.931367637586774e-06, "loss": 0.0005, "step": 56880 }, { "epoch": 0.40714234595290916, "grad_norm": 0.0, "learning_rate": 5.93065197165963e-06, "loss": 0.0, "step": 56890 }, { "epoch": 0.4072139125456237, "grad_norm": 0.0034959756303578615, "learning_rate": 5.929936305732484e-06, "loss": 0.0001, "step": 56900 }, { "epoch": 0.40728547913833824, "grad_norm": 0.0, "learning_rate": 5.92922063980534e-06, "loss": 0.0001, "step": 56910 }, { "epoch": 0.40735704573105275, "grad_norm": 0.0, "learning_rate": 5.928504973878194e-06, "loss": 0.0, "step": 56920 }, { "epoch": 0.40742861232376726, "grad_norm": 0.0, "learning_rate": 5.927789307951048e-06, "loss": 0.0, "step": 56930 }, { "epoch": 0.40750017891648177, "grad_norm": 0.0, "learning_rate": 5.9270736420239036e-06, "loss": 0.0, "step": 56940 }, { "epoch": 0.4075717455091963, "grad_norm": 0.0, "learning_rate": 5.926357976096758e-06, "loss": 0.1058, "step": 56950 }, { "epoch": 0.40764331210191085, "grad_norm": 0.0, "learning_rate": 5.925642310169613e-06, "loss": 0.0279, "step": 56960 }, { "epoch": 0.40771487869462536, "grad_norm": 0.0, "learning_rate": 5.924926644242468e-06, "loss": 0.0, "step": 56970 }, { "epoch": 0.40778644528733987, "grad_norm": 3.2736395951360464e-05, "learning_rate": 5.924210978315323e-06, "loss": 0.0, "step": 56980 }, { "epoch": 0.4078580118800544, "grad_norm": 0.0, "learning_rate": 5.9234953123881775e-06, "loss": 0.0, "step": 56990 }, { "epoch": 0.4079295784727689, "grad_norm": 0.14401593804359436, "learning_rate": 5.922779646461032e-06, "loss": 0.0002, "step": 57000 }, { "epoch": 0.4080011450654834, "grad_norm": 0.0, "learning_rate": 5.922063980533887e-06, "loss": 0.0, "step": 57010 }, { "epoch": 0.408072711658198, "grad_norm": 0.0, "learning_rate": 5.921348314606742e-06, "loss": 0.0, "step": 57020 }, { "epoch": 0.4081442782509125, "grad_norm": 0.0, "learning_rate": 5.920632648679597e-06, "loss": 0.0, "step": 57030 }, { "epoch": 0.408215844843627, "grad_norm": 0.0, "learning_rate": 5.919916982752451e-06, "loss": 0.0, "step": 57040 }, { "epoch": 0.4082874114363415, "grad_norm": 9.02496867638547e-06, "learning_rate": 5.919201316825306e-06, "loss": 0.0, "step": 57050 }, { "epoch": 0.408358978029056, "grad_norm": 0.0, "learning_rate": 5.918485650898161e-06, "loss": 0.0, "step": 57060 }, { "epoch": 0.40843054462177053, "grad_norm": 0.00020666261843871325, "learning_rate": 5.9177699849710156e-06, "loss": 0.657, "step": 57070 }, { "epoch": 0.4085021112144851, "grad_norm": 0.0, "learning_rate": 5.917054319043871e-06, "loss": 0.0, "step": 57080 }, { "epoch": 0.4085736778071996, "grad_norm": 0.0, "learning_rate": 5.916338653116725e-06, "loss": 0.0, "step": 57090 }, { "epoch": 0.4086452443999141, "grad_norm": 0.0, "learning_rate": 5.91562298718958e-06, "loss": 0.0, "step": 57100 }, { "epoch": 0.40871681099262863, "grad_norm": 9.941480527686508e-09, "learning_rate": 5.914907321262435e-06, "loss": 0.0039, "step": 57110 }, { "epoch": 0.40878837758534314, "grad_norm": 0.0, "learning_rate": 5.9141916553352895e-06, "loss": 0.0, "step": 57120 }, { "epoch": 0.4088599441780577, "grad_norm": 0.0, "learning_rate": 5.913475989408145e-06, "loss": 0.0, "step": 57130 }, { "epoch": 0.4089315107707722, "grad_norm": 0.0, "learning_rate": 5.912760323480999e-06, "loss": 0.0022, "step": 57140 }, { "epoch": 0.40900307736348673, "grad_norm": 0.0, "learning_rate": 5.9120446575538545e-06, "loss": 0.0, "step": 57150 }, { "epoch": 0.40907464395620124, "grad_norm": 0.0, "learning_rate": 5.911328991626709e-06, "loss": 0.0, "step": 57160 }, { "epoch": 0.40914621054891576, "grad_norm": 0.0, "learning_rate": 5.910613325699563e-06, "loss": 0.0, "step": 57170 }, { "epoch": 0.40921777714163027, "grad_norm": 4.1458850241760103e-10, "learning_rate": 5.909897659772419e-06, "loss": 0.0, "step": 57180 }, { "epoch": 0.40928934373434483, "grad_norm": 0.0, "learning_rate": 5.909181993845273e-06, "loss": 0.0, "step": 57190 }, { "epoch": 0.40936091032705935, "grad_norm": 0.0, "learning_rate": 5.9084663279181284e-06, "loss": 0.0004, "step": 57200 }, { "epoch": 0.40943247691977386, "grad_norm": 0.0, "learning_rate": 5.907750661990983e-06, "loss": 0.0, "step": 57210 }, { "epoch": 0.40950404351248837, "grad_norm": 1.5842887890471502e-08, "learning_rate": 5.907034996063837e-06, "loss": 0.0, "step": 57220 }, { "epoch": 0.4095756101052029, "grad_norm": 0.0738564059138298, "learning_rate": 5.906319330136693e-06, "loss": 0.0, "step": 57230 }, { "epoch": 0.4096471766979174, "grad_norm": 0.0, "learning_rate": 5.905603664209547e-06, "loss": 0.0, "step": 57240 }, { "epoch": 0.40971874329063196, "grad_norm": 0.0, "learning_rate": 5.904887998282402e-06, "loss": 0.0, "step": 57250 }, { "epoch": 0.40979030988334647, "grad_norm": 4.167053369030782e-10, "learning_rate": 5.904172332355257e-06, "loss": 0.0151, "step": 57260 }, { "epoch": 0.409861876476061, "grad_norm": 0.0, "learning_rate": 5.903456666428112e-06, "loss": 0.0, "step": 57270 }, { "epoch": 0.4099334430687755, "grad_norm": 0.0, "learning_rate": 5.9027410005009665e-06, "loss": 0.0, "step": 57280 }, { "epoch": 0.41000500966149, "grad_norm": 0.00040678662480786443, "learning_rate": 5.902025334573821e-06, "loss": 0.0, "step": 57290 }, { "epoch": 0.4100765762542045, "grad_norm": 1.1518659448483959e-05, "learning_rate": 5.901309668646676e-06, "loss": 0.0, "step": 57300 }, { "epoch": 0.4101481428469191, "grad_norm": 0.0, "learning_rate": 5.900594002719531e-06, "loss": 0.0, "step": 57310 }, { "epoch": 0.4102197094396336, "grad_norm": 0.0, "learning_rate": 5.899878336792386e-06, "loss": 0.0, "step": 57320 }, { "epoch": 0.4102912760323481, "grad_norm": 0.0, "learning_rate": 5.8991626708652404e-06, "loss": 0.0, "step": 57330 }, { "epoch": 0.4103628426250626, "grad_norm": 5.167141239326156e-07, "learning_rate": 5.898447004938095e-06, "loss": 0.0, "step": 57340 }, { "epoch": 0.41043440921777713, "grad_norm": 1.1083443052228859e-09, "learning_rate": 5.89773133901095e-06, "loss": 0.0, "step": 57350 }, { "epoch": 0.41050597581049164, "grad_norm": 0.0019197560613974929, "learning_rate": 5.897015673083805e-06, "loss": 0.0, "step": 57360 }, { "epoch": 0.4105775424032062, "grad_norm": 0.0, "learning_rate": 5.89630000715666e-06, "loss": 0.0003, "step": 57370 }, { "epoch": 0.4106491089959207, "grad_norm": 0.0013133770553395152, "learning_rate": 5.895584341229514e-06, "loss": 0.0, "step": 57380 }, { "epoch": 0.41072067558863523, "grad_norm": 4.022016142357643e-08, "learning_rate": 5.89486867530237e-06, "loss": 0.0, "step": 57390 }, { "epoch": 0.41079224218134974, "grad_norm": 8.051190292235333e-08, "learning_rate": 5.894153009375224e-06, "loss": 0.0001, "step": 57400 }, { "epoch": 0.41086380877406425, "grad_norm": 0.0, "learning_rate": 5.8934373434480785e-06, "loss": 0.0, "step": 57410 }, { "epoch": 0.41093537536677877, "grad_norm": 0.0, "learning_rate": 5.892721677520934e-06, "loss": 0.0, "step": 57420 }, { "epoch": 0.41100694195949333, "grad_norm": 0.0, "learning_rate": 5.892006011593788e-06, "loss": 0.0001, "step": 57430 }, { "epoch": 0.41107850855220784, "grad_norm": 0.0, "learning_rate": 5.8912903456666436e-06, "loss": 0.0, "step": 57440 }, { "epoch": 0.41115007514492236, "grad_norm": 0.0, "learning_rate": 5.890574679739498e-06, "loss": 0.0, "step": 57450 }, { "epoch": 0.41122164173763687, "grad_norm": 0.0, "learning_rate": 5.8898590138123525e-06, "loss": 0.0, "step": 57460 }, { "epoch": 0.4112932083303514, "grad_norm": 0.0, "learning_rate": 5.889143347885208e-06, "loss": 0.0, "step": 57470 }, { "epoch": 0.4113647749230659, "grad_norm": 0.0, "learning_rate": 5.888427681958062e-06, "loss": 0.0001, "step": 57480 }, { "epoch": 0.41143634151578046, "grad_norm": 0.0, "learning_rate": 5.8877120160309175e-06, "loss": 0.0, "step": 57490 }, { "epoch": 0.41150790810849497, "grad_norm": 0.0, "learning_rate": 5.886996350103772e-06, "loss": 0.0, "step": 57500 }, { "epoch": 0.4115794747012095, "grad_norm": 0.0, "learning_rate": 5.886280684176627e-06, "loss": 0.0, "step": 57510 }, { "epoch": 0.411651041293924, "grad_norm": 0.0, "learning_rate": 5.885565018249482e-06, "loss": 0.0, "step": 57520 }, { "epoch": 0.4117226078866385, "grad_norm": 0.0, "learning_rate": 5.884849352322336e-06, "loss": 0.0007, "step": 57530 }, { "epoch": 0.411794174479353, "grad_norm": 0.0, "learning_rate": 5.884133686395191e-06, "loss": 0.0, "step": 57540 }, { "epoch": 0.4118657410720676, "grad_norm": 0.00019305766909383237, "learning_rate": 5.883418020468046e-06, "loss": 0.0, "step": 57550 }, { "epoch": 0.4119373076647821, "grad_norm": 0.0, "learning_rate": 5.882702354540901e-06, "loss": 0.0, "step": 57560 }, { "epoch": 0.4120088742574966, "grad_norm": 0.0, "learning_rate": 5.881986688613756e-06, "loss": 0.0001, "step": 57570 }, { "epoch": 0.4120804408502111, "grad_norm": 0.0, "learning_rate": 5.88127102268661e-06, "loss": 0.0036, "step": 57580 }, { "epoch": 0.4121520074429256, "grad_norm": 9.521244903609727e-10, "learning_rate": 5.880555356759465e-06, "loss": 0.0, "step": 57590 }, { "epoch": 0.41222357403564014, "grad_norm": 0.0, "learning_rate": 5.87983969083232e-06, "loss": 0.0, "step": 57600 }, { "epoch": 0.4122951406283547, "grad_norm": 1.3592559078290378e-08, "learning_rate": 5.879124024905175e-06, "loss": 0.0, "step": 57610 }, { "epoch": 0.4123667072210692, "grad_norm": 0.0, "learning_rate": 5.8784083589780295e-06, "loss": 0.0, "step": 57620 }, { "epoch": 0.41243827381378373, "grad_norm": 0.0, "learning_rate": 5.877692693050885e-06, "loss": 0.0001, "step": 57630 }, { "epoch": 0.41250984040649824, "grad_norm": 0.0, "learning_rate": 5.876977027123739e-06, "loss": 0.0002, "step": 57640 }, { "epoch": 0.41258140699921275, "grad_norm": 2.8663878381962604e-08, "learning_rate": 5.876261361196594e-06, "loss": 0.0, "step": 57650 }, { "epoch": 0.41265297359192726, "grad_norm": 6.478433988377219e-06, "learning_rate": 5.875545695269449e-06, "loss": 0.0, "step": 57660 }, { "epoch": 0.41272454018464183, "grad_norm": 0.0, "learning_rate": 5.874830029342303e-06, "loss": 0.0, "step": 57670 }, { "epoch": 0.41279610677735634, "grad_norm": 9.492566732660634e-10, "learning_rate": 5.874114363415159e-06, "loss": 0.0, "step": 57680 }, { "epoch": 0.41286767337007085, "grad_norm": 2.016063081100583e-05, "learning_rate": 5.873398697488013e-06, "loss": 0.0, "step": 57690 }, { "epoch": 0.41293923996278536, "grad_norm": 0.0, "learning_rate": 5.872683031560868e-06, "loss": 0.0, "step": 57700 }, { "epoch": 0.4130108065554999, "grad_norm": 0.0, "learning_rate": 5.871967365633723e-06, "loss": 0.0, "step": 57710 }, { "epoch": 0.4130823731482144, "grad_norm": 0.0, "learning_rate": 5.871251699706577e-06, "loss": 0.0, "step": 57720 }, { "epoch": 0.41315393974092895, "grad_norm": 0.0, "learning_rate": 5.870536033779433e-06, "loss": 0.0, "step": 57730 }, { "epoch": 0.41322550633364347, "grad_norm": 0.0, "learning_rate": 5.869820367852287e-06, "loss": 0.0, "step": 57740 }, { "epoch": 0.413297072926358, "grad_norm": 0.0, "learning_rate": 5.869104701925142e-06, "loss": 0.0, "step": 57750 }, { "epoch": 0.4133686395190725, "grad_norm": 0.0, "learning_rate": 5.868389035997997e-06, "loss": 0.0, "step": 57760 }, { "epoch": 0.413440206111787, "grad_norm": 0.0003992163110524416, "learning_rate": 5.867673370070851e-06, "loss": 0.0, "step": 57770 }, { "epoch": 0.4135117727045015, "grad_norm": 2.531459264432101e-09, "learning_rate": 5.8669577041437065e-06, "loss": 0.0, "step": 57780 }, { "epoch": 0.4135833392972161, "grad_norm": NaN, "learning_rate": 5.866313604809275e-06, "loss": 0.2738, "step": 57790 }, { "epoch": 0.4136549058899306, "grad_norm": 1.0447513432154665e-06, "learning_rate": 5.865597938882131e-06, "loss": 0.0, "step": 57800 }, { "epoch": 0.4137264724826451, "grad_norm": 0.0, "learning_rate": 5.864882272954985e-06, "loss": 0.0, "step": 57810 }, { "epoch": 0.4137980390753596, "grad_norm": 0.0, "learning_rate": 5.8641666070278405e-06, "loss": 0.0, "step": 57820 }, { "epoch": 0.4138696056680741, "grad_norm": 4.428437616610381e-10, "learning_rate": 5.863450941100695e-06, "loss": 0.0, "step": 57830 }, { "epoch": 0.41394117226078864, "grad_norm": 0.003058329690247774, "learning_rate": 5.86273527517355e-06, "loss": 0.0032, "step": 57840 }, { "epoch": 0.4140127388535032, "grad_norm": 0.0, "learning_rate": 5.862019609246405e-06, "loss": 0.0002, "step": 57850 }, { "epoch": 0.4140843054462177, "grad_norm": 4.3713555442081997e-07, "learning_rate": 5.861303943319258e-06, "loss": 0.0, "step": 57860 }, { "epoch": 0.4141558720389322, "grad_norm": 0.0, "learning_rate": 5.860588277392114e-06, "loss": 0.0, "step": 57870 }, { "epoch": 0.41422743863164674, "grad_norm": 0.0, "learning_rate": 5.859872611464969e-06, "loss": 0.5805, "step": 57880 }, { "epoch": 0.41429900522436125, "grad_norm": 0.0, "learning_rate": 5.859156945537824e-06, "loss": 0.01, "step": 57890 }, { "epoch": 0.4143705718170758, "grad_norm": 0.0, "learning_rate": 5.858441279610679e-06, "loss": 0.0095, "step": 57900 }, { "epoch": 0.41444213840979033, "grad_norm": 0.0, "learning_rate": 5.857725613683532e-06, "loss": 0.0, "step": 57910 }, { "epoch": 0.41451370500250484, "grad_norm": 0.0009798433165997267, "learning_rate": 5.857009947756388e-06, "loss": 0.0, "step": 57920 }, { "epoch": 0.41458527159521935, "grad_norm": 0.00017287940136156976, "learning_rate": 5.856294281829242e-06, "loss": 0.0, "step": 57930 }, { "epoch": 0.41465683818793386, "grad_norm": 0.0, "learning_rate": 5.855578615902098e-06, "loss": 0.0, "step": 57940 }, { "epoch": 0.4147284047806484, "grad_norm": 0.0, "learning_rate": 5.854862949974952e-06, "loss": 0.0001, "step": 57950 }, { "epoch": 0.41479997137336294, "grad_norm": 0.0, "learning_rate": 5.854147284047806e-06, "loss": 0.0, "step": 57960 }, { "epoch": 0.41487153796607745, "grad_norm": 3.8300554394687936e-10, "learning_rate": 5.853431618120662e-06, "loss": 0.0, "step": 57970 }, { "epoch": 0.41494310455879196, "grad_norm": 0.0, "learning_rate": 5.852715952193516e-06, "loss": 0.0, "step": 57980 }, { "epoch": 0.4150146711515065, "grad_norm": 0.0, "learning_rate": 5.852000286266372e-06, "loss": 0.0, "step": 57990 }, { "epoch": 0.415086237744221, "grad_norm": 0.0, "learning_rate": 5.851284620339226e-06, "loss": 0.0, "step": 58000 }, { "epoch": 0.4151578043369355, "grad_norm": 9.671976840763818e-06, "learning_rate": 5.850568954412082e-06, "loss": 0.0, "step": 58010 }, { "epoch": 0.41522937092965007, "grad_norm": 0.00013154468615539372, "learning_rate": 5.849853288484935e-06, "loss": 0.0003, "step": 58020 }, { "epoch": 0.4153009375223646, "grad_norm": 0.9215550422668457, "learning_rate": 5.84913762255779e-06, "loss": 0.0002, "step": 58030 }, { "epoch": 0.4153725041150791, "grad_norm": 0.0, "learning_rate": 5.848421956630646e-06, "loss": 0.0, "step": 58040 }, { "epoch": 0.4154440707077936, "grad_norm": 0.0, "learning_rate": 5.8477062907034995e-06, "loss": 0.0382, "step": 58050 }, { "epoch": 0.4155156373005081, "grad_norm": 0.0, "learning_rate": 5.846990624776356e-06, "loss": 0.0, "step": 58060 }, { "epoch": 0.4155872038932226, "grad_norm": 4.998611871087633e-07, "learning_rate": 5.846274958849209e-06, "loss": 0.0, "step": 58070 }, { "epoch": 0.4156587704859372, "grad_norm": 0.0, "learning_rate": 5.845559292922064e-06, "loss": 0.0, "step": 58080 }, { "epoch": 0.4157303370786517, "grad_norm": 0.0, "learning_rate": 5.844843626994919e-06, "loss": 0.0, "step": 58090 }, { "epoch": 0.4158019036713662, "grad_norm": 0.0, "learning_rate": 5.844127961067773e-06, "loss": 0.0001, "step": 58100 }, { "epoch": 0.4158734702640807, "grad_norm": 7.429644756484777e-05, "learning_rate": 5.8434122951406296e-06, "loss": 0.0, "step": 58110 }, { "epoch": 0.41594503685679524, "grad_norm": 0.0, "learning_rate": 5.842696629213483e-06, "loss": 0.0, "step": 58120 }, { "epoch": 0.41601660344950975, "grad_norm": 0.0, "learning_rate": 5.841980963286339e-06, "loss": 0.0031, "step": 58130 }, { "epoch": 0.4160881700422243, "grad_norm": 0.000698567891959101, "learning_rate": 5.841265297359193e-06, "loss": 0.0486, "step": 58140 }, { "epoch": 0.4161597366349388, "grad_norm": 0.0, "learning_rate": 5.840549631432047e-06, "loss": 0.0, "step": 58150 }, { "epoch": 0.41623130322765334, "grad_norm": 0.0, "learning_rate": 5.839833965504903e-06, "loss": 0.0, "step": 58160 }, { "epoch": 0.41630286982036785, "grad_norm": 0.0, "learning_rate": 5.839118299577757e-06, "loss": 0.0, "step": 58170 }, { "epoch": 0.41637443641308236, "grad_norm": 0.0, "learning_rate": 5.838402633650612e-06, "loss": 0.0, "step": 58180 }, { "epoch": 0.41644600300579687, "grad_norm": 1.5628504753112793, "learning_rate": 5.837686967723467e-06, "loss": 0.0002, "step": 58190 }, { "epoch": 0.41651756959851144, "grad_norm": 0.03353013098239899, "learning_rate": 5.836971301796321e-06, "loss": 0.0, "step": 58200 }, { "epoch": 0.41658913619122595, "grad_norm": 0.0, "learning_rate": 5.8362556358691765e-06, "loss": 0.1964, "step": 58210 }, { "epoch": 0.41666070278394046, "grad_norm": 0.0, "learning_rate": 5.835539969942031e-06, "loss": 0.0, "step": 58220 }, { "epoch": 0.416732269376655, "grad_norm": 0.0, "learning_rate": 5.834824304014886e-06, "loss": 0.0, "step": 58230 }, { "epoch": 0.4168038359693695, "grad_norm": 4.522297536446729e-10, "learning_rate": 5.834108638087741e-06, "loss": 1.0133, "step": 58240 }, { "epoch": 0.416875402562084, "grad_norm": 0.0, "learning_rate": 5.833392972160596e-06, "loss": 0.0008, "step": 58250 }, { "epoch": 0.41694696915479856, "grad_norm": 485.55572509765625, "learning_rate": 5.8326773062334505e-06, "loss": 0.3685, "step": 58260 }, { "epoch": 0.4170185357475131, "grad_norm": 0.0, "learning_rate": 5.831961640306305e-06, "loss": 0.0, "step": 58270 }, { "epoch": 0.4170901023402276, "grad_norm": 4.849654566818629e-10, "learning_rate": 5.83124597437916e-06, "loss": 0.1014, "step": 58280 }, { "epoch": 0.4171616689329421, "grad_norm": 2.8396103382110596, "learning_rate": 5.830530308452015e-06, "loss": 0.0008, "step": 58290 }, { "epoch": 0.4172332355256566, "grad_norm": 0.0, "learning_rate": 5.82981464252487e-06, "loss": 0.0, "step": 58300 }, { "epoch": 0.4173048021183711, "grad_norm": 0.0, "learning_rate": 5.829098976597724e-06, "loss": 0.0, "step": 58310 }, { "epoch": 0.4173763687110857, "grad_norm": 0.0, "learning_rate": 5.828383310670579e-06, "loss": 0.0, "step": 58320 }, { "epoch": 0.4174479353038002, "grad_norm": 0.0, "learning_rate": 5.827667644743434e-06, "loss": 0.4289, "step": 58330 }, { "epoch": 0.4175195018965147, "grad_norm": 0.0, "learning_rate": 5.8269519788162886e-06, "loss": 0.0348, "step": 58340 }, { "epoch": 0.4175910684892292, "grad_norm": 4.737472863780567e-06, "learning_rate": 5.826236312889144e-06, "loss": 0.0001, "step": 58350 }, { "epoch": 0.41766263508194373, "grad_norm": 0.0001520012301625684, "learning_rate": 5.825520646961998e-06, "loss": 0.001, "step": 58360 }, { "epoch": 0.41773420167465825, "grad_norm": 0.0, "learning_rate": 5.824804981034854e-06, "loss": 0.0, "step": 58370 }, { "epoch": 0.4178057682673728, "grad_norm": 0.0, "learning_rate": 5.824089315107708e-06, "loss": 0.0, "step": 58380 }, { "epoch": 0.4178773348600873, "grad_norm": 0.0, "learning_rate": 5.8233736491805625e-06, "loss": 0.0, "step": 58390 }, { "epoch": 0.41794890145280184, "grad_norm": 0.0, "learning_rate": 5.822657983253418e-06, "loss": 0.4676, "step": 58400 }, { "epoch": 0.41802046804551635, "grad_norm": 0.0, "learning_rate": 5.821942317326272e-06, "loss": 0.0, "step": 58410 }, { "epoch": 0.41809203463823086, "grad_norm": 0.0, "learning_rate": 5.8212266513991275e-06, "loss": 0.0, "step": 58420 }, { "epoch": 0.41816360123094537, "grad_norm": 0.0, "learning_rate": 5.820510985471982e-06, "loss": 0.0, "step": 58430 }, { "epoch": 0.41823516782365994, "grad_norm": 0.0, "learning_rate": 5.819795319544836e-06, "loss": 0.0, "step": 58440 }, { "epoch": 0.41830673441637445, "grad_norm": 0.0, "learning_rate": 5.819079653617692e-06, "loss": 0.0, "step": 58450 }, { "epoch": 0.41837830100908896, "grad_norm": 0.0034259045496582985, "learning_rate": 5.818363987690546e-06, "loss": 0.0, "step": 58460 }, { "epoch": 0.41844986760180347, "grad_norm": 4.478689918929746e-10, "learning_rate": 5.817648321763401e-06, "loss": 0.0, "step": 58470 }, { "epoch": 0.418521434194518, "grad_norm": 0.0, "learning_rate": 5.816932655836256e-06, "loss": 0.0006, "step": 58480 }, { "epoch": 0.4185930007872325, "grad_norm": 5.038820631853014e-07, "learning_rate": 5.816216989909111e-06, "loss": 0.0, "step": 58490 }, { "epoch": 0.41866456737994706, "grad_norm": 0.0, "learning_rate": 5.815501323981966e-06, "loss": 0.0, "step": 58500 }, { "epoch": 0.4187361339726616, "grad_norm": 0.0, "learning_rate": 5.81478565805482e-06, "loss": 0.0, "step": 58510 }, { "epoch": 0.4188077005653761, "grad_norm": 0.0, "learning_rate": 5.814069992127675e-06, "loss": 0.0, "step": 58520 }, { "epoch": 0.4188792671580906, "grad_norm": 7.591944722662447e-06, "learning_rate": 5.81335432620053e-06, "loss": 0.0, "step": 58530 }, { "epoch": 0.4189508337508051, "grad_norm": 0.0, "learning_rate": 5.812638660273385e-06, "loss": 0.0, "step": 58540 }, { "epoch": 0.4190224003435196, "grad_norm": 0.0, "learning_rate": 5.8119229943462395e-06, "loss": 0.0, "step": 58550 }, { "epoch": 0.4190939669362342, "grad_norm": 0.0, "learning_rate": 5.811207328419094e-06, "loss": 0.0, "step": 58560 }, { "epoch": 0.4191655335289487, "grad_norm": 0.0, "learning_rate": 5.810491662491949e-06, "loss": 0.0, "step": 58570 }, { "epoch": 0.4192371001216632, "grad_norm": 0.004799652379006147, "learning_rate": 5.809775996564804e-06, "loss": 0.0, "step": 58580 }, { "epoch": 0.4193086667143777, "grad_norm": 0.0, "learning_rate": 5.809060330637659e-06, "loss": 0.0, "step": 58590 }, { "epoch": 0.41938023330709223, "grad_norm": 0.0, "learning_rate": 5.8083446647105134e-06, "loss": 0.0, "step": 58600 }, { "epoch": 0.41945179989980674, "grad_norm": 0.0005704321665689349, "learning_rate": 5.807628998783369e-06, "loss": 0.0, "step": 58610 }, { "epoch": 0.4195233664925213, "grad_norm": 0.0, "learning_rate": 5.806913332856223e-06, "loss": 0.0001, "step": 58620 }, { "epoch": 0.4195949330852358, "grad_norm": 0.0, "learning_rate": 5.806197666929078e-06, "loss": 0.0, "step": 58630 }, { "epoch": 0.41966649967795033, "grad_norm": 6.154490694854076e-09, "learning_rate": 5.805482001001933e-06, "loss": 0.0, "step": 58640 }, { "epoch": 0.41973806627066484, "grad_norm": 0.0, "learning_rate": 5.804766335074787e-06, "loss": 0.0, "step": 58650 }, { "epoch": 0.41980963286337936, "grad_norm": 0.0, "learning_rate": 5.804050669147643e-06, "loss": 0.0099, "step": 58660 }, { "epoch": 0.41988119945609387, "grad_norm": 0.0, "learning_rate": 5.803335003220497e-06, "loss": 0.0, "step": 58670 }, { "epoch": 0.41995276604880843, "grad_norm": 0.0006774553330615163, "learning_rate": 5.8026193372933515e-06, "loss": 0.0, "step": 58680 }, { "epoch": 0.42002433264152295, "grad_norm": 0.0, "learning_rate": 5.801903671366207e-06, "loss": 0.0, "step": 58690 }, { "epoch": 0.42009589923423746, "grad_norm": 0.0, "learning_rate": 5.801188005439061e-06, "loss": 0.0, "step": 58700 }, { "epoch": 0.42016746582695197, "grad_norm": 0.0, "learning_rate": 5.8004723395119166e-06, "loss": 0.0, "step": 58710 }, { "epoch": 0.4202390324196665, "grad_norm": 4.770633887929421e-10, "learning_rate": 5.799756673584771e-06, "loss": 0.2164, "step": 58720 }, { "epoch": 0.42031059901238105, "grad_norm": 0.0, "learning_rate": 5.7990410076576254e-06, "loss": 0.0, "step": 58730 }, { "epoch": 0.42038216560509556, "grad_norm": 0.0, "learning_rate": 5.798325341730481e-06, "loss": 0.6168, "step": 58740 }, { "epoch": 0.42045373219781007, "grad_norm": 0.0, "learning_rate": 5.797609675803335e-06, "loss": 0.0001, "step": 58750 }, { "epoch": 0.4205252987905246, "grad_norm": 0.0, "learning_rate": 5.7968940098761905e-06, "loss": 0.0686, "step": 58760 }, { "epoch": 0.4205968653832391, "grad_norm": 7.978289795573801e-05, "learning_rate": 5.796178343949045e-06, "loss": 0.0, "step": 58770 }, { "epoch": 0.4206684319759536, "grad_norm": 4.0520922728326525e-10, "learning_rate": 5.7954626780219e-06, "loss": 0.0, "step": 58780 }, { "epoch": 0.42073999856866817, "grad_norm": 0.0, "learning_rate": 5.794747012094755e-06, "loss": 0.0, "step": 58790 }, { "epoch": 0.4208115651613827, "grad_norm": 0.0, "learning_rate": 5.794031346167609e-06, "loss": 0.0, "step": 58800 }, { "epoch": 0.4208831317540972, "grad_norm": 1.453379536542343e-05, "learning_rate": 5.793315680240464e-06, "loss": 0.0, "step": 58810 }, { "epoch": 0.4209546983468117, "grad_norm": 0.0, "learning_rate": 5.792600014313319e-06, "loss": 0.0, "step": 58820 }, { "epoch": 0.4210262649395262, "grad_norm": 0.0, "learning_rate": 5.791884348386174e-06, "loss": 0.0, "step": 58830 }, { "epoch": 0.42109783153224073, "grad_norm": 0.0, "learning_rate": 5.7911686824590286e-06, "loss": 0.0, "step": 58840 }, { "epoch": 0.4211693981249553, "grad_norm": 0.0, "learning_rate": 5.790453016531883e-06, "loss": 0.0001, "step": 58850 }, { "epoch": 0.4212409647176698, "grad_norm": 0.0, "learning_rate": 5.789737350604738e-06, "loss": 0.0, "step": 58860 }, { "epoch": 0.4213125313103843, "grad_norm": 0.0, "learning_rate": 5.789021684677593e-06, "loss": 0.0, "step": 58870 }, { "epoch": 0.42138409790309883, "grad_norm": 0.0, "learning_rate": 5.788306018750448e-06, "loss": 0.0002, "step": 58880 }, { "epoch": 0.42145566449581334, "grad_norm": 0.0, "learning_rate": 5.7875903528233025e-06, "loss": 0.0678, "step": 58890 }, { "epoch": 0.42152723108852785, "grad_norm": 0.0, "learning_rate": 5.786874686896158e-06, "loss": 0.0, "step": 58900 }, { "epoch": 0.4215987976812424, "grad_norm": 0.0, "learning_rate": 5.786159020969012e-06, "loss": 0.0, "step": 58910 }, { "epoch": 0.42167036427395693, "grad_norm": 0.0, "learning_rate": 5.785443355041867e-06, "loss": 0.0, "step": 58920 }, { "epoch": 0.42174193086667144, "grad_norm": 0.0, "learning_rate": 5.784727689114722e-06, "loss": 0.0, "step": 58930 }, { "epoch": 0.42181349745938596, "grad_norm": 0.0, "learning_rate": 5.784012023187576e-06, "loss": 0.0, "step": 58940 }, { "epoch": 0.42188506405210047, "grad_norm": 0.0, "learning_rate": 5.783296357260432e-06, "loss": 0.0322, "step": 58950 }, { "epoch": 0.421956630644815, "grad_norm": 0.0, "learning_rate": 5.782580691333286e-06, "loss": 0.001, "step": 58960 }, { "epoch": 0.42202819723752955, "grad_norm": 0.0, "learning_rate": 5.781865025406141e-06, "loss": 0.1753, "step": 58970 }, { "epoch": 0.42209976383024406, "grad_norm": 0.0, "learning_rate": 5.781149359478996e-06, "loss": 0.0001, "step": 58980 }, { "epoch": 0.42217133042295857, "grad_norm": 0.0, "learning_rate": 5.78043369355185e-06, "loss": 0.0, "step": 58990 }, { "epoch": 0.4222428970156731, "grad_norm": 0.0, "learning_rate": 5.779718027624706e-06, "loss": 0.1792, "step": 59000 }, { "epoch": 0.4223144636083876, "grad_norm": 0.188076451420784, "learning_rate": 5.77900236169756e-06, "loss": 0.0005, "step": 59010 }, { "epoch": 0.4223860302011021, "grad_norm": 0.0, "learning_rate": 5.778286695770415e-06, "loss": 0.0, "step": 59020 }, { "epoch": 0.42245759679381667, "grad_norm": 2.2529500043333428e-08, "learning_rate": 5.77757102984327e-06, "loss": 0.0, "step": 59030 }, { "epoch": 0.4225291633865312, "grad_norm": 0.0, "learning_rate": 5.776855363916124e-06, "loss": 0.0, "step": 59040 }, { "epoch": 0.4226007299792457, "grad_norm": 0.00014548336912412196, "learning_rate": 5.7761396979889795e-06, "loss": 0.1406, "step": 59050 }, { "epoch": 0.4226722965719602, "grad_norm": 0.0, "learning_rate": 5.775424032061834e-06, "loss": 0.0, "step": 59060 }, { "epoch": 0.4227438631646747, "grad_norm": 1.311936671299918e-06, "learning_rate": 5.774708366134689e-06, "loss": 0.0, "step": 59070 }, { "epoch": 0.4228154297573892, "grad_norm": 0.05434407666325569, "learning_rate": 5.773992700207544e-06, "loss": 0.0, "step": 59080 }, { "epoch": 0.4228869963501038, "grad_norm": 0.0, "learning_rate": 5.773277034280398e-06, "loss": 0.0, "step": 59090 }, { "epoch": 0.4229585629428183, "grad_norm": 7.847879169275984e-05, "learning_rate": 5.7725613683532535e-06, "loss": 0.0001, "step": 59100 }, { "epoch": 0.4230301295355328, "grad_norm": 0.0, "learning_rate": 5.771845702426108e-06, "loss": 0.0, "step": 59110 }, { "epoch": 0.42310169612824733, "grad_norm": 0.0, "learning_rate": 5.771130036498963e-06, "loss": 0.0, "step": 59120 }, { "epoch": 0.42317326272096184, "grad_norm": 0.0, "learning_rate": 5.770414370571818e-06, "loss": 0.0, "step": 59130 }, { "epoch": 0.42324482931367635, "grad_norm": 0.008860455825924873, "learning_rate": 5.769698704644673e-06, "loss": 0.0, "step": 59140 }, { "epoch": 0.4233163959063909, "grad_norm": 0.0, "learning_rate": 5.768983038717527e-06, "loss": 0.0, "step": 59150 }, { "epoch": 0.42338796249910543, "grad_norm": 0.0, "learning_rate": 5.768267372790382e-06, "loss": 0.0, "step": 59160 }, { "epoch": 0.42345952909181994, "grad_norm": 0.0, "learning_rate": 5.767551706863237e-06, "loss": 0.0, "step": 59170 }, { "epoch": 0.42353109568453445, "grad_norm": 0.0, "learning_rate": 5.7668360409360915e-06, "loss": 0.0004, "step": 59180 }, { "epoch": 0.42360266227724896, "grad_norm": 0.0, "learning_rate": 5.766120375008947e-06, "loss": 0.0001, "step": 59190 }, { "epoch": 0.4236742288699635, "grad_norm": 0.0, "learning_rate": 5.765404709081801e-06, "loss": 0.1063, "step": 59200 }, { "epoch": 0.42374579546267804, "grad_norm": 0.0, "learning_rate": 5.764689043154656e-06, "loss": 0.0001, "step": 59210 }, { "epoch": 0.42381736205539255, "grad_norm": 2.90765944654936e-09, "learning_rate": 5.763973377227511e-06, "loss": 0.0, "step": 59220 }, { "epoch": 0.42388892864810707, "grad_norm": 0.0, "learning_rate": 5.7632577113003655e-06, "loss": 0.0, "step": 59230 }, { "epoch": 0.4239604952408216, "grad_norm": 4.4642758934010374e-10, "learning_rate": 5.762542045373221e-06, "loss": 0.011, "step": 59240 }, { "epoch": 0.4240320618335361, "grad_norm": 0.0, "learning_rate": 5.761826379446075e-06, "loss": 0.0, "step": 59250 }, { "epoch": 0.4241036284262506, "grad_norm": 0.0, "learning_rate": 5.7611107135189305e-06, "loss": 0.0, "step": 59260 }, { "epoch": 0.42417519501896517, "grad_norm": 0.0, "learning_rate": 5.760395047591785e-06, "loss": 0.0048, "step": 59270 }, { "epoch": 0.4242467616116797, "grad_norm": 0.0, "learning_rate": 5.759679381664639e-06, "loss": 0.0007, "step": 59280 }, { "epoch": 0.4243183282043942, "grad_norm": 0.0, "learning_rate": 5.758963715737495e-06, "loss": 0.0, "step": 59290 }, { "epoch": 0.4243898947971087, "grad_norm": 0.0, "learning_rate": 5.758248049810349e-06, "loss": 0.0409, "step": 59300 }, { "epoch": 0.4244614613898232, "grad_norm": 0.0, "learning_rate": 5.757532383883204e-06, "loss": 0.0725, "step": 59310 }, { "epoch": 0.4245330279825377, "grad_norm": 0.0, "learning_rate": 5.756816717956059e-06, "loss": 0.4396, "step": 59320 }, { "epoch": 0.4246045945752523, "grad_norm": 0.0, "learning_rate": 5.756101052028913e-06, "loss": 0.0, "step": 59330 }, { "epoch": 0.4246761611679668, "grad_norm": 0.0, "learning_rate": 5.755385386101769e-06, "loss": 0.0, "step": 59340 }, { "epoch": 0.4247477277606813, "grad_norm": 3.212826868548291e-06, "learning_rate": 5.754669720174623e-06, "loss": 0.0004, "step": 59350 }, { "epoch": 0.4248192943533958, "grad_norm": 0.0, "learning_rate": 5.753954054247478e-06, "loss": 0.0429, "step": 59360 }, { "epoch": 0.42489086094611034, "grad_norm": 0.0, "learning_rate": 5.753238388320333e-06, "loss": 0.0, "step": 59370 }, { "epoch": 0.42496242753882485, "grad_norm": 0.0, "learning_rate": 5.752522722393188e-06, "loss": 0.0, "step": 59380 }, { "epoch": 0.4250339941315394, "grad_norm": 6.4468822529306635e-06, "learning_rate": 5.7518070564660425e-06, "loss": 0.0028, "step": 59390 }, { "epoch": 0.42510556072425393, "grad_norm": 0.0, "learning_rate": 5.751091390538897e-06, "loss": 0.0, "step": 59400 }, { "epoch": 0.42517712731696844, "grad_norm": 0.0, "learning_rate": 5.750375724611752e-06, "loss": 0.0, "step": 59410 }, { "epoch": 0.42524869390968295, "grad_norm": 0.0, "learning_rate": 5.749660058684607e-06, "loss": 0.0139, "step": 59420 }, { "epoch": 0.42532026050239746, "grad_norm": 0.0, "learning_rate": 5.748944392757462e-06, "loss": 0.0, "step": 59430 }, { "epoch": 0.425391827095112, "grad_norm": 4.5439017526405223e-07, "learning_rate": 5.7482287268303164e-06, "loss": 0.0002, "step": 59440 }, { "epoch": 0.42546339368782654, "grad_norm": 0.0, "learning_rate": 5.74751306090317e-06, "loss": 0.0, "step": 59450 }, { "epoch": 0.42553496028054105, "grad_norm": 0.0, "learning_rate": 5.746797394976026e-06, "loss": 0.0, "step": 59460 }, { "epoch": 0.42560652687325556, "grad_norm": 3.5687660329131177e-06, "learning_rate": 5.74608172904888e-06, "loss": 0.0, "step": 59470 }, { "epoch": 0.4256780934659701, "grad_norm": 2.3992382125470613e-07, "learning_rate": 5.745366063121736e-06, "loss": 0.0, "step": 59480 }, { "epoch": 0.4257496600586846, "grad_norm": 1.0681111461963155e-06, "learning_rate": 5.74465039719459e-06, "loss": 0.0, "step": 59490 }, { "epoch": 0.42582122665139915, "grad_norm": 0.0, "learning_rate": 5.743934731267444e-06, "loss": 0.0, "step": 59500 }, { "epoch": 0.42589279324411367, "grad_norm": 45.31603240966797, "learning_rate": 5.7432190653403e-06, "loss": 0.0086, "step": 59510 }, { "epoch": 0.4259643598368282, "grad_norm": 0.0, "learning_rate": 5.742503399413154e-06, "loss": 0.0, "step": 59520 }, { "epoch": 0.4260359264295427, "grad_norm": 724.7086181640625, "learning_rate": 5.74178773348601e-06, "loss": 0.5141, "step": 59530 }, { "epoch": 0.4261074930222572, "grad_norm": 0.0, "learning_rate": 5.741072067558863e-06, "loss": 0.048, "step": 59540 }, { "epoch": 0.4261790596149717, "grad_norm": 0.0, "learning_rate": 5.7403564016317196e-06, "loss": 0.0, "step": 59550 }, { "epoch": 0.4262506262076863, "grad_norm": 0.0, "learning_rate": 5.739640735704574e-06, "loss": 0.0004, "step": 59560 }, { "epoch": 0.4263221928004008, "grad_norm": 0.0, "learning_rate": 5.738925069777428e-06, "loss": 0.0, "step": 59570 }, { "epoch": 0.4263937593931153, "grad_norm": 0.0, "learning_rate": 5.738209403850284e-06, "loss": 0.0012, "step": 59580 }, { "epoch": 0.4264653259858298, "grad_norm": 0.0, "learning_rate": 5.737493737923137e-06, "loss": 0.0, "step": 59590 }, { "epoch": 0.4265368925785443, "grad_norm": 17.452863693237305, "learning_rate": 5.7367780719959935e-06, "loss": 0.0036, "step": 59600 }, { "epoch": 0.42660845917125884, "grad_norm": 0.0, "learning_rate": 5.736062406068847e-06, "loss": 0.0, "step": 59610 }, { "epoch": 0.4266800257639734, "grad_norm": 1.7125504570003613e-08, "learning_rate": 5.7353467401417015e-06, "loss": 0.0358, "step": 59620 }, { "epoch": 0.4267515923566879, "grad_norm": 0.0, "learning_rate": 5.734631074214557e-06, "loss": 0.0, "step": 59630 }, { "epoch": 0.4268231589494024, "grad_norm": 0.0, "learning_rate": 5.733915408287411e-06, "loss": 0.0004, "step": 59640 }, { "epoch": 0.42689472554211694, "grad_norm": 0.0, "learning_rate": 5.733199742360267e-06, "loss": 0.0, "step": 59650 }, { "epoch": 0.42696629213483145, "grad_norm": 0.0, "learning_rate": 5.732484076433121e-06, "loss": 0.0, "step": 59660 }, { "epoch": 0.42703785872754596, "grad_norm": 0.0, "learning_rate": 5.731768410505977e-06, "loss": 0.0, "step": 59670 }, { "epoch": 0.4271094253202605, "grad_norm": 0.0, "learning_rate": 5.731052744578831e-06, "loss": 0.0, "step": 59680 }, { "epoch": 0.42718099191297504, "grad_norm": 0.0, "learning_rate": 5.730337078651685e-06, "loss": 0.0, "step": 59690 }, { "epoch": 0.42725255850568955, "grad_norm": 0.0, "learning_rate": 5.7296214127245405e-06, "loss": 0.0, "step": 59700 }, { "epoch": 0.42732412509840406, "grad_norm": 0.0, "learning_rate": 5.728905746797395e-06, "loss": 0.0, "step": 59710 }, { "epoch": 0.4273956916911186, "grad_norm": 1.3752892300544772e-06, "learning_rate": 5.728190080870251e-06, "loss": 0.0001, "step": 59720 }, { "epoch": 0.4274672582838331, "grad_norm": 0.0, "learning_rate": 5.727474414943105e-06, "loss": 0.0, "step": 59730 }, { "epoch": 0.42753882487654765, "grad_norm": 0.0, "learning_rate": 5.726758749015959e-06, "loss": 0.0001, "step": 59740 }, { "epoch": 0.42761039146926216, "grad_norm": 4.762013912200928, "learning_rate": 5.726043083088814e-06, "loss": 0.001, "step": 59750 }, { "epoch": 0.4276819580619767, "grad_norm": 0.0, "learning_rate": 5.725327417161669e-06, "loss": 0.0, "step": 59760 }, { "epoch": 0.4277535246546912, "grad_norm": 0.0, "learning_rate": 5.724611751234524e-06, "loss": 0.0, "step": 59770 }, { "epoch": 0.4278250912474057, "grad_norm": 3.50750411826084e-07, "learning_rate": 5.7238960853073786e-06, "loss": 0.0, "step": 59780 }, { "epoch": 0.4278966578401202, "grad_norm": 5.986990345263621e-06, "learning_rate": 5.723180419380235e-06, "loss": 0.5363, "step": 59790 }, { "epoch": 0.4279682244328348, "grad_norm": 0.0, "learning_rate": 5.722464753453088e-06, "loss": 0.0, "step": 59800 }, { "epoch": 0.4280397910255493, "grad_norm": 0.0, "learning_rate": 5.721749087525943e-06, "loss": 0.0, "step": 59810 }, { "epoch": 0.4281113576182638, "grad_norm": 0.0, "learning_rate": 5.721033421598798e-06, "loss": 0.0, "step": 59820 }, { "epoch": 0.4281829242109783, "grad_norm": 0.0, "learning_rate": 5.7203177556716525e-06, "loss": 0.0001, "step": 59830 }, { "epoch": 0.4282544908036928, "grad_norm": 0.0, "learning_rate": 5.719602089744508e-06, "loss": 0.0, "step": 59840 }, { "epoch": 0.42832605739640733, "grad_norm": 0.0, "learning_rate": 5.718886423817362e-06, "loss": 0.0, "step": 59850 }, { "epoch": 0.4283976239891219, "grad_norm": 0.0, "learning_rate": 5.718170757890217e-06, "loss": 0.0143, "step": 59860 }, { "epoch": 0.4284691905818364, "grad_norm": 0.0, "learning_rate": 5.717455091963072e-06, "loss": 0.0, "step": 59870 }, { "epoch": 0.4285407571745509, "grad_norm": 3.6381930357265446e-08, "learning_rate": 5.716739426035926e-06, "loss": 0.0, "step": 59880 }, { "epoch": 0.42861232376726544, "grad_norm": 8.919822676034528e-07, "learning_rate": 5.716023760108782e-06, "loss": 0.0, "step": 59890 }, { "epoch": 0.42868389035997995, "grad_norm": 0.0, "learning_rate": 5.715308094181636e-06, "loss": 0.0, "step": 59900 }, { "epoch": 0.42875545695269446, "grad_norm": 0.0, "learning_rate": 5.714663994847206e-06, "loss": 0.1864, "step": 59910 }, { "epoch": 0.428827023545409, "grad_norm": 0.0, "learning_rate": 5.71394832892006e-06, "loss": 0.0, "step": 59920 }, { "epoch": 0.42889859013812354, "grad_norm": 6.087960173317697e-06, "learning_rate": 5.713232662992916e-06, "loss": 0.0, "step": 59930 }, { "epoch": 0.42897015673083805, "grad_norm": 0.0, "learning_rate": 5.71251699706577e-06, "loss": 0.0005, "step": 59940 }, { "epoch": 0.42904172332355256, "grad_norm": 0.0, "learning_rate": 5.7118013311386245e-06, "loss": 0.0, "step": 59950 }, { "epoch": 0.42911328991626707, "grad_norm": 0.0, "learning_rate": 5.71108566521148e-06, "loss": 0.1416, "step": 59960 }, { "epoch": 0.4291848565089816, "grad_norm": 0.0, "learning_rate": 5.710369999284334e-06, "loss": 0.0, "step": 59970 }, { "epoch": 0.42925642310169615, "grad_norm": 0.0, "learning_rate": 5.7096543333571895e-06, "loss": 0.0, "step": 59980 }, { "epoch": 0.42932798969441066, "grad_norm": 33.44990539550781, "learning_rate": 5.708938667430044e-06, "loss": 0.0051, "step": 59990 }, { "epoch": 0.4293995562871252, "grad_norm": 9.146350898880939e-10, "learning_rate": 5.708223001502899e-06, "loss": 0.0, "step": 60000 } ], "logging_steps": 10, "max_steps": 139730, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }