|
{ |
|
"best_metric": 0.8273947246645071, |
|
"best_model_checkpoint": "/tmp/logs/binary_classification_model_v3.1.5_spines/checkpoint-414", |
|
"epoch": 46.0, |
|
"eval_steps": 500, |
|
"global_step": 414, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 462486.0, |
|
"learning_rate": 1.148369315571888e-08, |
|
"loss": 0.3999, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 498353.25, |
|
"learning_rate": 2.296738631143776e-08, |
|
"loss": 0.4304, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 369319.8125, |
|
"learning_rate": 3.4451079467156634e-08, |
|
"loss": 0.4205, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 433521.6875, |
|
"learning_rate": 4.593477262287552e-08, |
|
"loss": 0.3986, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 341315.84375, |
|
"learning_rate": 5.74184657785944e-08, |
|
"loss": 0.4026, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 292268.09375, |
|
"learning_rate": 6.890215893431327e-08, |
|
"loss": 0.3945, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 265327.46875, |
|
"learning_rate": 8.038585209003216e-08, |
|
"loss": 0.3947, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 311655.9375, |
|
"learning_rate": 9.186954524575104e-08, |
|
"loss": 0.408, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 534967.1875, |
|
"learning_rate": 1.0335323840146992e-07, |
|
"loss": 0.4136, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.809600367478181, |
|
"eval_f1": 0.7938323800049739, |
|
"eval_loss": 0.41161027550697327, |
|
"eval_precision": 0.8218331616889805, |
|
"eval_recall": 0.7676767676767676, |
|
"eval_runtime": 3.7209, |
|
"eval_samples_per_second": 1170.135, |
|
"eval_steps_per_second": 0.806, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 436558.1875, |
|
"learning_rate": 1.148369315571888e-07, |
|
"loss": 0.4109, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 183180.109375, |
|
"learning_rate": 1.2632062471290768e-07, |
|
"loss": 0.3964, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 353782.28125, |
|
"learning_rate": 1.3780431786862654e-07, |
|
"loss": 0.4047, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 326118.3125, |
|
"learning_rate": 1.4928801102434544e-07, |
|
"loss": 0.3885, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 196583.015625, |
|
"learning_rate": 1.6077170418006432e-07, |
|
"loss": 0.3977, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 88230.421875, |
|
"learning_rate": 1.722553973357832e-07, |
|
"loss": 0.3985, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 102945.453125, |
|
"learning_rate": 1.8373909049150207e-07, |
|
"loss": 0.4188, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 132400.3125, |
|
"learning_rate": 1.9522278364722095e-07, |
|
"loss": 0.3939, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 187392.875, |
|
"learning_rate": 2.0670647680293983e-07, |
|
"loss": 0.4189, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8086816720257235, |
|
"eval_f1": 0.7966804979253111, |
|
"eval_loss": 0.41029924154281616, |
|
"eval_precision": 0.8087215064420218, |
|
"eval_recall": 0.784992784992785, |
|
"eval_runtime": 3.8262, |
|
"eval_samples_per_second": 1137.932, |
|
"eval_steps_per_second": 0.784, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 165157.5, |
|
"learning_rate": 2.181901699586587e-07, |
|
"loss": 0.3957, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 209579.34375, |
|
"learning_rate": 2.296738631143776e-07, |
|
"loss": 0.4188, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 147438.40625, |
|
"learning_rate": 2.4115755627009647e-07, |
|
"loss": 0.4076, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 164255.859375, |
|
"learning_rate": 2.5264124942581537e-07, |
|
"loss": 0.399, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 68289.0234375, |
|
"learning_rate": 2.641249425815342e-07, |
|
"loss": 0.4097, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 116692.9453125, |
|
"learning_rate": 2.7560863573725307e-07, |
|
"loss": 0.3804, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 144575.59375, |
|
"learning_rate": 2.87092328892972e-07, |
|
"loss": 0.3957, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 69518.9375, |
|
"learning_rate": 2.985760220486909e-07, |
|
"loss": 0.3953, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 198278.28125, |
|
"learning_rate": 3.100597152044098e-07, |
|
"loss": 0.3883, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8118971061093248, |
|
"eval_f1": 0.8021261174196666, |
|
"eval_loss": 0.4097851514816284, |
|
"eval_precision": 0.8058252427184466, |
|
"eval_recall": 0.7984607984607984, |
|
"eval_runtime": 3.8584, |
|
"eval_samples_per_second": 1128.45, |
|
"eval_steps_per_second": 0.778, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 104717.0, |
|
"learning_rate": 3.2154340836012864e-07, |
|
"loss": 0.3916, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 3.2222222222222223, |
|
"grad_norm": 70841.9375, |
|
"learning_rate": 3.330271015158475e-07, |
|
"loss": 0.3811, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 225365.171875, |
|
"learning_rate": 3.445107946715664e-07, |
|
"loss": 0.4059, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.4444444444444446, |
|
"grad_norm": 62955.94140625, |
|
"learning_rate": 3.5599448782728525e-07, |
|
"loss": 0.3893, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 193636.390625, |
|
"learning_rate": 3.6747818098300415e-07, |
|
"loss": 0.4119, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 166762.671875, |
|
"learning_rate": 3.7896187413872305e-07, |
|
"loss": 0.4034, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 201156.375, |
|
"learning_rate": 3.904455672944419e-07, |
|
"loss": 0.4015, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 104657.8671875, |
|
"learning_rate": 4.0192926045016076e-07, |
|
"loss": 0.4104, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 176918.875, |
|
"learning_rate": 4.1341295360587966e-07, |
|
"loss": 0.3992, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.811437758383096, |
|
"eval_f1": 0.8013549479796758, |
|
"eval_loss": 0.4091891646385193, |
|
"eval_precision": 0.8062317429406037, |
|
"eval_recall": 0.7965367965367965, |
|
"eval_runtime": 3.9227, |
|
"eval_samples_per_second": 1109.959, |
|
"eval_steps_per_second": 0.765, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.111111111111111, |
|
"grad_norm": 102350.40625, |
|
"learning_rate": 4.248966467615985e-07, |
|
"loss": 0.4091, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 149349.15625, |
|
"learning_rate": 4.363803399173174e-07, |
|
"loss": 0.4158, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 4.333333333333333, |
|
"grad_norm": 58734.1015625, |
|
"learning_rate": 4.478640330730363e-07, |
|
"loss": 0.3904, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 51783.84765625, |
|
"learning_rate": 4.593477262287552e-07, |
|
"loss": 0.3906, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.555555555555555, |
|
"grad_norm": 144734.21875, |
|
"learning_rate": 4.70831419384474e-07, |
|
"loss": 0.4004, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 81174.1640625, |
|
"learning_rate": 4.823151125401929e-07, |
|
"loss": 0.4091, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.777777777777778, |
|
"grad_norm": 214313.296875, |
|
"learning_rate": 4.937988056959118e-07, |
|
"loss": 0.4024, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 94263.2265625, |
|
"learning_rate": 5.052824988516307e-07, |
|
"loss": 0.3783, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 224359.484375, |
|
"learning_rate": 5.167661920073495e-07, |
|
"loss": 0.4167, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8128158015617822, |
|
"eval_f1": 0.8015583150718286, |
|
"eval_loss": 0.40888261795043945, |
|
"eval_precision": 0.8116370808678501, |
|
"eval_recall": 0.7917267917267917, |
|
"eval_runtime": 3.9592, |
|
"eval_samples_per_second": 1099.713, |
|
"eval_steps_per_second": 0.758, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 5.111111111111111, |
|
"grad_norm": 67686.3359375, |
|
"learning_rate": 5.282498851630684e-07, |
|
"loss": 0.4068, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 5.222222222222222, |
|
"grad_norm": 95565.5234375, |
|
"learning_rate": 5.397335783187873e-07, |
|
"loss": 0.3813, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 64774.375, |
|
"learning_rate": 5.512172714745061e-07, |
|
"loss": 0.3878, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 5.444444444444445, |
|
"grad_norm": 202234.890625, |
|
"learning_rate": 5.62700964630225e-07, |
|
"loss": 0.4293, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 201268.0625, |
|
"learning_rate": 5.74184657785944e-07, |
|
"loss": 0.3929, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.666666666666667, |
|
"grad_norm": 75988.9375, |
|
"learning_rate": 5.856683509416628e-07, |
|
"loss": 0.4009, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 5.777777777777778, |
|
"grad_norm": 96718.375, |
|
"learning_rate": 5.971520440973818e-07, |
|
"loss": 0.3964, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 5.888888888888889, |
|
"grad_norm": 159498.375, |
|
"learning_rate": 6.086357372531007e-07, |
|
"loss": 0.3998, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 182416.921875, |
|
"learning_rate": 6.201194304088196e-07, |
|
"loss": 0.4103, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8109784106568673, |
|
"eval_f1": 0.8012557353296306, |
|
"eval_loss": 0.4089536666870117, |
|
"eval_precision": 0.8045586808923375, |
|
"eval_recall": 0.797979797979798, |
|
"eval_runtime": 3.9918, |
|
"eval_samples_per_second": 1090.745, |
|
"eval_steps_per_second": 0.752, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 61874.33984375, |
|
"learning_rate": 6.316031235645384e-07, |
|
"loss": 0.3988, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"grad_norm": 124532.828125, |
|
"learning_rate": 6.430868167202573e-07, |
|
"loss": 0.3987, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 6.333333333333333, |
|
"grad_norm": 236617.484375, |
|
"learning_rate": 6.545705098759762e-07, |
|
"loss": 0.4024, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 6.444444444444445, |
|
"grad_norm": 66161.015625, |
|
"learning_rate": 6.66054203031695e-07, |
|
"loss": 0.3784, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 6.555555555555555, |
|
"grad_norm": 266440.5, |
|
"learning_rate": 6.775378961874139e-07, |
|
"loss": 0.4216, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 248766.765625, |
|
"learning_rate": 6.890215893431328e-07, |
|
"loss": 0.3842, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.777777777777778, |
|
"grad_norm": 87982.4609375, |
|
"learning_rate": 7.005052824988516e-07, |
|
"loss": 0.4236, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"grad_norm": 87643.5390625, |
|
"learning_rate": 7.119889756545705e-07, |
|
"loss": 0.4015, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 199694.375, |
|
"learning_rate": 7.234726688102894e-07, |
|
"loss": 0.4042, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.811437758383096, |
|
"eval_f1": 0.8005829487490892, |
|
"eval_loss": 0.4083126485347748, |
|
"eval_precision": 0.8086359175662414, |
|
"eval_recall": 0.7926887926887927, |
|
"eval_runtime": 4.0191, |
|
"eval_samples_per_second": 1083.34, |
|
"eval_steps_per_second": 0.746, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 141664.1875, |
|
"learning_rate": 7.349563619660083e-07, |
|
"loss": 0.397, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 224376.9375, |
|
"learning_rate": 7.464400551217272e-07, |
|
"loss": 0.3918, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 7.333333333333333, |
|
"grad_norm": 53294.6640625, |
|
"learning_rate": 7.579237482774461e-07, |
|
"loss": 0.4015, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 7.444444444444445, |
|
"grad_norm": 104196.5859375, |
|
"learning_rate": 7.694074414331649e-07, |
|
"loss": 0.4077, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 7.555555555555555, |
|
"grad_norm": 242109.171875, |
|
"learning_rate": 7.808911345888838e-07, |
|
"loss": 0.4047, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 7.666666666666667, |
|
"grad_norm": 88400.484375, |
|
"learning_rate": 7.923748277446027e-07, |
|
"loss": 0.4036, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 98138.4140625, |
|
"learning_rate": 8.038585209003215e-07, |
|
"loss": 0.4145, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 7.888888888888889, |
|
"grad_norm": 126328.7421875, |
|
"learning_rate": 8.153422140560404e-07, |
|
"loss": 0.3981, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 134507.5, |
|
"learning_rate": 8.268259072117593e-07, |
|
"loss": 0.3705, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8112080845199816, |
|
"eval_f1": 0.7997076023391813, |
|
"eval_loss": 0.40784016251564026, |
|
"eval_precision": 0.8103703703703704, |
|
"eval_recall": 0.7893217893217893, |
|
"eval_runtime": 4.0716, |
|
"eval_samples_per_second": 1069.369, |
|
"eval_steps_per_second": 0.737, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 8.11111111111111, |
|
"grad_norm": 74213.140625, |
|
"learning_rate": 8.383096003674781e-07, |
|
"loss": 0.3839, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 8.222222222222221, |
|
"grad_norm": 123429.9375, |
|
"learning_rate": 8.49793293523197e-07, |
|
"loss": 0.4077, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 103865.6015625, |
|
"learning_rate": 8.61276986678916e-07, |
|
"loss": 0.4074, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 8.444444444444445, |
|
"grad_norm": 185123.984375, |
|
"learning_rate": 8.727606798346348e-07, |
|
"loss": 0.4, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 8.555555555555555, |
|
"grad_norm": 130541.5, |
|
"learning_rate": 8.842443729903537e-07, |
|
"loss": 0.4067, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 8.666666666666666, |
|
"grad_norm": 167941.90625, |
|
"learning_rate": 8.957280661460726e-07, |
|
"loss": 0.4234, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 8.777777777777779, |
|
"grad_norm": 214580.09375, |
|
"learning_rate": 9.072117593017914e-07, |
|
"loss": 0.382, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 253802.171875, |
|
"learning_rate": 9.186954524575103e-07, |
|
"loss": 0.3714, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 246684.4375, |
|
"learning_rate": 9.301791456132293e-07, |
|
"loss": 0.395, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.811437758383096, |
|
"eval_f1": 0.8004860267314702, |
|
"eval_loss": 0.40752363204956055, |
|
"eval_precision": 0.8089390962671905, |
|
"eval_recall": 0.7922077922077922, |
|
"eval_runtime": 4.1246, |
|
"eval_samples_per_second": 1055.617, |
|
"eval_steps_per_second": 0.727, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 9.11111111111111, |
|
"grad_norm": 134944.796875, |
|
"learning_rate": 9.41662838768948e-07, |
|
"loss": 0.3812, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 9.222222222222221, |
|
"grad_norm": 76687.96875, |
|
"learning_rate": 9.53146531924667e-07, |
|
"loss": 0.3871, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 9.333333333333334, |
|
"grad_norm": 134176.203125, |
|
"learning_rate": 9.646302250803859e-07, |
|
"loss": 0.408, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 9.444444444444445, |
|
"grad_norm": 89754.578125, |
|
"learning_rate": 9.761139182361047e-07, |
|
"loss": 0.3973, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 9.555555555555555, |
|
"grad_norm": 195871.5, |
|
"learning_rate": 9.875976113918237e-07, |
|
"loss": 0.3998, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 9.666666666666666, |
|
"grad_norm": 199506.5625, |
|
"learning_rate": 9.990813045475425e-07, |
|
"loss": 0.4201, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"grad_norm": 71133.1171875, |
|
"learning_rate": 1.0105649977032615e-06, |
|
"loss": 0.4047, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 9.88888888888889, |
|
"grad_norm": 109423.734375, |
|
"learning_rate": 1.0220486908589803e-06, |
|
"loss": 0.3942, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 326489.625, |
|
"learning_rate": 1.033532384014699e-06, |
|
"loss": 0.3836, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8109784106568673, |
|
"eval_f1": 0.8011597004107273, |
|
"eval_loss": 0.4074622392654419, |
|
"eval_precision": 0.8048543689320389, |
|
"eval_recall": 0.7974987974987975, |
|
"eval_runtime": 4.1511, |
|
"eval_samples_per_second": 1048.879, |
|
"eval_steps_per_second": 0.723, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.11111111111111, |
|
"grad_norm": 141166.140625, |
|
"learning_rate": 1.045016077170418e-06, |
|
"loss": 0.408, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 10.222222222222221, |
|
"grad_norm": 69169.859375, |
|
"learning_rate": 1.0564997703261369e-06, |
|
"loss": 0.3887, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 10.333333333333334, |
|
"grad_norm": 56820.30859375, |
|
"learning_rate": 1.0679834634818557e-06, |
|
"loss": 0.4045, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 10.444444444444445, |
|
"grad_norm": 82678.1875, |
|
"learning_rate": 1.0794671566375747e-06, |
|
"loss": 0.3867, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 10.555555555555555, |
|
"grad_norm": 66057.0703125, |
|
"learning_rate": 1.0909508497932935e-06, |
|
"loss": 0.3956, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 10.666666666666666, |
|
"grad_norm": 208573.453125, |
|
"learning_rate": 1.1024345429490123e-06, |
|
"loss": 0.4234, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 10.777777777777779, |
|
"grad_norm": 89480.4375, |
|
"learning_rate": 1.1139182361047313e-06, |
|
"loss": 0.3979, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 10.88888888888889, |
|
"grad_norm": 162307.484375, |
|
"learning_rate": 1.12540192926045e-06, |
|
"loss": 0.396, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 89120.2109375, |
|
"learning_rate": 1.136885622416169e-06, |
|
"loss": 0.4105, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8123564538355535, |
|
"eval_f1": 0.8010713416118822, |
|
"eval_loss": 0.4068741500377655, |
|
"eval_precision": 0.8111439842209073, |
|
"eval_recall": 0.7912457912457912, |
|
"eval_runtime": 4.1736, |
|
"eval_samples_per_second": 1043.218, |
|
"eval_steps_per_second": 0.719, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 93675.4296875, |
|
"learning_rate": 1.148369315571888e-06, |
|
"loss": 0.3865, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 11.222222222222221, |
|
"grad_norm": 172021.828125, |
|
"learning_rate": 1.1598530087276067e-06, |
|
"loss": 0.3964, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 11.333333333333334, |
|
"grad_norm": 123080.0546875, |
|
"learning_rate": 1.1713367018833255e-06, |
|
"loss": 0.4166, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 11.444444444444445, |
|
"grad_norm": 71145.3203125, |
|
"learning_rate": 1.1828203950390445e-06, |
|
"loss": 0.3878, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 11.555555555555555, |
|
"grad_norm": 173710.0, |
|
"learning_rate": 1.1943040881947635e-06, |
|
"loss": 0.3828, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 165428.84375, |
|
"learning_rate": 1.2057877813504825e-06, |
|
"loss": 0.3871, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 11.777777777777779, |
|
"grad_norm": 267474.3125, |
|
"learning_rate": 1.2172714745062013e-06, |
|
"loss": 0.4003, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 11.88888888888889, |
|
"grad_norm": 162812.609375, |
|
"learning_rate": 1.2287551676619201e-06, |
|
"loss": 0.4023, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 166778.921875, |
|
"learning_rate": 1.2402388608176391e-06, |
|
"loss": 0.4018, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8118971061093248, |
|
"eval_f1": 0.8019347037484885, |
|
"eval_loss": 0.4067639112472534, |
|
"eval_precision": 0.806420233463035, |
|
"eval_recall": 0.7974987974987975, |
|
"eval_runtime": 4.1933, |
|
"eval_samples_per_second": 1038.322, |
|
"eval_steps_per_second": 0.715, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 12.11111111111111, |
|
"grad_norm": 112960.7890625, |
|
"learning_rate": 1.251722553973358e-06, |
|
"loss": 0.3929, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 12.222222222222221, |
|
"grad_norm": 301508.375, |
|
"learning_rate": 1.2632062471290767e-06, |
|
"loss": 0.4061, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 12.333333333333334, |
|
"grad_norm": 254152.53125, |
|
"learning_rate": 1.2746899402847958e-06, |
|
"loss": 0.4029, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 12.444444444444445, |
|
"grad_norm": 104616.0859375, |
|
"learning_rate": 1.2861736334405146e-06, |
|
"loss": 0.3967, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 12.555555555555555, |
|
"grad_norm": 166745.109375, |
|
"learning_rate": 1.2976573265962333e-06, |
|
"loss": 0.4009, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 12.666666666666666, |
|
"grad_norm": 58958.81640625, |
|
"learning_rate": 1.3091410197519524e-06, |
|
"loss": 0.3897, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 12.777777777777779, |
|
"grad_norm": 322038.0625, |
|
"learning_rate": 1.3206247129076712e-06, |
|
"loss": 0.405, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 12.88888888888889, |
|
"grad_norm": 92102.4140625, |
|
"learning_rate": 1.33210840606339e-06, |
|
"loss": 0.4068, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 150015.234375, |
|
"learning_rate": 1.343592099219109e-06, |
|
"loss": 0.365, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8118971061093248, |
|
"eval_f1": 0.8020304568527918, |
|
"eval_loss": 0.4065192937850952, |
|
"eval_precision": 0.8061224489795918, |
|
"eval_recall": 0.797979797979798, |
|
"eval_runtime": 4.2544, |
|
"eval_samples_per_second": 1023.409, |
|
"eval_steps_per_second": 0.705, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 13.11111111111111, |
|
"grad_norm": 95103.921875, |
|
"learning_rate": 1.3550757923748278e-06, |
|
"loss": 0.4172, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 13.222222222222221, |
|
"grad_norm": 113910.25, |
|
"learning_rate": 1.3665594855305466e-06, |
|
"loss": 0.3879, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 171457.375, |
|
"learning_rate": 1.3780431786862656e-06, |
|
"loss": 0.4148, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 13.444444444444445, |
|
"grad_norm": 266275.84375, |
|
"learning_rate": 1.3895268718419844e-06, |
|
"loss": 0.3913, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 13.555555555555555, |
|
"grad_norm": 56887.02734375, |
|
"learning_rate": 1.4010105649977032e-06, |
|
"loss": 0.4045, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 13.666666666666666, |
|
"grad_norm": 59645.34765625, |
|
"learning_rate": 1.4124942581534222e-06, |
|
"loss": 0.4117, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 13.777777777777779, |
|
"grad_norm": 236620.359375, |
|
"learning_rate": 1.423977951309141e-06, |
|
"loss": 0.3871, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 60322.78515625, |
|
"learning_rate": 1.4354616444648598e-06, |
|
"loss": 0.3879, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 121960.0, |
|
"learning_rate": 1.4469453376205788e-06, |
|
"loss": 0.3973, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.813275149288011, |
|
"eval_f1": 0.8043321299638989, |
|
"eval_loss": 0.4063498079776764, |
|
"eval_precision": 0.8049132947976878, |
|
"eval_recall": 0.8037518037518038, |
|
"eval_runtime": 4.2859, |
|
"eval_samples_per_second": 1015.878, |
|
"eval_steps_per_second": 0.7, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 14.11111111111111, |
|
"grad_norm": 207117.59375, |
|
"learning_rate": 1.4584290307762978e-06, |
|
"loss": 0.4161, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 14.222222222222221, |
|
"grad_norm": 126602.25, |
|
"learning_rate": 1.4699127239320166e-06, |
|
"loss": 0.4169, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 14.333333333333334, |
|
"grad_norm": 130595.4296875, |
|
"learning_rate": 1.4813964170877356e-06, |
|
"loss": 0.3908, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 14.444444444444445, |
|
"grad_norm": 112879.7578125, |
|
"learning_rate": 1.4928801102434544e-06, |
|
"loss": 0.3991, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 14.555555555555555, |
|
"grad_norm": 155991.609375, |
|
"learning_rate": 1.5043638033991732e-06, |
|
"loss": 0.3913, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 14.666666666666666, |
|
"grad_norm": 159955.5, |
|
"learning_rate": 1.5158474965548922e-06, |
|
"loss": 0.3898, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 14.777777777777779, |
|
"grad_norm": 118653.71875, |
|
"learning_rate": 1.527331189710611e-06, |
|
"loss": 0.3725, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 14.88888888888889, |
|
"grad_norm": 250835.40625, |
|
"learning_rate": 1.5388148828663298e-06, |
|
"loss": 0.384, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 181872.25, |
|
"learning_rate": 1.5502985760220488e-06, |
|
"loss": 0.3987, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8137344970142398, |
|
"eval_f1": 0.8015659407878639, |
|
"eval_loss": 0.4055207073688507, |
|
"eval_precision": 0.8157370517928287, |
|
"eval_recall": 0.7878787878787878, |
|
"eval_runtime": 4.3323, |
|
"eval_samples_per_second": 1005.014, |
|
"eval_steps_per_second": 0.692, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 15.11111111111111, |
|
"grad_norm": 95328.6484375, |
|
"learning_rate": 1.5617822691777676e-06, |
|
"loss": 0.3815, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 15.222222222222221, |
|
"grad_norm": 119895.4375, |
|
"learning_rate": 1.5732659623334864e-06, |
|
"loss": 0.4165, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 15.333333333333334, |
|
"grad_norm": 248995.484375, |
|
"learning_rate": 1.5847496554892054e-06, |
|
"loss": 0.3914, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 15.444444444444445, |
|
"grad_norm": 89783.515625, |
|
"learning_rate": 1.5962333486449242e-06, |
|
"loss": 0.3873, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 15.555555555555555, |
|
"grad_norm": 129638.3671875, |
|
"learning_rate": 1.607717041800643e-06, |
|
"loss": 0.3887, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 15.666666666666666, |
|
"grad_norm": 103552.0859375, |
|
"learning_rate": 1.619200734956362e-06, |
|
"loss": 0.4078, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 15.777777777777779, |
|
"grad_norm": 274991.84375, |
|
"learning_rate": 1.6306844281120808e-06, |
|
"loss": 0.3978, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 15.88888888888889, |
|
"grad_norm": 289934.40625, |
|
"learning_rate": 1.6421681212677996e-06, |
|
"loss": 0.3748, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 113621.3359375, |
|
"learning_rate": 1.6536518144235186e-06, |
|
"loss": 0.4035, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8125861276986679, |
|
"eval_f1": 0.8022297624818225, |
|
"eval_loss": 0.40545299649238586, |
|
"eval_precision": 0.8085002442598925, |
|
"eval_recall": 0.796055796055796, |
|
"eval_runtime": 4.347, |
|
"eval_samples_per_second": 1001.605, |
|
"eval_steps_per_second": 0.69, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 16.11111111111111, |
|
"grad_norm": 146140.265625, |
|
"learning_rate": 1.6651355075792374e-06, |
|
"loss": 0.3882, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 16.22222222222222, |
|
"grad_norm": 108806.140625, |
|
"learning_rate": 1.6766192007349562e-06, |
|
"loss": 0.409, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 16.333333333333332, |
|
"grad_norm": 157681.09375, |
|
"learning_rate": 1.6881028938906753e-06, |
|
"loss": 0.3915, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 16.444444444444443, |
|
"grad_norm": 100690.9375, |
|
"learning_rate": 1.699586587046394e-06, |
|
"loss": 0.3947, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 16.555555555555557, |
|
"grad_norm": 401684.625, |
|
"learning_rate": 1.7110702802021129e-06, |
|
"loss": 0.4007, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 195763.71875, |
|
"learning_rate": 1.722553973357832e-06, |
|
"loss": 0.3802, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 16.77777777777778, |
|
"grad_norm": 171871.40625, |
|
"learning_rate": 1.7340376665135509e-06, |
|
"loss": 0.3989, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 16.88888888888889, |
|
"grad_norm": 380800.71875, |
|
"learning_rate": 1.7455213596692697e-06, |
|
"loss": 0.394, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 131500.703125, |
|
"learning_rate": 1.7570050528249887e-06, |
|
"loss": 0.3933, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8135048231511254, |
|
"eval_f1": 0.8044315992292871, |
|
"eval_loss": 0.4050571024417877, |
|
"eval_precision": 0.8055957549445248, |
|
"eval_recall": 0.8032708032708032, |
|
"eval_runtime": 4.4245, |
|
"eval_samples_per_second": 984.067, |
|
"eval_steps_per_second": 0.678, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 17.11111111111111, |
|
"grad_norm": 149897.84375, |
|
"learning_rate": 1.7684887459807075e-06, |
|
"loss": 0.3868, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 17.22222222222222, |
|
"grad_norm": 170039.859375, |
|
"learning_rate": 1.7799724391364263e-06, |
|
"loss": 0.3922, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 17.333333333333332, |
|
"grad_norm": 97929.5234375, |
|
"learning_rate": 1.7914561322921453e-06, |
|
"loss": 0.3879, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 17.444444444444443, |
|
"grad_norm": 226100.71875, |
|
"learning_rate": 1.802939825447864e-06, |
|
"loss": 0.4043, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 17.555555555555557, |
|
"grad_norm": 128054.578125, |
|
"learning_rate": 1.8144235186035829e-06, |
|
"loss": 0.406, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 17.666666666666668, |
|
"grad_norm": 319005.8125, |
|
"learning_rate": 1.825907211759302e-06, |
|
"loss": 0.4046, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 65652.578125, |
|
"learning_rate": 1.8373909049150207e-06, |
|
"loss": 0.4033, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 17.88888888888889, |
|
"grad_norm": 153639.953125, |
|
"learning_rate": 1.8488745980707395e-06, |
|
"loss": 0.3785, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 459349.78125, |
|
"learning_rate": 1.8603582912264585e-06, |
|
"loss": 0.3933, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8137344970142398, |
|
"eval_f1": 0.801953601953602, |
|
"eval_loss": 0.40449145436286926, |
|
"eval_precision": 0.814484126984127, |
|
"eval_recall": 0.7898027898027898, |
|
"eval_runtime": 4.4226, |
|
"eval_samples_per_second": 984.498, |
|
"eval_steps_per_second": 0.678, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 18.11111111111111, |
|
"grad_norm": 164870.09375, |
|
"learning_rate": 1.8718419843821773e-06, |
|
"loss": 0.3948, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 18.22222222222222, |
|
"grad_norm": 48797.765625, |
|
"learning_rate": 1.883325677537896e-06, |
|
"loss": 0.3938, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 18.333333333333332, |
|
"grad_norm": 87001.015625, |
|
"learning_rate": 1.8948093706936151e-06, |
|
"loss": 0.3968, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 18.444444444444443, |
|
"grad_norm": 185905.75, |
|
"learning_rate": 1.906293063849334e-06, |
|
"loss": 0.3755, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 18.555555555555557, |
|
"grad_norm": 381337.5, |
|
"learning_rate": 1.917776757005053e-06, |
|
"loss": 0.4084, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 18.666666666666668, |
|
"grad_norm": 215949.28125, |
|
"learning_rate": 1.9292604501607717e-06, |
|
"loss": 0.4119, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 18.77777777777778, |
|
"grad_norm": 156070.921875, |
|
"learning_rate": 1.9407441433164905e-06, |
|
"loss": 0.3979, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 18.88888888888889, |
|
"grad_norm": 218265.953125, |
|
"learning_rate": 1.9522278364722093e-06, |
|
"loss": 0.3929, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 130221.5546875, |
|
"learning_rate": 1.963711529627928e-06, |
|
"loss": 0.3806, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8130454754248967, |
|
"eval_f1": 0.8001963672066765, |
|
"eval_loss": 0.4040713608264923, |
|
"eval_precision": 0.8170426065162907, |
|
"eval_recall": 0.7840307840307841, |
|
"eval_runtime": 4.4758, |
|
"eval_samples_per_second": 972.78, |
|
"eval_steps_per_second": 0.67, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 19.11111111111111, |
|
"grad_norm": 244566.015625, |
|
"learning_rate": 1.9751952227836473e-06, |
|
"loss": 0.3735, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 19.22222222222222, |
|
"grad_norm": 113897.046875, |
|
"learning_rate": 1.986678915939366e-06, |
|
"loss": 0.3872, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 19.333333333333332, |
|
"grad_norm": 167560.140625, |
|
"learning_rate": 1.998162609095085e-06, |
|
"loss": 0.4203, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 19.444444444444443, |
|
"grad_norm": 234886.8125, |
|
"learning_rate": 2.0096463022508037e-06, |
|
"loss": 0.3856, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 19.555555555555557, |
|
"grad_norm": 378111.03125, |
|
"learning_rate": 2.021129995406523e-06, |
|
"loss": 0.4226, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 19.666666666666668, |
|
"grad_norm": 128344.5546875, |
|
"learning_rate": 2.0326136885622413e-06, |
|
"loss": 0.3742, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 19.77777777777778, |
|
"grad_norm": 741984.8125, |
|
"learning_rate": 2.0440973817179606e-06, |
|
"loss": 0.4082, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 19.88888888888889, |
|
"grad_norm": 383275.90625, |
|
"learning_rate": 2.0555810748736794e-06, |
|
"loss": 0.3992, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 242016.46875, |
|
"learning_rate": 2.067064768029398e-06, |
|
"loss": 0.3855, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8174092788240698, |
|
"eval_f1": 0.8134240788547289, |
|
"eval_loss": 0.40715256333351135, |
|
"eval_precision": 0.7942254812098992, |
|
"eval_recall": 0.8335738335738335, |
|
"eval_runtime": 4.5663, |
|
"eval_samples_per_second": 953.516, |
|
"eval_steps_per_second": 0.657, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 20.11111111111111, |
|
"grad_norm": 443388.9375, |
|
"learning_rate": 2.078548461185117e-06, |
|
"loss": 0.4031, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 20.22222222222222, |
|
"grad_norm": 401805.96875, |
|
"learning_rate": 2.090032154340836e-06, |
|
"loss": 0.3979, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 20.333333333333332, |
|
"grad_norm": 141372.265625, |
|
"learning_rate": 2.1015158474965545e-06, |
|
"loss": 0.3798, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 20.444444444444443, |
|
"grad_norm": 205886.125, |
|
"learning_rate": 2.1129995406522738e-06, |
|
"loss": 0.3828, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 20.555555555555557, |
|
"grad_norm": 369303.71875, |
|
"learning_rate": 2.124483233807993e-06, |
|
"loss": 0.4017, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 20.666666666666668, |
|
"grad_norm": 126559.546875, |
|
"learning_rate": 2.1359669269637114e-06, |
|
"loss": 0.3991, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 20.77777777777778, |
|
"grad_norm": 251650.140625, |
|
"learning_rate": 2.1474506201194306e-06, |
|
"loss": 0.4049, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 20.88888888888889, |
|
"grad_norm": 539118.375, |
|
"learning_rate": 2.1589343132751494e-06, |
|
"loss": 0.3799, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 433059.34375, |
|
"learning_rate": 2.170418006430868e-06, |
|
"loss": 0.4132, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8135048231511254, |
|
"eval_f1": 0.8007850834151129, |
|
"eval_loss": 0.40335243940353394, |
|
"eval_precision": 0.8172258387581373, |
|
"eval_recall": 0.784992784992785, |
|
"eval_runtime": 4.5819, |
|
"eval_samples_per_second": 950.264, |
|
"eval_steps_per_second": 0.655, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 21.11111111111111, |
|
"grad_norm": 82284.109375, |
|
"learning_rate": 2.181901699586587e-06, |
|
"loss": 0.3892, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 21.22222222222222, |
|
"grad_norm": 77919.21875, |
|
"learning_rate": 2.193385392742306e-06, |
|
"loss": 0.4028, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 21.333333333333332, |
|
"grad_norm": 357442.125, |
|
"learning_rate": 2.2048690858980246e-06, |
|
"loss": 0.3961, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 21.444444444444443, |
|
"grad_norm": 94285.2109375, |
|
"learning_rate": 2.216352779053744e-06, |
|
"loss": 0.4058, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 21.555555555555557, |
|
"grad_norm": 243661.1875, |
|
"learning_rate": 2.2278364722094626e-06, |
|
"loss": 0.3708, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 21.666666666666668, |
|
"grad_norm": 63565.203125, |
|
"learning_rate": 2.2393201653651814e-06, |
|
"loss": 0.4102, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 21.77777777777778, |
|
"grad_norm": 339940.78125, |
|
"learning_rate": 2.2508038585209e-06, |
|
"loss": 0.4072, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 21.88888888888889, |
|
"grad_norm": 214842.84375, |
|
"learning_rate": 2.2622875516766194e-06, |
|
"loss": 0.384, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 269673.40625, |
|
"learning_rate": 2.273771244832338e-06, |
|
"loss": 0.3774, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8130454754248967, |
|
"eval_f1": 0.8005879470847623, |
|
"eval_loss": 0.40258845686912537, |
|
"eval_precision": 0.8157763354967549, |
|
"eval_recall": 0.785954785954786, |
|
"eval_runtime": 4.6468, |
|
"eval_samples_per_second": 936.992, |
|
"eval_steps_per_second": 0.646, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 22.11111111111111, |
|
"grad_norm": 147949.28125, |
|
"learning_rate": 2.285254937988057e-06, |
|
"loss": 0.3924, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 192279.4375, |
|
"learning_rate": 2.296738631143776e-06, |
|
"loss": 0.4075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 22.333333333333332, |
|
"grad_norm": 478090.40625, |
|
"learning_rate": 2.3082223242994946e-06, |
|
"loss": 0.4006, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 22.444444444444443, |
|
"grad_norm": 264078.875, |
|
"learning_rate": 2.3197060174552134e-06, |
|
"loss": 0.4051, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 22.555555555555557, |
|
"grad_norm": 122800.484375, |
|
"learning_rate": 2.3311897106109326e-06, |
|
"loss": 0.3937, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 22.666666666666668, |
|
"grad_norm": 97360.9375, |
|
"learning_rate": 2.342673403766651e-06, |
|
"loss": 0.3764, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 22.77777777777778, |
|
"grad_norm": 108199.5546875, |
|
"learning_rate": 2.3541570969223702e-06, |
|
"loss": 0.3932, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 22.88888888888889, |
|
"grad_norm": 262427.3125, |
|
"learning_rate": 2.365640790078089e-06, |
|
"loss": 0.4015, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 412134.5625, |
|
"learning_rate": 2.3771244832338083e-06, |
|
"loss": 0.3698, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8174092788240698, |
|
"eval_f1": 0.8103078024337866, |
|
"eval_loss": 0.4036799371242523, |
|
"eval_precision": 0.8039772727272727, |
|
"eval_recall": 0.8167388167388168, |
|
"eval_runtime": 4.658, |
|
"eval_samples_per_second": 934.744, |
|
"eval_steps_per_second": 0.644, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 23.11111111111111, |
|
"grad_norm": 265666.25, |
|
"learning_rate": 2.388608176389527e-06, |
|
"loss": 0.386, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 23.22222222222222, |
|
"grad_norm": 138313.109375, |
|
"learning_rate": 2.400091869545246e-06, |
|
"loss": 0.3904, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 23.333333333333332, |
|
"grad_norm": 116725.1171875, |
|
"learning_rate": 2.411575562700965e-06, |
|
"loss": 0.3955, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 23.444444444444443, |
|
"grad_norm": 187285.765625, |
|
"learning_rate": 2.4230592558566834e-06, |
|
"loss": 0.4069, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 23.555555555555557, |
|
"grad_norm": 121359.4765625, |
|
"learning_rate": 2.4345429490124027e-06, |
|
"loss": 0.4008, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 23.666666666666668, |
|
"grad_norm": 115836.03125, |
|
"learning_rate": 2.4460266421681215e-06, |
|
"loss": 0.4002, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 23.77777777777778, |
|
"grad_norm": 76411.34375, |
|
"learning_rate": 2.4575103353238403e-06, |
|
"loss": 0.3934, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 23.88888888888889, |
|
"grad_norm": 154353.21875, |
|
"learning_rate": 2.468994028479559e-06, |
|
"loss": 0.4057, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 486543.96875, |
|
"learning_rate": 2.4804777216352783e-06, |
|
"loss": 0.3584, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8158015617822691, |
|
"eval_f1": 0.8058111380145279, |
|
"eval_loss": 0.40210822224617004, |
|
"eval_precision": 0.8113115553388591, |
|
"eval_recall": 0.8003848003848004, |
|
"eval_runtime": 5.1111, |
|
"eval_samples_per_second": 851.866, |
|
"eval_steps_per_second": 0.587, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 24.11111111111111, |
|
"grad_norm": 76995.2578125, |
|
"learning_rate": 2.4919614147909967e-06, |
|
"loss": 0.4137, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 24.22222222222222, |
|
"grad_norm": 192273.9375, |
|
"learning_rate": 2.503445107946716e-06, |
|
"loss": 0.4011, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 24.333333333333332, |
|
"grad_norm": 61227.8359375, |
|
"learning_rate": 2.5149288011024347e-06, |
|
"loss": 0.3909, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 24.444444444444443, |
|
"grad_norm": 221185.5, |
|
"learning_rate": 2.5264124942581535e-06, |
|
"loss": 0.3973, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 24.555555555555557, |
|
"grad_norm": 130241.0859375, |
|
"learning_rate": 2.5378961874138723e-06, |
|
"loss": 0.3877, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 24.666666666666668, |
|
"grad_norm": 80599.9765625, |
|
"learning_rate": 2.5493798805695915e-06, |
|
"loss": 0.4051, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 24.77777777777778, |
|
"grad_norm": 128989.890625, |
|
"learning_rate": 2.56086357372531e-06, |
|
"loss": 0.3691, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 24.88888888888889, |
|
"grad_norm": 320645.8125, |
|
"learning_rate": 2.572347266881029e-06, |
|
"loss": 0.3791, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 284049.625, |
|
"learning_rate": 2.583830960036748e-06, |
|
"loss": 0.3947, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.813275149288011, |
|
"eval_f1": 0.7983130736789879, |
|
"eval_loss": 0.40306106209754944, |
|
"eval_precision": 0.8242827868852459, |
|
"eval_recall": 0.7739297739297739, |
|
"eval_runtime": 4.7309, |
|
"eval_samples_per_second": 920.339, |
|
"eval_steps_per_second": 0.634, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 25.11111111111111, |
|
"grad_norm": 519498.875, |
|
"learning_rate": 2.5953146531924667e-06, |
|
"loss": 0.413, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 25.22222222222222, |
|
"grad_norm": 502382.40625, |
|
"learning_rate": 2.6067983463481855e-06, |
|
"loss": 0.3725, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 25.333333333333332, |
|
"grad_norm": 379123.5, |
|
"learning_rate": 2.6182820395039047e-06, |
|
"loss": 0.3881, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 25.444444444444443, |
|
"grad_norm": 190483.9375, |
|
"learning_rate": 2.629765732659623e-06, |
|
"loss": 0.4083, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 25.555555555555557, |
|
"grad_norm": 216956.625, |
|
"learning_rate": 2.6412494258153423e-06, |
|
"loss": 0.3987, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 25.666666666666668, |
|
"grad_norm": 278627.96875, |
|
"learning_rate": 2.6527331189710615e-06, |
|
"loss": 0.3936, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 25.77777777777778, |
|
"grad_norm": 343526.4375, |
|
"learning_rate": 2.66421681212678e-06, |
|
"loss": 0.3827, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 25.88888888888889, |
|
"grad_norm": 536376.0625, |
|
"learning_rate": 2.675700505282499e-06, |
|
"loss": 0.3932, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 188893.734375, |
|
"learning_rate": 2.687184198438218e-06, |
|
"loss": 0.3812, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8180983004134129, |
|
"eval_f1": 0.8092485549132948, |
|
"eval_loss": 0.4012051820755005, |
|
"eval_precision": 0.8104196816208393, |
|
"eval_recall": 0.8080808080808081, |
|
"eval_runtime": 4.752, |
|
"eval_samples_per_second": 916.254, |
|
"eval_steps_per_second": 0.631, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 26.11111111111111, |
|
"grad_norm": 89162.7421875, |
|
"learning_rate": 2.6986678915939367e-06, |
|
"loss": 0.3756, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 26.22222222222222, |
|
"grad_norm": 246311.859375, |
|
"learning_rate": 2.7101515847496555e-06, |
|
"loss": 0.3888, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 26.333333333333332, |
|
"grad_norm": 127793.1015625, |
|
"learning_rate": 2.7216352779053748e-06, |
|
"loss": 0.375, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 26.444444444444443, |
|
"grad_norm": 67135.25, |
|
"learning_rate": 2.733118971061093e-06, |
|
"loss": 0.3977, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 26.555555555555557, |
|
"grad_norm": 297287.75, |
|
"learning_rate": 2.7446026642168124e-06, |
|
"loss": 0.3989, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 303139.96875, |
|
"learning_rate": 2.756086357372531e-06, |
|
"loss": 0.4048, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 26.77777777777778, |
|
"grad_norm": 606317.625, |
|
"learning_rate": 2.76757005052825e-06, |
|
"loss": 0.3873, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 26.88888888888889, |
|
"grad_norm": 172750.578125, |
|
"learning_rate": 2.7790537436839687e-06, |
|
"loss": 0.384, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 192172.140625, |
|
"learning_rate": 2.790537436839688e-06, |
|
"loss": 0.3928, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8176389526871842, |
|
"eval_f1": 0.8086746987951807, |
|
"eval_loss": 0.4002959132194519, |
|
"eval_precision": 0.8102366006760019, |
|
"eval_recall": 0.8071188071188071, |
|
"eval_runtime": 4.5908, |
|
"eval_samples_per_second": 948.411, |
|
"eval_steps_per_second": 0.653, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 27.11111111111111, |
|
"grad_norm": 91136.6484375, |
|
"learning_rate": 2.8020211299954063e-06, |
|
"loss": 0.3889, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 27.22222222222222, |
|
"grad_norm": 111455.0859375, |
|
"learning_rate": 2.8135048231511256e-06, |
|
"loss": 0.3781, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 27.333333333333332, |
|
"grad_norm": 241420.875, |
|
"learning_rate": 2.8249885163068444e-06, |
|
"loss": 0.3868, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 27.444444444444443, |
|
"grad_norm": 108206.5078125, |
|
"learning_rate": 2.836472209462563e-06, |
|
"loss": 0.4015, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 27.555555555555557, |
|
"grad_norm": 197683.21875, |
|
"learning_rate": 2.847955902618282e-06, |
|
"loss": 0.3877, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 27.666666666666668, |
|
"grad_norm": 141794.296875, |
|
"learning_rate": 2.859439595774001e-06, |
|
"loss": 0.4144, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 27.77777777777778, |
|
"grad_norm": 256132.671875, |
|
"learning_rate": 2.8709232889297196e-06, |
|
"loss": 0.3757, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 27.88888888888889, |
|
"grad_norm": 245631.25, |
|
"learning_rate": 2.8824069820854388e-06, |
|
"loss": 0.3927, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 213683.140625, |
|
"learning_rate": 2.8938906752411576e-06, |
|
"loss": 0.3591, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8183279742765274, |
|
"eval_f1": 0.8090755491189959, |
|
"eval_loss": 0.39960750937461853, |
|
"eval_precision": 0.812015503875969, |
|
"eval_recall": 0.8061568061568062, |
|
"eval_runtime": 4.6253, |
|
"eval_samples_per_second": 941.347, |
|
"eval_steps_per_second": 0.649, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 28.11111111111111, |
|
"grad_norm": 148696.1875, |
|
"learning_rate": 2.9053743683968764e-06, |
|
"loss": 0.3861, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 28.22222222222222, |
|
"grad_norm": 119950.6875, |
|
"learning_rate": 2.9168580615525956e-06, |
|
"loss": 0.3861, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 28.333333333333332, |
|
"grad_norm": 110785.578125, |
|
"learning_rate": 2.9283417547083144e-06, |
|
"loss": 0.3765, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 28.444444444444443, |
|
"grad_norm": 103968.75, |
|
"learning_rate": 2.939825447864033e-06, |
|
"loss": 0.3874, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 28.555555555555557, |
|
"grad_norm": 231702.34375, |
|
"learning_rate": 2.951309141019752e-06, |
|
"loss": 0.4047, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 28.666666666666668, |
|
"grad_norm": 127703.3359375, |
|
"learning_rate": 2.9627928341754712e-06, |
|
"loss": 0.3974, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 28.77777777777778, |
|
"grad_norm": 246982.40625, |
|
"learning_rate": 2.9742765273311896e-06, |
|
"loss": 0.3823, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 28.88888888888889, |
|
"grad_norm": 218037.109375, |
|
"learning_rate": 2.985760220486909e-06, |
|
"loss": 0.4092, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 91798.8125, |
|
"learning_rate": 2.9972439136426276e-06, |
|
"loss": 0.3872, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8194763435920992, |
|
"eval_f1": 0.8103281853281853, |
|
"eval_loss": 0.39871689677238464, |
|
"eval_precision": 0.8130750605326876, |
|
"eval_recall": 0.8075998075998077, |
|
"eval_runtime": 4.6817, |
|
"eval_samples_per_second": 929.994, |
|
"eval_steps_per_second": 0.641, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 29.11111111111111, |
|
"grad_norm": 94122.96875, |
|
"learning_rate": 3.0087276067983464e-06, |
|
"loss": 0.3935, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 29.22222222222222, |
|
"grad_norm": 138885.125, |
|
"learning_rate": 3.0202112999540652e-06, |
|
"loss": 0.3795, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 29.333333333333332, |
|
"grad_norm": 433285.8125, |
|
"learning_rate": 3.0316949931097844e-06, |
|
"loss": 0.3886, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 29.444444444444443, |
|
"grad_norm": 320547.09375, |
|
"learning_rate": 3.043178686265503e-06, |
|
"loss": 0.3925, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 29.555555555555557, |
|
"grad_norm": 82943.8203125, |
|
"learning_rate": 3.054662379421222e-06, |
|
"loss": 0.3808, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 29.666666666666668, |
|
"grad_norm": 70880.1328125, |
|
"learning_rate": 3.066146072576941e-06, |
|
"loss": 0.3883, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 29.77777777777778, |
|
"grad_norm": 270253.625, |
|
"learning_rate": 3.0776297657326596e-06, |
|
"loss": 0.381, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 29.88888888888889, |
|
"grad_norm": 147631.03125, |
|
"learning_rate": 3.0891134588883784e-06, |
|
"loss": 0.3622, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 443471.0, |
|
"learning_rate": 3.1005971520440976e-06, |
|
"loss": 0.383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8208543867707855, |
|
"eval_f1": 0.8132183908045977, |
|
"eval_loss": 0.3988674283027649, |
|
"eval_precision": 0.8097281831187411, |
|
"eval_recall": 0.8167388167388168, |
|
"eval_runtime": 4.7223, |
|
"eval_samples_per_second": 922.003, |
|
"eval_steps_per_second": 0.635, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 30.11111111111111, |
|
"grad_norm": 141594.78125, |
|
"learning_rate": 3.112080845199816e-06, |
|
"loss": 0.3939, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 30.22222222222222, |
|
"grad_norm": 388412.25, |
|
"learning_rate": 3.1235645383555352e-06, |
|
"loss": 0.4021, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 30.333333333333332, |
|
"grad_norm": 296736.5, |
|
"learning_rate": 3.135048231511254e-06, |
|
"loss": 0.3748, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 30.444444444444443, |
|
"grad_norm": 245713.109375, |
|
"learning_rate": 3.146531924666973e-06, |
|
"loss": 0.3833, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 30.555555555555557, |
|
"grad_norm": 128015.53125, |
|
"learning_rate": 3.1580156178226916e-06, |
|
"loss": 0.382, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 30.666666666666668, |
|
"grad_norm": 517641.25, |
|
"learning_rate": 3.169499310978411e-06, |
|
"loss": 0.3751, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 30.77777777777778, |
|
"grad_norm": 620331.375, |
|
"learning_rate": 3.1809830041341297e-06, |
|
"loss": 0.385, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 30.88888888888889, |
|
"grad_norm": 141372.8125, |
|
"learning_rate": 3.1924666972898485e-06, |
|
"loss": 0.3737, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 292104.09375, |
|
"learning_rate": 3.2039503904455677e-06, |
|
"loss": 0.3966, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8135048231511254, |
|
"eval_f1": 0.8000984736582964, |
|
"eval_loss": 0.39696839451789856, |
|
"eval_precision": 0.8194654563792234, |
|
"eval_recall": 0.7816257816257817, |
|
"eval_runtime": 5.1915, |
|
"eval_samples_per_second": 838.675, |
|
"eval_steps_per_second": 0.578, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 31.11111111111111, |
|
"grad_norm": 268970.09375, |
|
"learning_rate": 3.215434083601286e-06, |
|
"loss": 0.3771, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 31.22222222222222, |
|
"grad_norm": 113689.015625, |
|
"learning_rate": 3.2269177767570053e-06, |
|
"loss": 0.3979, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 31.333333333333332, |
|
"grad_norm": 617523.125, |
|
"learning_rate": 3.238401469912724e-06, |
|
"loss": 0.3778, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 31.444444444444443, |
|
"grad_norm": 615918.875, |
|
"learning_rate": 3.249885163068443e-06, |
|
"loss": 0.393, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 31.555555555555557, |
|
"grad_norm": 262001.734375, |
|
"learning_rate": 3.2613688562241617e-06, |
|
"loss": 0.3789, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 31.666666666666668, |
|
"grad_norm": 454978.09375, |
|
"learning_rate": 3.272852549379881e-06, |
|
"loss": 0.4019, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 31.77777777777778, |
|
"grad_norm": 635157.0, |
|
"learning_rate": 3.2843362425355993e-06, |
|
"loss": 0.3867, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 31.88888888888889, |
|
"grad_norm": 180403.53125, |
|
"learning_rate": 3.2958199356913185e-06, |
|
"loss": 0.3861, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 741212.1875, |
|
"learning_rate": 3.3073036288470373e-06, |
|
"loss": 0.4076, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8139641708773542, |
|
"eval_f1": 0.7975, |
|
"eval_loss": 0.39702439308166504, |
|
"eval_precision": 0.8302967204580948, |
|
"eval_recall": 0.7671957671957672, |
|
"eval_runtime": 4.8033, |
|
"eval_samples_per_second": 906.467, |
|
"eval_steps_per_second": 0.625, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 32.111111111111114, |
|
"grad_norm": 535774.0625, |
|
"learning_rate": 3.318787322002756e-06, |
|
"loss": 0.4063, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 32.22222222222222, |
|
"grad_norm": 334477.8125, |
|
"learning_rate": 3.330271015158475e-06, |
|
"loss": 0.3786, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 32.333333333333336, |
|
"grad_norm": 257520.453125, |
|
"learning_rate": 3.341754708314194e-06, |
|
"loss": 0.3938, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 32.44444444444444, |
|
"grad_norm": 136671.375, |
|
"learning_rate": 3.3532384014699125e-06, |
|
"loss": 0.3573, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 32.55555555555556, |
|
"grad_norm": 202197.96875, |
|
"learning_rate": 3.3647220946256317e-06, |
|
"loss": 0.381, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 32.666666666666664, |
|
"grad_norm": 415211.40625, |
|
"learning_rate": 3.3762057877813505e-06, |
|
"loss": 0.3936, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 32.77777777777778, |
|
"grad_norm": 508906.4375, |
|
"learning_rate": 3.3876894809370693e-06, |
|
"loss": 0.3865, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 32.888888888888886, |
|
"grad_norm": 526735.875, |
|
"learning_rate": 3.399173174092788e-06, |
|
"loss": 0.3889, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 254718.359375, |
|
"learning_rate": 3.4106568672485073e-06, |
|
"loss": 0.3876, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8128158015617822, |
|
"eval_f1": 0.7953803665578709, |
|
"eval_loss": 0.3970797061920166, |
|
"eval_precision": 0.8319327731092437, |
|
"eval_recall": 0.7619047619047619, |
|
"eval_runtime": 4.8434, |
|
"eval_samples_per_second": 898.952, |
|
"eval_steps_per_second": 0.619, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 33.111111111111114, |
|
"grad_norm": 985664.3125, |
|
"learning_rate": 3.4221405604042257e-06, |
|
"loss": 0.393, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 33.22222222222222, |
|
"grad_norm": 617586.375, |
|
"learning_rate": 3.433624253559945e-06, |
|
"loss": 0.3748, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 33.333333333333336, |
|
"grad_norm": 1082033.875, |
|
"learning_rate": 3.445107946715664e-06, |
|
"loss": 0.367, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 33.44444444444444, |
|
"grad_norm": 715623.125, |
|
"learning_rate": 3.4565916398713825e-06, |
|
"loss": 0.4088, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 33.55555555555556, |
|
"grad_norm": 899131.875, |
|
"learning_rate": 3.4680753330271017e-06, |
|
"loss": 0.4071, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 33.666666666666664, |
|
"grad_norm": 365789.3125, |
|
"learning_rate": 3.4795590261828205e-06, |
|
"loss": 0.3735, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 33.77777777777778, |
|
"grad_norm": 465953.4375, |
|
"learning_rate": 3.4910427193385393e-06, |
|
"loss": 0.3921, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 33.888888888888886, |
|
"grad_norm": 895318.9375, |
|
"learning_rate": 3.502526412494258e-06, |
|
"loss": 0.3905, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 879623.6875, |
|
"learning_rate": 3.5140101056499774e-06, |
|
"loss": 0.3815, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8240698208543867, |
|
"eval_f1": 0.8204406938584153, |
|
"eval_loss": 0.39920851588249207, |
|
"eval_precision": 0.8001828989483311, |
|
"eval_recall": 0.8417508417508418, |
|
"eval_runtime": 4.8974, |
|
"eval_samples_per_second": 889.041, |
|
"eval_steps_per_second": 0.613, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 34.111111111111114, |
|
"grad_norm": 629364.25, |
|
"learning_rate": 3.5254937988056957e-06, |
|
"loss": 0.3837, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 34.22222222222222, |
|
"grad_norm": 289384.09375, |
|
"learning_rate": 3.536977491961415e-06, |
|
"loss": 0.3929, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 34.333333333333336, |
|
"grad_norm": 151251.28125, |
|
"learning_rate": 3.5484611851171338e-06, |
|
"loss": 0.397, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 34.44444444444444, |
|
"grad_norm": 263268.09375, |
|
"learning_rate": 3.5599448782728526e-06, |
|
"loss": 0.3895, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 34.55555555555556, |
|
"grad_norm": 384067.28125, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 0.3933, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 34.666666666666664, |
|
"grad_norm": 820030.5, |
|
"learning_rate": 3.5829122645842906e-06, |
|
"loss": 0.3824, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 34.77777777777778, |
|
"grad_norm": 760393.3125, |
|
"learning_rate": 3.594395957740009e-06, |
|
"loss": 0.3926, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 34.888888888888886, |
|
"grad_norm": 87286.8359375, |
|
"learning_rate": 3.605879650895728e-06, |
|
"loss": 0.3757, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 178669.046875, |
|
"learning_rate": 3.617363344051447e-06, |
|
"loss": 0.369, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8231511254019293, |
|
"eval_f1": 0.8168411037107517, |
|
"eval_loss": 0.3967457413673401, |
|
"eval_precision": 0.808, |
|
"eval_recall": 0.8258778258778259, |
|
"eval_runtime": 4.9488, |
|
"eval_samples_per_second": 879.801, |
|
"eval_steps_per_second": 0.606, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 35.111111111111114, |
|
"grad_norm": 358338.8125, |
|
"learning_rate": 3.6288470372071658e-06, |
|
"loss": 0.3697, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 35.22222222222222, |
|
"grad_norm": 202919.984375, |
|
"learning_rate": 3.6403307303628846e-06, |
|
"loss": 0.3829, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 35.333333333333336, |
|
"grad_norm": 600207.375, |
|
"learning_rate": 3.651814423518604e-06, |
|
"loss": 0.389, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 35.44444444444444, |
|
"grad_norm": 101190.6875, |
|
"learning_rate": 3.663298116674322e-06, |
|
"loss": 0.3744, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 35.55555555555556, |
|
"grad_norm": 161650.578125, |
|
"learning_rate": 3.6747818098300414e-06, |
|
"loss": 0.4155, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 35.666666666666664, |
|
"grad_norm": 748048.6875, |
|
"learning_rate": 3.68626550298576e-06, |
|
"loss": 0.4249, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 35.77777777777778, |
|
"grad_norm": 1179157.0, |
|
"learning_rate": 3.697749196141479e-06, |
|
"loss": 0.3645, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 35.888888888888886, |
|
"grad_norm": 847522.25, |
|
"learning_rate": 3.709232889297198e-06, |
|
"loss": 0.4006, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 1134314.125, |
|
"learning_rate": 3.720716582452917e-06, |
|
"loss": 0.3825, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8167202572347267, |
|
"eval_f1": 0.8037383177570093, |
|
"eval_loss": 0.3938184976577759, |
|
"eval_precision": 0.8223452440865626, |
|
"eval_recall": 0.785954785954786, |
|
"eval_runtime": 4.9706, |
|
"eval_samples_per_second": 875.943, |
|
"eval_steps_per_second": 0.604, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 36.111111111111114, |
|
"grad_norm": 219243.015625, |
|
"learning_rate": 3.732200275608636e-06, |
|
"loss": 0.3819, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 36.22222222222222, |
|
"grad_norm": 178601.828125, |
|
"learning_rate": 3.7436839687643546e-06, |
|
"loss": 0.3902, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 36.333333333333336, |
|
"grad_norm": 770971.6875, |
|
"learning_rate": 3.755167661920074e-06, |
|
"loss": 0.3764, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 36.44444444444444, |
|
"grad_norm": 508608.3125, |
|
"learning_rate": 3.766651355075792e-06, |
|
"loss": 0.3837, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 36.55555555555556, |
|
"grad_norm": 697210.75, |
|
"learning_rate": 3.7781350482315114e-06, |
|
"loss": 0.3894, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 36.666666666666664, |
|
"grad_norm": 183407.328125, |
|
"learning_rate": 3.7896187413872302e-06, |
|
"loss": 0.3919, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 36.77777777777778, |
|
"grad_norm": 226463.875, |
|
"learning_rate": 3.801102434542949e-06, |
|
"loss": 0.3667, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 36.888888888888886, |
|
"grad_norm": 320285.21875, |
|
"learning_rate": 3.812586127698668e-06, |
|
"loss": 0.3953, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 125119.46875, |
|
"learning_rate": 3.824069820854387e-06, |
|
"loss": 0.3684, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.815112540192926, |
|
"eval_f1": 0.8002976928801786, |
|
"eval_loss": 0.3938528597354889, |
|
"eval_precision": 0.8263319672131147, |
|
"eval_recall": 0.7758537758537759, |
|
"eval_runtime": 5.031, |
|
"eval_samples_per_second": 865.438, |
|
"eval_steps_per_second": 0.596, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 37.111111111111114, |
|
"grad_norm": 502826.6875, |
|
"learning_rate": 3.835553514010106e-06, |
|
"loss": 0.3702, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 37.22222222222222, |
|
"grad_norm": 216404.84375, |
|
"learning_rate": 3.847037207165825e-06, |
|
"loss": 0.3821, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 37.333333333333336, |
|
"grad_norm": 239700.234375, |
|
"learning_rate": 3.8585209003215434e-06, |
|
"loss": 0.3714, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 37.44444444444444, |
|
"grad_norm": 152418.90625, |
|
"learning_rate": 3.870004593477263e-06, |
|
"loss": 0.3965, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 37.55555555555556, |
|
"grad_norm": 279164.09375, |
|
"learning_rate": 3.881488286632981e-06, |
|
"loss": 0.4086, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 37.666666666666664, |
|
"grad_norm": 179164.71875, |
|
"learning_rate": 3.8929719797887e-06, |
|
"loss": 0.3946, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 37.77777777777778, |
|
"grad_norm": 264468.34375, |
|
"learning_rate": 3.904455672944419e-06, |
|
"loss": 0.374, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 37.888888888888886, |
|
"grad_norm": 131274.546875, |
|
"learning_rate": 3.915939366100138e-06, |
|
"loss": 0.3934, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 277462.40625, |
|
"learning_rate": 3.927423059255856e-06, |
|
"loss": 0.3433, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.821773082223243, |
|
"eval_f1": 0.8121065375302663, |
|
"eval_loss": 0.3931880295276642, |
|
"eval_precision": 0.817649926864944, |
|
"eval_recall": 0.8066378066378066, |
|
"eval_runtime": 5.087, |
|
"eval_samples_per_second": 855.913, |
|
"eval_steps_per_second": 0.59, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 38.111111111111114, |
|
"grad_norm": 168592.34375, |
|
"learning_rate": 3.9389067524115755e-06, |
|
"loss": 0.3766, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 38.22222222222222, |
|
"grad_norm": 163537.234375, |
|
"learning_rate": 3.950390445567295e-06, |
|
"loss": 0.386, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 38.333333333333336, |
|
"grad_norm": 583295.9375, |
|
"learning_rate": 3.961874138723013e-06, |
|
"loss": 0.3867, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 38.44444444444444, |
|
"grad_norm": 70029.2734375, |
|
"learning_rate": 3.973357831878732e-06, |
|
"loss": 0.3735, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 38.55555555555556, |
|
"grad_norm": 503096.8125, |
|
"learning_rate": 3.9848415250344515e-06, |
|
"loss": 0.3764, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 38.666666666666664, |
|
"grad_norm": 505206.1875, |
|
"learning_rate": 3.99632521819017e-06, |
|
"loss": 0.3751, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 38.77777777777778, |
|
"grad_norm": 191592.875, |
|
"learning_rate": 4.007808911345889e-06, |
|
"loss": 0.4098, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 38.888888888888886, |
|
"grad_norm": 491039.40625, |
|
"learning_rate": 4.0192926045016075e-06, |
|
"loss": 0.3724, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"grad_norm": 815383.25, |
|
"learning_rate": 4.030776297657327e-06, |
|
"loss": 0.3968, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.823610473128158, |
|
"eval_f1": 0.8191238813000471, |
|
"eval_loss": 0.3962981402873993, |
|
"eval_precision": 0.8024919243193355, |
|
"eval_recall": 0.8364598364598365, |
|
"eval_runtime": 5.5822, |
|
"eval_samples_per_second": 779.985, |
|
"eval_steps_per_second": 0.537, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 39.111111111111114, |
|
"grad_norm": 616299.0, |
|
"learning_rate": 4.042259990813046e-06, |
|
"loss": 0.4, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 39.22222222222222, |
|
"grad_norm": 390517.84375, |
|
"learning_rate": 4.053743683968765e-06, |
|
"loss": 0.3874, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 39.333333333333336, |
|
"grad_norm": 101405.28125, |
|
"learning_rate": 4.065227377124483e-06, |
|
"loss": 0.3804, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 39.44444444444444, |
|
"grad_norm": 803522.0, |
|
"learning_rate": 4.076711070280202e-06, |
|
"loss": 0.4033, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 39.55555555555556, |
|
"grad_norm": 676551.6875, |
|
"learning_rate": 4.088194763435921e-06, |
|
"loss": 0.3897, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 39.666666666666664, |
|
"grad_norm": 1587823.0, |
|
"learning_rate": 4.09967845659164e-06, |
|
"loss": 0.3898, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 39.77777777777778, |
|
"grad_norm": 1047884.4375, |
|
"learning_rate": 4.111162149747359e-06, |
|
"loss": 0.3717, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 39.888888888888886, |
|
"grad_norm": 336138.5, |
|
"learning_rate": 4.122645842903078e-06, |
|
"loss": 0.3901, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 352737.96875, |
|
"learning_rate": 4.134129536058796e-06, |
|
"loss": 0.3728, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.823610473128158, |
|
"eval_f1": 0.8176638176638177, |
|
"eval_loss": 0.39358338713645935, |
|
"eval_precision": 0.8073136427566807, |
|
"eval_recall": 0.8282828282828283, |
|
"eval_runtime": 5.1339, |
|
"eval_samples_per_second": 848.091, |
|
"eval_steps_per_second": 0.584, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 40.111111111111114, |
|
"grad_norm": 635885.75, |
|
"learning_rate": 4.1456132292145155e-06, |
|
"loss": 0.3848, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 40.22222222222222, |
|
"grad_norm": 535277.4375, |
|
"learning_rate": 4.157096922370234e-06, |
|
"loss": 0.3836, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 40.333333333333336, |
|
"grad_norm": 503451.34375, |
|
"learning_rate": 4.168580615525953e-06, |
|
"loss": 0.3599, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 40.44444444444444, |
|
"grad_norm": 177537.828125, |
|
"learning_rate": 4.180064308681672e-06, |
|
"loss": 0.3727, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 40.55555555555556, |
|
"grad_norm": 400331.65625, |
|
"learning_rate": 4.1915480018373916e-06, |
|
"loss": 0.3874, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 40.666666666666664, |
|
"grad_norm": 1095307.75, |
|
"learning_rate": 4.203031694993109e-06, |
|
"loss": 0.3961, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 40.77777777777778, |
|
"grad_norm": 919089.5625, |
|
"learning_rate": 4.214515388148828e-06, |
|
"loss": 0.3891, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 40.888888888888886, |
|
"grad_norm": 973522.375, |
|
"learning_rate": 4.2259990813045475e-06, |
|
"loss": 0.3881, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"grad_norm": 517725.1875, |
|
"learning_rate": 4.237482774460267e-06, |
|
"loss": 0.3914, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8240698208543867, |
|
"eval_f1": 0.8180522565320665, |
|
"eval_loss": 0.3933127820491791, |
|
"eval_precision": 0.8080713280150165, |
|
"eval_recall": 0.8282828282828283, |
|
"eval_runtime": 5.193, |
|
"eval_samples_per_second": 838.429, |
|
"eval_steps_per_second": 0.578, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 41.111111111111114, |
|
"grad_norm": 411101.21875, |
|
"learning_rate": 4.248966467615986e-06, |
|
"loss": 0.3875, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 41.22222222222222, |
|
"grad_norm": 953085.1875, |
|
"learning_rate": 4.260450160771704e-06, |
|
"loss": 0.3899, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 41.333333333333336, |
|
"grad_norm": 1096262.375, |
|
"learning_rate": 4.271933853927423e-06, |
|
"loss": 0.3916, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 41.44444444444444, |
|
"grad_norm": 834261.75, |
|
"learning_rate": 4.283417547083142e-06, |
|
"loss": 0.3889, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 41.55555555555556, |
|
"grad_norm": 399109.46875, |
|
"learning_rate": 4.294901240238861e-06, |
|
"loss": 0.3798, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 41.666666666666664, |
|
"grad_norm": 370690.375, |
|
"learning_rate": 4.3063849333945796e-06, |
|
"loss": 0.3775, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 41.77777777777778, |
|
"grad_norm": 660407.1875, |
|
"learning_rate": 4.317868626550299e-06, |
|
"loss": 0.3832, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 41.888888888888886, |
|
"grad_norm": 1159956.5, |
|
"learning_rate": 4.329352319706018e-06, |
|
"loss": 0.3639, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 606383.1875, |
|
"learning_rate": 4.340836012861736e-06, |
|
"loss": 0.3738, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8231511254019293, |
|
"eval_f1": 0.8135593220338984, |
|
"eval_loss": 0.39093542098999023, |
|
"eval_precision": 0.8191126279863481, |
|
"eval_recall": 0.8080808080808081, |
|
"eval_runtime": 5.2157, |
|
"eval_samples_per_second": 834.794, |
|
"eval_steps_per_second": 0.575, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 42.111111111111114, |
|
"grad_norm": 248742.28125, |
|
"learning_rate": 4.352319706017455e-06, |
|
"loss": 0.3937, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 42.22222222222222, |
|
"grad_norm": 585262.625, |
|
"learning_rate": 4.363803399173174e-06, |
|
"loss": 0.3623, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 42.333333333333336, |
|
"grad_norm": 200463.296875, |
|
"learning_rate": 4.375287092328893e-06, |
|
"loss": 0.3749, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 42.44444444444444, |
|
"grad_norm": 160448.734375, |
|
"learning_rate": 4.386770785484612e-06, |
|
"loss": 0.3893, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 42.55555555555556, |
|
"grad_norm": 348410.375, |
|
"learning_rate": 4.398254478640331e-06, |
|
"loss": 0.3899, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 42.666666666666664, |
|
"grad_norm": 115621.484375, |
|
"learning_rate": 4.409738171796049e-06, |
|
"loss": 0.3933, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 42.77777777777778, |
|
"grad_norm": 431595.0, |
|
"learning_rate": 4.421221864951768e-06, |
|
"loss": 0.3821, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 42.888888888888886, |
|
"grad_norm": 101460.4375, |
|
"learning_rate": 4.432705558107488e-06, |
|
"loss": 0.3743, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"grad_norm": 563180.0625, |
|
"learning_rate": 4.444189251263206e-06, |
|
"loss": 0.3924, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.8252181901699587, |
|
"eval_f1": 0.8221547090441692, |
|
"eval_loss": 0.39497968554496765, |
|
"eval_precision": 0.7995454545454546, |
|
"eval_recall": 0.8460798460798461, |
|
"eval_runtime": 5.2643, |
|
"eval_samples_per_second": 827.083, |
|
"eval_steps_per_second": 0.57, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 43.111111111111114, |
|
"grad_norm": 456643.25, |
|
"learning_rate": 4.455672944418925e-06, |
|
"loss": 0.371, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 43.22222222222222, |
|
"grad_norm": 873773.8125, |
|
"learning_rate": 4.4671566375746444e-06, |
|
"loss": 0.3962, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 43.333333333333336, |
|
"grad_norm": 112325.046875, |
|
"learning_rate": 4.478640330730363e-06, |
|
"loss": 0.3756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 43.44444444444444, |
|
"grad_norm": 367788.90625, |
|
"learning_rate": 4.490124023886081e-06, |
|
"loss": 0.3788, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 43.55555555555556, |
|
"grad_norm": 390394.09375, |
|
"learning_rate": 4.5016077170418e-06, |
|
"loss": 0.3812, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 43.666666666666664, |
|
"grad_norm": 325314.1875, |
|
"learning_rate": 4.51309141019752e-06, |
|
"loss": 0.3731, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 43.77777777777778, |
|
"grad_norm": 156517.75, |
|
"learning_rate": 4.524575103353239e-06, |
|
"loss": 0.377, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 43.888888888888886, |
|
"grad_norm": 443482.53125, |
|
"learning_rate": 4.536058796508958e-06, |
|
"loss": 0.3858, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 853189.375, |
|
"learning_rate": 4.547542489664676e-06, |
|
"loss": 0.3817, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8270555810748736, |
|
"eval_f1": 0.8231148696264975, |
|
"eval_loss": 0.393361896276474, |
|
"eval_precision": 0.8044077134986226, |
|
"eval_recall": 0.8427128427128427, |
|
"eval_runtime": 5.3198, |
|
"eval_samples_per_second": 818.447, |
|
"eval_steps_per_second": 0.564, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 44.111111111111114, |
|
"grad_norm": 568736.6875, |
|
"learning_rate": 4.559026182820395e-06, |
|
"loss": 0.3612, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 44.22222222222222, |
|
"grad_norm": 603129.125, |
|
"learning_rate": 4.570509875976114e-06, |
|
"loss": 0.3805, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 44.333333333333336, |
|
"grad_norm": 218879.5625, |
|
"learning_rate": 4.581993569131833e-06, |
|
"loss": 0.3866, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 44.44444444444444, |
|
"grad_norm": 751706.625, |
|
"learning_rate": 4.593477262287552e-06, |
|
"loss": 0.3912, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 44.55555555555556, |
|
"grad_norm": 474342.125, |
|
"learning_rate": 4.604960955443271e-06, |
|
"loss": 0.3774, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 44.666666666666664, |
|
"grad_norm": 237391.578125, |
|
"learning_rate": 4.616444648598989e-06, |
|
"loss": 0.3791, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 44.77777777777778, |
|
"grad_norm": 143708.234375, |
|
"learning_rate": 4.6279283417547085e-06, |
|
"loss": 0.3844, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 44.888888888888886, |
|
"grad_norm": 788803.3125, |
|
"learning_rate": 4.639412034910427e-06, |
|
"loss": 0.3885, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 1540259.125, |
|
"learning_rate": 4.650895728066146e-06, |
|
"loss": 0.415, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8252181901699587, |
|
"eval_f1": 0.8227346843699045, |
|
"eval_loss": 0.3959347903728485, |
|
"eval_precision": 0.7976513098464318, |
|
"eval_recall": 0.8494468494468495, |
|
"eval_runtime": 5.3462, |
|
"eval_samples_per_second": 814.415, |
|
"eval_steps_per_second": 0.561, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 45.111111111111114, |
|
"grad_norm": 552211.75, |
|
"learning_rate": 4.662379421221865e-06, |
|
"loss": 0.3864, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 45.22222222222222, |
|
"grad_norm": 177492.828125, |
|
"learning_rate": 4.6738631143775845e-06, |
|
"loss": 0.3604, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 45.333333333333336, |
|
"grad_norm": 196209.4375, |
|
"learning_rate": 4.685346807533302e-06, |
|
"loss": 0.3826, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 45.44444444444444, |
|
"grad_norm": 857919.75, |
|
"learning_rate": 4.696830500689021e-06, |
|
"loss": 0.3903, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 45.55555555555556, |
|
"grad_norm": 1154738.25, |
|
"learning_rate": 4.7083141938447405e-06, |
|
"loss": 0.3909, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 45.666666666666664, |
|
"grad_norm": 1139759.75, |
|
"learning_rate": 4.71979788700046e-06, |
|
"loss": 0.3787, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 45.77777777777778, |
|
"grad_norm": 564532.8125, |
|
"learning_rate": 4.731281580156178e-06, |
|
"loss": 0.3599, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 45.888888888888886, |
|
"grad_norm": 143203.859375, |
|
"learning_rate": 4.742765273311897e-06, |
|
"loss": 0.3973, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 554491.5625, |
|
"learning_rate": 4.7542489664676165e-06, |
|
"loss": 0.3882, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8286632981166743, |
|
"eval_f1": 0.8273947246645071, |
|
"eval_loss": 0.3958016335964203, |
|
"eval_precision": 0.797146678555506, |
|
"eval_recall": 0.86002886002886, |
|
"eval_runtime": 5.3654, |
|
"eval_samples_per_second": 811.493, |
|
"eval_steps_per_second": 0.559, |
|
"step": 414 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1583883938955264e+16, |
|
"train_batch_size": 2048, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|