Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1c35ae538f4b13ed118009e42d3af5ce35d02974649d677dcae751fa109040d
 size 59933632

 version https://git-lfs.github.com/spec/v1
+oid sha256:937fc5f94b084d3eb78ca648e5e282469bbf6dfb9116c44545ac208a5eee7095
 size 59933632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b4af661d5dbc2e4ded19c017724e4032b3e4ebda32ac08442406a113fb81189
 size 31823460

 version https://git-lfs.github.com/spec/v1
+oid sha256:a987323893943b5cb6c9038df21203d888c0501f022965597682cf818af9726f
 size 31823460

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5cd4d6f1a56868495de31ebab839ccdff6500d945e21b9b50d123a2662f6221
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7f62f7a514254178aafeb62bbf10dca9fc4851e21b058bd5fd1775d5039612b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b881a4283fbf9ae4dd8073fe406895e163eb2a9281e2f7a0b2ffcfdeea673f59
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:381144ff10563e73a0d559f4eadf3a4c93568229fcc444bd951eaeaa68432ea4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5701812505722046,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 2.564102564102564,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 7.441,
       "eval_steps_per_second": 7.441,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2535,7 +2893,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.957792987250688e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.5701812505722046,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
+  "epoch": 2.9304029304029307,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.441,
       "eval_steps_per_second": 7.441,
       "step": 350
+    },
+    {
+      "epoch": 2.571428571428571,
+      "grad_norm": 0.5820057988166809,
+      "learning_rate": 1.4928145131487267e-05,
+      "loss": 0.4702,
+      "step": 351
+    },
+    {
+      "epoch": 2.578754578754579,
+      "grad_norm": 0.5424510836601257,
+      "learning_rate": 1.4417429926676482e-05,
+      "loss": 0.4174,
+      "step": 352
+    },
+    {
+      "epoch": 2.586080586080586,
+      "grad_norm": 0.6577489376068115,
+      "learning_rate": 1.39151623581778e-05,
+      "loss": 0.4169,
+      "step": 353
+    },
+    {
+      "epoch": 2.5934065934065935,
+      "grad_norm": 0.5944687128067017,
+      "learning_rate": 1.3421373720380669e-05,
+      "loss": 0.4272,
+      "step": 354
+    },
+    {
+      "epoch": 2.600732600732601,
+      "grad_norm": 0.7125936150550842,
+      "learning_rate": 1.2936094779384486e-05,
+      "loss": 0.4438,
+      "step": 355
+    },
+    {
+      "epoch": 2.608058608058608,
+      "grad_norm": 0.679352879524231,
+      "learning_rate": 1.245935577108168e-05,
+      "loss": 0.4949,
+      "step": 356
+    },
+    {
+      "epoch": 2.6153846153846154,
+      "grad_norm": 1.0750776529312134,
+      "learning_rate": 1.199118639927385e-05,
+      "loss": 0.3986,
+      "step": 357
+    },
+    {
+      "epoch": 2.6227106227106227,
+      "grad_norm": 0.8349664211273193,
+      "learning_rate": 1.1531615833820906e-05,
+      "loss": 0.4152,
+      "step": 358
+    },
+    {
+      "epoch": 2.63003663003663,
+      "grad_norm": 0.770355761051178,
+      "learning_rate": 1.108067270882384e-05,
+      "loss": 0.4878,
+      "step": 359
+    },
+    {
+      "epoch": 2.6373626373626373,
+      "grad_norm": 0.7165803909301758,
+      "learning_rate": 1.0638385120840414e-05,
+      "loss": 0.3852,
+      "step": 360
+    },
+    {
+      "epoch": 2.6446886446886446,
+      "grad_norm": 0.7379334568977356,
+      "learning_rate": 1.0204780627134784e-05,
+      "loss": 0.4758,
+      "step": 361
+    },
+    {
+      "epoch": 2.652014652014652,
+      "grad_norm": 0.6477860808372498,
+      "learning_rate": 9.77988624396025e-06,
+      "loss": 0.3718,
+      "step": 362
+    },
+    {
+      "epoch": 2.659340659340659,
+      "grad_norm": 0.6215366721153259,
+      "learning_rate": 9.363728444876239e-06,
+      "loss": 0.3146,
+      "step": 363
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.7191038727760315,
+      "learning_rate": 8.956333159098677e-06,
+      "loss": 0.2567,
+      "step": 364
+    },
+    {
+      "epoch": 2.6739926739926743,
+      "grad_norm": 0.6438663005828857,
+      "learning_rate": 8.557725769884444e-06,
+      "loss": 0.3366,
+      "step": 365
+    },
+    {
+      "epoch": 2.6813186813186816,
+      "grad_norm": 0.7834159135818481,
+      "learning_rate": 8.167931112949955e-06,
+      "loss": 0.2802,
+      "step": 366
+    },
+    {
+      "epoch": 2.688644688644689,
+      "grad_norm": 0.687300443649292,
+      "learning_rate": 7.786973474923569e-06,
+      "loss": 0.3275,
+      "step": 367
+    },
+    {
+      "epoch": 2.695970695970696,
+      "grad_norm": 0.633350133895874,
+      "learning_rate": 7.41487659183258e-06,
+      "loss": 0.2462,
+      "step": 368
+    },
+    {
+      "epoch": 2.7032967032967035,
+      "grad_norm": 0.8299592137336731,
+      "learning_rate": 7.051663647624117e-06,
+      "loss": 0.2847,
+      "step": 369
+    },
+    {
+      "epoch": 2.7106227106227108,
+      "grad_norm": 0.6400296688079834,
+      "learning_rate": 6.697357272720782e-06,
+      "loss": 0.239,
+      "step": 370
+    },
+    {
+      "epoch": 2.717948717948718,
+      "grad_norm": 0.620663583278656,
+      "learning_rate": 6.35197954261058e-06,
+      "loss": 0.183,
+      "step": 371
+    },
+    {
+      "epoch": 2.7252747252747254,
+      "grad_norm": 0.7446674108505249,
+      "learning_rate": 6.015551976471433e-06,
+      "loss": 0.2334,
+      "step": 372
+    },
+    {
+      "epoch": 2.7326007326007327,
+      "grad_norm": 0.8476528525352478,
+      "learning_rate": 5.688095535830573e-06,
+      "loss": 0.1747,
+      "step": 373
+    },
+    {
+      "epoch": 2.73992673992674,
+      "grad_norm": 0.8313474655151367,
+      "learning_rate": 5.369630623258248e-06,
+      "loss": 0.189,
+      "step": 374
+    },
+    {
+      "epoch": 2.7472527472527473,
+      "grad_norm": 0.9323949217796326,
+      "learning_rate": 5.060177081096728e-06,
+      "loss": 0.0911,
+      "step": 375
+    },
+    {
+      "epoch": 2.7545787545787546,
+      "grad_norm": 0.33731380105018616,
+      "learning_rate": 4.759754190223925e-06,
+      "loss": 0.4471,
+      "step": 376
+    },
+    {
+      "epoch": 2.761904761904762,
+      "grad_norm": 0.37849536538124084,
+      "learning_rate": 4.468380668852068e-06,
+      "loss": 0.6115,
+      "step": 377
+    },
+    {
+      "epoch": 2.769230769230769,
+      "grad_norm": 0.4009314775466919,
+      "learning_rate": 4.186074671361456e-06,
+      "loss": 0.575,
+      "step": 378
+    },
+    {
+      "epoch": 2.7765567765567765,
+      "grad_norm": 0.3839149475097656,
+      "learning_rate": 3.912853787169345e-06,
+      "loss": 0.5056,
+      "step": 379
+    },
+    {
+      "epoch": 2.7838827838827838,
+      "grad_norm": 0.40725114941596985,
+      "learning_rate": 3.6487350396339597e-06,
+      "loss": 0.4408,
+      "step": 380
+    },
+    {
+      "epoch": 2.791208791208791,
+      "grad_norm": 0.4696556329727173,
+      "learning_rate": 3.3937348849939204e-06,
+      "loss": 0.5321,
+      "step": 381
+    },
+    {
+      "epoch": 2.7985347985347984,
+      "grad_norm": 0.43771475553512573,
+      "learning_rate": 3.147869211342818e-06,
+      "loss": 0.4356,
+      "step": 382
+    },
+    {
+      "epoch": 2.8058608058608057,
+      "grad_norm": 0.5101984739303589,
+      "learning_rate": 2.911153337639388e-06,
+      "loss": 0.4499,
+      "step": 383
+    },
+    {
+      "epoch": 2.813186813186813,
+      "grad_norm": 0.5081220865249634,
+      "learning_rate": 2.683602012752939e-06,
+      "loss": 0.4756,
+      "step": 384
+    },
+    {
+      "epoch": 2.8205128205128203,
+      "grad_norm": 0.5413169264793396,
+      "learning_rate": 2.4652294145445226e-06,
+      "loss": 0.4866,
+      "step": 385
+    },
+    {
+      "epoch": 2.8278388278388276,
+      "grad_norm": 0.5008475184440613,
+      "learning_rate": 2.256049148983441e-06,
+      "loss": 0.4641,
+      "step": 386
+    },
+    {
+      "epoch": 2.8351648351648353,
+      "grad_norm": 0.5848665833473206,
+      "learning_rate": 2.0560742492995885e-06,
+      "loss": 0.4572,
+      "step": 387
+    },
+    {
+      "epoch": 2.8424908424908426,
+      "grad_norm": 0.6183776259422302,
+      "learning_rate": 1.8653171751714379e-06,
+      "loss": 0.5198,
+      "step": 388
+    },
+    {
+      "epoch": 2.84981684981685,
+      "grad_norm": 0.5888795852661133,
+      "learning_rate": 1.6837898119496263e-06,
+      "loss": 0.451,
+      "step": 389
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.5763834118843079,
+      "learning_rate": 1.5115034699164308e-06,
+      "loss": 0.4926,
+      "step": 390
+    },
+    {
+      "epoch": 2.8644688644688645,
+      "grad_norm": 0.6053770184516907,
+      "learning_rate": 1.348468883581183e-06,
+      "loss": 0.443,
+      "step": 391
+    },
+    {
+      "epoch": 2.871794871794872,
+      "grad_norm": 0.5707501769065857,
+      "learning_rate": 1.19469621101132e-06,
+      "loss": 0.4184,
+      "step": 392
+    },
+    {
+      "epoch": 2.879120879120879,
+      "grad_norm": 0.5886608958244324,
+      "learning_rate": 1.0501950331995578e-06,
+      "loss": 0.4002,
+      "step": 393
+    },
+    {
+      "epoch": 2.8864468864468864,
+      "grad_norm": 0.5797408223152161,
+      "learning_rate": 9.149743534668353e-07,
+      "loss": 0.4117,
+      "step": 394
+    },
+    {
+      "epoch": 2.8937728937728937,
+      "grad_norm": 0.7185364961624146,
+      "learning_rate": 7.890425969014625e-07,
+      "loss": 0.4059,
+      "step": 395
+    },
+    {
+      "epoch": 2.901098901098901,
+      "grad_norm": 0.7297194004058838,
+      "learning_rate": 6.724076098341247e-07,
+      "loss": 0.3407,
+      "step": 396
+    },
+    {
+      "epoch": 2.9084249084249083,
+      "grad_norm": 0.6948420405387878,
+      "learning_rate": 5.650766593489897e-07,
+      "loss": 0.3564,
+      "step": 397
+    },
+    {
+      "epoch": 2.9157509157509156,
+      "grad_norm": 0.6854040026664734,
+      "learning_rate": 4.6705643283102003e-07,
+      "loss": 0.4018,
+      "step": 398
+    },
+    {
+      "epoch": 2.9230769230769234,
+      "grad_norm": 0.7185117602348328,
+      "learning_rate": 3.7835303754918943e-07,
+      "loss": 0.3607,
+      "step": 399
+    },
+    {
+      "epoch": 2.9304029304029307,
+      "grad_norm": 0.6749255061149597,
+      "learning_rate": 2.9897200027598767e-07,
+      "loss": 0.276,
+      "step": 400
+    },
+    {
+      "epoch": 2.9304029304029307,
+      "eval_loss": 0.60358726978302,
+      "eval_runtime": 73.527,
+      "eval_samples_per_second": 7.439,
+      "eval_steps_per_second": 7.439,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.659682780951347e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null