beit-base-patch16-224-OT / trainer_state.json
Augusto777's picture
End of training
5ec8984 verified
raw
history blame
14.8 kB
{
"best_metric": 0.8317757009345794,
"best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-248",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6887365579605103,
"eval_runtime": 2.8498,
"eval_samples_per_second": 37.546,
"eval_steps_per_second": 2.456,
"step": 8
},
{
"epoch": 1.25,
"learning_rate": 1.5625e-05,
"loss": 0.692,
"step": 10
},
{
"epoch": 2.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.678210437297821,
"eval_runtime": 1.9785,
"eval_samples_per_second": 54.082,
"eval_steps_per_second": 3.538,
"step": 16
},
{
"epoch": 2.5,
"learning_rate": 3.125e-05,
"loss": 0.6801,
"step": 20
},
{
"epoch": 3.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6668981313705444,
"eval_runtime": 2.0656,
"eval_samples_per_second": 51.802,
"eval_steps_per_second": 3.389,
"step": 24
},
{
"epoch": 3.75,
"learning_rate": 4.6875e-05,
"loss": 0.6696,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6644209623336792,
"eval_runtime": 2.1267,
"eval_samples_per_second": 50.313,
"eval_steps_per_second": 3.291,
"step": 32
},
{
"epoch": 5.0,
"learning_rate": 4.8611111111111115e-05,
"loss": 0.6607,
"step": 40
},
{
"epoch": 5.0,
"eval_accuracy": 0.6635514018691588,
"eval_loss": 0.6661449074745178,
"eval_runtime": 1.995,
"eval_samples_per_second": 53.635,
"eval_steps_per_second": 3.509,
"step": 40
},
{
"epoch": 6.0,
"eval_accuracy": 0.6542056074766355,
"eval_loss": 0.6241438388824463,
"eval_runtime": 2.0189,
"eval_samples_per_second": 52.999,
"eval_steps_per_second": 3.467,
"step": 48
},
{
"epoch": 6.25,
"learning_rate": 4.6875e-05,
"loss": 0.6341,
"step": 50
},
{
"epoch": 7.0,
"eval_accuracy": 0.6542056074766355,
"eval_loss": 0.6234968900680542,
"eval_runtime": 1.9955,
"eval_samples_per_second": 53.622,
"eval_steps_per_second": 3.508,
"step": 56
},
{
"epoch": 7.5,
"learning_rate": 4.5138888888888894e-05,
"loss": 0.6089,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.6088296175003052,
"eval_runtime": 2.06,
"eval_samples_per_second": 51.942,
"eval_steps_per_second": 3.398,
"step": 64
},
{
"epoch": 8.75,
"learning_rate": 4.340277777777778e-05,
"loss": 0.6095,
"step": 70
},
{
"epoch": 9.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.5911644101142883,
"eval_runtime": 2.01,
"eval_samples_per_second": 53.235,
"eval_steps_per_second": 3.483,
"step": 72
},
{
"epoch": 10.0,
"learning_rate": 4.166666666666667e-05,
"loss": 0.5632,
"step": 80
},
{
"epoch": 10.0,
"eval_accuracy": 0.6355140186915887,
"eval_loss": 0.660692036151886,
"eval_runtime": 2.126,
"eval_samples_per_second": 50.329,
"eval_steps_per_second": 3.293,
"step": 80
},
{
"epoch": 11.0,
"eval_accuracy": 0.7009345794392523,
"eval_loss": 0.5792553424835205,
"eval_runtime": 1.972,
"eval_samples_per_second": 54.261,
"eval_steps_per_second": 3.55,
"step": 88
},
{
"epoch": 11.25,
"learning_rate": 3.993055555555556e-05,
"loss": 0.5418,
"step": 90
},
{
"epoch": 12.0,
"eval_accuracy": 0.6822429906542056,
"eval_loss": 0.5953279733657837,
"eval_runtime": 2.2228,
"eval_samples_per_second": 48.137,
"eval_steps_per_second": 3.149,
"step": 96
},
{
"epoch": 12.5,
"learning_rate": 3.8194444444444444e-05,
"loss": 0.5336,
"step": 100
},
{
"epoch": 13.0,
"eval_accuracy": 0.7102803738317757,
"eval_loss": 0.5792534947395325,
"eval_runtime": 1.991,
"eval_samples_per_second": 53.742,
"eval_steps_per_second": 3.516,
"step": 104
},
{
"epoch": 13.75,
"learning_rate": 3.6458333333333336e-05,
"loss": 0.5102,
"step": 110
},
{
"epoch": 14.0,
"eval_accuracy": 0.719626168224299,
"eval_loss": 0.5291872024536133,
"eval_runtime": 2.0585,
"eval_samples_per_second": 51.98,
"eval_steps_per_second": 3.401,
"step": 112
},
{
"epoch": 15.0,
"learning_rate": 3.472222222222222e-05,
"loss": 0.4762,
"step": 120
},
{
"epoch": 15.0,
"eval_accuracy": 0.7009345794392523,
"eval_loss": 0.6557727456092834,
"eval_runtime": 1.9985,
"eval_samples_per_second": 53.541,
"eval_steps_per_second": 3.503,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.7102803738317757,
"eval_loss": 0.5371208786964417,
"eval_runtime": 1.971,
"eval_samples_per_second": 54.288,
"eval_steps_per_second": 3.552,
"step": 128
},
{
"epoch": 16.25,
"learning_rate": 3.2986111111111115e-05,
"loss": 0.544,
"step": 130
},
{
"epoch": 17.0,
"eval_accuracy": 0.7570093457943925,
"eval_loss": 0.5400705337524414,
"eval_runtime": 1.981,
"eval_samples_per_second": 54.014,
"eval_steps_per_second": 3.534,
"step": 136
},
{
"epoch": 17.5,
"learning_rate": 3.125e-05,
"loss": 0.4256,
"step": 140
},
{
"epoch": 18.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.49267861247062683,
"eval_runtime": 1.9622,
"eval_samples_per_second": 54.531,
"eval_steps_per_second": 3.567,
"step": 144
},
{
"epoch": 18.75,
"learning_rate": 2.951388888888889e-05,
"loss": 0.4082,
"step": 150
},
{
"epoch": 19.0,
"eval_accuracy": 0.7383177570093458,
"eval_loss": 0.5800967216491699,
"eval_runtime": 1.9825,
"eval_samples_per_second": 53.973,
"eval_steps_per_second": 3.531,
"step": 152
},
{
"epoch": 20.0,
"learning_rate": 2.777777777777778e-05,
"loss": 0.4014,
"step": 160
},
{
"epoch": 20.0,
"eval_accuracy": 0.7383177570093458,
"eval_loss": 0.5822688937187195,
"eval_runtime": 1.985,
"eval_samples_per_second": 53.905,
"eval_steps_per_second": 3.526,
"step": 160
},
{
"epoch": 21.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 0.5392723083496094,
"eval_runtime": 2.117,
"eval_samples_per_second": 50.542,
"eval_steps_per_second": 3.306,
"step": 168
},
{
"epoch": 21.25,
"learning_rate": 2.604166666666667e-05,
"loss": 0.3483,
"step": 170
},
{
"epoch": 22.0,
"eval_accuracy": 0.7102803738317757,
"eval_loss": 0.5940819382667542,
"eval_runtime": 1.9765,
"eval_samples_per_second": 54.137,
"eval_steps_per_second": 3.542,
"step": 176
},
{
"epoch": 22.5,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.3121,
"step": 180
},
{
"epoch": 23.0,
"eval_accuracy": 0.7383177570093458,
"eval_loss": 0.5568514466285706,
"eval_runtime": 2.1005,
"eval_samples_per_second": 50.94,
"eval_steps_per_second": 3.333,
"step": 184
},
{
"epoch": 23.75,
"learning_rate": 2.2569444444444447e-05,
"loss": 0.3484,
"step": 190
},
{
"epoch": 24.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.5975044369697571,
"eval_runtime": 1.97,
"eval_samples_per_second": 54.316,
"eval_steps_per_second": 3.553,
"step": 192
},
{
"epoch": 25.0,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.263,
"step": 200
},
{
"epoch": 25.0,
"eval_accuracy": 0.7570093457943925,
"eval_loss": 0.6544022560119629,
"eval_runtime": 2.3716,
"eval_samples_per_second": 45.118,
"eval_steps_per_second": 2.952,
"step": 200
},
{
"epoch": 26.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 0.5743973851203918,
"eval_runtime": 2.0105,
"eval_samples_per_second": 53.221,
"eval_steps_per_second": 3.482,
"step": 208
},
{
"epoch": 26.25,
"learning_rate": 1.9097222222222222e-05,
"loss": 0.2633,
"step": 210
},
{
"epoch": 27.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.6095036864280701,
"eval_runtime": 1.9725,
"eval_samples_per_second": 54.247,
"eval_steps_per_second": 3.549,
"step": 216
},
{
"epoch": 27.5,
"learning_rate": 1.736111111111111e-05,
"loss": 0.2935,
"step": 220
},
{
"epoch": 28.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.528620719909668,
"eval_runtime": 2.0605,
"eval_samples_per_second": 51.929,
"eval_steps_per_second": 3.397,
"step": 224
},
{
"epoch": 28.75,
"learning_rate": 1.5625e-05,
"loss": 0.2332,
"step": 230
},
{
"epoch": 29.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.6027860045433044,
"eval_runtime": 1.9665,
"eval_samples_per_second": 54.412,
"eval_steps_per_second": 3.56,
"step": 232
},
{
"epoch": 30.0,
"learning_rate": 1.388888888888889e-05,
"loss": 0.2314,
"step": 240
},
{
"epoch": 30.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5935384631156921,
"eval_runtime": 1.962,
"eval_samples_per_second": 54.537,
"eval_steps_per_second": 3.568,
"step": 240
},
{
"epoch": 31.0,
"eval_accuracy": 0.8317757009345794,
"eval_loss": 0.5392867922782898,
"eval_runtime": 1.969,
"eval_samples_per_second": 54.343,
"eval_steps_per_second": 3.555,
"step": 248
},
{
"epoch": 31.25,
"learning_rate": 1.2152777777777779e-05,
"loss": 0.202,
"step": 250
},
{
"epoch": 32.0,
"eval_accuracy": 0.822429906542056,
"eval_loss": 0.5556337833404541,
"eval_runtime": 2.012,
"eval_samples_per_second": 53.182,
"eval_steps_per_second": 3.479,
"step": 256
},
{
"epoch": 32.5,
"learning_rate": 1.0416666666666668e-05,
"loss": 0.2127,
"step": 260
},
{
"epoch": 33.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.5912833213806152,
"eval_runtime": 1.98,
"eval_samples_per_second": 54.041,
"eval_steps_per_second": 3.535,
"step": 264
},
{
"epoch": 33.75,
"learning_rate": 8.680555555555556e-06,
"loss": 0.2035,
"step": 270
},
{
"epoch": 34.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.5337203741073608,
"eval_runtime": 1.9834,
"eval_samples_per_second": 53.947,
"eval_steps_per_second": 3.529,
"step": 272
},
{
"epoch": 35.0,
"learning_rate": 6.944444444444445e-06,
"loss": 0.2618,
"step": 280
},
{
"epoch": 35.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.622107982635498,
"eval_runtime": 2.0025,
"eval_samples_per_second": 53.434,
"eval_steps_per_second": 3.496,
"step": 280
},
{
"epoch": 36.0,
"eval_accuracy": 0.8317757009345794,
"eval_loss": 0.5089600682258606,
"eval_runtime": 1.9755,
"eval_samples_per_second": 54.164,
"eval_steps_per_second": 3.543,
"step": 288
},
{
"epoch": 36.25,
"learning_rate": 5.208333333333334e-06,
"loss": 0.217,
"step": 290
},
{
"epoch": 37.0,
"eval_accuracy": 0.822429906542056,
"eval_loss": 0.5649047493934631,
"eval_runtime": 1.991,
"eval_samples_per_second": 53.743,
"eval_steps_per_second": 3.516,
"step": 296
},
{
"epoch": 37.5,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.2111,
"step": 300
},
{
"epoch": 38.0,
"eval_accuracy": 0.8130841121495327,
"eval_loss": 0.568317174911499,
"eval_runtime": 2.0365,
"eval_samples_per_second": 52.542,
"eval_steps_per_second": 3.437,
"step": 304
},
{
"epoch": 38.75,
"learning_rate": 1.7361111111111112e-06,
"loss": 0.2085,
"step": 310
},
{
"epoch": 39.0,
"eval_accuracy": 0.822429906542056,
"eval_loss": 0.5397838950157166,
"eval_runtime": 1.9935,
"eval_samples_per_second": 53.675,
"eval_steps_per_second": 3.511,
"step": 312
},
{
"epoch": 40.0,
"learning_rate": 0.0,
"loss": 0.1912,
"step": 320
},
{
"epoch": 40.0,
"eval_accuracy": 0.822429906542056,
"eval_loss": 0.5548034310340881,
"eval_runtime": 1.978,
"eval_samples_per_second": 54.096,
"eval_steps_per_second": 3.539,
"step": 320
},
{
"epoch": 40.0,
"step": 320,
"total_flos": 1.5429806632629043e+18,
"train_loss": 0.4049976162612438,
"train_runtime": 701.3243,
"train_samples_per_second": 28.403,
"train_steps_per_second": 0.456
}
],
"logging_steps": 10,
"max_steps": 320,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 1.5429806632629043e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}