|
{
|
|
"best_metric": 0.8317757009345794,
|
|
"best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-248",
|
|
"epoch": 40.0,
|
|
"eval_steps": 500,
|
|
"global_step": 320,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_accuracy": 0.5887850467289719,
|
|
"eval_loss": 0.6887365579605103,
|
|
"eval_runtime": 2.8498,
|
|
"eval_samples_per_second": 37.546,
|
|
"eval_steps_per_second": 2.456,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.5625e-05,
|
|
"loss": 0.692,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_accuracy": 0.5887850467289719,
|
|
"eval_loss": 0.678210437297821,
|
|
"eval_runtime": 1.9785,
|
|
"eval_samples_per_second": 54.082,
|
|
"eval_steps_per_second": 3.538,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 3.125e-05,
|
|
"loss": 0.6801,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_accuracy": 0.5887850467289719,
|
|
"eval_loss": 0.6668981313705444,
|
|
"eval_runtime": 2.0656,
|
|
"eval_samples_per_second": 51.802,
|
|
"eval_steps_per_second": 3.389,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 4.6875e-05,
|
|
"loss": 0.6696,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.5887850467289719,
|
|
"eval_loss": 0.6644209623336792,
|
|
"eval_runtime": 2.1267,
|
|
"eval_samples_per_second": 50.313,
|
|
"eval_steps_per_second": 3.291,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 4.8611111111111115e-05,
|
|
"loss": 0.6607,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_accuracy": 0.6635514018691588,
|
|
"eval_loss": 0.6661449074745178,
|
|
"eval_runtime": 1.995,
|
|
"eval_samples_per_second": 53.635,
|
|
"eval_steps_per_second": 3.509,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"eval_accuracy": 0.6542056074766355,
|
|
"eval_loss": 0.6241438388824463,
|
|
"eval_runtime": 2.0189,
|
|
"eval_samples_per_second": 52.999,
|
|
"eval_steps_per_second": 3.467,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"learning_rate": 4.6875e-05,
|
|
"loss": 0.6341,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"eval_accuracy": 0.6542056074766355,
|
|
"eval_loss": 0.6234968900680542,
|
|
"eval_runtime": 1.9955,
|
|
"eval_samples_per_second": 53.622,
|
|
"eval_steps_per_second": 3.508,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 7.5,
|
|
"learning_rate": 4.5138888888888894e-05,
|
|
"loss": 0.6089,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.6915887850467289,
|
|
"eval_loss": 0.6088296175003052,
|
|
"eval_runtime": 2.06,
|
|
"eval_samples_per_second": 51.942,
|
|
"eval_steps_per_second": 3.398,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 8.75,
|
|
"learning_rate": 4.340277777777778e-05,
|
|
"loss": 0.6095,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"eval_accuracy": 0.6915887850467289,
|
|
"eval_loss": 0.5911644101142883,
|
|
"eval_runtime": 2.01,
|
|
"eval_samples_per_second": 53.235,
|
|
"eval_steps_per_second": 3.483,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"learning_rate": 4.166666666666667e-05,
|
|
"loss": 0.5632,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"eval_accuracy": 0.6355140186915887,
|
|
"eval_loss": 0.660692036151886,
|
|
"eval_runtime": 2.126,
|
|
"eval_samples_per_second": 50.329,
|
|
"eval_steps_per_second": 3.293,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"eval_accuracy": 0.7009345794392523,
|
|
"eval_loss": 0.5792553424835205,
|
|
"eval_runtime": 1.972,
|
|
"eval_samples_per_second": 54.261,
|
|
"eval_steps_per_second": 3.55,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 11.25,
|
|
"learning_rate": 3.993055555555556e-05,
|
|
"loss": 0.5418,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.6822429906542056,
|
|
"eval_loss": 0.5953279733657837,
|
|
"eval_runtime": 2.2228,
|
|
"eval_samples_per_second": 48.137,
|
|
"eval_steps_per_second": 3.149,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 12.5,
|
|
"learning_rate": 3.8194444444444444e-05,
|
|
"loss": 0.5336,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"eval_accuracy": 0.7102803738317757,
|
|
"eval_loss": 0.5792534947395325,
|
|
"eval_runtime": 1.991,
|
|
"eval_samples_per_second": 53.742,
|
|
"eval_steps_per_second": 3.516,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 13.75,
|
|
"learning_rate": 3.6458333333333336e-05,
|
|
"loss": 0.5102,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"eval_accuracy": 0.719626168224299,
|
|
"eval_loss": 0.5291872024536133,
|
|
"eval_runtime": 2.0585,
|
|
"eval_samples_per_second": 51.98,
|
|
"eval_steps_per_second": 3.401,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"learning_rate": 3.472222222222222e-05,
|
|
"loss": 0.4762,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"eval_accuracy": 0.7009345794392523,
|
|
"eval_loss": 0.6557727456092834,
|
|
"eval_runtime": 1.9985,
|
|
"eval_samples_per_second": 53.541,
|
|
"eval_steps_per_second": 3.503,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.7102803738317757,
|
|
"eval_loss": 0.5371208786964417,
|
|
"eval_runtime": 1.971,
|
|
"eval_samples_per_second": 54.288,
|
|
"eval_steps_per_second": 3.552,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 16.25,
|
|
"learning_rate": 3.2986111111111115e-05,
|
|
"loss": 0.544,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"eval_accuracy": 0.7570093457943925,
|
|
"eval_loss": 0.5400705337524414,
|
|
"eval_runtime": 1.981,
|
|
"eval_samples_per_second": 54.014,
|
|
"eval_steps_per_second": 3.534,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 17.5,
|
|
"learning_rate": 3.125e-05,
|
|
"loss": 0.4256,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"eval_accuracy": 0.794392523364486,
|
|
"eval_loss": 0.49267861247062683,
|
|
"eval_runtime": 1.9622,
|
|
"eval_samples_per_second": 54.531,
|
|
"eval_steps_per_second": 3.567,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 18.75,
|
|
"learning_rate": 2.951388888888889e-05,
|
|
"loss": 0.4082,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"eval_accuracy": 0.7383177570093458,
|
|
"eval_loss": 0.5800967216491699,
|
|
"eval_runtime": 1.9825,
|
|
"eval_samples_per_second": 53.973,
|
|
"eval_steps_per_second": 3.531,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"learning_rate": 2.777777777777778e-05,
|
|
"loss": 0.4014,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.7383177570093458,
|
|
"eval_loss": 0.5822688937187195,
|
|
"eval_runtime": 1.985,
|
|
"eval_samples_per_second": 53.905,
|
|
"eval_steps_per_second": 3.526,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"eval_accuracy": 0.7757009345794392,
|
|
"eval_loss": 0.5392723083496094,
|
|
"eval_runtime": 2.117,
|
|
"eval_samples_per_second": 50.542,
|
|
"eval_steps_per_second": 3.306,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 21.25,
|
|
"learning_rate": 2.604166666666667e-05,
|
|
"loss": 0.3483,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"eval_accuracy": 0.7102803738317757,
|
|
"eval_loss": 0.5940819382667542,
|
|
"eval_runtime": 1.9765,
|
|
"eval_samples_per_second": 54.137,
|
|
"eval_steps_per_second": 3.542,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 22.5,
|
|
"learning_rate": 2.4305555555555558e-05,
|
|
"loss": 0.3121,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 23.0,
|
|
"eval_accuracy": 0.7383177570093458,
|
|
"eval_loss": 0.5568514466285706,
|
|
"eval_runtime": 2.1005,
|
|
"eval_samples_per_second": 50.94,
|
|
"eval_steps_per_second": 3.333,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 23.75,
|
|
"learning_rate": 2.2569444444444447e-05,
|
|
"loss": 0.3484,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"eval_accuracy": 0.7663551401869159,
|
|
"eval_loss": 0.5975044369697571,
|
|
"eval_runtime": 1.97,
|
|
"eval_samples_per_second": 54.316,
|
|
"eval_steps_per_second": 3.553,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"learning_rate": 2.0833333333333336e-05,
|
|
"loss": 0.263,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"eval_accuracy": 0.7570093457943925,
|
|
"eval_loss": 0.6544022560119629,
|
|
"eval_runtime": 2.3716,
|
|
"eval_samples_per_second": 45.118,
|
|
"eval_steps_per_second": 2.952,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"eval_accuracy": 0.7757009345794392,
|
|
"eval_loss": 0.5743973851203918,
|
|
"eval_runtime": 2.0105,
|
|
"eval_samples_per_second": 53.221,
|
|
"eval_steps_per_second": 3.482,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 26.25,
|
|
"learning_rate": 1.9097222222222222e-05,
|
|
"loss": 0.2633,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"eval_accuracy": 0.7663551401869159,
|
|
"eval_loss": 0.6095036864280701,
|
|
"eval_runtime": 1.9725,
|
|
"eval_samples_per_second": 54.247,
|
|
"eval_steps_per_second": 3.549,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 27.5,
|
|
"learning_rate": 1.736111111111111e-05,
|
|
"loss": 0.2935,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"eval_accuracy": 0.7663551401869159,
|
|
"eval_loss": 0.528620719909668,
|
|
"eval_runtime": 2.0605,
|
|
"eval_samples_per_second": 51.929,
|
|
"eval_steps_per_second": 3.397,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 28.75,
|
|
"learning_rate": 1.5625e-05,
|
|
"loss": 0.2332,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 29.0,
|
|
"eval_accuracy": 0.7850467289719626,
|
|
"eval_loss": 0.6027860045433044,
|
|
"eval_runtime": 1.9665,
|
|
"eval_samples_per_second": 54.412,
|
|
"eval_steps_per_second": 3.56,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 30.0,
|
|
"learning_rate": 1.388888888888889e-05,
|
|
"loss": 0.2314,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 30.0,
|
|
"eval_accuracy": 0.794392523364486,
|
|
"eval_loss": 0.5935384631156921,
|
|
"eval_runtime": 1.962,
|
|
"eval_samples_per_second": 54.537,
|
|
"eval_steps_per_second": 3.568,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 31.0,
|
|
"eval_accuracy": 0.8317757009345794,
|
|
"eval_loss": 0.5392867922782898,
|
|
"eval_runtime": 1.969,
|
|
"eval_samples_per_second": 54.343,
|
|
"eval_steps_per_second": 3.555,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 31.25,
|
|
"learning_rate": 1.2152777777777779e-05,
|
|
"loss": 0.202,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"eval_accuracy": 0.822429906542056,
|
|
"eval_loss": 0.5556337833404541,
|
|
"eval_runtime": 2.012,
|
|
"eval_samples_per_second": 53.182,
|
|
"eval_steps_per_second": 3.479,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 32.5,
|
|
"learning_rate": 1.0416666666666668e-05,
|
|
"loss": 0.2127,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 33.0,
|
|
"eval_accuracy": 0.8037383177570093,
|
|
"eval_loss": 0.5912833213806152,
|
|
"eval_runtime": 1.98,
|
|
"eval_samples_per_second": 54.041,
|
|
"eval_steps_per_second": 3.535,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 33.75,
|
|
"learning_rate": 8.680555555555556e-06,
|
|
"loss": 0.2035,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 34.0,
|
|
"eval_accuracy": 0.8037383177570093,
|
|
"eval_loss": 0.5337203741073608,
|
|
"eval_runtime": 1.9834,
|
|
"eval_samples_per_second": 53.947,
|
|
"eval_steps_per_second": 3.529,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 35.0,
|
|
"learning_rate": 6.944444444444445e-06,
|
|
"loss": 0.2618,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 35.0,
|
|
"eval_accuracy": 0.8037383177570093,
|
|
"eval_loss": 0.622107982635498,
|
|
"eval_runtime": 2.0025,
|
|
"eval_samples_per_second": 53.434,
|
|
"eval_steps_per_second": 3.496,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 36.0,
|
|
"eval_accuracy": 0.8317757009345794,
|
|
"eval_loss": 0.5089600682258606,
|
|
"eval_runtime": 1.9755,
|
|
"eval_samples_per_second": 54.164,
|
|
"eval_steps_per_second": 3.543,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 36.25,
|
|
"learning_rate": 5.208333333333334e-06,
|
|
"loss": 0.217,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 37.0,
|
|
"eval_accuracy": 0.822429906542056,
|
|
"eval_loss": 0.5649047493934631,
|
|
"eval_runtime": 1.991,
|
|
"eval_samples_per_second": 53.743,
|
|
"eval_steps_per_second": 3.516,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 37.5,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": 0.2111,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 38.0,
|
|
"eval_accuracy": 0.8130841121495327,
|
|
"eval_loss": 0.568317174911499,
|
|
"eval_runtime": 2.0365,
|
|
"eval_samples_per_second": 52.542,
|
|
"eval_steps_per_second": 3.437,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 38.75,
|
|
"learning_rate": 1.7361111111111112e-06,
|
|
"loss": 0.2085,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 39.0,
|
|
"eval_accuracy": 0.822429906542056,
|
|
"eval_loss": 0.5397838950157166,
|
|
"eval_runtime": 1.9935,
|
|
"eval_samples_per_second": 53.675,
|
|
"eval_steps_per_second": 3.511,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.1912,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"eval_accuracy": 0.822429906542056,
|
|
"eval_loss": 0.5548034310340881,
|
|
"eval_runtime": 1.978,
|
|
"eval_samples_per_second": 54.096,
|
|
"eval_steps_per_second": 3.539,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"step": 320,
|
|
"total_flos": 1.5429806632629043e+18,
|
|
"train_loss": 0.4049976162612438,
|
|
"train_runtime": 701.3243,
|
|
"train_samples_per_second": 28.403,
|
|
"train_steps_per_second": 0.456
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 320,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 40,
|
|
"save_steps": 500,
|
|
"total_flos": 1.5429806632629043e+18,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|