wav2vec2-btb-cv-ft-cv-cy / trainer_state.json
DewiBrynJones's picture
End of training
66894f6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.020080321285141,
"eval_steps": 200,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10040160642570281,
"eval_loss": 0.3807084858417511,
"eval_runtime": 206.6442,
"eval_samples_per_second": 26.04,
"eval_steps_per_second": 0.411,
"eval_wer": 0.2514466384298753,
"step": 200
},
{
"epoch": 0.20080321285140562,
"eval_loss": 0.253967821598053,
"eval_runtime": 206.4967,
"eval_samples_per_second": 26.059,
"eval_steps_per_second": 0.412,
"eval_wer": 0.2642923899858816,
"step": 400
},
{
"epoch": 0.25100401606425704,
"grad_norm": 1.857898235321045,
"learning_rate": 0.0001491,
"loss": 2.4874,
"step": 500
},
{
"epoch": 0.30120481927710846,
"eval_loss": 0.2642447352409363,
"eval_runtime": 205.9594,
"eval_samples_per_second": 26.127,
"eval_steps_per_second": 0.413,
"eval_wer": 0.30376424267732505,
"step": 600
},
{
"epoch": 0.40160642570281124,
"eval_loss": 0.3125462234020233,
"eval_runtime": 205.1387,
"eval_samples_per_second": 26.231,
"eval_steps_per_second": 0.414,
"eval_wer": 0.39048300821253157,
"step": 800
},
{
"epoch": 0.5020080321285141,
"grad_norm": 3.634869337081909,
"learning_rate": 0.00029909999999999995,
"loss": 0.3991,
"step": 1000
},
{
"epoch": 0.5020080321285141,
"eval_loss": 0.3531426191329956,
"eval_runtime": 205.1455,
"eval_samples_per_second": 26.23,
"eval_steps_per_second": 0.414,
"eval_wer": 0.3939430094056354,
"step": 1000
},
{
"epoch": 0.6024096385542169,
"eval_loss": 0.3571958839893341,
"eval_runtime": 209.3856,
"eval_samples_per_second": 25.699,
"eval_steps_per_second": 0.406,
"eval_wer": 0.40390542663405515,
"step": 1200
},
{
"epoch": 0.7028112449799196,
"eval_loss": 0.36791086196899414,
"eval_runtime": 206.8164,
"eval_samples_per_second": 26.018,
"eval_steps_per_second": 0.411,
"eval_wer": 0.4052576110083716,
"step": 1400
},
{
"epoch": 0.7530120481927711,
"grad_norm": 3.5483558177948,
"learning_rate": 0.0002834333333333333,
"loss": 0.4512,
"step": 1500
},
{
"epoch": 0.8032128514056225,
"eval_loss": 0.35897189378738403,
"eval_runtime": 207.7252,
"eval_samples_per_second": 25.904,
"eval_steps_per_second": 0.409,
"eval_wer": 0.38767921414225776,
"step": 1600
},
{
"epoch": 0.9036144578313253,
"eval_loss": 0.3732704222202301,
"eval_runtime": 205.6494,
"eval_samples_per_second": 26.166,
"eval_steps_per_second": 0.413,
"eval_wer": 0.4006840462128895,
"step": 1800
},
{
"epoch": 1.0040160642570282,
"grad_norm": 2.2905380725860596,
"learning_rate": 0.00026676666666666663,
"loss": 0.4333,
"step": 2000
},
{
"epoch": 1.0040160642570282,
"eval_loss": 0.377088725566864,
"eval_runtime": 204.613,
"eval_samples_per_second": 26.298,
"eval_steps_per_second": 0.415,
"eval_wer": 0.4243273876990992,
"step": 2000
},
{
"epoch": 1.104417670682731,
"eval_loss": 0.3604430556297302,
"eval_runtime": 208.0965,
"eval_samples_per_second": 25.858,
"eval_steps_per_second": 0.408,
"eval_wer": 0.3867048459901768,
"step": 2200
},
{
"epoch": 1.2048192771084336,
"eval_loss": 0.3431110978126526,
"eval_runtime": 206.2637,
"eval_samples_per_second": 26.088,
"eval_steps_per_second": 0.412,
"eval_wer": 0.38137564875022373,
"step": 2400
},
{
"epoch": 1.2550200803212852,
"grad_norm": 1.8699342012405396,
"learning_rate": 0.00025009999999999995,
"loss": 0.3468,
"step": 2500
},
{
"epoch": 1.3052208835341366,
"eval_loss": 0.32902058959007263,
"eval_runtime": 205.4777,
"eval_samples_per_second": 26.188,
"eval_steps_per_second": 0.414,
"eval_wer": 0.3778559923641353,
"step": 2600
},
{
"epoch": 1.4056224899598393,
"eval_loss": 0.33407700061798096,
"eval_runtime": 205.6359,
"eval_samples_per_second": 26.168,
"eval_steps_per_second": 0.413,
"eval_wer": 0.3647119648432063,
"step": 2800
},
{
"epoch": 1.5060240963855422,
"grad_norm": 2.062389373779297,
"learning_rate": 0.0002334333333333333,
"loss": 0.3503,
"step": 3000
},
{
"epoch": 1.5060240963855422,
"eval_loss": 0.3247535228729248,
"eval_runtime": 206.3116,
"eval_samples_per_second": 26.082,
"eval_steps_per_second": 0.412,
"eval_wer": 0.3614706993577124,
"step": 3000
},
{
"epoch": 1.606425702811245,
"eval_loss": 0.33116209506988525,
"eval_runtime": 203.9912,
"eval_samples_per_second": 26.379,
"eval_steps_per_second": 0.417,
"eval_wer": 0.35512736383702204,
"step": 3200
},
{
"epoch": 1.7068273092369477,
"eval_loss": 0.3410908281803131,
"eval_runtime": 204.5054,
"eval_samples_per_second": 26.312,
"eval_steps_per_second": 0.416,
"eval_wer": 0.3836226610193084,
"step": 3400
},
{
"epoch": 1.7570281124497993,
"grad_norm": 0.9907544255256653,
"learning_rate": 0.00021679999999999998,
"loss": 0.3418,
"step": 3500
},
{
"epoch": 1.8072289156626506,
"eval_loss": 0.3116574287414551,
"eval_runtime": 205.0392,
"eval_samples_per_second": 26.244,
"eval_steps_per_second": 0.415,
"eval_wer": 0.33752908190658,
"step": 3600
},
{
"epoch": 1.9076305220883534,
"eval_loss": 0.3196774423122406,
"eval_runtime": 206.2716,
"eval_samples_per_second": 26.087,
"eval_steps_per_second": 0.412,
"eval_wer": 0.34317644017578397,
"step": 3800
},
{
"epoch": 2.0080321285140563,
"grad_norm": 1.0384626388549805,
"learning_rate": 0.0002001333333333333,
"loss": 0.3181,
"step": 4000
},
{
"epoch": 2.0080321285140563,
"eval_loss": 0.30675315856933594,
"eval_runtime": 206.0737,
"eval_samples_per_second": 26.112,
"eval_steps_per_second": 0.412,
"eval_wer": 0.3339696553918352,
"step": 4000
},
{
"epoch": 2.108433734939759,
"eval_loss": 0.31376445293426514,
"eval_runtime": 209.2791,
"eval_samples_per_second": 25.712,
"eval_steps_per_second": 0.406,
"eval_wer": 0.3358388514386844,
"step": 4200
},
{
"epoch": 2.208835341365462,
"eval_loss": 0.31388720870018005,
"eval_runtime": 204.9118,
"eval_samples_per_second": 26.26,
"eval_steps_per_second": 0.415,
"eval_wer": 0.3333731034619897,
"step": 4400
},
{
"epoch": 2.2590361445783134,
"grad_norm": 0.5868389010429382,
"learning_rate": 0.00018346666666666664,
"loss": 0.2423,
"step": 4500
},
{
"epoch": 2.3092369477911645,
"eval_loss": 0.3191888928413391,
"eval_runtime": 204.834,
"eval_samples_per_second": 26.27,
"eval_steps_per_second": 0.415,
"eval_wer": 0.32848137763725666,
"step": 4600
},
{
"epoch": 2.4096385542168672,
"eval_loss": 0.2928995192050934,
"eval_runtime": 204.43,
"eval_samples_per_second": 26.322,
"eval_steps_per_second": 0.416,
"eval_wer": 0.31682872994094136,
"step": 4800
},
{
"epoch": 2.5100401606425704,
"grad_norm": 1.3247759342193604,
"learning_rate": 0.0001668,
"loss": 0.2327,
"step": 5000
},
{
"epoch": 2.5100401606425704,
"eval_loss": 0.29208171367645264,
"eval_runtime": 206.3612,
"eval_samples_per_second": 26.076,
"eval_steps_per_second": 0.412,
"eval_wer": 0.3103064288412973,
"step": 5000
},
{
"epoch": 2.610441767068273,
"eval_loss": 0.2801830470561981,
"eval_runtime": 204.3678,
"eval_samples_per_second": 26.33,
"eval_steps_per_second": 0.416,
"eval_wer": 0.3037443576129969,
"step": 5200
},
{
"epoch": 2.710843373493976,
"eval_loss": 0.2811721861362457,
"eval_runtime": 204.4403,
"eval_samples_per_second": 26.321,
"eval_steps_per_second": 0.416,
"eval_wer": 0.29624768836127185,
"step": 5400
},
{
"epoch": 2.7610441767068274,
"grad_norm": 1.381541132926941,
"learning_rate": 0.00015013333333333331,
"loss": 0.2374,
"step": 5500
},
{
"epoch": 2.8112449799196786,
"eval_loss": 0.28872984647750854,
"eval_runtime": 204.3069,
"eval_samples_per_second": 26.338,
"eval_steps_per_second": 0.416,
"eval_wer": 0.30422159915687325,
"step": 5600
},
{
"epoch": 2.9116465863453813,
"eval_loss": 0.27397701144218445,
"eval_runtime": 204.1464,
"eval_samples_per_second": 26.359,
"eval_steps_per_second": 0.416,
"eval_wer": 0.2927081469108553,
"step": 5800
},
{
"epoch": 3.0120481927710845,
"grad_norm": 1.4617916345596313,
"learning_rate": 0.00013346666666666667,
"loss": 0.2136,
"step": 6000
},
{
"epoch": 3.0120481927710845,
"eval_loss": 0.2662462592124939,
"eval_runtime": 203.8941,
"eval_samples_per_second": 26.391,
"eval_steps_per_second": 0.417,
"eval_wer": 0.28296446539004555,
"step": 6000
},
{
"epoch": 3.112449799196787,
"eval_loss": 0.28285130858421326,
"eval_runtime": 206.1704,
"eval_samples_per_second": 26.1,
"eval_steps_per_second": 0.412,
"eval_wer": 0.2890294100101414,
"step": 6200
},
{
"epoch": 3.21285140562249,
"eval_loss": 0.2729070484638214,
"eval_runtime": 206.2438,
"eval_samples_per_second": 26.09,
"eval_steps_per_second": 0.412,
"eval_wer": 0.28692159319135396,
"step": 6400
},
{
"epoch": 3.2630522088353415,
"grad_norm": 0.8870707750320435,
"learning_rate": 0.00011679999999999998,
"loss": 0.167,
"step": 6500
},
{
"epoch": 3.3132530120481927,
"eval_loss": 0.2776893675327301,
"eval_runtime": 204.2022,
"eval_samples_per_second": 26.351,
"eval_steps_per_second": 0.416,
"eval_wer": 0.28892998468850045,
"step": 6600
},
{
"epoch": 3.4136546184738954,
"eval_loss": 0.2711654603481293,
"eval_runtime": 203.7376,
"eval_samples_per_second": 26.411,
"eval_steps_per_second": 0.417,
"eval_wer": 0.28095607389289906,
"step": 6800
},
{
"epoch": 3.5140562248995986,
"grad_norm": 1.0165985822677612,
"learning_rate": 0.00010013333333333333,
"loss": 0.1614,
"step": 7000
},
{
"epoch": 3.5140562248995986,
"eval_loss": 0.2688385844230652,
"eval_runtime": 204.1623,
"eval_samples_per_second": 26.356,
"eval_steps_per_second": 0.416,
"eval_wer": 0.27091411640716656,
"step": 7000
},
{
"epoch": 3.6144578313253013,
"eval_loss": 0.2589295208454132,
"eval_runtime": 205.2749,
"eval_samples_per_second": 26.214,
"eval_steps_per_second": 0.414,
"eval_wer": 0.26626101135437175,
"step": 7200
},
{
"epoch": 3.714859437751004,
"eval_loss": 0.26514673233032227,
"eval_runtime": 204.2135,
"eval_samples_per_second": 26.35,
"eval_steps_per_second": 0.416,
"eval_wer": 0.2669768736701863,
"step": 7400
},
{
"epoch": 3.765060240963855,
"grad_norm": 0.7397546172142029,
"learning_rate": 8.346666666666666e-05,
"loss": 0.1529,
"step": 7500
},
{
"epoch": 3.8152610441767068,
"eval_loss": 0.25074735283851624,
"eval_runtime": 204.2336,
"eval_samples_per_second": 26.347,
"eval_steps_per_second": 0.416,
"eval_wer": 0.2637157231203643,
"step": 7600
},
{
"epoch": 3.9156626506024095,
"eval_loss": 0.2493942528963089,
"eval_runtime": 206.072,
"eval_samples_per_second": 26.112,
"eval_steps_per_second": 0.412,
"eval_wer": 0.2567957207341566,
"step": 7800
},
{
"epoch": 4.016064257028113,
"grad_norm": 0.785851776599884,
"learning_rate": 6.68e-05,
"loss": 0.1496,
"step": 8000
},
{
"epoch": 4.016064257028113,
"eval_loss": 0.25821030139923096,
"eval_runtime": 204.9558,
"eval_samples_per_second": 26.254,
"eval_steps_per_second": 0.415,
"eval_wer": 0.2580484797868321,
"step": 8000
},
{
"epoch": 4.116465863453815,
"eval_loss": 0.2650238871574402,
"eval_runtime": 204.7852,
"eval_samples_per_second": 26.276,
"eval_steps_per_second": 0.415,
"eval_wer": 0.25753146811429933,
"step": 8200
},
{
"epoch": 4.216867469879518,
"eval_loss": 0.26561084389686584,
"eval_runtime": 210.1403,
"eval_samples_per_second": 25.607,
"eval_steps_per_second": 0.404,
"eval_wer": 0.25598043309670104,
"step": 8400
},
{
"epoch": 4.267068273092369,
"grad_norm": 0.34119465947151184,
"learning_rate": 5.013333333333332e-05,
"loss": 0.1128,
"step": 8500
},
{
"epoch": 4.317269076305221,
"eval_loss": 0.25430822372436523,
"eval_runtime": 203.852,
"eval_samples_per_second": 26.397,
"eval_steps_per_second": 0.417,
"eval_wer": 0.25118813259360895,
"step": 8600
},
{
"epoch": 4.417670682730924,
"eval_loss": 0.2586837112903595,
"eval_runtime": 202.7677,
"eval_samples_per_second": 26.538,
"eval_steps_per_second": 0.419,
"eval_wer": 0.24987571834794886,
"step": 8800
},
{
"epoch": 4.518072289156627,
"grad_norm": 0.5558347105979919,
"learning_rate": 3.346666666666666e-05,
"loss": 0.1109,
"step": 9000
},
{
"epoch": 4.518072289156627,
"eval_loss": 0.2540307939052582,
"eval_runtime": 202.8954,
"eval_samples_per_second": 26.521,
"eval_steps_per_second": 0.419,
"eval_wer": 0.24599813080395316,
"step": 9000
},
{
"epoch": 4.618473895582329,
"eval_loss": 0.2546459436416626,
"eval_runtime": 208.4343,
"eval_samples_per_second": 25.816,
"eval_steps_per_second": 0.408,
"eval_wer": 0.24245858935353656,
"step": 9200
},
{
"epoch": 4.718875502008032,
"eval_loss": 0.25800344347953796,
"eval_runtime": 205.7304,
"eval_samples_per_second": 26.156,
"eval_steps_per_second": 0.413,
"eval_wer": 0.24198134780966016,
"step": 9400
},
{
"epoch": 4.769076305220883,
"grad_norm": 0.9226210117340088,
"learning_rate": 1.68e-05,
"loss": 0.1028,
"step": 9500
},
{
"epoch": 4.8192771084337345,
"eval_loss": 0.25135332345962524,
"eval_runtime": 203.1799,
"eval_samples_per_second": 26.484,
"eval_steps_per_second": 0.418,
"eval_wer": 0.24035077253474915,
"step": 9600
},
{
"epoch": 4.919678714859438,
"eval_loss": 0.2509777843952179,
"eval_runtime": 203.2596,
"eval_samples_per_second": 26.474,
"eval_steps_per_second": 0.418,
"eval_wer": 0.24025134721310823,
"step": 9800
},
{
"epoch": 5.020080321285141,
"grad_norm": 4.224822521209717,
"learning_rate": 1.3333333333333334e-07,
"loss": 0.1069,
"step": 10000
},
{
"epoch": 5.020080321285141,
"eval_loss": 0.2515573501586914,
"eval_runtime": 209.1262,
"eval_samples_per_second": 25.731,
"eval_steps_per_second": 0.406,
"eval_wer": 0.24033088747042097,
"step": 10000
},
{
"epoch": 5.020080321285141,
"step": 10000,
"total_flos": 6.356146932571761e+18,
"train_loss": 0.3559208065032959,
"train_runtime": 12994.9936,
"train_samples_per_second": 3.078,
"train_steps_per_second": 0.77
}
],
"logging_steps": 500,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.356146932571761e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}