{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.020080321285141,
  "eval_steps": 200,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10040160642570281,
      "eval_loss": 0.3807084858417511,
      "eval_runtime": 206.6442,
      "eval_samples_per_second": 26.04,
      "eval_steps_per_second": 0.411,
      "eval_wer": 0.2514466384298753,
      "step": 200
    },
    {
      "epoch": 0.20080321285140562,
      "eval_loss": 0.253967821598053,
      "eval_runtime": 206.4967,
      "eval_samples_per_second": 26.059,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.2642923899858816,
      "step": 400
    },
    {
      "epoch": 0.25100401606425704,
      "grad_norm": 1.857898235321045,
      "learning_rate": 0.0001491,
      "loss": 2.4874,
      "step": 500
    },
    {
      "epoch": 0.30120481927710846,
      "eval_loss": 0.2642447352409363,
      "eval_runtime": 205.9594,
      "eval_samples_per_second": 26.127,
      "eval_steps_per_second": 0.413,
      "eval_wer": 0.30376424267732505,
      "step": 600
    },
    {
      "epoch": 0.40160642570281124,
      "eval_loss": 0.3125462234020233,
      "eval_runtime": 205.1387,
      "eval_samples_per_second": 26.231,
      "eval_steps_per_second": 0.414,
      "eval_wer": 0.39048300821253157,
      "step": 800
    },
    {
      "epoch": 0.5020080321285141,
      "grad_norm": 3.634869337081909,
      "learning_rate": 0.00029909999999999995,
      "loss": 0.3991,
      "step": 1000
    },
    {
      "epoch": 0.5020080321285141,
      "eval_loss": 0.3531426191329956,
      "eval_runtime": 205.1455,
      "eval_samples_per_second": 26.23,
      "eval_steps_per_second": 0.414,
      "eval_wer": 0.3939430094056354,
      "step": 1000
    },
    {
      "epoch": 0.6024096385542169,
      "eval_loss": 0.3571958839893341,
      "eval_runtime": 209.3856,
      "eval_samples_per_second": 25.699,
      "eval_steps_per_second": 0.406,
      "eval_wer": 0.40390542663405515,
      "step": 1200
    },
    {
      "epoch": 0.7028112449799196,
      "eval_loss": 0.36791086196899414,
      "eval_runtime": 206.8164,
      "eval_samples_per_second": 26.018,
      "eval_steps_per_second": 0.411,
      "eval_wer": 0.4052576110083716,
      "step": 1400
    },
    {
      "epoch": 0.7530120481927711,
      "grad_norm": 3.5483558177948,
      "learning_rate": 0.0002834333333333333,
      "loss": 0.4512,
      "step": 1500
    },
    {
      "epoch": 0.8032128514056225,
      "eval_loss": 0.35897189378738403,
      "eval_runtime": 207.7252,
      "eval_samples_per_second": 25.904,
      "eval_steps_per_second": 0.409,
      "eval_wer": 0.38767921414225776,
      "step": 1600
    },
    {
      "epoch": 0.9036144578313253,
      "eval_loss": 0.3732704222202301,
      "eval_runtime": 205.6494,
      "eval_samples_per_second": 26.166,
      "eval_steps_per_second": 0.413,
      "eval_wer": 0.4006840462128895,
      "step": 1800
    },
    {
      "epoch": 1.0040160642570282,
      "grad_norm": 2.2905380725860596,
      "learning_rate": 0.00026676666666666663,
      "loss": 0.4333,
      "step": 2000
    },
    {
      "epoch": 1.0040160642570282,
      "eval_loss": 0.377088725566864,
      "eval_runtime": 204.613,
      "eval_samples_per_second": 26.298,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.4243273876990992,
      "step": 2000
    },
    {
      "epoch": 1.104417670682731,
      "eval_loss": 0.3604430556297302,
      "eval_runtime": 208.0965,
      "eval_samples_per_second": 25.858,
      "eval_steps_per_second": 0.408,
      "eval_wer": 0.3867048459901768,
      "step": 2200
    },
    {
      "epoch": 1.2048192771084336,
      "eval_loss": 0.3431110978126526,
      "eval_runtime": 206.2637,
      "eval_samples_per_second": 26.088,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.38137564875022373,
      "step": 2400
    },
    {
      "epoch": 1.2550200803212852,
      "grad_norm": 1.8699342012405396,
      "learning_rate": 0.00025009999999999995,
      "loss": 0.3468,
      "step": 2500
    },
    {
      "epoch": 1.3052208835341366,
      "eval_loss": 0.32902058959007263,
      "eval_runtime": 205.4777,
      "eval_samples_per_second": 26.188,
      "eval_steps_per_second": 0.414,
      "eval_wer": 0.3778559923641353,
      "step": 2600
    },
    {
      "epoch": 1.4056224899598393,
      "eval_loss": 0.33407700061798096,
      "eval_runtime": 205.6359,
      "eval_samples_per_second": 26.168,
      "eval_steps_per_second": 0.413,
      "eval_wer": 0.3647119648432063,
      "step": 2800
    },
    {
      "epoch": 1.5060240963855422,
      "grad_norm": 2.062389373779297,
      "learning_rate": 0.0002334333333333333,
      "loss": 0.3503,
      "step": 3000
    },
    {
      "epoch": 1.5060240963855422,
      "eval_loss": 0.3247535228729248,
      "eval_runtime": 206.3116,
      "eval_samples_per_second": 26.082,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.3614706993577124,
      "step": 3000
    },
    {
      "epoch": 1.606425702811245,
      "eval_loss": 0.33116209506988525,
      "eval_runtime": 203.9912,
      "eval_samples_per_second": 26.379,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.35512736383702204,
      "step": 3200
    },
    {
      "epoch": 1.7068273092369477,
      "eval_loss": 0.3410908281803131,
      "eval_runtime": 204.5054,
      "eval_samples_per_second": 26.312,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.3836226610193084,
      "step": 3400
    },
    {
      "epoch": 1.7570281124497993,
      "grad_norm": 0.9907544255256653,
      "learning_rate": 0.00021679999999999998,
      "loss": 0.3418,
      "step": 3500
    },
    {
      "epoch": 1.8072289156626506,
      "eval_loss": 0.3116574287414551,
      "eval_runtime": 205.0392,
      "eval_samples_per_second": 26.244,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.33752908190658,
      "step": 3600
    },
    {
      "epoch": 1.9076305220883534,
      "eval_loss": 0.3196774423122406,
      "eval_runtime": 206.2716,
      "eval_samples_per_second": 26.087,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.34317644017578397,
      "step": 3800
    },
    {
      "epoch": 2.0080321285140563,
      "grad_norm": 1.0384626388549805,
      "learning_rate": 0.0002001333333333333,
      "loss": 0.3181,
      "step": 4000
    },
    {
      "epoch": 2.0080321285140563,
      "eval_loss": 0.30675315856933594,
      "eval_runtime": 206.0737,
      "eval_samples_per_second": 26.112,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.3339696553918352,
      "step": 4000
    },
    {
      "epoch": 2.108433734939759,
      "eval_loss": 0.31376445293426514,
      "eval_runtime": 209.2791,
      "eval_samples_per_second": 25.712,
      "eval_steps_per_second": 0.406,
      "eval_wer": 0.3358388514386844,
      "step": 4200
    },
    {
      "epoch": 2.208835341365462,
      "eval_loss": 0.31388720870018005,
      "eval_runtime": 204.9118,
      "eval_samples_per_second": 26.26,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.3333731034619897,
      "step": 4400
    },
    {
      "epoch": 2.2590361445783134,
      "grad_norm": 0.5868389010429382,
      "learning_rate": 0.00018346666666666664,
      "loss": 0.2423,
      "step": 4500
    },
    {
      "epoch": 2.3092369477911645,
      "eval_loss": 0.3191888928413391,
      "eval_runtime": 204.834,
      "eval_samples_per_second": 26.27,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.32848137763725666,
      "step": 4600
    },
    {
      "epoch": 2.4096385542168672,
      "eval_loss": 0.2928995192050934,
      "eval_runtime": 204.43,
      "eval_samples_per_second": 26.322,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.31682872994094136,
      "step": 4800
    },
    {
      "epoch": 2.5100401606425704,
      "grad_norm": 1.3247759342193604,
      "learning_rate": 0.0001668,
      "loss": 0.2327,
      "step": 5000
    },
    {
      "epoch": 2.5100401606425704,
      "eval_loss": 0.29208171367645264,
      "eval_runtime": 206.3612,
      "eval_samples_per_second": 26.076,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.3103064288412973,
      "step": 5000
    },
    {
      "epoch": 2.610441767068273,
      "eval_loss": 0.2801830470561981,
      "eval_runtime": 204.3678,
      "eval_samples_per_second": 26.33,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.3037443576129969,
      "step": 5200
    },
    {
      "epoch": 2.710843373493976,
      "eval_loss": 0.2811721861362457,
      "eval_runtime": 204.4403,
      "eval_samples_per_second": 26.321,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.29624768836127185,
      "step": 5400
    },
    {
      "epoch": 2.7610441767068274,
      "grad_norm": 1.381541132926941,
      "learning_rate": 0.00015013333333333331,
      "loss": 0.2374,
      "step": 5500
    },
    {
      "epoch": 2.8112449799196786,
      "eval_loss": 0.28872984647750854,
      "eval_runtime": 204.3069,
      "eval_samples_per_second": 26.338,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.30422159915687325,
      "step": 5600
    },
    {
      "epoch": 2.9116465863453813,
      "eval_loss": 0.27397701144218445,
      "eval_runtime": 204.1464,
      "eval_samples_per_second": 26.359,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.2927081469108553,
      "step": 5800
    },
    {
      "epoch": 3.0120481927710845,
      "grad_norm": 1.4617916345596313,
      "learning_rate": 0.00013346666666666667,
      "loss": 0.2136,
      "step": 6000
    },
    {
      "epoch": 3.0120481927710845,
      "eval_loss": 0.2662462592124939,
      "eval_runtime": 203.8941,
      "eval_samples_per_second": 26.391,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.28296446539004555,
      "step": 6000
    },
    {
      "epoch": 3.112449799196787,
      "eval_loss": 0.28285130858421326,
      "eval_runtime": 206.1704,
      "eval_samples_per_second": 26.1,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.2890294100101414,
      "step": 6200
    },
    {
      "epoch": 3.21285140562249,
      "eval_loss": 0.2729070484638214,
      "eval_runtime": 206.2438,
      "eval_samples_per_second": 26.09,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.28692159319135396,
      "step": 6400
    },
    {
      "epoch": 3.2630522088353415,
      "grad_norm": 0.8870707750320435,
      "learning_rate": 0.00011679999999999998,
      "loss": 0.167,
      "step": 6500
    },
    {
      "epoch": 3.3132530120481927,
      "eval_loss": 0.2776893675327301,
      "eval_runtime": 204.2022,
      "eval_samples_per_second": 26.351,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.28892998468850045,
      "step": 6600
    },
    {
      "epoch": 3.4136546184738954,
      "eval_loss": 0.2711654603481293,
      "eval_runtime": 203.7376,
      "eval_samples_per_second": 26.411,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.28095607389289906,
      "step": 6800
    },
    {
      "epoch": 3.5140562248995986,
      "grad_norm": 1.0165985822677612,
      "learning_rate": 0.00010013333333333333,
      "loss": 0.1614,
      "step": 7000
    },
    {
      "epoch": 3.5140562248995986,
      "eval_loss": 0.2688385844230652,
      "eval_runtime": 204.1623,
      "eval_samples_per_second": 26.356,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.27091411640716656,
      "step": 7000
    },
    {
      "epoch": 3.6144578313253013,
      "eval_loss": 0.2589295208454132,
      "eval_runtime": 205.2749,
      "eval_samples_per_second": 26.214,
      "eval_steps_per_second": 0.414,
      "eval_wer": 0.26626101135437175,
      "step": 7200
    },
    {
      "epoch": 3.714859437751004,
      "eval_loss": 0.26514673233032227,
      "eval_runtime": 204.2135,
      "eval_samples_per_second": 26.35,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.2669768736701863,
      "step": 7400
    },
    {
      "epoch": 3.765060240963855,
      "grad_norm": 0.7397546172142029,
      "learning_rate": 8.346666666666666e-05,
      "loss": 0.1529,
      "step": 7500
    },
    {
      "epoch": 3.8152610441767068,
      "eval_loss": 0.25074735283851624,
      "eval_runtime": 204.2336,
      "eval_samples_per_second": 26.347,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.2637157231203643,
      "step": 7600
    },
    {
      "epoch": 3.9156626506024095,
      "eval_loss": 0.2493942528963089,
      "eval_runtime": 206.072,
      "eval_samples_per_second": 26.112,
      "eval_steps_per_second": 0.412,
      "eval_wer": 0.2567957207341566,
      "step": 7800
    },
    {
      "epoch": 4.016064257028113,
      "grad_norm": 0.785851776599884,
      "learning_rate": 6.68e-05,
      "loss": 0.1496,
      "step": 8000
    },
    {
      "epoch": 4.016064257028113,
      "eval_loss": 0.25821030139923096,
      "eval_runtime": 204.9558,
      "eval_samples_per_second": 26.254,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.2580484797868321,
      "step": 8000
    },
    {
      "epoch": 4.116465863453815,
      "eval_loss": 0.2650238871574402,
      "eval_runtime": 204.7852,
      "eval_samples_per_second": 26.276,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.25753146811429933,
      "step": 8200
    },
    {
      "epoch": 4.216867469879518,
      "eval_loss": 0.26561084389686584,
      "eval_runtime": 210.1403,
      "eval_samples_per_second": 25.607,
      "eval_steps_per_second": 0.404,
      "eval_wer": 0.25598043309670104,
      "step": 8400
    },
    {
      "epoch": 4.267068273092369,
      "grad_norm": 0.34119465947151184,
      "learning_rate": 5.013333333333332e-05,
      "loss": 0.1128,
      "step": 8500
    },
    {
      "epoch": 4.317269076305221,
      "eval_loss": 0.25430822372436523,
      "eval_runtime": 203.852,
      "eval_samples_per_second": 26.397,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.25118813259360895,
      "step": 8600
    },
    {
      "epoch": 4.417670682730924,
      "eval_loss": 0.2586837112903595,
      "eval_runtime": 202.7677,
      "eval_samples_per_second": 26.538,
      "eval_steps_per_second": 0.419,
      "eval_wer": 0.24987571834794886,
      "step": 8800
    },
    {
      "epoch": 4.518072289156627,
      "grad_norm": 0.5558347105979919,
      "learning_rate": 3.346666666666666e-05,
      "loss": 0.1109,
      "step": 9000
    },
    {
      "epoch": 4.518072289156627,
      "eval_loss": 0.2540307939052582,
      "eval_runtime": 202.8954,
      "eval_samples_per_second": 26.521,
      "eval_steps_per_second": 0.419,
      "eval_wer": 0.24599813080395316,
      "step": 9000
    },
    {
      "epoch": 4.618473895582329,
      "eval_loss": 0.2546459436416626,
      "eval_runtime": 208.4343,
      "eval_samples_per_second": 25.816,
      "eval_steps_per_second": 0.408,
      "eval_wer": 0.24245858935353656,
      "step": 9200
    },
    {
      "epoch": 4.718875502008032,
      "eval_loss": 0.25800344347953796,
      "eval_runtime": 205.7304,
      "eval_samples_per_second": 26.156,
      "eval_steps_per_second": 0.413,
      "eval_wer": 0.24198134780966016,
      "step": 9400
    },
    {
      "epoch": 4.769076305220883,
      "grad_norm": 0.9226210117340088,
      "learning_rate": 1.68e-05,
      "loss": 0.1028,
      "step": 9500
    },
    {
      "epoch": 4.8192771084337345,
      "eval_loss": 0.25135332345962524,
      "eval_runtime": 203.1799,
      "eval_samples_per_second": 26.484,
      "eval_steps_per_second": 0.418,
      "eval_wer": 0.24035077253474915,
      "step": 9600
    },
    {
      "epoch": 4.919678714859438,
      "eval_loss": 0.2509777843952179,
      "eval_runtime": 203.2596,
      "eval_samples_per_second": 26.474,
      "eval_steps_per_second": 0.418,
      "eval_wer": 0.24025134721310823,
      "step": 9800
    },
    {
      "epoch": 5.020080321285141,
      "grad_norm": 4.224822521209717,
      "learning_rate": 1.3333333333333334e-07,
      "loss": 0.1069,
      "step": 10000
    },
    {
      "epoch": 5.020080321285141,
      "eval_loss": 0.2515573501586914,
      "eval_runtime": 209.1262,
      "eval_samples_per_second": 25.731,
      "eval_steps_per_second": 0.406,
      "eval_wer": 0.24033088747042097,
      "step": 10000
    },
    {
      "epoch": 5.020080321285141,
      "step": 10000,
      "total_flos": 6.356146932571761e+18,
      "train_loss": 0.3559208065032959,
      "train_runtime": 12994.9936,
      "train_samples_per_second": 3.078,
      "train_steps_per_second": 0.77
    }
  ],
  "logging_steps": 500,
  "max_steps": 10000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.356146932571761e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}