{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.8555111364934325,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.014277555682467162, "grad_norm": 7.571424961090088, "learning_rate": 5.000000000000001e-07, "loss": 1.5088, "step": 25 },
    { "epoch": 0.028555111364934323, "grad_norm": 5.992729187011719, "learning_rate": 1.0000000000000002e-06, "loss": 1.2038, "step": 50 },
    { "epoch": 0.04283266704740148, "grad_norm": 5.949503421783447, "learning_rate": 1.5e-06, "loss": 0.8879, "step": 75 },
    { "epoch": 0.05711022272986865, "grad_norm": 4.452832221984863, "learning_rate": 2.0000000000000003e-06, "loss": 0.7647, "step": 100 },
    { "epoch": 0.0713877784123358, "grad_norm": 4.690545558929443, "learning_rate": 2.5e-06, "loss": 0.6792, "step": 125 },
    { "epoch": 0.08566533409480297, "grad_norm": 4.969720840454102, "learning_rate": 3e-06, "loss": 0.6549, "step": 150 },
    { "epoch": 0.09994288977727013, "grad_norm": 5.184281349182129, "learning_rate": 3.5e-06, "loss": 0.6376, "step": 175 },
    { "epoch": 0.1142204454597373, "grad_norm": 5.00349235534668, "learning_rate": 4.000000000000001e-06, "loss": 0.5982, "step": 200 },
    { "epoch": 0.12849800114220444, "grad_norm": 4.239490032196045, "learning_rate": 4.5e-06, "loss": 0.6084, "step": 225 },
    { "epoch": 0.1427755568246716, "grad_norm": 4.2740068435668945, "learning_rate": 5e-06, "loss": 0.58, "step": 250 },
    { "epoch": 0.15705311250713877, "grad_norm": 4.718848705291748, "learning_rate": 5.500000000000001e-06, "loss": 0.5759, "step": 275 },
    { "epoch": 0.17133066818960593, "grad_norm": 4.2935638427734375, "learning_rate": 6e-06, "loss": 0.5625, "step": 300 },
    { "epoch": 0.1856082238720731, "grad_norm": 4.917020797729492, "learning_rate": 6.5000000000000004e-06, "loss": 0.5621, "step": 325 },
    { "epoch": 0.19988577955454026, "grad_norm": 3.9521942138671875, "learning_rate": 7e-06, "loss": 0.5644, "step": 350 },
    { "epoch": 0.21416333523700742, "grad_norm": 4.506232738494873, "learning_rate": 7.500000000000001e-06, "loss": 0.5508, "step": 375 },
    { "epoch": 0.2284408909194746, "grad_norm": 4.1483540534973145, "learning_rate": 8.000000000000001e-06, "loss": 0.5244, "step": 400 },
    { "epoch": 0.24271844660194175, "grad_norm": 4.077396392822266, "learning_rate": 8.5e-06, "loss": 0.5051, "step": 425 },
    { "epoch": 0.2569960022844089, "grad_norm": 4.375626087188721, "learning_rate": 9e-06, "loss": 0.5222, "step": 450 },
    { "epoch": 0.2712735579668761, "grad_norm": 3.5698530673980713, "learning_rate": 9.5e-06, "loss": 0.5038, "step": 475 },
    { "epoch": 0.2855511136493432, "grad_norm": 4.99509859085083, "learning_rate": 1e-05, "loss": 0.5196, "step": 500 },
    { "epoch": 0.2998286693318104, "grad_norm": 3.666332721710205, "learning_rate": 9.944444444444445e-06, "loss": 0.5066, "step": 525 },
    { "epoch": 0.31410622501427754, "grad_norm": 3.9203736782073975, "learning_rate": 9.88888888888889e-06, "loss": 0.4822, "step": 550 },
    { "epoch": 0.32838378069674473, "grad_norm": 3.5677530765533447, "learning_rate": 9.833333333333333e-06, "loss": 0.519, "step": 575 },
    { "epoch": 0.34266133637921187, "grad_norm": 3.3873414993286133, "learning_rate": 9.777777777777779e-06, "loss": 0.5205, "step": 600 },
    { "epoch": 0.35693889206167906, "grad_norm": 3.9527816772460938, "learning_rate": 9.722222222222223e-06, "loss": 0.4769, "step": 625 },
    { "epoch": 0.3712164477441462, "grad_norm": 3.3437490463256836, "learning_rate": 9.666666666666667e-06, "loss": 0.4629, "step": 650 },
    { "epoch": 0.3854940034266134, "grad_norm": 3.7754790782928467, "learning_rate": 9.611111111111112e-06, "loss": 0.4812, "step": 675 },
    { "epoch": 0.3997715591090805, "grad_norm": 3.744267225265503, "learning_rate": 9.555555555555556e-06, "loss": 0.467, "step": 700 },
    { "epoch": 0.4140491147915477, "grad_norm": 3.5076072216033936, "learning_rate": 9.5e-06, "loss": 0.4454, "step": 725 },
    { "epoch": 0.42832667047401485, "grad_norm": 3.556335687637329, "learning_rate": 9.444444444444445e-06, "loss": 0.4447, "step": 750 },
    { "epoch": 0.442604226156482, "grad_norm": 4.256951332092285, "learning_rate": 9.38888888888889e-06, "loss": 0.4809, "step": 775 },
    { "epoch": 0.4568817818389492, "grad_norm": 3.533447742462158, "learning_rate": 9.333333333333334e-06, "loss": 0.4425, "step": 800 },
    { "epoch": 0.4711593375214163, "grad_norm": 4.324098587036133, "learning_rate": 9.277777777777778e-06, "loss": 0.424, "step": 825 },
    { "epoch": 0.4854368932038835, "grad_norm": 2.913189649581909, "learning_rate": 9.222222222222224e-06, "loss": 0.4314, "step": 850 },
    { "epoch": 0.49971444888635064, "grad_norm": 3.432490825653076, "learning_rate": 9.166666666666666e-06, "loss": 0.4355, "step": 875 },
    { "epoch": 0.5139920045688178, "grad_norm": 3.645869255065918, "learning_rate": 9.111111111111112e-06, "loss": 0.4395, "step": 900 },
    { "epoch": 0.528269560251285, "grad_norm": 3.2094240188598633, "learning_rate": 9.055555555555556e-06, "loss": 0.4144, "step": 925 },
    { "epoch": 0.5425471159337522, "grad_norm": 3.4623546600341797, "learning_rate": 9e-06, "loss": 0.4277, "step": 950 },
    { "epoch": 0.5568246716162193, "grad_norm": 3.640333414077759, "learning_rate": 8.944444444444446e-06, "loss": 0.4246, "step": 975 },
    { "epoch": 0.5711022272986864, "grad_norm": 3.0283167362213135, "learning_rate": 8.888888888888888e-06, "loss": 0.4047, "step": 1000 },
    { "epoch": 0.5711022272986864, "eval_loss": 0.4848648011684418, "eval_runtime": 1825.4203, "eval_samples_per_second": 2.137, "eval_steps_per_second": 0.134, "eval_wer": 0.35052641746353713, "step": 1000 },
    { "epoch": 0.5853797829811537, "grad_norm": 3.7762739658355713, "learning_rate": 8.833333333333334e-06, "loss": 0.4218, "step": 1025 },
    { "epoch": 0.5996573386636208, "grad_norm": 3.495347023010254, "learning_rate": 8.777777777777778e-06, "loss": 0.3968, "step": 1050 },
    { "epoch": 0.613934894346088, "grad_norm": 3.5088939666748047, "learning_rate": 8.722222222222224e-06, "loss": 0.4108, "step": 1075 },
    { "epoch": 0.6282124500285551, "grad_norm": 3.555328845977783, "learning_rate": 8.666666666666668e-06, "loss": 0.4063, "step": 1100 },
    { "epoch": 0.6424900057110223, "grad_norm": 2.9576587677001953, "learning_rate": 8.611111111111112e-06, "loss": 0.4116, "step": 1125 },
    { "epoch": 0.6567675613934895, "grad_norm": 3.280855178833008, "learning_rate": 8.555555555555556e-06, "loss": 0.4083, "step": 1150 },
    { "epoch": 0.6710451170759566, "grad_norm": 3.903722047805786, "learning_rate": 8.5e-06, "loss": 0.411, "step": 1175 },
    { "epoch": 0.6853226727584237, "grad_norm": 3.519038438796997, "learning_rate": 8.444444444444446e-06, "loss": 0.3964, "step": 1200 },
    { "epoch": 0.6996002284408909, "grad_norm": 3.3553972244262695, "learning_rate": 8.38888888888889e-06, "loss": 0.4049, "step": 1225 },
    { "epoch": 0.7138777841233581, "grad_norm": 3.3820197582244873, "learning_rate": 8.333333333333334e-06, "loss": 0.4159, "step": 1250 },
    { "epoch": 0.7281553398058253, "grad_norm": 2.782127857208252, "learning_rate": 8.277777777777778e-06, "loss": 0.3859, "step": 1275 },
    { "epoch": 0.7424328954882924, "grad_norm": 3.5839345455169678, "learning_rate": 8.222222222222222e-06, "loss": 0.392, "step": 1300 },
    { "epoch": 0.7567104511707595, "grad_norm": 3.0308761596679688, "learning_rate": 8.166666666666668e-06, "loss": 0.3899, "step": 1325 },
    { "epoch": 0.7709880068532268, "grad_norm": 3.136904001235962, "learning_rate": 8.111111111111112e-06, "loss": 0.3907, "step": 1350 },
    { "epoch": 0.7852655625356939, "grad_norm": 3.3192756175994873, "learning_rate": 8.055555555555557e-06, "loss": 0.3941, "step": 1375 },
    { "epoch": 0.799543118218161, "grad_norm": 4.766107082366943, "learning_rate": 8.000000000000001e-06, "loss": 0.3887, "step": 1400 },
    { "epoch": 0.8138206739006282, "grad_norm": 4.241744041442871, "learning_rate": 7.944444444444445e-06, "loss": 0.4033, "step": 1425 },
    { "epoch": 0.8280982295830954, "grad_norm": 3.1559460163116455, "learning_rate": 7.88888888888889e-06, "loss": 0.3567, "step": 1450 },
    { "epoch": 0.8423757852655626, "grad_norm": 3.142645835876465, "learning_rate": 7.833333333333333e-06, "loss": 0.3731, "step": 1475 },
    { "epoch": 0.8566533409480297, "grad_norm": 3.1183199882507324, "learning_rate": 7.77777777777778e-06, "loss": 0.3668, "step": 1500 },
    { "epoch": 0.8709308966304968, "grad_norm": 2.7859325408935547, "learning_rate": 7.722222222222223e-06, "loss": 0.3965, "step": 1525 },
    { "epoch": 0.885208452312964, "grad_norm": 3.191088914871216, "learning_rate": 7.666666666666667e-06, "loss": 0.3574, "step": 1550 },
    { "epoch": 0.8994860079954312, "grad_norm": 3.0640053749084473, "learning_rate": 7.611111111111111e-06, "loss": 0.3811, "step": 1575 },
    { "epoch": 0.9137635636778983, "grad_norm": 3.0769450664520264, "learning_rate": 7.555555555555556e-06, "loss": 0.3788, "step": 1600 },
    { "epoch": 0.9280411193603655, "grad_norm": 3.1407933235168457, "learning_rate": 7.500000000000001e-06, "loss": 0.3698, "step": 1625 },
    { "epoch": 0.9423186750428326, "grad_norm": 3.410187244415283, "learning_rate": 7.444444444444445e-06, "loss": 0.3907, "step": 1650 },
    { "epoch": 0.9565962307252999, "grad_norm": 3.3382880687713623, "learning_rate": 7.38888888888889e-06, "loss": 0.3368, "step": 1675 },
    { "epoch": 0.970873786407767, "grad_norm": 3.194368600845337, "learning_rate": 7.333333333333333e-06, "loss": 0.369, "step": 1700 },
    { "epoch": 0.9851513420902341, "grad_norm": 3.089852809906006, "learning_rate": 7.277777777777778e-06, "loss": 0.3765, "step": 1725 },
    { "epoch": 0.9994288977727013, "grad_norm": 3.0002810955047607, "learning_rate": 7.222222222222223e-06, "loss": 0.3705, "step": 1750 },
    { "epoch": 1.0137064534551685, "grad_norm": 2.3977696895599365, "learning_rate": 7.166666666666667e-06, "loss": 0.2584, "step": 1775 },
    { "epoch": 1.0279840091376355, "grad_norm": 2.3220465183258057, "learning_rate": 7.111111111111112e-06, "loss": 0.2538, "step": 1800 },
    { "epoch": 1.0422615648201028, "grad_norm": 2.819687843322754, "learning_rate": 7.055555555555557e-06, "loss": 0.2571, "step": 1825 },
    { "epoch": 1.05653912050257, "grad_norm": 2.514644145965576, "learning_rate": 7e-06, "loss": 0.2806, "step": 1850 },
    { "epoch": 1.070816676185037, "grad_norm": 2.1887128353118896, "learning_rate": 6.944444444444445e-06, "loss": 0.2626, "step": 1875 },
    { "epoch": 1.0850942318675043, "grad_norm": 2.592247486114502, "learning_rate": 6.88888888888889e-06, "loss": 0.2509, "step": 1900 },
    { "epoch": 1.0993717875499716, "grad_norm": 2.371534824371338, "learning_rate": 6.833333333333334e-06, "loss": 0.2605, "step": 1925 },
    { "epoch": 1.1136493432324386, "grad_norm": 3.1825778484344482, "learning_rate": 6.777777777777779e-06, "loss": 0.2495, "step": 1950 },
    { "epoch": 1.1279268989149058, "grad_norm": 2.901749849319458, "learning_rate": 6.7222222222222235e-06, "loss": 0.261, "step": 1975 },
    { "epoch": 1.1422044545973729, "grad_norm": 2.658766984939575, "learning_rate": 6.666666666666667e-06, "loss": 0.2476, "step": 2000 },
    { "epoch": 1.1422044545973729, "eval_loss": 0.41870468854904175, "eval_runtime": 1722.2575, "eval_samples_per_second": 2.265, "eval_steps_per_second": 0.142, "eval_wer": 0.3136771950159374, "step": 2000 },
    { "epoch": 1.15648201027984, "grad_norm": 2.711312770843506, "learning_rate": 6.6111111111111115e-06, "loss": 0.2414, "step": 2025 },
    { "epoch": 1.1707595659623073, "grad_norm": 2.9044759273529053, "learning_rate": 6.555555555555556e-06, "loss": 0.2502, "step": 2050 },
    { "epoch": 1.1850371216447744, "grad_norm": 2.549725294113159, "learning_rate": 6.5000000000000004e-06, "loss": 0.2511, "step": 2075 },
    { "epoch": 1.1993146773272416, "grad_norm": 2.95792555809021, "learning_rate": 6.444444444444445e-06, "loss": 0.2427, "step": 2100 },
    { "epoch": 1.2135922330097086, "grad_norm": 2.686870574951172, "learning_rate": 6.3888888888888885e-06, "loss": 0.2637, "step": 2125 },
    { "epoch": 1.227869788692176, "grad_norm": 3.7834455966949463, "learning_rate": 6.333333333333333e-06, "loss": 0.2554, "step": 2150 },
    { "epoch": 1.2421473443746431, "grad_norm": 3.0891430377960205, "learning_rate": 6.277777777777778e-06, "loss": 0.2467, "step": 2175 },
    { "epoch": 1.2564249000571102, "grad_norm": 2.771472930908203, "learning_rate": 6.222222222222223e-06, "loss": 0.2467, "step": 2200 },
    { "epoch": 1.2707024557395774, "grad_norm": 2.6807925701141357, "learning_rate": 6.166666666666667e-06, "loss": 0.2682, "step": 2225 },
    { "epoch": 1.2849800114220447, "grad_norm": 2.2320196628570557, "learning_rate": 6.111111111111112e-06, "loss": 0.2408, "step": 2250 },
    { "epoch": 1.2992575671045117, "grad_norm": 3.066009759902954, "learning_rate": 6.055555555555555e-06, "loss": 0.2363, "step": 2275 },
    { "epoch": 1.313535122786979, "grad_norm": 2.6043167114257812, "learning_rate": 6e-06, "loss": 0.2483, "step": 2300 },
    { "epoch": 1.327812678469446, "grad_norm": 2.6250624656677246, "learning_rate": 5.944444444444445e-06, "loss": 0.2563, "step": 2325 },
    { "epoch": 1.3420902341519132, "grad_norm": 2.508998394012451, "learning_rate": 5.88888888888889e-06, "loss": 0.2581, "step": 2350 },
    { "epoch": 1.3563677898343802, "grad_norm": 2.872715473175049, "learning_rate": 5.833333333333334e-06, "loss": 0.2371, "step": 2375 },
    { "epoch": 1.3706453455168475, "grad_norm": 3.1910557746887207, "learning_rate": 5.777777777777778e-06, "loss": 0.2515, "step": 2400 },
    { "epoch": 1.3849229011993147, "grad_norm": 2.7466485500335693, "learning_rate": 5.722222222222222e-06, "loss": 0.2578, "step": 2425 },
    { "epoch": 1.3992004568817817, "grad_norm": 2.388066530227661, "learning_rate": 5.666666666666667e-06, "loss": 0.2541, "step": 2450 },
    { "epoch": 1.413478012564249, "grad_norm": 2.688497304916382, "learning_rate": 5.611111111111112e-06, "loss": 0.2514, "step": 2475 },
    { "epoch": 1.4277555682467162, "grad_norm": 2.710899591445923, "learning_rate": 5.555555555555557e-06, "loss": 0.2765, "step": 2500 },
    { "epoch": 1.4420331239291833, "grad_norm": 2.296635389328003, "learning_rate": 5.500000000000001e-06, "loss": 0.2487, "step": 2525 },
    { "epoch": 1.4563106796116505, "grad_norm": 2.7988133430480957, "learning_rate": 5.444444444444445e-06, "loss": 0.2499, "step": 2550 },
    { "epoch": 1.4705882352941178, "grad_norm": 3.1988582611083984, "learning_rate": 5.388888888888889e-06, "loss": 0.2456, "step": 2575 },
    { "epoch": 1.4848657909765848, "grad_norm": 2.657517910003662, "learning_rate": 5.333333333333334e-06, "loss": 0.2613, "step": 2600 },
    { "epoch": 1.499143346659052, "grad_norm": 2.5517725944519043, "learning_rate": 5.2777777777777785e-06, "loss": 0.2528, "step": 2625 },
    { "epoch": 1.5134209023415193, "grad_norm": 2.7166850566864014, "learning_rate": 5.2222222222222226e-06, "loss": 0.2476, "step": 2650 },
    { "epoch": 1.5276984580239863, "grad_norm": 2.7338292598724365, "learning_rate": 5.1666666666666675e-06, "loss": 0.2489, "step": 2675 },
    { "epoch": 1.5419760137064533, "grad_norm": 2.1498470306396484, "learning_rate": 5.1111111111111115e-06, "loss": 0.2388, "step": 2700 },
    { "epoch": 1.5562535693889206, "grad_norm": 2.595247745513916, "learning_rate": 5.0555555555555555e-06, "loss": 0.2566, "step": 2725 },
    { "epoch": 1.5705311250713878, "grad_norm": 2.652132987976074, "learning_rate": 5e-06, "loss": 0.239, "step": 2750 },
    { "epoch": 1.5848086807538548, "grad_norm": 2.436605930328369, "learning_rate": 4.944444444444445e-06, "loss": 0.2419, "step": 2775 },
    { "epoch": 1.599086236436322, "grad_norm": 2.618035316467285, "learning_rate": 4.888888888888889e-06, "loss": 0.2295, "step": 2800 },
    { "epoch": 1.6133637921187893, "grad_norm": 2.2901298999786377, "learning_rate": 4.833333333333333e-06, "loss": 0.2446, "step": 2825 },
    { "epoch": 1.6276413478012564, "grad_norm": 2.899315595626831, "learning_rate": 4.777777777777778e-06, "loss": 0.2628, "step": 2850 },
    { "epoch": 1.6419189034837236, "grad_norm": 2.616224527359009, "learning_rate": 4.722222222222222e-06, "loss": 0.2273, "step": 2875 },
    { "epoch": 1.6561964591661908, "grad_norm": 2.43113112449646, "learning_rate": 4.666666666666667e-06, "loss": 0.2362, "step": 2900 },
    { "epoch": 1.6704740148486579, "grad_norm": 2.5203065872192383, "learning_rate": 4.611111111111112e-06, "loss": 0.2428, "step": 2925 },
    { "epoch": 1.6847515705311251, "grad_norm": 2.3064985275268555, "learning_rate": 4.555555555555556e-06, "loss": 0.2441, "step": 2950 },
    { "epoch": 1.6990291262135924, "grad_norm": 2.201695680618286, "learning_rate": 4.5e-06, "loss": 0.2324, "step": 2975 },
    { "epoch": 1.7133066818960594, "grad_norm": 2.442471981048584, "learning_rate": 4.444444444444444e-06, "loss": 0.2527, "step": 3000 },
    { "epoch": 1.7133066818960594, "eval_loss": 0.3882293701171875, "eval_runtime": 1749.1422, "eval_samples_per_second": 2.23, "eval_steps_per_second": 0.139, "eval_wer": 0.2901091471071187, "step": 3000 },
    { "epoch": 1.7275842375785264, "grad_norm": 2.77786922454834, "learning_rate": 4.388888888888889e-06, "loss": 0.2492, "step": 3025 },
    { "epoch": 1.7418617932609937, "grad_norm": 2.5009052753448486, "learning_rate": 4.333333333333334e-06, "loss": 0.2341, "step": 3050 },
    { "epoch": 1.756139348943461, "grad_norm": 2.780186176300049, "learning_rate": 4.277777777777778e-06, "loss": 0.2407, "step": 3075 },
    { "epoch": 1.770416904625928, "grad_norm": 1.9574618339538574, "learning_rate": 4.222222222222223e-06, "loss": 0.2437, "step": 3100 },
    { "epoch": 1.7846944603083952, "grad_norm": 2.151125907897949, "learning_rate": 4.166666666666667e-06, "loss": 0.2341, "step": 3125 },
    { "epoch": 1.7989720159908624, "grad_norm": 2.170015811920166, "learning_rate": 4.111111111111111e-06, "loss": 0.2373, "step": 3150 },
    { "epoch": 1.8132495716733295, "grad_norm": 3.0467231273651123, "learning_rate": 4.055555555555556e-06, "loss": 0.2317, "step": 3175 },
    { "epoch": 1.8275271273557967, "grad_norm": 3.0150015354156494, "learning_rate": 4.000000000000001e-06, "loss": 0.228, "step": 3200 },
    { "epoch": 1.841804683038264, "grad_norm": 3.275949001312256, "learning_rate": 3.944444444444445e-06, "loss": 0.2438, "step": 3225 },
    { "epoch": 1.856082238720731, "grad_norm": 3.0381839275360107, "learning_rate": 3.88888888888889e-06, "loss": 0.2478, "step": 3250 },
    { "epoch": 1.8703597944031982, "grad_norm": 2.770716428756714, "learning_rate": 3.833333333333334e-06, "loss": 0.2312, "step": 3275 },
    { "epoch": 1.8846373500856655, "grad_norm": 2.6976678371429443, "learning_rate": 3.777777777777778e-06, "loss": 0.2284, "step": 3300 },
    { "epoch": 1.8989149057681325, "grad_norm": 2.8799102306365967, "learning_rate": 3.7222222222222225e-06, "loss": 0.2484, "step": 3325 },
    { "epoch": 1.9131924614505995, "grad_norm": 2.574629545211792, "learning_rate": 3.6666666666666666e-06, "loss": 0.2295, "step": 3350 },
    { "epoch": 1.927470017133067, "grad_norm": 2.4746835231781006, "learning_rate": 3.6111111111111115e-06, "loss": 0.2335, "step": 3375 },
    { "epoch": 1.941747572815534, "grad_norm": 3.084383964538574, "learning_rate": 3.555555555555556e-06, "loss": 0.212, "step": 3400 },
    { "epoch": 1.956025128498001, "grad_norm": 2.4441068172454834, "learning_rate": 3.5e-06, "loss": 0.221, "step": 3425 },
    { "epoch": 1.9703026841804683, "grad_norm": 3.031568765640259, "learning_rate": 3.444444444444445e-06, "loss": 0.2341, "step": 3450 },
    { "epoch": 1.9845802398629355, "grad_norm": 2.3584327697753906, "learning_rate": 3.3888888888888893e-06, "loss": 0.2431, "step": 3475 },
    { "epoch": 1.9988577955454025, "grad_norm": 2.1590421199798584, "learning_rate": 3.3333333333333333e-06, "loss": 0.2357, "step": 3500 },
    { "epoch": 2.0131353512278696, "grad_norm": 2.2845587730407715, "learning_rate": 3.277777777777778e-06, "loss": 0.1576, "step": 3525 },
    { "epoch": 2.027412906910337, "grad_norm": 2.033133029937744, "learning_rate": 3.2222222222222227e-06, "loss": 0.1422, "step": 3550 },
    { "epoch": 2.041690462592804, "grad_norm": 2.2549259662628174, "learning_rate": 3.1666666666666667e-06, "loss": 0.1473, "step": 3575 },
    { "epoch": 2.055968018275271, "grad_norm": 1.5837754011154175, "learning_rate": 3.1111111111111116e-06, "loss": 0.143, "step": 3600 },
    { "epoch": 2.0702455739577386, "grad_norm": 1.9988360404968262, "learning_rate": 3.055555555555556e-06, "loss": 0.1416, "step": 3625 },
    { "epoch": 2.0845231296402056, "grad_norm": 2.148613929748535, "learning_rate": 3e-06, "loss": 0.1338, "step": 3650 },
    { "epoch": 2.0988006853226726, "grad_norm": 1.8176393508911133, "learning_rate": 2.944444444444445e-06, "loss": 0.1514, "step": 3675 },
    { "epoch": 2.11307824100514, "grad_norm": 2.60271954536438, "learning_rate": 2.888888888888889e-06, "loss": 0.1533, "step": 3700 },
    { "epoch": 2.127355796687607, "grad_norm": 2.120281457901001, "learning_rate": 2.8333333333333335e-06, "loss": 0.1404, "step": 3725 },
    { "epoch": 2.141633352370074, "grad_norm": 2.3522286415100098, "learning_rate": 2.7777777777777783e-06, "loss": 0.1511, "step": 3750 },
    { "epoch": 2.1559109080525416, "grad_norm": 1.8738924264907837, "learning_rate": 2.7222222222222224e-06, "loss": 0.1417, "step": 3775 },
    { "epoch": 2.1701884637350086, "grad_norm": 2.255291223526001, "learning_rate": 2.666666666666667e-06, "loss": 0.1437, "step": 3800 },
    { "epoch": 2.1844660194174756, "grad_norm": 1.7046154737472534, "learning_rate": 2.6111111111111113e-06, "loss": 0.1446, "step": 3825 },
    { "epoch": 2.198743575099943, "grad_norm": 2.0543861389160156, "learning_rate": 2.5555555555555557e-06, "loss": 0.1504, "step": 3850 },
    { "epoch": 2.21302113078241, "grad_norm": 2.139716863632202, "learning_rate": 2.5e-06, "loss": 0.1345, "step": 3875 },
    { "epoch": 2.227298686464877, "grad_norm": 1.7999951839447021, "learning_rate": 2.4444444444444447e-06, "loss": 0.1389, "step": 3900 },
    { "epoch": 2.241576242147344, "grad_norm": 1.7282090187072754, "learning_rate": 2.388888888888889e-06, "loss": 0.1324, "step": 3925 },
    { "epoch": 2.2558537978298117, "grad_norm": 2.6271605491638184, "learning_rate": 2.3333333333333336e-06, "loss": 0.1551, "step": 3950 },
    { "epoch": 2.2701313535122787, "grad_norm": 2.170382022857666, "learning_rate": 2.277777777777778e-06, "loss": 0.144, "step": 3975 },
    { "epoch": 2.2844089091947457, "grad_norm": 1.796635627746582, "learning_rate": 2.222222222222222e-06, "loss": 0.1568, "step": 4000 },
    { "epoch": 2.2844089091947457, "eval_loss": 0.3901652991771698, "eval_runtime": 1765.3609, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.138, "eval_wer": 0.28160919540229884, "step": 4000 },
    { "epoch": 2.298686464877213, "grad_norm": 2.0357980728149414, "learning_rate": 2.166666666666667e-06, "loss": 0.161, "step": 4025 },
    { "epoch": 2.31296402055968, "grad_norm": 2.027215003967285, "learning_rate": 2.1111111111111114e-06, "loss": 0.1353, "step": 4050 },
    { "epoch": 2.3272415762421472, "grad_norm": 2.8169405460357666, "learning_rate": 2.0555555555555555e-06, "loss": 0.1449, "step": 4075 },
    { "epoch": 2.3415191319246147, "grad_norm": 1.9528751373291016, "learning_rate": 2.0000000000000003e-06, "loss": 0.1376, "step": 4100 },
    { "epoch": 2.3557966876070817, "grad_norm": 2.5781335830688477, "learning_rate": 1.944444444444445e-06, "loss": 0.1383, "step": 4125 },
    { "epoch": 2.3700742432895487, "grad_norm": 2.083077907562256, "learning_rate": 1.888888888888889e-06, "loss": 0.1362, "step": 4150 },
    { "epoch": 2.384351798972016, "grad_norm": 2.431272029876709, "learning_rate": 1.8333333333333333e-06, "loss": 0.1329, "step": 4175 },
    { "epoch": 2.3986293546544832, "grad_norm": 2.157139539718628, "learning_rate": 1.777777777777778e-06, "loss": 0.1377, "step": 4200 },
    { "epoch": 2.4129069103369503, "grad_norm": 2.5328071117401123, "learning_rate": 1.7222222222222224e-06, "loss": 0.1361, "step": 4225 },
    { "epoch": 2.4271844660194173, "grad_norm": 2.433239459991455, "learning_rate": 1.6666666666666667e-06, "loss": 0.157, "step": 4250 },
    { "epoch": 2.4414620217018848, "grad_norm": 2.5167510509490967, "learning_rate": 1.6111111111111113e-06, "loss": 0.132, "step": 4275 },
    { "epoch": 2.455739577384352, "grad_norm": 1.9507442712783813, "learning_rate": 1.5555555555555558e-06, "loss": 0.1625, "step": 4300 },
    { "epoch": 2.470017133066819, "grad_norm": 2.2467007637023926, "learning_rate": 1.5e-06, "loss": 0.1333, "step": 4325 },
    { "epoch": 2.4842946887492863, "grad_norm": 2.4816768169403076, "learning_rate": 1.4444444444444445e-06, "loss": 0.1499, "step": 4350 },
    { "epoch": 2.4985722444317533, "grad_norm": 2.0616416931152344, "learning_rate": 1.3888888888888892e-06, "loss": 0.1508, "step": 4375 },
    { "epoch": 2.5128498001142203, "grad_norm": 2.089355230331421, "learning_rate": 1.3333333333333334e-06, "loss": 0.1344, "step": 4400 },
    { "epoch": 2.5271273557966873, "grad_norm": 2.2235498428344727, "learning_rate": 1.28e-06, "loss": 0.1717, "step": 4425 },
    { "epoch": 2.541404911479155, "grad_norm": 1.9268138408660889, "learning_rate": 1.2244444444444445e-06, "loss": 0.143, "step": 4450 },
    { "epoch": 2.555682467161622, "grad_norm": 1.8911551237106323, "learning_rate": 1.168888888888889e-06, "loss": 0.1439, "step": 4475 },
    { "epoch": 2.5699600228440893, "grad_norm": 2.5078868865966797, "learning_rate": 1.1133333333333334e-06, "loss": 0.1341, "step": 4500 },
    { "epoch": 2.5842375785265563, "grad_norm": 2.1232492923736572, "learning_rate": 1.0577777777777779e-06, "loss": 0.1415, "step": 4525 },
    { "epoch": 2.5985151342090234, "grad_norm": 1.9214311838150024, "learning_rate": 1.0022222222222223e-06, "loss": 0.1301, "step": 4550 },
    { "epoch": 2.6127926898914904, "grad_norm": 2.4226858615875244, "learning_rate": 9.466666666666667e-07, "loss": 0.1438, "step": 4575 },
    { "epoch": 2.627070245573958, "grad_norm": 2.324777126312256, "learning_rate": 8.911111111111112e-07, "loss": 0.1306, "step": 4600 },
    { "epoch": 2.641347801256425, "grad_norm": 2.427114486694336, "learning_rate": 8.355555555555556e-07, "loss": 0.1359, "step": 4625 },
    { "epoch": 2.655625356938892, "grad_norm": 1.989882469177246, "learning_rate": 7.8e-07, "loss": 0.1386, "step": 4650 },
    { "epoch": 2.6699029126213594, "grad_norm": 2.6079118251800537, "learning_rate": 7.244444444444446e-07, "loss": 0.135, "step": 4675 },
    { "epoch": 2.6841804683038264, "grad_norm": 2.3429243564605713, "learning_rate": 6.68888888888889e-07, "loss": 0.1356, "step": 4700 },
    { "epoch": 2.6984580239862934, "grad_norm": 2.3358540534973145, "learning_rate": 6.133333333333333e-07, "loss": 0.1304, "step": 4725 },
    { "epoch": 2.7127355796687604, "grad_norm": 1.917809247970581, "learning_rate": 5.577777777777779e-07, "loss": 0.1395, "step": 4750 },
    { "epoch": 2.727013135351228, "grad_norm": 2.0677952766418457, "learning_rate": 5.022222222222222e-07, "loss": 0.1309, "step": 4775 },
    { "epoch": 2.741290691033695, "grad_norm": 2.135127305984497, "learning_rate": 4.466666666666667e-07, "loss": 0.1424, "step": 4800 },
    { "epoch": 2.7555682467161624, "grad_norm": 2.3306682109832764, "learning_rate": 3.9111111111111115e-07, "loss": 0.1318, "step": 4825 },
    { "epoch": 2.7698458023986294, "grad_norm": 2.0700454711914062, "learning_rate": 3.3555555555555556e-07, "loss": 0.1566, "step": 4850 },
    { "epoch": 2.7841233580810965, "grad_norm": 1.8561683893203735, "learning_rate": 2.8e-07, "loss": 0.1453, "step": 4875 },
    { "epoch": 2.7984009137635635, "grad_norm": 2.2682347297668457, "learning_rate": 2.2444444444444445e-07, "loss": 0.1415, "step": 4900 },
    { "epoch": 2.812678469446031, "grad_norm": 2.2898778915405273, "learning_rate": 1.6888888888888888e-07, "loss": 0.1427, "step": 4925 },
    { "epoch": 2.826956025128498, "grad_norm": 2.328401803970337, "learning_rate": 1.1333333333333336e-07, "loss": 0.1357, "step": 4950 },
    { "epoch": 2.841233580810965, "grad_norm": 2.2169013023376465, "learning_rate": 5.777777777777778e-08, "loss": 0.1343, "step": 4975 },
    { "epoch": 2.8555111364934325, "grad_norm": 2.42340350151062, "learning_rate": 2.2222222222222225e-09, "loss": 0.1313, "step": 5000 },
    { "epoch": 2.8555111364934325, "eval_loss": 0.38383349776268005, "eval_runtime": 1820.062, "eval_samples_per_second": 2.143, "eval_steps_per_second": 0.134, "eval_wer": 0.27318168646769053, "step": 5000 },
    { "epoch": 2.8555111364934325, "step": 5000, "total_flos": 5.435589590699213e+20, "train_loss": 0.3002769865989685, "train_runtime": 59305.2217, "train_samples_per_second": 2.698, "train_steps_per_second": 0.084 }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.435589590699213e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}