Automatic Speech Recognition
TensorBoard
Safetensors
Welsh
whisper
Generated from Trainer
verbatim
whisper-large-v3-ft-btb-cv-cy / trainer_state.json
DewiBrynJones's picture
End of training
7b51bc9 verified
raw
history blame
36.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8555111364934325,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014277555682467162,
"grad_norm": 7.571424961090088,
"learning_rate": 5.000000000000001e-07,
"loss": 1.5088,
"step": 25
},
{
"epoch": 0.028555111364934323,
"grad_norm": 5.992729187011719,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.2038,
"step": 50
},
{
"epoch": 0.04283266704740148,
"grad_norm": 5.949503421783447,
"learning_rate": 1.5e-06,
"loss": 0.8879,
"step": 75
},
{
"epoch": 0.05711022272986865,
"grad_norm": 4.452832221984863,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.7647,
"step": 100
},
{
"epoch": 0.0713877784123358,
"grad_norm": 4.690545558929443,
"learning_rate": 2.5e-06,
"loss": 0.6792,
"step": 125
},
{
"epoch": 0.08566533409480297,
"grad_norm": 4.969720840454102,
"learning_rate": 3e-06,
"loss": 0.6549,
"step": 150
},
{
"epoch": 0.09994288977727013,
"grad_norm": 5.184281349182129,
"learning_rate": 3.5e-06,
"loss": 0.6376,
"step": 175
},
{
"epoch": 0.1142204454597373,
"grad_norm": 5.00349235534668,
"learning_rate": 4.000000000000001e-06,
"loss": 0.5982,
"step": 200
},
{
"epoch": 0.12849800114220444,
"grad_norm": 4.239490032196045,
"learning_rate": 4.5e-06,
"loss": 0.6084,
"step": 225
},
{
"epoch": 0.1427755568246716,
"grad_norm": 4.2740068435668945,
"learning_rate": 5e-06,
"loss": 0.58,
"step": 250
},
{
"epoch": 0.15705311250713877,
"grad_norm": 4.718848705291748,
"learning_rate": 5.500000000000001e-06,
"loss": 0.5759,
"step": 275
},
{
"epoch": 0.17133066818960593,
"grad_norm": 4.2935638427734375,
"learning_rate": 6e-06,
"loss": 0.5625,
"step": 300
},
{
"epoch": 0.1856082238720731,
"grad_norm": 4.917020797729492,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.5621,
"step": 325
},
{
"epoch": 0.19988577955454026,
"grad_norm": 3.9521942138671875,
"learning_rate": 7e-06,
"loss": 0.5644,
"step": 350
},
{
"epoch": 0.21416333523700742,
"grad_norm": 4.506232738494873,
"learning_rate": 7.500000000000001e-06,
"loss": 0.5508,
"step": 375
},
{
"epoch": 0.2284408909194746,
"grad_norm": 4.1483540534973145,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5244,
"step": 400
},
{
"epoch": 0.24271844660194175,
"grad_norm": 4.077396392822266,
"learning_rate": 8.5e-06,
"loss": 0.5051,
"step": 425
},
{
"epoch": 0.2569960022844089,
"grad_norm": 4.375626087188721,
"learning_rate": 9e-06,
"loss": 0.5222,
"step": 450
},
{
"epoch": 0.2712735579668761,
"grad_norm": 3.5698530673980713,
"learning_rate": 9.5e-06,
"loss": 0.5038,
"step": 475
},
{
"epoch": 0.2855511136493432,
"grad_norm": 4.99509859085083,
"learning_rate": 1e-05,
"loss": 0.5196,
"step": 500
},
{
"epoch": 0.2998286693318104,
"grad_norm": 3.666332721710205,
"learning_rate": 9.944444444444445e-06,
"loss": 0.5066,
"step": 525
},
{
"epoch": 0.31410622501427754,
"grad_norm": 3.9203736782073975,
"learning_rate": 9.88888888888889e-06,
"loss": 0.4822,
"step": 550
},
{
"epoch": 0.32838378069674473,
"grad_norm": 3.5677530765533447,
"learning_rate": 9.833333333333333e-06,
"loss": 0.519,
"step": 575
},
{
"epoch": 0.34266133637921187,
"grad_norm": 3.3873414993286133,
"learning_rate": 9.777777777777779e-06,
"loss": 0.5205,
"step": 600
},
{
"epoch": 0.35693889206167906,
"grad_norm": 3.9527816772460938,
"learning_rate": 9.722222222222223e-06,
"loss": 0.4769,
"step": 625
},
{
"epoch": 0.3712164477441462,
"grad_norm": 3.3437490463256836,
"learning_rate": 9.666666666666667e-06,
"loss": 0.4629,
"step": 650
},
{
"epoch": 0.3854940034266134,
"grad_norm": 3.7754790782928467,
"learning_rate": 9.611111111111112e-06,
"loss": 0.4812,
"step": 675
},
{
"epoch": 0.3997715591090805,
"grad_norm": 3.744267225265503,
"learning_rate": 9.555555555555556e-06,
"loss": 0.467,
"step": 700
},
{
"epoch": 0.4140491147915477,
"grad_norm": 3.5076072216033936,
"learning_rate": 9.5e-06,
"loss": 0.4454,
"step": 725
},
{
"epoch": 0.42832667047401485,
"grad_norm": 3.556335687637329,
"learning_rate": 9.444444444444445e-06,
"loss": 0.4447,
"step": 750
},
{
"epoch": 0.442604226156482,
"grad_norm": 4.256951332092285,
"learning_rate": 9.38888888888889e-06,
"loss": 0.4809,
"step": 775
},
{
"epoch": 0.4568817818389492,
"grad_norm": 3.533447742462158,
"learning_rate": 9.333333333333334e-06,
"loss": 0.4425,
"step": 800
},
{
"epoch": 0.4711593375214163,
"grad_norm": 4.324098587036133,
"learning_rate": 9.277777777777778e-06,
"loss": 0.424,
"step": 825
},
{
"epoch": 0.4854368932038835,
"grad_norm": 2.913189649581909,
"learning_rate": 9.222222222222224e-06,
"loss": 0.4314,
"step": 850
},
{
"epoch": 0.49971444888635064,
"grad_norm": 3.432490825653076,
"learning_rate": 9.166666666666666e-06,
"loss": 0.4355,
"step": 875
},
{
"epoch": 0.5139920045688178,
"grad_norm": 3.645869255065918,
"learning_rate": 9.111111111111112e-06,
"loss": 0.4395,
"step": 900
},
{
"epoch": 0.528269560251285,
"grad_norm": 3.2094240188598633,
"learning_rate": 9.055555555555556e-06,
"loss": 0.4144,
"step": 925
},
{
"epoch": 0.5425471159337522,
"grad_norm": 3.4623546600341797,
"learning_rate": 9e-06,
"loss": 0.4277,
"step": 950
},
{
"epoch": 0.5568246716162193,
"grad_norm": 3.640333414077759,
"learning_rate": 8.944444444444446e-06,
"loss": 0.4246,
"step": 975
},
{
"epoch": 0.5711022272986864,
"grad_norm": 3.0283167362213135,
"learning_rate": 8.888888888888888e-06,
"loss": 0.4047,
"step": 1000
},
{
"epoch": 0.5711022272986864,
"eval_loss": 0.4848648011684418,
"eval_runtime": 1825.4203,
"eval_samples_per_second": 2.137,
"eval_steps_per_second": 0.134,
"eval_wer": 0.35052641746353713,
"step": 1000
},
{
"epoch": 0.5853797829811537,
"grad_norm": 3.7762739658355713,
"learning_rate": 8.833333333333334e-06,
"loss": 0.4218,
"step": 1025
},
{
"epoch": 0.5996573386636208,
"grad_norm": 3.495347023010254,
"learning_rate": 8.777777777777778e-06,
"loss": 0.3968,
"step": 1050
},
{
"epoch": 0.613934894346088,
"grad_norm": 3.5088939666748047,
"learning_rate": 8.722222222222224e-06,
"loss": 0.4108,
"step": 1075
},
{
"epoch": 0.6282124500285551,
"grad_norm": 3.555328845977783,
"learning_rate": 8.666666666666668e-06,
"loss": 0.4063,
"step": 1100
},
{
"epoch": 0.6424900057110223,
"grad_norm": 2.9576587677001953,
"learning_rate": 8.611111111111112e-06,
"loss": 0.4116,
"step": 1125
},
{
"epoch": 0.6567675613934895,
"grad_norm": 3.280855178833008,
"learning_rate": 8.555555555555556e-06,
"loss": 0.4083,
"step": 1150
},
{
"epoch": 0.6710451170759566,
"grad_norm": 3.903722047805786,
"learning_rate": 8.5e-06,
"loss": 0.411,
"step": 1175
},
{
"epoch": 0.6853226727584237,
"grad_norm": 3.519038438796997,
"learning_rate": 8.444444444444446e-06,
"loss": 0.3964,
"step": 1200
},
{
"epoch": 0.6996002284408909,
"grad_norm": 3.3553972244262695,
"learning_rate": 8.38888888888889e-06,
"loss": 0.4049,
"step": 1225
},
{
"epoch": 0.7138777841233581,
"grad_norm": 3.3820197582244873,
"learning_rate": 8.333333333333334e-06,
"loss": 0.4159,
"step": 1250
},
{
"epoch": 0.7281553398058253,
"grad_norm": 2.782127857208252,
"learning_rate": 8.277777777777778e-06,
"loss": 0.3859,
"step": 1275
},
{
"epoch": 0.7424328954882924,
"grad_norm": 3.5839345455169678,
"learning_rate": 8.222222222222222e-06,
"loss": 0.392,
"step": 1300
},
{
"epoch": 0.7567104511707595,
"grad_norm": 3.0308761596679688,
"learning_rate": 8.166666666666668e-06,
"loss": 0.3899,
"step": 1325
},
{
"epoch": 0.7709880068532268,
"grad_norm": 3.136904001235962,
"learning_rate": 8.111111111111112e-06,
"loss": 0.3907,
"step": 1350
},
{
"epoch": 0.7852655625356939,
"grad_norm": 3.3192756175994873,
"learning_rate": 8.055555555555557e-06,
"loss": 0.3941,
"step": 1375
},
{
"epoch": 0.799543118218161,
"grad_norm": 4.766107082366943,
"learning_rate": 8.000000000000001e-06,
"loss": 0.3887,
"step": 1400
},
{
"epoch": 0.8138206739006282,
"grad_norm": 4.241744041442871,
"learning_rate": 7.944444444444445e-06,
"loss": 0.4033,
"step": 1425
},
{
"epoch": 0.8280982295830954,
"grad_norm": 3.1559460163116455,
"learning_rate": 7.88888888888889e-06,
"loss": 0.3567,
"step": 1450
},
{
"epoch": 0.8423757852655626,
"grad_norm": 3.142645835876465,
"learning_rate": 7.833333333333333e-06,
"loss": 0.3731,
"step": 1475
},
{
"epoch": 0.8566533409480297,
"grad_norm": 3.1183199882507324,
"learning_rate": 7.77777777777778e-06,
"loss": 0.3668,
"step": 1500
},
{
"epoch": 0.8709308966304968,
"grad_norm": 2.7859325408935547,
"learning_rate": 7.722222222222223e-06,
"loss": 0.3965,
"step": 1525
},
{
"epoch": 0.885208452312964,
"grad_norm": 3.191088914871216,
"learning_rate": 7.666666666666667e-06,
"loss": 0.3574,
"step": 1550
},
{
"epoch": 0.8994860079954312,
"grad_norm": 3.0640053749084473,
"learning_rate": 7.611111111111111e-06,
"loss": 0.3811,
"step": 1575
},
{
"epoch": 0.9137635636778983,
"grad_norm": 3.0769450664520264,
"learning_rate": 7.555555555555556e-06,
"loss": 0.3788,
"step": 1600
},
{
"epoch": 0.9280411193603655,
"grad_norm": 3.1407933235168457,
"learning_rate": 7.500000000000001e-06,
"loss": 0.3698,
"step": 1625
},
{
"epoch": 0.9423186750428326,
"grad_norm": 3.410187244415283,
"learning_rate": 7.444444444444445e-06,
"loss": 0.3907,
"step": 1650
},
{
"epoch": 0.9565962307252999,
"grad_norm": 3.3382880687713623,
"learning_rate": 7.38888888888889e-06,
"loss": 0.3368,
"step": 1675
},
{
"epoch": 0.970873786407767,
"grad_norm": 3.194368600845337,
"learning_rate": 7.333333333333333e-06,
"loss": 0.369,
"step": 1700
},
{
"epoch": 0.9851513420902341,
"grad_norm": 3.089852809906006,
"learning_rate": 7.277777777777778e-06,
"loss": 0.3765,
"step": 1725
},
{
"epoch": 0.9994288977727013,
"grad_norm": 3.0002810955047607,
"learning_rate": 7.222222222222223e-06,
"loss": 0.3705,
"step": 1750
},
{
"epoch": 1.0137064534551685,
"grad_norm": 2.3977696895599365,
"learning_rate": 7.166666666666667e-06,
"loss": 0.2584,
"step": 1775
},
{
"epoch": 1.0279840091376355,
"grad_norm": 2.3220465183258057,
"learning_rate": 7.111111111111112e-06,
"loss": 0.2538,
"step": 1800
},
{
"epoch": 1.0422615648201028,
"grad_norm": 2.819687843322754,
"learning_rate": 7.055555555555557e-06,
"loss": 0.2571,
"step": 1825
},
{
"epoch": 1.05653912050257,
"grad_norm": 2.514644145965576,
"learning_rate": 7e-06,
"loss": 0.2806,
"step": 1850
},
{
"epoch": 1.070816676185037,
"grad_norm": 2.1887128353118896,
"learning_rate": 6.944444444444445e-06,
"loss": 0.2626,
"step": 1875
},
{
"epoch": 1.0850942318675043,
"grad_norm": 2.592247486114502,
"learning_rate": 6.88888888888889e-06,
"loss": 0.2509,
"step": 1900
},
{
"epoch": 1.0993717875499716,
"grad_norm": 2.371534824371338,
"learning_rate": 6.833333333333334e-06,
"loss": 0.2605,
"step": 1925
},
{
"epoch": 1.1136493432324386,
"grad_norm": 3.1825778484344482,
"learning_rate": 6.777777777777779e-06,
"loss": 0.2495,
"step": 1950
},
{
"epoch": 1.1279268989149058,
"grad_norm": 2.901749849319458,
"learning_rate": 6.7222222222222235e-06,
"loss": 0.261,
"step": 1975
},
{
"epoch": 1.1422044545973729,
"grad_norm": 2.658766984939575,
"learning_rate": 6.666666666666667e-06,
"loss": 0.2476,
"step": 2000
},
{
"epoch": 1.1422044545973729,
"eval_loss": 0.41870468854904175,
"eval_runtime": 1722.2575,
"eval_samples_per_second": 2.265,
"eval_steps_per_second": 0.142,
"eval_wer": 0.3136771950159374,
"step": 2000
},
{
"epoch": 1.15648201027984,
"grad_norm": 2.711312770843506,
"learning_rate": 6.6111111111111115e-06,
"loss": 0.2414,
"step": 2025
},
{
"epoch": 1.1707595659623073,
"grad_norm": 2.9044759273529053,
"learning_rate": 6.555555555555556e-06,
"loss": 0.2502,
"step": 2050
},
{
"epoch": 1.1850371216447744,
"grad_norm": 2.549725294113159,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.2511,
"step": 2075
},
{
"epoch": 1.1993146773272416,
"grad_norm": 2.95792555809021,
"learning_rate": 6.444444444444445e-06,
"loss": 0.2427,
"step": 2100
},
{
"epoch": 1.2135922330097086,
"grad_norm": 2.686870574951172,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.2637,
"step": 2125
},
{
"epoch": 1.227869788692176,
"grad_norm": 3.7834455966949463,
"learning_rate": 6.333333333333333e-06,
"loss": 0.2554,
"step": 2150
},
{
"epoch": 1.2421473443746431,
"grad_norm": 3.0891430377960205,
"learning_rate": 6.277777777777778e-06,
"loss": 0.2467,
"step": 2175
},
{
"epoch": 1.2564249000571102,
"grad_norm": 2.771472930908203,
"learning_rate": 6.222222222222223e-06,
"loss": 0.2467,
"step": 2200
},
{
"epoch": 1.2707024557395774,
"grad_norm": 2.6807925701141357,
"learning_rate": 6.166666666666667e-06,
"loss": 0.2682,
"step": 2225
},
{
"epoch": 1.2849800114220447,
"grad_norm": 2.2320196628570557,
"learning_rate": 6.111111111111112e-06,
"loss": 0.2408,
"step": 2250
},
{
"epoch": 1.2992575671045117,
"grad_norm": 3.066009759902954,
"learning_rate": 6.055555555555555e-06,
"loss": 0.2363,
"step": 2275
},
{
"epoch": 1.313535122786979,
"grad_norm": 2.6043167114257812,
"learning_rate": 6e-06,
"loss": 0.2483,
"step": 2300
},
{
"epoch": 1.327812678469446,
"grad_norm": 2.6250624656677246,
"learning_rate": 5.944444444444445e-06,
"loss": 0.2563,
"step": 2325
},
{
"epoch": 1.3420902341519132,
"grad_norm": 2.508998394012451,
"learning_rate": 5.88888888888889e-06,
"loss": 0.2581,
"step": 2350
},
{
"epoch": 1.3563677898343802,
"grad_norm": 2.872715473175049,
"learning_rate": 5.833333333333334e-06,
"loss": 0.2371,
"step": 2375
},
{
"epoch": 1.3706453455168475,
"grad_norm": 3.1910557746887207,
"learning_rate": 5.777777777777778e-06,
"loss": 0.2515,
"step": 2400
},
{
"epoch": 1.3849229011993147,
"grad_norm": 2.7466485500335693,
"learning_rate": 5.722222222222222e-06,
"loss": 0.2578,
"step": 2425
},
{
"epoch": 1.3992004568817817,
"grad_norm": 2.388066530227661,
"learning_rate": 5.666666666666667e-06,
"loss": 0.2541,
"step": 2450
},
{
"epoch": 1.413478012564249,
"grad_norm": 2.688497304916382,
"learning_rate": 5.611111111111112e-06,
"loss": 0.2514,
"step": 2475
},
{
"epoch": 1.4277555682467162,
"grad_norm": 2.710899591445923,
"learning_rate": 5.555555555555557e-06,
"loss": 0.2765,
"step": 2500
},
{
"epoch": 1.4420331239291833,
"grad_norm": 2.296635389328003,
"learning_rate": 5.500000000000001e-06,
"loss": 0.2487,
"step": 2525
},
{
"epoch": 1.4563106796116505,
"grad_norm": 2.7988133430480957,
"learning_rate": 5.444444444444445e-06,
"loss": 0.2499,
"step": 2550
},
{
"epoch": 1.4705882352941178,
"grad_norm": 3.1988582611083984,
"learning_rate": 5.388888888888889e-06,
"loss": 0.2456,
"step": 2575
},
{
"epoch": 1.4848657909765848,
"grad_norm": 2.657517910003662,
"learning_rate": 5.333333333333334e-06,
"loss": 0.2613,
"step": 2600
},
{
"epoch": 1.499143346659052,
"grad_norm": 2.5517725944519043,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.2528,
"step": 2625
},
{
"epoch": 1.5134209023415193,
"grad_norm": 2.7166850566864014,
"learning_rate": 5.2222222222222226e-06,
"loss": 0.2476,
"step": 2650
},
{
"epoch": 1.5276984580239863,
"grad_norm": 2.7338292598724365,
"learning_rate": 5.1666666666666675e-06,
"loss": 0.2489,
"step": 2675
},
{
"epoch": 1.5419760137064533,
"grad_norm": 2.1498470306396484,
"learning_rate": 5.1111111111111115e-06,
"loss": 0.2388,
"step": 2700
},
{
"epoch": 1.5562535693889206,
"grad_norm": 2.595247745513916,
"learning_rate": 5.0555555555555555e-06,
"loss": 0.2566,
"step": 2725
},
{
"epoch": 1.5705311250713878,
"grad_norm": 2.652132987976074,
"learning_rate": 5e-06,
"loss": 0.239,
"step": 2750
},
{
"epoch": 1.5848086807538548,
"grad_norm": 2.436605930328369,
"learning_rate": 4.944444444444445e-06,
"loss": 0.2419,
"step": 2775
},
{
"epoch": 1.599086236436322,
"grad_norm": 2.618035316467285,
"learning_rate": 4.888888888888889e-06,
"loss": 0.2295,
"step": 2800
},
{
"epoch": 1.6133637921187893,
"grad_norm": 2.2901298999786377,
"learning_rate": 4.833333333333333e-06,
"loss": 0.2446,
"step": 2825
},
{
"epoch": 1.6276413478012564,
"grad_norm": 2.899315595626831,
"learning_rate": 4.777777777777778e-06,
"loss": 0.2628,
"step": 2850
},
{
"epoch": 1.6419189034837236,
"grad_norm": 2.616224527359009,
"learning_rate": 4.722222222222222e-06,
"loss": 0.2273,
"step": 2875
},
{
"epoch": 1.6561964591661908,
"grad_norm": 2.43113112449646,
"learning_rate": 4.666666666666667e-06,
"loss": 0.2362,
"step": 2900
},
{
"epoch": 1.6704740148486579,
"grad_norm": 2.5203065872192383,
"learning_rate": 4.611111111111112e-06,
"loss": 0.2428,
"step": 2925
},
{
"epoch": 1.6847515705311251,
"grad_norm": 2.3064985275268555,
"learning_rate": 4.555555555555556e-06,
"loss": 0.2441,
"step": 2950
},
{
"epoch": 1.6990291262135924,
"grad_norm": 2.201695680618286,
"learning_rate": 4.5e-06,
"loss": 0.2324,
"step": 2975
},
{
"epoch": 1.7133066818960594,
"grad_norm": 2.442471981048584,
"learning_rate": 4.444444444444444e-06,
"loss": 0.2527,
"step": 3000
},
{
"epoch": 1.7133066818960594,
"eval_loss": 0.3882293701171875,
"eval_runtime": 1749.1422,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 0.139,
"eval_wer": 0.2901091471071187,
"step": 3000
},
{
"epoch": 1.7275842375785264,
"grad_norm": 2.77786922454834,
"learning_rate": 4.388888888888889e-06,
"loss": 0.2492,
"step": 3025
},
{
"epoch": 1.7418617932609937,
"grad_norm": 2.5009052753448486,
"learning_rate": 4.333333333333334e-06,
"loss": 0.2341,
"step": 3050
},
{
"epoch": 1.756139348943461,
"grad_norm": 2.780186176300049,
"learning_rate": 4.277777777777778e-06,
"loss": 0.2407,
"step": 3075
},
{
"epoch": 1.770416904625928,
"grad_norm": 1.9574618339538574,
"learning_rate": 4.222222222222223e-06,
"loss": 0.2437,
"step": 3100
},
{
"epoch": 1.7846944603083952,
"grad_norm": 2.151125907897949,
"learning_rate": 4.166666666666667e-06,
"loss": 0.2341,
"step": 3125
},
{
"epoch": 1.7989720159908624,
"grad_norm": 2.170015811920166,
"learning_rate": 4.111111111111111e-06,
"loss": 0.2373,
"step": 3150
},
{
"epoch": 1.8132495716733295,
"grad_norm": 3.0467231273651123,
"learning_rate": 4.055555555555556e-06,
"loss": 0.2317,
"step": 3175
},
{
"epoch": 1.8275271273557967,
"grad_norm": 3.0150015354156494,
"learning_rate": 4.000000000000001e-06,
"loss": 0.228,
"step": 3200
},
{
"epoch": 1.841804683038264,
"grad_norm": 3.275949001312256,
"learning_rate": 3.944444444444445e-06,
"loss": 0.2438,
"step": 3225
},
{
"epoch": 1.856082238720731,
"grad_norm": 3.0381839275360107,
"learning_rate": 3.88888888888889e-06,
"loss": 0.2478,
"step": 3250
},
{
"epoch": 1.8703597944031982,
"grad_norm": 2.770716428756714,
"learning_rate": 3.833333333333334e-06,
"loss": 0.2312,
"step": 3275
},
{
"epoch": 1.8846373500856655,
"grad_norm": 2.6976678371429443,
"learning_rate": 3.777777777777778e-06,
"loss": 0.2284,
"step": 3300
},
{
"epoch": 1.8989149057681325,
"grad_norm": 2.8799102306365967,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.2484,
"step": 3325
},
{
"epoch": 1.9131924614505995,
"grad_norm": 2.574629545211792,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.2295,
"step": 3350
},
{
"epoch": 1.927470017133067,
"grad_norm": 2.4746835231781006,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.2335,
"step": 3375
},
{
"epoch": 1.941747572815534,
"grad_norm": 3.084383964538574,
"learning_rate": 3.555555555555556e-06,
"loss": 0.212,
"step": 3400
},
{
"epoch": 1.956025128498001,
"grad_norm": 2.4441068172454834,
"learning_rate": 3.5e-06,
"loss": 0.221,
"step": 3425
},
{
"epoch": 1.9703026841804683,
"grad_norm": 3.031568765640259,
"learning_rate": 3.444444444444445e-06,
"loss": 0.2341,
"step": 3450
},
{
"epoch": 1.9845802398629355,
"grad_norm": 2.3584327697753906,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.2431,
"step": 3475
},
{
"epoch": 1.9988577955454025,
"grad_norm": 2.1590421199798584,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.2357,
"step": 3500
},
{
"epoch": 2.0131353512278696,
"grad_norm": 2.2845587730407715,
"learning_rate": 3.277777777777778e-06,
"loss": 0.1576,
"step": 3525
},
{
"epoch": 2.027412906910337,
"grad_norm": 2.033133029937744,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.1422,
"step": 3550
},
{
"epoch": 2.041690462592804,
"grad_norm": 2.2549259662628174,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.1473,
"step": 3575
},
{
"epoch": 2.055968018275271,
"grad_norm": 1.5837754011154175,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.143,
"step": 3600
},
{
"epoch": 2.0702455739577386,
"grad_norm": 1.9988360404968262,
"learning_rate": 3.055555555555556e-06,
"loss": 0.1416,
"step": 3625
},
{
"epoch": 2.0845231296402056,
"grad_norm": 2.148613929748535,
"learning_rate": 3e-06,
"loss": 0.1338,
"step": 3650
},
{
"epoch": 2.0988006853226726,
"grad_norm": 1.8176393508911133,
"learning_rate": 2.944444444444445e-06,
"loss": 0.1514,
"step": 3675
},
{
"epoch": 2.11307824100514,
"grad_norm": 2.60271954536438,
"learning_rate": 2.888888888888889e-06,
"loss": 0.1533,
"step": 3700
},
{
"epoch": 2.127355796687607,
"grad_norm": 2.120281457901001,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.1404,
"step": 3725
},
{
"epoch": 2.141633352370074,
"grad_norm": 2.3522286415100098,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.1511,
"step": 3750
},
{
"epoch": 2.1559109080525416,
"grad_norm": 1.8738924264907837,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.1417,
"step": 3775
},
{
"epoch": 2.1701884637350086,
"grad_norm": 2.255291223526001,
"learning_rate": 2.666666666666667e-06,
"loss": 0.1437,
"step": 3800
},
{
"epoch": 2.1844660194174756,
"grad_norm": 1.7046154737472534,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.1446,
"step": 3825
},
{
"epoch": 2.198743575099943,
"grad_norm": 2.0543861389160156,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.1504,
"step": 3850
},
{
"epoch": 2.21302113078241,
"grad_norm": 2.139716863632202,
"learning_rate": 2.5e-06,
"loss": 0.1345,
"step": 3875
},
{
"epoch": 2.227298686464877,
"grad_norm": 1.7999951839447021,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.1389,
"step": 3900
},
{
"epoch": 2.241576242147344,
"grad_norm": 1.7282090187072754,
"learning_rate": 2.388888888888889e-06,
"loss": 0.1324,
"step": 3925
},
{
"epoch": 2.2558537978298117,
"grad_norm": 2.6271605491638184,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.1551,
"step": 3950
},
{
"epoch": 2.2701313535122787,
"grad_norm": 2.170382022857666,
"learning_rate": 2.277777777777778e-06,
"loss": 0.144,
"step": 3975
},
{
"epoch": 2.2844089091947457,
"grad_norm": 1.796635627746582,
"learning_rate": 2.222222222222222e-06,
"loss": 0.1568,
"step": 4000
},
{
"epoch": 2.2844089091947457,
"eval_loss": 0.3901652991771698,
"eval_runtime": 1765.3609,
"eval_samples_per_second": 2.21,
"eval_steps_per_second": 0.138,
"eval_wer": 0.28160919540229884,
"step": 4000
},
{
"epoch": 2.298686464877213,
"grad_norm": 2.0357980728149414,
"learning_rate": 2.166666666666667e-06,
"loss": 0.161,
"step": 4025
},
{
"epoch": 2.31296402055968,
"grad_norm": 2.027215003967285,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.1353,
"step": 4050
},
{
"epoch": 2.3272415762421472,
"grad_norm": 2.8169405460357666,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.1449,
"step": 4075
},
{
"epoch": 2.3415191319246147,
"grad_norm": 1.9528751373291016,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.1376,
"step": 4100
},
{
"epoch": 2.3557966876070817,
"grad_norm": 2.5781335830688477,
"learning_rate": 1.944444444444445e-06,
"loss": 0.1383,
"step": 4125
},
{
"epoch": 2.3700742432895487,
"grad_norm": 2.083077907562256,
"learning_rate": 1.888888888888889e-06,
"loss": 0.1362,
"step": 4150
},
{
"epoch": 2.384351798972016,
"grad_norm": 2.431272029876709,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.1329,
"step": 4175
},
{
"epoch": 2.3986293546544832,
"grad_norm": 2.157139539718628,
"learning_rate": 1.777777777777778e-06,
"loss": 0.1377,
"step": 4200
},
{
"epoch": 2.4129069103369503,
"grad_norm": 2.5328071117401123,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.1361,
"step": 4225
},
{
"epoch": 2.4271844660194173,
"grad_norm": 2.433239459991455,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.157,
"step": 4250
},
{
"epoch": 2.4414620217018848,
"grad_norm": 2.5167510509490967,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.132,
"step": 4275
},
{
"epoch": 2.455739577384352,
"grad_norm": 1.9507442712783813,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.1625,
"step": 4300
},
{
"epoch": 2.470017133066819,
"grad_norm": 2.2467007637023926,
"learning_rate": 1.5e-06,
"loss": 0.1333,
"step": 4325
},
{
"epoch": 2.4842946887492863,
"grad_norm": 2.4816768169403076,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.1499,
"step": 4350
},
{
"epoch": 2.4985722444317533,
"grad_norm": 2.0616416931152344,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.1508,
"step": 4375
},
{
"epoch": 2.5128498001142203,
"grad_norm": 2.089355230331421,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.1344,
"step": 4400
},
{
"epoch": 2.5271273557966873,
"grad_norm": 2.2235498428344727,
"learning_rate": 1.28e-06,
"loss": 0.1717,
"step": 4425
},
{
"epoch": 2.541404911479155,
"grad_norm": 1.9268138408660889,
"learning_rate": 1.2244444444444445e-06,
"loss": 0.143,
"step": 4450
},
{
"epoch": 2.555682467161622,
"grad_norm": 1.8911551237106323,
"learning_rate": 1.168888888888889e-06,
"loss": 0.1439,
"step": 4475
},
{
"epoch": 2.5699600228440893,
"grad_norm": 2.5078868865966797,
"learning_rate": 1.1133333333333334e-06,
"loss": 0.1341,
"step": 4500
},
{
"epoch": 2.5842375785265563,
"grad_norm": 2.1232492923736572,
"learning_rate": 1.0577777777777779e-06,
"loss": 0.1415,
"step": 4525
},
{
"epoch": 2.5985151342090234,
"grad_norm": 1.9214311838150024,
"learning_rate": 1.0022222222222223e-06,
"loss": 0.1301,
"step": 4550
},
{
"epoch": 2.6127926898914904,
"grad_norm": 2.4226858615875244,
"learning_rate": 9.466666666666667e-07,
"loss": 0.1438,
"step": 4575
},
{
"epoch": 2.627070245573958,
"grad_norm": 2.324777126312256,
"learning_rate": 8.911111111111112e-07,
"loss": 0.1306,
"step": 4600
},
{
"epoch": 2.641347801256425,
"grad_norm": 2.427114486694336,
"learning_rate": 8.355555555555556e-07,
"loss": 0.1359,
"step": 4625
},
{
"epoch": 2.655625356938892,
"grad_norm": 1.989882469177246,
"learning_rate": 7.8e-07,
"loss": 0.1386,
"step": 4650
},
{
"epoch": 2.6699029126213594,
"grad_norm": 2.6079118251800537,
"learning_rate": 7.244444444444446e-07,
"loss": 0.135,
"step": 4675
},
{
"epoch": 2.6841804683038264,
"grad_norm": 2.3429243564605713,
"learning_rate": 6.68888888888889e-07,
"loss": 0.1356,
"step": 4700
},
{
"epoch": 2.6984580239862934,
"grad_norm": 2.3358540534973145,
"learning_rate": 6.133333333333333e-07,
"loss": 0.1304,
"step": 4725
},
{
"epoch": 2.7127355796687604,
"grad_norm": 1.917809247970581,
"learning_rate": 5.577777777777779e-07,
"loss": 0.1395,
"step": 4750
},
{
"epoch": 2.727013135351228,
"grad_norm": 2.0677952766418457,
"learning_rate": 5.022222222222222e-07,
"loss": 0.1309,
"step": 4775
},
{
"epoch": 2.741290691033695,
"grad_norm": 2.135127305984497,
"learning_rate": 4.466666666666667e-07,
"loss": 0.1424,
"step": 4800
},
{
"epoch": 2.7555682467161624,
"grad_norm": 2.3306682109832764,
"learning_rate": 3.9111111111111115e-07,
"loss": 0.1318,
"step": 4825
},
{
"epoch": 2.7698458023986294,
"grad_norm": 2.0700454711914062,
"learning_rate": 3.3555555555555556e-07,
"loss": 0.1566,
"step": 4850
},
{
"epoch": 2.7841233580810965,
"grad_norm": 1.8561683893203735,
"learning_rate": 2.8e-07,
"loss": 0.1453,
"step": 4875
},
{
"epoch": 2.7984009137635635,
"grad_norm": 2.2682347297668457,
"learning_rate": 2.2444444444444445e-07,
"loss": 0.1415,
"step": 4900
},
{
"epoch": 2.812678469446031,
"grad_norm": 2.2898778915405273,
"learning_rate": 1.6888888888888888e-07,
"loss": 0.1427,
"step": 4925
},
{
"epoch": 2.826956025128498,
"grad_norm": 2.328401803970337,
"learning_rate": 1.1333333333333336e-07,
"loss": 0.1357,
"step": 4950
},
{
"epoch": 2.841233580810965,
"grad_norm": 2.2169013023376465,
"learning_rate": 5.777777777777778e-08,
"loss": 0.1343,
"step": 4975
},
{
"epoch": 2.8555111364934325,
"grad_norm": 2.42340350151062,
"learning_rate": 2.2222222222222225e-09,
"loss": 0.1313,
"step": 5000
},
{
"epoch": 2.8555111364934325,
"eval_loss": 0.38383349776268005,
"eval_runtime": 1820.062,
"eval_samples_per_second": 2.143,
"eval_steps_per_second": 0.134,
"eval_wer": 0.27318168646769053,
"step": 5000
},
{
"epoch": 2.8555111364934325,
"step": 5000,
"total_flos": 5.435589590699213e+20,
"train_loss": 0.3002769865989685,
"train_runtime": 59305.2217,
"train_samples_per_second": 2.698,
"train_steps_per_second": 0.084
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.435589590699213e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}