prxy5604 committed on
Commit
2931eac
·
verified ·
1 Parent(s): 3f65178

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30a4ece454d0484c4a920e7ebdd5f3fa366db8fd0a3106f2aa04c503f33ebc9e
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e9eaae6c521bfae3b9d47ed57df548a6761eb1c4fd7666336de33072a97c6db
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85dda73baf9b17ed1e0833d516d10f58c58d6bf0e841053a63b0332a648fde38
3
- size 36135892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e3410fd4b44c3e7d011fa2122e549b33ea7e06dbd011798ee30e16ad841ce4
3
+ size 36136276
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf76e0a36aa3484dfebceff927791a6201375ca89f4fca943972b68b791c0b45
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df72fa71e74de1525fc91f11cb5e2a1f9a332fbd559117fcdd7368b75e72f17
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc33e131fa6defcf31352ebc1dc63541771f8d9732b2772a4a16ecb6c33c3697
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6612e1be5fc1a945d1a2e93ec2df274cca4c095f65d292f5fd095af43ba016
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.7786412239074707,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.16675931072818231,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
144
  "eval_samples_per_second": 26.071,
145
  "eval_steps_per_second": 6.522,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +301,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 1.10737842241536e+16,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.631667137145996,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.33351862145636463,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 26.071,
145
  "eval_steps_per_second": 6.522,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.17787659811006115,
150
+ "grad_norm": 2.4696974754333496,
151
+ "learning_rate": 7.396244933600285e-05,
152
+ "loss": 2.9971,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.18899388549193996,
157
+ "grad_norm": 2.099034070968628,
158
+ "learning_rate": 7.077075065009433e-05,
159
+ "loss": 2.64,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.2001111728738188,
164
+ "grad_norm": 2.41749906539917,
165
+ "learning_rate": 6.747320897995493e-05,
166
+ "loss": 2.5229,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.2112284602556976,
171
+ "grad_norm": 2.740168333053589,
172
+ "learning_rate": 6.408662784207149e-05,
173
+ "loss": 2.4978,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.22234574763757642,
178
+ "grad_norm": 3.9902448654174805,
179
+ "learning_rate": 6.062826447764883e-05,
180
+ "loss": 2.4716,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.22234574763757642,
185
+ "eval_loss": 2.7965714931488037,
186
+ "eval_runtime": 58.2938,
187
+ "eval_samples_per_second": 25.989,
188
+ "eval_steps_per_second": 6.502,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 0.23346303501945526,
193
+ "grad_norm": 2.4260706901550293,
194
+ "learning_rate": 5.7115741913664264e-05,
195
+ "loss": 3.0674,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 0.24458032240133407,
200
+ "grad_norm": 2.296048641204834,
201
+ "learning_rate": 5.3566959159961615e-05,
202
+ "loss": 2.5382,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 0.2556976097832129,
207
+ "grad_norm": 2.220076084136963,
208
+ "learning_rate": 5e-05,
209
+ "loss": 2.4908,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 0.2668148971650917,
214
+ "grad_norm": 2.6584367752075195,
215
+ "learning_rate": 4.643304084003839e-05,
216
+ "loss": 2.4995,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 0.27793218454697055,
221
+ "grad_norm": 4.641247749328613,
222
+ "learning_rate": 4.288425808633575e-05,
223
+ "loss": 2.3999,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 0.27793218454697055,
228
+ "eval_loss": 2.664534568786621,
229
+ "eval_runtime": 58.1209,
230
+ "eval_samples_per_second": 26.066,
231
+ "eval_steps_per_second": 6.521,
232
+ "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.28904947192884933,
236
+ "grad_norm": 2.203540086746216,
237
+ "learning_rate": 3.937173552235117e-05,
238
+ "loss": 2.9398,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.30016675931072817,
243
+ "grad_norm": 2.368499517440796,
244
+ "learning_rate": 3.591337215792852e-05,
245
+ "loss": 2.671,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.311284046692607,
250
+ "grad_norm": 2.223212718963623,
251
+ "learning_rate": 3.2526791020045086e-05,
252
+ "loss": 2.5258,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.32240133407448585,
257
+ "grad_norm": 2.5719311237335205,
258
+ "learning_rate": 2.9229249349905684e-05,
259
+ "loss": 2.5361,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.33351862145636463,
264
+ "grad_norm": 5.5927734375,
265
+ "learning_rate": 2.603755066399718e-05,
266
+ "loss": 2.6185,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.33351862145636463,
271
+ "eval_loss": 2.631667137145996,
272
+ "eval_runtime": 58.1811,
273
+ "eval_samples_per_second": 26.039,
274
+ "eval_steps_per_second": 6.514,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 2.21475684483072e+16,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null