debugger123 commited on
Commit
fb5bc67
·
verified ·
1 Parent(s): 8d3c0d1

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -508
trainer_state.json DELETED
@@ -1,508 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 3.0,
6
- "eval_steps": 1000,
7
- "global_step": 3180,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.047175374454534735,
14
- "grad_norm": 0.6658920049667358,
15
- "learning_rate": 2.0545073375262055e-06,
16
- "loss": 0.7831,
17
- "step": 50
18
- },
19
- {
20
- "epoch": 0.09435074890906947,
21
- "grad_norm": 0.6541114449501038,
22
- "learning_rate": 4.150943396226416e-06,
23
- "loss": 0.6891,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 0.1415261233636042,
28
- "grad_norm": 0.64142906665802,
29
- "learning_rate": 6.247379454926625e-06,
30
- "loss": 0.6727,
31
- "step": 150
32
- },
33
- {
34
- "epoch": 0.18870149781813894,
35
- "grad_norm": 0.645492434501648,
36
- "learning_rate": 8.343815513626834e-06,
37
- "loss": 0.6608,
38
- "step": 200
39
- },
40
- {
41
- "epoch": 0.23587687227267368,
42
- "grad_norm": 0.6750236749649048,
43
- "learning_rate": 1.0440251572327045e-05,
44
- "loss": 0.6656,
45
- "step": 250
46
- },
47
- {
48
- "epoch": 0.2830522467272084,
49
- "grad_norm": 0.6192681789398193,
50
- "learning_rate": 1.2536687631027256e-05,
51
- "loss": 0.6613,
52
- "step": 300
53
- },
54
- {
55
- "epoch": 0.33022762118174315,
56
- "grad_norm": 0.6124400496482849,
57
- "learning_rate": 1.4633123689727464e-05,
58
- "loss": 0.6612,
59
- "step": 350
60
- },
61
- {
62
- "epoch": 0.3774029956362779,
63
- "grad_norm": 0.5999819040298462,
64
- "learning_rate": 1.6729559748427675e-05,
65
- "loss": 0.6587,
66
- "step": 400
67
- },
68
- {
69
- "epoch": 0.4245783700908126,
70
- "grad_norm": 0.6131274104118347,
71
- "learning_rate": 1.8825995807127882e-05,
72
- "loss": 0.6613,
73
- "step": 450
74
- },
75
- {
76
- "epoch": 0.47175374454534735,
77
- "grad_norm": 0.5893041491508484,
78
- "learning_rate": 1.9996731117142877e-05,
79
- "loss": 0.6615,
80
- "step": 500
81
- },
82
- {
83
- "epoch": 0.518929118999882,
84
- "grad_norm": 0.546073853969574,
85
- "learning_rate": 1.996500635384337e-05,
86
- "loss": 0.6598,
87
- "step": 550
88
- },
89
- {
90
- "epoch": 0.5661044934544168,
91
- "grad_norm": 0.5713281035423279,
92
- "learning_rate": 1.9899637947477248e-05,
93
- "loss": 0.6586,
94
- "step": 600
95
- },
96
- {
97
- "epoch": 0.6132798679089515,
98
- "grad_norm": 0.5555443167686462,
99
- "learning_rate": 1.9800846593471427e-05,
100
- "loss": 0.6602,
101
- "step": 650
102
- },
103
- {
104
- "epoch": 0.6604552423634863,
105
- "grad_norm": 0.5415436625480652,
106
- "learning_rate": 1.966896582909968e-05,
107
- "loss": 0.6577,
108
- "step": 700
109
- },
110
- {
111
- "epoch": 0.707630616818021,
112
- "grad_norm": 0.5836868286132812,
113
- "learning_rate": 1.9504440907401113e-05,
114
- "loss": 0.6544,
115
- "step": 750
116
- },
117
- {
118
- "epoch": 0.7548059912725558,
119
- "grad_norm": 0.539230465888977,
120
- "learning_rate": 1.9307827293926344e-05,
121
- "loss": 0.6577,
122
- "step": 800
123
- },
124
- {
125
- "epoch": 0.8019813657270904,
126
- "grad_norm": 0.5532299280166626,
127
- "learning_rate": 1.9079788791386468e-05,
128
- "loss": 0.6569,
129
- "step": 850
130
- },
131
- {
132
- "epoch": 0.8491567401816252,
133
- "grad_norm": 0.5102491974830627,
134
- "learning_rate": 1.8821095298536435e-05,
135
- "loss": 0.6502,
136
- "step": 900
137
- },
138
- {
139
- "epoch": 0.8963321146361599,
140
- "grad_norm": 0.5178349614143372,
141
- "learning_rate": 1.853262021085921e-05,
142
- "loss": 0.6559,
143
- "step": 950
144
- },
145
- {
146
- "epoch": 0.9435074890906947,
147
- "grad_norm": 0.5248669385910034,
148
- "learning_rate": 1.821533747182645e-05,
149
- "loss": 0.6549,
150
- "step": 1000
151
- },
152
- {
153
- "epoch": 0.9435074890906947,
154
- "eval_loss": 0.6482675671577454,
155
- "eval_runtime": 113.518,
156
- "eval_samples_per_second": 66.395,
157
- "eval_steps_per_second": 2.079,
158
- "step": 1000
159
- },
160
- {
161
- "epoch": 0.9906828635452294,
162
- "grad_norm": 0.5705227255821228,
163
- "learning_rate": 1.787031828469124e-05,
164
- "loss": 0.6477,
165
- "step": 1050
166
- },
167
- {
168
- "epoch": 1.0377402995636278,
169
- "grad_norm": 0.5323419570922852,
170
- "learning_rate": 1.7498727495914378e-05,
171
- "loss": 0.5975,
172
- "step": 1100
173
- },
174
- {
175
- "epoch": 1.0849156740181625,
176
- "grad_norm": 0.5340594053268433,
177
- "learning_rate": 1.710181966243447e-05,
178
- "loss": 0.5819,
179
- "step": 1150
180
- },
181
- {
182
- "epoch": 1.1320910484726974,
183
- "grad_norm": 0.49585267901420593,
184
- "learning_rate": 1.6680934816059403e-05,
185
- "loss": 0.5819,
186
- "step": 1200
187
- },
188
- {
189
- "epoch": 1.179266422927232,
190
- "grad_norm": 0.500704824924469,
191
- "learning_rate": 1.623749393927938e-05,
192
- "loss": 0.5853,
193
- "step": 1250
194
- },
195
- {
196
- "epoch": 1.2264417973817667,
197
- "grad_norm": 0.49850329756736755,
198
- "learning_rate": 1.5772994167775986e-05,
199
- "loss": 0.5844,
200
- "step": 1300
201
- },
202
- {
203
- "epoch": 1.2736171718363014,
204
- "grad_norm": 0.5016334652900696,
205
- "learning_rate": 1.5289003735824454e-05,
206
- "loss": 0.5866,
207
- "step": 1350
208
- },
209
- {
210
- "epoch": 1.320792546290836,
211
- "grad_norm": 0.5468031764030457,
212
- "learning_rate": 1.4787156681654358e-05,
213
- "loss": 0.5819,
214
- "step": 1400
215
- },
216
- {
217
- "epoch": 1.367967920745371,
218
- "grad_norm": 0.5233626961708069,
219
- "learning_rate": 1.426914733064444e-05,
220
- "loss": 0.5849,
221
- "step": 1450
222
- },
223
- {
224
- "epoch": 1.4151432951999057,
225
- "grad_norm": 0.5027523040771484,
226
- "learning_rate": 1.373672457497717e-05,
227
- "loss": 0.5844,
228
- "step": 1500
229
- },
230
- {
231
- "epoch": 1.4623186696544404,
232
- "grad_norm": 0.5051872730255127,
233
- "learning_rate": 1.3191685969066082e-05,
234
- "loss": 0.5846,
235
- "step": 1550
236
- },
237
- {
238
- "epoch": 1.5094940441089753,
239
- "grad_norm": 0.5215147733688354,
240
- "learning_rate": 1.2635871660690677e-05,
241
- "loss": 0.5829,
242
- "step": 1600
243
- },
244
- {
245
- "epoch": 1.55666941856351,
246
- "grad_norm": 0.5042491555213928,
247
- "learning_rate": 1.2071158178328547e-05,
248
- "loss": 0.5854,
249
- "step": 1650
250
- },
251
- {
252
- "epoch": 1.6038447930180446,
253
- "grad_norm": 0.5223520994186401,
254
- "learning_rate": 1.1499452095659713e-05,
255
- "loss": 0.5833,
256
- "step": 1700
257
- },
258
- {
259
- "epoch": 1.6510201674725793,
260
- "grad_norm": 0.4980650246143341,
261
- "learning_rate": 1.092268359463302e-05,
262
- "loss": 0.5808,
263
- "step": 1750
264
- },
265
- {
266
- "epoch": 1.698195541927114,
267
- "grad_norm": 0.5121437907218933,
268
- "learning_rate": 1.0342799948826788e-05,
269
- "loss": 0.5823,
270
- "step": 1800
271
- },
272
- {
273
- "epoch": 1.7453709163816487,
274
- "grad_norm": 0.4902792274951935,
275
- "learning_rate": 9.761758949105056e-06,
276
- "loss": 0.5824,
277
- "step": 1850
278
- },
279
- {
280
- "epoch": 1.7925462908361836,
281
- "grad_norm": 0.4946081042289734,
282
- "learning_rate": 9.18152229376561e-06,
283
- "loss": 0.5814,
284
- "step": 1900
285
- },
286
- {
287
- "epoch": 1.8397216652907182,
288
- "grad_norm": 0.5029374361038208,
289
- "learning_rate": 8.604048965495786e-06,
290
- "loss": 0.5795,
291
- "step": 1950
292
- },
293
- {
294
- "epoch": 1.8868970397452531,
295
- "grad_norm": 0.47936609387397766,
296
- "learning_rate": 8.031288617496686e-06,
297
- "loss": 0.5796,
298
- "step": 2000
299
- },
300
- {
301
- "epoch": 1.8868970397452531,
302
- "eval_loss": 0.6402788162231445,
303
- "eval_runtime": 113.5077,
304
- "eval_samples_per_second": 66.401,
305
- "eval_steps_per_second": 2.079,
306
- "step": 2000
307
- },
308
- {
309
- "epoch": 1.9340724141997878,
310
- "grad_norm": 0.5165300369262695,
311
- "learning_rate": 7.465174991105405e-06,
312
- "loss": 0.5762,
313
- "step": 2050
314
- },
315
- {
316
- "epoch": 1.9812477886543225,
317
- "grad_norm": 0.49469050765037537,
318
- "learning_rate": 6.90761938713854e-06,
319
- "loss": 0.5747,
320
- "step": 2100
321
- },
322
- {
323
- "epoch": 2.028305224672721,
324
- "grad_norm": 0.5297324061393738,
325
- "learning_rate": 6.360504212998903e-06,
326
- "loss": 0.5171,
327
- "step": 2150
328
- },
329
- {
330
- "epoch": 2.0754805991272556,
331
- "grad_norm": 0.5692960023880005,
332
- "learning_rate": 5.825676627331614e-06,
333
- "loss": 0.471,
334
- "step": 2200
335
- },
336
- {
337
- "epoch": 2.1226559735817903,
338
- "grad_norm": 0.5705838799476624,
339
- "learning_rate": 5.304942303686238e-06,
340
- "loss": 0.4737,
341
- "step": 2250
342
- },
343
- {
344
- "epoch": 2.169831348036325,
345
- "grad_norm": 0.5800350308418274,
346
- "learning_rate": 4.800059334240049e-06,
347
- "loss": 0.4734,
348
- "step": 2300
349
- },
350
- {
351
- "epoch": 2.2170067224908596,
352
- "grad_norm": 0.5653440952301025,
353
- "learning_rate": 4.3127322941645385e-06,
354
- "loss": 0.4713,
355
- "step": 2350
356
- },
357
- {
358
- "epoch": 2.2641820969453947,
359
- "grad_norm": 0.5822903513908386,
360
- "learning_rate": 3.844606486674862e-06,
361
- "loss": 0.4715,
362
- "step": 2400
363
- },
364
- {
365
- "epoch": 2.3113574713999294,
366
- "grad_norm": 0.599399209022522,
367
- "learning_rate": 3.3972623881920296e-06,
368
- "loss": 0.47,
369
- "step": 2450
370
- },
371
- {
372
- "epoch": 2.358532845854464,
373
- "grad_norm": 0.5810734033584595,
374
- "learning_rate": 2.9722103123719324e-06,
375
- "loss": 0.4661,
376
- "step": 2500
377
- },
378
- {
379
- "epoch": 2.405708220308999,
380
- "grad_norm": 0.5994310975074768,
381
- "learning_rate": 2.5708853110164346e-06,
382
- "loss": 0.4686,
383
- "step": 2550
384
- },
385
- {
386
- "epoch": 2.4528835947635335,
387
- "grad_norm": 0.577089786529541,
388
- "learning_rate": 2.194642329081902e-06,
389
- "loss": 0.4673,
390
- "step": 2600
391
- },
392
- {
393
- "epoch": 2.500058969218068,
394
- "grad_norm": 0.578800618648529,
395
- "learning_rate": 1.844751630142797e-06,
396
- "loss": 0.4664,
397
- "step": 2650
398
- },
399
- {
400
- "epoch": 2.547234343672603,
401
- "grad_norm": 0.5981403589248657,
402
- "learning_rate": 1.5223945077547253e-06,
403
- "loss": 0.4686,
404
- "step": 2700
405
- },
406
- {
407
- "epoch": 2.5944097181271375,
408
- "grad_norm": 0.6022927165031433,
409
- "learning_rate": 1.2286592971962152e-06,
410
- "loss": 0.4676,
411
- "step": 2750
412
- },
413
- {
414
- "epoch": 2.641585092581672,
415
- "grad_norm": 0.5744144916534424,
416
- "learning_rate": 9.645377010542212e-07,
417
- "loss": 0.4684,
418
- "step": 2800
419
- },
420
- {
421
- "epoch": 2.6887604670362073,
422
- "grad_norm": 0.5747597813606262,
423
- "learning_rate": 7.309214410588927e-07,
424
- "loss": 0.4632,
425
- "step": 2850
426
- },
427
- {
428
- "epoch": 2.735935841490742,
429
- "grad_norm": 0.5752475261688232,
430
- "learning_rate": 5.285992474715796e-07,
431
- "loss": 0.4643,
432
- "step": 2900
433
- },
434
- {
435
- "epoch": 2.7831112159452767,
436
- "grad_norm": 0.5861772894859314,
437
- "learning_rate": 3.5825419619046176e-07,
438
- "loss": 0.465,
439
- "step": 2950
440
- },
441
- {
442
- "epoch": 2.8302865903998113,
443
- "grad_norm": 0.5940195918083191,
444
- "learning_rate": 2.2046140256418713e-07,
445
- "loss": 0.4646,
446
- "step": 3000
447
- },
448
- {
449
- "epoch": 2.8302865903998113,
450
- "eval_loss": 0.6850141286849976,
451
- "eval_runtime": 113.5772,
452
- "eval_samples_per_second": 66.36,
453
- "eval_steps_per_second": 2.078,
454
- "step": 3000
455
- },
456
- {
457
- "epoch": 2.877461964854346,
458
- "grad_norm": 0.5916756987571716,
459
- "learning_rate": 1.1568607969963175e-07,
460
- "loss": 0.4644,
461
- "step": 3050
462
- },
463
- {
464
- "epoch": 2.9246373393088807,
465
- "grad_norm": 0.5982924103736877,
466
- "learning_rate": 4.42819678192774e-08,
467
- "loss": 0.4691,
468
- "step": 3100
469
- },
470
- {
471
- "epoch": 2.9718127137634154,
472
- "grad_norm": 0.5770459175109863,
473
- "learning_rate": 6.490139970976029e-09,
474
- "loss": 0.4661,
475
- "step": 3150
476
- },
477
- {
478
- "epoch": 3.0,
479
- "step": 3180,
480
- "total_flos": 1.7575386520355865e+19,
481
- "train_loss": 0.5721450322828953,
482
- "train_runtime": 14690.2311,
483
- "train_samples_per_second": 13.852,
484
- "train_steps_per_second": 0.216
485
- }
486
- ],
487
- "logging_steps": 50,
488
- "max_steps": 3180,
489
- "num_input_tokens_seen": 0,
490
- "num_train_epochs": 3,
491
- "save_steps": 1000,
492
- "stateful_callbacks": {
493
- "TrainerControl": {
494
- "args": {
495
- "should_epoch_stop": false,
496
- "should_evaluate": false,
497
- "should_log": false,
498
- "should_save": true,
499
- "should_training_stop": true
500
- },
501
- "attributes": {}
502
- }
503
- },
504
- "total_flos": 1.7575386520355865e+19,
505
- "train_batch_size": 1,
506
- "trial_name": null,
507
- "trial_params": null
508
- }