Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +38 -3
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -358,6 +358,41 @@
|
|
| 358 |
"learning_rate": 1.795226635093226e-05,
|
| 359 |
"loss": 0.6666,
|
| 360 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
}
|
| 362 |
],
|
| 363 |
"logging_steps": 10,
|
|
@@ -377,7 +412,7 @@
|
|
| 377 |
"attributes": {}
|
| 378 |
}
|
| 379 |
},
|
| 380 |
-
"total_flos": 1.
|
| 381 |
"train_batch_size": 1,
|
| 382 |
"trial_name": null,
|
| 383 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.014702504020216,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 550,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 358 |
"learning_rate": 1.795226635093226e-05,
|
| 359 |
"loss": 0.6666,
|
| 360 |
"step": 500
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 1.8711233631977946,
|
| 364 |
+
"grad_norm": 0.3329021632671356,
|
| 365 |
+
"learning_rate": 1.6986008468871783e-05,
|
| 366 |
+
"loss": 0.6766,
|
| 367 |
+
"step": 510
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 1.9078796232483346,
|
| 371 |
+
"grad_norm": 0.3460406959056854,
|
| 372 |
+
"learning_rate": 1.6032917553853936e-05,
|
| 373 |
+
"loss": 0.6735,
|
| 374 |
+
"step": 520
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 1.9446358832988744,
|
| 378 |
+
"grad_norm": 0.3377283215522766,
|
| 379 |
+
"learning_rate": 1.509455953174948e-05,
|
| 380 |
+
"loss": 0.669,
|
| 381 |
+
"step": 530
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 1.9813921433494142,
|
| 385 |
+
"grad_norm": 0.35212215781211853,
|
| 386 |
+
"learning_rate": 1.4172476122324806e-05,
|
| 387 |
+
"loss": 0.6608,
|
| 388 |
+
"step": 540
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 2.014702504020216,
|
| 392 |
+
"grad_norm": 0.4792023003101349,
|
| 393 |
+
"learning_rate": 1.3268182306200405e-05,
|
| 394 |
+
"loss": 0.5849,
|
| 395 |
+
"step": 550
|
| 396 |
}
|
| 397 |
],
|
| 398 |
"logging_steps": 10,
|
|
|
|
| 412 |
"attributes": {}
|
| 413 |
}
|
| 414 |
},
|
| 415 |
+
"total_flos": 1.8293318215532544e+16,
|
| 416 |
"train_batch_size": 1,
|
| 417 |
"trial_name": null,
|
| 418 |
"trial_params": null
|