| { | |
| "best_metric": 18.148300246851182, | |
| "best_model_checkpoint": "./checkpoint-9000", | |
| "epoch": 17.33102253032929, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.3000000000000004e-06, | |
| "loss": 2.8829, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 1.5187, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.3e-06, | |
| "loss": 0.8352, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.4087, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 9.953061224489797e-06, | |
| "loss": 0.3044, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.902040816326531e-06, | |
| "loss": 0.2602, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.851020408163267e-06, | |
| "loss": 0.2313, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.2162, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 9.748979591836735e-06, | |
| "loss": 0.2019, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.697959183673469e-06, | |
| "loss": 0.1869, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.646938775510205e-06, | |
| "loss": 0.1813, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 9.595918367346939e-06, | |
| "loss": 0.1619, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.544897959183675e-06, | |
| "loss": 0.1441, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 9.493877551020409e-06, | |
| "loss": 0.1389, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 9.442857142857144e-06, | |
| "loss": 0.1367, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 9.391836734693878e-06, | |
| "loss": 0.1346, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 9.340816326530612e-06, | |
| "loss": 0.1296, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.289795918367348e-06, | |
| "loss": 0.1258, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.238775510204082e-06, | |
| "loss": 0.1234, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 9.187755102040818e-06, | |
| "loss": 0.1189, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.14634737372398376, | |
| "eval_runtime": 1874.6047, | |
| "eval_samples_per_second": 4.924, | |
| "eval_steps_per_second": 0.308, | |
| "eval_wer": 23.895374812153925, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 9.136734693877552e-06, | |
| "loss": 0.1164, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 9.085714285714286e-06, | |
| "loss": 0.1179, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 9.03469387755102e-06, | |
| "loss": 0.1155, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 8.983673469387756e-06, | |
| "loss": 0.0855, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 8.932653061224492e-06, | |
| "loss": 0.083, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.881632653061226e-06, | |
| "loss": 0.081, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 8.83061224489796e-06, | |
| "loss": 0.079, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 8.779591836734694e-06, | |
| "loss": 0.0773, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.72857142857143e-06, | |
| "loss": 0.0764, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 8.677551020408164e-06, | |
| "loss": 0.0804, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 8.6265306122449e-06, | |
| "loss": 0.0768, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 8.575510204081633e-06, | |
| "loss": 0.0765, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 8.524489795918367e-06, | |
| "loss": 0.081, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.473469387755101e-06, | |
| "loss": 0.0768, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 8.422448979591837e-06, | |
| "loss": 0.0676, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 8.371428571428573e-06, | |
| "loss": 0.0472, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 8.320408163265307e-06, | |
| "loss": 0.0503, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 8.269387755102043e-06, | |
| "loss": 0.0499, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 8.218367346938777e-06, | |
| "loss": 0.0492, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 8.16734693877551e-06, | |
| "loss": 0.0509, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "eval_loss": 0.1334686577320099, | |
| "eval_runtime": 1883.4767, | |
| "eval_samples_per_second": 4.901, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 20.229612213492427, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 8.116326530612245e-06, | |
| "loss": 0.0501, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 8.06530612244898e-06, | |
| "loss": 0.0496, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 8.014285714285715e-06, | |
| "loss": 0.0493, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 7.96326530612245e-06, | |
| "loss": 0.0497, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 7.912244897959184e-06, | |
| "loss": 0.0509, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 7.861224489795918e-06, | |
| "loss": 0.049, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 7.810204081632654e-06, | |
| "loss": 0.031, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 7.759183673469388e-06, | |
| "loss": 0.0288, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.708163265306124e-06, | |
| "loss": 0.0293, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 7.657142857142858e-06, | |
| "loss": 0.0279, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 7.606122448979593e-06, | |
| "loss": 0.0304, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 7.555102040816327e-06, | |
| "loss": 0.0303, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 7.504081632653062e-06, | |
| "loss": 0.0317, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 7.4530612244897974e-06, | |
| "loss": 0.0324, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 7.4020408163265315e-06, | |
| "loss": 0.0294, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 7.351020408163266e-06, | |
| "loss": 0.0313, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 7.3e-06, | |
| "loss": 0.0313, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 7.248979591836735e-06, | |
| "loss": 0.0265, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 7.197959183673469e-06, | |
| "loss": 0.0185, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 7.146938775510205e-06, | |
| "loss": 0.0176, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_loss": 0.15767353773117065, | |
| "eval_runtime": 1882.3239, | |
| "eval_samples_per_second": 4.904, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 20.16237121543745, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 7.095918367346939e-06, | |
| "loss": 0.0168, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 7.044897959183674e-06, | |
| "loss": 0.017, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 6.993877551020408e-06, | |
| "loss": 0.0174, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 6.942857142857144e-06, | |
| "loss": 0.0181, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 6.891836734693879e-06, | |
| "loss": 0.0185, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 6.840816326530613e-06, | |
| "loss": 0.0159, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 6.789795918367348e-06, | |
| "loss": 0.0185, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 6.738775510204082e-06, | |
| "loss": 0.0187, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 6.687755102040817e-06, | |
| "loss": 0.0179, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 6.636734693877551e-06, | |
| "loss": 0.0119, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 6.585714285714286e-06, | |
| "loss": 0.0094, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 6.53469387755102e-06, | |
| "loss": 0.0101, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 6.483673469387755e-06, | |
| "loss": 0.01, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 6.432653061224491e-06, | |
| "loss": 0.0105, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 6.381632653061225e-06, | |
| "loss": 0.0111, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 6.33061224489796e-06, | |
| "loss": 0.0101, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 6.279591836734694e-06, | |
| "loss": 0.0101, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 6.22857142857143e-06, | |
| "loss": 0.0103, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 6.177551020408164e-06, | |
| "loss": 0.0102, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 6.126530612244899e-06, | |
| "loss": 0.0108, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "eval_loss": 0.1795009821653366, | |
| "eval_runtime": 1880.0918, | |
| "eval_samples_per_second": 4.909, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 19.51523301939158, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 6.075510204081633e-06, | |
| "loss": 0.0093, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 6.0244897959183675e-06, | |
| "loss": 0.0054, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 5.973469387755102e-06, | |
| "loss": 0.0056, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 5.922448979591837e-06, | |
| "loss": 0.006, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 5.871428571428572e-06, | |
| "loss": 0.0062, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 5.820408163265306e-06, | |
| "loss": 0.0058, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 5.769387755102042e-06, | |
| "loss": 0.0064, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 5.718367346938776e-06, | |
| "loss": 0.0059, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 5.667346938775511e-06, | |
| "loss": 0.0064, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 5.616326530612245e-06, | |
| "loss": 0.0066, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 5.56530612244898e-06, | |
| "loss": 0.0061, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 5.514285714285714e-06, | |
| "loss": 0.0064, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 5.46326530612245e-06, | |
| "loss": 0.0049, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 5.4122448979591845e-06, | |
| "loss": 0.004, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 5.3612244897959186e-06, | |
| "loss": 0.0034, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 5.310204081632654e-06, | |
| "loss": 0.0043, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 5.259183673469388e-06, | |
| "loss": 0.0038, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 5.208163265306123e-06, | |
| "loss": 0.0039, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 5.157142857142857e-06, | |
| "loss": 0.0048, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 5.106122448979592e-06, | |
| "loss": 0.0042, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "eval_loss": 0.21113774180412292, | |
| "eval_runtime": 1878.1595, | |
| "eval_samples_per_second": 4.914, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 18.799951261558455, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 5.055102040816326e-06, | |
| "loss": 0.0042, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 5.004081632653062e-06, | |
| "loss": 0.0043, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 4.953061224489796e-06, | |
| "loss": 0.004, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 4.902040816326531e-06, | |
| "loss": 0.004, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.851020408163266e-06, | |
| "loss": 0.0021, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.002, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 4.748979591836735e-06, | |
| "loss": 0.0022, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 4.69795918367347e-06, | |
| "loss": 0.0035, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 4.6469387755102044e-06, | |
| "loss": 0.0027, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 4.595918367346939e-06, | |
| "loss": 0.0032, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 4.544897959183674e-06, | |
| "loss": 0.0027, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 4.493877551020408e-06, | |
| "loss": 0.0029, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 4.442857142857143e-06, | |
| "loss": 0.0024, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 4.391836734693878e-06, | |
| "loss": 0.0026, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 4.340816326530612e-06, | |
| "loss": 0.0033, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 4.289795918367347e-06, | |
| "loss": 0.0021, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 4.238775510204082e-06, | |
| "loss": 0.0017, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 4.187755102040817e-06, | |
| "loss": 0.002, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 4.136734693877552e-06, | |
| "loss": 0.0019, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "learning_rate": 4.0857142857142865e-06, | |
| "loss": 0.0023, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "eval_loss": 0.23070654273033142, | |
| "eval_runtime": 1877.4278, | |
| "eval_samples_per_second": 4.916, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 18.98948964533758, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 4.0346938775510206e-06, | |
| "loss": 0.0016, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "learning_rate": 3.9836734693877555e-06, | |
| "loss": 0.0017, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 3.93265306122449e-06, | |
| "loss": 0.0018, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 3.881632653061224e-06, | |
| "loss": 0.002, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "learning_rate": 3.830612244897959e-06, | |
| "loss": 0.0017, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 3.779591836734694e-06, | |
| "loss": 0.0019, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "learning_rate": 3.7285714285714286e-06, | |
| "loss": 0.002, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "learning_rate": 3.677551020408164e-06, | |
| "loss": 0.0013, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "learning_rate": 3.6265306122448984e-06, | |
| "loss": 0.0014, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "learning_rate": 3.575510204081633e-06, | |
| "loss": 0.001, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "learning_rate": 3.5244897959183678e-06, | |
| "loss": 0.0011, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "learning_rate": 3.4734693877551022e-06, | |
| "loss": 0.0012, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 11.53, | |
| "learning_rate": 3.422448979591837e-06, | |
| "loss": 0.0015, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 3.3714285714285716e-06, | |
| "loss": 0.0013, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "learning_rate": 3.320408163265306e-06, | |
| "loss": 0.0015, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "learning_rate": 3.269387755102041e-06, | |
| "loss": 0.0013, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 3.2183673469387754e-06, | |
| "loss": 0.0015, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 3.1673469387755107e-06, | |
| "loss": 0.001, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "learning_rate": 3.116326530612245e-06, | |
| "loss": 0.0009, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "learning_rate": 3.06530612244898e-06, | |
| "loss": 0.0007, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "eval_loss": 0.24875134229660034, | |
| "eval_runtime": 1886.6078, | |
| "eval_samples_per_second": 4.892, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 18.44569499663795, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 12.22, | |
| "learning_rate": 3.0142857142857145e-06, | |
| "loss": 0.0007, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "learning_rate": 2.9632653061224494e-06, | |
| "loss": 0.0007, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 2.912244897959184e-06, | |
| "loss": 0.0009, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 2.8612244897959183e-06, | |
| "loss": 0.001, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "learning_rate": 2.8102040816326532e-06, | |
| "loss": 0.0009, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 12.65, | |
| "learning_rate": 2.7591836734693877e-06, | |
| "loss": 0.0007, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "learning_rate": 2.708163265306123e-06, | |
| "loss": 0.0006, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "learning_rate": 2.6571428571428575e-06, | |
| "loss": 0.0006, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 12.91, | |
| "learning_rate": 2.6061224489795924e-06, | |
| "loss": 0.0006, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2.555102040816327e-06, | |
| "loss": 0.0006, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "learning_rate": 2.5040816326530613e-06, | |
| "loss": 0.0005, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 13.17, | |
| "learning_rate": 2.453061224489796e-06, | |
| "loss": 0.0006, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 2.4020408163265306e-06, | |
| "loss": 0.0006, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "learning_rate": 2.3510204081632655e-06, | |
| "loss": 0.0005, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "learning_rate": 2.3000000000000004e-06, | |
| "loss": 0.0005, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "learning_rate": 2.248979591836735e-06, | |
| "loss": 0.0004, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 2.1979591836734694e-06, | |
| "loss": 0.0006, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "learning_rate": 2.1469387755102042e-06, | |
| "loss": 0.0005, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 13.78, | |
| "learning_rate": 2.0959183673469387e-06, | |
| "loss": 0.0005, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 13.86, | |
| "learning_rate": 2.0448979591836736e-06, | |
| "loss": 0.0005, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 13.86, | |
| "eval_loss": 0.260904461145401, | |
| "eval_runtime": 1875.5399, | |
| "eval_samples_per_second": 4.921, | |
| "eval_steps_per_second": 0.308, | |
| "eval_wer": 18.28864890722096, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 1.9938775510204085e-06, | |
| "loss": 0.0005, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "learning_rate": 1.942857142857143e-06, | |
| "loss": 0.0005, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 14.12, | |
| "learning_rate": 1.8918367346938776e-06, | |
| "loss": 0.0003, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "learning_rate": 1.8408163265306123e-06, | |
| "loss": 0.0004, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "learning_rate": 1.7897959183673472e-06, | |
| "loss": 0.0004, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "learning_rate": 1.7387755102040819e-06, | |
| "loss": 0.0003, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "learning_rate": 1.6877551020408163e-06, | |
| "loss": 0.0004, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "learning_rate": 1.636734693877551e-06, | |
| "loss": 0.0003, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 14.64, | |
| "learning_rate": 1.5857142857142857e-06, | |
| "loss": 0.0002, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 14.73, | |
| "learning_rate": 1.5346938775510206e-06, | |
| "loss": 0.0004, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 14.82, | |
| "learning_rate": 1.4836734693877553e-06, | |
| "loss": 0.0003, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "learning_rate": 1.43265306122449e-06, | |
| "loss": 0.0003, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "learning_rate": 1.3816326530612246e-06, | |
| "loss": 0.0004, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 15.08, | |
| "learning_rate": 1.3306122448979595e-06, | |
| "loss": 0.0002, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "learning_rate": 1.279591836734694e-06, | |
| "loss": 0.0001, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 15.25, | |
| "learning_rate": 1.2285714285714286e-06, | |
| "loss": 0.0002, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "learning_rate": 1.1775510204081633e-06, | |
| "loss": 0.0001, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "learning_rate": 1.1265306122448982e-06, | |
| "loss": 0.0001, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "learning_rate": 1.0755102040816327e-06, | |
| "loss": 0.0001, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "learning_rate": 1.0244897959183673e-06, | |
| "loss": 0.0001, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "eval_loss": 0.27568453550338745, | |
| "eval_runtime": 1880.4752, | |
| "eval_samples_per_second": 4.908, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 18.148300246851182, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 15.68, | |
| "learning_rate": 9.734693877551022e-07, | |
| "loss": 0.0001, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 15.77, | |
| "learning_rate": 9.224489795918368e-07, | |
| "loss": 0.0002, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "learning_rate": 8.714285714285716e-07, | |
| "loss": 0.0001, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 15.94, | |
| "learning_rate": 8.204081632653062e-07, | |
| "loss": 0.0001, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "learning_rate": 7.693877551020409e-07, | |
| "loss": 0.0001, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "learning_rate": 7.183673469387756e-07, | |
| "loss": 0.0001, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "learning_rate": 6.673469387755102e-07, | |
| "loss": 0.0001, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 16.29, | |
| "learning_rate": 6.163265306122449e-07, | |
| "loss": 0.0001, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 16.38, | |
| "learning_rate": 5.653061224489796e-07, | |
| "loss": 0.0001, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 16.46, | |
| "learning_rate": 5.142857142857143e-07, | |
| "loss": 0.0001, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 16.55, | |
| "learning_rate": 4.63265306122449e-07, | |
| "loss": 0.0001, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "learning_rate": 4.1224489795918373e-07, | |
| "loss": 0.0001, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 16.72, | |
| "learning_rate": 3.612244897959184e-07, | |
| "loss": 0.0001, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 16.81, | |
| "learning_rate": 3.102040816326531e-07, | |
| "loss": 0.0001, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "learning_rate": 2.5918367346938776e-07, | |
| "loss": 0.0001, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "learning_rate": 2.0816326530612246e-07, | |
| "loss": 0.0001, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 17.07, | |
| "learning_rate": 1.5714285714285717e-07, | |
| "loss": 0.0, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 17.16, | |
| "learning_rate": 1.0612244897959186e-07, | |
| "loss": 0.0, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "learning_rate": 5.510204081632654e-08, | |
| "loss": 0.0, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "learning_rate": 4.081632653061225e-09, | |
| "loss": 0.0, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "eval_loss": 0.2883334159851074, | |
| "eval_runtime": 1887.5729, | |
| "eval_samples_per_second": 4.89, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 18.192525869732975, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "step": 10000, | |
| "total_flos": 9.232770429517824e+19, | |
| "train_loss": 0.05581043657779228, | |
| "train_runtime": 40648.5911, | |
| "train_samples_per_second": 7.872, | |
| "train_steps_per_second": 0.246 | |
| } | |
| ], | |
| "max_steps": 10000, | |
| "num_train_epochs": 18, | |
| "total_flos": 9.232770429517824e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |