Text Classification
Transformers
TensorBoard
Safetensors
modernbert
sentiment
multilingual
sentiment-analysis
product-reviews
place-reviews
text-embeddings-inference
Instructions to use clapAI/modernBERT-base-multilingual-sentiment with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use clapAI/modernBERT-base-multilingual-sentiment with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="clapAI/modernBERT-base-multilingual-sentiment")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("clapAI/modernBERT-base-multilingual-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("clapAI/modernBERT-base-multilingual-sentiment")
```
- Inference
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.8012369099843738, | |
| "best_model_checkpoint": "/data/hungnm/unisentiment/modernBERT-base-sentiment/checkpoint-4611", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 7685, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0032530904359141183, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.4862, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006506180871828237, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.4619, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009759271307742356, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.474, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.013012361743656473, | |
| "grad_norm": 5.975010395050049, | |
| "learning_rate": 2.5974025974025976e-06, | |
| "loss": 2.4748, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01626545217957059, | |
| "grad_norm": 4.729438781738281, | |
| "learning_rate": 5.194805194805195e-06, | |
| "loss": 2.4383, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01951854261548471, | |
| "grad_norm": 4.140679359436035, | |
| "learning_rate": 8.441558441558442e-06, | |
| "loss": 2.2384, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02277163305139883, | |
| "grad_norm": 2.7495357990264893, | |
| "learning_rate": 1.1688311688311688e-05, | |
| "loss": 2.16, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.026024723487312947, | |
| "grad_norm": 1.4239497184753418, | |
| "learning_rate": 1.4935064935064936e-05, | |
| "loss": 2.0898, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.029277813923227064, | |
| "grad_norm": 1.3778964281082153, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 2.037, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03253090435914118, | |
| "grad_norm": 1.6160250902175903, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 2.0056, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.035783994795055306, | |
| "grad_norm": 1.090104579925537, | |
| "learning_rate": 2.4675324675324678e-05, | |
| "loss": 1.9513, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03903708523096942, | |
| "grad_norm": 2.1062819957733154, | |
| "learning_rate": 2.792207792207792e-05, | |
| "loss": 1.9023, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04229017566688354, | |
| "grad_norm": 3.310304880142212, | |
| "learning_rate": 3.1168831168831166e-05, | |
| "loss": 1.877, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.04554326610279766, | |
| "grad_norm": 5.446138858795166, | |
| "learning_rate": 3.4415584415584416e-05, | |
| "loss": 1.822, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.048796356538711776, | |
| "grad_norm": 1.910844087600708, | |
| "learning_rate": 3.7662337662337665e-05, | |
| "loss": 1.7707, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.05204944697462589, | |
| "grad_norm": 5.207052707672119, | |
| "learning_rate": 4.0909090909090915e-05, | |
| "loss": 1.7986, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05530253741054001, | |
| "grad_norm": 4.687050819396973, | |
| "learning_rate": 4.415584415584416e-05, | |
| "loss": 1.7189, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.05855562784645413, | |
| "grad_norm": 4.655097961425781, | |
| "learning_rate": 4.740259740259741e-05, | |
| "loss": 1.7185, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06180871828236825, | |
| "grad_norm": 5.834875106811523, | |
| "learning_rate": 4.999999786858144e-05, | |
| "loss": 1.6804, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.06506180871828236, | |
| "grad_norm": 2.986246109008789, | |
| "learning_rate": 4.99999232689698e-05, | |
| "loss": 1.6772, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06831489915419649, | |
| "grad_norm": 1.4194883108139038, | |
| "learning_rate": 4.999974209879331e-05, | |
| "loss": 1.602, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.07156798959011061, | |
| "grad_norm": 3.983574628829956, | |
| "learning_rate": 4.999945435882428e-05, | |
| "loss": 1.5656, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07482108002602472, | |
| "grad_norm": 1.342112421989441, | |
| "learning_rate": 4.9999060050289286e-05, | |
| "loss": 1.511, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.07807417046193885, | |
| "grad_norm": 2.197117805480957, | |
| "learning_rate": 4.999855917486921e-05, | |
| "loss": 1.4768, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08132726089785296, | |
| "grad_norm": 1.8786858320236206, | |
| "learning_rate": 4.999795173469919e-05, | |
| "loss": 1.473, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08458035133376708, | |
| "grad_norm": 2.5618531703948975, | |
| "learning_rate": 4.9997237732368645e-05, | |
| "loss": 1.4527, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08783344176968119, | |
| "grad_norm": 1.8612209558486938, | |
| "learning_rate": 4.999641717092126e-05, | |
| "loss": 1.4092, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.09108653220559532, | |
| "grad_norm": 1.912489891052246, | |
| "learning_rate": 4.999549005385494e-05, | |
| "loss": 1.3939, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09433962264150944, | |
| "grad_norm": 2.8550467491149902, | |
| "learning_rate": 4.999445638512185e-05, | |
| "loss": 1.3562, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09759271307742355, | |
| "grad_norm": 1.902714729309082, | |
| "learning_rate": 4.9993316169128334e-05, | |
| "loss": 1.3427, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10084580351333768, | |
| "grad_norm": 3.12044620513916, | |
| "learning_rate": 4.999206941073496e-05, | |
| "loss": 1.3634, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.10409889394925179, | |
| "grad_norm": 2.6095197200775146, | |
| "learning_rate": 4.999071611525643e-05, | |
| "loss": 1.3605, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10735198438516591, | |
| "grad_norm": 2.5530121326446533, | |
| "learning_rate": 4.998925628846164e-05, | |
| "loss": 1.3444, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.11060507482108002, | |
| "grad_norm": 1.9909695386886597, | |
| "learning_rate": 4.99876899365736e-05, | |
| "loss": 1.3192, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11385816525699415, | |
| "grad_norm": 1.21974515914917, | |
| "learning_rate": 4.998601706626938e-05, | |
| "loss": 1.3085, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11711125569290826, | |
| "grad_norm": 1.2985081672668457, | |
| "learning_rate": 4.9984237684680194e-05, | |
| "loss": 1.2848, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.12036434612882238, | |
| "grad_norm": 2.141941785812378, | |
| "learning_rate": 4.998235179939122e-05, | |
| "loss": 1.2729, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1236174365647365, | |
| "grad_norm": 1.9323813915252686, | |
| "learning_rate": 4.998035941844167e-05, | |
| "loss": 1.275, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12687052700065063, | |
| "grad_norm": 2.6978371143341064, | |
| "learning_rate": 4.997826055032476e-05, | |
| "loss": 1.2825, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.13012361743656473, | |
| "grad_norm": 2.018090009689331, | |
| "learning_rate": 4.997605520398762e-05, | |
| "loss": 1.2656, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13337670787247885, | |
| "grad_norm": 1.0469837188720703, | |
| "learning_rate": 4.997374338883127e-05, | |
| "loss": 1.2584, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.13662979830839297, | |
| "grad_norm": 1.2959955930709839, | |
| "learning_rate": 4.99713251147106e-05, | |
| "loss": 1.2494, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1398828887443071, | |
| "grad_norm": 2.215878486633301, | |
| "learning_rate": 4.996880039193431e-05, | |
| "loss": 1.2482, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.14313597918022122, | |
| "grad_norm": 1.711484432220459, | |
| "learning_rate": 4.996616923126488e-05, | |
| "loss": 1.2258, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.14638906961613532, | |
| "grad_norm": 1.5809857845306396, | |
| "learning_rate": 4.996343164391853e-05, | |
| "loss": 1.223, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.14964216005204944, | |
| "grad_norm": 1.6745812892913818, | |
| "learning_rate": 4.9960587641565125e-05, | |
| "loss": 1.2151, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.15289525048796357, | |
| "grad_norm": 1.5372675657272339, | |
| "learning_rate": 4.9957637236328195e-05, | |
| "loss": 1.1983, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1561483409238777, | |
| "grad_norm": 1.5290815830230713, | |
| "learning_rate": 4.995458044078482e-05, | |
| "loss": 1.24, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1594014313597918, | |
| "grad_norm": 1.4023972749710083, | |
| "learning_rate": 4.9951417267965626e-05, | |
| "loss": 1.1897, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.16265452179570591, | |
| "grad_norm": 1.8283660411834717, | |
| "learning_rate": 4.99481477313547e-05, | |
| "loss": 1.2029, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16590761223162004, | |
| "grad_norm": 1.8741523027420044, | |
| "learning_rate": 4.9944771844889524e-05, | |
| "loss": 1.19, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.16916070266753416, | |
| "grad_norm": 1.552556037902832, | |
| "learning_rate": 4.994128962296097e-05, | |
| "loss": 1.1946, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1724137931034483, | |
| "grad_norm": 2.1094107627868652, | |
| "learning_rate": 4.9937701080413165e-05, | |
| "loss": 1.1756, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.17566688353936238, | |
| "grad_norm": 1.7123149633407593, | |
| "learning_rate": 4.993400623254347e-05, | |
| "loss": 1.1789, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1789199739752765, | |
| "grad_norm": 1.2891788482666016, | |
| "learning_rate": 4.993020509510243e-05, | |
| "loss": 1.1833, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18217306441119063, | |
| "grad_norm": 1.2659103870391846, | |
| "learning_rate": 4.992629768429367e-05, | |
| "loss": 1.1697, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.18542615484710476, | |
| "grad_norm": 1.602931022644043, | |
| "learning_rate": 4.992228401677382e-05, | |
| "loss": 1.16, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 1.1984357833862305, | |
| "learning_rate": 4.99181641096525e-05, | |
| "loss": 1.1415, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.19193233571893298, | |
| "grad_norm": 2.036529302597046, | |
| "learning_rate": 4.991393798049219e-05, | |
| "loss": 1.168, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.1951854261548471, | |
| "grad_norm": 1.9513144493103027, | |
| "learning_rate": 4.990960564730819e-05, | |
| "loss": 1.1623, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19843851659076123, | |
| "grad_norm": 1.2966268062591553, | |
| "learning_rate": 4.9905167128568516e-05, | |
| "loss": 1.143, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.20169160702667535, | |
| "grad_norm": 1.3897426128387451, | |
| "learning_rate": 4.990062244319387e-05, | |
| "loss": 1.1431, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.20494469746258945, | |
| "grad_norm": 1.7485623359680176, | |
| "learning_rate": 4.989597161055746e-05, | |
| "loss": 1.1507, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.20819778789850357, | |
| "grad_norm": 1.1369644403457642, | |
| "learning_rate": 4.989121465048505e-05, | |
| "loss": 1.1447, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2114508783344177, | |
| "grad_norm": 1.292037844657898, | |
| "learning_rate": 4.988635158325476e-05, | |
| "loss": 1.1289, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.21470396877033182, | |
| "grad_norm": 1.1460140943527222, | |
| "learning_rate": 4.988138242959707e-05, | |
| "loss": 1.1314, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.21795705920624595, | |
| "grad_norm": 1.9661816358566284, | |
| "learning_rate": 4.987630721069465e-05, | |
| "loss": 1.147, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.22121014964216004, | |
| "grad_norm": 1.3988662958145142, | |
| "learning_rate": 4.987112594818232e-05, | |
| "loss": 1.1443, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.22446324007807417, | |
| "grad_norm": 1.6520105600357056, | |
| "learning_rate": 4.986583866414696e-05, | |
| "loss": 1.1089, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2277163305139883, | |
| "grad_norm": 1.6153268814086914, | |
| "learning_rate": 4.9860445381127385e-05, | |
| "loss": 1.1279, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.23096942094990242, | |
| "grad_norm": 1.0572576522827148, | |
| "learning_rate": 4.985494612211429e-05, | |
| "loss": 1.1073, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2342225113858165, | |
| "grad_norm": 1.1980561017990112, | |
| "learning_rate": 4.984934091055009e-05, | |
| "loss": 1.1161, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.23747560182173064, | |
| "grad_norm": 3.1612489223480225, | |
| "learning_rate": 4.98436297703289e-05, | |
| "loss": 1.1473, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.24072869225764476, | |
| "grad_norm": 1.7351305484771729, | |
| "learning_rate": 4.983781272579636e-05, | |
| "loss": 1.1282, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.24398178269355889, | |
| "grad_norm": 1.4272353649139404, | |
| "learning_rate": 4.983188980174958e-05, | |
| "loss": 1.1486, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.247234873129473, | |
| "grad_norm": 1.6868839263916016, | |
| "learning_rate": 4.9825861023437016e-05, | |
| "loss": 1.1224, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2504879635653871, | |
| "grad_norm": 1.1032485961914062, | |
| "learning_rate": 4.981972641655835e-05, | |
| "loss": 1.1186, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.25374105400130126, | |
| "grad_norm": 1.0825129747390747, | |
| "learning_rate": 4.981348600726441e-05, | |
| "loss": 1.093, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.25699414443721535, | |
| "grad_norm": 1.0156402587890625, | |
| "learning_rate": 4.980713982215703e-05, | |
| "loss": 1.0873, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.26024723487312945, | |
| "grad_norm": 2.106105089187622, | |
| "learning_rate": 4.9800687888288964e-05, | |
| "loss": 1.0924, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2635003253090436, | |
| "grad_norm": 1.6301723718643188, | |
| "learning_rate": 4.9794130233163735e-05, | |
| "loss": 1.1063, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2667534157449577, | |
| "grad_norm": 1.30489981174469, | |
| "learning_rate": 4.978746688473556e-05, | |
| "loss": 1.0993, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.27000650618087185, | |
| "grad_norm": 1.1064469814300537, | |
| "learning_rate": 4.978069787140919e-05, | |
| "loss": 1.093, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.27325959661678595, | |
| "grad_norm": 1.1742445230484009, | |
| "learning_rate": 4.977382322203982e-05, | |
| "loss": 1.0848, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.27651268705270005, | |
| "grad_norm": 1.0716508626937866, | |
| "learning_rate": 4.976684296593295e-05, | |
| "loss": 1.1157, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2797657774886142, | |
| "grad_norm": 1.4256720542907715, | |
| "learning_rate": 4.9759757132844256e-05, | |
| "loss": 1.0835, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2830188679245283, | |
| "grad_norm": 1.2922230958938599, | |
| "learning_rate": 4.975256575297949e-05, | |
| "loss": 1.0804, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.28627195836044245, | |
| "grad_norm": 1.5222572088241577, | |
| "learning_rate": 4.974526885699432e-05, | |
| "loss": 1.077, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.28952504879635654, | |
| "grad_norm": 1.023868441581726, | |
| "learning_rate": 4.973786647599422e-05, | |
| "loss": 1.0782, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.29277813923227064, | |
| "grad_norm": 1.7092077732086182, | |
| "learning_rate": 4.9730358641534324e-05, | |
| "loss": 1.1011, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2960312296681848, | |
| "grad_norm": 1.0816203355789185, | |
| "learning_rate": 4.9722745385619285e-05, | |
| "loss": 1.0857, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.2992843201040989, | |
| "grad_norm": 0.9598567485809326, | |
| "learning_rate": 4.971502674070317e-05, | |
| "loss": 1.0874, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.302537410540013, | |
| "grad_norm": 1.1397418975830078, | |
| "learning_rate": 4.970720273968929e-05, | |
| "loss": 1.0743, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.30579050097592714, | |
| "grad_norm": 1.6813876628875732, | |
| "learning_rate": 4.969927341593008e-05, | |
| "loss": 1.0587, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.30904359141184123, | |
| "grad_norm": 1.4590063095092773, | |
| "learning_rate": 4.9691238803226944e-05, | |
| "loss": 1.0706, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3122966818477554, | |
| "grad_norm": 0.988750696182251, | |
| "learning_rate": 4.9683098935830115e-05, | |
| "loss": 1.0569, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3155497722836695, | |
| "grad_norm": 1.0971347093582153, | |
| "learning_rate": 4.9674853848438506e-05, | |
| "loss": 1.0441, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3188028627195836, | |
| "grad_norm": 1.0693708658218384, | |
| "learning_rate": 4.9666503576199574e-05, | |
| "loss": 1.0644, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.32205595315549773, | |
| "grad_norm": 1.2514370679855347, | |
| "learning_rate": 4.965804815470916e-05, | |
| "loss": 1.0609, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.32530904359141183, | |
| "grad_norm": 1.5080784559249878, | |
| "learning_rate": 4.964948762001133e-05, | |
| "loss": 1.0682, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.328562134027326, | |
| "grad_norm": 1.1908406019210815, | |
| "learning_rate": 4.964082200859824e-05, | |
| "loss": 1.0418, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3318152244632401, | |
| "grad_norm": 1.6586133241653442, | |
| "learning_rate": 4.963205135740997e-05, | |
| "loss": 1.0668, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3350683148991542, | |
| "grad_norm": 0.7452509999275208, | |
| "learning_rate": 4.962317570383436e-05, | |
| "loss": 1.0508, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.3383214053350683, | |
| "grad_norm": 1.3133275508880615, | |
| "learning_rate": 4.961419508570686e-05, | |
| "loss": 1.0543, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3415744957709824, | |
| "grad_norm": 1.1373653411865234, | |
| "learning_rate": 4.960510954131038e-05, | |
| "loss": 1.0711, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 1.12503981590271, | |
| "learning_rate": 4.95959191093751e-05, | |
| "loss": 1.0486, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.34808067664281067, | |
| "grad_norm": 0.921503484249115, | |
| "learning_rate": 4.95866238290783e-05, | |
| "loss": 1.0543, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.35133376707872477, | |
| "grad_norm": 0.9198605418205261, | |
| "learning_rate": 4.957722374004427e-05, | |
| "loss": 1.0438, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3545868575146389, | |
| "grad_norm": 1.630878210067749, | |
| "learning_rate": 4.9567718882344015e-05, | |
| "loss": 1.0544, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.357839947950553, | |
| "grad_norm": 2.2188167572021484, | |
| "learning_rate": 4.95581092964952e-05, | |
| "loss": 1.0541, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.36109303838646717, | |
| "grad_norm": 0.9371961355209351, | |
| "learning_rate": 4.95483950234619e-05, | |
| "loss": 1.0723, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.36434612882238127, | |
| "grad_norm": 1.0933233499526978, | |
| "learning_rate": 4.9538576104654466e-05, | |
| "loss": 1.052, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.36759921925829536, | |
| "grad_norm": 1.1232990026474, | |
| "learning_rate": 4.9528652581929335e-05, | |
| "loss": 1.0354, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3708523096942095, | |
| "grad_norm": 1.000786542892456, | |
| "learning_rate": 4.951862449758885e-05, | |
| "loss": 1.0407, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3741054001301236, | |
| "grad_norm": 0.939582884311676, | |
| "learning_rate": 4.9508491894381104e-05, | |
| "loss": 1.0206, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 1.264381766319275, | |
| "learning_rate": 4.9498254815499694e-05, | |
| "loss": 1.0362, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.38061158100195186, | |
| "grad_norm": 0.673314094543457, | |
| "learning_rate": 4.948791330458363e-05, | |
| "loss": 1.0381, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.38386467143786596, | |
| "grad_norm": 1.441362738609314, | |
| "learning_rate": 4.947746740571706e-05, | |
| "loss": 1.0354, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3871177618737801, | |
| "grad_norm": 1.1851030588150024, | |
| "learning_rate": 4.9466917163429124e-05, | |
| "loss": 1.0146, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3903708523096942, | |
| "grad_norm": 0.9171844124794006, | |
| "learning_rate": 4.94562626226938e-05, | |
| "loss": 1.0103, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3936239427456083, | |
| "grad_norm": 1.5662965774536133, | |
| "learning_rate": 4.944550382892962e-05, | |
| "loss": 1.0466, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.39687703318152245, | |
| "grad_norm": 1.1077489852905273, | |
| "learning_rate": 4.943464082799955e-05, | |
| "loss": 1.0458, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.40013012361743655, | |
| "grad_norm": 1.5997633934020996, | |
| "learning_rate": 4.942367366621081e-05, | |
| "loss": 1.0464, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4033832140533507, | |
| "grad_norm": 1.0540611743927002, | |
| "learning_rate": 4.9412602390314585e-05, | |
| "loss": 1.0242, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4066363044892648, | |
| "grad_norm": 1.1247586011886597, | |
| "learning_rate": 4.94014270475059e-05, | |
| "loss": 1.0232, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4098893949251789, | |
| "grad_norm": 1.065820336341858, | |
| "learning_rate": 4.939014768542342e-05, | |
| "loss": 1.0137, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.41314248536109305, | |
| "grad_norm": 0.8374763131141663, | |
| "learning_rate": 4.93787643521492e-05, | |
| "loss": 1.0203, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.41639557579700714, | |
| "grad_norm": 0.7515140771865845, | |
| "learning_rate": 4.936727709620853e-05, | |
| "loss": 1.0176, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4196486662329213, | |
| "grad_norm": 0.8034088015556335, | |
| "learning_rate": 4.9355685966569684e-05, | |
| "loss": 1.0322, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4229017566688354, | |
| "grad_norm": 1.2314985990524292, | |
| "learning_rate": 4.934399101264375e-05, | |
| "loss": 1.0198, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4261548471047495, | |
| "grad_norm": 1.342058539390564, | |
| "learning_rate": 4.93321922842844e-05, | |
| "loss": 1.0133, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.42940793754066364, | |
| "grad_norm": 0.8881794214248657, | |
| "learning_rate": 4.932028983178766e-05, | |
| "loss": 1.0255, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.43266102797657774, | |
| "grad_norm": 1.3695508241653442, | |
| "learning_rate": 4.9308283705891736e-05, | |
| "loss": 1.0293, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.4359141184124919, | |
| "grad_norm": 0.9350308179855347, | |
| "learning_rate": 4.9296173957776776e-05, | |
| "loss": 1.03, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.439167208848406, | |
| "grad_norm": 0.9181856513023376, | |
| "learning_rate": 4.928396063906463e-05, | |
| "loss": 1.0234, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.4424202992843201, | |
| "grad_norm": 1.352927803993225, | |
| "learning_rate": 4.927164380181869e-05, | |
| "loss": 1.0474, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.44567338972023424, | |
| "grad_norm": 1.176147222518921, | |
| "learning_rate": 4.9259223498543597e-05, | |
| "loss": 1.0329, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.44892648015614833, | |
| "grad_norm": 1.0797678232192993, | |
| "learning_rate": 4.9246699782185055e-05, | |
| "loss": 1.0141, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4521795705920625, | |
| "grad_norm": 0.9696300029754639, | |
| "learning_rate": 4.9234072706129627e-05, | |
| "loss": 0.999, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.4554326610279766, | |
| "grad_norm": 0.9436845779418945, | |
| "learning_rate": 4.922134232420445e-05, | |
| "loss": 1.0003, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4586857514638907, | |
| "grad_norm": 1.1857705116271973, | |
| "learning_rate": 4.920850869067706e-05, | |
| "loss": 0.9831, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.46193884189980483, | |
| "grad_norm": 0.9158900380134583, | |
| "learning_rate": 4.919557186025512e-05, | |
| "loss": 1.0201, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4651919323357189, | |
| "grad_norm": 0.8820152282714844, | |
| "learning_rate": 4.9182531888086205e-05, | |
| "loss": 0.9852, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.468445022771633, | |
| "grad_norm": 1.5595647096633911, | |
| "learning_rate": 4.916938882975759e-05, | |
| "loss": 1.0002, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4716981132075472, | |
| "grad_norm": 1.1958764791488647, | |
| "learning_rate": 4.915614274129597e-05, | |
| "loss": 1.0375, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4749512036434613, | |
| "grad_norm": 1.1134103536605835, | |
| "learning_rate": 4.914279367916724e-05, | |
| "loss": 1.0208, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4782042940793754, | |
| "grad_norm": 0.8463726043701172, | |
| "learning_rate": 4.9129341700276266e-05, | |
| "loss": 0.9955, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4814573845152895, | |
| "grad_norm": 0.8405961394309998, | |
| "learning_rate": 4.911578686196661e-05, | |
| "loss": 0.9754, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4847104749512036, | |
| "grad_norm": 1.0310126543045044, | |
| "learning_rate": 4.9102129222020324e-05, | |
| "loss": 1.0213, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.48796356538711777, | |
| "grad_norm": 1.058269739151001, | |
| "learning_rate": 4.908836883865768e-05, | |
| "loss": 0.9966, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.49121665582303187, | |
| "grad_norm": 0.9762022495269775, | |
| "learning_rate": 4.907450577053694e-05, | |
| "loss": 1.0059, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.494469746258946, | |
| "grad_norm": 0.8593292832374573, | |
| "learning_rate": 4.906054007675408e-05, | |
| "loss": 0.9922, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4977228366948601, | |
| "grad_norm": 1.3241448402404785, | |
| "learning_rate": 4.9046471816842565e-05, | |
| "loss": 1.007, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.5009759271307742, | |
| "grad_norm": 0.9241655468940735, | |
| "learning_rate": 4.903230105077306e-05, | |
| "loss": 1.0204, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5042290175666884, | |
| "grad_norm": 0.8068680763244629, | |
| "learning_rate": 4.9018027838953226e-05, | |
| "loss": 0.9932, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5074821080026025, | |
| "grad_norm": 1.2541546821594238, | |
| "learning_rate": 4.900365224222742e-05, | |
| "loss": 0.9945, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5107351984385166, | |
| "grad_norm": 0.925835907459259, | |
| "learning_rate": 4.898917432187644e-05, | |
| "loss": 0.9745, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5139882888744307, | |
| "grad_norm": 0.7561518549919128, | |
| "learning_rate": 4.897459413961729e-05, | |
| "loss": 1.0065, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5172413793103449, | |
| "grad_norm": 1.056420922279358, | |
| "learning_rate": 4.8959911757602885e-05, | |
| "loss": 0.974, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5204944697462589, | |
| "grad_norm": 1.219141960144043, | |
| "learning_rate": 4.89451272384218e-05, | |
| "loss": 0.9926, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.523747560182173, | |
| "grad_norm": 0.9372319579124451, | |
| "learning_rate": 4.8930240645098027e-05, | |
| "loss": 1.0141, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5270006506180872, | |
| "grad_norm": 1.0118193626403809, | |
| "learning_rate": 4.891525204109065e-05, | |
| "loss": 0.9996, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5302537410540012, | |
| "grad_norm": 0.91470867395401, | |
| "learning_rate": 4.890016149029365e-05, | |
| "loss": 0.9851, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5335068314899154, | |
| "grad_norm": 0.787122368812561, | |
| "learning_rate": 4.888496905703554e-05, | |
| "loss": 0.9969, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5367599219258296, | |
| "grad_norm": 0.8628039956092834, | |
| "learning_rate": 4.886967480607918e-05, | |
| "loss": 1.0024, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5400130123617437, | |
| "grad_norm": 1.450460433959961, | |
| "learning_rate": 4.885427880262144e-05, | |
| "loss": 0.9743, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5432661027976577, | |
| "grad_norm": 1.0362318754196167, | |
| "learning_rate": 4.883878111229296e-05, | |
| "loss": 0.9723, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5465191932335719, | |
| "grad_norm": 0.9855751991271973, | |
| "learning_rate": 4.8823181801157844e-05, | |
| "loss": 0.9898, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.549772283669486, | |
| "grad_norm": 1.0782288312911987, | |
| "learning_rate": 4.880748093571339e-05, | |
| "loss": 0.9727, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5530253741054001, | |
| "grad_norm": 1.5194872617721558, | |
| "learning_rate": 4.879167858288982e-05, | |
| "loss": 0.9922, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5562784645413142, | |
| "grad_norm": 1.5501078367233276, | |
| "learning_rate": 4.877577481004995e-05, | |
| "loss": 0.9705, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5595315549772284, | |
| "grad_norm": 1.5971125364303589, | |
| "learning_rate": 4.875976968498895e-05, | |
| "loss": 1.0078, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5627846454131424, | |
| "grad_norm": 0.9124265313148499, | |
| "learning_rate": 4.874366327593406e-05, | |
| "loss": 0.9737, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 0.8439720273017883, | |
| "learning_rate": 4.872745565154424e-05, | |
| "loss": 0.9967, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5692908262849707, | |
| "grad_norm": 0.9340474009513855, | |
| "learning_rate": 4.871114688090992e-05, | |
| "loss": 0.9934, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5725439167208849, | |
| "grad_norm": 0.8820469975471497, | |
| "learning_rate": 4.869473703355273e-05, | |
| "loss": 0.9917, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5757970071567989, | |
| "grad_norm": 0.8724156618118286, | |
| "learning_rate": 4.867822617942514e-05, | |
| "loss": 0.9762, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5790500975927131, | |
| "grad_norm": 0.9085761308670044, | |
| "learning_rate": 4.866161438891022e-05, | |
| "loss": 0.9686, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5823031880286272, | |
| "grad_norm": 0.7215405106544495, | |
| "learning_rate": 4.864490173282128e-05, | |
| "loss": 0.9858, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5855562784645413, | |
| "grad_norm": 1.0854041576385498, | |
| "learning_rate": 4.862808828240164e-05, | |
| "loss": 0.9935, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5888093689004554, | |
| "grad_norm": 0.8779392242431641, | |
| "learning_rate": 4.861117410932429e-05, | |
| "loss": 0.9816, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5920624593363696, | |
| "grad_norm": 1.2866002321243286, | |
| "learning_rate": 4.8594159285691546e-05, | |
| "loss": 0.9818, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5953155497722836, | |
| "grad_norm": 0.7991343140602112, | |
| "learning_rate": 4.8577043884034826e-05, | |
| "loss": 0.9592, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5985686402081978, | |
| "grad_norm": 0.9553494453430176, | |
| "learning_rate": 4.8559827977314254e-05, | |
| "loss": 0.9943, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6018217306441119, | |
| "grad_norm": 1.2053009271621704, | |
| "learning_rate": 4.854251163891843e-05, | |
| "loss": 0.946, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.605074821080026, | |
| "grad_norm": 0.744791567325592, | |
| "learning_rate": 4.852509494266405e-05, | |
| "loss": 0.9804, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6083279115159401, | |
| "grad_norm": 1.2371433973312378, | |
| "learning_rate": 4.850757796279563e-05, | |
| "loss": 0.9902, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.6115810019518543, | |
| "grad_norm": 0.723250150680542, | |
| "learning_rate": 4.8489960773985174e-05, | |
| "loss": 0.9839, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6148340923877684, | |
| "grad_norm": 0.7003908753395081, | |
| "learning_rate": 4.847224345133188e-05, | |
| "loss": 0.9712, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6180871828236825, | |
| "grad_norm": 0.8090314865112305, | |
| "learning_rate": 4.845442607036176e-05, | |
| "loss": 0.9631, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6213402732595966, | |
| "grad_norm": 0.7971912622451782, | |
| "learning_rate": 4.8436508707027384e-05, | |
| "loss": 0.9722, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.6245933636955108, | |
| "grad_norm": 0.7696447968482971, | |
| "learning_rate": 4.841849143770754e-05, | |
| "loss": 0.9712, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6278464541314248, | |
| "grad_norm": 0.9497612714767456, | |
| "learning_rate": 4.840037433920688e-05, | |
| "loss": 0.9653, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.631099544567339, | |
| "grad_norm": 1.1326346397399902, | |
| "learning_rate": 4.838215748875562e-05, | |
| "loss": 0.9648, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6343526350032531, | |
| "grad_norm": 0.8858407139778137, | |
| "learning_rate": 4.83638409640092e-05, | |
| "loss": 0.9765, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6376057254391672, | |
| "grad_norm": 0.9079559445381165, | |
| "learning_rate": 4.834542484304795e-05, | |
| "loss": 0.958, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6408588158750813, | |
| "grad_norm": 0.9221760630607605, | |
| "learning_rate": 4.8326909204376776e-05, | |
| "loss": 0.9675, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6441119063109955, | |
| "grad_norm": 0.8072174787521362, | |
| "learning_rate": 4.8308294126924794e-05, | |
| "loss": 0.9745, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6473649967469096, | |
| "grad_norm": 0.9354230165481567, | |
| "learning_rate": 4.828957969004502e-05, | |
| "loss": 0.9581, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6506180871828237, | |
| "grad_norm": 0.8067158460617065, | |
| "learning_rate": 4.827076597351403e-05, | |
| "loss": 0.9669, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6538711776187378, | |
| "grad_norm": 1.0591189861297607, | |
| "learning_rate": 4.825185305753161e-05, | |
| "loss": 0.9682, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.657124268054652, | |
| "grad_norm": 0.7701990604400635, | |
| "learning_rate": 4.823284102272041e-05, | |
| "loss": 0.9756, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.660377358490566, | |
| "grad_norm": 0.9886049628257751, | |
| "learning_rate": 4.82137299501256e-05, | |
| "loss": 0.9646, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.6636304489264802, | |
| "grad_norm": 0.966618537902832, | |
| "learning_rate": 4.819451992121454e-05, | |
| "loss": 0.9673, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6668835393623943, | |
| "grad_norm": 0.987940788269043, | |
| "learning_rate": 4.817521101787646e-05, | |
| "loss": 0.9647, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6701366297983083, | |
| "grad_norm": 0.752627432346344, | |
| "learning_rate": 4.815580332242199e-05, | |
| "loss": 0.9545, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6733897202342225, | |
| "grad_norm": 1.0263205766677856, | |
| "learning_rate": 4.813629691758299e-05, | |
| "loss": 0.9479, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.6766428106701367, | |
| "grad_norm": 0.8434374332427979, | |
| "learning_rate": 4.811669188651204e-05, | |
| "loss": 0.9747, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6798959011060507, | |
| "grad_norm": 0.8626881837844849, | |
| "learning_rate": 4.8096988312782174e-05, | |
| "loss": 0.9713, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.6831489915419648, | |
| "grad_norm": 0.8781446814537048, | |
| "learning_rate": 4.8077186280386475e-05, | |
| "loss": 0.964, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.686402081977879, | |
| "grad_norm": 0.8338606953620911, | |
| "learning_rate": 4.8057285873737765e-05, | |
| "loss": 0.9916, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 0.8619135022163391, | |
| "learning_rate": 4.803728717766821e-05, | |
| "loss": 0.9562, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6929082628497072, | |
| "grad_norm": 0.8325028419494629, | |
| "learning_rate": 4.8017190277428956e-05, | |
| "loss": 0.9494, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.6961613532856213, | |
| "grad_norm": 0.772607684135437, | |
| "learning_rate": 4.799699525868979e-05, | |
| "loss": 0.9783, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6994144437215355, | |
| "grad_norm": 0.7735521793365479, | |
| "learning_rate": 4.797670220753876e-05, | |
| "loss": 0.966, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.7026675341574495, | |
| "grad_norm": 0.8032121062278748, | |
| "learning_rate": 4.79563112104818e-05, | |
| "loss": 0.9569, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.7059206245933637, | |
| "grad_norm": 0.9248620271682739, | |
| "learning_rate": 4.7935822354442397e-05, | |
| "loss": 0.9676, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.7091737150292778, | |
| "grad_norm": 0.6317049264907837, | |
| "learning_rate": 4.7915235726761154e-05, | |
| "loss": 0.9443, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7124268054651919, | |
| "grad_norm": 0.9738350510597229, | |
| "learning_rate": 4.789455141519551e-05, | |
| "loss": 0.9693, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.715679895901106, | |
| "grad_norm": 0.7499257922172546, | |
| "learning_rate": 4.7873769507919266e-05, | |
| "loss": 0.958, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7189329863370202, | |
| "grad_norm": 0.8857749700546265, | |
| "learning_rate": 4.785289009352227e-05, | |
| "loss": 0.9596, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.7221860767729343, | |
| "grad_norm": 0.7081575393676758, | |
| "learning_rate": 4.7831913261010066e-05, | |
| "loss": 0.9454, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7254391672088484, | |
| "grad_norm": 0.8387717604637146, | |
| "learning_rate": 4.781083909980342e-05, | |
| "loss": 0.9472, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7286922576447625, | |
| "grad_norm": 0.9755154848098755, | |
| "learning_rate": 4.778966769973802e-05, | |
| "loss": 0.9668, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7319453480806767, | |
| "grad_norm": 0.7101641893386841, | |
| "learning_rate": 4.7768399151064076e-05, | |
| "loss": 0.9457, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7351984385165907, | |
| "grad_norm": 0.9372628331184387, | |
| "learning_rate": 4.774703354444591e-05, | |
| "loss": 0.9709, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7384515289525049, | |
| "grad_norm": 0.9276643991470337, | |
| "learning_rate": 4.7725570970961586e-05, | |
| "loss": 0.9586, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.741704619388419, | |
| "grad_norm": 0.7329192757606506, | |
| "learning_rate": 4.770401152210253e-05, | |
| "loss": 0.9608, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7449577098243331, | |
| "grad_norm": 0.7759012579917908, | |
| "learning_rate": 4.768235528977314e-05, | |
| "loss": 0.9469, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.7482108002602472, | |
| "grad_norm": 1.2127937078475952, | |
| "learning_rate": 4.766060236629037e-05, | |
| "loss": 0.9542, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7514638906961614, | |
| "grad_norm": 0.7369085550308228, | |
| "learning_rate": 4.763875284438336e-05, | |
| "loss": 0.9643, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.7963067293167114, | |
| "learning_rate": 4.7616806817193024e-05, | |
| "loss": 0.9678, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7579700715679896, | |
| "grad_norm": 0.7773886919021606, | |
| "learning_rate": 4.759476437827168e-05, | |
| "loss": 0.9603, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.7612231620039037, | |
| "grad_norm": 0.8198060393333435, | |
| "learning_rate": 4.757262562158262e-05, | |
| "loss": 0.9759, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7644762524398179, | |
| "grad_norm": 0.7127149701118469, | |
| "learning_rate": 4.7550390641499715e-05, | |
| "loss": 0.9244, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.7677293428757319, | |
| "grad_norm": 1.236286997795105, | |
| "learning_rate": 4.7528059532807045e-05, | |
| "loss": 0.9313, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.7709824333116461, | |
| "grad_norm": 0.6795628070831299, | |
| "learning_rate": 4.750563239069845e-05, | |
| "loss": 0.9586, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.7742355237475602, | |
| "grad_norm": 0.8040820956230164, | |
| "learning_rate": 4.7483109310777165e-05, | |
| "loss": 0.9483, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.7774886141834743, | |
| "grad_norm": 0.8001431226730347, | |
| "learning_rate": 4.7460490389055355e-05, | |
| "loss": 0.9408, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.7807417046193884, | |
| "grad_norm": 0.969782292842865, | |
| "learning_rate": 4.743777572195378e-05, | |
| "loss": 0.9778, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7839947950553026, | |
| "grad_norm": 1.0955541133880615, | |
| "learning_rate": 4.741496540630134e-05, | |
| "loss": 0.9385, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.7872478854912166, | |
| "grad_norm": 0.7429236173629761, | |
| "learning_rate": 4.739205953933464e-05, | |
| "loss": 0.9642, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7905009759271308, | |
| "grad_norm": 1.0475250482559204, | |
| "learning_rate": 4.736905821869765e-05, | |
| "loss": 0.9437, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.7937540663630449, | |
| "grad_norm": 0.7216660380363464, | |
| "learning_rate": 4.734596154244121e-05, | |
| "loss": 0.9289, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7970071567989591, | |
| "grad_norm": 0.8584089279174805, | |
| "learning_rate": 4.732276960902267e-05, | |
| "loss": 0.9246, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.8002602472348731, | |
| "grad_norm": 0.8769578337669373, | |
| "learning_rate": 4.7299482517305404e-05, | |
| "loss": 0.9298, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.8035133376707873, | |
| "grad_norm": 0.7453442811965942, | |
| "learning_rate": 4.7276100366558474e-05, | |
| "loss": 0.9491, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.8067664281067014, | |
| "grad_norm": 0.906287431716919, | |
| "learning_rate": 4.7252623256456144e-05, | |
| "loss": 0.9539, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8100195185426154, | |
| "grad_norm": 1.0656296014785767, | |
| "learning_rate": 4.722905128707749e-05, | |
| "loss": 0.9405, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.8132726089785296, | |
| "grad_norm": 0.6985450983047485, | |
| "learning_rate": 4.720538455890591e-05, | |
| "loss": 0.9369, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8165256994144438, | |
| "grad_norm": 0.6577023267745972, | |
| "learning_rate": 4.718162317282882e-05, | |
| "loss": 0.9346, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.8197787898503578, | |
| "grad_norm": 0.7832421064376831, | |
| "learning_rate": 4.7157767230137064e-05, | |
| "loss": 0.9256, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.8230318802862719, | |
| "grad_norm": 0.7928493618965149, | |
| "learning_rate": 4.713381683252463e-05, | |
| "loss": 0.9477, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.8262849707221861, | |
| "grad_norm": 0.8775043487548828, | |
| "learning_rate": 4.710977208208812e-05, | |
| "loss": 0.9313, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.8295380611581002, | |
| "grad_norm": 0.7714875936508179, | |
| "learning_rate": 4.708563308132636e-05, | |
| "loss": 0.9469, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8327911515940143, | |
| "grad_norm": 0.7258083820343018, | |
| "learning_rate": 4.706139993313994e-05, | |
| "loss": 0.9294, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8360442420299284, | |
| "grad_norm": 0.7745918035507202, | |
| "learning_rate": 4.7037072740830785e-05, | |
| "loss": 0.9365, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8392973324658426, | |
| "grad_norm": 0.7213959097862244, | |
| "learning_rate": 4.701265160810172e-05, | |
| "loss": 0.947, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8425504229017566, | |
| "grad_norm": 0.825713038444519, | |
| "learning_rate": 4.6988136639056025e-05, | |
| "loss": 0.9404, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8458035133376708, | |
| "grad_norm": 0.6750174164772034, | |
| "learning_rate": 4.696352793819698e-05, | |
| "loss": 0.9364, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8490566037735849, | |
| "grad_norm": 0.8314560055732727, | |
| "learning_rate": 4.693882561042743e-05, | |
| "loss": 0.9521, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.852309694209499, | |
| "grad_norm": 1.0009961128234863, | |
| "learning_rate": 4.6914029761049357e-05, | |
| "loss": 0.9297, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8555627846454131, | |
| "grad_norm": 0.7527256011962891, | |
| "learning_rate": 4.688914049576337e-05, | |
| "loss": 0.9269, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.8588158750813273, | |
| "grad_norm": 0.9169411659240723, | |
| "learning_rate": 4.686415792066833e-05, | |
| "loss": 0.9312, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8620689655172413, | |
| "grad_norm": 0.9165216088294983, | |
| "learning_rate": 4.683908214226084e-05, | |
| "loss": 0.9524, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.8653220559531555, | |
| "grad_norm": 0.9357953071594238, | |
| "learning_rate": 4.6813913267434835e-05, | |
| "loss": 0.9245, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.8685751463890696, | |
| "grad_norm": 0.6473081707954407, | |
| "learning_rate": 4.678865140348108e-05, | |
| "loss": 0.9584, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.8718282368249838, | |
| "grad_norm": 0.884191632270813, | |
| "learning_rate": 4.676329665808677e-05, | |
| "loss": 0.9569, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8750813272608978, | |
| "grad_norm": 1.0534435510635376, | |
| "learning_rate": 4.673784913933499e-05, | |
| "loss": 0.9178, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.878334417696812, | |
| "grad_norm": 0.8140066266059875, | |
| "learning_rate": 4.6712308955704346e-05, | |
| "loss": 0.9536, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8815875081327261, | |
| "grad_norm": 0.71702641248703, | |
| "learning_rate": 4.668667621606845e-05, | |
| "loss": 0.947, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.8848405985686402, | |
| "grad_norm": 0.6529531478881836, | |
| "learning_rate": 4.666095102969544e-05, | |
| "loss": 0.9107, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.8880936890045543, | |
| "grad_norm": 0.9059852957725525, | |
| "learning_rate": 4.6635133506247585e-05, | |
| "loss": 0.9399, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.8913467794404685, | |
| "grad_norm": 0.8972651958465576, | |
| "learning_rate": 4.660922375578073e-05, | |
| "loss": 0.9511, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8945998698763825, | |
| "grad_norm": 1.0316717624664307, | |
| "learning_rate": 4.658322188874388e-05, | |
| "loss": 0.9335, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.8978529603122967, | |
| "grad_norm": 0.7475149035453796, | |
| "learning_rate": 4.6557128015978726e-05, | |
| "loss": 0.9262, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.9011060507482108, | |
| "grad_norm": 1.035979986190796, | |
| "learning_rate": 4.653094224871916e-05, | |
| "loss": 0.9115, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.904359141184125, | |
| "grad_norm": 0.8210706114768982, | |
| "learning_rate": 4.650466469859079e-05, | |
| "loss": 0.9535, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.907612231620039, | |
| "grad_norm": 0.9931228160858154, | |
| "learning_rate": 4.647829547761053e-05, | |
| "loss": 0.9335, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9108653220559532, | |
| "grad_norm": 0.7681549191474915, | |
| "learning_rate": 4.6451834698186e-05, | |
| "loss": 0.9434, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9141184124918673, | |
| "grad_norm": 0.7461596727371216, | |
| "learning_rate": 4.642528247311518e-05, | |
| "loss": 0.9487, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9173715029277814, | |
| "grad_norm": 1.4867486953735352, | |
| "learning_rate": 4.6398638915585835e-05, | |
| "loss": 0.9074, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.9206245933636955, | |
| "grad_norm": 0.890620231628418, | |
| "learning_rate": 4.637190413917506e-05, | |
| "loss": 0.9467, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9238776837996097, | |
| "grad_norm": 0.6205281615257263, | |
| "learning_rate": 4.634507825784882e-05, | |
| "loss": 0.9242, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9271307742355237, | |
| "grad_norm": 0.8957470655441284, | |
| "learning_rate": 4.631816138596145e-05, | |
| "loss": 0.94, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9303838646714379, | |
| "grad_norm": 0.8642396330833435, | |
| "learning_rate": 4.629115363825514e-05, | |
| "loss": 0.9142, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.933636955107352, | |
| "grad_norm": 0.6721086502075195, | |
| "learning_rate": 4.626405512985948e-05, | |
| "loss": 0.9205, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.936890045543266, | |
| "grad_norm": 0.8930765986442566, | |
| "learning_rate": 4.623686597629098e-05, | |
| "loss": 0.9235, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9401431359791802, | |
| "grad_norm": 0.9480865597724915, | |
| "learning_rate": 4.62095862934525e-05, | |
| "loss": 0.9309, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 0.9130436778068542, | |
| "learning_rate": 4.618221619763287e-05, | |
| "loss": 0.9257, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9466493168510085, | |
| "grad_norm": 0.63996821641922, | |
| "learning_rate": 4.6154755805506294e-05, | |
| "loss": 0.9364, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.9499024072869225, | |
| "grad_norm": 0.786276638507843, | |
| "learning_rate": 4.612720523413193e-05, | |
| "loss": 0.9389, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.9531554977228367, | |
| "grad_norm": 0.8122700452804565, | |
| "learning_rate": 4.609956460095332e-05, | |
| "loss": 0.9296, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.9564085881587508, | |
| "grad_norm": 1.0054434537887573, | |
| "learning_rate": 4.607183402379794e-05, | |
| "loss": 0.9118, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.9596616785946649, | |
| "grad_norm": 0.9399415850639343, | |
| "learning_rate": 4.6044013620876706e-05, | |
| "loss": 0.9311, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.962914769030579, | |
| "grad_norm": 0.6693314909934998, | |
| "learning_rate": 4.60161035107834e-05, | |
| "loss": 0.9322, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.9661678594664932, | |
| "grad_norm": 0.7549735903739929, | |
| "learning_rate": 4.598810381249425e-05, | |
| "loss": 0.9246, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.9694209499024072, | |
| "grad_norm": 0.8314823508262634, | |
| "learning_rate": 4.596001464536737e-05, | |
| "loss": 0.9335, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.9726740403383214, | |
| "grad_norm": 0.7478086948394775, | |
| "learning_rate": 4.593183612914225e-05, | |
| "loss": 0.9341, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.9759271307742355, | |
| "grad_norm": 0.9777085185050964, | |
| "learning_rate": 4.5903568383939284e-05, | |
| "loss": 0.9323, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9791802212101497, | |
| "grad_norm": 0.893374502658844, | |
| "learning_rate": 4.587521153025922e-05, | |
| "loss": 0.939, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.9824333116460637, | |
| "grad_norm": 0.6938668489456177, | |
| "learning_rate": 4.584676568898267e-05, | |
| "loss": 0.9437, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9856864020819779, | |
| "grad_norm": 0.6903214454650879, | |
| "learning_rate": 4.5818230981369584e-05, | |
| "loss": 0.9332, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.988939492517892, | |
| "grad_norm": 0.817034125328064, | |
| "learning_rate": 4.5789607529058715e-05, | |
| "loss": 0.9375, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9921925829538061, | |
| "grad_norm": 0.8222942352294922, | |
| "learning_rate": 4.5760895454067154e-05, | |
| "loss": 0.9316, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9954456733897202, | |
| "grad_norm": 0.7549692392349243, | |
| "learning_rate": 4.5732094878789756e-05, | |
| "loss": 0.9221, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.9986987638256344, | |
| "grad_norm": 0.8544319868087769, | |
| "learning_rate": 4.570320592599863e-05, | |
| "loss": 0.9287, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.7910057808991992, | |
| "eval_loss": 0.462646484375, | |
| "eval_precision": 0.7940469727119374, | |
| "eval_recall": 0.7896973937143991, | |
| "eval_runtime": 247.1562, | |
| "eval_samples_per_second": 1591.847, | |
| "eval_steps_per_second": 1.558, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.0019518542615484, | |
| "grad_norm": 0.7457589507102966, | |
| "learning_rate": 4.567422871884265e-05, | |
| "loss": 0.9279, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.0052049446974627, | |
| "grad_norm": 0.8609625697135925, | |
| "learning_rate": 4.564516338084688e-05, | |
| "loss": 0.8765, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 1.0084580351333767, | |
| "grad_norm": 0.8822636008262634, | |
| "learning_rate": 4.561601003591208e-05, | |
| "loss": 0.8427, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.0117111255692908, | |
| "grad_norm": 0.7266067266464233, | |
| "learning_rate": 4.558676880831417e-05, | |
| "loss": 0.8828, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 1.014964216005205, | |
| "grad_norm": 0.6970102787017822, | |
| "learning_rate": 4.555743982270369e-05, | |
| "loss": 0.8842, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.018217306441119, | |
| "grad_norm": 0.6802201867103577, | |
| "learning_rate": 4.5528023204105306e-05, | |
| "loss": 0.872, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 1.0214703968770331, | |
| "grad_norm": 0.7830452919006348, | |
| "learning_rate": 4.549851907791722e-05, | |
| "loss": 0.8624, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.0247234873129474, | |
| "grad_norm": 0.6845102906227112, | |
| "learning_rate": 4.5468927569910663e-05, | |
| "loss": 0.8744, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.0279765777488614, | |
| "grad_norm": 0.8832181692123413, | |
| "learning_rate": 4.5439248806229386e-05, | |
| "loss": 0.8722, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.0312296681847755, | |
| "grad_norm": 0.7359802722930908, | |
| "learning_rate": 4.5409482913389065e-05, | |
| "loss": 0.8567, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 1.0344827586206897, | |
| "grad_norm": 0.7686721086502075, | |
| "learning_rate": 4.5379630018276834e-05, | |
| "loss": 0.8509, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.0377358490566038, | |
| "grad_norm": 0.77400141954422, | |
| "learning_rate": 4.534969024815066e-05, | |
| "loss": 0.8676, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 1.0409889394925178, | |
| "grad_norm": 0.8024744987487793, | |
| "learning_rate": 4.531966373063886e-05, | |
| "loss": 0.8772, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.044242029928432, | |
| "grad_norm": 0.7155640721321106, | |
| "learning_rate": 4.528955059373956e-05, | |
| "loss": 0.8608, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 1.047495120364346, | |
| "grad_norm": 0.8553564548492432, | |
| "learning_rate": 4.52593509658201e-05, | |
| "loss": 0.8614, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.0507482108002602, | |
| "grad_norm": 0.6926222443580627, | |
| "learning_rate": 4.522906497561655e-05, | |
| "loss": 0.8582, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 1.0540013012361744, | |
| "grad_norm": 0.8300968408584595, | |
| "learning_rate": 4.519869275223309e-05, | |
| "loss": 0.8838, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.0572543916720885, | |
| "grad_norm": 0.8907480835914612, | |
| "learning_rate": 4.516823442514153e-05, | |
| "loss": 0.8656, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.0605074821080025, | |
| "grad_norm": 1.035863995552063, | |
| "learning_rate": 4.513769012418071e-05, | |
| "loss": 0.8814, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.0637605725439168, | |
| "grad_norm": 0.9308491945266724, | |
| "learning_rate": 4.510705997955596e-05, | |
| "loss": 0.8831, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 1.0670136629798308, | |
| "grad_norm": 1.0290710926055908, | |
| "learning_rate": 4.507634412183856e-05, | |
| "loss": 0.8566, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.070266753415745, | |
| "grad_norm": 0.9163823127746582, | |
| "learning_rate": 4.504554268196516e-05, | |
| "loss": 0.8646, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 1.073519843851659, | |
| "grad_norm": 0.7528260946273804, | |
| "learning_rate": 4.5014655791237245e-05, | |
| "loss": 0.8681, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.0767729342875731, | |
| "grad_norm": 0.9018992781639099, | |
| "learning_rate": 4.498368358132055e-05, | |
| "loss": 0.8667, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 1.0800260247234874, | |
| "grad_norm": 1.000990390777588, | |
| "learning_rate": 4.4952626184244504e-05, | |
| "loss": 0.8627, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.0832791151594015, | |
| "grad_norm": 1.1555023193359375, | |
| "learning_rate": 4.492148373240171e-05, | |
| "loss": 0.8488, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 1.0865322055953155, | |
| "grad_norm": 0.9759275913238525, | |
| "learning_rate": 4.4890256358547304e-05, | |
| "loss": 0.8775, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.0897852960312298, | |
| "grad_norm": 0.7439780235290527, | |
| "learning_rate": 4.485894419579846e-05, | |
| "loss": 0.8758, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.0930383864671438, | |
| "grad_norm": 0.8394938111305237, | |
| "learning_rate": 4.482754737763378e-05, | |
| "loss": 0.8797, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.0962914769030578, | |
| "grad_norm": 0.8299522399902344, | |
| "learning_rate": 4.4796066037892734e-05, | |
| "loss": 0.864, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 1.099544567338972, | |
| "grad_norm": 0.8585712909698486, | |
| "learning_rate": 4.4764500310775116e-05, | |
| "loss": 0.8586, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.1027976577748861, | |
| "grad_norm": 1.0859423875808716, | |
| "learning_rate": 4.473285033084043e-05, | |
| "loss": 0.8773, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 1.1060507482108002, | |
| "grad_norm": 0.7827959060668945, | |
| "learning_rate": 4.4701116233007314e-05, | |
| "loss": 0.8423, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.1093038386467144, | |
| "grad_norm": 0.7498010993003845, | |
| "learning_rate": 4.466929815255304e-05, | |
| "loss": 0.884, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 1.1125569290826285, | |
| "grad_norm": 0.7543908357620239, | |
| "learning_rate": 4.4637396225112846e-05, | |
| "loss": 0.8606, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.1158100195185425, | |
| "grad_norm": 1.3613898754119873, | |
| "learning_rate": 4.460541058667942e-05, | |
| "loss": 0.8909, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 1.1190631099544568, | |
| "grad_norm": 0.8409460783004761, | |
| "learning_rate": 4.457334137360226e-05, | |
| "loss": 0.8892, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.1223162003903708, | |
| "grad_norm": 0.9072450995445251, | |
| "learning_rate": 4.4541188722587165e-05, | |
| "loss": 0.8714, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.1255692908262849, | |
| "grad_norm": 1.02306067943573, | |
| "learning_rate": 4.450895277069561e-05, | |
| "loss": 0.8813, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.1288223812621991, | |
| "grad_norm": 1.0199263095855713, | |
| "learning_rate": 4.4476633655344144e-05, | |
| "loss": 0.8693, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 1.1320754716981132, | |
| "grad_norm": 0.7447525262832642, | |
| "learning_rate": 4.444423151430386e-05, | |
| "loss": 0.8894, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.1353285621340272, | |
| "grad_norm": 1.062179446220398, | |
| "learning_rate": 4.4411746485699744e-05, | |
| "loss": 0.8425, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 1.1385816525699415, | |
| "grad_norm": 0.7509242296218872, | |
| "learning_rate": 4.437917870801015e-05, | |
| "loss": 0.8666, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.1418347430058555, | |
| "grad_norm": 1.1955047845840454, | |
| "learning_rate": 4.434652832006616e-05, | |
| "loss": 0.8798, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 1.1450878334417696, | |
| "grad_norm": 1.1089417934417725, | |
| "learning_rate": 4.431379546105101e-05, | |
| "loss": 0.8808, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.1483409238776838, | |
| "grad_norm": 0.7296579480171204, | |
| "learning_rate": 4.4280980270499494e-05, | |
| "loss": 0.854, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 1.1515940143135979, | |
| "grad_norm": 1.0274302959442139, | |
| "learning_rate": 4.424808288829739e-05, | |
| "loss": 0.8775, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.1548471047495121, | |
| "grad_norm": 0.8249827027320862, | |
| "learning_rate": 4.421510345468082e-05, | |
| "loss": 0.8825, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.1581001951854262, | |
| "grad_norm": 0.814564049243927, | |
| "learning_rate": 4.4182042110235686e-05, | |
| "loss": 0.8354, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.1613532856213402, | |
| "grad_norm": 0.8738640546798706, | |
| "learning_rate": 4.414889899589709e-05, | |
| "loss": 0.8667, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 1.1646063760572545, | |
| "grad_norm": 0.873928427696228, | |
| "learning_rate": 4.411567425294867e-05, | |
| "loss": 0.8589, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.1678594664931685, | |
| "grad_norm": 1.0771477222442627, | |
| "learning_rate": 4.408236802302203e-05, | |
| "loss": 0.8677, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 1.1711125569290826, | |
| "grad_norm": 1.026843786239624, | |
| "learning_rate": 4.404898044809618e-05, | |
| "loss": 0.8613, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.1743656473649968, | |
| "grad_norm": 1.2807365655899048, | |
| "learning_rate": 4.401551167049686e-05, | |
| "loss": 0.8612, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 1.1776187378009109, | |
| "grad_norm": 1.086053729057312, | |
| "learning_rate": 4.398196183289595e-05, | |
| "loss": 0.8679, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.180871828236825, | |
| "grad_norm": 1.2245922088623047, | |
| "learning_rate": 4.394833107831091e-05, | |
| "loss": 0.8666, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 1.1841249186727392, | |
| "grad_norm": 0.788972020149231, | |
| "learning_rate": 4.3914619550104125e-05, | |
| "loss": 0.8549, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.1873780091086532, | |
| "grad_norm": 0.7560495734214783, | |
| "learning_rate": 4.388082739198229e-05, | |
| "loss": 0.8689, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 1.1906310995445673, | |
| "grad_norm": 0.9753955006599426, | |
| "learning_rate": 4.3846954747995825e-05, | |
| "loss": 0.8676, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.1938841899804815, | |
| "grad_norm": 0.7910217642784119, | |
| "learning_rate": 4.381300176253825e-05, | |
| "loss": 0.872, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 1.1971372804163956, | |
| "grad_norm": 0.9588011503219604, | |
| "learning_rate": 4.377896858034557e-05, | |
| "loss": 0.8903, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.2003903708523098, | |
| "grad_norm": 0.9886934757232666, | |
| "learning_rate": 4.374485534649562e-05, | |
| "loss": 0.879, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 1.2036434612882239, | |
| "grad_norm": 0.896848738193512, | |
| "learning_rate": 4.371066220640754e-05, | |
| "loss": 0.854, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.206896551724138, | |
| "grad_norm": 1.7082849740982056, | |
| "learning_rate": 4.367638930584105e-05, | |
| "loss": 0.8877, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 1.2101496421600522, | |
| "grad_norm": 1.307518482208252, | |
| "learning_rate": 4.36420367908959e-05, | |
| "loss": 0.8637, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.2134027325959662, | |
| "grad_norm": 0.9649641513824463, | |
| "learning_rate": 4.3607604808011213e-05, | |
| "loss": 0.8644, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 1.2166558230318802, | |
| "grad_norm": 0.958816409111023, | |
| "learning_rate": 4.357309350396488e-05, | |
| "loss": 0.8771, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.2199089134677945, | |
| "grad_norm": 0.7665415406227112, | |
| "learning_rate": 4.353850302587291e-05, | |
| "loss": 0.8559, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.2231620039037086, | |
| "grad_norm": 0.8145641088485718, | |
| "learning_rate": 4.3503833521188844e-05, | |
| "loss": 0.8776, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.2264150943396226, | |
| "grad_norm": 1.0663881301879883, | |
| "learning_rate": 4.346908513770306e-05, | |
| "loss": 0.8643, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 1.2296681847755369, | |
| "grad_norm": 0.7401409149169922, | |
| "learning_rate": 4.343425802354222e-05, | |
| "loss": 0.8646, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.232921275211451, | |
| "grad_norm": 0.7239570021629333, | |
| "learning_rate": 4.3399352327168595e-05, | |
| "loss": 0.8885, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 1.236174365647365, | |
| "grad_norm": 1.0525251626968384, | |
| "learning_rate": 4.3364368197379426e-05, | |
| "loss": 0.8817, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.2394274560832792, | |
| "grad_norm": 0.8934289813041687, | |
| "learning_rate": 4.33293057833063e-05, | |
| "loss": 0.8699, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 1.2426805465191932, | |
| "grad_norm": 0.8614199757575989, | |
| "learning_rate": 4.329416523441454e-05, | |
| "loss": 0.866, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.2459336369551073, | |
| "grad_norm": 0.884955644607544, | |
| "learning_rate": 4.3258946700502535e-05, | |
| "loss": 0.8641, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 1.2491867273910215, | |
| "grad_norm": 0.8655734062194824, | |
| "learning_rate": 4.322365033170109e-05, | |
| "loss": 0.8393, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.2524398178269356, | |
| "grad_norm": 1.0718590021133423, | |
| "learning_rate": 4.318827627847284e-05, | |
| "loss": 0.8788, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.2556929082628496, | |
| "grad_norm": 0.9467219710350037, | |
| "learning_rate": 4.315282469161156e-05, | |
| "loss": 0.8758, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.258945998698764, | |
| "grad_norm": 1.0598018169403076, | |
| "learning_rate": 4.311729572224153e-05, | |
| "loss": 0.8872, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 1.262199089134678, | |
| "grad_norm": 0.7586490511894226, | |
| "learning_rate": 4.308168952181691e-05, | |
| "loss": 0.8749, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.265452179570592, | |
| "grad_norm": 0.8791137933731079, | |
| "learning_rate": 4.304600624212109e-05, | |
| "loss": 0.8833, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 1.2687052700065062, | |
| "grad_norm": 1.0280482769012451, | |
| "learning_rate": 4.3017404223497385e-05, | |
| "loss": 0.893, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.2719583604424203, | |
| "grad_norm": 0.8759311437606812, | |
| "learning_rate": 4.298158258465592e-05, | |
| "loss": 0.8833, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 1.2752114508783343, | |
| "grad_norm": 0.8623502850532532, | |
| "learning_rate": 4.2945684293282685e-05, | |
| "loss": 0.8533, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.2784645413142486, | |
| "grad_norm": 0.9812124967575073, | |
| "learning_rate": 4.290970950240617e-05, | |
| "loss": 0.8832, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 1.2817176317501626, | |
| "grad_norm": 0.8114174008369446, | |
| "learning_rate": 4.2873658365381026e-05, | |
| "loss": 0.8657, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.2849707221860767, | |
| "grad_norm": 0.7681922912597656, | |
| "learning_rate": 4.2837531035887305e-05, | |
| "loss": 0.8563, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.288223812621991, | |
| "grad_norm": 0.9911778569221497, | |
| "learning_rate": 4.280132766792989e-05, | |
| "loss": 0.8401, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.291476903057905, | |
| "grad_norm": 0.7618448138237, | |
| "learning_rate": 4.276504841583778e-05, | |
| "loss": 0.8727, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 1.294729993493819, | |
| "grad_norm": 0.7748595476150513, | |
| "learning_rate": 4.2728693434263476e-05, | |
| "loss": 0.8726, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.2979830839297333, | |
| "grad_norm": 0.995187520980835, | |
| "learning_rate": 4.269226287818228e-05, | |
| "loss": 0.8606, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 1.3012361743656473, | |
| "grad_norm": 0.9184800386428833, | |
| "learning_rate": 4.2655756902891665e-05, | |
| "loss": 0.8881, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3044892648015614, | |
| "grad_norm": 0.6605210304260254, | |
| "learning_rate": 4.261917566401061e-05, | |
| "loss": 0.8452, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 1.3077423552374756, | |
| "grad_norm": 0.9930521249771118, | |
| "learning_rate": 4.258251931747893e-05, | |
| "loss": 0.8661, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.3109954456733897, | |
| "grad_norm": 0.6971027255058289, | |
| "learning_rate": 4.25457880195566e-05, | |
| "loss": 0.8607, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 1.3142485361093037, | |
| "grad_norm": 0.8052083253860474, | |
| "learning_rate": 4.250898192682311e-05, | |
| "loss": 0.8407, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.317501626545218, | |
| "grad_norm": 0.7318537831306458, | |
| "learning_rate": 4.247210119617679e-05, | |
| "loss": 0.8703, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.320754716981132, | |
| "grad_norm": 1.0614877939224243, | |
| "learning_rate": 4.243514598483412e-05, | |
| "loss": 0.854, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.3240078074170463, | |
| "grad_norm": 1.2773613929748535, | |
| "learning_rate": 4.23981164503291e-05, | |
| "loss": 0.8728, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 1.3272608978529603, | |
| "grad_norm": 1.41408371925354, | |
| "learning_rate": 4.236101275051256e-05, | |
| "loss": 0.859, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.3305139882888743, | |
| "grad_norm": 0.7571334838867188, | |
| "learning_rate": 4.232383504355147e-05, | |
| "loss": 0.8588, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 1.3337670787247886, | |
| "grad_norm": 0.7090466618537903, | |
| "learning_rate": 4.228658348792828e-05, | |
| "loss": 0.8672, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.3370201691607027, | |
| "grad_norm": 0.826134204864502, | |
| "learning_rate": 4.224925824244025e-05, | |
| "loss": 0.8552, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 1.340273259596617, | |
| "grad_norm": 0.8876454830169678, | |
| "learning_rate": 4.2211859466198785e-05, | |
| "loss": 0.8733, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.343526350032531, | |
| "grad_norm": 0.7836646437644958, | |
| "learning_rate": 4.217438731862871e-05, | |
| "loss": 0.8643, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 1.346779440468445, | |
| "grad_norm": 0.795116662979126, | |
| "learning_rate": 4.213684195946762e-05, | |
| "loss": 0.8759, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.3500325309043593, | |
| "grad_norm": 0.9851782321929932, | |
| "learning_rate": 4.2099223548765224e-05, | |
| "loss": 0.872, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.3532856213402733, | |
| "grad_norm": 0.9454843997955322, | |
| "learning_rate": 4.206153224688264e-05, | |
| "loss": 0.8709, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.3565387117761873, | |
| "grad_norm": 0.7972314953804016, | |
| "learning_rate": 4.202376821449167e-05, | |
| "loss": 0.881, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 1.3597918022121016, | |
| "grad_norm": 0.7645969390869141, | |
| "learning_rate": 4.1985931612574186e-05, | |
| "loss": 0.8729, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.3630448926480156, | |
| "grad_norm": 1.1820120811462402, | |
| "learning_rate": 4.194802260242141e-05, | |
| "loss": 0.8556, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 1.3662979830839297, | |
| "grad_norm": 0.9157008528709412, | |
| "learning_rate": 4.191004134563322e-05, | |
| "loss": 0.8721, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.369551073519844, | |
| "grad_norm": 0.8286409974098206, | |
| "learning_rate": 4.187198800411748e-05, | |
| "loss": 0.8756, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 1.372804163955758, | |
| "grad_norm": 0.8742622137069702, | |
| "learning_rate": 4.183386274008932e-05, | |
| "loss": 0.8592, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.376057254391672, | |
| "grad_norm": 0.8968034386634827, | |
| "learning_rate": 4.1795665716070474e-05, | |
| "loss": 0.8641, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 1.3793103448275863, | |
| "grad_norm": 0.8291420340538025, | |
| "learning_rate": 4.1757397094888594e-05, | |
| "loss": 0.8529, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.3825634352635003, | |
| "grad_norm": 0.919009268283844, | |
| "learning_rate": 4.1719057039676515e-05, | |
| "loss": 0.8636, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.3858165256994144, | |
| "grad_norm": 1.0421229600906372, | |
| "learning_rate": 4.168064571387159e-05, | |
| "loss": 0.8681, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.3890696161353286, | |
| "grad_norm": 0.7388564944267273, | |
| "learning_rate": 4.1642163281214984e-05, | |
| "loss": 0.8513, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 1.3923227065712427, | |
| "grad_norm": 0.6921651363372803, | |
| "learning_rate": 4.160360990575099e-05, | |
| "loss": 0.8723, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.3955757970071567, | |
| "grad_norm": 0.7668315768241882, | |
| "learning_rate": 4.156498575182633e-05, | |
| "loss": 0.8621, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 1.398828887443071, | |
| "grad_norm": 0.7497116327285767, | |
| "learning_rate": 4.152629098408939e-05, | |
| "loss": 0.8604, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.402081977878985, | |
| "grad_norm": 0.7256556749343872, | |
| "learning_rate": 4.1487525767489635e-05, | |
| "loss": 0.8638, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 1.405335068314899, | |
| "grad_norm": 1.1155390739440918, | |
| "learning_rate": 4.144869026727681e-05, | |
| "loss": 0.8547, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.4085881587508133, | |
| "grad_norm": 0.9044195413589478, | |
| "learning_rate": 4.140978464900025e-05, | |
| "loss": 0.8792, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 1.4118412491867274, | |
| "grad_norm": 0.7881206274032593, | |
| "learning_rate": 4.137080907850823e-05, | |
| "loss": 0.874, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.4150943396226414, | |
| "grad_norm": 0.851743757724762, | |
| "learning_rate": 4.13317637219472e-05, | |
| "loss": 0.8551, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.4183474300585557, | |
| "grad_norm": 0.8619376420974731, | |
| "learning_rate": 4.129264874576111e-05, | |
| "loss": 0.8757, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.4216005204944697, | |
| "grad_norm": 1.2099318504333496, | |
| "learning_rate": 4.125346431669065e-05, | |
| "loss": 0.8567, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 1.4248536109303838, | |
| "grad_norm": 0.8172369599342346, | |
| "learning_rate": 4.121421060177263e-05, | |
| "loss": 0.8625, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.428106701366298, | |
| "grad_norm": 1.1485086679458618, | |
| "learning_rate": 4.1174887768339164e-05, | |
| "loss": 0.8681, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 1.431359791802212, | |
| "grad_norm": 0.8006755709648132, | |
| "learning_rate": 4.113549598401704e-05, | |
| "loss": 0.8657, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.434612882238126, | |
| "grad_norm": 0.7858587503433228, | |
| "learning_rate": 4.1096035416726966e-05, | |
| "loss": 0.8681, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 1.4378659726740404, | |
| "grad_norm": 1.0397981405258179, | |
| "learning_rate": 4.105650623468284e-05, | |
| "loss": 0.871, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.4411190631099544, | |
| "grad_norm": 1.409725546836853, | |
| "learning_rate": 4.101690860639108e-05, | |
| "loss": 0.8525, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 1.4443721535458685, | |
| "grad_norm": 1.0374292135238647, | |
| "learning_rate": 4.097724270064988e-05, | |
| "loss": 0.8561, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.4476252439817827, | |
| "grad_norm": 1.10367751121521, | |
| "learning_rate": 4.0937508686548455e-05, | |
| "loss": 0.8608, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.4508783344176968, | |
| "grad_norm": 0.9354111552238464, | |
| "learning_rate": 4.089770673346639e-05, | |
| "loss": 0.8556, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.4541314248536108, | |
| "grad_norm": 0.7732600569725037, | |
| "learning_rate": 4.085783701107288e-05, | |
| "loss": 0.8664, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 1.457384515289525, | |
| "grad_norm": 0.7464646697044373, | |
| "learning_rate": 4.0817899689325975e-05, | |
| "loss": 0.8544, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.460637605725439, | |
| "grad_norm": 0.7917648553848267, | |
| "learning_rate": 4.077789493847194e-05, | |
| "loss": 0.849, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 1.4638906961613534, | |
| "grad_norm": 0.8593052625656128, | |
| "learning_rate": 4.073782292904445e-05, | |
| "loss": 0.905, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.4671437865972674, | |
| "grad_norm": 0.7432965636253357, | |
| "learning_rate": 4.0697683831863877e-05, | |
| "loss": 0.8606, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 1.4703968770331814, | |
| "grad_norm": 1.0467164516448975, | |
| "learning_rate": 4.065747781803662e-05, | |
| "loss": 0.8733, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.4736499674690957, | |
| "grad_norm": 0.8533846735954285, | |
| "learning_rate": 4.06172050589543e-05, | |
| "loss": 0.8411, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 1.4769030579050098, | |
| "grad_norm": 0.7896531224250793, | |
| "learning_rate": 4.057686572629307e-05, | |
| "loss": 0.8732, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.480156148340924, | |
| "grad_norm": 0.7728810906410217, | |
| "learning_rate": 4.053645999201287e-05, | |
| "loss": 0.8822, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.483409238776838, | |
| "grad_norm": 0.791527271270752, | |
| "learning_rate": 4.0495988028356725e-05, | |
| "loss": 0.8692, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.486662329212752, | |
| "grad_norm": 1.7369199991226196, | |
| "learning_rate": 4.0455450007849945e-05, | |
| "loss": 0.878, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 1.4899154196486664, | |
| "grad_norm": 0.8174150586128235, | |
| "learning_rate": 4.041484610329945e-05, | |
| "loss": 0.8843, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.4931685100845804, | |
| "grad_norm": 0.8122901916503906, | |
| "learning_rate": 4.037417648779304e-05, | |
| "loss": 0.8511, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 1.4964216005204944, | |
| "grad_norm": 0.856270968914032, | |
| "learning_rate": 4.033344133469857e-05, | |
| "loss": 0.8576, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.4996746909564087, | |
| "grad_norm": 0.7714033126831055, | |
| "learning_rate": 4.029264081766333e-05, | |
| "loss": 0.8563, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 1.5029277813923227, | |
| "grad_norm": 0.7557379007339478, | |
| "learning_rate": 4.02517751106132e-05, | |
| "loss": 0.8632, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.5061808718282368, | |
| "grad_norm": 0.9310267567634583, | |
| "learning_rate": 4.021084438775199e-05, | |
| "loss": 0.8756, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 1.509433962264151, | |
| "grad_norm": 1.1613460779190063, | |
| "learning_rate": 4.016984882356063e-05, | |
| "loss": 0.8581, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.512687052700065, | |
| "grad_norm": 0.8737664222717285, | |
| "learning_rate": 4.0128788592796484e-05, | |
| "loss": 0.8463, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.5159401431359791, | |
| "grad_norm": 1.137432336807251, | |
| "learning_rate": 4.008766387049257e-05, | |
| "loss": 0.8668, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.5191932335718934, | |
| "grad_norm": 1.205127239227295, | |
| "learning_rate": 4.004647483195682e-05, | |
| "loss": 0.854, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 1.5224463240078074, | |
| "grad_norm": 1.2103711366653442, | |
| "learning_rate": 4.0005221652771326e-05, | |
| "loss": 0.8599, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.5256994144437215, | |
| "grad_norm": 0.8847302794456482, | |
| "learning_rate": 3.996390450879163e-05, | |
| "loss": 0.8902, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 1.5289525048796357, | |
| "grad_norm": 0.9139837622642517, | |
| "learning_rate": 3.992252357614591e-05, | |
| "loss": 0.8537, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.5322055953155498, | |
| "grad_norm": 0.6250112056732178, | |
| "learning_rate": 3.9881079031234295e-05, | |
| "loss": 0.8625, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 1.5354586857514638, | |
| "grad_norm": 1.3147530555725098, | |
| "learning_rate": 3.983957105072806e-05, | |
| "loss": 0.8594, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.538711776187378, | |
| "grad_norm": 0.8052361607551575, | |
| "learning_rate": 3.9797999811568916e-05, | |
| "loss": 0.8613, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 1.5419648666232921, | |
| "grad_norm": 0.963198721408844, | |
| "learning_rate": 3.9756365490968216e-05, | |
| "loss": 0.8846, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.5452179570592062, | |
| "grad_norm": 0.7471247911453247, | |
| "learning_rate": 3.971466826640622e-05, | |
| "loss": 0.8559, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.5484710474951204, | |
| "grad_norm": 0.9139803051948547, | |
| "learning_rate": 3.967290831563137e-05, | |
| "loss": 0.8734, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.5517241379310345, | |
| "grad_norm": 0.8502246141433716, | |
| "learning_rate": 3.963108581665945e-05, | |
| "loss": 0.8517, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 1.5549772283669485, | |
| "grad_norm": 1.010526418685913, | |
| "learning_rate": 3.958920094777292e-05, | |
| "loss": 0.8699, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.5582303188028628, | |
| "grad_norm": 0.9621404409408569, | |
| "learning_rate": 3.954725388752006e-05, | |
| "loss": 0.8715, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 1.5614834092387768, | |
| "grad_norm": 0.931891679763794, | |
| "learning_rate": 3.950524481471434e-05, | |
| "loss": 0.8639, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.5647364996746909, | |
| "grad_norm": 0.9025523066520691, | |
| "learning_rate": 3.94631739084335e-05, | |
| "loss": 0.8407, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 1.5679895901106051, | |
| "grad_norm": 0.7679696679115295, | |
| "learning_rate": 3.942104134801892e-05, | |
| "loss": 0.8703, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.5712426805465192, | |
| "grad_norm": 0.7461057901382446, | |
| "learning_rate": 3.937884731307477e-05, | |
| "loss": 0.8508, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 1.5744957709824332, | |
| "grad_norm": 0.8891671895980835, | |
| "learning_rate": 3.9336591983467296e-05, | |
| "loss": 0.8392, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.5777488614183475, | |
| "grad_norm": 0.7495052218437195, | |
| "learning_rate": 3.929427553932402e-05, | |
| "loss": 0.8617, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.5810019518542615, | |
| "grad_norm": 0.8563068509101868, | |
| "learning_rate": 3.925189816103298e-05, | |
| "loss": 0.8682, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.5842550422901756, | |
| "grad_norm": 0.8730781674385071, | |
| "learning_rate": 3.9209460029242e-05, | |
| "loss": 0.8634, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 1.5875081327260898, | |
| "grad_norm": 1.0046974420547485, | |
| "learning_rate": 3.916696132485783e-05, | |
| "loss": 0.8423, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.5907612231620039, | |
| "grad_norm": 0.8691470623016357, | |
| "learning_rate": 3.9124402229045495e-05, | |
| "loss": 0.8443, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 1.594014313597918, | |
| "grad_norm": 0.7887680530548096, | |
| "learning_rate": 3.90817829232274e-05, | |
| "loss": 0.8796, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.5972674040338322, | |
| "grad_norm": 0.8779820203781128, | |
| "learning_rate": 3.903910358908267e-05, | |
| "loss": 0.8808, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 1.6005204944697464, | |
| "grad_norm": 0.9116110801696777, | |
| "learning_rate": 3.8996364408546284e-05, | |
| "loss": 0.8539, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.6037735849056602, | |
| "grad_norm": 0.8549916744232178, | |
| "learning_rate": 3.895356556380833e-05, | |
| "loss": 0.8714, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 1.6070266753415745, | |
| "grad_norm": 0.7568048238754272, | |
| "learning_rate": 3.8910707237313274e-05, | |
| "loss": 0.8545, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.6102797657774888, | |
| "grad_norm": 0.873261034488678, | |
| "learning_rate": 3.886778961175909e-05, | |
| "loss": 0.861, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.6135328562134026, | |
| "grad_norm": 0.8435690999031067, | |
| "learning_rate": 3.8824812870096585e-05, | |
| "loss": 0.849, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.6167859466493169, | |
| "grad_norm": 0.7543259263038635, | |
| "learning_rate": 3.878177719552854e-05, | |
| "loss": 0.8389, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.6200390370852311, | |
| "grad_norm": 0.6784664392471313, | |
| "learning_rate": 3.8738682771508975e-05, | |
| "loss": 0.862, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.623292127521145, | |
| "grad_norm": 0.735149085521698, | |
| "learning_rate": 3.869552978174232e-05, | |
| "loss": 0.86, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 1.6265452179570592, | |
| "grad_norm": 1.1492180824279785, | |
| "learning_rate": 3.8652318410182696e-05, | |
| "loss": 0.8682, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.6297983083929735, | |
| "grad_norm": 1.2123005390167236, | |
| "learning_rate": 3.860904884103307e-05, | |
| "loss": 0.8767, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 1.6330513988288873, | |
| "grad_norm": 1.0573855638504028, | |
| "learning_rate": 3.85657212587445e-05, | |
| "loss": 0.8784, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.6363044892648015, | |
| "grad_norm": 0.7657274603843689, | |
| "learning_rate": 3.8522335848015354e-05, | |
| "loss": 0.8614, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 1.6395575797007158, | |
| "grad_norm": 0.7586051225662231, | |
| "learning_rate": 3.847889279379052e-05, | |
| "loss": 0.8522, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.6428106701366298, | |
| "grad_norm": 0.8660874366760254, | |
| "learning_rate": 3.843539228126058e-05, | |
| "loss": 0.8491, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.6460637605725439, | |
| "grad_norm": 0.8181445002555847, | |
| "learning_rate": 3.8391834495861104e-05, | |
| "loss": 0.8774, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.6493168510084582, | |
| "grad_norm": 0.8161119222640991, | |
| "learning_rate": 3.834821962327173e-05, | |
| "loss": 0.8446, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 1.6525699414443722, | |
| "grad_norm": 0.7471867203712463, | |
| "learning_rate": 3.830454784941552e-05, | |
| "loss": 0.8743, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.6558230318802862, | |
| "grad_norm": 0.8243322372436523, | |
| "learning_rate": 3.8260819360458066e-05, | |
| "loss": 0.8582, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 1.6590761223162005, | |
| "grad_norm": 0.7759085297584534, | |
| "learning_rate": 3.8217034342806726e-05, | |
| "loss": 0.8634, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.6623292127521145, | |
| "grad_norm": 0.7820890545845032, | |
| "learning_rate": 3.817319298310984e-05, | |
| "loss": 0.849, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 1.6655823031880286, | |
| "grad_norm": 0.7369856238365173, | |
| "learning_rate": 3.812929546825591e-05, | |
| "loss": 0.851, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.6688353936239428, | |
| "grad_norm": 0.6760427355766296, | |
| "learning_rate": 3.8085341985372847e-05, | |
| "loss": 0.8526, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 1.6720884840598569, | |
| "grad_norm": 0.7964663505554199, | |
| "learning_rate": 3.804133272182711e-05, | |
| "loss": 0.8369, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.675341574495771, | |
| "grad_norm": 0.7458584308624268, | |
| "learning_rate": 3.7997267865222966e-05, | |
| "loss": 0.858, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.6785946649316852, | |
| "grad_norm": 0.7713748812675476, | |
| "learning_rate": 3.795314760340165e-05, | |
| "loss": 0.8422, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.6818477553675992, | |
| "grad_norm": 1.1121766567230225, | |
| "learning_rate": 3.79089721244406e-05, | |
| "loss": 0.8564, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 1.6851008458035133, | |
| "grad_norm": 0.7054054141044617, | |
| "learning_rate": 3.786474161665261e-05, | |
| "loss": 0.8503, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.6883539362394275, | |
| "grad_norm": 0.8231985569000244, | |
| "learning_rate": 3.782045626858508e-05, | |
| "loss": 0.8459, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 1.6916070266753416, | |
| "grad_norm": 0.8120073676109314, | |
| "learning_rate": 3.7776116269019164e-05, | |
| "loss": 0.8579, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.6948601171112556, | |
| "grad_norm": 0.7463471293449402, | |
| "learning_rate": 3.773172180696899e-05, | |
| "loss": 0.8685, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 1.6981132075471699, | |
| "grad_norm": 0.9310842752456665, | |
| "learning_rate": 3.7687273071680875e-05, | |
| "loss": 0.8657, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.701366297983084, | |
| "grad_norm": 0.7997697591781616, | |
| "learning_rate": 3.7642770252632445e-05, | |
| "loss": 0.8536, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 1.704619388418998, | |
| "grad_norm": 0.9354361295700073, | |
| "learning_rate": 3.7598213539531924e-05, | |
| "loss": 0.8584, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.7078724788549122, | |
| "grad_norm": 0.8442994356155396, | |
| "learning_rate": 3.755360312231726e-05, | |
| "loss": 0.8509, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.7111255692908263, | |
| "grad_norm": 0.7156201601028442, | |
| "learning_rate": 3.7508939191155315e-05, | |
| "loss": 0.8587, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.7143786597267403, | |
| "grad_norm": 0.8114856481552124, | |
| "learning_rate": 3.7464221936441094e-05, | |
| "loss": 0.8575, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 1.7176317501626546, | |
| "grad_norm": 0.9958142042160034, | |
| "learning_rate": 3.741945154879691e-05, | |
| "loss": 0.8291, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.7208848405985686, | |
| "grad_norm": 0.8814706206321716, | |
| "learning_rate": 3.7374628219071576e-05, | |
| "loss": 0.8756, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 0.9752816557884216, | |
| "learning_rate": 3.732975213833957e-05, | |
| "loss": 0.8526, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.727391021470397, | |
| "grad_norm": 1.069827914237976, | |
| "learning_rate": 3.728482349790025e-05, | |
| "loss": 0.85, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 1.730644111906311, | |
| "grad_norm": 0.7829200029373169, | |
| "learning_rate": 3.723984248927704e-05, | |
| "loss": 0.8775, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.733897202342225, | |
| "grad_norm": 0.9264289140701294, | |
| "learning_rate": 3.719480930421657e-05, | |
| "loss": 0.8561, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 1.7371502927781393, | |
| "grad_norm": 1.0062094926834106, | |
| "learning_rate": 3.7149724134687915e-05, | |
| "loss": 0.8734, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.7404033832140533, | |
| "grad_norm": 1.15998375415802, | |
| "learning_rate": 3.710458717288176e-05, | |
| "loss": 0.8817, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.7436564736499673, | |
| "grad_norm": 0.8632653951644897, | |
| "learning_rate": 3.705939861120952e-05, | |
| "loss": 0.8467, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.7469095640858816, | |
| "grad_norm": 0.9579365849494934, | |
| "learning_rate": 3.7014158642302645e-05, | |
| "loss": 0.8516, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 1.7501626545217959, | |
| "grad_norm": 0.7893072962760925, | |
| "learning_rate": 3.6968867459011675e-05, | |
| "loss": 0.8533, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.7534157449577097, | |
| "grad_norm": 0.8436265587806702, | |
| "learning_rate": 3.692352525440548e-05, | |
| "loss": 0.8661, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 1.756668835393624, | |
| "grad_norm": 0.7928500175476074, | |
| "learning_rate": 3.687813222177042e-05, | |
| "loss": 0.8617, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.7599219258295382, | |
| "grad_norm": 1.0979465246200562, | |
| "learning_rate": 3.683268855460955e-05, | |
| "loss": 0.8457, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 1.763175016265452, | |
| "grad_norm": 0.9280642867088318, | |
| "learning_rate": 3.678719444664174e-05, | |
| "loss": 0.8698, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.7664281067013663, | |
| "grad_norm": 0.7560756206512451, | |
| "learning_rate": 3.674165009180091e-05, | |
| "loss": 0.8476, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 1.7696811971372806, | |
| "grad_norm": 1.6937271356582642, | |
| "learning_rate": 3.669605568423515e-05, | |
| "loss": 0.8601, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.7729342875731944, | |
| "grad_norm": 0.7721190452575684, | |
| "learning_rate": 3.665041141830594e-05, | |
| "loss": 0.8479, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.7761873780091086, | |
| "grad_norm": 0.691184401512146, | |
| "learning_rate": 3.660471748858728e-05, | |
| "loss": 0.846, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.779440468445023, | |
| "grad_norm": 0.8458099961280823, | |
| "learning_rate": 3.655897408986487e-05, | |
| "loss": 0.8543, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 1.7826935588809367, | |
| "grad_norm": 0.7717384696006775, | |
| "learning_rate": 3.651318141713532e-05, | |
| "loss": 0.8555, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.785946649316851, | |
| "grad_norm": 0.7364319562911987, | |
| "learning_rate": 3.646733966560527e-05, | |
| "loss": 0.8693, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 1.7891997397527653, | |
| "grad_norm": 0.7715139389038086, | |
| "learning_rate": 3.642144903069055e-05, | |
| "loss": 0.8575, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.7924528301886793, | |
| "grad_norm": 0.7801803350448608, | |
| "learning_rate": 3.637550970801543e-05, | |
| "loss": 0.8832, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 1.7957059206245933, | |
| "grad_norm": 0.8797639012336731, | |
| "learning_rate": 3.632952189341166e-05, | |
| "loss": 0.8787, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.7989590110605076, | |
| "grad_norm": 0.8655262589454651, | |
| "learning_rate": 3.628348578291776e-05, | |
| "loss": 0.8527, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 1.8022121014964216, | |
| "grad_norm": 0.7039540410041809, | |
| "learning_rate": 3.623740157277811e-05, | |
| "loss": 0.8023, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.8054651919323357, | |
| "grad_norm": 0.8364835977554321, | |
| "learning_rate": 3.619126945944209e-05, | |
| "loss": 0.8428, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.80871828236825, | |
| "grad_norm": 0.8477578163146973, | |
| "learning_rate": 3.614508963956335e-05, | |
| "loss": 0.8364, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.811971372804164, | |
| "grad_norm": 0.790069043636322, | |
| "learning_rate": 3.609886230999886e-05, | |
| "loss": 0.8557, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 1.815224463240078, | |
| "grad_norm": 1.1685853004455566, | |
| "learning_rate": 3.605258766780815e-05, | |
| "loss": 0.8639, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.8184775536759923, | |
| "grad_norm": 0.6820409297943115, | |
| "learning_rate": 3.600626591025239e-05, | |
| "loss": 0.8561, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 1.8217306441119063, | |
| "grad_norm": 0.6816509366035461, | |
| "learning_rate": 3.595989723479363e-05, | |
| "loss": 0.8595, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.8249837345478204, | |
| "grad_norm": 0.6458393335342407, | |
| "learning_rate": 3.591348183909391e-05, | |
| "loss": 0.852, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.8282368249837346, | |
| "grad_norm": 0.8720667958259583, | |
| "learning_rate": 3.586701992101446e-05, | |
| "loss": 0.8493, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.8314899154196487, | |
| "grad_norm": 0.8076214790344238, | |
| "learning_rate": 3.582051167861477e-05, | |
| "loss": 0.8399, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.8347430058555627, | |
| "grad_norm": 1.1117894649505615, | |
| "learning_rate": 3.577395731015184e-05, | |
| "loss": 0.8462, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.837996096291477, | |
| "grad_norm": 0.8749067783355713, | |
| "learning_rate": 3.57273570140793e-05, | |
| "loss": 0.8484, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.841249186727391, | |
| "grad_norm": 0.9115192890167236, | |
| "learning_rate": 3.5680710989046565e-05, | |
| "loss": 0.8379, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.844502277163305, | |
| "grad_norm": 0.7345873117446899, | |
| "learning_rate": 3.5634019433897964e-05, | |
| "loss": 0.8521, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.8477553675992193, | |
| "grad_norm": 0.8665250539779663, | |
| "learning_rate": 3.558728254767192e-05, | |
| "loss": 0.8591, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.8510084580351334, | |
| "grad_norm": 0.6966584324836731, | |
| "learning_rate": 3.5540500529600096e-05, | |
| "loss": 0.8633, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.8542615484710474, | |
| "grad_norm": 0.9217740893363953, | |
| "learning_rate": 3.5493673579106555e-05, | |
| "loss": 0.8581, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.8575146389069617, | |
| "grad_norm": 1.1653602123260498, | |
| "learning_rate": 3.5446801895806904e-05, | |
| "loss": 0.8429, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.8607677293428757, | |
| "grad_norm": 1.0861412286758423, | |
| "learning_rate": 3.539988567950741e-05, | |
| "loss": 0.8385, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.8640208197787898, | |
| "grad_norm": 0.9099658727645874, | |
| "learning_rate": 3.53529251302042e-05, | |
| "loss": 0.8727, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.867273910214704, | |
| "grad_norm": 0.8507881760597229, | |
| "learning_rate": 3.530592044808237e-05, | |
| "loss": 0.8601, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.870527000650618, | |
| "grad_norm": 0.7487595677375793, | |
| "learning_rate": 3.525887183351517e-05, | |
| "loss": 0.8453, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.873780091086532, | |
| "grad_norm": 0.7527421116828918, | |
| "learning_rate": 3.521177948706311e-05, | |
| "loss": 0.856, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.8770331815224464, | |
| "grad_norm": 1.198721170425415, | |
| "learning_rate": 3.5164643609473114e-05, | |
| "loss": 0.8322, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.8802862719583604, | |
| "grad_norm": 0.7312609553337097, | |
| "learning_rate": 3.51174644016777e-05, | |
| "loss": 0.8571, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.8835393623942744, | |
| "grad_norm": 0.813762903213501, | |
| "learning_rate": 3.507024206479406e-05, | |
| "loss": 0.8485, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.8867924528301887, | |
| "grad_norm": 0.6589996814727783, | |
| "learning_rate": 3.502297680012327e-05, | |
| "loss": 0.8199, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.8900455432661027, | |
| "grad_norm": 0.8973954319953918, | |
| "learning_rate": 3.4975668809149375e-05, | |
| "loss": 0.8595, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.8932986337020168, | |
| "grad_norm": 0.8979359269142151, | |
| "learning_rate": 3.492831829353857e-05, | |
| "loss": 0.8637, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.896551724137931, | |
| "grad_norm": 0.7665019035339355, | |
| "learning_rate": 3.488092545513833e-05, | |
| "loss": 0.8753, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.8998048145738453, | |
| "grad_norm": 1.2857329845428467, | |
| "learning_rate": 3.483349049597653e-05, | |
| "loss": 0.8394, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.9030579050097591, | |
| "grad_norm": 0.7651403546333313, | |
| "learning_rate": 3.4786013618260615e-05, | |
| "loss": 0.846, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.9063109954456734, | |
| "grad_norm": 0.818390429019928, | |
| "learning_rate": 3.47384950243767e-05, | |
| "loss": 0.8919, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.9095640858815877, | |
| "grad_norm": 0.8343967795372009, | |
| "learning_rate": 3.4690934916888754e-05, | |
| "loss": 0.8451, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.9128171763175015, | |
| "grad_norm": 0.8200094699859619, | |
| "learning_rate": 3.464333349853769e-05, | |
| "loss": 0.8468, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.9160702667534157, | |
| "grad_norm": 0.8766981959342957, | |
| "learning_rate": 3.459569097224054e-05, | |
| "loss": 0.8455, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.91932335718933, | |
| "grad_norm": 0.7592107057571411, | |
| "learning_rate": 3.454800754108957e-05, | |
| "loss": 0.8564, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.9225764476252438, | |
| "grad_norm": 0.7694371938705444, | |
| "learning_rate": 3.45002834083514e-05, | |
| "loss": 0.8579, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.925829538061158, | |
| "grad_norm": 0.9310813546180725, | |
| "learning_rate": 3.445251877746616e-05, | |
| "loss": 0.853, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.9290826284970723, | |
| "grad_norm": 0.7357284426689148, | |
| "learning_rate": 3.440471385204664e-05, | |
| "loss": 0.843, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.9323357189329864, | |
| "grad_norm": 1.0630100965499878, | |
| "learning_rate": 3.4356868835877376e-05, | |
| "loss": 0.8656, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.9355888093689004, | |
| "grad_norm": 1.3015029430389404, | |
| "learning_rate": 3.430898393291381e-05, | |
| "loss": 0.8681, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.9388418998048147, | |
| "grad_norm": 0.941599428653717, | |
| "learning_rate": 3.426105934728141e-05, | |
| "loss": 0.8374, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.9420949902407287, | |
| "grad_norm": 0.827949583530426, | |
| "learning_rate": 3.4213095283274807e-05, | |
| "loss": 0.8342, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.9453480806766428, | |
| "grad_norm": 0.7155514359474182, | |
| "learning_rate": 3.416509194535693e-05, | |
| "loss": 0.8604, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.948601171112557, | |
| "grad_norm": 0.6395983099937439, | |
| "learning_rate": 3.411704953815813e-05, | |
| "loss": 0.8545, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.951854261548471, | |
| "grad_norm": 1.0403225421905518, | |
| "learning_rate": 3.406896826647528e-05, | |
| "loss": 0.8317, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.9551073519843851, | |
| "grad_norm": 0.809688925743103, | |
| "learning_rate": 3.4020848335270944e-05, | |
| "loss": 0.8459, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.9583604424202994, | |
| "grad_norm": 0.7284942865371704, | |
| "learning_rate": 3.397268994967248e-05, | |
| "loss": 0.8609, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.9616135328562134, | |
| "grad_norm": 0.8415728807449341, | |
| "learning_rate": 3.392449331497117e-05, | |
| "loss": 0.8421, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.9648666232921275, | |
| "grad_norm": 0.7867475152015686, | |
| "learning_rate": 3.387625863662137e-05, | |
| "loss": 0.8537, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.9681197137280417, | |
| "grad_norm": 0.8730093240737915, | |
| "learning_rate": 3.3827986120239556e-05, | |
| "loss": 0.8453, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.9713728041639558, | |
| "grad_norm": 1.0075076818466187, | |
| "learning_rate": 3.377967597160355e-05, | |
| "loss": 0.8485, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.9746258945998698, | |
| "grad_norm": 0.7558779716491699, | |
| "learning_rate": 3.373132839665159e-05, | |
| "loss": 0.8283, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.977878985035784, | |
| "grad_norm": 0.8635545969009399, | |
| "learning_rate": 3.368294360148141e-05, | |
| "loss": 0.8445, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.9811320754716981, | |
| "grad_norm": 0.7366521954536438, | |
| "learning_rate": 3.363452179234946e-05, | |
| "loss": 0.8377, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.9843851659076122, | |
| "grad_norm": 0.895798921585083, | |
| "learning_rate": 3.3586063175669957e-05, | |
| "loss": 0.8517, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.9876382563435264, | |
| "grad_norm": 0.8703877329826355, | |
| "learning_rate": 3.353756795801402e-05, | |
| "loss": 0.8635, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.9908913467794405, | |
| "grad_norm": 0.8399415612220764, | |
| "learning_rate": 3.348903634610879e-05, | |
| "loss": 0.8469, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.9941444372153545, | |
| "grad_norm": 0.6633405685424805, | |
| "learning_rate": 3.344046854683656e-05, | |
| "loss": 0.8265, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.9973975276512688, | |
| "grad_norm": 0.8422790765762329, | |
| "learning_rate": 3.3391864767233874e-05, | |
| "loss": 0.8356, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.8011475160594294, | |
| "eval_loss": 0.444091796875, | |
| "eval_precision": 0.8009366991425545, | |
| "eval_recall": 0.8015108608319047, | |
| "eval_runtime": 238.6273, | |
| "eval_samples_per_second": 1648.743, | |
| "eval_steps_per_second": 1.613, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 2.000650618087183, | |
| "grad_norm": 0.9484532475471497, | |
| "learning_rate": 3.334322521449066e-05, | |
| "loss": 0.8414, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 2.003903708523097, | |
| "grad_norm": 1.058498740196228, | |
| "learning_rate": 3.3294550095949325e-05, | |
| "loss": 0.7647, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.007156798959011, | |
| "grad_norm": 1.1817635297775269, | |
| "learning_rate": 3.3245839619103916e-05, | |
| "loss": 0.7739, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 2.0104098893949254, | |
| "grad_norm": 0.9960103034973145, | |
| "learning_rate": 3.319709399159919e-05, | |
| "loss": 0.7627, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.013662979830839, | |
| "grad_norm": 0.7337830066680908, | |
| "learning_rate": 3.314831342122974e-05, | |
| "loss": 0.7736, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 2.0169160702667535, | |
| "grad_norm": 0.8539023995399475, | |
| "learning_rate": 3.309949811593914e-05, | |
| "loss": 0.7677, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.0201691607026677, | |
| "grad_norm": 0.812573254108429, | |
| "learning_rate": 3.3050648283818985e-05, | |
| "loss": 0.7688, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 2.0234222511385815, | |
| "grad_norm": 0.8771811127662659, | |
| "learning_rate": 3.30017641331081e-05, | |
| "loss": 0.7873, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.026675341574496, | |
| "grad_norm": 0.8817070126533508, | |
| "learning_rate": 3.295284587219159e-05, | |
| "loss": 0.7516, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 2.02992843201041, | |
| "grad_norm": 0.8555654287338257, | |
| "learning_rate": 3.290389370959995e-05, | |
| "loss": 0.7245, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.033181522446324, | |
| "grad_norm": 0.9785915017127991, | |
| "learning_rate": 3.285490785400822e-05, | |
| "loss": 0.7591, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 2.036434612882238, | |
| "grad_norm": 1.1170217990875244, | |
| "learning_rate": 3.280588851423504e-05, | |
| "loss": 0.7545, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.0396877033181524, | |
| "grad_norm": 0.889552652835846, | |
| "learning_rate": 3.275683589924181e-05, | |
| "loss": 0.7509, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 2.0429407937540662, | |
| "grad_norm": 0.9748543500900269, | |
| "learning_rate": 3.270775021813177e-05, | |
| "loss": 0.7419, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.0461938841899805, | |
| "grad_norm": 0.9157707691192627, | |
| "learning_rate": 3.26586316801491e-05, | |
| "loss": 0.7476, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 2.0494469746258948, | |
| "grad_norm": 1.3593250513076782, | |
| "learning_rate": 3.2609480494678055e-05, | |
| "loss": 0.778, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.0527000650618086, | |
| "grad_norm": 0.8584513664245605, | |
| "learning_rate": 3.256029687124209e-05, | |
| "loss": 0.7634, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 2.055953155497723, | |
| "grad_norm": 1.1206103563308716, | |
| "learning_rate": 3.2511081019502875e-05, | |
| "loss": 0.7612, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.059206245933637, | |
| "grad_norm": 1.1010791063308716, | |
| "learning_rate": 3.2461833149259516e-05, | |
| "loss": 0.7631, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 2.062459336369551, | |
| "grad_norm": 1.0924779176712036, | |
| "learning_rate": 3.241255347044759e-05, | |
| "loss": 0.7592, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.065712426805465, | |
| "grad_norm": 0.9586931467056274, | |
| "learning_rate": 3.236324219313826e-05, | |
| "loss": 0.7591, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 2.0689655172413794, | |
| "grad_norm": 1.0838814973831177, | |
| "learning_rate": 3.231389952753742e-05, | |
| "loss": 0.7724, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.0722186076772933, | |
| "grad_norm": 0.9030594229698181, | |
| "learning_rate": 3.226452568398471e-05, | |
| "loss": 0.7627, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 2.0754716981132075, | |
| "grad_norm": 1.0417284965515137, | |
| "learning_rate": 3.221512087295275e-05, | |
| "loss": 0.765, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.078724788549122, | |
| "grad_norm": 1.3411697149276733, | |
| "learning_rate": 3.216568530504611e-05, | |
| "loss": 0.7718, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 2.0819778789850356, | |
| "grad_norm": 1.1210920810699463, | |
| "learning_rate": 3.21162191910005e-05, | |
| "loss": 0.7578, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.08523096942095, | |
| "grad_norm": 1.0522574186325073, | |
| "learning_rate": 3.2066722741681845e-05, | |
| "loss": 0.7645, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 2.088484059856864, | |
| "grad_norm": 0.9024161100387573, | |
| "learning_rate": 3.2017196168085345e-05, | |
| "loss": 0.7542, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.091737150292778, | |
| "grad_norm": 0.93799889087677, | |
| "learning_rate": 3.196763968133466e-05, | |
| "loss": 0.7675, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 2.094990240728692, | |
| "grad_norm": 0.9059098362922668, | |
| "learning_rate": 3.191805349268097e-05, | |
| "loss": 0.774, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.0982433311646065, | |
| "grad_norm": 0.954647958278656, | |
| "learning_rate": 3.1868437813502026e-05, | |
| "loss": 0.7591, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 2.1014964216005203, | |
| "grad_norm": 0.956679105758667, | |
| "learning_rate": 3.1818792855301316e-05, | |
| "loss": 0.7585, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.1047495120364346, | |
| "grad_norm": 0.8911952376365662, | |
| "learning_rate": 3.1769118829707156e-05, | |
| "loss": 0.7736, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 2.108002602472349, | |
| "grad_norm": 1.1105453968048096, | |
| "learning_rate": 3.171941594847173e-05, | |
| "loss": 0.746, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.1112556929082626, | |
| "grad_norm": 1.0151236057281494, | |
| "learning_rate": 3.1669684423470275e-05, | |
| "loss": 0.7628, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 2.114508783344177, | |
| "grad_norm": 1.0137097835540771, | |
| "learning_rate": 3.16199244667001e-05, | |
| "loss": 0.7611, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.117761873780091, | |
| "grad_norm": 0.9404064416885376, | |
| "learning_rate": 3.157013629027972e-05, | |
| "loss": 0.7601, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 2.121014964216005, | |
| "grad_norm": 1.3806120157241821, | |
| "learning_rate": 3.152032010644796e-05, | |
| "loss": 0.7647, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.1242680546519193, | |
| "grad_norm": 0.9700812697410583, | |
| "learning_rate": 3.147047612756302e-05, | |
| "loss": 0.766, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 2.1275211450878335, | |
| "grad_norm": 1.1779789924621582, | |
| "learning_rate": 3.142060456610159e-05, | |
| "loss": 0.7571, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.130774235523748, | |
| "grad_norm": 1.1766470670700073, | |
| "learning_rate": 3.137070563465796e-05, | |
| "loss": 0.7587, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 2.1340273259596616, | |
| "grad_norm": 1.1181317567825317, | |
| "learning_rate": 3.1320779545943034e-05, | |
| "loss": 0.7514, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.137280416395576, | |
| "grad_norm": 1.520752191543579, | |
| "learning_rate": 3.127082651278357e-05, | |
| "loss": 0.7383, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 2.14053350683149, | |
| "grad_norm": 1.1578936576843262, | |
| "learning_rate": 3.1220846748121105e-05, | |
| "loss": 0.7736, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.143786597267404, | |
| "grad_norm": 1.3091363906860352, | |
| "learning_rate": 3.117084046501119e-05, | |
| "loss": 0.7615, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 2.147039687703318, | |
| "grad_norm": 0.9620407223701477, | |
| "learning_rate": 3.112080787662237e-05, | |
| "loss": 0.7924, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.1502927781392325, | |
| "grad_norm": 0.9089716672897339, | |
| "learning_rate": 3.107074919623536e-05, | |
| "loss": 0.7455, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 2.1535458685751463, | |
| "grad_norm": 1.1510998010635376, | |
| "learning_rate": 3.102066463724209e-05, | |
| "loss": 0.765, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.1567989590110606, | |
| "grad_norm": 1.8722169399261475, | |
| "learning_rate": 3.0970554413144805e-05, | |
| "loss": 0.7627, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 2.160052049446975, | |
| "grad_norm": 1.0691964626312256, | |
| "learning_rate": 3.0920418737555144e-05, | |
| "loss": 0.7753, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.1633051398828886, | |
| "grad_norm": 0.9641361832618713, | |
| "learning_rate": 3.0870257824193263e-05, | |
| "loss": 0.7516, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 2.166558230318803, | |
| "grad_norm": 1.0590273141860962, | |
| "learning_rate": 3.08200718868869e-05, | |
| "loss": 0.7859, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.169811320754717, | |
| "grad_norm": 1.2373055219650269, | |
| "learning_rate": 3.076986113957044e-05, | |
| "loss": 0.772, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 2.173064411190631, | |
| "grad_norm": 1.160982608795166, | |
| "learning_rate": 3.071962579628408e-05, | |
| "loss": 0.7673, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.1763175016265452, | |
| "grad_norm": 0.8511375188827515, | |
| "learning_rate": 3.066936607117279e-05, | |
| "loss": 0.7558, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 2.1795705920624595, | |
| "grad_norm": 0.9551635384559631, | |
| "learning_rate": 3.061908217848556e-05, | |
| "loss": 0.7641, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.1828236824983733, | |
| "grad_norm": 0.9262502789497375, | |
| "learning_rate": 3.056877433257434e-05, | |
| "loss": 0.7667, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 2.1860767729342876, | |
| "grad_norm": 1.2747892141342163, | |
| "learning_rate": 3.051844274789321e-05, | |
| "loss": 0.7497, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.189329863370202, | |
| "grad_norm": 1.2817254066467285, | |
| "learning_rate": 3.046808763899745e-05, | |
| "loss": 0.7743, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 2.1925829538061157, | |
| "grad_norm": 1.3123672008514404, | |
| "learning_rate": 3.041770922054262e-05, | |
| "loss": 0.7681, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.19583604424203, | |
| "grad_norm": 1.0206502676010132, | |
| "learning_rate": 3.0367307707283626e-05, | |
| "loss": 0.7833, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 2.199089134677944, | |
| "grad_norm": 1.0204437971115112, | |
| "learning_rate": 3.0326970012795626e-05, | |
| "loss": 0.7575, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.202342225113858, | |
| "grad_norm": 1.0020246505737305, | |
| "learning_rate": 3.027652747038522e-05, | |
| "loss": 0.7702, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 2.2055953155497723, | |
| "grad_norm": 1.045996904373169, | |
| "learning_rate": 3.022606243500526e-05, | |
| "loss": 0.7609, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.2088484059856865, | |
| "grad_norm": 0.9325571060180664, | |
| "learning_rate": 3.0175575121779886e-05, | |
| "loss": 0.7363, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 2.2121014964216004, | |
| "grad_norm": 1.2504099607467651, | |
| "learning_rate": 3.012506574592825e-05, | |
| "loss": 0.7742, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.2153545868575146, | |
| "grad_norm": 1.0567350387573242, | |
| "learning_rate": 3.007453452276349e-05, | |
| "loss": 0.7544, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 2.218607677293429, | |
| "grad_norm": 0.9951023459434509, | |
| "learning_rate": 3.0023981667691926e-05, | |
| "loss": 0.7432, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.2218607677293427, | |
| "grad_norm": 1.0222620964050293, | |
| "learning_rate": 2.997340739621206e-05, | |
| "loss": 0.794, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 2.225113858165257, | |
| "grad_norm": 0.8401185870170593, | |
| "learning_rate": 2.9922811923913714e-05, | |
| "loss": 0.751, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.2283669486011712, | |
| "grad_norm": 1.1666043996810913, | |
| "learning_rate": 2.9872195466477054e-05, | |
| "loss": 0.7592, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 2.231620039037085, | |
| "grad_norm": 0.95232754945755, | |
| "learning_rate": 2.9821558239671744e-05, | |
| "loss": 0.7639, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.2348731294729993, | |
| "grad_norm": 0.8971825242042542, | |
| "learning_rate": 2.977090045935594e-05, | |
| "loss": 0.7553, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 2.2381262199089136, | |
| "grad_norm": 1.0237399339675903, | |
| "learning_rate": 2.9720222341475445e-05, | |
| "loss": 0.7504, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.2413793103448274, | |
| "grad_norm": 1.1775766611099243, | |
| "learning_rate": 2.966952410206275e-05, | |
| "loss": 0.7449, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 2.2446324007807417, | |
| "grad_norm": 0.885957658290863, | |
| "learning_rate": 2.9618805957236113e-05, | |
| "loss": 0.7631, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.247885491216656, | |
| "grad_norm": 1.3709341287612915, | |
| "learning_rate": 2.956806812319865e-05, | |
| "loss": 0.7589, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 2.2511385816525697, | |
| "grad_norm": 1.204150676727295, | |
| "learning_rate": 2.951731081623742e-05, | |
| "loss": 0.7662, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.254391672088484, | |
| "grad_norm": 1.6271796226501465, | |
| "learning_rate": 2.946653425272247e-05, | |
| "loss": 0.7821, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 2.2576447625243983, | |
| "grad_norm": 1.0852000713348389, | |
| "learning_rate": 2.9415738649105963e-05, | |
| "loss": 0.7408, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.260897852960312, | |
| "grad_norm": 1.0353608131408691, | |
| "learning_rate": 2.9364924221921185e-05, | |
| "loss": 0.7478, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 2.2641509433962264, | |
| "grad_norm": 1.881262183189392, | |
| "learning_rate": 2.9314091187781715e-05, | |
| "loss": 0.7584, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.2674040338321406, | |
| "grad_norm": 1.2990703582763672, | |
| "learning_rate": 2.9263239763380412e-05, | |
| "loss": 0.7566, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 2.2706571242680544, | |
| "grad_norm": 0.9985173940658569, | |
| "learning_rate": 2.921237016548854e-05, | |
| "loss": 0.7676, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.2739102147039687, | |
| "grad_norm": 0.9522629976272583, | |
| "learning_rate": 2.9161482610954842e-05, | |
| "loss": 0.7475, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 2.277163305139883, | |
| "grad_norm": 0.9219643473625183, | |
| "learning_rate": 2.9110577316704602e-05, | |
| "loss": 0.7613, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.280416395575797, | |
| "grad_norm": 0.9594421982765198, | |
| "learning_rate": 2.905965449973871e-05, | |
| "loss": 0.768, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 2.283669486011711, | |
| "grad_norm": 1.0452098846435547, | |
| "learning_rate": 2.900871437713279e-05, | |
| "loss": 0.7699, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.2869225764476253, | |
| "grad_norm": 0.9670342803001404, | |
| "learning_rate": 2.8957757166036193e-05, | |
| "loss": 0.7573, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 2.290175666883539, | |
| "grad_norm": 1.147403597831726, | |
| "learning_rate": 2.890678308367115e-05, | |
| "loss": 0.7688, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.2934287573194534, | |
| "grad_norm": 1.086470603942871, | |
| "learning_rate": 2.8855792347331793e-05, | |
| "loss": 0.7671, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 2.2966818477553677, | |
| "grad_norm": 1.6733858585357666, | |
| "learning_rate": 2.8804785174383248e-05, | |
| "loss": 0.7753, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.2999349381912815, | |
| "grad_norm": 1.0693230628967285, | |
| "learning_rate": 2.8753761782260723e-05, | |
| "loss": 0.7457, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 2.3031880286271957, | |
| "grad_norm": 1.079010009765625, | |
| "learning_rate": 2.8702722388468546e-05, | |
| "loss": 0.7701, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.30644111906311, | |
| "grad_norm": 0.9620556235313416, | |
| "learning_rate": 2.8651667210579257e-05, | |
| "loss": 0.759, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 2.3096942094990243, | |
| "grad_norm": 1.1349847316741943, | |
| "learning_rate": 2.8600596466232715e-05, | |
| "loss": 0.7776, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.312947299934938, | |
| "grad_norm": 1.4847538471221924, | |
| "learning_rate": 2.8549510373135092e-05, | |
| "loss": 0.7566, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 2.3162003903708523, | |
| "grad_norm": 1.657256007194519, | |
| "learning_rate": 2.8498409149058008e-05, | |
| "loss": 0.762, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.3194534808067666, | |
| "grad_norm": 1.0619240999221802, | |
| "learning_rate": 2.8447293011837596e-05, | |
| "loss": 0.771, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 2.3227065712426804, | |
| "grad_norm": 0.8844910264015198, | |
| "learning_rate": 2.8396162179373535e-05, | |
| "loss": 0.7573, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.3259596616785947, | |
| "grad_norm": 1.3543357849121094, | |
| "learning_rate": 2.8345016869628175e-05, | |
| "loss": 0.7736, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 2.329212752114509, | |
| "grad_norm": 0.9610804319381714, | |
| "learning_rate": 2.8293857300625555e-05, | |
| "loss": 0.7536, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.3324658425504228, | |
| "grad_norm": 1.2407771348953247, | |
| "learning_rate": 2.8242683690450518e-05, | |
| "loss": 0.7584, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 2.335718932986337, | |
| "grad_norm": 1.388168215751648, | |
| "learning_rate": 2.8191496257247764e-05, | |
| "loss": 0.7426, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.3389720234222513, | |
| "grad_norm": 1.1140729188919067, | |
| "learning_rate": 2.814029521922088e-05, | |
| "loss": 0.7418, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 2.342225113858165, | |
| "grad_norm": 1.0877522230148315, | |
| "learning_rate": 2.8089080794631512e-05, | |
| "loss": 0.7531, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.3454782042940794, | |
| "grad_norm": 1.0917423963546753, | |
| "learning_rate": 2.803785320179832e-05, | |
| "loss": 0.7435, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 2.3487312947299936, | |
| "grad_norm": 1.3571592569351196, | |
| "learning_rate": 2.7986612659096113e-05, | |
| "loss": 0.7594, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.3519843851659075, | |
| "grad_norm": 1.0520139932632446, | |
| "learning_rate": 2.7935359384954914e-05, | |
| "loss": 0.758, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 2.3552374756018217, | |
| "grad_norm": 1.271592617034912, | |
| "learning_rate": 2.7884093597858996e-05, | |
| "loss": 0.7457, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.358490566037736, | |
| "grad_norm": 0.9961024522781372, | |
| "learning_rate": 2.783281551634599e-05, | |
| "loss": 0.7626, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 2.36174365647365, | |
| "grad_norm": 1.3508564233779907, | |
| "learning_rate": 2.7781525359005943e-05, | |
| "loss": 0.734, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.364996746909564, | |
| "grad_norm": 1.0961614847183228, | |
| "learning_rate": 2.7730223344480348e-05, | |
| "loss": 0.7553, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 2.3682498373454783, | |
| "grad_norm": 1.032395839691162, | |
| "learning_rate": 2.7678909691461274e-05, | |
| "loss": 0.7915, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.371502927781392, | |
| "grad_norm": 1.1500605344772339, | |
| "learning_rate": 2.7627584618690394e-05, | |
| "loss": 0.7539, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 2.3747560182173064, | |
| "grad_norm": 1.0203113555908203, | |
| "learning_rate": 2.7576248344958054e-05, | |
| "loss": 0.7771, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.3780091086532207, | |
| "grad_norm": 2.247779607772827, | |
| "learning_rate": 2.7524901089102358e-05, | |
| "loss": 0.764, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 2.3812621990891345, | |
| "grad_norm": 1.131200909614563, | |
| "learning_rate": 2.7473543070008213e-05, | |
| "loss": 0.742, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.3845152895250488, | |
| "grad_norm": 1.2509359121322632, | |
| "learning_rate": 2.7422174506606413e-05, | |
| "loss": 0.7461, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 2.387768379960963, | |
| "grad_norm": 0.864366352558136, | |
| "learning_rate": 2.737079561787272e-05, | |
| "loss": 0.7405, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.391021470396877, | |
| "grad_norm": 0.9416084885597229, | |
| "learning_rate": 2.7319406622826878e-05, | |
| "loss": 0.7439, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 2.394274560832791, | |
| "grad_norm": 1.7094473838806152, | |
| "learning_rate": 2.726800774053173e-05, | |
| "loss": 0.7698, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.3975276512687054, | |
| "grad_norm": 0.9964091777801514, | |
| "learning_rate": 2.7216599190092273e-05, | |
| "loss": 0.7536, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 2.4007807417046196, | |
| "grad_norm": 1.1519944667816162, | |
| "learning_rate": 2.7165181190654702e-05, | |
| "loss": 0.7459, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.4040338321405335, | |
| "grad_norm": 1.2240533828735352, | |
| "learning_rate": 2.7113753961405515e-05, | |
| "loss": 0.7434, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 2.4072869225764477, | |
| "grad_norm": 1.122253656387329, | |
| "learning_rate": 2.7062317721570512e-05, | |
| "loss": 0.7471, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.410540013012362, | |
| "grad_norm": 1.0433543920516968, | |
| "learning_rate": 2.7010872690413956e-05, | |
| "loss": 0.7429, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 2.413793103448276, | |
| "grad_norm": 1.092159628868103, | |
| "learning_rate": 2.6959419087237553e-05, | |
| "loss": 0.7506, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.41704619388419, | |
| "grad_norm": 0.9082927107810974, | |
| "learning_rate": 2.6907957131379553e-05, | |
| "loss": 0.7666, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 2.4202992843201043, | |
| "grad_norm": 0.8798219561576843, | |
| "learning_rate": 2.6856487042213822e-05, | |
| "loss": 0.7637, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.423552374756018, | |
| "grad_norm": 0.8654388189315796, | |
| "learning_rate": 2.6805009039148897e-05, | |
| "loss": 0.7541, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.4268054651919324, | |
| "grad_norm": 1.0439229011535645, | |
| "learning_rate": 2.675352334162704e-05, | |
| "loss": 0.7618, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.4300585556278467, | |
| "grad_norm": 0.9634140729904175, | |
| "learning_rate": 2.6702030169123316e-05, | |
| "loss": 0.737, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 2.4333116460637605, | |
| "grad_norm": 0.8647895455360413, | |
| "learning_rate": 2.6650529741144665e-05, | |
| "loss": 0.7485, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.4365647364996748, | |
| "grad_norm": 1.984215259552002, | |
| "learning_rate": 2.6599022277228948e-05, | |
| "loss": 0.7541, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 2.439817826935589, | |
| "grad_norm": 1.074607014656067, | |
| "learning_rate": 2.6547507996944022e-05, | |
| "loss": 0.7595, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.443070917371503, | |
| "grad_norm": 0.9121082425117493, | |
| "learning_rate": 2.649598711988679e-05, | |
| "loss": 0.7741, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 2.446324007807417, | |
| "grad_norm": 1.6042678356170654, | |
| "learning_rate": 2.6444459865682297e-05, | |
| "loss": 0.7699, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.4495770982433314, | |
| "grad_norm": 0.9366397857666016, | |
| "learning_rate": 2.6392926453982748e-05, | |
| "loss": 0.7525, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 2.452830188679245, | |
| "grad_norm": 1.0728055238723755, | |
| "learning_rate": 2.6341387104466612e-05, | |
| "loss": 0.749, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.4560832791151594, | |
| "grad_norm": 0.988258957862854, | |
| "learning_rate": 2.6289842036837675e-05, | |
| "loss": 0.7563, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.4593363695510737, | |
| "grad_norm": 1.2626458406448364, | |
| "learning_rate": 2.6238291470824085e-05, | |
| "loss": 0.7367, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.4625894599869875, | |
| "grad_norm": 0.8835701942443848, | |
| "learning_rate": 2.6186735626177428e-05, | |
| "loss": 0.7534, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 2.465842550422902, | |
| "grad_norm": 0.8948650360107422, | |
| "learning_rate": 2.6135174722671813e-05, | |
| "loss": 0.7975, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.469095640858816, | |
| "grad_norm": 1.0557647943496704, | |
| "learning_rate": 2.608360898010288e-05, | |
| "loss": 0.7542, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 2.47234873129473, | |
| "grad_norm": 1.1379538774490356, | |
| "learning_rate": 2.603203861828693e-05, | |
| "loss": 0.7569, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.475601821730644, | |
| "grad_norm": 1.1298165321350098, | |
| "learning_rate": 2.598046385705994e-05, | |
| "loss": 0.7662, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 2.4788549121665584, | |
| "grad_norm": 0.9936167001724243, | |
| "learning_rate": 2.5928884916276635e-05, | |
| "loss": 0.7427, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.482108002602472, | |
| "grad_norm": 1.055421233177185, | |
| "learning_rate": 2.5877302015809574e-05, | |
| "loss": 0.741, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 2.4853610930383865, | |
| "grad_norm": 1.0035120248794556, | |
| "learning_rate": 2.5825715375548175e-05, | |
| "loss": 0.7495, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.4886141834743007, | |
| "grad_norm": 1.5768109560012817, | |
| "learning_rate": 2.5774125215397815e-05, | |
| "loss": 0.7677, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.4918672739102146, | |
| "grad_norm": 1.1085072755813599, | |
| "learning_rate": 2.5722531755278874e-05, | |
| "loss": 0.7693, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.495120364346129, | |
| "grad_norm": 0.9290764927864075, | |
| "learning_rate": 2.567093521512578e-05, | |
| "loss": 0.7734, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 2.498373454782043, | |
| "grad_norm": 1.2003841400146484, | |
| "learning_rate": 2.561933581488612e-05, | |
| "loss": 0.7529, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.501626545217957, | |
| "grad_norm": 0.9982072114944458, | |
| "learning_rate": 2.556773377451965e-05, | |
| "loss": 0.7555, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 2.504879635653871, | |
| "grad_norm": 0.9454076886177063, | |
| "learning_rate": 2.5516129313997388e-05, | |
| "loss": 0.7726, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.5081327260897854, | |
| "grad_norm": 0.9885278940200806, | |
| "learning_rate": 2.5464522653300676e-05, | |
| "loss": 0.7585, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 2.5113858165256993, | |
| "grad_norm": 1.0617841482162476, | |
| "learning_rate": 2.541291401242022e-05, | |
| "loss": 0.7613, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.5146389069616135, | |
| "grad_norm": 0.9445372223854065, | |
| "learning_rate": 2.536130361135518e-05, | |
| "loss": 0.7867, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 2.517891997397528, | |
| "grad_norm": 1.2932319641113281, | |
| "learning_rate": 2.5309691670112218e-05, | |
| "loss": 0.7509, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.5211450878334416, | |
| "grad_norm": 1.1702325344085693, | |
| "learning_rate": 2.525807840870455e-05, | |
| "loss": 0.7772, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.524398178269356, | |
| "grad_norm": 1.0334542989730835, | |
| "learning_rate": 2.5206464047151046e-05, | |
| "loss": 0.7478, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.52765126870527, | |
| "grad_norm": 2.0176279544830322, | |
| "learning_rate": 2.5154848805475224e-05, | |
| "loss": 0.759, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 2.530904359141184, | |
| "grad_norm": 1.1288046836853027, | |
| "learning_rate": 2.5103232903704393e-05, | |
| "loss": 0.7529, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.534157449577098, | |
| "grad_norm": 1.0248112678527832, | |
| "learning_rate": 2.5051616561868663e-05, | |
| "loss": 0.7748, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 2.5374105400130125, | |
| "grad_norm": 0.8906844258308411, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.7369, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.5406636304489263, | |
| "grad_norm": 1.1588047742843628, | |
| "learning_rate": 2.4948383438131346e-05, | |
| "loss": 0.7465, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 2.5439167208848406, | |
| "grad_norm": 1.0166900157928467, | |
| "learning_rate": 2.4896767096295613e-05, | |
| "loss": 0.7576, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.547169811320755, | |
| "grad_norm": 1.0682686567306519, | |
| "learning_rate": 2.484515119452478e-05, | |
| "loss": 0.7884, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 2.5504229017566686, | |
| "grad_norm": 0.9026442766189575, | |
| "learning_rate": 2.4793535952848963e-05, | |
| "loss": 0.7311, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.553675992192583, | |
| "grad_norm": 0.8642654418945312, | |
| "learning_rate": 2.4741921591295454e-05, | |
| "loss": 0.7547, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.556929082628497, | |
| "grad_norm": 1.1124982833862305, | |
| "learning_rate": 2.4690308329887788e-05, | |
| "loss": 0.7523, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.560182173064411, | |
| "grad_norm": 1.664115309715271, | |
| "learning_rate": 2.463869638864483e-05, | |
| "loss": 0.7249, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 2.5634352635003252, | |
| "grad_norm": 0.9926962852478027, | |
| "learning_rate": 2.458708598757979e-05, | |
| "loss": 0.7318, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.5666883539362395, | |
| "grad_norm": 1.076627254486084, | |
| "learning_rate": 2.4535477346699333e-05, | |
| "loss": 0.7586, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 2.5699414443721533, | |
| "grad_norm": 1.7046575546264648, | |
| "learning_rate": 2.4483870686002625e-05, | |
| "loss": 0.7482, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.5731945348080676, | |
| "grad_norm": 1.0066241025924683, | |
| "learning_rate": 2.443226622548036e-05, | |
| "loss": 0.7636, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 2.576447625243982, | |
| "grad_norm": 2.010552406311035, | |
| "learning_rate": 2.4380664185113887e-05, | |
| "loss": 0.7661, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.5797007156798957, | |
| "grad_norm": 1.1133430004119873, | |
| "learning_rate": 2.432906478487423e-05, | |
| "loss": 0.7597, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 2.58295380611581, | |
| "grad_norm": 1.1634178161621094, | |
| "learning_rate": 2.427746824472113e-05, | |
| "loss": 0.76, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.586206896551724, | |
| "grad_norm": 0.9780275821685791, | |
| "learning_rate": 2.4225874784602184e-05, | |
| "loss": 0.7688, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.589459986987638, | |
| "grad_norm": 1.2186133861541748, | |
| "learning_rate": 2.4174284624451824e-05, | |
| "loss": 0.7309, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.5927130774235523, | |
| "grad_norm": 0.9547963738441467, | |
| "learning_rate": 2.4122697984190428e-05, | |
| "loss": 0.7593, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 2.5959661678594665, | |
| "grad_norm": 0.943261444568634, | |
| "learning_rate": 2.4071115083723364e-05, | |
| "loss": 0.7562, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.5992192582953804, | |
| "grad_norm": 0.9355084896087646, | |
| "learning_rate": 2.401953614294006e-05, | |
| "loss": 0.7294, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 2.6024723487312946, | |
| "grad_norm": 1.0167070627212524, | |
| "learning_rate": 2.396796138171307e-05, | |
| "loss": 0.7578, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.605725439167209, | |
| "grad_norm": 0.9536129832267761, | |
| "learning_rate": 2.391639101989712e-05, | |
| "loss": 0.7363, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 2.6089785296031227, | |
| "grad_norm": 0.9292064309120178, | |
| "learning_rate": 2.3864825277328193e-05, | |
| "loss": 0.7517, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.612231620039037, | |
| "grad_norm": 1.1821918487548828, | |
| "learning_rate": 2.3813264373822578e-05, | |
| "loss": 0.7627, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 2.6154847104749512, | |
| "grad_norm": 0.9278668165206909, | |
| "learning_rate": 2.376170852917592e-05, | |
| "loss": 0.7673, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.618737800910865, | |
| "grad_norm": 0.9061160683631897, | |
| "learning_rate": 2.3710157963162328e-05, | |
| "loss": 0.774, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.6219908913467793, | |
| "grad_norm": 1.2330580949783325, | |
| "learning_rate": 2.3658612895533393e-05, | |
| "loss": 0.7514, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.6252439817826936, | |
| "grad_norm": 0.9609399437904358, | |
| "learning_rate": 2.3607073546017258e-05, | |
| "loss": 0.7373, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 2.6284970722186074, | |
| "grad_norm": 1.5064210891723633, | |
| "learning_rate": 2.3555540134317712e-05, | |
| "loss": 0.7487, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.6317501626545217, | |
| "grad_norm": 1.0178202390670776, | |
| "learning_rate": 2.3504012880113216e-05, | |
| "loss": 0.7789, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 2.635003253090436, | |
| "grad_norm": 0.8506657481193542, | |
| "learning_rate": 2.3452492003055984e-05, | |
| "loss": 0.7316, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.63825634352635, | |
| "grad_norm": 0.9458078145980835, | |
| "learning_rate": 2.3400977722771058e-05, | |
| "loss": 0.7703, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 2.641509433962264, | |
| "grad_norm": 1.1263021230697632, | |
| "learning_rate": 2.3349470258855337e-05, | |
| "loss": 0.7579, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.6447625243981783, | |
| "grad_norm": 0.8372018933296204, | |
| "learning_rate": 2.3297969830876686e-05, | |
| "loss": 0.76, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 2.6480156148340925, | |
| "grad_norm": 0.8701651692390442, | |
| "learning_rate": 2.3246476658372973e-05, | |
| "loss": 0.7476, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.6512687052700064, | |
| "grad_norm": 1.3167948722839355, | |
| "learning_rate": 2.3194990960851112e-05, | |
| "loss": 0.7628, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.6545217957059206, | |
| "grad_norm": 1.0400781631469727, | |
| "learning_rate": 2.3143512957786184e-05, | |
| "loss": 0.7773, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.657774886141835, | |
| "grad_norm": 0.9622422456741333, | |
| "learning_rate": 2.309204286862046e-05, | |
| "loss": 0.7469, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 2.6610279765777487, | |
| "grad_norm": 0.929834246635437, | |
| "learning_rate": 2.3040580912762456e-05, | |
| "loss": 0.7544, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.664281067013663, | |
| "grad_norm": 1.018149495124817, | |
| "learning_rate": 2.298912730958605e-05, | |
| "loss": 0.7746, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 2.6675341574495772, | |
| "grad_norm": 1.0057318210601807, | |
| "learning_rate": 2.2937682278429494e-05, | |
| "loss": 0.7352, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.6707872478854915, | |
| "grad_norm": 0.9973504543304443, | |
| "learning_rate": 2.288624603859449e-05, | |
| "loss": 0.721, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 2.6740403383214053, | |
| "grad_norm": 1.0883572101593018, | |
| "learning_rate": 2.2834818809345297e-05, | |
| "loss": 0.7474, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.6772934287573196, | |
| "grad_norm": 1.337254524230957, | |
| "learning_rate": 2.2783400809907726e-05, | |
| "loss": 0.7701, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 2.680546519193234, | |
| "grad_norm": 1.1612261533737183, | |
| "learning_rate": 2.2731992259468272e-05, | |
| "loss": 0.7547, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.6837996096291477, | |
| "grad_norm": 1.0043455362319946, | |
| "learning_rate": 2.2680593377173124e-05, | |
| "loss": 0.7576, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.687052700065062, | |
| "grad_norm": 1.180498719215393, | |
| "learning_rate": 2.2629204382127284e-05, | |
| "loss": 0.7533, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.690305790500976, | |
| "grad_norm": 1.0349406003952026, | |
| "learning_rate": 2.257782549339359e-05, | |
| "loss": 0.7636, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 2.69355888093689, | |
| "grad_norm": 1.073776125907898, | |
| "learning_rate": 2.2526456929991793e-05, | |
| "loss": 0.7718, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.6968119713728043, | |
| "grad_norm": 1.114530324935913, | |
| "learning_rate": 2.2475098910897645e-05, | |
| "loss": 0.7445, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 2.7000650618087185, | |
| "grad_norm": 0.9346311092376709, | |
| "learning_rate": 2.2423751655041952e-05, | |
| "loss": 0.7294, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.7033181522446323, | |
| "grad_norm": 1.086501955986023, | |
| "learning_rate": 2.237241538130961e-05, | |
| "loss": 0.7507, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 2.7065712426805466, | |
| "grad_norm": 0.9763929843902588, | |
| "learning_rate": 2.2321090308538732e-05, | |
| "loss": 0.743, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.709824333116461, | |
| "grad_norm": 0.8880870938301086, | |
| "learning_rate": 2.2269776655519658e-05, | |
| "loss": 0.7418, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 2.7130774235523747, | |
| "grad_norm": 0.9564589858055115, | |
| "learning_rate": 2.2218474640994063e-05, | |
| "loss": 0.765, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.716330513988289, | |
| "grad_norm": 1.169952630996704, | |
| "learning_rate": 2.2167184483654013e-05, | |
| "loss": 0.7531, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.719583604424203, | |
| "grad_norm": 0.9627036452293396, | |
| "learning_rate": 2.211590640214101e-05, | |
| "loss": 0.7623, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.722836694860117, | |
| "grad_norm": 0.9291010499000549, | |
| "learning_rate": 2.2064640615045092e-05, | |
| "loss": 0.7641, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 2.7260897852960313, | |
| "grad_norm": 1.0236008167266846, | |
| "learning_rate": 2.2013387340903893e-05, | |
| "loss": 0.7703, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.7293428757319456, | |
| "grad_norm": 1.2711366415023804, | |
| "learning_rate": 2.1962146798201684e-05, | |
| "loss": 0.7454, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 2.7325959661678594, | |
| "grad_norm": 1.1424434185028076, | |
| "learning_rate": 2.191091920536849e-05, | |
| "loss": 0.7559, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.7358490566037736, | |
| "grad_norm": 1.4138892889022827, | |
| "learning_rate": 2.1859704780779126e-05, | |
| "loss": 0.7569, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 2.739102147039688, | |
| "grad_norm": 0.967829704284668, | |
| "learning_rate": 2.1808503742752252e-05, | |
| "loss": 0.7432, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.7423552374756017, | |
| "grad_norm": 0.8999619483947754, | |
| "learning_rate": 2.175731630954949e-05, | |
| "loss": 0.7457, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 2.745608327911516, | |
| "grad_norm": 1.0657751560211182, | |
| "learning_rate": 2.1706142699374454e-05, | |
| "loss": 0.786, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.7488614183474303, | |
| "grad_norm": 1.5017127990722656, | |
| "learning_rate": 2.1654983130371837e-05, | |
| "loss": 0.7516, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.752114508783344, | |
| "grad_norm": 1.0914252996444702, | |
| "learning_rate": 2.1603837820626478e-05, | |
| "loss": 0.7616, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.7553675992192583, | |
| "grad_norm": 1.1397154331207275, | |
| "learning_rate": 2.1552706988162417e-05, | |
| "loss": 0.761, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 2.7586206896551726, | |
| "grad_norm": 1.162166714668274, | |
| "learning_rate": 2.1501590850941994e-05, | |
| "loss": 0.7353, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.7618737800910864, | |
| "grad_norm": 1.0100218057632446, | |
| "learning_rate": 2.1450489626864907e-05, | |
| "loss": 0.7446, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 2.7651268705270007, | |
| "grad_norm": 0.9108495116233826, | |
| "learning_rate": 2.139940353376728e-05, | |
| "loss": 0.7644, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.768379960962915, | |
| "grad_norm": 0.9544759392738342, | |
| "learning_rate": 2.134833278942074e-05, | |
| "loss": 0.7693, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 2.7716330513988288, | |
| "grad_norm": 1.6715203523635864, | |
| "learning_rate": 2.1297277611531456e-05, | |
| "loss": 0.764, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.774886141834743, | |
| "grad_norm": 1.0044587850570679, | |
| "learning_rate": 2.1246238217739283e-05, | |
| "loss": 0.7593, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 2.7781392322706573, | |
| "grad_norm": 0.9041277766227722, | |
| "learning_rate": 2.119521482561675e-05, | |
| "loss": 0.7427, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.781392322706571, | |
| "grad_norm": 0.8890901803970337, | |
| "learning_rate": 2.114420765266821e-05, | |
| "loss": 0.7462, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.7846454131424854, | |
| "grad_norm": 0.9522978663444519, | |
| "learning_rate": 2.1093216916328855e-05, | |
| "loss": 0.7398, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.7878985035783996, | |
| "grad_norm": 1.2829575538635254, | |
| "learning_rate": 2.104224283396381e-05, | |
| "loss": 0.7632, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 2.7911515940143135, | |
| "grad_norm": 0.9626341462135315, | |
| "learning_rate": 2.0991285622867215e-05, | |
| "loss": 0.7681, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.7944046844502277, | |
| "grad_norm": 0.952867865562439, | |
| "learning_rate": 2.0940345500261294e-05, | |
| "loss": 0.7518, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 2.797657774886142, | |
| "grad_norm": 1.0598902702331543, | |
| "learning_rate": 2.0889422683295407e-05, | |
| "loss": 0.7884, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.800910865322056, | |
| "grad_norm": 1.0540211200714111, | |
| "learning_rate": 2.083851738904516e-05, | |
| "loss": 0.7518, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 2.80416395575797, | |
| "grad_norm": 0.9470973014831543, | |
| "learning_rate": 2.0787629834511466e-05, | |
| "loss": 0.764, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.8074170461938843, | |
| "grad_norm": 1.127659559249878, | |
| "learning_rate": 2.0736760236619594e-05, | |
| "loss": 0.7332, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 2.810670136629798, | |
| "grad_norm": 1.0755411386489868, | |
| "learning_rate": 2.0685908812218287e-05, | |
| "loss": 0.7622, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.8139232270657124, | |
| "grad_norm": 1.1209520101547241, | |
| "learning_rate": 2.0635075778078817e-05, | |
| "loss": 0.7416, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.8171763175016267, | |
| "grad_norm": 1.0491728782653809, | |
| "learning_rate": 2.0584261350894046e-05, | |
| "loss": 0.7802, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.8204294079375405, | |
| "grad_norm": 1.025694727897644, | |
| "learning_rate": 2.0533465747277535e-05, | |
| "loss": 0.7487, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 2.8236824983734548, | |
| "grad_norm": 0.9486551880836487, | |
| "learning_rate": 2.0482689183762588e-05, | |
| "loss": 0.7594, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.826935588809369, | |
| "grad_norm": 0.9839990139007568, | |
| "learning_rate": 2.0431931876801352e-05, | |
| "loss": 0.7431, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 2.830188679245283, | |
| "grad_norm": 1.0050575733184814, | |
| "learning_rate": 2.03811940427639e-05, | |
| "loss": 0.7527, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.833441769681197, | |
| "grad_norm": 0.9743004441261292, | |
| "learning_rate": 2.033047589793726e-05, | |
| "loss": 0.7307, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 2.8366948601171114, | |
| "grad_norm": 1.0488122701644897, | |
| "learning_rate": 2.027977765852456e-05, | |
| "loss": 0.7598, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.839947950553025, | |
| "grad_norm": 1.074271321296692, | |
| "learning_rate": 2.022909954064407e-05, | |
| "loss": 0.7571, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 2.8432010409889394, | |
| "grad_norm": 0.9306830167770386, | |
| "learning_rate": 2.0178441760328268e-05, | |
| "loss": 0.735, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.8464541314248537, | |
| "grad_norm": 0.8995447754859924, | |
| "learning_rate": 2.0127804533522948e-05, | |
| "loss": 0.7519, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 2.8497072218607675, | |
| "grad_norm": 0.9495101571083069, | |
| "learning_rate": 2.0077188076086288e-05, | |
| "loss": 0.7544, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.852960312296682, | |
| "grad_norm": 1.3610079288482666, | |
| "learning_rate": 2.002659260378794e-05, | |
| "loss": 0.7573, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 2.856213402732596, | |
| "grad_norm": 0.9668116569519043, | |
| "learning_rate": 1.9976018332308077e-05, | |
| "loss": 0.7332, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.85946649316851, | |
| "grad_norm": 1.128670334815979, | |
| "learning_rate": 1.992546547723651e-05, | |
| "loss": 0.7512, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 2.862719583604424, | |
| "grad_norm": 1.276426911354065, | |
| "learning_rate": 1.987493425407176e-05, | |
| "loss": 0.7449, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.8659726740403384, | |
| "grad_norm": 0.9716594815254211, | |
| "learning_rate": 1.982442487822011e-05, | |
| "loss": 0.7432, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 2.869225764476252, | |
| "grad_norm": 0.9533106088638306, | |
| "learning_rate": 1.9773937564994745e-05, | |
| "loss": 0.7423, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.8724788549121665, | |
| "grad_norm": 1.0256469249725342, | |
| "learning_rate": 1.972347252961479e-05, | |
| "loss": 0.7614, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 2.8757319453480807, | |
| "grad_norm": 1.1626900434494019, | |
| "learning_rate": 1.967302998720438e-05, | |
| "loss": 0.7392, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.8789850357839946, | |
| "grad_norm": 0.9739611744880676, | |
| "learning_rate": 1.9622610152791792e-05, | |
| "loss": 0.7622, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 2.882238126219909, | |
| "grad_norm": 1.0657685995101929, | |
| "learning_rate": 1.9572213241308507e-05, | |
| "loss": 0.7507, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.885491216655823, | |
| "grad_norm": 1.029432773590088, | |
| "learning_rate": 1.952183946758826e-05, | |
| "loss": 0.7723, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 2.888744307091737, | |
| "grad_norm": 1.1281373500823975, | |
| "learning_rate": 1.9471489046366185e-05, | |
| "loss": 0.7479, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.891997397527651, | |
| "grad_norm": 1.1470041275024414, | |
| "learning_rate": 1.942116219227784e-05, | |
| "loss": 0.7341, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 2.8952504879635654, | |
| "grad_norm": 1.0326032638549805, | |
| "learning_rate": 1.937085911985834e-05, | |
| "loss": 0.7571, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.8985035783994793, | |
| "grad_norm": 0.9806135296821594, | |
| "learning_rate": 1.9320580043541425e-05, | |
| "loss": 0.734, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 2.9017566688353935, | |
| "grad_norm": 1.063024878501892, | |
| "learning_rate": 1.9270325177658523e-05, | |
| "loss": 0.7521, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.905009759271308, | |
| "grad_norm": 4.5842156410217285, | |
| "learning_rate": 1.922009473643787e-05, | |
| "loss": 0.7563, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 2.9082628497072216, | |
| "grad_norm": 1.3341448307037354, | |
| "learning_rate": 1.9169888934003598e-05, | |
| "loss": 0.7528, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.911515940143136, | |
| "grad_norm": 1.3391072750091553, | |
| "learning_rate": 1.9119707984374774e-05, | |
| "loss": 0.737, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 2.91476903057905, | |
| "grad_norm": 0.985970139503479, | |
| "learning_rate": 1.9069552101464552e-05, | |
| "loss": 0.7657, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.918022121014964, | |
| "grad_norm": 1.069992184638977, | |
| "learning_rate": 1.901942149907922e-05, | |
| "loss": 0.7526, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 2.921275211450878, | |
| "grad_norm": 0.8812434077262878, | |
| "learning_rate": 1.8969316390917288e-05, | |
| "loss": 0.7664, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.9245283018867925, | |
| "grad_norm": 1.2932692766189575, | |
| "learning_rate": 1.891923699056861e-05, | |
| "loss": 0.7553, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 2.9277813923227067, | |
| "grad_norm": 0.935070276260376, | |
| "learning_rate": 1.886918351151343e-05, | |
| "loss": 0.7583, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.9310344827586206, | |
| "grad_norm": 0.9840937852859497, | |
| "learning_rate": 1.881915616712151e-05, | |
| "loss": 0.748, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 2.934287573194535, | |
| "grad_norm": 1.0583505630493164, | |
| "learning_rate": 1.8769155170651203e-05, | |
| "loss": 0.7482, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.937540663630449, | |
| "grad_norm": 1.0253130197525024, | |
| "learning_rate": 1.8719180735248522e-05, | |
| "loss": 0.751, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 2.940793754066363, | |
| "grad_norm": 1.0491794347763062, | |
| "learning_rate": 1.8669233073946303e-05, | |
| "loss": 0.7533, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.944046844502277, | |
| "grad_norm": 1.1201449632644653, | |
| "learning_rate": 1.86193123996632e-05, | |
| "loss": 0.7486, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 2.9472999349381914, | |
| "grad_norm": 1.3683768510818481, | |
| "learning_rate": 1.856941892520284e-05, | |
| "loss": 0.7584, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.9505530253741052, | |
| "grad_norm": 1.0555903911590576, | |
| "learning_rate": 1.851955286325292e-05, | |
| "loss": 0.7554, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 2.9538061158100195, | |
| "grad_norm": 1.5055445432662964, | |
| "learning_rate": 1.846971442638426e-05, | |
| "loss": 0.7418, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.9570592062459338, | |
| "grad_norm": 1.222474455833435, | |
| "learning_rate": 1.841990382704993e-05, | |
| "loss": 0.7455, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 2.960312296681848, | |
| "grad_norm": 1.0359810590744019, | |
| "learning_rate": 1.8370121277584325e-05, | |
| "loss": 0.7404, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.963565387117762, | |
| "grad_norm": 1.2511727809906006, | |
| "learning_rate": 1.8320366990202276e-05, | |
| "loss": 0.7228, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 2.966818477553676, | |
| "grad_norm": 0.8730882406234741, | |
| "learning_rate": 1.827064117699814e-05, | |
| "loss": 0.7586, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.9700715679895904, | |
| "grad_norm": 1.5805312395095825, | |
| "learning_rate": 1.822094404994487e-05, | |
| "loss": 0.7499, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 2.973324658425504, | |
| "grad_norm": 1.1607098579406738, | |
| "learning_rate": 1.817127582089317e-05, | |
| "loss": 0.7637, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.9765777488614185, | |
| "grad_norm": 0.9193926453590393, | |
| "learning_rate": 1.8121636701570537e-05, | |
| "loss": 0.7532, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 2.9798308392973327, | |
| "grad_norm": 1.0218764543533325, | |
| "learning_rate": 1.807202690358037e-05, | |
| "loss": 0.7503, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.9830839297332465, | |
| "grad_norm": 1.0876221656799316, | |
| "learning_rate": 1.802244663840109e-05, | |
| "loss": 0.7707, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 2.986337020169161, | |
| "grad_norm": 1.0459486246109009, | |
| "learning_rate": 1.797289611738523e-05, | |
| "loss": 0.7397, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.989590110605075, | |
| "grad_norm": 1.0498055219650269, | |
| "learning_rate": 1.7923375551758505e-05, | |
| "loss": 0.7691, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 2.992843201040989, | |
| "grad_norm": 0.9780749082565308, | |
| "learning_rate": 1.7873885152618956e-05, | |
| "loss": 0.7525, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.996096291476903, | |
| "grad_norm": 1.0338603258132935, | |
| "learning_rate": 1.7824425130936023e-05, | |
| "loss": 0.7459, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 2.9993493819128174, | |
| "grad_norm": 0.9098593592643738, | |
| "learning_rate": 1.7774995697549645e-05, | |
| "loss": 0.7488, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.8012369099843738, | |
| "eval_loss": 0.45166015625, | |
| "eval_precision": 0.8020338050069477, | |
| "eval_recall": 0.8006626052475169, | |
| "eval_runtime": 238.3932, | |
| "eval_samples_per_second": 1650.361, | |
| "eval_steps_per_second": 1.615, | |
| "step": 4611 | |
| }, | |
| { | |
| "epoch": 3.0026024723487312, | |
| "grad_norm": 1.3282872438430786, | |
| "learning_rate": 1.7725597063169386e-05, | |
| "loss": 0.6622, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 3.0058555627846455, | |
| "grad_norm": 1.3152724504470825, | |
| "learning_rate": 1.767622943837349e-05, | |
| "loss": 0.6352, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 3.0091086532205593, | |
| "grad_norm": 1.105705976486206, | |
| "learning_rate": 1.7626893033608038e-05, | |
| "loss": 0.6291, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 3.0123617436564736, | |
| "grad_norm": 1.0462555885314941, | |
| "learning_rate": 1.7577588059186027e-05, | |
| "loss": 0.6476, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 3.015614834092388, | |
| "grad_norm": 1.0921547412872314, | |
| "learning_rate": 1.7528314725286443e-05, | |
| "loss": 0.6358, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 3.018867924528302, | |
| "grad_norm": 1.1877232789993286, | |
| "learning_rate": 1.747907324195342e-05, | |
| "loss": 0.6434, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 3.022121014964216, | |
| "grad_norm": 1.1791988611221313, | |
| "learning_rate": 1.7429863819095313e-05, | |
| "loss": 0.6372, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 3.02537410540013, | |
| "grad_norm": 1.23057222366333, | |
| "learning_rate": 1.738068666648379e-05, | |
| "loss": 0.6521, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.0286271958360445, | |
| "grad_norm": 1.0966289043426514, | |
| "learning_rate": 1.7331541993752993e-05, | |
| "loss": 0.6337, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 3.0318802862719583, | |
| "grad_norm": 1.108396291732788, | |
| "learning_rate": 1.7282430010398577e-05, | |
| "loss": 0.6394, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 3.0351333767078725, | |
| "grad_norm": 1.2432180643081665, | |
| "learning_rate": 1.723335092577686e-05, | |
| "loss": 0.6319, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 3.038386467143787, | |
| "grad_norm": 1.5450379848480225, | |
| "learning_rate": 1.718430494910391e-05, | |
| "loss": 0.632, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 3.0416395575797006, | |
| "grad_norm": 1.3607127666473389, | |
| "learning_rate": 1.713529228945466e-05, | |
| "loss": 0.6608, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 3.044892648015615, | |
| "grad_norm": 1.0697190761566162, | |
| "learning_rate": 1.7086313155762046e-05, | |
| "loss": 0.6263, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 3.048145738451529, | |
| "grad_norm": 1.3838845491409302, | |
| "learning_rate": 1.703736775681604e-05, | |
| "loss": 0.6367, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 3.051398828887443, | |
| "grad_norm": 1.324628233909607, | |
| "learning_rate": 1.6988456301262854e-05, | |
| "loss": 0.6435, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 3.0546519193233572, | |
| "grad_norm": 1.2009634971618652, | |
| "learning_rate": 1.6939578997603983e-05, | |
| "loss": 0.6467, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 3.0579050097592715, | |
| "grad_norm": 1.2275351285934448, | |
| "learning_rate": 1.689073605419533e-05, | |
| "loss": 0.6403, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.0611581001951853, | |
| "grad_norm": 1.9216879606246948, | |
| "learning_rate": 1.6841927679246345e-05, | |
| "loss": 0.6186, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 3.0644111906310996, | |
| "grad_norm": 2.3563551902770996, | |
| "learning_rate": 1.679315408081911e-05, | |
| "loss": 0.6202, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 3.067664281067014, | |
| "grad_norm": 1.435333490371704, | |
| "learning_rate": 1.6744415466827463e-05, | |
| "loss": 0.6273, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 3.0709173715029277, | |
| "grad_norm": 1.315987229347229, | |
| "learning_rate": 1.6695712045036104e-05, | |
| "loss": 0.6318, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 3.074170461938842, | |
| "grad_norm": 1.5982025861740112, | |
| "learning_rate": 1.6647044023059712e-05, | |
| "loss": 0.6384, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 3.077423552374756, | |
| "grad_norm": 1.998374104499817, | |
| "learning_rate": 1.659841160836207e-05, | |
| "loss": 0.6286, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 3.08067664281067, | |
| "grad_norm": 1.3811148405075073, | |
| "learning_rate": 1.6549815008255176e-05, | |
| "loss": 0.6482, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 3.0839297332465843, | |
| "grad_norm": 1.2464516162872314, | |
| "learning_rate": 1.6501254429898343e-05, | |
| "loss": 0.6433, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 3.0871828236824985, | |
| "grad_norm": 1.2944623231887817, | |
| "learning_rate": 1.6452730080297342e-05, | |
| "loss": 0.6328, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 3.0904359141184123, | |
| "grad_norm": 1.1027922630310059, | |
| "learning_rate": 1.6404242166303507e-05, | |
| "loss": 0.6357, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.0936890045543266, | |
| "grad_norm": 3.5568132400512695, | |
| "learning_rate": 1.6355790894612834e-05, | |
| "loss": 0.6081, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 3.096942094990241, | |
| "grad_norm": 1.588714838027954, | |
| "learning_rate": 1.630737647176514e-05, | |
| "loss": 0.6601, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 3.1001951854261547, | |
| "grad_norm": 1.1922274827957153, | |
| "learning_rate": 1.6258999104143157e-05, | |
| "loss": 0.6145, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 3.103448275862069, | |
| "grad_norm": 1.3667454719543457, | |
| "learning_rate": 1.621065899797165e-05, | |
| "loss": 0.6372, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 3.106701366297983, | |
| "grad_norm": 1.8918445110321045, | |
| "learning_rate": 1.616235635931655e-05, | |
| "loss": 0.6152, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 3.109954456733897, | |
| "grad_norm": 1.293562650680542, | |
| "learning_rate": 1.611409139408406e-05, | |
| "loss": 0.6211, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 3.1132075471698113, | |
| "grad_norm": 1.446754813194275, | |
| "learning_rate": 1.6065864308019807e-05, | |
| "loss": 0.6453, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 3.1164606376057256, | |
| "grad_norm": 1.1851979494094849, | |
| "learning_rate": 1.6017675306707926e-05, | |
| "loss": 0.631, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 3.1197137280416394, | |
| "grad_norm": 1.3031965494155884, | |
| "learning_rate": 1.5969524595570216e-05, | |
| "loss": 0.6184, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 3.1229668184775536, | |
| "grad_norm": 2.6355156898498535, | |
| "learning_rate": 1.5921412379865257e-05, | |
| "loss": 0.6451, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.126219908913468, | |
| "grad_norm": 1.4367573261260986, | |
| "learning_rate": 1.58733388646875e-05, | |
| "loss": 0.6466, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 3.1294729993493817, | |
| "grad_norm": 1.4838011264801025, | |
| "learning_rate": 1.5825304254966445e-05, | |
| "loss": 0.6181, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 3.132726089785296, | |
| "grad_norm": 1.2338780164718628, | |
| "learning_rate": 1.577730875546575e-05, | |
| "loss": 0.6179, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 3.1359791802212102, | |
| "grad_norm": 1.4179608821868896, | |
| "learning_rate": 1.5729352570782324e-05, | |
| "loss": 0.6362, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 3.139232270657124, | |
| "grad_norm": 1.2671458721160889, | |
| "learning_rate": 1.5681435905345522e-05, | |
| "loss": 0.6365, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 3.1424853610930383, | |
| "grad_norm": 1.368369221687317, | |
| "learning_rate": 1.5643131164122626e-05, | |
| "loss": 0.6102, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 3.1457384515289526, | |
| "grad_norm": 1.341280460357666, | |
| "learning_rate": 1.5595286147953364e-05, | |
| "loss": 0.637, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 3.1489915419648664, | |
| "grad_norm": 1.5806121826171875, | |
| "learning_rate": 1.5547481222533846e-05, | |
| "loss": 0.6296, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 3.1522446324007807, | |
| "grad_norm": 1.505342721939087, | |
| "learning_rate": 1.549971659164861e-05, | |
| "loss": 0.6284, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 3.155497722836695, | |
| "grad_norm": 1.2677946090698242, | |
| "learning_rate": 1.5451992458910442e-05, | |
| "loss": 0.6134, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.1587508132726088, | |
| "grad_norm": 1.2727744579315186, | |
| "learning_rate": 1.540430902775946e-05, | |
| "loss": 0.626, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 3.162003903708523, | |
| "grad_norm": 1.258187174797058, | |
| "learning_rate": 1.5356666501462314e-05, | |
| "loss": 0.6085, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 3.1652569941444373, | |
| "grad_norm": 1.589736819267273, | |
| "learning_rate": 1.5309065083111255e-05, | |
| "loss": 0.6247, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 3.168510084580351, | |
| "grad_norm": 1.2900131940841675, | |
| "learning_rate": 1.5261504975623306e-05, | |
| "loss": 0.624, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 3.1717631750162654, | |
| "grad_norm": 2.3252532482147217, | |
| "learning_rate": 1.5213986381739393e-05, | |
| "loss": 0.6295, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 3.1750162654521796, | |
| "grad_norm": 1.3652303218841553, | |
| "learning_rate": 1.5166509504023473e-05, | |
| "loss": 0.6274, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 3.178269355888094, | |
| "grad_norm": 1.8075648546218872, | |
| "learning_rate": 1.5119074544861678e-05, | |
| "loss": 0.6375, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 3.1815224463240077, | |
| "grad_norm": 1.2221382856369019, | |
| "learning_rate": 1.5071681706461438e-05, | |
| "loss": 0.6273, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 3.184775536759922, | |
| "grad_norm": 1.5147900581359863, | |
| "learning_rate": 1.5024331190850637e-05, | |
| "loss": 0.6381, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 3.1880286271958362, | |
| "grad_norm": 2.4453020095825195, | |
| "learning_rate": 1.4977023199876743e-05, | |
| "loss": 0.6552, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.19128171763175, | |
| "grad_norm": 2.3050053119659424, | |
| "learning_rate": 1.4929757935205951e-05, | |
| "loss": 0.6176, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 3.1945348080676643, | |
| "grad_norm": 1.289581060409546, | |
| "learning_rate": 1.4882535598322311e-05, | |
| "loss": 0.6253, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 3.1977878985035786, | |
| "grad_norm": 1.5076651573181152, | |
| "learning_rate": 1.4835356390526888e-05, | |
| "loss": 0.6194, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 3.2010409889394924, | |
| "grad_norm": 1.4202001094818115, | |
| "learning_rate": 1.478822051293689e-05, | |
| "loss": 0.6081, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 3.2042940793754067, | |
| "grad_norm": 1.287611961364746, | |
| "learning_rate": 1.4741128166484824e-05, | |
| "loss": 0.6429, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 3.207547169811321, | |
| "grad_norm": 1.2236043214797974, | |
| "learning_rate": 1.4694079551917629e-05, | |
| "loss": 0.6176, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 3.2108002602472347, | |
| "grad_norm": 1.3410075902938843, | |
| "learning_rate": 1.4656472282003922e-05, | |
| "loss": 0.6209, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 3.214053350683149, | |
| "grad_norm": 1.419541835784912, | |
| "learning_rate": 1.4609502890116145e-05, | |
| "loss": 0.6436, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 3.2173064411190633, | |
| "grad_norm": 1.7478810548782349, | |
| "learning_rate": 1.4562577791210158e-05, | |
| "loss": 0.6023, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 3.220559531554977, | |
| "grad_norm": 1.8083374500274658, | |
| "learning_rate": 1.4515697185319946e-05, | |
| "loss": 0.6166, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.2238126219908914, | |
| "grad_norm": 2.203806161880493, | |
| "learning_rate": 1.4468861272289818e-05, | |
| "loss": 0.636, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 3.2270657124268056, | |
| "grad_norm": 1.3574259281158447, | |
| "learning_rate": 1.4422070251773594e-05, | |
| "loss": 0.6012, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 3.2303188028627194, | |
| "grad_norm": 1.4441782236099243, | |
| "learning_rate": 1.4375324323233697e-05, | |
| "loss": 0.6197, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 3.2335718932986337, | |
| "grad_norm": 1.7502111196517944, | |
| "learning_rate": 1.4328623685940335e-05, | |
| "loss": 0.6354, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.236824983734548, | |
| "grad_norm": 1.5651460886001587, | |
| "learning_rate": 1.4281968538970646e-05, | |
| "loss": 0.6257, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 3.240078074170462, | |
| "grad_norm": 1.3271369934082031, | |
| "learning_rate": 1.4235359081207871e-05, | |
| "loss": 0.6378, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 3.243331164606376, | |
| "grad_norm": 1.354906678199768, | |
| "learning_rate": 1.4188795511340461e-05, | |
| "loss": 0.6324, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 3.2465842550422903, | |
| "grad_norm": 1.295578956604004, | |
| "learning_rate": 1.4142278027861253e-05, | |
| "loss": 0.6176, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 3.249837345478204, | |
| "grad_norm": 1.4495329856872559, | |
| "learning_rate": 1.4095806829066655e-05, | |
| "loss": 0.6387, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 3.2530904359141184, | |
| "grad_norm": 1.3459370136260986, | |
| "learning_rate": 1.404938211305574e-05, | |
| "loss": 0.6343, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.2563435263500327, | |
| "grad_norm": 1.299459457397461, | |
| "learning_rate": 1.4003004077729438e-05, | |
| "loss": 0.6394, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 3.2595966167859465, | |
| "grad_norm": 1.3181241750717163, | |
| "learning_rate": 1.3956672920789705e-05, | |
| "loss": 0.6135, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 3.2628497072218607, | |
| "grad_norm": 1.5811583995819092, | |
| "learning_rate": 1.3910388839738647e-05, | |
| "loss": 0.6377, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 3.266102797657775, | |
| "grad_norm": 1.3512473106384277, | |
| "learning_rate": 1.386415203187768e-05, | |
| "loss": 0.6293, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 3.269355888093689, | |
| "grad_norm": 1.8290486335754395, | |
| "learning_rate": 1.3817962694306747e-05, | |
| "loss": 0.635, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 3.272608978529603, | |
| "grad_norm": 1.5076416730880737, | |
| "learning_rate": 1.3771821023923383e-05, | |
| "loss": 0.6027, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 3.2758620689655173, | |
| "grad_norm": 1.5753469467163086, | |
| "learning_rate": 1.3725727217421947e-05, | |
| "loss": 0.6165, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 3.279115159401431, | |
| "grad_norm": 1.5028088092803955, | |
| "learning_rate": 1.3679681471292776e-05, | |
| "loss": 0.621, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 3.2823682498373454, | |
| "grad_norm": 1.4654455184936523, | |
| "learning_rate": 1.363368398182131e-05, | |
| "loss": 0.6266, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 3.2856213402732597, | |
| "grad_norm": 1.7276520729064941, | |
| "learning_rate": 1.3587734945087277e-05, | |
| "loss": 0.6258, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.288874430709174, | |
| "grad_norm": 1.710095763206482, | |
| "learning_rate": 1.3541834556963895e-05, | |
| "loss": 0.6388, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 3.2921275211450878, | |
| "grad_norm": 1.6146140098571777, | |
| "learning_rate": 1.3495983013116953e-05, | |
| "loss": 0.6466, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 3.295380611581002, | |
| "grad_norm": 1.3169276714324951, | |
| "learning_rate": 1.3450180509004066e-05, | |
| "loss": 0.6389, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 3.2986337020169163, | |
| "grad_norm": 2.564819574356079, | |
| "learning_rate": 1.3404427239873763e-05, | |
| "loss": 0.6158, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 3.30188679245283, | |
| "grad_norm": 1.6384319067001343, | |
| "learning_rate": 1.335872340076474e-05, | |
| "loss": 0.6241, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 3.3051398828887444, | |
| "grad_norm": 1.4620628356933594, | |
| "learning_rate": 1.3313069186504929e-05, | |
| "loss": 0.6203, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 3.3083929733246586, | |
| "grad_norm": 1.7426296472549438, | |
| "learning_rate": 1.3267464791710747e-05, | |
| "loss": 0.6238, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 3.3116460637605725, | |
| "grad_norm": 2.093579053878784, | |
| "learning_rate": 1.3221910410786248e-05, | |
| "loss": 0.6144, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 3.3148991541964867, | |
| "grad_norm": 1.4141899347305298, | |
| "learning_rate": 1.3176406237922262e-05, | |
| "loss": 0.6145, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 3.318152244632401, | |
| "grad_norm": 1.2416197061538696, | |
| "learning_rate": 1.3130952467095593e-05, | |
| "loss": 0.6134, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.321405335068315, | |
| "grad_norm": 1.6651731729507446, | |
| "learning_rate": 1.3085549292068213e-05, | |
| "loss": 0.6366, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 3.324658425504229, | |
| "grad_norm": 1.4123419523239136, | |
| "learning_rate": 1.3040196906386392e-05, | |
| "loss": 0.6363, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 3.3279115159401433, | |
| "grad_norm": 1.5788094997406006, | |
| "learning_rate": 1.2994895503379886e-05, | |
| "loss": 0.6463, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 3.331164606376057, | |
| "grad_norm": 1.9464671611785889, | |
| "learning_rate": 1.2949645276161149e-05, | |
| "loss": 0.6193, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 3.3344176968119714, | |
| "grad_norm": 1.3868358135223389, | |
| "learning_rate": 1.2904446417624457e-05, | |
| "loss": 0.6182, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 3.3376707872478857, | |
| "grad_norm": 7.827129364013672, | |
| "learning_rate": 1.2859299120445107e-05, | |
| "loss": 0.615, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 3.3409238776837995, | |
| "grad_norm": 1.3248870372772217, | |
| "learning_rate": 1.2814203577078626e-05, | |
| "loss": 0.6286, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 3.3441769681197138, | |
| "grad_norm": 1.3587925434112549, | |
| "learning_rate": 1.2769159979759899e-05, | |
| "loss": 0.6285, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 3.347430058555628, | |
| "grad_norm": 1.518294095993042, | |
| "learning_rate": 1.2724168520502371e-05, | |
| "loss": 0.6304, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 3.350683148991542, | |
| "grad_norm": 1.2859338521957397, | |
| "learning_rate": 1.2679229391097241e-05, | |
| "loss": 0.6299, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.353936239427456, | |
| "grad_norm": 1.3024553060531616, | |
| "learning_rate": 1.2634342783112646e-05, | |
| "loss": 0.6177, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 3.3571893298633704, | |
| "grad_norm": 3.6768040657043457, | |
| "learning_rate": 1.258950888789281e-05, | |
| "loss": 0.6385, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 3.360442420299284, | |
| "grad_norm": 1.476014256477356, | |
| "learning_rate": 1.2544727896557257e-05, | |
| "loss": 0.6313, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 3.3636955107351985, | |
| "grad_norm": 2.193185806274414, | |
| "learning_rate": 1.2500000000000006e-05, | |
| "loss": 0.6386, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 3.3669486011711127, | |
| "grad_norm": 1.4634368419647217, | |
| "learning_rate": 1.2455325388888726e-05, | |
| "loss": 0.617, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 3.3702016916070265, | |
| "grad_norm": 1.770553708076477, | |
| "learning_rate": 1.2410704253663932e-05, | |
| "loss": 0.637, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 3.373454782042941, | |
| "grad_norm": 1.7664306163787842, | |
| "learning_rate": 1.236613678453821e-05, | |
| "loss": 0.6203, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 3.376707872478855, | |
| "grad_norm": 1.4499051570892334, | |
| "learning_rate": 1.232162317149535e-05, | |
| "loss": 0.6417, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 3.379960962914769, | |
| "grad_norm": 2.710038661956787, | |
| "learning_rate": 1.2277163604289558e-05, | |
| "loss": 0.6246, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 3.383214053350683, | |
| "grad_norm": 1.9992517232894897, | |
| "learning_rate": 1.2232758272444672e-05, | |
| "loss": 0.6188, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.3864671437865974, | |
| "grad_norm": 1.1757420301437378, | |
| "learning_rate": 1.2188407365253337e-05, | |
| "loss": 0.6232, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 3.3897202342225112, | |
| "grad_norm": 1.3049498796463013, | |
| "learning_rate": 1.2144111071776174e-05, | |
| "loss": 0.6314, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 3.3929733246584255, | |
| "grad_norm": 1.2970354557037354, | |
| "learning_rate": 1.209986958084099e-05, | |
| "loss": 0.6361, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 3.3962264150943398, | |
| "grad_norm": 1.4407247304916382, | |
| "learning_rate": 1.205568308104201e-05, | |
| "loss": 0.6246, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 3.3994795055302536, | |
| "grad_norm": 1.673065185546875, | |
| "learning_rate": 1.2011551760739014e-05, | |
| "loss": 0.6318, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 3.402732595966168, | |
| "grad_norm": 1.4697465896606445, | |
| "learning_rate": 1.196747580805656e-05, | |
| "loss": 0.6417, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 3.405985686402082, | |
| "grad_norm": 1.6552962064743042, | |
| "learning_rate": 1.1923455410883212e-05, | |
| "loss": 0.6343, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 3.409238776837996, | |
| "grad_norm": 1.5813676118850708, | |
| "learning_rate": 1.1879490756870674e-05, | |
| "loss": 0.6352, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 3.41249186727391, | |
| "grad_norm": 3.213158130645752, | |
| "learning_rate": 1.1835582033433037e-05, | |
| "loss": 0.6352, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 3.4157449577098244, | |
| "grad_norm": 1.2842360734939575, | |
| "learning_rate": 1.1791729427745992e-05, | |
| "loss": 0.6416, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.4189980481457383, | |
| "grad_norm": 1.6811124086380005, | |
| "learning_rate": 1.1747933126745983e-05, | |
| "loss": 0.651, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 3.4222511385816525, | |
| "grad_norm": 1.2236487865447998, | |
| "learning_rate": 1.170419331712943e-05, | |
| "loss": 0.641, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 3.425504229017567, | |
| "grad_norm": 1.3968175649642944, | |
| "learning_rate": 1.1660510185351978e-05, | |
| "loss": 0.6271, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 3.4287573194534806, | |
| "grad_norm": 2.152369976043701, | |
| "learning_rate": 1.161688391762763e-05, | |
| "loss": 0.633, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 3.432010409889395, | |
| "grad_norm": 1.5563530921936035, | |
| "learning_rate": 1.1573314699927985e-05, | |
| "loss": 0.6429, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 3.435263500325309, | |
| "grad_norm": 1.4173344373703003, | |
| "learning_rate": 1.1529802717981475e-05, | |
| "loss": 0.6344, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 3.438516590761223, | |
| "grad_norm": 1.8149155378341675, | |
| "learning_rate": 1.1486348157272526e-05, | |
| "loss": 0.6278, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 3.441769681197137, | |
| "grad_norm": 1.4700722694396973, | |
| "learning_rate": 1.1442951203040775e-05, | |
| "loss": 0.607, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 3.4450227716330515, | |
| "grad_norm": 1.4950767755508423, | |
| "learning_rate": 1.139961204028033e-05, | |
| "loss": 0.6298, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 1.702974796295166, | |
| "learning_rate": 1.1356330853738906e-05, | |
| "loss": 0.6599, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.4515289525048796, | |
| "grad_norm": 1.7694127559661865, | |
| "learning_rate": 1.1313107827917083e-05, | |
| "loss": 0.6235, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 3.454782042940794, | |
| "grad_norm": 1.2292397022247314, | |
| "learning_rate": 1.1269943147067535e-05, | |
| "loss": 0.6264, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 3.4580351333767076, | |
| "grad_norm": 1.3355427980422974, | |
| "learning_rate": 1.1226836995194196e-05, | |
| "loss": 0.6274, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 3.461288223812622, | |
| "grad_norm": 1.313506841659546, | |
| "learning_rate": 1.1183789556051508e-05, | |
| "loss": 0.6075, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 3.464541314248536, | |
| "grad_norm": 1.3950237035751343, | |
| "learning_rate": 1.1140801013143618e-05, | |
| "loss": 0.606, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 3.46779440468445, | |
| "grad_norm": 1.4222460985183716, | |
| "learning_rate": 1.1097871549723629e-05, | |
| "loss": 0.6238, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 3.4710474951203643, | |
| "grad_norm": 1.701815128326416, | |
| "learning_rate": 1.1055001348792807e-05, | |
| "loss": 0.6227, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 3.4743005855562785, | |
| "grad_norm": 1.5569487810134888, | |
| "learning_rate": 1.1012190593099744e-05, | |
| "loss": 0.643, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 3.4775536759921923, | |
| "grad_norm": 1.3712338209152222, | |
| "learning_rate": 1.0969439465139687e-05, | |
| "loss": 0.6167, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 3.4808067664281066, | |
| "grad_norm": 1.3950178623199463, | |
| "learning_rate": 1.0926748147153648e-05, | |
| "loss": 0.6318, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.484059856864021, | |
| "grad_norm": 1.347066044807434, | |
| "learning_rate": 1.088411682112771e-05, | |
| "loss": 0.6225, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 3.487312947299935, | |
| "grad_norm": 1.347697138786316, | |
| "learning_rate": 1.08415456687922e-05, | |
| "loss": 0.6225, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 3.490566037735849, | |
| "grad_norm": 1.5315964221954346, | |
| "learning_rate": 1.0799034871620958e-05, | |
| "loss": 0.6067, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 3.493819128171763, | |
| "grad_norm": 1.3384947776794434, | |
| "learning_rate": 1.0756584610830523e-05, | |
| "loss": 0.6235, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 3.4970722186076775, | |
| "grad_norm": 1.3656494617462158, | |
| "learning_rate": 1.071419506737937e-05, | |
| "loss": 0.6347, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 3.5003253090435913, | |
| "grad_norm": 1.3071860074996948, | |
| "learning_rate": 1.0671866421967175e-05, | |
| "loss": 0.6108, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 3.5035783994795056, | |
| "grad_norm": 1.3579492568969727, | |
| "learning_rate": 1.062959885503399e-05, | |
| "loss": 0.6354, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 3.5068314899154194, | |
| "grad_norm": 1.52472722530365, | |
| "learning_rate": 1.0587392546759498e-05, | |
| "loss": 0.6177, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 3.5100845803513336, | |
| "grad_norm": 1.7216352224349976, | |
| "learning_rate": 1.0545247677062273e-05, | |
| "loss": 0.6225, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 3.513337670787248, | |
| "grad_norm": 1.3169187307357788, | |
| "learning_rate": 1.050316442559896e-05, | |
| "loss": 0.6196, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.516590761223162, | |
| "grad_norm": 1.7447690963745117, | |
| "learning_rate": 1.0461142971763535e-05, | |
| "loss": 0.6338, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 3.519843851659076, | |
| "grad_norm": 1.4032801389694214, | |
| "learning_rate": 1.0419183494686574e-05, | |
| "loss": 0.6261, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 3.5230969420949902, | |
| "grad_norm": 1.6217771768569946, | |
| "learning_rate": 1.0377286173234416e-05, | |
| "loss": 0.6306, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 3.5263500325309045, | |
| "grad_norm": 1.2982110977172852, | |
| "learning_rate": 1.0335451186008454e-05, | |
| "loss": 0.6242, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 3.5296031229668183, | |
| "grad_norm": 1.2958654165267944, | |
| "learning_rate": 1.0293678711344382e-05, | |
| "loss": 0.6292, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 3.5328562134027326, | |
| "grad_norm": 1.7522900104522705, | |
| "learning_rate": 1.0251968927311384e-05, | |
| "loss": 0.6541, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 3.536109303838647, | |
| "grad_norm": 1.435259222984314, | |
| "learning_rate": 1.0210322011711408e-05, | |
| "loss": 0.6064, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 3.5393623942745607, | |
| "grad_norm": 1.3290374279022217, | |
| "learning_rate": 1.0168738142078429e-05, | |
| "loss": 0.6255, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 3.542615484710475, | |
| "grad_norm": 1.3328436613082886, | |
| "learning_rate": 1.012721749567764e-05, | |
| "loss": 0.6006, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 3.545868575146389, | |
| "grad_norm": 1.3372770547866821, | |
| "learning_rate": 1.0085760249504728e-05, | |
| "loss": 0.6194, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.5491216655823035, | |
| "grad_norm": 1.7760313749313354, | |
| "learning_rate": 1.0044366580285137e-05, | |
| "loss": 0.6067, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 3.5523747560182173, | |
| "grad_norm": 1.7420598268508911, | |
| "learning_rate": 1.0003036664473267e-05, | |
| "loss": 0.6071, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 3.5556278464541315, | |
| "grad_norm": 1.498193621635437, | |
| "learning_rate": 9.96177067825175e-06, | |
| "loss": 0.6146, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 3.558880936890046, | |
| "grad_norm": 1.8063032627105713, | |
| "learning_rate": 9.920568797530716e-06, | |
| "loss": 0.626, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 3.5621340273259596, | |
| "grad_norm": 1.2613329887390137, | |
| "learning_rate": 9.879431197947014e-06, | |
| "loss": 0.6049, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 3.565387117761874, | |
| "grad_norm": 1.34530770778656, | |
| "learning_rate": 9.83835805486347e-06, | |
| "loss": 0.6197, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 3.568640208197788, | |
| "grad_norm": 1.9523491859436035, | |
| "learning_rate": 9.797349543368128e-06, | |
| "loss": 0.6342, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 3.571893298633702, | |
| "grad_norm": 1.8784916400909424, | |
| "learning_rate": 9.756405838273558e-06, | |
| "loss": 0.64, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 3.5751463890696162, | |
| "grad_norm": 1.5533080101013184, | |
| "learning_rate": 9.715527114116035e-06, | |
| "loss": 0.6243, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 3.5783994795055305, | |
| "grad_norm": 1.385695219039917, | |
| "learning_rate": 9.674713545154831e-06, | |
| "loss": 0.6264, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.5816525699414443, | |
| "grad_norm": 1.3538482189178467, | |
| "learning_rate": 9.633965305371506e-06, | |
| "loss": 0.621, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 3.5849056603773586, | |
| "grad_norm": 1.6445493698120117, | |
| "learning_rate": 9.5932825684691e-06, | |
| "loss": 0.6239, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 3.588158750813273, | |
| "grad_norm": 1.803451657295227, | |
| "learning_rate": 9.552665507871428e-06, | |
| "loss": 0.6311, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 3.5914118412491867, | |
| "grad_norm": 1.3346718549728394, | |
| "learning_rate": 9.51211429672236e-06, | |
| "loss": 0.6396, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 3.594664931685101, | |
| "grad_norm": 2.1071603298187256, | |
| "learning_rate": 9.471629107885038e-06, | |
| "loss": 0.6238, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 3.597918022121015, | |
| "grad_norm": 1.4250411987304688, | |
| "learning_rate": 9.431210113941169e-06, | |
| "loss": 0.6063, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 3.601171112556929, | |
| "grad_norm": 1.3815439939498901, | |
| "learning_rate": 9.390857487190274e-06, | |
| "loss": 0.5978, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 3.6044242029928433, | |
| "grad_norm": 1.6549842357635498, | |
| "learning_rate": 9.350571399648988e-06, | |
| "loss": 0.6094, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 3.6076772934287575, | |
| "grad_norm": 1.4034509658813477, | |
| "learning_rate": 9.310352023050272e-06, | |
| "loss": 0.6187, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 3.6109303838646714, | |
| "grad_norm": 1.6350473165512085, | |
| "learning_rate": 9.270199528842715e-06, | |
| "loss": 0.6076, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.6141834743005856, | |
| "grad_norm": 1.4474992752075195, | |
| "learning_rate": 9.230114088189814e-06, | |
| "loss": 0.6507, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 3.6174365647365, | |
| "grad_norm": 1.4828194379806519, | |
| "learning_rate": 9.19009587196921e-06, | |
| "loss": 0.6264, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 3.6206896551724137, | |
| "grad_norm": 1.7121607065200806, | |
| "learning_rate": 9.150145050771972e-06, | |
| "loss": 0.6383, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 3.623942745608328, | |
| "grad_norm": 1.8459277153015137, | |
| "learning_rate": 9.110261794901903e-06, | |
| "loss": 0.6436, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 3.6271958360442422, | |
| "grad_norm": 1.4332444667816162, | |
| "learning_rate": 9.070446274374766e-06, | |
| "loss": 0.6313, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 3.630448926480156, | |
| "grad_norm": 1.2665612697601318, | |
| "learning_rate": 9.030698658917566e-06, | |
| "loss": 0.6003, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 3.6337020169160703, | |
| "grad_norm": 1.5076160430908203, | |
| "learning_rate": 8.99101911796788e-06, | |
| "loss": 0.6203, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 3.6369551073519846, | |
| "grad_norm": 1.567221999168396, | |
| "learning_rate": 8.951407820673058e-06, | |
| "loss": 0.6252, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 3.6402081977878984, | |
| "grad_norm": 1.504109263420105, | |
| "learning_rate": 8.911864935889544e-06, | |
| "loss": 0.6332, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 3.6434612882238127, | |
| "grad_norm": 1.6598913669586182, | |
| "learning_rate": 8.872390632182175e-06, | |
| "loss": 0.6258, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.646714378659727, | |
| "grad_norm": 1.3711302280426025, | |
| "learning_rate": 8.832985077823406e-06, | |
| "loss": 0.6273, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 3.6499674690956407, | |
| "grad_norm": 1.293453574180603, | |
| "learning_rate": 8.793648440792654e-06, | |
| "loss": 0.6041, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 3.653220559531555, | |
| "grad_norm": 1.6621414422988892, | |
| "learning_rate": 8.754380888775523e-06, | |
| "loss": 0.6177, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 3.6564736499674693, | |
| "grad_norm": 1.2931593656539917, | |
| "learning_rate": 8.715182589163153e-06, | |
| "loss": 0.6084, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 3.659726740403383, | |
| "grad_norm": 1.4701381921768188, | |
| "learning_rate": 8.676053709051446e-06, | |
| "loss": 0.6235, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 3.6629798308392973, | |
| "grad_norm": 2.272709369659424, | |
| "learning_rate": 8.636994415240376e-06, | |
| "loss": 0.6326, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 3.6662329212752116, | |
| "grad_norm": 1.3057537078857422, | |
| "learning_rate": 8.598004874233315e-06, | |
| "loss": 0.616, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 3.6694860117111254, | |
| "grad_norm": 1.6016069650650024, | |
| "learning_rate": 8.559085252236259e-06, | |
| "loss": 0.6126, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 3.6727391021470397, | |
| "grad_norm": 1.38706636428833, | |
| "learning_rate": 8.520235715157152e-06, | |
| "loss": 0.6424, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 3.675992192582954, | |
| "grad_norm": 1.403805136680603, | |
| "learning_rate": 8.481456428605205e-06, | |
| "loss": 0.6328, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.6792452830188678, | |
| "grad_norm": 2.8022546768188477, | |
| "learning_rate": 8.442747557890138e-06, | |
| "loss": 0.6225, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 3.682498373454782, | |
| "grad_norm": 1.2923667430877686, | |
| "learning_rate": 8.404109268021493e-06, | |
| "loss": 0.6068, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 3.6857514638906963, | |
| "grad_norm": 1.327010154724121, | |
| "learning_rate": 8.365541723707971e-06, | |
| "loss": 0.6032, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 3.68900455432661, | |
| "grad_norm": 3.022547960281372, | |
| "learning_rate": 8.327045089356663e-06, | |
| "loss": 0.6202, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 3.6922576447625244, | |
| "grad_norm": 1.7190786600112915, | |
| "learning_rate": 8.288619529072394e-06, | |
| "loss": 0.6136, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 3.6955107351984386, | |
| "grad_norm": 1.8883839845657349, | |
| "learning_rate": 8.250265206657025e-06, | |
| "loss": 0.626, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 3.6987638256343525, | |
| "grad_norm": 1.216133952140808, | |
| "learning_rate": 8.211982285608721e-06, | |
| "loss": 0.6084, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 3.7020169160702667, | |
| "grad_norm": 1.4318759441375732, | |
| "learning_rate": 8.17377092912128e-06, | |
| "loss": 0.6252, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 3.705270006506181, | |
| "grad_norm": 1.3429824113845825, | |
| "learning_rate": 8.135631300083448e-06, | |
| "loss": 0.6421, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 3.708523096942095, | |
| "grad_norm": 1.563573956489563, | |
| "learning_rate": 8.097563561078193e-06, | |
| "loss": 0.6426, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.711776187378009, | |
| "grad_norm": 1.3186182975769043, | |
| "learning_rate": 8.059567874382023e-06, | |
| "loss": 0.6148, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 3.7150292778139233, | |
| "grad_norm": 1.4381370544433594, | |
| "learning_rate": 8.021644401964305e-06, | |
| "loss": 0.6206, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 3.718282368249837, | |
| "grad_norm": 1.6375632286071777, | |
| "learning_rate": 7.983793305486583e-06, | |
| "loss": 0.6169, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 3.7215354586857514, | |
| "grad_norm": 1.426100730895996, | |
| "learning_rate": 7.946014746301858e-06, | |
| "loss": 0.6299, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 3.7247885491216657, | |
| "grad_norm": 1.6016979217529297, | |
| "learning_rate": 7.908308885453908e-06, | |
| "loss": 0.6039, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 3.7280416395575795, | |
| "grad_norm": 1.8250033855438232, | |
| "learning_rate": 7.87067588367664e-06, | |
| "loss": 0.6375, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 3.7312947299934938, | |
| "grad_norm": 1.6048786640167236, | |
| "learning_rate": 7.833115901393347e-06, | |
| "loss": 0.6469, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 3.734547820429408, | |
| "grad_norm": 1.473156213760376, | |
| "learning_rate": 7.795629098716045e-06, | |
| "loss": 0.6291, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 3.737800910865322, | |
| "grad_norm": 1.4616464376449585, | |
| "learning_rate": 7.758215635444848e-06, | |
| "loss": 0.6418, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 3.741054001301236, | |
| "grad_norm": 1.3316526412963867, | |
| "learning_rate": 7.720875671067188e-06, | |
| "loss": 0.6052, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.7443070917371504, | |
| "grad_norm": 2.7276248931884766, | |
| "learning_rate": 7.683609364757192e-06, | |
| "loss": 0.6311, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 3.747560182173064, | |
| "grad_norm": 1.4057763814926147, | |
| "learning_rate": 7.646416875374992e-06, | |
| "loss": 0.6262, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 3.7508132726089785, | |
| "grad_norm": 1.7808401584625244, | |
| "learning_rate": 7.609298361466083e-06, | |
| "loss": 0.6372, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 3.7540663630448927, | |
| "grad_norm": 1.5597418546676636, | |
| "learning_rate": 7.572253981260571e-06, | |
| "loss": 0.6181, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 3.7573194534808065, | |
| "grad_norm": 1.6378741264343262, | |
| "learning_rate": 7.535283892672562e-06, | |
| "loss": 0.6247, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 3.760572543916721, | |
| "grad_norm": 2.498858690261841, | |
| "learning_rate": 7.498388253299482e-06, | |
| "loss": 0.643, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 3.763825634352635, | |
| "grad_norm": 1.9484217166900635, | |
| "learning_rate": 7.46156722042137e-06, | |
| "loss": 0.6223, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 3.767078724788549, | |
| "grad_norm": 1.3782168626785278, | |
| "learning_rate": 7.424820951000233e-06, | |
| "loss": 0.6148, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 3.770331815224463, | |
| "grad_norm": 1.3748527765274048, | |
| "learning_rate": 7.388149601679392e-06, | |
| "loss": 0.6242, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 3.7735849056603774, | |
| "grad_norm": 1.4963568449020386, | |
| "learning_rate": 7.351553328782779e-06, | |
| "loss": 0.6014, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.7768379960962912, | |
| "grad_norm": 1.708061695098877, | |
| "learning_rate": 7.31503228831428e-06, | |
| "loss": 0.6154, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 3.7800910865322055, | |
| "grad_norm": 1.8436424732208252, | |
| "learning_rate": 7.278586635957107e-06, | |
| "loss": 0.6263, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 3.7833441769681198, | |
| "grad_norm": 1.9801384210586548, | |
| "learning_rate": 7.242216527073079e-06, | |
| "loss": 0.5955, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 3.7865972674040336, | |
| "grad_norm": 1.4177374839782715, | |
| "learning_rate": 7.205922116701985e-06, | |
| "loss": 0.6255, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 3.789850357839948, | |
| "grad_norm": 1.4929031133651733, | |
| "learning_rate": 7.169703559560953e-06, | |
| "loss": 0.6046, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 3.793103448275862, | |
| "grad_norm": 2.4425814151763916, | |
| "learning_rate": 7.133561010043724e-06, | |
| "loss": 0.6072, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 3.796356538711776, | |
| "grad_norm": 1.5860954523086548, | |
| "learning_rate": 7.097494622220049e-06, | |
| "loss": 0.6173, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 3.79960962914769, | |
| "grad_norm": 1.4166280031204224, | |
| "learning_rate": 7.0615045498350215e-06, | |
| "loss": 0.5985, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 3.8028627195836044, | |
| "grad_norm": 1.7926712036132812, | |
| "learning_rate": 7.025590946308402e-06, | |
| "loss": 0.6077, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 3.8061158100195187, | |
| "grad_norm": 1.411357045173645, | |
| "learning_rate": 6.9897539647339725e-06, | |
| "loss": 0.6126, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.8093689004554325, | |
| "grad_norm": 1.4378728866577148, | |
| "learning_rate": 6.95399375787891e-06, | |
| "loss": 0.6217, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 3.812621990891347, | |
| "grad_norm": 1.630339503288269, | |
| "learning_rate": 6.918310478183093e-06, | |
| "loss": 0.6081, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 3.815875081327261, | |
| "grad_norm": 1.4536669254302979, | |
| "learning_rate": 6.882704277758475e-06, | |
| "loss": 0.631, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 3.819128171763175, | |
| "grad_norm": 1.369432806968689, | |
| "learning_rate": 6.847175308388451e-06, | |
| "loss": 0.6023, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 3.822381262199089, | |
| "grad_norm": 1.8251979351043701, | |
| "learning_rate": 6.811723721527161e-06, | |
| "loss": 0.6088, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 3.8256343526350034, | |
| "grad_norm": 1.4121100902557373, | |
| "learning_rate": 6.776349668298912e-06, | |
| "loss": 0.6393, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 3.828887443070917, | |
| "grad_norm": 1.4803780317306519, | |
| "learning_rate": 6.741053299497468e-06, | |
| "loss": 0.601, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 3.8321405335068315, | |
| "grad_norm": 1.5110501050949097, | |
| "learning_rate": 6.705834765585459e-06, | |
| "loss": 0.6299, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 3.8353936239427457, | |
| "grad_norm": 1.8608803749084473, | |
| "learning_rate": 6.670694216693701e-06, | |
| "loss": 0.6394, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 3.8386467143786596, | |
| "grad_norm": 1.4101976156234741, | |
| "learning_rate": 6.635631802620576e-06, | |
| "loss": 0.6149, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.841899804814574, | |
| "grad_norm": 1.5235905647277832, | |
| "learning_rate": 6.600647672831406e-06, | |
| "loss": 0.6377, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 3.845152895250488, | |
| "grad_norm": 2.4760963916778564, | |
| "learning_rate": 6.565741976457782e-06, | |
| "loss": 0.6315, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 3.8484059856864024, | |
| "grad_norm": 1.4764820337295532, | |
| "learning_rate": 6.530914862296947e-06, | |
| "loss": 0.6148, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 3.851659076122316, | |
| "grad_norm": 1.408517599105835, | |
| "learning_rate": 6.496166478811164e-06, | |
| "loss": 0.629, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 3.8549121665582304, | |
| "grad_norm": 2.276674509048462, | |
| "learning_rate": 6.461496974127093e-06, | |
| "loss": 0.613, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 3.8581652569941447, | |
| "grad_norm": 1.5643647909164429, | |
| "learning_rate": 6.426906496035129e-06, | |
| "loss": 0.6063, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 3.8614183474300585, | |
| "grad_norm": 1.3531688451766968, | |
| "learning_rate": 6.39239519198879e-06, | |
| "loss": 0.6135, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 3.864671437865973, | |
| "grad_norm": 1.4261928796768188, | |
| "learning_rate": 6.357963209104106e-06, | |
| "loss": 0.6206, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 3.867924528301887, | |
| "grad_norm": 1.3013157844543457, | |
| "learning_rate": 6.32361069415896e-06, | |
| "loss": 0.6153, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 3.871177618737801, | |
| "grad_norm": 1.520578145980835, | |
| "learning_rate": 6.289337793592468e-06, | |
| "loss": 0.629, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.874430709173715, | |
| "grad_norm": 1.5987921953201294, | |
| "learning_rate": 6.255144653504382e-06, | |
| "loss": 0.645, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 3.8776837996096294, | |
| "grad_norm": 2.1227879524230957, | |
| "learning_rate": 6.221031419654444e-06, | |
| "loss": 0.6333, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 3.880936890045543, | |
| "grad_norm": 1.5177706480026245, | |
| "learning_rate": 6.1869982374617495e-06, | |
| "loss": 0.629, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 3.8841899804814575, | |
| "grad_norm": 1.3354036808013916, | |
| "learning_rate": 6.153045252004177e-06, | |
| "loss": 0.6055, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 3.8874430709173717, | |
| "grad_norm": 1.8337645530700684, | |
| "learning_rate": 6.119172608017718e-06, | |
| "loss": 0.623, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 3.8906961613532856, | |
| "grad_norm": 1.2876662015914917, | |
| "learning_rate": 6.08538044989588e-06, | |
| "loss": 0.6064, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 3.8939492517892, | |
| "grad_norm": 1.3676327466964722, | |
| "learning_rate": 6.051668921689094e-06, | |
| "loss": 0.6219, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 3.897202342225114, | |
| "grad_norm": 1.5804736614227295, | |
| "learning_rate": 6.0180381671040596e-06, | |
| "loss": 0.6135, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 3.900455432661028, | |
| "grad_norm": 2.2858810424804688, | |
| "learning_rate": 5.9844883295031515e-06, | |
| "loss": 0.6393, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 3.903708523096942, | |
| "grad_norm": 1.8066788911819458, | |
| "learning_rate": 5.9510195519038245e-06, | |
| "loss": 0.6056, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.9069616135328564, | |
| "grad_norm": 1.3947362899780273, | |
| "learning_rate": 5.917631976977975e-06, | |
| "loss": 0.6138, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 3.9102147039687702, | |
| "grad_norm": 1.551949381828308, | |
| "learning_rate": 5.884325747051336e-06, | |
| "loss": 0.614, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 3.9134677944046845, | |
| "grad_norm": 1.3901867866516113, | |
| "learning_rate": 5.851101004102907e-06, | |
| "loss": 0.6375, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 3.9167208848405988, | |
| "grad_norm": 1.4056464433670044, | |
| "learning_rate": 5.817957889764308e-06, | |
| "loss": 0.6141, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 3.9199739752765126, | |
| "grad_norm": 1.499922752380371, | |
| "learning_rate": 5.784896545319187e-06, | |
| "loss": 0.6074, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 3.923227065712427, | |
| "grad_norm": 1.2578163146972656, | |
| "learning_rate": 5.751917111702612e-06, | |
| "loss": 0.6143, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 3.926480156148341, | |
| "grad_norm": 1.2877789735794067, | |
| "learning_rate": 5.719019729500508e-06, | |
| "loss": 0.5956, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 3.929733246584255, | |
| "grad_norm": 1.576788067817688, | |
| "learning_rate": 5.686204538948997e-06, | |
| "loss": 0.6141, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 3.932986337020169, | |
| "grad_norm": 1.8292930126190186, | |
| "learning_rate": 5.653471679933839e-06, | |
| "loss": 0.5909, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 3.9362394274560835, | |
| "grad_norm": 1.5432319641113281, | |
| "learning_rate": 5.62082129198985e-06, | |
| "loss": 0.6199, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.9394925178919973, | |
| "grad_norm": 1.739689826965332, | |
| "learning_rate": 5.58825351430026e-06, | |
| "loss": 0.6035, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 3.9427456083279115, | |
| "grad_norm": 1.3205852508544922, | |
| "learning_rate": 5.555768485696144e-06, | |
| "loss": 0.6169, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 3.945998698763826, | |
| "grad_norm": 1.6433742046356201, | |
| "learning_rate": 5.523366344655856e-06, | |
| "loss": 0.6404, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 3.9492517891997396, | |
| "grad_norm": 1.6137924194335938, | |
| "learning_rate": 5.491047229304397e-06, | |
| "loss": 0.6219, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 3.952504879635654, | |
| "grad_norm": 1.5387951135635376, | |
| "learning_rate": 5.4588112774128314e-06, | |
| "loss": 0.5937, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 3.955757970071568, | |
| "grad_norm": 1.4663158655166626, | |
| "learning_rate": 5.42665862639774e-06, | |
| "loss": 0.6066, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 3.959011060507482, | |
| "grad_norm": 4.082248210906982, | |
| "learning_rate": 5.394589413320589e-06, | |
| "loss": 0.6311, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 3.9622641509433962, | |
| "grad_norm": 1.4563738107681274, | |
| "learning_rate": 5.3626037748871565e-06, | |
| "loss": 0.6142, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 3.9655172413793105, | |
| "grad_norm": 1.569101095199585, | |
| "learning_rate": 5.330701847446962e-06, | |
| "loss": 0.6014, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 3.9687703318152243, | |
| "grad_norm": 1.567270278930664, | |
| "learning_rate": 5.29888376699269e-06, | |
| "loss": 0.6155, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.9720234222511386, | |
| "grad_norm": 1.668445110321045, | |
| "learning_rate": 5.267149669159588e-06, | |
| "loss": 0.6171, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 3.975276512687053, | |
| "grad_norm": 1.7854609489440918, | |
| "learning_rate": 5.235499689224885e-06, | |
| "loss": 0.6135, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 3.9785296031229667, | |
| "grad_norm": 1.8517600297927856, | |
| "learning_rate": 5.203933962107266e-06, | |
| "loss": 0.6207, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 3.981782693558881, | |
| "grad_norm": 1.5116204023361206, | |
| "learning_rate": 5.172452622366228e-06, | |
| "loss": 0.614, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 3.985035783994795, | |
| "grad_norm": 1.4917980432510376, | |
| "learning_rate": 5.141055804201541e-06, | |
| "loss": 0.6118, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 3.988288874430709, | |
| "grad_norm": 1.527981162071228, | |
| "learning_rate": 5.109743641452699e-06, | |
| "loss": 0.6083, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 3.9915419648666233, | |
| "grad_norm": 1.3188831806182861, | |
| "learning_rate": 5.078516267598299e-06, | |
| "loss": 0.6141, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 3.9947950553025375, | |
| "grad_norm": 1.4134242534637451, | |
| "learning_rate": 5.047373815755496e-06, | |
| "loss": 0.6234, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 3.9980481457384514, | |
| "grad_norm": 1.5778809785842896, | |
| "learning_rate": 5.016316418679454e-06, | |
| "loss": 0.6177, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.7989837428748611, | |
| "eval_loss": 0.491455078125, | |
| "eval_precision": 0.7989192926261178, | |
| "eval_recall": 0.7990541428374994, | |
| "eval_runtime": 238.1189, | |
| "eval_samples_per_second": 1652.263, | |
| "eval_steps_per_second": 1.617, | |
| "step": 6148 | |
| }, | |
| { | |
| "epoch": 4.001301236174366, | |
| "grad_norm": 1.376760721206665, | |
| "learning_rate": 4.985344208762757e-06, | |
| "loss": 0.5954, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 4.00455432661028, | |
| "grad_norm": 1.2846732139587402, | |
| "learning_rate": 4.954457318034841e-06, | |
| "loss": 0.533, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 4.007807417046194, | |
| "grad_norm": 1.16463303565979, | |
| "learning_rate": 4.92365587816144e-06, | |
| "loss": 0.533, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 4.011060507482108, | |
| "grad_norm": 1.4882513284683228, | |
| "learning_rate": 4.892940020444043e-06, | |
| "loss": 0.5236, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 4.014313597918022, | |
| "grad_norm": 3.275876998901367, | |
| "learning_rate": 4.862309875819299e-06, | |
| "loss": 0.5213, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 4.017566688353936, | |
| "grad_norm": 1.5742096900939941, | |
| "learning_rate": 4.837867561302392e-06, | |
| "loss": 0.5295, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 4.020819778789851, | |
| "grad_norm": 5.1677422523498535, | |
| "learning_rate": 4.807392029038138e-06, | |
| "loss": 0.5301, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 4.024072869225765, | |
| "grad_norm": 1.7716647386550903, | |
| "learning_rate": 4.77700257454356e-06, | |
| "loss": 0.5366, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 4.027325959661678, | |
| "grad_norm": 1.8003216981887817, | |
| "learning_rate": 4.746699327363918e-06, | |
| "loss": 0.5209, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 4.030579050097593, | |
| "grad_norm": 1.7417036294937134, | |
| "learning_rate": 4.7164824166769735e-06, | |
| "loss": 0.5335, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 4.033832140533507, | |
| "grad_norm": 1.7009021043777466, | |
| "learning_rate": 4.686351971292443e-06, | |
| "loss": 0.5222, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 4.037085230969421, | |
| "grad_norm": 2.0051186084747314, | |
| "learning_rate": 4.6563081196514786e-06, | |
| "loss": 0.5516, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 4.040338321405335, | |
| "grad_norm": 1.5723603963851929, | |
| "learning_rate": 4.626350989826075e-06, | |
| "loss": 0.5263, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 4.043591411841249, | |
| "grad_norm": 1.8875335454940796, | |
| "learning_rate": 4.596480709518547e-06, | |
| "loss": 0.5346, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 4.046844502277163, | |
| "grad_norm": 1.5543326139450073, | |
| "learning_rate": 4.566697406061005e-06, | |
| "loss": 0.5344, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 4.050097592713078, | |
| "grad_norm": 1.6131196022033691, | |
| "learning_rate": 4.53700120641477e-06, | |
| "loss": 0.5318, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 4.053350683148992, | |
| "grad_norm": 1.3502036333084106, | |
| "learning_rate": 4.5073922371698554e-06, | |
| "loss": 0.5234, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 4.056603773584905, | |
| "grad_norm": 2.2002179622650146, | |
| "learning_rate": 4.4778706245444475e-06, | |
| "loss": 0.5422, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 4.05985686402082, | |
| "grad_norm": 1.62948477268219, | |
| "learning_rate": 4.44843649438432e-06, | |
| "loss": 0.5136, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 4.063109954456734, | |
| "grad_norm": 1.563274621963501, | |
| "learning_rate": 4.419089972162327e-06, | |
| "loss": 0.5087, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 4.066363044892648, | |
| "grad_norm": 1.5413563251495361, | |
| "learning_rate": 4.389831182977882e-06, | |
| "loss": 0.535, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 4.0696161353285625, | |
| "grad_norm": 1.6265994310379028, | |
| "learning_rate": 4.360660251556395e-06, | |
| "loss": 0.5291, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 4.072869225764476, | |
| "grad_norm": 1.6212644577026367, | |
| "learning_rate": 4.331577302248746e-06, | |
| "loss": 0.5165, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 4.07612231620039, | |
| "grad_norm": 1.5618913173675537, | |
| "learning_rate": 4.302582459030769e-06, | |
| "loss": 0.5301, | |
| "step": 6265 | |
| }, | |
| { | |
| "epoch": 4.079375406636305, | |
| "grad_norm": 1.7876514196395874, | |
| "learning_rate": 4.273675845502722e-06, | |
| "loss": 0.5282, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 4.082628497072219, | |
| "grad_norm": 1.6155240535736084, | |
| "learning_rate": 4.244857584888748e-06, | |
| "loss": 0.5219, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 4.0858815875081325, | |
| "grad_norm": 1.826150894165039, | |
| "learning_rate": 4.2161278000363456e-06, | |
| "loss": 0.5254, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 4.089134677944047, | |
| "grad_norm": 1.569254755973816, | |
| "learning_rate": 4.187486613415878e-06, | |
| "loss": 0.5563, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 4.092387768379961, | |
| "grad_norm": 1.651341438293457, | |
| "learning_rate": 4.158934147120019e-06, | |
| "loss": 0.5196, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 4.095640858815875, | |
| "grad_norm": 1.960835337638855, | |
| "learning_rate": 4.130470522863231e-06, | |
| "loss": 0.5233, | |
| "step": 6295 | |
| }, | |
| { | |
| "epoch": 4.0988939492517895, | |
| "grad_norm": 1.762459397315979, | |
| "learning_rate": 4.102095861981275e-06, | |
| "loss": 0.5101, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 4.102147039687703, | |
| "grad_norm": 1.7269344329833984, | |
| "learning_rate": 4.073810285430668e-06, | |
| "loss": 0.5283, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 4.105400130123617, | |
| "grad_norm": 2.420794725418091, | |
| "learning_rate": 4.045613913788171e-06, | |
| "loss": 0.5168, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 4.108653220559532, | |
| "grad_norm": 1.5948150157928467, | |
| "learning_rate": 4.0175068672502784e-06, | |
| "loss": 0.535, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 4.111906310995446, | |
| "grad_norm": 2.1127867698669434, | |
| "learning_rate": 3.9894892656327235e-06, | |
| "loss": 0.5181, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 4.1151594014313595, | |
| "grad_norm": 2.1554746627807617, | |
| "learning_rate": 3.961561228369928e-06, | |
| "loss": 0.5314, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 4.118412491867274, | |
| "grad_norm": 1.7790179252624512, | |
| "learning_rate": 3.933722874514526e-06, | |
| "loss": 0.5327, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 4.121665582303188, | |
| "grad_norm": 1.5885546207427979, | |
| "learning_rate": 3.905974322736849e-06, | |
| "loss": 0.5221, | |
| "step": 6335 | |
| }, | |
| { | |
| "epoch": 4.124918672739102, | |
| "grad_norm": 1.4991848468780518, | |
| "learning_rate": 3.878315691324416e-06, | |
| "loss": 0.5134, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 4.1281717631750166, | |
| "grad_norm": 1.57703697681427, | |
| "learning_rate": 3.850747098181421e-06, | |
| "loss": 0.5239, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 4.13142485361093, | |
| "grad_norm": 3.0852479934692383, | |
| "learning_rate": 3.82326866082825e-06, | |
| "loss": 0.5216, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 4.134677944046844, | |
| "grad_norm": 1.6248340606689453, | |
| "learning_rate": 3.7958804964009692e-06, | |
| "loss": 0.5195, | |
| "step": 6355 | |
| }, | |
| { | |
| "epoch": 4.137931034482759, | |
| "grad_norm": 1.69948410987854, | |
| "learning_rate": 3.7685827216508124e-06, | |
| "loss": 0.507, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 4.141184124918673, | |
| "grad_norm": 1.6397584676742554, | |
| "learning_rate": 3.741375452943724e-06, | |
| "loss": 0.5353, | |
| "step": 6365 | |
| }, | |
| { | |
| "epoch": 4.1444372153545865, | |
| "grad_norm": 1.4918780326843262, | |
| "learning_rate": 3.714258806259807e-06, | |
| "loss": 0.5013, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 4.147690305790501, | |
| "grad_norm": 2.1283321380615234, | |
| "learning_rate": 3.6872328971928718e-06, | |
| "loss": 0.5289, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 4.150943396226415, | |
| "grad_norm": 2.7849512100219727, | |
| "learning_rate": 3.660297840949933e-06, | |
| "loss": 0.5289, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 4.154196486662329, | |
| "grad_norm": 1.7255409955978394, | |
| "learning_rate": 3.633453752350707e-06, | |
| "loss": 0.5174, | |
| "step": 6385 | |
| }, | |
| { | |
| "epoch": 4.157449577098244, | |
| "grad_norm": 1.7871309518814087, | |
| "learning_rate": 3.606700745827127e-06, | |
| "loss": 0.5231, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 4.160702667534157, | |
| "grad_norm": 1.5307867527008057, | |
| "learning_rate": 3.5800389354228748e-06, | |
| "loss": 0.524, | |
| "step": 6395 | |
| }, | |
| { | |
| "epoch": 4.163955757970071, | |
| "grad_norm": 1.9164159297943115, | |
| "learning_rate": 3.553468434792859e-06, | |
| "loss": 0.5321, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 4.167208848405986, | |
| "grad_norm": 1.539781093597412, | |
| "learning_rate": 3.526989357202756e-06, | |
| "loss": 0.5223, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 4.1704619388419, | |
| "grad_norm": 1.5751947164535522, | |
| "learning_rate": 3.5006018155285286e-06, | |
| "loss": 0.5302, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 4.173715029277814, | |
| "grad_norm": 1.7798151969909668, | |
| "learning_rate": 3.4743059222559298e-06, | |
| "loss": 0.5295, | |
| "step": 6415 | |
| }, | |
| { | |
| "epoch": 4.176968119713728, | |
| "grad_norm": 2.035566568374634, | |
| "learning_rate": 3.448101789480024e-06, | |
| "loss": 0.5249, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 4.180221210149642, | |
| "grad_norm": 1.6014204025268555, | |
| "learning_rate": 3.4219895289047317e-06, | |
| "loss": 0.5236, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 4.183474300585556, | |
| "grad_norm": 1.9151594638824463, | |
| "learning_rate": 3.395969251842329e-06, | |
| "loss": 0.5146, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 4.186727391021471, | |
| "grad_norm": 1.543568730354309, | |
| "learning_rate": 3.3700410692129815e-06, | |
| "loss": 0.518, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 4.189980481457384, | |
| "grad_norm": 1.5983526706695557, | |
| "learning_rate": 3.3442050915442615e-06, | |
| "loss": 0.5047, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 4.193233571893298, | |
| "grad_norm": 1.5908766984939575, | |
| "learning_rate": 3.318461428970707e-06, | |
| "loss": 0.5273, | |
| "step": 6445 | |
| }, | |
| { | |
| "epoch": 4.196486662329213, | |
| "grad_norm": 1.7272975444793701, | |
| "learning_rate": 3.2928101912333197e-06, | |
| "loss": 0.5143, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 4.199739752765127, | |
| "grad_norm": 1.6854057312011719, | |
| "learning_rate": 3.2672514876791044e-06, | |
| "loss": 0.5412, | |
| "step": 6455 | |
| }, | |
| { | |
| "epoch": 4.202992843201041, | |
| "grad_norm": 1.7159767150878906, | |
| "learning_rate": 3.2417854272606212e-06, | |
| "loss": 0.5328, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 4.206245933636955, | |
| "grad_norm": 2.0293431282043457, | |
| "learning_rate": 3.2164121185355026e-06, | |
| "loss": 0.5207, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 4.209499024072869, | |
| "grad_norm": 1.4942529201507568, | |
| "learning_rate": 3.1911316696659837e-06, | |
| "loss": 0.5098, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 4.212752114508783, | |
| "grad_norm": 1.5757249593734741, | |
| "learning_rate": 3.165944188418474e-06, | |
| "loss": 0.5075, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 4.216005204944698, | |
| "grad_norm": 1.6114063262939453, | |
| "learning_rate": 3.140849782163066e-06, | |
| "loss": 0.5283, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 4.2192582953806115, | |
| "grad_norm": 1.791574478149414, | |
| "learning_rate": 3.1158485578730883e-06, | |
| "loss": 0.5116, | |
| "step": 6485 | |
| }, | |
| { | |
| "epoch": 4.222511385816525, | |
| "grad_norm": 1.4832271337509155, | |
| "learning_rate": 3.090940622124644e-06, | |
| "loss": 0.5187, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 4.22576447625244, | |
| "grad_norm": 1.5384358167648315, | |
| "learning_rate": 3.066126081096185e-06, | |
| "loss": 0.5158, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 4.229017566688354, | |
| "grad_norm": 1.766423225402832, | |
| "learning_rate": 3.0414050405680155e-06, | |
| "loss": 0.5196, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.232270657124268, | |
| "grad_norm": 2.07438325881958, | |
| "learning_rate": 3.016777605921861e-06, | |
| "loss": 0.5062, | |
| "step": 6505 | |
| }, | |
| { | |
| "epoch": 4.235523747560182, | |
| "grad_norm": 4.485304355621338, | |
| "learning_rate": 2.9922438821404415e-06, | |
| "loss": 0.4975, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 4.238776837996096, | |
| "grad_norm": 1.6027443408966064, | |
| "learning_rate": 2.9678039738069845e-06, | |
| "loss": 0.5211, | |
| "step": 6515 | |
| }, | |
| { | |
| "epoch": 4.24202992843201, | |
| "grad_norm": 2.2789571285247803, | |
| "learning_rate": 2.9434579851047973e-06, | |
| "loss": 0.5084, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 4.245283018867925, | |
| "grad_norm": 1.481426477432251, | |
| "learning_rate": 2.919206019816842e-06, | |
| "loss": 0.5417, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 4.2485361093038385, | |
| "grad_norm": 1.6203233003616333, | |
| "learning_rate": 2.895048181325252e-06, | |
| "loss": 0.5114, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 4.251789199739752, | |
| "grad_norm": 1.5848479270935059, | |
| "learning_rate": 2.8709845726109243e-06, | |
| "loss": 0.5028, | |
| "step": 6535 | |
| }, | |
| { | |
| "epoch": 4.255042290175667, | |
| "grad_norm": 1.80342435836792, | |
| "learning_rate": 2.8470152962530723e-06, | |
| "loss": 0.5122, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 4.258295380611581, | |
| "grad_norm": 2.087617874145508, | |
| "learning_rate": 2.8231404544287796e-06, | |
| "loss": 0.506, | |
| "step": 6545 | |
| }, | |
| { | |
| "epoch": 4.261548471047496, | |
| "grad_norm": 1.7649626731872559, | |
| "learning_rate": 2.7993601489125693e-06, | |
| "loss": 0.5166, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 4.264801561483409, | |
| "grad_norm": 3.1642332077026367, | |
| "learning_rate": 2.7756744810759823e-06, | |
| "loss": 0.5107, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 4.268054651919323, | |
| "grad_norm": 1.9564752578735352, | |
| "learning_rate": 2.7520835518871302e-06, | |
| "loss": 0.5112, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 4.271307742355237, | |
| "grad_norm": 1.6043564081192017, | |
| "learning_rate": 2.7285874619102675e-06, | |
| "loss": 0.5084, | |
| "step": 6565 | |
| }, | |
| { | |
| "epoch": 4.274560832791152, | |
| "grad_norm": 1.9543806314468384, | |
| "learning_rate": 2.705186311305355e-06, | |
| "loss": 0.5135, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 4.2778139232270656, | |
| "grad_norm": 1.6966253519058228, | |
| "learning_rate": 2.6818801998276634e-06, | |
| "loss": 0.525, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 4.28106701366298, | |
| "grad_norm": 2.0935935974121094, | |
| "learning_rate": 2.658669226827315e-06, | |
| "loss": 0.5216, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 4.284320104098894, | |
| "grad_norm": 1.7863517999649048, | |
| "learning_rate": 2.6355534912488627e-06, | |
| "loss": 0.5271, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 4.287573194534808, | |
| "grad_norm": 1.611092448234558, | |
| "learning_rate": 2.612533091630903e-06, | |
| "loss": 0.5142, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 4.290826284970722, | |
| "grad_norm": 1.709322452545166, | |
| "learning_rate": 2.5896081261056138e-06, | |
| "loss": 0.5292, | |
| "step": 6595 | |
| }, | |
| { | |
| "epoch": 4.294079375406636, | |
| "grad_norm": 1.7398649454116821, | |
| "learning_rate": 2.5667786923983443e-06, | |
| "loss": 0.5253, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 4.29733246584255, | |
| "grad_norm": 1.5445489883422852, | |
| "learning_rate": 2.544044887827235e-06, | |
| "loss": 0.5443, | |
| "step": 6605 | |
| }, | |
| { | |
| "epoch": 4.300585556278465, | |
| "grad_norm": 1.763914704322815, | |
| "learning_rate": 2.5214068093027484e-06, | |
| "loss": 0.5301, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 4.303838646714379, | |
| "grad_norm": 2.1207916736602783, | |
| "learning_rate": 2.498864553327296e-06, | |
| "loss": 0.5351, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 4.307091737150293, | |
| "grad_norm": 1.8002142906188965, | |
| "learning_rate": 2.4764182159948133e-06, | |
| "loss": 0.5043, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 4.310344827586207, | |
| "grad_norm": 1.4603972434997559, | |
| "learning_rate": 2.454067892990347e-06, | |
| "loss": 0.5032, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 4.313597918022121, | |
| "grad_norm": 1.6874291896820068, | |
| "learning_rate": 2.431813679589645e-06, | |
| "loss": 0.5232, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 4.316851008458035, | |
| "grad_norm": 1.7689220905303955, | |
| "learning_rate": 2.4096556706587726e-06, | |
| "loss": 0.5218, | |
| "step": 6635 | |
| }, | |
| { | |
| "epoch": 4.32010409889395, | |
| "grad_norm": 1.5644956827163696, | |
| "learning_rate": 2.387593960653675e-06, | |
| "loss": 0.5164, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 4.3233571893298635, | |
| "grad_norm": 2.199660301208496, | |
| "learning_rate": 2.3656286436197965e-06, | |
| "loss": 0.538, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 4.326610279765777, | |
| "grad_norm": 2.4460320472717285, | |
| "learning_rate": 2.343759813191676e-06, | |
| "loss": 0.5197, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.329863370201692, | |
| "grad_norm": 1.8965719938278198, | |
| "learning_rate": 2.3219875625925452e-06, | |
| "loss": 0.5399, | |
| "step": 6655 | |
| }, | |
| { | |
| "epoch": 4.333116460637606, | |
| "grad_norm": 1.7241499423980713, | |
| "learning_rate": 2.3003119846339293e-06, | |
| "loss": 0.514, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 4.33636955107352, | |
| "grad_norm": 1.776291847229004, | |
| "learning_rate": 2.27873317171525e-06, | |
| "loss": 0.5217, | |
| "step": 6665 | |
| }, | |
| { | |
| "epoch": 4.339622641509434, | |
| "grad_norm": 1.6230307817459106, | |
| "learning_rate": 2.25725121582345e-06, | |
| "loss": 0.5208, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 4.342875731945348, | |
| "grad_norm": 1.5767405033111572, | |
| "learning_rate": 2.2358662085325723e-06, | |
| "loss": 0.5064, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 4.346128822381262, | |
| "grad_norm": 1.785072922706604, | |
| "learning_rate": 2.2145782410033844e-06, | |
| "loss": 0.5195, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 4.349381912817177, | |
| "grad_norm": 2.802659034729004, | |
| "learning_rate": 2.1933874039830078e-06, | |
| "loss": 0.5178, | |
| "step": 6685 | |
| }, | |
| { | |
| "epoch": 4.3526350032530905, | |
| "grad_norm": 1.8929702043533325, | |
| "learning_rate": 2.172293787804483e-06, | |
| "loss": 0.5281, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 4.355888093689004, | |
| "grad_norm": 2.050996780395508, | |
| "learning_rate": 2.1512974823864414e-06, | |
| "loss": 0.5432, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 4.359141184124919, | |
| "grad_norm": 1.6718263626098633, | |
| "learning_rate": 2.130398577232673e-06, | |
| "loss": 0.5267, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.362394274560833, | |
| "grad_norm": 1.8539758920669556, | |
| "learning_rate": 2.109597161431784e-06, | |
| "loss": 0.5334, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 4.365647364996747, | |
| "grad_norm": 1.541066288948059, | |
| "learning_rate": 2.088893323656793e-06, | |
| "loss": 0.5235, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 4.368900455432661, | |
| "grad_norm": 1.5558756589889526, | |
| "learning_rate": 2.068287152164747e-06, | |
| "loss": 0.5157, | |
| "step": 6715 | |
| }, | |
| { | |
| "epoch": 4.372153545868575, | |
| "grad_norm": 1.825431227684021, | |
| "learning_rate": 2.0477787347963823e-06, | |
| "loss": 0.521, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 4.375406636304489, | |
| "grad_norm": 1.558396816253662, | |
| "learning_rate": 2.0273681589757063e-06, | |
| "loss": 0.5082, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 4.378659726740404, | |
| "grad_norm": 1.8559561967849731, | |
| "learning_rate": 2.007055511709646e-06, | |
| "loss": 0.526, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 4.3819128171763175, | |
| "grad_norm": 1.8222005367279053, | |
| "learning_rate": 1.986840879587687e-06, | |
| "loss": 0.522, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 4.385165907612231, | |
| "grad_norm": 4.778210639953613, | |
| "learning_rate": 1.966724348781479e-06, | |
| "loss": 0.5089, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 4.388418998048146, | |
| "grad_norm": 1.7374241352081299, | |
| "learning_rate": 1.9467060050444824e-06, | |
| "loss": 0.5166, | |
| "step": 6745 | |
| }, | |
| { | |
| "epoch": 4.39167208848406, | |
| "grad_norm": 1.846447467803955, | |
| "learning_rate": 1.9267859337116195e-06, | |
| "loss": 0.5255, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.394925178919974, | |
| "grad_norm": 1.6373209953308105, | |
| "learning_rate": 1.9069642196988757e-06, | |
| "loss": 0.5103, | |
| "step": 6755 | |
| }, | |
| { | |
| "epoch": 4.398178269355888, | |
| "grad_norm": 2.6573219299316406, | |
| "learning_rate": 1.8872409475029524e-06, | |
| "loss": 0.5192, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 4.401431359791802, | |
| "grad_norm": 3.289806365966797, | |
| "learning_rate": 1.8676162012009307e-06, | |
| "loss": 0.5195, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 4.404684450227716, | |
| "grad_norm": 2.3919076919555664, | |
| "learning_rate": 1.8480900644498756e-06, | |
| "loss": 0.5139, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 4.407937540663631, | |
| "grad_norm": 2.7541277408599854, | |
| "learning_rate": 1.8286626204864903e-06, | |
| "loss": 0.5285, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 4.411190631099545, | |
| "grad_norm": 2.060319423675537, | |
| "learning_rate": 1.8093339521267876e-06, | |
| "loss": 0.5211, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 4.414443721535458, | |
| "grad_norm": 1.9002997875213623, | |
| "learning_rate": 1.7901041417657027e-06, | |
| "loss": 0.5189, | |
| "step": 6785 | |
| }, | |
| { | |
| "epoch": 4.417696811971373, | |
| "grad_norm": 2.1053810119628906, | |
| "learning_rate": 1.7709732713767497e-06, | |
| "loss": 0.5107, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 4.420949902407287, | |
| "grad_norm": 1.6905279159545898, | |
| "learning_rate": 1.7519414225116937e-06, | |
| "loss": 0.5147, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 4.424202992843201, | |
| "grad_norm": 2.2751264572143555, | |
| "learning_rate": 1.733008676300177e-06, | |
| "loss": 0.5065, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.427456083279115, | |
| "grad_norm": 1.9138133525848389, | |
| "learning_rate": 1.7141751134493815e-06, | |
| "loss": 0.5144, | |
| "step": 6805 | |
| }, | |
| { | |
| "epoch": 4.430709173715029, | |
| "grad_norm": 1.75284743309021, | |
| "learning_rate": 1.6954408142436955e-06, | |
| "loss": 0.5164, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 4.433962264150943, | |
| "grad_norm": 1.6290788650512695, | |
| "learning_rate": 1.6768058585443585e-06, | |
| "loss": 0.5197, | |
| "step": 6815 | |
| }, | |
| { | |
| "epoch": 4.437215354586858, | |
| "grad_norm": 2.135432243347168, | |
| "learning_rate": 1.6582703257891214e-06, | |
| "loss": 0.5252, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 4.440468445022772, | |
| "grad_norm": 1.6389341354370117, | |
| "learning_rate": 1.63983429499191e-06, | |
| "loss": 0.5217, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 4.443721535458685, | |
| "grad_norm": 1.6227918863296509, | |
| "learning_rate": 1.6214978447425062e-06, | |
| "loss": 0.5178, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 4.4469746258946, | |
| "grad_norm": 1.907899022102356, | |
| "learning_rate": 1.603261053206176e-06, | |
| "loss": 0.5235, | |
| "step": 6835 | |
| }, | |
| { | |
| "epoch": 4.450227716330514, | |
| "grad_norm": 2.548617362976074, | |
| "learning_rate": 1.5851239981233639e-06, | |
| "loss": 0.5238, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 4.453480806766428, | |
| "grad_norm": 1.8666588068008423, | |
| "learning_rate": 1.5670867568093633e-06, | |
| "loss": 0.5378, | |
| "step": 6845 | |
| }, | |
| { | |
| "epoch": 4.4567338972023425, | |
| "grad_norm": 1.6732510328292847, | |
| "learning_rate": 1.5491494061539658e-06, | |
| "loss": 0.5101, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.459986987638256, | |
| "grad_norm": 1.560084342956543, | |
| "learning_rate": 1.5313120226211452e-06, | |
| "loss": 0.5318, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 4.46324007807417, | |
| "grad_norm": 4.673284530639648, | |
| "learning_rate": 1.5135746822487419e-06, | |
| "loss": 0.5279, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 4.466493168510085, | |
| "grad_norm": 1.600279450416565, | |
| "learning_rate": 1.4959374606481251e-06, | |
| "loss": 0.4943, | |
| "step": 6865 | |
| }, | |
| { | |
| "epoch": 4.469746258945999, | |
| "grad_norm": 2.073321580886841, | |
| "learning_rate": 1.4784004330038653e-06, | |
| "loss": 0.5204, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 4.4729993493819125, | |
| "grad_norm": 3.2433438301086426, | |
| "learning_rate": 1.4609636740734316e-06, | |
| "loss": 0.5174, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 4.476252439817827, | |
| "grad_norm": 2.53226637840271, | |
| "learning_rate": 1.4436272581868665e-06, | |
| "loss": 0.54, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 4.479505530253741, | |
| "grad_norm": 1.7645595073699951, | |
| "learning_rate": 1.4263912592464597e-06, | |
| "loss": 0.5271, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 4.482758620689655, | |
| "grad_norm": 1.7925113439559937, | |
| "learning_rate": 1.4092557507264375e-06, | |
| "loss": 0.5169, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 4.4860117111255695, | |
| "grad_norm": 2.9148597717285156, | |
| "learning_rate": 1.3922208056726644e-06, | |
| "loss": 0.525, | |
| "step": 6895 | |
| }, | |
| { | |
| "epoch": 4.489264801561483, | |
| "grad_norm": 3.2308194637298584, | |
| "learning_rate": 1.3752864967023105e-06, | |
| "loss": 0.5341, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.492517891997397, | |
| "grad_norm": 1.633375644683838, | |
| "learning_rate": 1.358452896003548e-06, | |
| "loss": 0.5249, | |
| "step": 6905 | |
| }, | |
| { | |
| "epoch": 4.495770982433312, | |
| "grad_norm": 1.7651923894882202, | |
| "learning_rate": 1.3417200753352538e-06, | |
| "loss": 0.5211, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 4.499024072869226, | |
| "grad_norm": 1.584030032157898, | |
| "learning_rate": 1.3250881060266952e-06, | |
| "loss": 0.5164, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 4.5022771633051395, | |
| "grad_norm": 2.4326541423797607, | |
| "learning_rate": 1.3085570589772168e-06, | |
| "loss": 0.5306, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 4.505530253741054, | |
| "grad_norm": 1.5874032974243164, | |
| "learning_rate": 1.2921270046559658e-06, | |
| "loss": 0.5374, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 4.508783344176968, | |
| "grad_norm": 2.053276300430298, | |
| "learning_rate": 1.2757980131015563e-06, | |
| "loss": 0.5294, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 4.512036434612883, | |
| "grad_norm": 1.5977790355682373, | |
| "learning_rate": 1.2595701539217963e-06, | |
| "loss": 0.515, | |
| "step": 6935 | |
| }, | |
| { | |
| "epoch": 4.5152895250487965, | |
| "grad_norm": 1.5569490194320679, | |
| "learning_rate": 1.2434434962933866e-06, | |
| "loss": 0.5178, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 4.51854261548471, | |
| "grad_norm": 1.8135985136032104, | |
| "learning_rate": 1.2274181089616172e-06, | |
| "loss": 0.5268, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 4.521795705920624, | |
| "grad_norm": 1.5852515697479248, | |
| "learning_rate": 1.2114940602400788e-06, | |
| "loss": 0.5192, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.525048796356539, | |
| "grad_norm": 2.1236679553985596, | |
| "learning_rate": 1.19567141801038e-06, | |
| "loss": 0.527, | |
| "step": 6955 | |
| }, | |
| { | |
| "epoch": 4.528301886792453, | |
| "grad_norm": 2.562978744506836, | |
| "learning_rate": 1.1799502497218368e-06, | |
| "loss": 0.5379, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 4.531554977228367, | |
| "grad_norm": 1.632822871208191, | |
| "learning_rate": 1.164330622391213e-06, | |
| "loss": 0.5162, | |
| "step": 6965 | |
| }, | |
| { | |
| "epoch": 4.534808067664281, | |
| "grad_norm": 2.966524124145508, | |
| "learning_rate": 1.1488126026024087e-06, | |
| "loss": 0.5399, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 4.538061158100195, | |
| "grad_norm": 1.8411732912063599, | |
| "learning_rate": 1.1333962565061973e-06, | |
| "loss": 0.5232, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 4.541314248536109, | |
| "grad_norm": 1.5464459657669067, | |
| "learning_rate": 1.118081649819927e-06, | |
| "loss": 0.5168, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 4.544567338972024, | |
| "grad_norm": 1.7210750579833984, | |
| "learning_rate": 1.1028688478272459e-06, | |
| "loss": 0.5327, | |
| "step": 6985 | |
| }, | |
| { | |
| "epoch": 4.547820429407937, | |
| "grad_norm": 2.1294288635253906, | |
| "learning_rate": 1.0877579153778323e-06, | |
| "loss": 0.4963, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 4.551073519843852, | |
| "grad_norm": 1.5896108150482178, | |
| "learning_rate": 1.0727489168871092e-06, | |
| "loss": 0.537, | |
| "step": 6995 | |
| }, | |
| { | |
| "epoch": 4.554326610279766, | |
| "grad_norm": 1.6593022346496582, | |
| "learning_rate": 1.0578419163359666e-06, | |
| "loss": 0.5164, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.55757970071568, | |
| "grad_norm": 1.6132862567901611, | |
| "learning_rate": 1.0430369772705034e-06, | |
| "loss": 0.5246, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 4.560832791151594, | |
| "grad_norm": 1.6968963146209717, | |
| "learning_rate": 1.028334162801739e-06, | |
| "loss": 0.5169, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 4.564085881587508, | |
| "grad_norm": 3.422121524810791, | |
| "learning_rate": 1.0137335356053545e-06, | |
| "loss": 0.5306, | |
| "step": 7015 | |
| }, | |
| { | |
| "epoch": 4.567338972023422, | |
| "grad_norm": 2.2838146686553955, | |
| "learning_rate": 9.99235157921427e-07, | |
| "loss": 0.536, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 4.570592062459337, | |
| "grad_norm": 1.923091173171997, | |
| "learning_rate": 9.8483909155416e-07, | |
| "loss": 0.5165, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 4.573845152895251, | |
| "grad_norm": 1.5500158071517944, | |
| "learning_rate": 9.705453978716112e-07, | |
| "loss": 0.5086, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 4.577098243331164, | |
| "grad_norm": 1.948114037513733, | |
| "learning_rate": 9.56354137805457e-07, | |
| "loss": 0.5262, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 4.580351333767078, | |
| "grad_norm": 2.5097603797912598, | |
| "learning_rate": 9.422653718507007e-07, | |
| "loss": 0.5353, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 4.583604424202993, | |
| "grad_norm": 1.757633090019226, | |
| "learning_rate": 9.282791600654428e-07, | |
| "loss": 0.5167, | |
| "step": 7045 | |
| }, | |
| { | |
| "epoch": 4.586857514638907, | |
| "grad_norm": 2.2960455417633057, | |
| "learning_rate": 9.14395562070594e-07, | |
| "loss": 0.5264, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.5901106050748215, | |
| "grad_norm": 1.556706428527832, | |
| "learning_rate": 9.006146370496654e-07, | |
| "loss": 0.5177, | |
| "step": 7055 | |
| }, | |
| { | |
| "epoch": 4.593363695510735, | |
| "grad_norm": 1.7054029703140259, | |
| "learning_rate": 8.869364437484678e-07, | |
| "loss": 0.4893, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 4.596616785946649, | |
| "grad_norm": 1.746472716331482, | |
| "learning_rate": 8.733610404748904e-07, | |
| "loss": 0.5093, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 4.599869876382563, | |
| "grad_norm": 2.1942458152770996, | |
| "learning_rate": 8.598884850986533e-07, | |
| "loss": 0.5299, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 4.603122966818478, | |
| "grad_norm": 2.43866229057312, | |
| "learning_rate": 8.465188350510411e-07, | |
| "loss": 0.5282, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 4.6063760572543915, | |
| "grad_norm": 1.625575304031372, | |
| "learning_rate": 8.332521473246758e-07, | |
| "loss": 0.5189, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 4.609629147690306, | |
| "grad_norm": 2.3699636459350586, | |
| "learning_rate": 8.200884784732688e-07, | |
| "loss": 0.5249, | |
| "step": 7085 | |
| }, | |
| { | |
| "epoch": 4.61288223812622, | |
| "grad_norm": 1.750931739807129, | |
| "learning_rate": 8.070278846113749e-07, | |
| "loss": 0.5165, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 4.616135328562134, | |
| "grad_norm": 1.8055213689804077, | |
| "learning_rate": 7.940704214141614e-07, | |
| "loss": 0.5315, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 4.6193884189980485, | |
| "grad_norm": 2.2767059803009033, | |
| "learning_rate": 7.812161441171611e-07, | |
| "loss": 0.5232, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.622641509433962, | |
| "grad_norm": 1.4966483116149902, | |
| "learning_rate": 7.684651075160531e-07, | |
| "loss": 0.5045, | |
| "step": 7105 | |
| }, | |
| { | |
| "epoch": 4.625894599869876, | |
| "grad_norm": 2.188704490661621, | |
| "learning_rate": 7.558173659664075e-07, | |
| "loss": 0.5201, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 4.629147690305791, | |
| "grad_norm": 2.934805154800415, | |
| "learning_rate": 7.432729733834631e-07, | |
| "loss": 0.5247, | |
| "step": 7115 | |
| }, | |
| { | |
| "epoch": 4.632400780741705, | |
| "grad_norm": 1.9948830604553223, | |
| "learning_rate": 7.308319832419141e-07, | |
| "loss": 0.5247, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 4.6356538711776185, | |
| "grad_norm": 1.8401069641113281, | |
| "learning_rate": 7.18494448575649e-07, | |
| "loss": 0.5364, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 4.638906961613533, | |
| "grad_norm": 1.45015549659729, | |
| "learning_rate": 7.062604219775531e-07, | |
| "loss": 0.5106, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 4.642160052049447, | |
| "grad_norm": 1.7785407304763794, | |
| "learning_rate": 6.941299555992737e-07, | |
| "loss": 0.5117, | |
| "step": 7135 | |
| }, | |
| { | |
| "epoch": 4.645413142485361, | |
| "grad_norm": 2.026643753051758, | |
| "learning_rate": 6.821031011509937e-07, | |
| "loss": 0.5039, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 4.648666232921276, | |
| "grad_norm": 1.6481338739395142, | |
| "learning_rate": 6.701799099012141e-07, | |
| "loss": 0.5385, | |
| "step": 7145 | |
| }, | |
| { | |
| "epoch": 4.651919323357189, | |
| "grad_norm": 2.9961116313934326, | |
| "learning_rate": 6.583604326765496e-07, | |
| "loss": 0.5148, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.655172413793103, | |
| "grad_norm": 1.7340404987335205, | |
| "learning_rate": 6.466447198614806e-07, | |
| "loss": 0.4913, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 4.658425504229018, | |
| "grad_norm": 1.569608211517334, | |
| "learning_rate": 6.350328213981654e-07, | |
| "loss": 0.5052, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 4.661678594664932, | |
| "grad_norm": 1.9746705293655396, | |
| "learning_rate": 6.235247867862226e-07, | |
| "loss": 0.4885, | |
| "step": 7165 | |
| }, | |
| { | |
| "epoch": 4.6649316851008455, | |
| "grad_norm": 1.7358078956604004, | |
| "learning_rate": 6.121206650825162e-07, | |
| "loss": 0.5256, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 4.66818477553676, | |
| "grad_norm": 1.609820008277893, | |
| "learning_rate": 6.008205049009341e-07, | |
| "loss": 0.5275, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 4.671437865972674, | |
| "grad_norm": 1.8338040113449097, | |
| "learning_rate": 5.896243544122076e-07, | |
| "loss": 0.5019, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 4.674690956408588, | |
| "grad_norm": 1.7695443630218506, | |
| "learning_rate": 5.785322613436894e-07, | |
| "loss": 0.5287, | |
| "step": 7185 | |
| }, | |
| { | |
| "epoch": 4.677944046844503, | |
| "grad_norm": 1.9566013813018799, | |
| "learning_rate": 5.675442729791425e-07, | |
| "loss": 0.5262, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 4.681197137280416, | |
| "grad_norm": 1.9720107316970825, | |
| "learning_rate": 5.566604361585626e-07, | |
| "loss": 0.5327, | |
| "step": 7195 | |
| }, | |
| { | |
| "epoch": 4.68445022771633, | |
| "grad_norm": 2.7521474361419678, | |
| "learning_rate": 5.458807972779534e-07, | |
| "loss": 0.5002, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.687703318152245, | |
| "grad_norm": 6.726840019226074, | |
| "learning_rate": 5.352054022891406e-07, | |
| "loss": 0.52, | |
| "step": 7205 | |
| }, | |
| { | |
| "epoch": 4.690956408588159, | |
| "grad_norm": 1.8968901634216309, | |
| "learning_rate": 5.246342966995888e-07, | |
| "loss": 0.5259, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 4.694209499024073, | |
| "grad_norm": 1.647226333618164, | |
| "learning_rate": 5.141675255721762e-07, | |
| "loss": 0.532, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 4.697462589459987, | |
| "grad_norm": 2.063908576965332, | |
| "learning_rate": 5.038051335250316e-07, | |
| "loss": 0.5132, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 4.700715679895901, | |
| "grad_norm": 1.5827159881591797, | |
| "learning_rate": 4.935471647313284e-07, | |
| "loss": 0.515, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 4.703968770331815, | |
| "grad_norm": 1.9684885740280151, | |
| "learning_rate": 4.833936629191016e-07, | |
| "loss": 0.5054, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 4.70722186076773, | |
| "grad_norm": 2.0594069957733154, | |
| "learning_rate": 4.7334467137105933e-07, | |
| "loss": 0.5235, | |
| "step": 7235 | |
| }, | |
| { | |
| "epoch": 4.7104749512036435, | |
| "grad_norm": 1.9911025762557983, | |
| "learning_rate": 4.634002329244047e-07, | |
| "loss": 0.5146, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 4.713728041639557, | |
| "grad_norm": 1.7078765630722046, | |
| "learning_rate": 4.535603899706448e-07, | |
| "loss": 0.5174, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 4.716981132075472, | |
| "grad_norm": 1.6561988592147827, | |
| "learning_rate": 4.438251844554098e-07, | |
| "loss": 0.5201, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 4.720234222511386, | |
| "grad_norm": 1.7727643251419067, | |
| "learning_rate": 4.341946578782868e-07, | |
| "loss": 0.5185, | |
| "step": 7255 | |
| }, | |
| { | |
| "epoch": 4.7234873129473, | |
| "grad_norm": 1.8667991161346436, | |
| "learning_rate": 4.2466885129262004e-07, | |
| "loss": 0.5033, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 4.726740403383214, | |
| "grad_norm": 1.5502984523773193, | |
| "learning_rate": 4.152478053053632e-07, | |
| "loss": 0.5328, | |
| "step": 7265 | |
| }, | |
| { | |
| "epoch": 4.729993493819128, | |
| "grad_norm": 1.9481128454208374, | |
| "learning_rate": 4.059315600768887e-07, | |
| "loss": 0.5151, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 4.733246584255042, | |
| "grad_norm": 2.2370522022247314, | |
| "learning_rate": 3.967201553208122e-07, | |
| "loss": 0.5126, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 4.736499674690957, | |
| "grad_norm": 1.9233421087265015, | |
| "learning_rate": 3.876136303038458e-07, | |
| "loss": 0.5224, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 4.7397527651268705, | |
| "grad_norm": 1.6725999116897583, | |
| "learning_rate": 3.7861202384560644e-07, | |
| "loss": 0.5343, | |
| "step": 7285 | |
| }, | |
| { | |
| "epoch": 4.743005855562784, | |
| "grad_norm": 1.5591987371444702, | |
| "learning_rate": 3.6971537431846057e-07, | |
| "loss": 0.5073, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 4.746258945998699, | |
| "grad_norm": 1.721091866493225, | |
| "learning_rate": 3.609237196473658e-07, | |
| "loss": 0.5274, | |
| "step": 7295 | |
| }, | |
| { | |
| "epoch": 4.749512036434613, | |
| "grad_norm": 1.7789474725723267, | |
| "learning_rate": 3.5223709730970446e-07, | |
| "loss": 0.5072, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.752765126870527, | |
| "grad_norm": 1.7836154699325562, | |
| "learning_rate": 3.4365554433511416e-07, | |
| "loss": 0.5126, | |
| "step": 7305 | |
| }, | |
| { | |
| "epoch": 4.756018217306441, | |
| "grad_norm": 1.6633206605911255, | |
| "learning_rate": 3.3517909730534926e-07, | |
| "loss": 0.5137, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 4.759271307742355, | |
| "grad_norm": 3.098612070083618, | |
| "learning_rate": 3.268077923541085e-07, | |
| "loss": 0.5061, | |
| "step": 7315 | |
| }, | |
| { | |
| "epoch": 4.762524398178269, | |
| "grad_norm": 1.939909815788269, | |
| "learning_rate": 3.185416651668882e-07, | |
| "loss": 0.5349, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 4.765777488614184, | |
| "grad_norm": 1.7502937316894531, | |
| "learning_rate": 3.1038075098083485e-07, | |
| "loss": 0.5032, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 4.7690305790500975, | |
| "grad_norm": 1.7100647687911987, | |
| "learning_rate": 3.023250845845815e-07, | |
| "loss": 0.5133, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 4.772283669486011, | |
| "grad_norm": 1.752884030342102, | |
| "learning_rate": 2.943747003181091e-07, | |
| "loss": 0.5358, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 4.775536759921926, | |
| "grad_norm": 1.700315237045288, | |
| "learning_rate": 2.8652963207260184e-07, | |
| "loss": 0.5048, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 4.77878985035784, | |
| "grad_norm": 2.1294970512390137, | |
| "learning_rate": 2.787899132902949e-07, | |
| "loss": 0.4829, | |
| "step": 7345 | |
| }, | |
| { | |
| "epoch": 4.782042940793754, | |
| "grad_norm": 1.8150845766067505, | |
| "learning_rate": 2.711555769643381e-07, | |
| "loss": 0.512, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 4.785296031229668, | |
| "grad_norm": 2.119196653366089, | |
| "learning_rate": 2.636266556386546e-07, | |
| "loss": 0.5267, | |
| "step": 7355 | |
| }, | |
| { | |
| "epoch": 4.788549121665582, | |
| "grad_norm": 2.050795793533325, | |
| "learning_rate": 2.562031814077964e-07, | |
| "loss": 0.5089, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 4.791802212101496, | |
| "grad_norm": 1.6425533294677734, | |
| "learning_rate": 2.488851859168112e-07, | |
| "loss": 0.5168, | |
| "step": 7365 | |
| }, | |
| { | |
| "epoch": 4.795055302537411, | |
| "grad_norm": 1.8162248134613037, | |
| "learning_rate": 2.4167270036111743e-07, | |
| "loss": 0.5028, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 4.798308392973325, | |
| "grad_norm": 1.7280550003051758, | |
| "learning_rate": 2.345657554863545e-07, | |
| "loss": 0.5127, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 4.801561483409239, | |
| "grad_norm": 1.8187389373779297, | |
| "learning_rate": 2.2756438158826053e-07, | |
| "loss": 0.5349, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 4.804814573845153, | |
| "grad_norm": 1.7695400714874268, | |
| "learning_rate": 2.2066860851253922e-07, | |
| "loss": 0.5211, | |
| "step": 7385 | |
| }, | |
| { | |
| "epoch": 4.808067664281067, | |
| "grad_norm": 3.8797385692596436, | |
| "learning_rate": 2.1387846565474045e-07, | |
| "loss": 0.5189, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 4.811320754716981, | |
| "grad_norm": 1.7038609981536865, | |
| "learning_rate": 2.0719398196012707e-07, | |
| "loss": 0.5342, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 4.814573845152895, | |
| "grad_norm": 1.7032898664474487, | |
| "learning_rate": 2.0061518592355277e-07, | |
| "loss": 0.5139, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.817826935588809, | |
| "grad_norm": 1.818298101425171, | |
| "learning_rate": 1.9414210558934554e-07, | |
| "loss": 0.5198, | |
| "step": 7405 | |
| }, | |
| { | |
| "epoch": 4.821080026024724, | |
| "grad_norm": 1.8034625053405762, | |
| "learning_rate": 1.8777476855118547e-07, | |
| "loss": 0.5314, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 4.824333116460638, | |
| "grad_norm": 3.0345141887664795, | |
| "learning_rate": 1.8151320195197997e-07, | |
| "loss": 0.5387, | |
| "step": 7415 | |
| }, | |
| { | |
| "epoch": 4.827586206896552, | |
| "grad_norm": 1.8238238096237183, | |
| "learning_rate": 1.753574324837609e-07, | |
| "loss": 0.5219, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 4.830839297332465, | |
| "grad_norm": 1.8523563146591187, | |
| "learning_rate": 1.6930748638756266e-07, | |
| "loss": 0.5075, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 4.83409238776838, | |
| "grad_norm": 1.592421531677246, | |
| "learning_rate": 1.6336338945331098e-07, | |
| "loss": 0.512, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 4.837345478204294, | |
| "grad_norm": 1.459957480430603, | |
| "learning_rate": 1.57525167019712e-07, | |
| "loss": 0.5154, | |
| "step": 7435 | |
| }, | |
| { | |
| "epoch": 4.840598568640209, | |
| "grad_norm": 1.7186057567596436, | |
| "learning_rate": 1.517928439741495e-07, | |
| "loss": 0.5316, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 4.8438516590761225, | |
| "grad_norm": 1.5618077516555786, | |
| "learning_rate": 1.461664447525768e-07, | |
| "loss": 0.4997, | |
| "step": 7445 | |
| }, | |
| { | |
| "epoch": 4.847104749512036, | |
| "grad_norm": 1.9501081705093384, | |
| "learning_rate": 1.4064599333940555e-07, | |
| "loss": 0.5115, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 4.85035783994795, | |
| "grad_norm": 1.593405842781067, | |
| "learning_rate": 1.3523151326741702e-07, | |
| "loss": 0.5062, | |
| "step": 7455 | |
| }, | |
| { | |
| "epoch": 4.853610930383865, | |
| "grad_norm": 1.6193232536315918, | |
| "learning_rate": 1.299230276176483e-07, | |
| "loss": 0.5096, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 4.856864020819779, | |
| "grad_norm": 1.7456111907958984, | |
| "learning_rate": 1.247205590192979e-07, | |
| "loss": 0.5154, | |
| "step": 7465 | |
| }, | |
| { | |
| "epoch": 4.860117111255693, | |
| "grad_norm": 1.7586069107055664, | |
| "learning_rate": 1.1962412964964254e-07, | |
| "loss": 0.5285, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 4.863370201691607, | |
| "grad_norm": 2.715386152267456, | |
| "learning_rate": 1.1463376123391766e-07, | |
| "loss": 0.4909, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 4.866623292127521, | |
| "grad_norm": 2.343010902404785, | |
| "learning_rate": 1.0974947504524269e-07, | |
| "loss": 0.5142, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 4.869876382563435, | |
| "grad_norm": 1.7289972305297852, | |
| "learning_rate": 1.0497129190452926e-07, | |
| "loss": 0.5191, | |
| "step": 7485 | |
| }, | |
| { | |
| "epoch": 4.8731294729993495, | |
| "grad_norm": 1.742447018623352, | |
| "learning_rate": 1.0029923218038972e-07, | |
| "loss": 0.5248, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 4.876382563435263, | |
| "grad_norm": 1.901174545288086, | |
| "learning_rate": 9.573331578904e-08, | |
| "loss": 0.5213, | |
| "step": 7495 | |
| }, | |
| { | |
| "epoch": 4.879635653871178, | |
| "grad_norm": 2.5633485317230225, | |
| "learning_rate": 9.127356219423843e-08, | |
| "loss": 0.5136, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.882888744307092, | |
| "grad_norm": 1.8684697151184082, | |
| "learning_rate": 8.691999040717491e-08, | |
| "loss": 0.5188, | |
| "step": 7505 | |
| }, | |
| { | |
| "epoch": 4.886141834743006, | |
| "grad_norm": 2.1927521228790283, | |
| "learning_rate": 8.267261898641798e-08, | |
| "loss": 0.5119, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 4.8893949251789195, | |
| "grad_norm": 1.949514627456665, | |
| "learning_rate": 7.853146603780947e-08, | |
| "loss": 0.5147, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 4.892648015614834, | |
| "grad_norm": 2.0919501781463623, | |
| "learning_rate": 7.449654921440618e-08, | |
| "loss": 0.5064, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 4.895901106050748, | |
| "grad_norm": 1.5901788473129272, | |
| "learning_rate": 7.056788571639105e-08, | |
| "loss": 0.5109, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 4.899154196486663, | |
| "grad_norm": 1.8604559898376465, | |
| "learning_rate": 6.674549229101767e-08, | |
| "loss": 0.526, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 4.9024072869225765, | |
| "grad_norm": 1.8954790830612183, | |
| "learning_rate": 6.302938523251589e-08, | |
| "loss": 0.5039, | |
| "step": 7535 | |
| }, | |
| { | |
| "epoch": 4.90566037735849, | |
| "grad_norm": 1.7178046703338623, | |
| "learning_rate": 5.941958038204187e-08, | |
| "loss": 0.5219, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 4.908913467794405, | |
| "grad_norm": 5.730696201324463, | |
| "learning_rate": 5.59160931275976e-08, | |
| "loss": 0.5004, | |
| "step": 7545 | |
| }, | |
| { | |
| "epoch": 4.912166558230319, | |
| "grad_norm": 1.6460310220718384, | |
| "learning_rate": 5.2518938403978145e-08, | |
| "loss": 0.5319, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 4.915419648666233, | |
| "grad_norm": 1.6308308839797974, | |
| "learning_rate": 4.922813069269394e-08, | |
| "loss": 0.5214, | |
| "step": 7555 | |
| }, | |
| { | |
| "epoch": 4.918672739102147, | |
| "grad_norm": 1.5023380517959595, | |
| "learning_rate": 4.604368402191528e-08, | |
| "loss": 0.5008, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 4.921925829538061, | |
| "grad_norm": 1.7468260526657104, | |
| "learning_rate": 4.2965611966416796e-08, | |
| "loss": 0.5007, | |
| "step": 7565 | |
| }, | |
| { | |
| "epoch": 4.925178919973975, | |
| "grad_norm": 1.9006001949310303, | |
| "learning_rate": 3.9993927647516415e-08, | |
| "loss": 0.51, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 4.92843201040989, | |
| "grad_norm": 1.9338369369506836, | |
| "learning_rate": 3.71286437330115e-08, | |
| "loss": 0.5216, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 4.931685100845804, | |
| "grad_norm": 1.6572381258010864, | |
| "learning_rate": 3.4369772437137236e-08, | |
| "loss": 0.542, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 4.934938191281717, | |
| "grad_norm": 2.3215434551239014, | |
| "learning_rate": 3.1717325520513876e-08, | |
| "loss": 0.513, | |
| "step": 7585 | |
| }, | |
| { | |
| "epoch": 4.938191281717632, | |
| "grad_norm": 1.6987948417663574, | |
| "learning_rate": 2.9171314290080132e-08, | |
| "loss": 0.5284, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 4.941444372153546, | |
| "grad_norm": 1.7099159955978394, | |
| "learning_rate": 2.6731749599065435e-08, | |
| "loss": 0.5267, | |
| "step": 7595 | |
| }, | |
| { | |
| "epoch": 4.94469746258946, | |
| "grad_norm": 1.620719075202942, | |
| "learning_rate": 2.4398641846937187e-08, | |
| "loss": 0.5248, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 4.9479505530253745, | |
| "grad_norm": 1.8238213062286377, | |
| "learning_rate": 2.2172000979345242e-08, | |
| "loss": 0.5268, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 4.951203643461288, | |
| "grad_norm": 2.011178970336914, | |
| "learning_rate": 2.0051836488094167e-08, | |
| "loss": 0.5184, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 4.954456733897202, | |
| "grad_norm": 1.6856626272201538, | |
| "learning_rate": 1.8038157411101597e-08, | |
| "loss": 0.5102, | |
| "step": 7615 | |
| }, | |
| { | |
| "epoch": 4.957709824333117, | |
| "grad_norm": 2.0251147747039795, | |
| "learning_rate": 1.6130972332345505e-08, | |
| "loss": 0.5112, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 4.960962914769031, | |
| "grad_norm": 2.00230073928833, | |
| "learning_rate": 1.4330289381844775e-08, | |
| "loss": 0.5224, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 4.964216005204944, | |
| "grad_norm": 1.570320963859558, | |
| "learning_rate": 1.2636116235612005e-08, | |
| "loss": 0.5315, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 4.967469095640859, | |
| "grad_norm": 1.6889746189117432, | |
| "learning_rate": 1.1048460115634096e-08, | |
| "loss": 0.5193, | |
| "step": 7635 | |
| }, | |
| { | |
| "epoch": 4.970722186076773, | |
| "grad_norm": 2.201413631439209, | |
| "learning_rate": 9.567327789825054e-09, | |
| "loss": 0.5286, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 4.973975276512687, | |
| "grad_norm": 1.7942432165145874, | |
| "learning_rate": 8.192725572006565e-09, | |
| "loss": 0.5211, | |
| "step": 7645 | |
| }, | |
| { | |
| "epoch": 4.9772283669486015, | |
| "grad_norm": 1.7889728546142578, | |
| "learning_rate": 6.924659321888571e-09, | |
| "loss": 0.5164, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 4.980481457384515, | |
| "grad_norm": 2.543469190597534, | |
| "learning_rate": 5.763134445022078e-09, | |
| "loss": 0.5054, | |
| "step": 7655 | |
| }, | |
| { | |
| "epoch": 4.983734547820429, | |
| "grad_norm": 5.216160297393799, | |
| "learning_rate": 4.7081558927991594e-09, | |
| "loss": 0.4954, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 4.986987638256344, | |
| "grad_norm": 2.649937868118286, | |
| "learning_rate": 3.759728162422427e-09, | |
| "loss": 0.5127, | |
| "step": 7665 | |
| }, | |
| { | |
| "epoch": 4.990240728692258, | |
| "grad_norm": 1.9266875982284546, | |
| "learning_rate": 2.9178552968800454e-09, | |
| "loss": 0.5304, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 4.9934938191281715, | |
| "grad_norm": 1.6233766078948975, | |
| "learning_rate": 2.1825408849401873e-09, | |
| "loss": 0.5277, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 4.996746909564086, | |
| "grad_norm": 1.7894667387008667, | |
| "learning_rate": 1.5537880611260491e-09, | |
| "loss": 0.5239, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.5771998167037964, | |
| "learning_rate": 1.0315995057075256e-09, | |
| "loss": 0.5174, | |
| "step": 7685 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.7944165410554209, | |
| "eval_loss": 0.54638671875, | |
| "eval_precision": 0.7945063287994906, | |
| "eval_recall": 0.7943501451962497, | |
| "eval_runtime": 257.0765, | |
| "eval_samples_per_second": 1530.42, | |
| "eval_steps_per_second": 1.498, | |
| "step": 7685 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 7685, | |
| "total_flos": 5.363134814553637e+18, | |
| "train_loss": 0.7729351929743412, | |
| "train_runtime": 35402.7725, | |
| "train_samples_per_second": 444.524, | |
| "train_steps_per_second": 0.217 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 7685, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 5.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.363134814553637e+18, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |