{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4465031016252844,
  "eval_steps": 500,
  "global_step": 3410,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0013093932598981946, "grad_norm": 0.9729704260826111, "learning_rate": 0.00018, "loss": 2.9613, "step": 10 },
    { "epoch": 0.002618786519796389, "grad_norm": 0.4988560676574707, "learning_rate": 0.00019976402726796014, "loss": 2.2501, "step": 20 },
    { "epoch": 0.003928179779694584, "grad_norm": 0.3629654049873352, "learning_rate": 0.0001995018353434714, "loss": 1.9558, "step": 30 },
    { "epoch": 0.005237573039592778, "grad_norm": 0.42317306995391846, "learning_rate": 0.0001992396434189827, "loss": 1.8904, "step": 40 },
    { "epoch": 0.006546966299490973, "grad_norm": 0.4342662990093231, "learning_rate": 0.00019897745149449398, "loss": 1.9487, "step": 50 },
    { "epoch": 0.007856359559389167, "grad_norm": 0.4164058268070221, "learning_rate": 0.00019871525957000524, "loss": 1.845, "step": 60 },
    { "epoch": 0.009165752819287363, "grad_norm": 0.38950663805007935, "learning_rate": 0.0001984530676455165, "loss": 1.8264, "step": 70 },
    { "epoch": 0.010475146079185557, "grad_norm": 0.42093154788017273, "learning_rate": 0.00019819087572102778, "loss": 1.8418, "step": 80 },
    { "epoch": 0.011784539339083753, "grad_norm": 0.4716477394104004, "learning_rate": 0.00019792868379653908, "loss": 1.8346, "step": 90 },
    { "epoch": 0.013093932598981946, "grad_norm": 0.4358816146850586, "learning_rate": 0.00019766649187205035, "loss": 1.8271, "step": 100 },
    { "epoch": 0.014403325858880142, "grad_norm": 0.45478910207748413, "learning_rate": 0.00019740429994756162, "loss": 1.7506, "step": 110 },
    { "epoch": 0.015712719118778334, "grad_norm": 0.4366815388202667, "learning_rate": 0.00019714210802307289, "loss": 1.7854, "step": 120 },
    { "epoch": 0.01702211237867653, "grad_norm": 0.45096880197525024, "learning_rate": 0.00019687991609858418, "loss": 1.779, "step": 130 },
    { "epoch": 0.018331505638574726, "grad_norm": 0.4566694498062134, "learning_rate": 0.00019661772417409545, "loss": 1.7509, "step": 140 },
    { "epoch": 0.01964089889847292, "grad_norm": 0.4729042649269104, "learning_rate": 0.00019635553224960672, "loss": 1.7271, "step": 150 },
    { "epoch": 0.020950292158371114, "grad_norm": 0.46566858887672424, "learning_rate": 0.000196093340325118, "loss": 1.714, "step": 160 },
    { "epoch": 0.02225968541826931, "grad_norm": 0.45467349886894226, "learning_rate": 0.00019583114840062926, "loss": 1.702, "step": 170 },
    { "epoch": 0.023569078678167505, "grad_norm": 0.434721440076828, "learning_rate": 0.00019556895647614055, "loss": 1.7162, "step": 180 },
    { "epoch": 0.024878471938065697, "grad_norm": 0.5182896852493286, "learning_rate": 0.00019530676455165182, "loss": 1.688, "step": 190 },
    { "epoch": 0.026187865197963893, "grad_norm": 0.5060753226280212, "learning_rate": 0.0001950445726271631, "loss": 1.6955, "step": 200 },
    { "epoch": 0.02749725845786209, "grad_norm": 0.46147406101226807, "learning_rate": 0.00019478238070267436, "loss": 1.681, "step": 210 },
    { "epoch": 0.028806651717760284, "grad_norm": 0.4517662823200226, "learning_rate": 0.00019452018877818563, "loss": 1.6936, "step": 220 },
    { "epoch": 0.030116044977658477, "grad_norm": 0.44920527935028076, "learning_rate": 0.00019425799685369693, "loss": 1.6633, "step": 230 },
    { "epoch": 0.03142543823755667, "grad_norm": 0.5066579580307007, "learning_rate": 0.0001939958049292082, "loss": 1.6872, "step": 240 },
    { "epoch": 0.03273483149745487, "grad_norm": 0.5238184928894043, "learning_rate": 0.00019373361300471946, "loss": 1.6255, "step": 250 },
    { "epoch": 0.03404422475735306, "grad_norm": 0.4943958520889282, "learning_rate": 0.00019347142108023073, "loss": 1.6499, "step": 260 },
    { "epoch": 0.03535361801725126, "grad_norm": 0.48346492648124695, "learning_rate": 0.00019320922915574203, "loss": 1.672, "step": 270 },
    { "epoch": 0.03666301127714945, "grad_norm": 0.4401436746120453, "learning_rate": 0.0001929470372312533, "loss": 1.6863, "step": 280 },
    { "epoch": 0.037972404537047644, "grad_norm": 0.4602312743663788, "learning_rate": 0.00019268484530676457, "loss": 1.646, "step": 290 },
    { "epoch": 0.03928179779694584, "grad_norm": 0.4927528202533722, "learning_rate": 0.00019242265338227584, "loss": 1.6252, "step": 300 },
    { "epoch": 0.040591191056844035, "grad_norm": 0.5075507760047913, "learning_rate": 0.0001921604614577871, "loss": 1.6218, "step": 310 },
    { "epoch": 0.04190058431674223, "grad_norm": 0.5239428877830505, "learning_rate": 0.0001918982695332984, "loss": 1.6354, "step": 320 },
    { "epoch": 0.043209977576640426, "grad_norm": 0.5954804420471191, "learning_rate": 0.00019163607760880967, "loss": 1.7022, "step": 330 },
    { "epoch": 0.04451937083653862, "grad_norm": 0.5364096760749817, "learning_rate": 0.00019137388568432094, "loss": 1.5981, "step": 340 },
    { "epoch": 0.04582876409643681, "grad_norm": 0.55096435546875, "learning_rate": 0.0001911116937598322, "loss": 1.6211, "step": 350 },
    { "epoch": 0.04713815735633501, "grad_norm": 0.5193445682525635, "learning_rate": 0.00019084950183534348, "loss": 1.6195, "step": 360 },
    { "epoch": 0.0484475506162332, "grad_norm": 0.528788685798645, "learning_rate": 0.00019058730991085477, "loss": 1.6076, "step": 370 },
    { "epoch": 0.049756943876131395, "grad_norm": 0.5360815525054932, "learning_rate": 0.00019032511798636604, "loss": 1.5912, "step": 380 },
    { "epoch": 0.051066337136029594, "grad_norm": 0.5031074285507202, "learning_rate": 0.0001900629260618773, "loss": 1.6157, "step": 390 },
    { "epoch": 0.052375730395927786, "grad_norm": 0.5149925351142883, "learning_rate": 0.00018980073413738858, "loss": 1.579, "step": 400 },
    { "epoch": 0.053685123655825985, "grad_norm": 0.5419250726699829, "learning_rate": 0.00018953854221289985, "loss": 1.6242, "step": 410 },
    { "epoch": 0.05499451691572418, "grad_norm": 0.5513054728507996, "learning_rate": 0.00018927635028841112, "loss": 1.5948, "step": 420 },
    { "epoch": 0.05630391017562237, "grad_norm": 0.5670781135559082, "learning_rate": 0.0001890141583639224, "loss": 1.5314, "step": 430 },
    { "epoch": 0.05761330343552057, "grad_norm": 0.5327165722846985, "learning_rate": 0.00018875196643943366, "loss": 1.5716, "step": 440 },
    { "epoch": 0.05892269669541876, "grad_norm": 0.5244112610816956, "learning_rate": 0.00018848977451494493, "loss": 1.5347, "step": 450 },
    { "epoch": 0.06023208995531695, "grad_norm": 0.5349589586257935, "learning_rate": 0.00018822758259045622, "loss": 1.564, "step": 460 },
    { "epoch": 0.06154148321521515, "grad_norm": 0.5296887755393982, "learning_rate": 0.0001879653906659675, "loss": 1.5779, "step": 470 },
    { "epoch": 0.06285087647511334, "grad_norm": 0.5426337718963623, "learning_rate": 0.00018770319874147876, "loss": 1.5112, "step": 480 },
    { "epoch": 0.06416026973501154, "grad_norm": 0.5532763004302979, "learning_rate": 0.00018744100681699003, "loss": 1.5458, "step": 490 },
    { "epoch": 0.06546966299490974, "grad_norm": 0.5318668484687805, "learning_rate": 0.00018717881489250133, "loss": 1.5597, "step": 500 },
    { "epoch": 0.06677905625480793, "grad_norm": 0.6084654331207275, "learning_rate": 0.0001869166229680126, "loss": 1.5485, "step": 510 },
    { "epoch": 0.06808844951470612, "grad_norm": 0.5626131296157837, "learning_rate": 0.00018665443104352386, "loss": 1.5217, "step": 520 },
    { "epoch": 0.06939784277460431, "grad_norm": 0.528758704662323, "learning_rate": 0.00018639223911903513, "loss": 1.5343, "step": 530 },
    { "epoch": 0.07070723603450252, "grad_norm": 0.5894292593002319, "learning_rate": 0.0001861300471945464, "loss": 1.5604, "step": 540 },
    { "epoch": 0.07201662929440071, "grad_norm": 0.5676683187484741, "learning_rate": 0.0001858678552700577, "loss": 1.5216, "step": 550 },
    { "epoch": 0.0733260225542989, "grad_norm": 0.6381473541259766, "learning_rate": 0.00018560566334556897, "loss": 1.4334, "step": 560 },
    { "epoch": 0.0746354158141971, "grad_norm": 0.6644160151481628, "learning_rate": 0.00018534347142108024, "loss": 1.4832, "step": 570 },
    { "epoch": 0.07594480907409529, "grad_norm": 0.5856960415840149, "learning_rate": 0.0001850812794965915, "loss": 1.5118, "step": 580 },
    { "epoch": 0.07725420233399348, "grad_norm": 0.5892801880836487, "learning_rate": 0.00018481908757210277, "loss": 1.5028, "step": 590 },
    { "epoch": 0.07856359559389169, "grad_norm": 0.5674527883529663, "learning_rate": 0.00018455689564761407, "loss": 1.5125, "step": 600 },
    { "epoch": 0.07987298885378988, "grad_norm": 0.6059868335723877, "learning_rate": 0.00018429470372312534, "loss": 1.4543, "step": 610 },
    { "epoch": 0.08118238211368807, "grad_norm": 0.6255605816841125, "learning_rate": 0.0001840325117986366, "loss": 1.4851, "step": 620 },
    { "epoch": 0.08249177537358626, "grad_norm": 0.5904423594474792, "learning_rate": 0.00018377031987414788, "loss": 1.4154, "step": 630 },
    { "epoch": 0.08380116863348445, "grad_norm": 0.6035749912261963, "learning_rate": 0.00018350812794965917, "loss": 1.4276, "step": 640 },
    { "epoch": 0.08511056189338265, "grad_norm": 0.597172737121582, "learning_rate": 0.00018324593602517044, "loss": 1.4736, "step": 650 },
    { "epoch": 0.08641995515328085, "grad_norm": 0.6352164149284363, "learning_rate": 0.0001829837441006817, "loss": 1.4975, "step": 660 },
    { "epoch": 0.08772934841317905, "grad_norm": 0.5500873327255249, "learning_rate": 0.00018272155217619298, "loss": 1.4578, "step": 670 },
    { "epoch": 0.08903874167307724, "grad_norm": 0.6423613429069519, "learning_rate": 0.00018245936025170425, "loss": 1.3926, "step": 680 },
    { "epoch": 0.09034813493297543, "grad_norm": 0.665908694267273, "learning_rate": 0.00018219716832721555, "loss": 1.4548, "step": 690 },
    { "epoch": 0.09165752819287362, "grad_norm": 0.6354024410247803, "learning_rate": 0.00018193497640272682, "loss": 1.5, "step": 700 },
    { "epoch": 0.09296692145277183, "grad_norm": 0.6588740348815918, "learning_rate": 0.00018167278447823808, "loss": 1.3609, "step": 710 },
    { "epoch": 0.09427631471267002, "grad_norm": 0.6754702925682068, "learning_rate": 0.00018141059255374935, "loss": 1.3432, "step": 720 },
    { "epoch": 0.09558570797256821, "grad_norm": 0.6337271332740784, "learning_rate": 0.00018114840062926062, "loss": 1.4439, "step": 730 },
    { "epoch": 0.0968951012324664, "grad_norm": 0.6592088937759399, "learning_rate": 0.00018088620870477192, "loss": 1.3949, "step": 740 },
    { "epoch": 0.0982044944923646, "grad_norm": 0.6700498461723328, "learning_rate": 0.0001806240167802832, "loss": 1.4046, "step": 750 },
    { "epoch": 0.09951388775226279, "grad_norm": 0.708410382270813, "learning_rate": 0.00018036182485579446, "loss": 1.3021, "step": 760 },
    { "epoch": 0.100823281012161, "grad_norm": 0.6718457937240601, "learning_rate": 0.00018009963293130573, "loss": 1.3769, "step": 770 },
    { "epoch": 0.10213267427205919, "grad_norm": 0.661522388458252, "learning_rate": 0.00017983744100681702, "loss": 1.434, "step": 780 },
    { "epoch": 0.10344206753195738, "grad_norm": 0.6615481376647949, "learning_rate": 0.0001795752490823283, "loss": 1.3839, "step": 790 },
    { "epoch": 0.10475146079185557, "grad_norm": 0.696959376335144, "learning_rate": 0.00017931305715783956, "loss": 1.3634, "step": 800 },
    { "epoch": 0.10606085405175376, "grad_norm": 0.7320592403411865, "learning_rate": 0.00017905086523335083, "loss": 1.2737, "step": 810 },
    { "epoch": 0.10737024731165197, "grad_norm": 0.7200619578361511, "learning_rate": 0.0001787886733088621, "loss": 1.3732, "step": 820 },
    { "epoch": 0.10867964057155016, "grad_norm": 0.6982961297035217, "learning_rate": 0.00017852648138437337, "loss": 1.3019, "step": 830 },
    { "epoch": 0.10998903383144835, "grad_norm": 0.7427386045455933, "learning_rate": 0.00017826428945988464, "loss": 1.3398, "step": 840 },
    { "epoch": 0.11129842709134655, "grad_norm": 0.7897806763648987, "learning_rate": 0.0001780020975353959, "loss": 1.3216, "step": 850 },
    { "epoch": 0.11260782035124474, "grad_norm": 0.7520805597305298, "learning_rate": 0.00017773990561090717, "loss": 1.2875, "step": 860 },
    { "epoch": 0.11391721361114293, "grad_norm": 0.7332555055618286, "learning_rate": 0.00017747771368641844, "loss": 1.272, "step": 870 },
    { "epoch": 0.11522660687104114, "grad_norm": 0.7135840654373169, "learning_rate": 0.00017721552176192974, "loss": 1.3185, "step": 880 },
    { "epoch": 0.11653600013093933, "grad_norm": 0.6898264288902283, "learning_rate": 0.000176953329837441, "loss": 1.3089, "step": 890 },
    { "epoch": 0.11784539339083752, "grad_norm": 0.9488328099250793, "learning_rate": 0.00017669113791295228, "loss": 1.2258, "step": 900 },
    { "epoch": 0.11915478665073571, "grad_norm": 0.7257933616638184, "learning_rate": 0.00017642894598846355, "loss": 1.3284, "step": 910 },
    { "epoch": 0.1204641799106339, "grad_norm": 0.7688736915588379, "learning_rate": 0.00017616675406397484, "loss": 1.2878, "step": 920 },
    { "epoch": 0.1217735731705321, "grad_norm": 0.8328510522842407, "learning_rate": 0.0001759045621394861, "loss": 1.2346, "step": 930 },
    { "epoch": 0.1230829664304303, "grad_norm": 0.8448120951652527, "learning_rate": 0.00017564237021499738, "loss": 1.2926, "step": 940 },
    { "epoch": 0.1243923596903285, "grad_norm": 0.8510689735412598, "learning_rate": 0.00017538017829050865, "loss": 1.2109, "step": 950 },
    { "epoch": 0.12570175295022668, "grad_norm": 0.866874098777771, "learning_rate": 0.00017511798636601992, "loss": 1.3091, "step": 960 },
    { "epoch": 0.12701114621012488, "grad_norm": 0.9010233879089355, "learning_rate": 0.00017485579444153122, "loss": 1.2273, "step": 970 },
    { "epoch": 0.1283205394700231, "grad_norm": 0.9316047430038452, "learning_rate": 0.00017459360251704248, "loss": 1.2611, "step": 980 },
    { "epoch": 0.12962993272992127, "grad_norm": 0.9005467295646667, "learning_rate": 0.00017433141059255375, "loss": 1.1747, "step": 990 },
    { "epoch": 0.13093932598981947, "grad_norm": 0.8843415975570679, "learning_rate": 0.00017406921866806502, "loss": 1.1915, "step": 1000 },
    { "epoch": 0.13224871924971765, "grad_norm": 0.8090497851371765, "learning_rate": 0.0001738070267435763, "loss": 1.2452, "step": 1010 },
    { "epoch": 0.13355811250961586, "grad_norm": 1.2498819828033447, "learning_rate": 0.0001735448348190876, "loss": 1.276, "step": 1020 },
    { "epoch": 0.13486750576951406, "grad_norm": 0.7861034870147705, "learning_rate": 0.00017328264289459886, "loss": 1.1989, "step": 1030 },
    { "epoch": 0.13617689902941224, "grad_norm": 0.9525002837181091, "learning_rate": 0.00017302045097011013, "loss": 1.1338, "step": 1040 },
    { "epoch": 0.13748629228931045, "grad_norm": 0.8066142201423645, "learning_rate": 0.0001727582590456214, "loss": 1.1421, "step": 1050 },
    { "epoch": 0.13879568554920862, "grad_norm": 0.8200965523719788, "learning_rate": 0.0001724960671211327, "loss": 1.1596, "step": 1060 },
    { "epoch": 0.14010507880910683, "grad_norm": 0.9981400370597839, "learning_rate": 0.00017223387519664396, "loss": 1.0562, "step": 1070 },
    { "epoch": 0.14141447206900504, "grad_norm": 0.9273063540458679, "learning_rate": 0.00017197168327215523, "loss": 1.1275, "step": 1080 },
    { "epoch": 0.14272386532890322, "grad_norm": 0.8812237977981567, "learning_rate": 0.0001717094913476665, "loss": 1.0406, "step": 1090 },
    { "epoch": 0.14403325858880142, "grad_norm": 0.8970304727554321, "learning_rate": 0.00017144729942317777, "loss": 1.1263, "step": 1100 },
    { "epoch": 0.1453426518486996, "grad_norm": 0.9097404479980469, "learning_rate": 0.00017118510749868906, "loss": 1.1956, "step": 1110 },
    { "epoch": 0.1466520451085978, "grad_norm": 1.0246269702911377, "learning_rate": 0.00017092291557420033, "loss": 1.0717, "step": 1120 },
    { "epoch": 0.14796143836849598, "grad_norm": 1.1149781942367554, "learning_rate": 0.0001706607236497116, "loss": 1.076, "step": 1130 },
    { "epoch": 0.1492708316283942, "grad_norm": 1.1981500387191772, "learning_rate": 0.00017039853172522287, "loss": 1.142, "step": 1140 },
    { "epoch": 0.1505802248882924, "grad_norm": 0.9477318525314331, "learning_rate": 0.00017013633980073414, "loss": 1.0799, "step": 1150 },
    { "epoch": 0.15188961814819057, "grad_norm": 1.0102957487106323, "learning_rate": 0.00016987414787624544, "loss": 1.0531, "step": 1160 },
    { "epoch": 0.15319901140808878, "grad_norm": 1.1728227138519287, "learning_rate": 0.0001696119559517567, "loss": 1.0903, "step": 1170 },
    { "epoch": 0.15450840466798696, "grad_norm": 1.0086623430252075, "learning_rate": 0.00016934976402726797, "loss": 1.0677, "step": 1180 },
    { "epoch": 0.15581779792788517, "grad_norm": 0.8586070537567139, "learning_rate": 0.00016908757210277924, "loss": 1.1022, "step": 1190 },
    { "epoch": 0.15712719118778337, "grad_norm": 1.2628968954086304, "learning_rate": 0.00016882538017829054, "loss": 1.0575, "step": 1200 },
    { "epoch": 0.15843658444768155, "grad_norm": 0.9629563689231873, "learning_rate": 0.0001685631882538018, "loss": 1.0844, "step": 1210 },
    { "epoch": 0.15974597770757976, "grad_norm": 1.0898447036743164, "learning_rate": 0.00016830099632931308, "loss": 1.0654, "step": 1220 },
    { "epoch": 0.16105537096747793, "grad_norm": 1.13120698928833, "learning_rate": 0.00016803880440482435, "loss": 1.0686, "step": 1230 },
    { "epoch": 0.16236476422737614, "grad_norm": 1.0732567310333252, "learning_rate": 0.00016777661248033561, "loss": 1.084, "step": 1240 },
    { "epoch": 0.16367415748727435, "grad_norm": 1.0681878328323364, "learning_rate": 0.00016751442055584688, "loss": 0.9979, "step": 1250 },
    { "epoch": 0.16498355074717252, "grad_norm": 0.9773361086845398, "learning_rate": 0.00016725222863135815, "loss": 1.0841, "step": 1260 },
    { "epoch": 0.16629294400707073, "grad_norm": 1.0342450141906738, "learning_rate": 0.00016699003670686942, "loss": 1.0176, "step": 1270 },
    { "epoch": 0.1676023372669689, "grad_norm": 1.0580531358718872, "learning_rate": 0.0001667278447823807, "loss": 0.9858, "step": 1280 },
    { "epoch": 0.16891173052686712, "grad_norm": 0.9744387865066528, "learning_rate": 0.000166465652857892, "loss": 0.9282, "step": 1290 },
    { "epoch": 0.1702211237867653, "grad_norm": 0.9636452198028564, "learning_rate": 0.00016620346093340326, "loss": 0.9414, "step": 1300 },
    { "epoch": 0.1715305170466635, "grad_norm": 1.1029468774795532, "learning_rate": 0.00016594126900891453, "loss": 0.8812, "step": 1310 },
    { "epoch": 0.1728399103065617, "grad_norm": 1.2941449880599976, "learning_rate": 0.0001656790770844258, "loss": 0.9823, "step": 1320 },
    { "epoch": 0.17414930356645988, "grad_norm": 1.627166509628296, "learning_rate": 0.00016541688515993706, "loss": 0.9585, "step": 1330 },
    { "epoch": 0.1754586968263581, "grad_norm": 1.091630458831787, "learning_rate": 0.00016515469323544836, "loss": 0.9516, "step": 1340 },
    { "epoch": 0.17676809008625627, "grad_norm": 1.1108227968215942, "learning_rate": 0.00016489250131095963, "loss": 0.8998, "step": 1350 },
    { "epoch": 0.17807748334615447, "grad_norm": 1.0883326530456543, "learning_rate": 0.0001646303093864709, "loss": 0.916, "step": 1360 },
    { "epoch": 0.17938687660605268, "grad_norm": 1.2917275428771973, "learning_rate": 0.00016436811746198217, "loss": 0.9112, "step": 1370 },
    { "epoch": 0.18069626986595086, "grad_norm": 1.1828432083129883, "learning_rate": 0.00016410592553749344, "loss": 0.9721, "step": 1380 },
    { "epoch": 0.18200566312584907, "grad_norm": 1.3447389602661133, "learning_rate": 0.00016384373361300473, "loss": 0.9198, "step": 1390 },
    { "epoch": 0.18331505638574724, "grad_norm": 1.0735760927200317, "learning_rate": 0.000163581541688516, "loss": 0.8634, "step": 1400 },
    { "epoch": 0.18462444964564545, "grad_norm": 1.0454446077346802, "learning_rate": 0.00016331934976402727, "loss": 0.9151, "step": 1410 },
    { "epoch": 0.18593384290554366, "grad_norm": 1.2230719327926636, "learning_rate": 0.00016305715783953854, "loss": 0.9202, "step": 1420 },
    { "epoch": 0.18724323616544183, "grad_norm": 1.1030149459838867, "learning_rate": 0.00016279496591504984, "loss": 0.9068, "step": 1430 },
    { "epoch": 0.18855262942534004, "grad_norm": 1.4471871852874756, "learning_rate": 0.0001625327739905611, "loss": 0.8682, "step": 1440 },
    { "epoch": 0.18986202268523822, "grad_norm": 1.2458796501159668, "learning_rate": 0.00016227058206607237, "loss": 0.8247, "step": 1450 },
    { "epoch": 0.19117141594513642, "grad_norm": 1.1849644184112549, "learning_rate": 0.00016200839014158364, "loss": 0.8987, "step": 1460 },
    { "epoch": 0.19248080920503463, "grad_norm": 1.2985557317733765, "learning_rate": 0.0001617461982170949, "loss": 0.8006, "step": 1470 },
    { "epoch": 0.1937902024649328, "grad_norm": 1.7127928733825684, "learning_rate": 0.0001614840062926062, "loss": 0.8191, "step": 1480 },
    { "epoch": 0.19509959572483102, "grad_norm": 1.440895915031433, "learning_rate": 0.00016122181436811748, "loss": 0.8129, "step": 1490 },
    { "epoch": 0.1964089889847292, "grad_norm": 1.252194881439209, "learning_rate": 0.00016095962244362875, "loss": 0.8803, "step": 1500 },
    { "epoch": 0.1977183822446274, "grad_norm": 1.138358235359192, "learning_rate": 0.00016069743051914001, "loss": 0.8744, "step": 1510 },
    { "epoch": 0.19902777550452558, "grad_norm": 1.080971598625183, "learning_rate": 0.00016043523859465128, "loss": 0.8693, "step": 1520 },
    { "epoch": 0.20033716876442378, "grad_norm": 1.1612547636032104, "learning_rate": 0.00016017304667016258, "loss": 0.7991, "step": 1530 },
    { "epoch": 0.201646562024322, "grad_norm": 1.1773971319198608, "learning_rate": 0.00015991085474567385, "loss": 0.912, "step": 1540 },
    { "epoch": 0.20295595528422017, "grad_norm": 1.1353998184204102, "learning_rate": 0.00015964866282118512, "loss": 0.7986, "step": 1550 },
    { "epoch": 0.20426534854411837, "grad_norm": 1.6848335266113281, "learning_rate": 0.0001593864708966964, "loss": 0.6932, "step": 1560 },
    { "epoch": 0.20557474180401655, "grad_norm": 1.4043173789978027, "learning_rate": 0.00015912427897220768, "loss": 0.8529, "step": 1570 },
    { "epoch": 0.20688413506391476, "grad_norm": 1.2601439952850342, "learning_rate": 0.00015886208704771895, "loss": 0.8173, "step": 1580 },
    { "epoch": 0.20819352832381297, "grad_norm": 1.2090034484863281, "learning_rate": 0.00015859989512323022, "loss": 0.7451, "step": 1590 },
    { "epoch": 0.20950292158371114, "grad_norm": 1.3334815502166748, "learning_rate": 0.0001583377031987415, "loss": 0.775, "step": 1600 },
    { "epoch": 0.21081231484360935, "grad_norm": 1.1993087530136108, "learning_rate": 0.00015807551127425276, "loss": 0.7733, "step": 1610 },
    { "epoch": 0.21212170810350753, "grad_norm": 1.51642906665802, "learning_rate": 0.00015781331934976406, "loss": 0.6907, "step": 1620 },
    { "epoch": 0.21343110136340573, "grad_norm": 1.3714466094970703, "learning_rate": 0.00015755112742527532, "loss": 0.7016, "step": 1630 },
    { "epoch": 0.21474049462330394, "grad_norm": 1.2519642114639282, "learning_rate": 0.0001572889355007866, "loss": 0.7648, "step": 1640 },
    { "epoch": 0.21604988788320212, "grad_norm": 1.3851202726364136, "learning_rate": 0.00015702674357629786, "loss": 0.7069, "step": 1650 },
    { "epoch": 0.21735928114310032, "grad_norm": 1.334105134010315, "learning_rate": 0.00015676455165180913, "loss": 0.7338, "step": 1660 },
    { "epoch": 0.2186686744029985, "grad_norm": 1.3785145282745361, "learning_rate": 0.0001565023597273204, "loss": 0.6299, "step": 1670 },
    { "epoch": 0.2199780676628967, "grad_norm": 1.4771215915679932, "learning_rate": 0.00015624016780283167, "loss": 0.6828, "step": 1680 },
    { "epoch": 0.2212874609227949, "grad_norm": 1.3885449171066284, "learning_rate": 0.00015597797587834294, "loss": 0.7141, "step": 1690 },
    { "epoch": 0.2225968541826931, "grad_norm": 1.2664909362792969, "learning_rate": 0.00015571578395385423, "loss": 0.7667, "step": 1700 },
    { "epoch": 0.2239062474425913, "grad_norm": 1.2576826810836792, "learning_rate": 0.0001554535920293655, "loss": 0.7395, "step": 1710 },
    { "epoch": 0.22521564070248948, "grad_norm": 1.284826636314392, "learning_rate": 0.00015519140010487677, "loss": 0.6832, "step": 1720 },
    { "epoch": 0.22652503396238768, "grad_norm": 1.272933006286621, "learning_rate": 0.00015492920818038804, "loss": 0.6892, "step": 1730 },
    { "epoch": 0.22783442722228586, "grad_norm": 1.3465379476547241, "learning_rate": 0.0001546670162558993, "loss": 0.6449, "step": 1740 },
    { "epoch": 0.22914382048218407, "grad_norm": 1.2862318754196167, "learning_rate": 0.00015440482433141058, "loss": 0.6883, "step": 1750 },
    { "epoch": 0.23045321374208227, "grad_norm": 1.2469042539596558, "learning_rate": 0.00015414263240692188, "loss": 0.7593, "step": 1760 },
    { "epoch": 0.23176260700198045, "grad_norm": 1.5080034732818604, "learning_rate": 0.00015388044048243315, "loss": 0.7009, "step": 1770 },
    { "epoch": 0.23307200026187866, "grad_norm": 0.9788569211959839, "learning_rate": 0.00015361824855794441, "loss": 0.602, "step": 1780 },
    { "epoch": 0.23438139352177684, "grad_norm": 1.3450673818588257, "learning_rate": 0.00015335605663345568, "loss": 0.6238, "step": 1790 },
    { "epoch": 0.23569078678167504, "grad_norm": 1.4177800416946411, "learning_rate": 0.00015309386470896695, "loss": 0.6768, "step": 1800 },
    { "epoch": 0.23700018004157325, "grad_norm": 1.3528062105178833, "learning_rate": 0.00015283167278447825, "loss": 0.6404, "step": 1810 },
    { "epoch": 0.23830957330147143, "grad_norm": 1.2898012399673462, "learning_rate": 0.00015256948085998952, "loss": 0.6606, "step": 1820 },
    { "epoch": 0.23961896656136963, "grad_norm": 1.311298131942749, "learning_rate": 0.0001523072889355008, "loss": 0.662, "step": 1830 },
    { "epoch": 0.2409283598212678, "grad_norm": 1.6476584672927856, "learning_rate": 0.00015204509701101206, "loss": 0.671, "step": 1840 },
    { "epoch": 0.24223775308116602, "grad_norm": 1.36719810962677, "learning_rate": 0.00015178290508652335, "loss": 0.7097, "step": 1850 },
    { "epoch": 0.2435471463410642, "grad_norm": 1.3647184371948242, "learning_rate": 0.00015152071316203462, "loss": 0.6604, "step": 1860 },
    { "epoch": 0.2448565396009624, "grad_norm": 1.2265934944152832, "learning_rate": 0.0001512585212375459, "loss": 0.6272, "step": 1870 },
    { "epoch": 0.2461659328608606, "grad_norm": 1.4882850646972656, "learning_rate": 0.00015099632931305716, "loss": 0.7007, "step": 1880 },
    { "epoch": 0.2474753261207588, "grad_norm": 1.408470869064331, "learning_rate": 0.00015073413738856843, "loss": 0.6526, "step": 1890 },
    { "epoch": 0.248784719380657, "grad_norm": 1.3388913869857788, "learning_rate": 0.00015047194546407972, "loss": 0.6891, "step": 1900 },
    { "epoch": 0.2500941126405552, "grad_norm": 1.3725926876068115, "learning_rate": 0.000150209753539591, "loss": 0.5763, "step": 1910 },
    { "epoch": 0.25140350590045335, "grad_norm": 1.40208899974823, "learning_rate": 0.00014994756161510226, "loss": 0.5637, "step": 1920 },
    { "epoch": 0.25271289916035156, "grad_norm": 1.8308840990066528, "learning_rate": 0.00014968536969061353, "loss": 0.6899, "step": 1930 },
    { "epoch": 0.25402229242024976, "grad_norm": 1.4921183586120605, "learning_rate": 0.0001494231777661248, "loss": 0.5764, "step": 1940 },
    { "epoch": 0.25533168568014797, "grad_norm": 1.5387523174285889, "learning_rate": 0.0001491609858416361, "loss": 0.5229, "step": 1950 },
    { "epoch": 0.2566410789400462, "grad_norm": 1.3345798254013062, "learning_rate": 0.00014889879391714737, "loss": 0.5949, "step": 1960 },
    { "epoch": 0.2579504721999443, "grad_norm": 1.682065486907959, "learning_rate": 0.00014863660199265863, "loss": 0.5619, "step": 1970 },
    { "epoch": 0.25925986545984253, "grad_norm": 1.480276346206665, "learning_rate": 0.0001483744100681699, "loss": 0.5473, "step": 1980 },
    { "epoch": 0.26056925871974074, "grad_norm": 1.3453810214996338, "learning_rate": 0.0001481122181436812, "loss": 0.5603, "step": 1990 },
    { "epoch": 0.26187865197963894, "grad_norm": 1.4118777513504028, "learning_rate": 0.00014785002621919247, "loss": 0.5543, "step": 2000 },
    { "epoch": 0.26318804523953715, "grad_norm": 1.2959351539611816, "learning_rate": 0.00014758783429470374, "loss": 0.4962, "step": 2010 },
    { "epoch": 0.2644974384994353, "grad_norm": 1.3605815172195435, "learning_rate": 0.000147325642370215, "loss": 0.5699, "step": 2020 },
    { "epoch": 0.2658068317593335, "grad_norm": 2.086613416671753, "learning_rate": 0.00014706345044572628, "loss": 0.565, "step": 2030 },
    { "epoch": 0.2671162250192317, "grad_norm": 1.2892887592315674, "learning_rate": 0.00014680125852123757, "loss": 0.6062, "step": 2040 },
    { "epoch": 0.2684256182791299, "grad_norm": 1.5760036706924438, "learning_rate": 0.00014653906659674884, "loss": 0.5642, "step": 2050 },
    { "epoch": 0.2697350115390281, "grad_norm": 1.21380615234375, "learning_rate": 0.0001462768746722601, "loss": 0.5514, "step": 2060 },
    { "epoch": 0.2710444047989263, "grad_norm": 1.4393121004104614, "learning_rate": 0.00014601468274777138, "loss": 0.5572, "step": 2070 },
    { "epoch": 0.2723537980588245, "grad_norm": 1.2972021102905273, "learning_rate": 0.00014575249082328265, "loss": 0.535, "step": 2080 },
    { "epoch": 0.2736631913187227, "grad_norm": 1.0208637714385986, "learning_rate": 0.00014549029889879392, "loss": 0.5835, "step": 2090 },
    { "epoch": 0.2749725845786209, "grad_norm": 1.4418736696243286, "learning_rate": 0.00014522810697430521, "loss": 0.4829, "step": 2100 },
    { "epoch": 0.2762819778385191, "grad_norm": 1.4326051473617554, "learning_rate": 0.00014496591504981648, "loss": 0.4711, "step": 2110 },
    { "epoch": 0.27759137109841725, "grad_norm": 1.497841715812683, "learning_rate": 0.00014470372312532775, "loss": 0.4935, "step": 2120 },
    { "epoch": 0.27890076435831546, "grad_norm": 1.5082463026046753, "learning_rate": 0.00014444153120083902, "loss": 0.4979, "step": 2130 },
    { "epoch": 0.28021015761821366, "grad_norm": 1.2458934783935547, "learning_rate": 0.0001441793392763503, "loss": 0.5644, "step": 2140 },
    { "epoch": 0.28151955087811187, "grad_norm": 1.730130910873413, "learning_rate": 0.00014391714735186156, "loss": 0.4749, "step": 2150 },
    { "epoch": 0.2828289441380101, "grad_norm": 1.2587112188339233, "learning_rate": 0.00014365495542737283, "loss": 0.5175, "step": 2160 },
    { "epoch": 0.2841383373979082, "grad_norm": 1.431119441986084, "learning_rate": 0.0001433927635028841, "loss": 0.5597, "step": 2170 },
    { "epoch": 0.28544773065780643, "grad_norm": 1.5383937358856201, "learning_rate": 0.0001431305715783954, "loss": 0.5153, "step": 2180 },
    { "epoch": 0.28675712391770464, "grad_norm": 1.4311727285385132, "learning_rate": 0.00014286837965390666, "loss": 0.5452, "step": 2190 },
    { "epoch": 0.28806651717760284, "grad_norm": 1.2555975914001465, "learning_rate": 0.00014260618772941793, "loss": 0.4937, "step": 2200 },
    { "epoch": 0.28937591043750105, "grad_norm": 1.3781330585479736, "learning_rate": 0.0001423439958049292, "loss": 0.4537, "step": 2210 },
    { "epoch": 0.2906853036973992, "grad_norm": 1.4810888767242432, "learning_rate": 0.00014208180388044047, "loss": 0.396, "step": 2220 },
    { "epoch": 0.2919946969572974, "grad_norm": 1.6619911193847656, "learning_rate": 0.00014181961195595177, "loss": 0.4756, "step": 2230 },
    { "epoch": 0.2933040902171956, "grad_norm": 1.3403065204620361, "learning_rate": 0.00014155742003146303, "loss": 0.5157, "step": 2240 },
    { "epoch": 0.2946134834770938, "grad_norm": 1.4188278913497925, "learning_rate": 0.0001412952281069743, "loss": 0.5237, "step": 2250 },
    { "epoch": 0.29592287673699197, "grad_norm": 1.852266550064087, "learning_rate": 0.00014103303618248557, "loss": 0.4558, "step": 2260 },
    { "epoch": 0.2972322699968902, "grad_norm": 1.3092072010040283, "learning_rate": 0.00014077084425799687, "loss": 0.4437, "step": 2270 },
    { "epoch": 0.2985416632567884, "grad_norm": 1.4190593957901, "learning_rate": 0.00014050865233350814, "loss": 0.4717, "step": 2280 },
    { "epoch": 0.2998510565166866, "grad_norm": 1.4562608003616333, "learning_rate": 0.0001402464604090194, "loss": 0.4744, "step": 2290 },
    { "epoch": 0.3011604497765848, "grad_norm": 1.4576420783996582, "learning_rate": 0.00013998426848453068, "loss": 0.4429, "step": 2300 },
    { "epoch": 0.30246984303648294, "grad_norm": 1.867145299911499, "learning_rate": 0.00013972207656004194, "loss": 0.4881, "step": 2310 },
    { "epoch": 0.30377923629638115, "grad_norm": 1.3077807426452637, "learning_rate": 0.00013945988463555324, "loss": 0.4067, "step": 2320 },
    { "epoch": 0.30508862955627936, "grad_norm": 1.3587473630905151, "learning_rate": 0.0001391976927110645, "loss": 0.4428, "step": 2330 },
    { "epoch": 0.30639802281617756, "grad_norm": 1.6012579202651978, "learning_rate": 0.00013893550078657578, "loss": 0.4572, "step": 2340 },
    { "epoch": 0.30770741607607577, "grad_norm": 1.2226955890655518, "learning_rate": 0.00013867330886208705, "loss": 0.4117, "step": 2350 },
    { "epoch": 0.3090168093359739, "grad_norm": 1.4615281820297241, "learning_rate": 0.00013841111693759834, "loss": 0.4561, "step": 2360 },
    { "epoch": 0.3103262025958721, "grad_norm": 1.401014804840088, "learning_rate": 0.0001381489250131096, "loss": 0.441, "step": 2370 },
    { "epoch": 0.31163559585577033, "grad_norm": 1.4875798225402832, "learning_rate": 0.00013788673308862088, "loss": 0.3991, "step": 2380 },
    { "epoch": 0.31294498911566854, "grad_norm": 1.1867239475250244, "learning_rate": 0.00013762454116413215, "loss": 0.4223, "step": 2390 },
    { "epoch": 0.31425438237556674, "grad_norm": 1.3172953128814697, "learning_rate": 0.00013736234923964342, "loss": 0.4388, "step": 2400 },
    { "epoch": 0.3155637756354649, "grad_norm": 1.4044665098190308, "learning_rate": 0.00013710015731515472, "loss": 0.4102, "step": 2410 },
    { "epoch": 0.3168731688953631, "grad_norm": 1.5709283351898193, "learning_rate": 0.00013683796539066599, "loss": 0.4837, "step": 2420 },
    { "epoch": 0.3181825621552613, "grad_norm": 1.2237786054611206, "learning_rate": 0.00013657577346617725, "loss": 0.4452, "step": 2430 },
    { "epoch": 0.3194919554151595, "grad_norm": 1.8869267702102661, "learning_rate": 0.00013631358154168852, "loss": 0.4077, "step": 2440 },
    { "epoch": 0.3208013486750577, "grad_norm": 1.226117491722107, "learning_rate": 0.0001360513896171998, "loss": 0.4109, "step": 2450 },
    { "epoch": 0.32211074193495587, "grad_norm": 1.6273385286331177, "learning_rate": 0.0001357891976927111, "loss": 0.3596, "step": 2460 },
    { "epoch": 0.3234201351948541, "grad_norm": 1.4535574913024902, "learning_rate": 0.00013552700576822236, "loss": 0.3996, "step": 2470 },
    { "epoch": 0.3247295284547523, "grad_norm": 1.6052360534667969, "learning_rate": 0.00013526481384373363, "loss": 0.4082, "step": 2480 },
    { "epoch": 0.3260389217146505, "grad_norm": 1.9104530811309814, "learning_rate": 0.0001350026219192449, "loss": 0.4089, "step": 2490 },
    { "epoch": 0.3273483149745487, "grad_norm": 1.6006613969802856, "learning_rate": 0.0001347404299947562, "loss": 0.3848, "step": 2500 },
    { "epoch": 0.32865770823444684, "grad_norm": 1.4406352043151855, "learning_rate": 0.00013447823807026746, "loss": 0.3926, "step": 2510 },
    { "epoch": 0.32996710149434505, "grad_norm": 1.3455756902694702, "learning_rate": 0.00013421604614577873, "loss": 0.4203, "step": 2520 },
    { "epoch": 0.33127649475424326, "grad_norm": 1.7718679904937744, "learning_rate": 0.00013395385422129, "loss": 0.3765, "step": 2530 },
    { "epoch": 0.33258588801414146, "grad_norm": 1.410130500793457, "learning_rate": 0.00013369166229680127, "loss": 0.3646, "step": 2540 },
    { "epoch": 0.33389528127403967, "grad_norm": 1.6361408233642578, "learning_rate": 0.00013342947037231254, "loss": 0.3917, "step": 2550 },
    { "epoch": 0.3352046745339378, "grad_norm": 1.7627660036087036, "learning_rate": 0.0001331672784478238, "loss": 0.367, "step": 2560 },
    { "epoch": 0.336514067793836, "grad_norm": 1.2431906461715698, "learning_rate": 0.00013290508652333508, "loss": 0.3708, "step": 2570 },
    { "epoch": 0.33782346105373423, "grad_norm": 1.4763669967651367, "learning_rate": 0.00013264289459884634, "loss": 0.377, "step": 2580 },
    { "epoch": 0.33913285431363244, "grad_norm": 2.1701712608337402, "learning_rate": 0.00013238070267435761, "loss": 0.344, "step": 2590 },
    { "epoch": 0.3404422475735306, "grad_norm": 1.4388126134872437, "learning_rate": 0.0001321185107498689, "loss": 0.3556, "step": 2600 },
    { "epoch": 0.3417516408334288, "grad_norm": 1.2981114387512207, "learning_rate": 0.00013185631882538018, "loss": 0.3272, "step": 2610 },
    { "epoch": 0.343061034093327, "grad_norm": 1.539335012435913, "learning_rate": 0.00013159412690089145, "loss": 0.4132, "step": 2620 },
    { "epoch": 0.3443704273532252, "grad_norm": 1.9272770881652832, "learning_rate": 0.00013133193497640272, "loss": 0.4121, "step": 2630 },
    { "epoch": 0.3456798206131234, "grad_norm": 1.4415314197540283, "learning_rate": 0.000131069743051914, "loss": 0.3595, "step": 2640 },
    { "epoch": 0.34698921387302156, "grad_norm": 1.3155860900878906, "learning_rate": 0.00013080755112742528, "loss": 0.3611, "step": 2650 },
    { "epoch": 0.34829860713291977, "grad_norm": 1.507858157157898, "learning_rate": 0.00013054535920293655, "loss": 0.3813, "step": 2660 },
    { "epoch": 0.349608000392818, "grad_norm": 1.5444693565368652, "learning_rate": 0.00013028316727844782, "loss": 0.3527, "step": 2670 },
    { "epoch": 0.3509173936527162, "grad_norm": 1.4008456468582153, "learning_rate": 0.0001300209753539591, "loss": 0.3573, "step": 2680 },
    { "epoch": 0.3522267869126144, "grad_norm": 1.6443661451339722, "learning_rate": 0.00012975878342947039, "loss": 0.3885, "step": 2690 },
    { "epoch": 0.35353618017251254, "grad_norm": 1.513431429862976, "learning_rate": 0.00012949659150498165, "loss": 0.3332, "step": 2700 },
    { "epoch": 0.35484557343241074, "grad_norm": 1.6663899421691895, "learning_rate": 0.00012923439958049292, "loss": 0.3769, "step": 2710 },
    { "epoch": 0.35615496669230895, "grad_norm": 1.2655925750732422, "learning_rate": 0.0001289722076560042, "loss": 0.4177, "step": 2720 },
    { "epoch": 0.35746435995220716, "grad_norm": 1.324833869934082, "learning_rate": 0.00012871001573151546, "loss": 0.3501, "step": 2730 },
    { "epoch": 0.35877375321210536, "grad_norm": 1.4842655658721924, "learning_rate": 0.00012844782380702676, "loss": 0.3223, "step": 2740 },
    { "epoch": 0.3600831464720035, "grad_norm": 1.4087761640548706, "learning_rate": 0.00012818563188253803, "loss": 0.3308, "step": 2750 },
    { "epoch": 0.3613925397319017, "grad_norm": 1.7493972778320312, "learning_rate": 0.0001279234399580493, "loss": 0.3655, "step": 2760 },
    { "epoch": 0.3627019329917999, "grad_norm": 1.4829336404800415, "learning_rate": 0.00012766124803356056, "loss": 0.3674, "step": 2770 },
    { "epoch": 0.36401132625169813, "grad_norm": 1.39944589138031, "learning_rate": 0.00012739905610907186, "loss": 0.3285, "step": 2780 },
    { "epoch": 0.36532071951159634, "grad_norm": 1.5995631217956543, "learning_rate": 0.00012713686418458313, "loss": 0.3431, "step": 2790 },
    { "epoch": 0.3666301127714945, "grad_norm": 1.0113691091537476, "learning_rate": 0.0001268746722600944, "loss": 0.3389, "step": 2800 },
    { "epoch": 0.3679395060313927, "grad_norm": 1.6544948816299438, "learning_rate": 0.00012661248033560567, "loss": 0.323, "step": 2810 },
    { "epoch": 0.3692488992912909, "grad_norm": 1.8022606372833252, "learning_rate": 0.00012635028841111694, "loss": 0.3777, "step": 2820 },
    { "epoch": 0.3705582925511891, "grad_norm": 1.6005665063858032, "learning_rate": 0.00012608809648662823, "loss": 0.3482, "step": 2830 },
    { "epoch": 0.3718676858110873, "grad_norm": 1.2550064325332642, "learning_rate": 0.0001258259045621395, "loss": 0.3288, "step": 2840 },
    { "epoch": 0.37317707907098546, "grad_norm": 2.43110728263855, "learning_rate": 0.00012556371263765077, "loss": 0.3511, "step": 2850 },
    { "epoch": 0.37448647233088367, "grad_norm": 1.5041906833648682, "learning_rate": 0.00012530152071316204, "loss": 0.3578, "step": 2860 },
    { "epoch": 0.3757958655907819, "grad_norm": 1.6031140089035034, "learning_rate": 0.0001250393287886733, "loss": 0.3213, "step": 2870 },
    { "epoch": 0.3771052588506801, "grad_norm": 1.025795817375183, "learning_rate": 0.0001247771368641846, "loss": 0.3352, "step": 2880 },
    { "epoch": 0.3784146521105783, "grad_norm": 1.934812068939209, "learning_rate": 0.00012451494493969587, "loss": 0.3365, "step": 2890 },
    { "epoch": 0.37972404537047644, "grad_norm": 1.0730398893356323, "learning_rate": 0.00012425275301520714, "loss": 0.3365, "step": 2900 },
    { "epoch": 0.38103343863037464, "grad_norm": 1.3496712446212769, "learning_rate": 0.0001239905610907184, "loss": 0.3548, "step": 2910 },
    { "epoch": 0.38234283189027285, "grad_norm": 1.3053911924362183, "learning_rate": 0.0001237283691662297, "loss": 0.3563, "step": 2920 },
    { "epoch": 0.38365222515017106, "grad_norm": 1.3640882968902588, "learning_rate": 0.00012346617724174098, "loss": 0.365, "step": 2930 },
    { "epoch": 0.38496161841006926, "grad_norm": 1.3266191482543945, "learning_rate": 0.00012320398531725225, "loss": 0.2981, "step": 2940 },
    { "epoch": 0.3862710116699674, "grad_norm": 1.32815682888031, "learning_rate": 0.00012294179339276352, "loss": 0.3544, "step": 2950 },
    { "epoch": 0.3875804049298656, "grad_norm": 1.4236459732055664, "learning_rate": 0.00012267960146827479, "loss": 0.3095, "step": 2960 },
    { "epoch": 0.3888897981897638, "grad_norm": 1.1536756753921509, "learning_rate": 0.00012241740954378605, "loss": 0.3125, "step": 2970 },
    { "epoch": 0.39019919144966203, "grad_norm": 1.4237791299819946, "learning_rate": 0.00012215521761929732, "loss": 0.3207, "step": 2980 },
    { "epoch": 0.3915085847095602, "grad_norm": 1.4023237228393555, "learning_rate": 0.0001218930256948086, "loss": 0.3714, "step": 2990 },
    { "epoch": 0.3928179779694584, "grad_norm": 1.3556010723114014, "learning_rate": 0.00012163083377031987, "loss": 0.3313, "step": 3000 },
    { "epoch": 0.3941273712293566, "grad_norm": 1.2301980257034302, "learning_rate": 0.00012136864184583114, "loss": 0.3062, "step": 3010 },
    { "epoch": 0.3954367644892548, "grad_norm": 1.3532170057296753, "learning_rate": 0.00012110644992134244, "loss": 0.2946, "step": 3020 },
    { "epoch": 0.396746157749153, "grad_norm": 1.2680764198303223, "learning_rate": 0.00012084425799685371, "loss": 0.3005, "step": 3030 },
    { "epoch": 0.39805555100905116, "grad_norm": 1.5346810817718506, "learning_rate": 0.00012058206607236498, "loss": 0.3363, "step": 3040 },
    { "epoch": 0.39936494426894936, "grad_norm": 1.423195242881775, "learning_rate": 0.00012031987414787625, "loss": 0.3294, "step": 3050 },
    { "epoch": 0.40067433752884757, "grad_norm": 1.599571704864502, "learning_rate": 0.00012005768222338753, "loss": 0.3469, "step": 3060 },
    { "epoch": 0.4019837307887458, "grad_norm": 1.2103453874588013, "learning_rate": 0.0001197954902988988, "loss": 0.2827, "step": 3070 },
    { "epoch": 0.403293124048644, "grad_norm": 1.3197276592254639, "learning_rate": 0.00011953329837441007, "loss": 0.3194, "step": 3080 },
    { "epoch": 0.40460251730854213, "grad_norm": 1.291038990020752, "learning_rate": 0.00011927110644992135, "loss": 0.2798, "step": 3090 },
    { "epoch": 0.40591191056844034, "grad_norm": 1.1556978225708008, "learning_rate": 0.00011900891452543262, "loss": 0.3318, "step": 3100 },
    { "epoch": 0.40722130382833854, "grad_norm": 1.3520278930664062, "learning_rate": 0.0001187467226009439, "loss": 0.3222, "step": 3110 },
    { "epoch": 0.40853069708823675, "grad_norm": 1.0671277046203613, "learning_rate": 0.00011848453067645517, "loss": 0.268, "step": 3120 },
    { "epoch": 0.40984009034813496, "grad_norm": 1.442131757736206, "learning_rate": 0.00011822233875196644, "loss": 0.3028, "step": 3130 },
    { "epoch": 0.4111494836080331, "grad_norm": 1.5673497915267944, "learning_rate": 0.00011796014682747771, "loss": 0.31, "step": 3140 },
    { "epoch": 0.4124588768679313, "grad_norm": 1.2009717226028442, "learning_rate": 0.00011769795490298898, "loss": 0.2986, "step": 3150 },
    { "epoch": 0.4137682701278295, "grad_norm": 1.2754930257797241, "learning_rate": 0.00011743576297850027, "loss": 0.3352, "step": 3160 },
    { "epoch": 0.4150776633877277, "grad_norm": 1.6189430952072144, "learning_rate": 0.00011717357105401154, "loss": 0.3804, "step": 3170 },
    { "epoch": 0.41638705664762593, "grad_norm": 1.6117827892303467, "learning_rate": 0.00011691137912952281, "loss": 0.3239, "step": 3180 },
    { "epoch": 0.4176964499075241, "grad_norm": 1.7495907545089722, "learning_rate": 0.00011664918720503408, "loss": 0.3145, "step": 3190 },
    { "epoch": 0.4190058431674223, "grad_norm": 1.2301905155181885, "learning_rate": 0.00011638699528054538, "loss": 0.2776, "step": 3200 },
    { "epoch": 0.4203152364273205, "grad_norm": 1.3571341037750244, "learning_rate": 0.00011612480335605665, "loss": 0.3019, "step": 3210 },
    { "epoch": 0.4216246296872187, "grad_norm": 0.9271483421325684, "learning_rate": 0.00011586261143156792, "loss": 0.2929, "step": 3220 },
    { "epoch": 0.4229340229471169, "grad_norm": 1.294146180152893, "learning_rate": 0.00011560041950707918, "loss": 0.3095, "step": 3230 },
    { "epoch": 0.42424341620701506, "grad_norm": 1.5177209377288818, "learning_rate": 0.00011533822758259045, "loss": 0.2714, "step": 3240 },
    { "epoch": 0.42555280946691326, "grad_norm": 1.1218962669372559, "learning_rate": 0.00011507603565810175, "loss": 0.282, "step": 3250 },
    { "epoch": 0.42686220272681147, "grad_norm": 1.2807728052139282, "learning_rate": 0.00011481384373361302, "loss": 0.3461, "step": 3260 },
    { "epoch": 0.4281715959867097, "grad_norm": 1.1680692434310913, "learning_rate": 0.00011455165180912429, "loss": 0.2842, "step": 3270 },
    { "epoch": 0.4294809892466079, "grad_norm": 1.6534638404846191, "learning_rate": 0.00011428945988463556, "loss": 0.2774, "step": 3280 },
    { "epoch": 0.43079038250650603, "grad_norm": 1.2321938276290894, "learning_rate": 0.00011402726796014683, "loss": 0.2841, "step": 3290 },
    { "epoch": 0.43209977576640424, "grad_norm": 1.6666522026062012, "learning_rate": 0.00011376507603565811, "loss": 0.2993, "step": 3300 },
    { "epoch": 0.43340916902630244, "grad_norm": 1.8330938816070557, "learning_rate": 0.00011350288411116938, "loss": 0.2834, "step": 3310 },
    { "epoch": 0.43471856228620065, "grad_norm": 1.570809245109558, "learning_rate": 0.00011324069218668065, "loss": 0.2885, "step": 3320 },
    { "epoch": 0.4360279555460988, "grad_norm": 1.4093183279037476, "learning_rate": 0.00011297850026219192, "loss": 0.2872, "step": 3330 },
    { "epoch": 0.437337348805997, "grad_norm": 0.8298211097717285, "learning_rate": 0.00011271630833770321, "loss": 0.2884, "step": 3340 },
    { "epoch": 0.4386467420658952, "grad_norm": 1.1143261194229126, "learning_rate": 0.00011245411641321448, "loss": 0.279, "step": 3350 },
    { "epoch": 0.4399561353257934, "grad_norm": 1.1568537950515747, "learning_rate": 0.00011219192448872575, "loss": 0.2724, "step": 3360 },
    { "epoch": 0.4412655285856916, "grad_norm": 0.8700618147850037, "learning_rate": 0.00011192973256423702, "loss": 0.2563, "step": 3370 },
    { "epoch": 0.4425749218455898, "grad_norm": 0.974319577217102, "learning_rate": 0.00011166754063974829, "loss": 0.2864, "step": 3380 },
    { "epoch": 0.443884315105488, "grad_norm": 0.9288910031318665, "learning_rate": 0.00011140534871525958, "loss": 0.2717, "step": 3390 },
    { "epoch": 0.4451937083653862, "grad_norm": 1.0942648649215698, "learning_rate": 0.00011114315679077085, "loss": 0.2625, "step": 3400 },
    { "epoch": 0.4465031016252844, "grad_norm": 1.3224159479141235, "learning_rate": 0.00011088096486628212, "loss": 0.2719, "step": 3410 }
  ],
  "logging_steps": 10,
  "max_steps": 7638,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8131850187780976e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}