{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9966329966329965,
  "eval_steps": 500,
  "global_step": 555,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0053872053872053875,
      "grad_norm": 21.266149520874023,
      "learning_rate": 0.0,
      "loss": 0.9993,
      "step": 1
    },
    {
      "epoch": 0.010774410774410775,
      "grad_norm": 21.13385009765625,
      "learning_rate": 5.882352941176471e-07,
      "loss": 1.0245,
      "step": 2
    },
    {
      "epoch": 0.01616161616161616,
      "grad_norm": 20.182464599609375,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 0.9562,
      "step": 3
    },
    {
      "epoch": 0.02154882154882155,
      "grad_norm": 18.727153778076172,
      "learning_rate": 1.7647058823529414e-06,
      "loss": 0.9445,
      "step": 4
    },
    {
      "epoch": 0.026936026936026935,
      "grad_norm": 16.479658126831055,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 0.9854,
      "step": 5
    },
    {
      "epoch": 0.03232323232323232,
      "grad_norm": 10.075958251953125,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.7675,
      "step": 6
    },
    {
      "epoch": 0.03771043771043771,
      "grad_norm": 8.65888500213623,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.7297,
      "step": 7
    },
    {
      "epoch": 0.0430976430976431,
      "grad_norm": 8.33163070678711,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.6672,
      "step": 8
    },
    {
      "epoch": 0.048484848484848485,
      "grad_norm": 7.000586032867432,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.6599,
      "step": 9
    },
    {
      "epoch": 0.05387205387205387,
      "grad_norm": 6.877265930175781,
      "learning_rate": 5.294117647058824e-06,
      "loss": 0.5728,
      "step": 10
    },
    {
      "epoch": 0.05925925925925926,
      "grad_norm": 6.868885040283203,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.6731,
      "step": 11
    },
    {
      "epoch": 0.06464646464646465,
      "grad_norm": 6.862372875213623,
      "learning_rate": 6.470588235294119e-06,
      "loss": 0.7126,
      "step": 12
    },
    {
      "epoch": 0.07003367003367003,
      "grad_norm": 6.195284843444824,
      "learning_rate": 7.058823529411766e-06,
      "loss": 0.644,
      "step": 13
    },
    {
      "epoch": 0.07542087542087542,
      "grad_norm": 6.2631120681762695,
      "learning_rate": 7.647058823529411e-06,
      "loss": 0.5753,
      "step": 14
    },
    {
      "epoch": 0.08080808080808081,
      "grad_norm": 5.94320011138916,
      "learning_rate": 8.23529411764706e-06,
      "loss": 0.6584,
      "step": 15
    },
    {
      "epoch": 0.0861952861952862,
      "grad_norm": 5.2665205001831055,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.6102,
      "step": 16
    },
    {
      "epoch": 0.09158249158249158,
      "grad_norm": 5.388559341430664,
      "learning_rate": 9.411764705882354e-06,
      "loss": 0.6127,
      "step": 17
    },
    {
      "epoch": 0.09696969696969697,
      "grad_norm": 5.109943866729736,
      "learning_rate": 1e-05,
      "loss": 0.6259,
      "step": 18
    },
    {
      "epoch": 0.10235690235690235,
      "grad_norm": 4.631857395172119,
      "learning_rate": 9.999914754008063e-06,
      "loss": 0.6064,
      "step": 19
    },
    {
      "epoch": 0.10774410774410774,
      "grad_norm": 4.755272388458252,
      "learning_rate": 9.999659018938999e-06,
      "loss": 0.5934,
      "step": 20
    },
    {
      "epoch": 0.11313131313131314,
      "grad_norm": 4.383729934692383,
      "learning_rate": 9.999232803512967e-06,
      "loss": 0.6137,
      "step": 21
    },
    {
      "epoch": 0.11851851851851852,
      "grad_norm": 4.2614593505859375,
      "learning_rate": 9.998636122263227e-06,
      "loss": 0.5914,
      "step": 22
    },
    {
      "epoch": 0.12390572390572391,
      "grad_norm": 4.656721591949463,
      "learning_rate": 9.997868995535658e-06,
      "loss": 0.599,
      "step": 23
    },
    {
      "epoch": 0.1292929292929293,
      "grad_norm": 4.374063491821289,
      "learning_rate": 9.996931449488046e-06,
      "loss": 0.6489,
      "step": 24
    },
    {
      "epoch": 0.13468013468013468,
      "grad_norm": 4.434129238128662,
      "learning_rate": 9.99582351608921e-06,
      "loss": 0.5895,
      "step": 25
    },
    {
      "epoch": 0.14006734006734006,
      "grad_norm": 4.682045936584473,
      "learning_rate": 9.994545233117904e-06,
      "loss": 0.6253,
      "step": 26
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 4.347814559936523,
      "learning_rate": 9.993096644161526e-06,
      "loss": 0.617,
      "step": 27
    },
    {
      "epoch": 0.15084175084175083,
      "grad_norm": 4.48855447769165,
      "learning_rate": 9.991477798614638e-06,
      "loss": 0.6468,
      "step": 28
    },
    {
      "epoch": 0.15622895622895622,
      "grad_norm": 4.433114528656006,
      "learning_rate": 9.989688751677277e-06,
      "loss": 0.6084,
      "step": 29
    },
    {
      "epoch": 0.16161616161616163,
      "grad_norm": 3.879382610321045,
      "learning_rate": 9.987729564353077e-06,
      "loss": 0.5468,
      "step": 30
    },
    {
      "epoch": 0.16700336700336701,
      "grad_norm": 4.3543009757995605,
      "learning_rate": 9.985600303447185e-06,
      "loss": 0.6268,
      "step": 31
    },
    {
      "epoch": 0.1723905723905724,
      "grad_norm": 6.505020618438721,
      "learning_rate": 9.98330104156398e-06,
      "loss": 0.5947,
      "step": 32
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 4.169903755187988,
      "learning_rate": 9.980831857104612e-06,
      "loss": 0.574,
      "step": 33
    },
    {
      "epoch": 0.18316498316498317,
      "grad_norm": 4.362861633300781,
      "learning_rate": 9.978192834264307e-06,
      "loss": 0.5851,
      "step": 34
    },
    {
      "epoch": 0.18855218855218855,
      "grad_norm": 3.7690815925598145,
      "learning_rate": 9.975384063029516e-06,
      "loss": 0.6023,
      "step": 35
    },
    {
      "epoch": 0.19393939393939394,
      "grad_norm": 4.335365295410156,
      "learning_rate": 9.972405639174833e-06,
      "loss": 0.6267,
      "step": 36
    },
    {
      "epoch": 0.19932659932659932,
      "grad_norm": 4.149550914764404,
      "learning_rate": 9.96925766425974e-06,
      "loss": 0.599,
      "step": 37
    },
    {
      "epoch": 0.2047138047138047,
      "grad_norm": 4.021537780761719,
      "learning_rate": 9.965940245625131e-06,
      "loss": 0.5859,
      "step": 38
    },
    {
      "epoch": 0.2101010101010101,
      "grad_norm": 4.439505100250244,
      "learning_rate": 9.962453496389665e-06,
      "loss": 0.5895,
      "step": 39
    },
    {
      "epoch": 0.21548821548821548,
      "grad_norm": 4.288372039794922,
      "learning_rate": 9.958797535445898e-06,
      "loss": 0.6212,
      "step": 40
    },
    {
      "epoch": 0.22087542087542086,
      "grad_norm": 4.0634260177612305,
      "learning_rate": 9.95497248745624e-06,
      "loss": 0.6061,
      "step": 41
    },
    {
      "epoch": 0.22626262626262628,
      "grad_norm": 4.286866188049316,
      "learning_rate": 9.950978482848694e-06,
      "loss": 0.6458,
      "step": 42
    },
    {
      "epoch": 0.23164983164983166,
      "grad_norm": 3.877549409866333,
      "learning_rate": 9.946815657812416e-06,
      "loss": 0.5868,
      "step": 43
    },
    {
      "epoch": 0.23703703703703705,
      "grad_norm": 4.321531295776367,
      "learning_rate": 9.94248415429306e-06,
      "loss": 0.6158,
      "step": 44
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 3.8047635555267334,
      "learning_rate": 9.937984119987958e-06,
      "loss": 0.5437,
      "step": 45
    },
    {
      "epoch": 0.24781144781144782,
      "grad_norm": 4.01943826675415,
      "learning_rate": 9.93331570834106e-06,
      "loss": 0.5668,
      "step": 46
    },
    {
      "epoch": 0.2531986531986532,
      "grad_norm": 4.549412250518799,
      "learning_rate": 9.928479078537722e-06,
      "loss": 0.6271,
      "step": 47
    },
    {
      "epoch": 0.2585858585858586,
      "grad_norm": 3.865027904510498,
      "learning_rate": 9.923474395499266e-06,
      "loss": 0.6187,
      "step": 48
    },
    {
      "epoch": 0.26397306397306397,
      "grad_norm": 3.9334516525268555,
      "learning_rate": 9.91830182987736e-06,
      "loss": 0.614,
      "step": 49
    },
    {
      "epoch": 0.26936026936026936,
      "grad_norm": 3.9490811824798584,
      "learning_rate": 9.912961558048196e-06,
      "loss": 0.5716,
      "step": 50
    },
    {
      "epoch": 0.27474747474747474,
      "grad_norm": 3.834277391433716,
      "learning_rate": 9.907453762106484e-06,
      "loss": 0.5145,
      "step": 51
    },
    {
      "epoch": 0.2801346801346801,
      "grad_norm": 3.9712698459625244,
      "learning_rate": 9.901778629859236e-06,
      "loss": 0.627,
      "step": 52
    },
    {
      "epoch": 0.2855218855218855,
      "grad_norm": 4.146055698394775,
      "learning_rate": 9.895936354819362e-06,
      "loss": 0.5962,
      "step": 53
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 4.930230140686035,
      "learning_rate": 9.889927136199075e-06,
      "loss": 0.5974,
      "step": 54
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 4.270641803741455,
      "learning_rate": 9.883751178903095e-06,
      "loss": 0.6245,
      "step": 55
    },
    {
      "epoch": 0.30168350168350166,
      "grad_norm": 8.589272499084473,
      "learning_rate": 9.877408693521664e-06,
      "loss": 0.6359,
      "step": 56
    },
    {
      "epoch": 0.30707070707070705,
      "grad_norm": 8.13204288482666,
      "learning_rate": 9.870899896323368e-06,
      "loss": 0.6429,
      "step": 57
    },
    {
      "epoch": 0.31245791245791243,
      "grad_norm": 7.613426208496094,
      "learning_rate": 9.864225009247753e-06,
      "loss": 0.577,
      "step": 58
    },
    {
      "epoch": 0.3178451178451178,
      "grad_norm": 4.240153789520264,
      "learning_rate": 9.857384259897768e-06,
      "loss": 0.6715,
      "step": 59
    },
    {
      "epoch": 0.32323232323232326,
      "grad_norm": 3.9827535152435303,
      "learning_rate": 9.850377881532e-06,
      "loss": 0.5256,
      "step": 60
    },
    {
      "epoch": 0.32861952861952864,
      "grad_norm": 5.192502975463867,
      "learning_rate": 9.843206113056715e-06,
      "loss": 0.5537,
      "step": 61
    },
    {
      "epoch": 0.33400673400673403,
      "grad_norm": 3.669801950454712,
      "learning_rate": 9.835869199017725e-06,
      "loss": 0.6018,
      "step": 62
    },
    {
      "epoch": 0.3393939393939394,
      "grad_norm": 4.642088890075684,
      "learning_rate": 9.828367389592034e-06,
      "loss": 0.5001,
      "step": 63
    },
    {
      "epoch": 0.3447811447811448,
      "grad_norm": 3.983962297439575,
      "learning_rate": 9.820700940579312e-06,
      "loss": 0.624,
      "step": 64
    },
    {
      "epoch": 0.3501683501683502,
      "grad_norm": 3.97925066947937,
      "learning_rate": 9.812870113393185e-06,
      "loss": 0.5945,
      "step": 65
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 4.082148551940918,
      "learning_rate": 9.804875175052304e-06,
      "loss": 0.5847,
      "step": 66
    },
    {
      "epoch": 0.36094276094276095,
      "grad_norm": 3.4573113918304443,
      "learning_rate": 9.796716398171248e-06,
      "loss": 0.5016,
      "step": 67
    },
    {
      "epoch": 0.36632996632996634,
      "grad_norm": 3.9368677139282227,
      "learning_rate": 9.788394060951228e-06,
      "loss": 0.5582,
      "step": 68
    },
    {
      "epoch": 0.3717171717171717,
      "grad_norm": 3.7513315677642822,
      "learning_rate": 9.779908447170602e-06,
      "loss": 0.5525,
      "step": 69
    },
    {
      "epoch": 0.3771043771043771,
      "grad_norm": 3.7674131393432617,
      "learning_rate": 9.771259846175195e-06,
      "loss": 0.5577,
      "step": 70
    },
    {
      "epoch": 0.3824915824915825,
      "grad_norm": 3.596757650375366,
      "learning_rate": 9.762448552868433e-06,
      "loss": 0.553,
      "step": 71
    },
    {
      "epoch": 0.3878787878787879,
      "grad_norm": 3.4366366863250732,
      "learning_rate": 9.753474867701294e-06,
      "loss": 0.533,
      "step": 72
    },
    {
      "epoch": 0.39326599326599326,
      "grad_norm": 3.8846004009246826,
      "learning_rate": 9.744339096662056e-06,
      "loss": 0.5755,
      "step": 73
    },
    {
      "epoch": 0.39865319865319865,
      "grad_norm": 3.593231439590454,
      "learning_rate": 9.735041551265862e-06,
      "loss": 0.5424,
      "step": 74
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 3.5270259380340576,
      "learning_rate": 9.725582548544106e-06,
      "loss": 0.5218,
      "step": 75
    },
    {
      "epoch": 0.4094276094276094,
      "grad_norm": 3.9130117893218994,
      "learning_rate": 9.715962411033614e-06,
      "loss": 0.5529,
      "step": 76
    },
    {
      "epoch": 0.4148148148148148,
      "grad_norm": 3.5708324909210205,
      "learning_rate": 9.706181466765654e-06,
      "loss": 0.5047,
      "step": 77
    },
    {
      "epoch": 0.4202020202020202,
      "grad_norm": 3.6041488647460938,
      "learning_rate": 9.696240049254744e-06,
      "loss": 0.4715,
      "step": 78
    },
    {
      "epoch": 0.4255892255892256,
      "grad_norm": 3.532111644744873,
      "learning_rate": 9.686138497487282e-06,
      "loss": 0.5443,
      "step": 79
    },
    {
      "epoch": 0.43097643097643096,
      "grad_norm": 3.3798911571502686,
      "learning_rate": 9.675877155909989e-06,
      "loss": 0.5196,
      "step": 80
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 3.576612949371338,
      "learning_rate": 9.66545637441816e-06,
      "loss": 0.5593,
      "step": 81
    },
    {
      "epoch": 0.4417508417508417,
      "grad_norm": 3.6367032527923584,
      "learning_rate": 9.654876508343739e-06,
      "loss": 0.5199,
      "step": 82
    },
    {
      "epoch": 0.4471380471380471,
      "grad_norm": 4.221003532409668,
      "learning_rate": 9.644137918443198e-06,
      "loss": 0.5799,
      "step": 83
    },
    {
      "epoch": 0.45252525252525255,
      "grad_norm": 3.6288747787475586,
      "learning_rate": 9.633240970885231e-06,
      "loss": 0.5702,
      "step": 84
    },
    {
      "epoch": 0.45791245791245794,
      "grad_norm": 3.6418979167938232,
      "learning_rate": 9.622186037238286e-06,
      "loss": 0.5463,
      "step": 85
    },
    {
      "epoch": 0.4632996632996633,
      "grad_norm": 3.5099191665649414,
      "learning_rate": 9.610973494457873e-06,
      "loss": 0.5871,
      "step": 86
    },
    {
      "epoch": 0.4686868686868687,
      "grad_norm": 3.9148519039154053,
      "learning_rate": 9.599603724873725e-06,
      "loss": 0.6149,
      "step": 87
    },
    {
      "epoch": 0.4740740740740741,
      "grad_norm": 3.3477306365966797,
      "learning_rate": 9.588077116176756e-06,
      "loss": 0.5618,
      "step": 88
    },
    {
      "epoch": 0.4794612794612795,
      "grad_norm": 3.632464647293091,
      "learning_rate": 9.576394061405847e-06,
      "loss": 0.5747,
      "step": 89
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 5.160216808319092,
      "learning_rate": 9.564554958934432e-06,
      "loss": 0.6318,
      "step": 90
    },
    {
      "epoch": 0.49023569023569025,
      "grad_norm": 3.320161819458008,
      "learning_rate": 9.55256021245692e-06,
      "loss": 0.5472,
      "step": 91
    },
    {
      "epoch": 0.49562289562289563,
      "grad_norm": 3.577775716781616,
      "learning_rate": 9.540410230974943e-06,
      "loss": 0.584,
      "step": 92
    },
    {
      "epoch": 0.501010101010101,
      "grad_norm": 3.3152377605438232,
      "learning_rate": 9.52810542878339e-06,
      "loss": 0.5268,
      "step": 93
    },
    {
      "epoch": 0.5063973063973064,
      "grad_norm": 3.468808889389038,
      "learning_rate": 9.515646225456283e-06,
      "loss": 0.6323,
      "step": 94
    },
    {
      "epoch": 0.5117845117845118,
      "grad_norm": 5.061112880706787,
      "learning_rate": 9.503033045832484e-06,
      "loss": 0.5041,
      "step": 95
    },
    {
      "epoch": 0.5171717171717172,
      "grad_norm": 5.695023059844971,
      "learning_rate": 9.490266320001195e-06,
      "loss": 0.5678,
      "step": 96
    },
    {
      "epoch": 0.5225589225589226,
      "grad_norm": 4.4895920753479,
      "learning_rate": 9.4773464832873e-06,
      "loss": 0.6127,
      "step": 97
    },
    {
      "epoch": 0.5279461279461279,
      "grad_norm": 3.6477298736572266,
      "learning_rate": 9.464273976236518e-06,
      "loss": 0.539,
      "step": 98
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 5.325118541717529,
      "learning_rate": 9.451049244600381e-06,
      "loss": 0.5428,
      "step": 99
    },
    {
      "epoch": 0.5387205387205387,
      "grad_norm": 3.778438091278076,
      "learning_rate": 9.437672739321034e-06,
      "loss": 0.5781,
      "step": 100
    },
    {
      "epoch": 0.5441077441077441,
      "grad_norm": 3.363888740539551,
      "learning_rate": 9.424144916515863e-06,
      "loss": 0.5424,
      "step": 101
    },
    {
      "epoch": 0.5494949494949495,
      "grad_norm": 3.4057974815368652,
      "learning_rate": 9.410466237461937e-06,
      "loss": 0.527,
      "step": 102
    },
    {
      "epoch": 0.5548821548821549,
      "grad_norm": 3.555009126663208,
      "learning_rate": 9.396637168580282e-06,
      "loss": 0.5645,
      "step": 103
    },
    {
      "epoch": 0.5602693602693603,
      "grad_norm": 3.691166639328003,
      "learning_rate": 9.382658181419977e-06,
      "loss": 0.5689,
      "step": 104
    },
    {
      "epoch": 0.5656565656565656,
      "grad_norm": 3.210749626159668,
      "learning_rate": 9.36852975264207e-06,
      "loss": 0.4849,
      "step": 105
    },
    {
      "epoch": 0.571043771043771,
      "grad_norm": 3.507824659347534,
      "learning_rate": 9.354252364003334e-06,
      "loss": 0.5872,
      "step": 106
    },
    {
      "epoch": 0.5764309764309764,
      "grad_norm": 3.4085872173309326,
      "learning_rate": 9.339826502339828e-06,
      "loss": 0.5664,
      "step": 107
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 3.474592924118042,
      "learning_rate": 9.32525265955031e-06,
      "loss": 0.5818,
      "step": 108
    },
    {
      "epoch": 0.5872053872053872,
      "grad_norm": 3.5888025760650635,
      "learning_rate": 9.310531332579453e-06,
      "loss": 0.567,
      "step": 109
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 3.412595510482788,
      "learning_rate": 9.295663023400907e-06,
      "loss": 0.5482,
      "step": 110
    },
    {
      "epoch": 0.597979797979798,
      "grad_norm": 3.397404193878174,
      "learning_rate": 9.280648239000174e-06,
      "loss": 0.5572,
      "step": 111
    },
    {
      "epoch": 0.6033670033670033,
      "grad_norm": 3.6878013610839844,
      "learning_rate": 9.265487491357334e-06,
      "loss": 0.6044,
      "step": 112
    },
    {
      "epoch": 0.6087542087542087,
      "grad_norm": 3.4067952632904053,
      "learning_rate": 9.250181297429573e-06,
      "loss": 0.519,
      "step": 113
    },
    {
      "epoch": 0.6141414141414141,
      "grad_norm": 3.6102547645568848,
      "learning_rate": 9.234730179133564e-06,
      "loss": 0.5897,
      "step": 114
    },
    {
      "epoch": 0.6195286195286195,
      "grad_norm": 3.254011392593384,
      "learning_rate": 9.219134663327672e-06,
      "loss": 0.5444,
      "step": 115
    },
    {
      "epoch": 0.6249158249158249,
      "grad_norm": 3.4662082195281982,
      "learning_rate": 9.203395281793979e-06,
      "loss": 0.5689,
      "step": 116
    },
    {
      "epoch": 0.6303030303030303,
      "grad_norm": 3.225325345993042,
      "learning_rate": 9.187512571220166e-06,
      "loss": 0.4967,
      "step": 117
    },
    {
      "epoch": 0.6356902356902356,
      "grad_norm": 3.3803765773773193,
      "learning_rate": 9.171487073181198e-06,
      "loss": 0.5245,
      "step": 118
    },
    {
      "epoch": 0.641077441077441,
      "grad_norm": 3.078711748123169,
      "learning_rate": 9.155319334120864e-06,
      "loss": 0.4871,
      "step": 119
    },
    {
      "epoch": 0.6464646464646465,
      "grad_norm": 3.5471031665802,
      "learning_rate": 9.139009905333147e-06,
      "loss": 0.5674,
      "step": 120
    },
    {
      "epoch": 0.6518518518518519,
      "grad_norm": 3.0351247787475586,
      "learning_rate": 9.122559342943423e-06,
      "loss": 0.4854,
      "step": 121
    },
    {
      "epoch": 0.6572390572390573,
      "grad_norm": 3.3814985752105713,
      "learning_rate": 9.105968207889493e-06,
      "loss": 0.5141,
      "step": 122
    },
    {
      "epoch": 0.6626262626262627,
      "grad_norm": 3.2874019145965576,
      "learning_rate": 9.089237065902464e-06,
      "loss": 0.5255,
      "step": 123
    },
    {
      "epoch": 0.6680134680134681,
      "grad_norm": 3.173571825027466,
      "learning_rate": 9.072366487487451e-06,
      "loss": 0.5269,
      "step": 124
    },
    {
      "epoch": 0.6734006734006734,
      "grad_norm": 3.3994832038879395,
      "learning_rate": 9.055357047904133e-06,
      "loss": 0.5768,
      "step": 125
    },
    {
      "epoch": 0.6787878787878788,
      "grad_norm": 3.376079797744751,
      "learning_rate": 9.038209327147134e-06,
      "loss": 0.6,
      "step": 126
    },
    {
      "epoch": 0.6841750841750842,
      "grad_norm": 3.5709731578826904,
      "learning_rate": 9.020923909926233e-06,
      "loss": 0.6137,
      "step": 127
    },
    {
      "epoch": 0.6895622895622896,
      "grad_norm": 3.0871469974517822,
      "learning_rate": 9.00350138564645e-06,
      "loss": 0.5537,
      "step": 128
    },
    {
      "epoch": 0.694949494949495,
      "grad_norm": 2.978905200958252,
      "learning_rate": 8.985942348387926e-06,
      "loss": 0.4888,
      "step": 129
    },
    {
      "epoch": 0.7003367003367004,
      "grad_norm": 3.196749687194824,
      "learning_rate": 8.968247396885685e-06,
      "loss": 0.5279,
      "step": 130
    },
    {
      "epoch": 0.7057239057239058,
      "grad_norm": 3.2792575359344482,
      "learning_rate": 8.950417134509201e-06,
      "loss": 0.5749,
      "step": 131
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 3.157092332839966,
      "learning_rate": 8.932452169241838e-06,
      "loss": 0.619,
      "step": 132
    },
    {
      "epoch": 0.7164983164983165,
      "grad_norm": 3.2496225833892822,
      "learning_rate": 8.914353113660107e-06,
      "loss": 0.5495,
      "step": 133
    },
    {
      "epoch": 0.7218855218855219,
      "grad_norm": 3.2431371212005615,
      "learning_rate": 8.89612058491279e-06,
      "loss": 0.5297,
      "step": 134
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 3.2148752212524414,
      "learning_rate": 8.877755204699883e-06,
      "loss": 0.5175,
      "step": 135
    },
    {
      "epoch": 0.7326599326599327,
      "grad_norm": 3.1605641841888428,
      "learning_rate": 8.859257599251408e-06,
      "loss": 0.5848,
      "step": 136
    },
    {
      "epoch": 0.7380471380471381,
      "grad_norm": 3.1001222133636475,
      "learning_rate": 8.840628399306056e-06,
      "loss": 0.539,
      "step": 137
    },
    {
      "epoch": 0.7434343434343434,
      "grad_norm": 3.3802716732025146,
      "learning_rate": 8.821868240089676e-06,
      "loss": 0.5782,
      "step": 138
    },
    {
      "epoch": 0.7488215488215488,
      "grad_norm": 3.0083656311035156,
      "learning_rate": 8.802977761293625e-06,
      "loss": 0.5314,
      "step": 139
    },
    {
      "epoch": 0.7542087542087542,
      "grad_norm": 3.2978479862213135,
      "learning_rate": 8.783957607052941e-06,
      "loss": 0.548,
      "step": 140
    },
    {
      "epoch": 0.7595959595959596,
      "grad_norm": 3.177548885345459,
      "learning_rate": 8.764808425924392e-06,
      "loss": 0.4653,
      "step": 141
    },
    {
      "epoch": 0.764983164983165,
      "grad_norm": 3.2603986263275146,
      "learning_rate": 8.745530870864351e-06,
      "loss": 0.5768,
      "step": 142
    },
    {
      "epoch": 0.7703703703703704,
      "grad_norm": 3.4270477294921875,
      "learning_rate": 8.726125599206543e-06,
      "loss": 0.5426,
      "step": 143
    },
    {
      "epoch": 0.7757575757575758,
      "grad_norm": 3.006866693496704,
      "learning_rate": 8.706593272639616e-06,
      "loss": 0.5038,
      "step": 144
    },
    {
      "epoch": 0.7811447811447811,
      "grad_norm": 3.9326441287994385,
      "learning_rate": 8.686934557184594e-06,
      "loss": 0.618,
      "step": 145
    },
    {
      "epoch": 0.7865319865319865,
      "grad_norm": 3.3260936737060547,
      "learning_rate": 8.667150123172159e-06,
      "loss": 0.5245,
      "step": 146
    },
    {
      "epoch": 0.7919191919191919,
      "grad_norm": 3.189055919647217,
      "learning_rate": 8.647240645219787e-06,
      "loss": 0.5403,
      "step": 147
    },
    {
      "epoch": 0.7973063973063973,
      "grad_norm": 3.107164144515991,
      "learning_rate": 8.62720680220876e-06,
      "loss": 0.5292,
      "step": 148
    },
    {
      "epoch": 0.8026936026936027,
      "grad_norm": 3.372941493988037,
      "learning_rate": 8.607049277261005e-06,
      "loss": 0.5486,
      "step": 149
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 3.3730578422546387,
      "learning_rate": 8.586768757715806e-06,
      "loss": 0.5845,
      "step": 150
    },
    {
      "epoch": 0.8134680134680135,
      "grad_norm": 3.1509501934051514,
      "learning_rate": 8.566365935106367e-06,
      "loss": 0.5266,
      "step": 151
    },
    {
      "epoch": 0.8188552188552188,
      "grad_norm": 3.464965581893921,
      "learning_rate": 8.545841505136224e-06,
      "loss": 0.5701,
      "step": 152
    },
    {
      "epoch": 0.8242424242424242,
      "grad_norm": 3.0586905479431152,
      "learning_rate": 8.525196167655539e-06,
      "loss": 0.4934,
      "step": 153
    },
    {
      "epoch": 0.8296296296296296,
      "grad_norm": 3.1889281272888184,
      "learning_rate": 8.504430626637215e-06,
      "loss": 0.5937,
      "step": 154
    },
    {
      "epoch": 0.835016835016835,
      "grad_norm": 3.2143123149871826,
      "learning_rate": 8.483545590152915e-06,
      "loss": 0.5358,
      "step": 155
    },
    {
      "epoch": 0.8404040404040404,
      "grad_norm": 3.3132236003875732,
      "learning_rate": 8.462541770348896e-06,
      "loss": 0.5258,
      "step": 156
    },
    {
      "epoch": 0.8457912457912458,
      "grad_norm": 3.310232400894165,
      "learning_rate": 8.441419883421742e-06,
      "loss": 0.5908,
      "step": 157
    },
    {
      "epoch": 0.8511784511784511,
      "grad_norm": 3.13468599319458,
      "learning_rate": 8.42018064959393e-06,
      "loss": 0.4796,
      "step": 158
    },
    {
      "epoch": 0.8565656565656565,
      "grad_norm": 3.0902316570281982,
      "learning_rate": 8.398824793089287e-06,
      "loss": 0.5082,
      "step": 159
    },
    {
      "epoch": 0.8619528619528619,
      "grad_norm": 3.193399429321289,
      "learning_rate": 8.377353042108278e-06,
      "loss": 0.5388,
      "step": 160
    },
    {
      "epoch": 0.8673400673400673,
      "grad_norm": 3.0939056873321533,
      "learning_rate": 8.355766128803192e-06,
      "loss": 0.4641,
      "step": 161
    },
    {
      "epoch": 0.8727272727272727,
      "grad_norm": 3.229541540145874,
      "learning_rate": 8.334064789253157e-06,
      "loss": 0.5247,
      "step": 162
    },
    {
      "epoch": 0.8781144781144781,
      "grad_norm": 3.2554848194122314,
      "learning_rate": 8.312249763439066e-06,
      "loss": 0.5491,
      "step": 163
    },
    {
      "epoch": 0.8835016835016835,
      "grad_norm": 3.2184009552001953,
      "learning_rate": 8.29032179521832e-06,
      "loss": 0.6099,
      "step": 164
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 3.2048168182373047,
      "learning_rate": 8.268281632299483e-06,
      "loss": 0.4963,
      "step": 165
    },
    {
      "epoch": 0.8942760942760942,
      "grad_norm": 3.0308377742767334,
      "learning_rate": 8.246130026216777e-06,
      "loss": 0.5222,
      "step": 166
    },
    {
      "epoch": 0.8996632996632996,
      "grad_norm": 3.189265012741089,
      "learning_rate": 8.22386773230445e-06,
      "loss": 0.4913,
      "step": 167
    },
    {
      "epoch": 0.9050505050505051,
      "grad_norm": 3.2512941360473633,
      "learning_rate": 8.201495509671036e-06,
      "loss": 0.5717,
      "step": 168
    },
    {
      "epoch": 0.9104377104377105,
      "grad_norm": 3.2874414920806885,
      "learning_rate": 8.179014121173461e-06,
      "loss": 0.5334,
      "step": 169
    },
    {
      "epoch": 0.9158249158249159,
      "grad_norm": 3.307884931564331,
      "learning_rate": 8.156424333391026e-06,
      "loss": 0.5617,
      "step": 170
    },
    {
      "epoch": 0.9212121212121213,
      "grad_norm": 3.0463500022888184,
      "learning_rate": 8.13372691659928e-06,
      "loss": 0.5305,
      "step": 171
    },
    {
      "epoch": 0.9265993265993266,
      "grad_norm": 3.3068511486053467,
      "learning_rate": 8.110922644743747e-06,
      "loss": 0.549,
      "step": 172
    },
    {
      "epoch": 0.931986531986532,
      "grad_norm": 3.1428866386413574,
      "learning_rate": 8.088012295413536e-06,
      "loss": 0.4856,
      "step": 173
    },
    {
      "epoch": 0.9373737373737374,
      "grad_norm": 2.96205997467041,
      "learning_rate": 8.064996649814826e-06,
      "loss": 0.4599,
      "step": 174
    },
    {
      "epoch": 0.9427609427609428,
      "grad_norm": 3.3233330249786377,
      "learning_rate": 8.041876492744239e-06,
      "loss": 0.5505,
      "step": 175
    },
    {
      "epoch": 0.9481481481481482,
      "grad_norm": 3.2451870441436768,
      "learning_rate": 8.018652612562061e-06,
      "loss": 0.4739,
      "step": 176
    },
    {
      "epoch": 0.9535353535353536,
      "grad_norm": 3.231306791305542,
      "learning_rate": 7.99532580116537e-06,
      "loss": 0.5119,
      "step": 177
    },
    {
      "epoch": 0.958922558922559,
      "grad_norm": 3.147303342819214,
      "learning_rate": 7.971896853961043e-06,
      "loss": 0.496,
      "step": 178
    },
    {
      "epoch": 0.9643097643097643,
      "grad_norm": 3.530423641204834,
      "learning_rate": 7.948366569838612e-06,
      "loss": 0.6025,
      "step": 179
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 3.5202131271362305,
      "learning_rate": 7.924735751143044e-06,
      "loss": 0.4822,
      "step": 180
    },
    {
      "epoch": 0.9750841750841751,
      "grad_norm": 3.288405656814575,
      "learning_rate": 7.901005203647373e-06,
      "loss": 0.5393,
      "step": 181
    },
    {
      "epoch": 0.9804713804713805,
      "grad_norm": 3.291487693786621,
      "learning_rate": 7.877175736525217e-06,
      "loss": 0.6146,
      "step": 182
    },
    {
      "epoch": 0.9858585858585859,
      "grad_norm": 2.933931350708008,
      "learning_rate": 7.853248162323208e-06,
      "loss": 0.4874,
      "step": 183
    },
    {
      "epoch": 0.9912457912457913,
      "grad_norm": 3.0823869705200195,
      "learning_rate": 7.829223296933259e-06,
      "loss": 0.5756,
      "step": 184
    },
    {
      "epoch": 0.9966329966329966,
      "grad_norm": 2.960385799407959,
      "learning_rate": 7.805101959564768e-06,
      "loss": 0.4738,
      "step": 185
    },
    {
      "epoch": 1.0053872053872055,
      "grad_norm": 5.235624313354492,
      "learning_rate": 7.780884972716663e-06,
      "loss": 0.8368,
      "step": 186
    },
    {
      "epoch": 1.0107744107744108,
      "grad_norm": 2.6471757888793945,
      "learning_rate": 7.75657316214937e-06,
      "loss": 0.2894,
      "step": 187
    },
    {
      "epoch": 1.0161616161616163,
      "grad_norm": 2.7157034873962402,
      "learning_rate": 7.732167356856656e-06,
      "loss": 0.3068,
      "step": 188
    },
    {
      "epoch": 1.0215488215488215,
      "grad_norm": 2.7159922122955322,
      "learning_rate": 7.70766838903735e-06,
      "loss": 0.3193,
      "step": 189
    },
    {
      "epoch": 1.026936026936027,
      "grad_norm": 2.4954445362091064,
      "learning_rate": 7.683077094066981e-06,
      "loss": 0.2827,
      "step": 190
    },
    {
      "epoch": 1.0323232323232323,
      "grad_norm": 2.3092992305755615,
      "learning_rate": 7.65839431046928e-06,
      "loss": 0.253,
      "step": 191
    },
    {
      "epoch": 1.0377104377104378,
      "grad_norm": 2.994446039199829,
      "learning_rate": 7.63362087988759e-06,
      "loss": 0.2969,
      "step": 192
    },
    {
      "epoch": 1.043097643097643,
      "grad_norm": 2.7718987464904785,
      "learning_rate": 7.608757647056186e-06,
      "loss": 0.2913,
      "step": 193
    },
    {
      "epoch": 1.0484848484848486,
      "grad_norm": 2.769294500350952,
      "learning_rate": 7.583805459771443e-06,
      "loss": 0.2704,
      "step": 194
    },
    {
      "epoch": 1.0538720538720538,
      "grad_norm": 3.459955930709839,
      "learning_rate": 7.5587651688629405e-06,
      "loss": 0.3051,
      "step": 195
    },
    {
      "epoch": 1.0592592592592593,
      "grad_norm": 3.5698342323303223,
      "learning_rate": 7.533637628164456e-06,
      "loss": 0.2757,
      "step": 196
    },
    {
      "epoch": 1.0646464646464646,
      "grad_norm": 3.165423631668091,
      "learning_rate": 7.508423694484841e-06,
      "loss": 0.2811,
      "step": 197
    },
    {
      "epoch": 1.0700336700336701,
      "grad_norm": 3.1055243015289307,
      "learning_rate": 7.483124227578811e-06,
      "loss": 0.2594,
      "step": 198
    },
    {
      "epoch": 1.0754208754208754,
      "grad_norm": 3.1683449745178223,
      "learning_rate": 7.457740090117627e-06,
      "loss": 0.3102,
      "step": 199
    },
    {
      "epoch": 1.0808080808080809,
      "grad_norm": 3.477832317352295,
      "learning_rate": 7.432272147659678e-06,
      "loss": 0.3035,
      "step": 200
    },
    {
      "epoch": 1.0861952861952862,
      "grad_norm": 2.9576289653778076,
      "learning_rate": 7.406721268620975e-06,
      "loss": 0.2653,
      "step": 201
    },
    {
      "epoch": 1.0915824915824917,
      "grad_norm": 2.561279773712158,
      "learning_rate": 7.381088324245526e-06,
      "loss": 0.2485,
      "step": 202
    },
    {
      "epoch": 1.096969696969697,
      "grad_norm": 3.1348936557769775,
      "learning_rate": 7.355374188575639e-06,
      "loss": 0.2715,
      "step": 203
    },
    {
      "epoch": 1.1023569023569024,
      "grad_norm": 2.7675235271453857,
      "learning_rate": 7.3295797384221156e-06,
      "loss": 0.2805,
      "step": 204
    },
    {
      "epoch": 1.1077441077441077,
      "grad_norm": 3.367643117904663,
      "learning_rate": 7.303705853334353e-06,
      "loss": 0.2897,
      "step": 205
    },
    {
      "epoch": 1.1131313131313132,
      "grad_norm": 3.007518768310547,
      "learning_rate": 7.277753415570349e-06,
      "loss": 0.2699,
      "step": 206
    },
    {
      "epoch": 1.1185185185185185,
      "grad_norm": 2.9317398071289062,
      "learning_rate": 7.2517233100666255e-06,
      "loss": 0.3282,
      "step": 207
    },
    {
      "epoch": 1.123905723905724,
      "grad_norm": 3.2546324729919434,
      "learning_rate": 7.225616424408045e-06,
      "loss": 0.2947,
      "step": 208
    },
    {
      "epoch": 1.1292929292929292,
      "grad_norm": 2.954130172729492,
      "learning_rate": 7.199433648797558e-06,
      "loss": 0.2994,
      "step": 209
    },
    {
      "epoch": 1.1346801346801347,
      "grad_norm": 2.7771804332733154,
      "learning_rate": 7.1731758760258315e-06,
      "loss": 0.229,
      "step": 210
    },
    {
      "epoch": 1.14006734006734,
      "grad_norm": 2.771481990814209,
      "learning_rate": 7.146844001440823e-06,
      "loss": 0.2725,
      "step": 211
    },
    {
      "epoch": 1.1454545454545455,
      "grad_norm": 2.742431402206421,
      "learning_rate": 7.120438922917237e-06,
      "loss": 0.2514,
      "step": 212
    },
    {
      "epoch": 1.1508417508417508,
      "grad_norm": 2.6713271141052246,
      "learning_rate": 7.09396154082592e-06,
      "loss": 0.2485,
      "step": 213
    },
    {
      "epoch": 1.1562289562289563,
      "grad_norm": 2.492274284362793,
      "learning_rate": 7.067412758003154e-06,
      "loss": 0.2278,
      "step": 214
    },
    {
      "epoch": 1.1616161616161615,
      "grad_norm": 2.8618505001068115,
      "learning_rate": 7.040793479719864e-06,
      "loss": 0.2854,
      "step": 215
    },
    {
      "epoch": 1.167003367003367,
      "grad_norm": 2.6601178646087646,
      "learning_rate": 7.014104613650767e-06,
      "loss": 0.2966,
      "step": 216
    },
    {
      "epoch": 1.1723905723905723,
      "grad_norm": 3.3377082347869873,
      "learning_rate": 6.987347069843406e-06,
      "loss": 0.3149,
      "step": 217
    },
    {
      "epoch": 1.1777777777777778,
      "grad_norm": 2.778550863265991,
      "learning_rate": 6.96052176068713e-06,
      "loss": 0.2543,
      "step": 218
    },
    {
      "epoch": 1.183164983164983,
      "grad_norm": 3.040800094604492,
      "learning_rate": 6.93362960088197e-06,
      "loss": 0.2438,
      "step": 219
    },
    {
      "epoch": 1.1885521885521886,
      "grad_norm": 2.9394142627716064,
      "learning_rate": 6.906671507407463e-06,
      "loss": 0.2304,
      "step": 220
    },
    {
      "epoch": 1.1939393939393939,
      "grad_norm": 2.869964122772217,
      "learning_rate": 6.879648399491376e-06,
      "loss": 0.2984,
      "step": 221
    },
    {
      "epoch": 1.1993265993265994,
      "grad_norm": 2.9759936332702637,
      "learning_rate": 6.852561198578364e-06,
      "loss": 0.2603,
      "step": 222
    },
    {
      "epoch": 1.2047138047138046,
      "grad_norm": 3.310718297958374,
      "learning_rate": 6.825410828298552e-06,
      "loss": 0.2233,
      "step": 223
    },
    {
      "epoch": 1.2101010101010101,
      "grad_norm": 2.7231340408325195,
      "learning_rate": 6.79819821443604e-06,
      "loss": 0.2124,
      "step": 224
    },
    {
      "epoch": 1.2154882154882154,
      "grad_norm": 2.9152019023895264,
      "learning_rate": 6.7709242848973326e-06,
      "loss": 0.2757,
      "step": 225
    },
    {
      "epoch": 1.220875420875421,
      "grad_norm": 2.9841840267181396,
      "learning_rate": 6.743589969679697e-06,
      "loss": 0.2853,
      "step": 226
    },
    {
      "epoch": 1.2262626262626264,
      "grad_norm": 3.3108832836151123,
      "learning_rate": 6.716196200839465e-06,
      "loss": 0.2608,
      "step": 227
    },
    {
      "epoch": 1.2316498316498317,
      "grad_norm": 2.9652819633483887,
      "learning_rate": 6.6887439124602295e-06,
      "loss": 0.2598,
      "step": 228
    },
    {
      "epoch": 1.237037037037037,
      "grad_norm": 2.812822103500366,
      "learning_rate": 6.661234040621017e-06,
      "loss": 0.2638,
      "step": 229
    },
    {
      "epoch": 1.2424242424242424,
      "grad_norm": 3.03281831741333,
      "learning_rate": 6.63366752336435e-06,
      "loss": 0.2439,
      "step": 230
    },
    {
      "epoch": 1.247811447811448,
      "grad_norm": 2.7430481910705566,
      "learning_rate": 6.606045300664272e-06,
      "loss": 0.2502,
      "step": 231
    },
    {
      "epoch": 1.2531986531986532,
      "grad_norm": 3.0615146160125732,
      "learning_rate": 6.578368314394293e-06,
      "loss": 0.2494,
      "step": 232
    },
    {
      "epoch": 1.2585858585858585,
      "grad_norm": 2.689999580383301,
      "learning_rate": 6.550637508295272e-06,
      "loss": 0.2309,
      "step": 233
    },
    {
      "epoch": 1.263973063973064,
      "grad_norm": 3.2054049968719482,
      "learning_rate": 6.52285382794324e-06,
      "loss": 0.2942,
      "step": 234
    },
    {
      "epoch": 1.2693602693602695,
      "grad_norm": 2.9260945320129395,
      "learning_rate": 6.49501822071715e-06,
      "loss": 0.2861,
      "step": 235
    },
    {
      "epoch": 1.2747474747474747,
      "grad_norm": 3.240046262741089,
      "learning_rate": 6.467131635766585e-06,
      "loss": 0.2949,
      "step": 236
    },
    {
      "epoch": 1.28013468013468,
      "grad_norm": 2.6747567653656006,
      "learning_rate": 6.439195023979381e-06,
      "loss": 0.2851,
      "step": 237
    },
    {
      "epoch": 1.2855218855218855,
      "grad_norm": 3.605665445327759,
      "learning_rate": 6.411209337949214e-06,
      "loss": 0.3156,
      "step": 238
    },
    {
      "epoch": 1.290909090909091,
      "grad_norm": 3.0257179737091064,
      "learning_rate": 6.383175531943106e-06,
      "loss": 0.2481,
      "step": 239
    },
    {
      "epoch": 1.2962962962962963,
      "grad_norm": 3.004091739654541,
      "learning_rate": 6.355094561868902e-06,
      "loss": 0.2608,
      "step": 240
    },
    {
      "epoch": 1.3016835016835016,
      "grad_norm": 2.927186965942383,
      "learning_rate": 6.3269673852426575e-06,
      "loss": 0.2298,
      "step": 241
    },
    {
      "epoch": 1.307070707070707,
      "grad_norm": 2.5807888507843018,
      "learning_rate": 6.298794961156004e-06,
      "loss": 0.2263,
      "step": 242
    },
    {
      "epoch": 1.3124579124579125,
      "grad_norm": 2.7014336585998535,
      "learning_rate": 6.270578250243437e-06,
      "loss": 0.2931,
      "step": 243
    },
    {
      "epoch": 1.3178451178451178,
      "grad_norm": 3.1106925010681152,
      "learning_rate": 6.242318214649556e-06,
      "loss": 0.2789,
      "step": 244
    },
    {
      "epoch": 1.3232323232323233,
      "grad_norm": 2.7850258350372314,
      "learning_rate": 6.214015817996273e-06,
      "loss": 0.3062,
      "step": 245
    },
    {
      "epoch": 1.3286195286195286,
      "grad_norm": 2.841632127761841,
      "learning_rate": 6.185672025349936e-06,
      "loss": 0.2595,
      "step": 246
    },
    {
      "epoch": 1.334006734006734,
      "grad_norm": 2.757871150970459,
      "learning_rate": 6.157287803188432e-06,
      "loss": 0.2408,
      "step": 247
    },
    {
      "epoch": 1.3393939393939394,
      "grad_norm": 2.7471070289611816,
      "learning_rate": 6.128864119368234e-06,
      "loss": 0.2618,
      "step": 248
    },
    {
      "epoch": 1.3447811447811449,
      "grad_norm": 3.062896490097046,
      "learning_rate": 6.100401943091386e-06,
      "loss": 0.2893,
      "step": 249
    },
    {
      "epoch": 1.3501683501683501,
      "grad_norm": 2.937164068222046,
      "learning_rate": 6.0719022448724705e-06,
      "loss": 0.2735,
      "step": 250
    },
    {
      "epoch": 1.3555555555555556,
      "grad_norm": 3.1469810009002686,
      "learning_rate": 6.043365996505506e-06,
      "loss": 0.3295,
      "step": 251
    },
    {
      "epoch": 1.360942760942761,
      "grad_norm": 2.82350754737854,
      "learning_rate": 6.014794171030811e-06,
      "loss": 0.2778,
      "step": 252
    },
    {
      "epoch": 1.3663299663299664,
      "grad_norm": 3.0384979248046875,
      "learning_rate": 5.986187742701825e-06,
      "loss": 0.2678,
      "step": 253
    },
    {
      "epoch": 1.3717171717171717,
      "grad_norm": 2.782715082168579,
      "learning_rate": 5.9575476869518945e-06,
      "loss": 0.2664,
      "step": 254
    },
    {
      "epoch": 1.3771043771043772,
      "grad_norm": 2.811166763305664,
      "learning_rate": 5.928874980361005e-06,
      "loss": 0.2387,
      "step": 255
    },
    {
      "epoch": 1.3824915824915824,
      "grad_norm": 2.939649820327759,
      "learning_rate": 5.900170600622477e-06,
      "loss": 0.2957,
      "step": 256
    },
    {
      "epoch": 1.387878787878788,
      "grad_norm": 3.0286529064178467,
      "learning_rate": 5.871435526509647e-06,
      "loss": 0.2937,
      "step": 257
    },
    {
      "epoch": 1.3932659932659932,
      "grad_norm": 2.7286617755889893,
      "learning_rate": 5.8426707378424675e-06,
      "loss": 0.2543,
      "step": 258
    },
    {
      "epoch": 1.3986531986531987,
      "grad_norm": 2.7167487144470215,
      "learning_rate": 5.813877215454118e-06,
      "loss": 0.2296,
      "step": 259
    },
    {
      "epoch": 1.404040404040404,
      "grad_norm": 2.8393452167510986,
      "learning_rate": 5.78505594115755e-06,
      "loss": 0.2708,
      "step": 260
    },
    {
      "epoch": 1.4094276094276095,
      "grad_norm": 2.903613567352295,
      "learning_rate": 5.756207897712011e-06,
      "loss": 0.267,
      "step": 261
    },
    {
      "epoch": 1.4148148148148147,
      "grad_norm": 2.823423147201538,
      "learning_rate": 5.727334068789529e-06,
      "loss": 0.2774,
      "step": 262
    },
    {
      "epoch": 1.4202020202020202,
      "grad_norm": 2.7938835620880127,
      "learning_rate": 5.698435438941382e-06,
      "loss": 0.2474,
      "step": 263
    },
    {
      "epoch": 1.4255892255892255,
      "grad_norm": 2.9996232986450195,
      "learning_rate": 5.669512993564517e-06,
      "loss": 0.3086,
      "step": 264
    },
    {
      "epoch": 1.430976430976431,
      "grad_norm": 3.045121908187866,
      "learning_rate": 5.640567718867951e-06,
      "loss": 0.2617,
      "step": 265
    },
    {
      "epoch": 1.4363636363636363,
      "grad_norm": 2.7745134830474854,
      "learning_rate": 5.611600601839144e-06,
      "loss": 0.2604,
      "step": 266
    },
    {
      "epoch": 1.4417508417508418,
      "grad_norm": 2.8739848136901855,
      "learning_rate": 5.582612630210349e-06,
      "loss": 0.2774,
      "step": 267
    },
    {
      "epoch": 1.447138047138047,
      "grad_norm": 2.740999221801758,
      "learning_rate": 5.553604792424923e-06,
      "loss": 0.2341,
      "step": 268
    },
    {
      "epoch": 1.4525252525252526,
      "grad_norm": 2.991398572921753,
      "learning_rate": 5.524578077603627e-06,
      "loss": 0.2299,
      "step": 269
    },
    {
      "epoch": 1.457912457912458,
      "grad_norm": 2.636726140975952,
      "learning_rate": 5.495533475510901e-06,
      "loss": 0.2472,
      "step": 270
    },
    {
      "epoch": 1.4632996632996633,
      "grad_norm": 3.0140764713287354,
      "learning_rate": 5.4664719765211125e-06,
      "loss": 0.2597,
      "step": 271
    },
    {
      "epoch": 1.4686868686868686,
      "grad_norm": 2.988635778427124,
      "learning_rate": 5.4373945715847845e-06,
      "loss": 0.2939,
      "step": 272
    },
    {
      "epoch": 1.474074074074074,
      "grad_norm": 3.1995465755462646,
      "learning_rate": 5.408302252194806e-06,
      "loss": 0.2678,
      "step": 273
    },
    {
      "epoch": 1.4794612794612796,
      "grad_norm": 2.9540798664093018,
      "learning_rate": 5.379196010352629e-06,
      "loss": 0.3033,
      "step": 274
    },
    {
      "epoch": 1.4848484848484849,
      "grad_norm": 3.282701253890991,
      "learning_rate": 5.3500768385344345e-06,
      "loss": 0.2588,
      "step": 275
    },
    {
      "epoch": 1.4902356902356901,
      "grad_norm": 2.9532341957092285,
      "learning_rate": 5.320945729657299e-06,
      "loss": 0.289,
      "step": 276
    },
    {
      "epoch": 1.4956228956228956,
      "grad_norm": 2.660553455352783,
      "learning_rate": 5.2918036770453285e-06,
      "loss": 0.2653,
      "step": 277
    },
    {
      "epoch": 1.5010101010101011,
      "grad_norm": 2.7580904960632324,
      "learning_rate": 5.262651674395799e-06,
      "loss": 0.2585,
      "step": 278
    },
    {
      "epoch": 1.5063973063973064,
      "grad_norm": 2.7895712852478027,
      "learning_rate": 5.2334907157452605e-06,
      "loss": 0.2425,
      "step": 279
    },
    {
      "epoch": 1.5117845117845117,
      "grad_norm": 2.852928876876831,
      "learning_rate": 5.204321795435656e-06,
      "loss": 0.2702,
      "step": 280
    },
    {
      "epoch": 1.5171717171717172,
      "grad_norm": 3.042116403579712,
      "learning_rate": 5.1751459080803986e-06,
      "loss": 0.2615,
      "step": 281
    },
    {
      "epoch": 1.5225589225589227,
      "grad_norm": 2.737823724746704,
      "learning_rate": 5.145964048530475e-06,
      "loss": 0.2695,
      "step": 282
    },
    {
      "epoch": 1.527946127946128,
      "grad_norm": 2.6959354877471924,
      "learning_rate": 5.11677721184051e-06,
      "loss": 0.2595,
      "step": 283
    },
    {
      "epoch": 1.5333333333333332,
      "grad_norm": 3.073336601257324,
      "learning_rate": 5.08758639323484e-06,
      "loss": 0.249,
      "step": 284
    },
    {
      "epoch": 1.5387205387205387,
      "grad_norm": 2.496995449066162,
      "learning_rate": 5.058392588073583e-06,
      "loss": 0.2409,
      "step": 285
    },
    {
      "epoch": 1.5441077441077442,
      "grad_norm": 2.8654353618621826,
      "learning_rate": 5.029196791818688e-06,
      "loss": 0.2428,
      "step": 286
    },
    {
      "epoch": 1.5494949494949495,
      "grad_norm": 2.753993034362793,
      "learning_rate": 5e-06,
      "loss": 0.2768,
      "step": 287
    },
    {
      "epoch": 1.5548821548821548,
      "grad_norm": 2.972564220428467,
      "learning_rate": 4.970803208181315e-06,
      "loss": 0.2451,
      "step": 288
    },
    {
      "epoch": 1.5602693602693603,
      "grad_norm": 3.036773681640625,
      "learning_rate": 4.941607411926419e-06,
      "loss": 0.2642,
      "step": 289
    },
    {
      "epoch": 1.5656565656565657,
      "grad_norm": 3.0601320266723633,
      "learning_rate": 4.9124136067651615e-06,
      "loss": 0.2803,
      "step": 290
    },
    {
      "epoch": 1.571043771043771,
      "grad_norm": 3.3641974925994873,
      "learning_rate": 4.883222788159491e-06,
      "loss": 0.289,
      "step": 291
    },
    {
      "epoch": 1.5764309764309763,
      "grad_norm": 3.0665841102600098,
      "learning_rate": 4.8540359514695266e-06,
      "loss": 0.2196,
      "step": 292
    },
    {
      "epoch": 1.5818181818181818,
      "grad_norm": 2.884730339050293,
      "learning_rate": 4.824854091919601e-06,
      "loss": 0.2532,
      "step": 293
    },
    {
      "epoch": 1.5872053872053873,
      "grad_norm": 3.1136231422424316,
      "learning_rate": 4.795678204564346e-06,
      "loss": 0.2545,
      "step": 294
    },
    {
      "epoch": 1.5925925925925926,
      "grad_norm": 2.821955919265747,
      "learning_rate": 4.766509284254739e-06,
      "loss": 0.2524,
      "step": 295
    },
    {
      "epoch": 1.5979797979797978,
      "grad_norm": 3.191521167755127,
      "learning_rate": 4.737348325604203e-06,
      "loss": 0.2638,
      "step": 296
    },
    {
      "epoch": 1.6033670033670033,
      "grad_norm": 2.8502752780914307,
      "learning_rate": 4.708196322954673e-06,
      "loss": 0.2648,
      "step": 297
    },
    {
      "epoch": 1.6087542087542088,
      "grad_norm": 3.3543736934661865,
      "learning_rate": 4.679054270342703e-06,
      "loss": 0.2884,
      "step": 298
    },
    {
      "epoch": 1.614141414141414,
      "grad_norm": 2.9385459423065186,
      "learning_rate": 4.649923161465567e-06,
      "loss": 0.2422,
      "step": 299
    },
    {
      "epoch": 1.6195286195286194,
      "grad_norm": 2.9000279903411865,
      "learning_rate": 4.620803989647373e-06,
      "loss": 0.244,
      "step": 300
    },
    {
      "epoch": 1.6249158249158249,
      "grad_norm": 2.7263593673706055,
      "learning_rate": 4.591697747805196e-06,
      "loss": 0.2452,
      "step": 301
    },
    {
      "epoch": 1.6303030303030304,
      "grad_norm": 2.7036728858947754,
      "learning_rate": 4.562605428415216e-06,
      "loss": 0.2555,
      "step": 302
    },
    {
      "epoch": 1.6356902356902356,
      "grad_norm": 2.996410608291626,
      "learning_rate": 4.533528023478888e-06,
      "loss": 0.2212,
      "step": 303
    },
    {
      "epoch": 1.641077441077441,
      "grad_norm": 2.6675851345062256,
      "learning_rate": 4.5044665244891e-06,
      "loss": 0.2411,
      "step": 304
    },
    {
      "epoch": 1.6464646464646466,
      "grad_norm": 2.8888285160064697,
      "learning_rate": 4.475421922396375e-06,
      "loss": 0.2374,
      "step": 305
    },
    {
      "epoch": 1.651851851851852,
      "grad_norm": 2.5365850925445557,
      "learning_rate": 4.446395207575081e-06,
      "loss": 0.2443,
      "step": 306
    },
    {
      "epoch": 1.6572390572390572,
      "grad_norm": 2.7890241146087646,
      "learning_rate": 4.417387369789652e-06,
      "loss": 0.2219,
      "step": 307
    },
    {
      "epoch": 1.6626262626262627,
      "grad_norm": 3.0111935138702393,
      "learning_rate": 4.388399398160857e-06,
      "loss": 0.2528,
      "step": 308
    },
    {
      "epoch": 1.6680134680134682,
      "grad_norm": 2.897418260574341,
      "learning_rate": 4.359432281132051e-06,
      "loss": 0.2432,
      "step": 309
    },
    {
      "epoch": 1.6734006734006734,
      "grad_norm": 2.736621618270874,
      "learning_rate": 4.330487006435485e-06,
      "loss": 0.2381,
      "step": 310
    },
    {
      "epoch": 1.6787878787878787,
      "grad_norm": 2.9282073974609375,
      "learning_rate": 4.301564561058618e-06,
      "loss": 0.2405,
      "step": 311
    },
    {
      "epoch": 1.6841750841750842,
      "grad_norm": 2.8673527240753174,
      "learning_rate": 4.272665931210472e-06,
      "loss": 0.2638,
      "step": 312
    },
    {
      "epoch": 1.6895622895622897,
      "grad_norm": 3.049126148223877,
      "learning_rate": 4.243792102287991e-06,
      "loss": 0.2505,
      "step": 313
    },
    {
      "epoch": 1.694949494949495,
      "grad_norm": 3.018843173980713,
      "learning_rate": 4.214944058842452e-06,
      "loss": 0.262,
      "step": 314
    },
    {
      "epoch": 1.7003367003367003,
      "grad_norm": 3.1092209815979004,
      "learning_rate": 4.186122784545885e-06,
      "loss": 0.2784,
      "step": 315
    },
    {
      "epoch": 1.7057239057239058,
      "grad_norm": 3.118446111679077,
      "learning_rate": 4.157329262157534e-06,
      "loss": 0.2645,
      "step": 316
    },
    {
      "epoch": 1.7111111111111112,
      "grad_norm": 3.1034669876098633,
      "learning_rate": 4.128564473490357e-06,
      "loss": 0.244,
      "step": 317
    },
    {
      "epoch": 1.7164983164983165,
      "grad_norm": 3.1367504596710205,
      "learning_rate": 4.099829399377524e-06,
      "loss": 0.2526,
      "step": 318
    },
    {
      "epoch": 1.7218855218855218,
      "grad_norm": 3.1213414669036865,
      "learning_rate": 4.071125019638998e-06,
      "loss": 0.2603,
      "step": 319
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 2.7703611850738525,
      "learning_rate": 4.0424523130481055e-06,
      "loss": 0.2302,
      "step": 320
    },
    {
      "epoch": 1.7326599326599328,
      "grad_norm": 3.022610902786255,
      "learning_rate": 4.013812257298175e-06,
      "loss": 0.2637,
      "step": 321
    },
    {
      "epoch": 1.738047138047138,
      "grad_norm": 2.519594192504883,
      "learning_rate": 3.985205828969191e-06,
      "loss": 0.2235,
      "step": 322
    },
    {
      "epoch": 1.7434343434343433,
      "grad_norm": 2.90838360786438,
      "learning_rate": 3.956634003494496e-06,
      "loss": 0.2834,
      "step": 323
    },
    {
      "epoch": 1.7488215488215488,
      "grad_norm": 2.999645948410034,
      "learning_rate": 3.9280977551275294e-06,
      "loss": 0.2463,
      "step": 324
    },
    {
      "epoch": 1.7542087542087543,
      "grad_norm": 2.5574517250061035,
      "learning_rate": 3.899598056908615e-06,
      "loss": 0.2101,
      "step": 325
    },
    {
      "epoch": 1.7595959595959596,
      "grad_norm": 2.567458391189575,
      "learning_rate": 3.871135880631769e-06,
      "loss": 0.2576,
      "step": 326
    },
    {
      "epoch": 1.7649831649831649,
      "grad_norm": 2.833789110183716,
      "learning_rate": 3.842712196811569e-06,
      "loss": 0.2322,
      "step": 327
    },
    {
      "epoch": 1.7703703703703704,
      "grad_norm": 2.6010053157806396,
      "learning_rate": 3.8143279746500665e-06,
      "loss": 0.2227,
      "step": 328
    },
    {
      "epoch": 1.7757575757575759,
      "grad_norm": 2.8823626041412354,
      "learning_rate": 3.785984182003728e-06,
      "loss": 0.2646,
      "step": 329
    },
    {
      "epoch": 1.7811447811447811,
      "grad_norm": 2.988429546356201,
      "learning_rate": 3.757681785350445e-06,
      "loss": 0.2626,
      "step": 330
    },
    {
      "epoch": 1.7865319865319864,
      "grad_norm": 2.649637222290039,
      "learning_rate": 3.729421749756564e-06,
      "loss": 0.2145,
      "step": 331
    },
    {
      "epoch": 1.791919191919192,
      "grad_norm": 2.893730401992798,
      "learning_rate": 3.701205038843997e-06,
      "loss": 0.2727,
      "step": 332
    },
    {
      "epoch": 1.7973063973063974,
      "grad_norm": 2.917715549468994,
      "learning_rate": 3.6730326147573425e-06,
      "loss": 0.2281,
      "step": 333
    },
    {
      "epoch": 1.8026936026936027,
      "grad_norm": 2.8687551021575928,
      "learning_rate": 3.6449054381311e-06,
      "loss": 0.2531,
      "step": 334
    },
    {
      "epoch": 1.808080808080808,
      "grad_norm": 2.496572256088257,
      "learning_rate": 3.616824468056896e-06,
      "loss": 0.2227,
      "step": 335
    },
    {
      "epoch": 1.8134680134680135,
      "grad_norm": 3.0722904205322266,
      "learning_rate": 3.5887906620507877e-06,
      "loss": 0.2803,
      "step": 336
    },
    {
      "epoch": 1.818855218855219,
      "grad_norm": 2.8439204692840576,
      "learning_rate": 3.5608049760206203e-06,
      "loss": 0.2315,
      "step": 337
    },
    {
      "epoch": 1.8242424242424242,
      "grad_norm": 2.7868878841400146,
      "learning_rate": 3.532868364233416e-06,
      "loss": 0.2806,
      "step": 338
    },
    {
      "epoch": 1.8296296296296295,
      "grad_norm": 2.97046160697937,
      "learning_rate": 3.504981779282852e-06,
      "loss": 0.2521,
      "step": 339
    },
    {
      "epoch": 1.835016835016835,
      "grad_norm": 2.795283794403076,
      "learning_rate": 3.4771461720567613e-06,
      "loss": 0.2522,
      "step": 340
    },
    {
      "epoch": 1.8404040404040405,
      "grad_norm": 2.8273348808288574,
      "learning_rate": 3.4493624917047284e-06,
      "loss": 0.2462,
      "step": 341
    },
    {
      "epoch": 1.8457912457912458,
      "grad_norm": 2.7871997356414795,
      "learning_rate": 3.4216316856057074e-06,
      "loss": 0.2334,
      "step": 342
| }, | |
| { | |
| "epoch": 1.851178451178451, | |
| "grad_norm": 2.754995346069336, | |
| "learning_rate": 3.3939546993357297e-06, | |
| "loss": 0.2533, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8565656565656565, | |
| "grad_norm": 2.96561336517334, | |
| "learning_rate": 3.3663324766356524e-06, | |
| "loss": 0.261, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.861952861952862, | |
| "grad_norm": 2.781203269958496, | |
| "learning_rate": 3.3387659593789845e-06, | |
| "loss": 0.2312, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.8673400673400673, | |
| "grad_norm": 2.9479804039001465, | |
| "learning_rate": 3.3112560875397713e-06, | |
| "loss": 0.2618, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8727272727272726, | |
| "grad_norm": 2.9663288593292236, | |
| "learning_rate": 3.283803799160537e-06, | |
| "loss": 0.2554, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.878114478114478, | |
| "grad_norm": 2.95430064201355, | |
| "learning_rate": 3.256410030320304e-06, | |
| "loss": 0.2873, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.8835016835016836, | |
| "grad_norm": 2.834928274154663, | |
| "learning_rate": 3.2290757151026687e-06, | |
| "loss": 0.2407, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 2.61153244972229, | |
| "learning_rate": 3.2018017855639605e-06, | |
| "loss": 0.2482, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8942760942760941, | |
| "grad_norm": 2.788770914077759, | |
| "learning_rate": 3.1745891717014477e-06, | |
| "loss": 0.224, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8996632996632996, | |
| "grad_norm": 2.7962043285369873, | |
| "learning_rate": 3.147438801421638e-06, | |
| "loss": 0.2526, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.905050505050505, | |
| "grad_norm": 2.967076539993286, | |
| "learning_rate": 3.1203516005086276e-06, | |
| "loss": 0.2335, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.9104377104377104, | |
| "grad_norm": 2.598158836364746, | |
| "learning_rate": 3.093328492592539e-06, | |
| "loss": 0.2127, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.9158249158249159, | |
| "grad_norm": 2.835001230239868, | |
| "learning_rate": 3.0663703991180318e-06, | |
| "loss": 0.2273, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.9212121212121214, | |
| "grad_norm": 2.9137284755706787, | |
| "learning_rate": 3.0394782393128713e-06, | |
| "loss": 0.2754, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9265993265993266, | |
| "grad_norm": 3.0207886695861816, | |
| "learning_rate": 3.0126529301565945e-06, | |
| "loss": 0.2449, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.931986531986532, | |
| "grad_norm": 2.987816095352173, | |
| "learning_rate": 2.9858953863492334e-06, | |
| "loss": 0.2521, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9373737373737374, | |
| "grad_norm": 2.8369038105010986, | |
| "learning_rate": 2.9592065202801374e-06, | |
| "loss": 0.2383, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.942760942760943, | |
| "grad_norm": 2.73996639251709, | |
| "learning_rate": 2.9325872419968484e-06, | |
| "loss": 0.2536, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.9481481481481482, | |
| "grad_norm": 3.1415867805480957, | |
| "learning_rate": 2.906038459174081e-06, | |
| "loss": 0.2599, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.9535353535353535, | |
| "grad_norm": 3.266170024871826, | |
| "learning_rate": 2.879561077082764e-06, | |
| "loss": 0.2544, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.958922558922559, | |
| "grad_norm": 2.9058427810668945, | |
| "learning_rate": 2.853155998559179e-06, | |
| "loss": 0.244, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9643097643097645, | |
| "grad_norm": 2.8677961826324463, | |
| "learning_rate": 2.826824123974171e-06, | |
| "loss": 0.2192, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.9696969696969697, | |
| "grad_norm": 3.0954580307006836, | |
| "learning_rate": 2.800566351202443e-06, | |
| "loss": 0.2538, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.975084175084175, | |
| "grad_norm": 3.023210287094116, | |
| "learning_rate": 2.774383575591956e-06, | |
| "loss": 0.248, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9804713804713805, | |
| "grad_norm": 2.7636148929595947, | |
| "learning_rate": 2.748276689933377e-06, | |
| "loss": 0.2281, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.985858585858586, | |
| "grad_norm": 2.7266335487365723, | |
| "learning_rate": 2.722246584429652e-06, | |
| "loss": 0.2492, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.9912457912457913, | |
| "grad_norm": 2.8604986667633057, | |
| "learning_rate": 2.6962941466656477e-06, | |
| "loss": 0.2358, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.9966329966329965, | |
| "grad_norm": 2.7491540908813477, | |
| "learning_rate": 2.6704202615778844e-06, | |
| "loss": 0.2366, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.0053872053872053, | |
| "grad_norm": 5.093502998352051, | |
| "learning_rate": 2.6446258114243633e-06, | |
| "loss": 0.343, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.010774410774411, | |
| "grad_norm": 1.904625415802002, | |
| "learning_rate": 2.6189116757544765e-06, | |
| "loss": 0.0965, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.0161616161616163, | |
| "grad_norm": 1.87295663356781, | |
| "learning_rate": 2.593278731379027e-06, | |
| "loss": 0.1118, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.0215488215488215, | |
| "grad_norm": 2.0098869800567627, | |
| "learning_rate": 2.567727852340323e-06, | |
| "loss": 0.0975, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.026936026936027, | |
| "grad_norm": 1.6401960849761963, | |
| "learning_rate": 2.542259909882374e-06, | |
| "loss": 0.0918, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.0323232323232325, | |
| "grad_norm": 1.9632785320281982, | |
| "learning_rate": 2.51687577242119e-06, | |
| "loss": 0.0885, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.037710437710438, | |
| "grad_norm": 1.801023006439209, | |
| "learning_rate": 2.4915763055151615e-06, | |
| "loss": 0.0849, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.043097643097643, | |
| "grad_norm": 1.8630132675170898, | |
| "learning_rate": 2.4663623718355444e-06, | |
| "loss": 0.0782, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.0484848484848484, | |
| "grad_norm": 1.9627724885940552, | |
| "learning_rate": 2.4412348311370616e-06, | |
| "loss": 0.0985, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.053872053872054, | |
| "grad_norm": 2.127228260040283, | |
| "learning_rate": 2.416194540228559e-06, | |
| "loss": 0.0885, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.0592592592592593, | |
| "grad_norm": 2.282618284225464, | |
| "learning_rate": 2.3912423529438145e-06, | |
| "loss": 0.0705, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.0646464646464646, | |
| "grad_norm": 2.917990207672119, | |
| "learning_rate": 2.3663791201124093e-06, | |
| "loss": 0.0904, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.07003367003367, | |
| "grad_norm": 2.867617130279541, | |
| "learning_rate": 2.341605689530723e-06, | |
| "loss": 0.0766, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0754208754208756, | |
| "grad_norm": 2.3559350967407227, | |
| "learning_rate": 2.316922905933022e-06, | |
| "loss": 0.0889, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.080808080808081, | |
| "grad_norm": 2.959153890609741, | |
| "learning_rate": 2.292331610962649e-06, | |
| "loss": 0.0759, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.086195286195286, | |
| "grad_norm": 2.87480092048645, | |
| "learning_rate": 2.2678326431433456e-06, | |
| "loss": 0.0836, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.0915824915824914, | |
| "grad_norm": 2.830786943435669, | |
| "learning_rate": 2.243426837850631e-06, | |
| "loss": 0.1042, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.096969696969697, | |
| "grad_norm": 2.9633374214172363, | |
| "learning_rate": 2.219115027283339e-06, | |
| "loss": 0.0958, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.1023569023569024, | |
| "grad_norm": 2.6659820079803467, | |
| "learning_rate": 2.194898040435234e-06, | |
| "loss": 0.0772, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.1077441077441077, | |
| "grad_norm": 2.3520843982696533, | |
| "learning_rate": 2.17077670306674e-06, | |
| "loss": 0.0564, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.113131313131313, | |
| "grad_norm": 2.393596887588501, | |
| "learning_rate": 2.146751837676794e-06, | |
| "loss": 0.075, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.1185185185185187, | |
| "grad_norm": 2.7160770893096924, | |
| "learning_rate": 2.122824263474784e-06, | |
| "loss": 0.1021, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.123905723905724, | |
| "grad_norm": 2.5906686782836914, | |
| "learning_rate": 2.098994796352629e-06, | |
| "loss": 0.0886, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.1292929292929292, | |
| "grad_norm": 2.3228564262390137, | |
| "learning_rate": 2.0752642488569557e-06, | |
| "loss": 0.0807, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.1346801346801345, | |
| "grad_norm": 2.289416790008545, | |
| "learning_rate": 2.0516334301613876e-06, | |
| "loss": 0.0804, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.1400673400673402, | |
| "grad_norm": 2.459120750427246, | |
| "learning_rate": 2.028103146038958e-06, | |
| "loss": 0.1073, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.1454545454545455, | |
| "grad_norm": 2.474850654602051, | |
| "learning_rate": 2.004674198834631e-06, | |
| "loss": 0.0746, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.1508417508417508, | |
| "grad_norm": 2.63972806930542, | |
| "learning_rate": 1.98134738743794e-06, | |
| "loss": 0.0754, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.156228956228956, | |
| "grad_norm": 2.22719407081604, | |
| "learning_rate": 1.9581235072557618e-06, | |
| "loss": 0.084, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.1616161616161618, | |
| "grad_norm": 2.08853816986084, | |
| "learning_rate": 1.935003350185174e-06, | |
| "loss": 0.0779, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.167003367003367, | |
| "grad_norm": 1.9397152662277222, | |
| "learning_rate": 1.911987704586466e-06, | |
| "loss": 0.07, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1723905723905723, | |
| "grad_norm": 1.917934775352478, | |
| "learning_rate": 1.8890773552562564e-06, | |
| "loss": 0.0725, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.1777777777777776, | |
| "grad_norm": 2.1869399547576904, | |
| "learning_rate": 1.8662730834007204e-06, | |
| "loss": 0.0745, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.1831649831649833, | |
| "grad_norm": 2.0088367462158203, | |
| "learning_rate": 1.843575666608976e-06, | |
| "loss": 0.091, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.1885521885521886, | |
| "grad_norm": 2.3277580738067627, | |
| "learning_rate": 1.8209858788265411e-06, | |
| "loss": 0.0605, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.193939393939394, | |
| "grad_norm": 1.99192214012146, | |
| "learning_rate": 1.7985044903289645e-06, | |
| "loss": 0.0706, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.199326599326599, | |
| "grad_norm": 2.2638256549835205, | |
| "learning_rate": 1.7761322676955505e-06, | |
| "loss": 0.0728, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.204713804713805, | |
| "grad_norm": 2.2363462448120117, | |
| "learning_rate": 1.7538699737832237e-06, | |
| "loss": 0.0804, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.21010101010101, | |
| "grad_norm": 2.1804420948028564, | |
| "learning_rate": 1.7317183677005173e-06, | |
| "loss": 0.0882, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.2154882154882154, | |
| "grad_norm": 2.3650074005126953, | |
| "learning_rate": 1.7096782047816806e-06, | |
| "loss": 0.0784, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.2208754208754207, | |
| "grad_norm": 2.209190845489502, | |
| "learning_rate": 1.687750236560936e-06, | |
| "loss": 0.087, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.2262626262626264, | |
| "grad_norm": 2.8381571769714355, | |
| "learning_rate": 1.665935210746844e-06, | |
| "loss": 0.0656, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.2316498316498317, | |
| "grad_norm": 2.1763696670532227, | |
| "learning_rate": 1.6442338711968102e-06, | |
| "loss": 0.0884, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.237037037037037, | |
| "grad_norm": 2.410494327545166, | |
| "learning_rate": 1.622646957891722e-06, | |
| "loss": 0.0702, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.242424242424242, | |
| "grad_norm": 2.365952491760254, | |
| "learning_rate": 1.601175206910715e-06, | |
| "loss": 0.0902, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.247811447811448, | |
| "grad_norm": 2.4530627727508545, | |
| "learning_rate": 1.5798193504060693e-06, | |
| "loss": 0.0792, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.253198653198653, | |
| "grad_norm": 2.4529592990875244, | |
| "learning_rate": 1.5585801165782606e-06, | |
| "loss": 0.0863, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.2585858585858585, | |
| "grad_norm": 2.298218250274658, | |
| "learning_rate": 1.5374582296511054e-06, | |
| "loss": 0.0854, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.263973063973064, | |
| "grad_norm": 2.545762538909912, | |
| "learning_rate": 1.5164544098470862e-06, | |
| "loss": 0.0913, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.2693602693602695, | |
| "grad_norm": 2.3648526668548584, | |
| "learning_rate": 1.4955693733627869e-06, | |
| "loss": 0.0795, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.2747474747474747, | |
| "grad_norm": 2.335575819015503, | |
| "learning_rate": 1.474803832344463e-06, | |
| "loss": 0.084, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.28013468013468, | |
| "grad_norm": 2.2477426528930664, | |
| "learning_rate": 1.4541584948637777e-06, | |
| "loss": 0.0876, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.2855218855218853, | |
| "grad_norm": 2.9558703899383545, | |
| "learning_rate": 1.4336340648936342e-06, | |
| "loss": 0.079, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.290909090909091, | |
| "grad_norm": 2.1282129287719727, | |
| "learning_rate": 1.413231242284195e-06, | |
| "loss": 0.0689, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 2.1239535808563232, | |
| "learning_rate": 1.3929507227389954e-06, | |
| "loss": 0.0701, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.3016835016835016, | |
| "grad_norm": 2.0963549613952637, | |
| "learning_rate": 1.3727931977912406e-06, | |
| "loss": 0.0758, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.3070707070707073, | |
| "grad_norm": 3.4831295013427734, | |
| "learning_rate": 1.352759354780215e-06, | |
| "loss": 0.086, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.3124579124579125, | |
| "grad_norm": 2.0869736671447754, | |
| "learning_rate": 1.332849876827842e-06, | |
| "loss": 0.072, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.317845117845118, | |
| "grad_norm": 2.1851084232330322, | |
| "learning_rate": 1.3130654428154066e-06, | |
| "loss": 0.0644, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.323232323232323, | |
| "grad_norm": 1.7817176580429077, | |
| "learning_rate": 1.2934067273603855e-06, | |
| "loss": 0.0522, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.328619528619529, | |
| "grad_norm": 2.0074706077575684, | |
| "learning_rate": 1.2738744007934595e-06, | |
| "loss": 0.0744, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.334006734006734, | |
| "grad_norm": 2.3214468955993652, | |
| "learning_rate": 1.2544691291356497e-06, | |
| "loss": 0.0759, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.3393939393939394, | |
| "grad_norm": 2.294804096221924, | |
| "learning_rate": 1.2351915740756087e-06, | |
| "loss": 0.068, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.3447811447811446, | |
| "grad_norm": 2.0611894130706787, | |
| "learning_rate": 1.2160423929470584e-06, | |
| "loss": 0.0667, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.3501683501683504, | |
| "grad_norm": 2.080531120300293, | |
| "learning_rate": 1.1970222387063756e-06, | |
| "loss": 0.0749, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.3555555555555556, | |
| "grad_norm": 2.0696070194244385, | |
| "learning_rate": 1.1781317599103238e-06, | |
| "loss": 0.0773, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.360942760942761, | |
| "grad_norm": 2.34531569480896, | |
| "learning_rate": 1.1593716006939455e-06, | |
| "loss": 0.0752, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.366329966329966, | |
| "grad_norm": 2.6101057529449463, | |
| "learning_rate": 1.140742400748593e-06, | |
| "loss": 0.0605, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.371717171717172, | |
| "grad_norm": 2.1780221462249756, | |
| "learning_rate": 1.1222447953001182e-06, | |
| "loss": 0.0638, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.377104377104377, | |
| "grad_norm": 2.247965097427368, | |
| "learning_rate": 1.1038794150872117e-06, | |
| "loss": 0.0714, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.3824915824915824, | |
| "grad_norm": 1.9487817287445068, | |
| "learning_rate": 1.0856468863398917e-06, | |
| "loss": 0.0654, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.3878787878787877, | |
| "grad_norm": 2.285243272781372, | |
| "learning_rate": 1.0675478307581627e-06, | |
| "loss": 0.0706, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.3932659932659934, | |
| "grad_norm": 2.08785343170166, | |
| "learning_rate": 1.0495828654907991e-06, | |
| "loss": 0.0828, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.3986531986531987, | |
| "grad_norm": 2.6061668395996094, | |
| "learning_rate": 1.0317526031143161e-06, | |
| "loss": 0.06, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.404040404040404, | |
| "grad_norm": 1.9994468688964844, | |
| "learning_rate": 1.014057651612076e-06, | |
| "loss": 0.0678, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.4094276094276093, | |
| "grad_norm": 2.335872173309326, | |
| "learning_rate": 9.964986143535515e-07, | |
| "loss": 0.0696, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.414814814814815, | |
| "grad_norm": 2.4777722358703613, | |
| "learning_rate": 9.790760900737683e-07, | |
| "loss": 0.0651, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.4202020202020202, | |
| "grad_norm": 2.2628719806671143, | |
| "learning_rate": 9.61790672852868e-07, | |
| "loss": 0.0789, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.4255892255892255, | |
| "grad_norm": 2.406503677368164, | |
| "learning_rate": 9.446429520958666e-07, | |
| "loss": 0.0812, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.430976430976431, | |
| "grad_norm": 2.8397791385650635, | |
| "learning_rate": 9.276335125125502e-07, | |
| "loss": 0.0678, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.4363636363636365, | |
| "grad_norm": 2.485055923461914, | |
| "learning_rate": 9.107629340975388e-07, | |
| "loss": 0.0619, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.441750841750842, | |
| "grad_norm": 2.066659927368164, | |
| "learning_rate": 8.940317921105085e-07, | |
| "loss": 0.0611, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.447138047138047, | |
| "grad_norm": 2.2130823135375977, | |
| "learning_rate": 8.774406570565791e-07, | |
| "loss": 0.0674, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.4525252525252528, | |
| "grad_norm": 2.1492106914520264, | |
| "learning_rate": 8.609900946668536e-07, | |
| "loss": 0.0744, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.457912457912458, | |
| "grad_norm": 2.2511839866638184, | |
| "learning_rate": 8.446806658791373e-07, | |
| "loss": 0.0689, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.4632996632996633, | |
| "grad_norm": 2.078249454498291, | |
| "learning_rate": 8.285129268188042e-07, | |
| "loss": 0.0726, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.4686868686868686, | |
| "grad_norm": 2.2379488945007324, | |
| "learning_rate": 8.124874287798352e-07, | |
| "loss": 0.0748, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.474074074074074, | |
| "grad_norm": 2.272982120513916, | |
| "learning_rate": 7.966047182060226e-07, | |
| "loss": 0.0549, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.4794612794612796, | |
| "grad_norm": 1.9955648183822632, | |
| "learning_rate": 7.808653366723296e-07, | |
| "loss": 0.0603, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 1.8981883525848389, | |
| "learning_rate": 7.652698208664377e-07, | |
| "loss": 0.0675, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.49023569023569, | |
| "grad_norm": 2.4488866329193115, | |
| "learning_rate": 7.498187025704296e-07, | |
| "loss": 0.0768, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.495622895622896, | |
| "grad_norm": 2.1295886039733887, | |
| "learning_rate": 7.345125086426675e-07, | |
| "loss": 0.0662, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.501010101010101, | |
| "grad_norm": 2.2743725776672363, | |
| "learning_rate": 7.193517609998263e-07, | |
| "loss": 0.0686, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.5063973063973064, | |
| "grad_norm": 2.235623836517334, | |
| "learning_rate": 7.043369765990943e-07, | |
| "loss": 0.0615, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.5117845117845117, | |
| "grad_norm": 2.076993942260742, | |
| "learning_rate": 6.894686674205481e-07, | |
| "loss": 0.0803, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.517171717171717, | |
| "grad_norm": 2.2475011348724365, | |
| "learning_rate": 6.747473404496902e-07, | |
| "loss": 0.0851, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.5225589225589227, | |
| "grad_norm": 2.5577120780944824, | |
| "learning_rate": 6.601734976601737e-07, | |
| "loss": 0.0735, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.527946127946128, | |
| "grad_norm": 2.3084797859191895, | |
| "learning_rate": 6.457476359966685e-07, | |
| "loss": 0.0724, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.533333333333333, | |
| "grad_norm": 2.051790237426758, | |
| "learning_rate": 6.314702473579309e-07, | |
| "loss": 0.0851, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.538720538720539, | |
| "grad_norm": 2.8228673934936523, | |
| "learning_rate": 6.17341818580024e-07, | |
| "loss": 0.0715, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.544107744107744, | |
| "grad_norm": 2.070128917694092, | |
| "learning_rate": 6.033628314197176e-07, | |
| "loss": 0.0615, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.5494949494949495, | |
| "grad_norm": 2.154543876647949, | |
| "learning_rate": 5.895337625380632e-07, | |
| "loss": 0.0646, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.5548821548821548, | |
| "grad_norm": 1.9985536336898804, | |
| "learning_rate": 5.758550834841381e-07, | |
| "loss": 0.0574, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.56026936026936, | |
| "grad_norm": 2.2103183269500732, | |
| "learning_rate": 5.62327260678967e-07, | |
| "loss": 0.0694, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.5656565656565657, | |
| "grad_norm": 2.3436076641082764, | |
| "learning_rate": 5.489507553996204e-07, | |
| "loss": 0.065, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.571043771043771, | |
| "grad_norm": 2.371115207672119, | |
| "learning_rate": 5.357260237634826e-07, | |
| "loss": 0.0804, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.5764309764309763, | |
| "grad_norm": 2.1717820167541504, | |
| "learning_rate": 5.226535167127e-07, | |
| "loss": 0.0744, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.581818181818182, | |
| "grad_norm": 2.0997424125671387, | |
| "learning_rate": 5.097336799988067e-07, | |
| "loss": 0.0582, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.5872053872053873, | |
| "grad_norm": 1.9539695978164673, | |
| "learning_rate": 4.96966954167517e-07, | |
| "loss": 0.0843, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 2.401609182357788, | |
| "learning_rate": 4.843537745437188e-07, | |
| "loss": 0.0628, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.597979797979798, | |
| "grad_norm": 2.3277831077575684, | |
| "learning_rate": 4.718945712166123e-07, | |
| "loss": 0.0904, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.603367003367003, | |
| "grad_norm": 2.537806510925293, | |
| "learning_rate": 4.595897690250567e-07, | |
| "loss": 0.0653, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.608754208754209, | |
| "grad_norm": 2.5211031436920166, | |
| "learning_rate": 4.4743978754308027e-07, | |
| "loss": 0.0762, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.614141414141414, | |
| "grad_norm": 2.538830280303955, | |
| "learning_rate": 4.3544504106557026e-07, | |
| "loss": 0.0722, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.6195286195286194, | |
| "grad_norm": 2.389099597930908, | |
| "learning_rate": 4.2360593859415433e-07, | |
| "loss": 0.0669, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.624915824915825, | |
| "grad_norm": 2.186370372772217, | |
| "learning_rate": 4.1192288382324363e-07, | |
| "loss": 0.0719, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.6303030303030304, | |
| "grad_norm": 2.426302909851074, | |
| "learning_rate": 4.003962751262763e-07, | |
| "loss": 0.065, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.6356902356902356, | |
| "grad_norm": 2.080082893371582, | |
| "learning_rate": 3.890265055421283e-07, | |
| "loss": 0.0677, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.641077441077441, | |
| "grad_norm": 2.4764468669891357, | |
| "learning_rate": 3.77813962761715e-07, | |
| "loss": 0.0775, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.6464646464646466, | |
| "grad_norm": 2.2122390270233154, | |
| "learning_rate": 3.6675902911476937e-07, | |
| "loss": 0.0754, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.651851851851852, | |
| "grad_norm": 2.6265482902526855, | |
| "learning_rate": 3.558620815568048e-07, | |
| "loss": 0.0631, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.657239057239057, | |
| "grad_norm": 2.3554742336273193, | |
| "learning_rate": 3.451234916562618e-07, | |
| "loss": 0.0653, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.6626262626262625, | |
| "grad_norm": 2.077880382537842, | |
| "learning_rate": 3.3454362558184075e-07, | |
| "loss": 0.0749, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.668013468013468, | |
| "grad_norm": 2.258436918258667, | |
| "learning_rate": 3.241228440900124e-07, | |
| "loss": 0.067, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.6734006734006734, | |
| "grad_norm": 2.1589324474334717, | |
| "learning_rate": 3.1386150251271897e-07, | |
| "loss": 0.0814, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.6787878787878787, | |
| "grad_norm": 2.316006898880005, | |
| "learning_rate": 3.0375995074525764e-07, | |
| "loss": 0.0624, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.6841750841750844, | |
| "grad_norm": 2.2028238773345947, | |
| "learning_rate": 2.9381853323434627e-07, | |
| "loss": 0.0583, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.6895622895622897, | |
| "grad_norm": 2.372264862060547, | |
| "learning_rate": 2.840375889663871e-07, | |
| "loss": 0.0638, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.694949494949495, | |
| "grad_norm": 2.3102543354034424, | |
| "learning_rate": 2.744174514558956e-07, | |
| "loss": 0.0601, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.7003367003367003, | |
| "grad_norm": 2.3564910888671875, | |
| "learning_rate": 2.6495844873413944e-07, | |
| "loss": 0.0721, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.7057239057239055, | |
| "grad_norm": 2.442258834838867, | |
| "learning_rate": 2.556609033379459e-07, | |
| "loss": 0.0616, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.7111111111111112, | |
| "grad_norm": 2.313163995742798, | |
| "learning_rate": 2.465251322987061e-07, | |
| "loss": 0.0634, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.7164983164983165, | |
| "grad_norm": 2.4522969722747803, | |
| "learning_rate": 2.3755144713156819e-07, | |
| "loss": 0.0613, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.721885521885522, | |
| "grad_norm": 2.2570788860321045, | |
| "learning_rate": 2.287401538248074e-07, | |
| "loss": 0.0737, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 2.2716591358184814, | |
| "learning_rate": 2.20091552829399e-07, | |
| "loss": 0.0639, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.732659932659933, | |
| "grad_norm": 2.105753183364868, | |
| "learning_rate": 2.1160593904877236e-07, | |
| "loss": 0.0625, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.738047138047138, | |
| "grad_norm": 2.383596658706665, | |
| "learning_rate": 2.0328360182875262e-07, | |
| "loss": 0.0682, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.7434343434343433, | |
| "grad_norm": 2.4483511447906494, | |
| "learning_rate": 1.9512482494769613e-07, | |
| "loss": 0.0649, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.7488215488215486, | |
| "grad_norm": 2.1391537189483643, | |
| "learning_rate": 1.8712988660681498e-07, | |
| "loss": 0.0704, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.7542087542087543, | |
| "grad_norm": 2.9412190914154053, | |
| "learning_rate": 1.7929905942068836e-07, | |
| "loss": 0.0717, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.7595959595959596, | |
| "grad_norm": 2.366955280303955, | |
| "learning_rate": 1.7163261040796797e-07, | |
| "loss": 0.0645, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.764983164983165, | |
| "grad_norm": 2.511876344680786, | |
| "learning_rate": 1.6413080098227562e-07, | |
| "loss": 0.0762, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.7703703703703706, | |
| "grad_norm": 2.14850115776062, | |
| "learning_rate": 1.5679388694328446e-07, | |
| "loss": 0.0613, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.775757575757576, | |
| "grad_norm": 2.2042980194091797, | |
| "learning_rate": 1.4962211846800078e-07, | |
| "loss": 0.0648, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.781144781144781, | |
| "grad_norm": 2.243152379989624, | |
| "learning_rate": 1.426157401022321e-07, | |
| "loss": 0.0769, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.7865319865319864, | |
| "grad_norm": 2.4439616203308105, | |
| "learning_rate": 1.3577499075224821e-07, | |
| "loss": 0.0726, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.7919191919191917, | |
| "grad_norm": 2.2987587451934814, | |
| "learning_rate": 1.2910010367663317e-07, | |
| "loss": 0.0665, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.7973063973063974, | |
| "grad_norm": 2.111358642578125, | |
| "learning_rate": 1.2259130647833627e-07, | |
| "loss": 0.0523, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.8026936026936027, | |
| "grad_norm": 2.131275177001953, | |
| "learning_rate": 1.162488210969065e-07, | |
| "loss": 0.0687, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.808080808080808, | |
| "grad_norm": 2.1112232208251953, | |
| "learning_rate": 1.100728638009263e-07, | |
| "loss": 0.0603, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.8134680134680137, | |
| "grad_norm": 2.212636947631836, | |
| "learning_rate": 1.0406364518063927e-07, | |
| "loss": 0.0565, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.818855218855219, | |
| "grad_norm": 2.0088913440704346, | |
| "learning_rate": 9.822137014076472e-08, | |
| "loss": 0.0597, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.824242424242424, | |
| "grad_norm": 2.1878387928009033, | |
| "learning_rate": 9.254623789351714e-08, | |
| "loss": 0.0751, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.8296296296296295, | |
| "grad_norm": 2.465935230255127, | |
| "learning_rate": 8.703844195180555e-08, | |
| "loss": 0.0753, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.8350168350168348, | |
| "grad_norm": 2.2098045349121094, | |
| "learning_rate": 8.169817012264214e-08, | |
| "loss": 0.0586, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.8404040404040405, | |
| "grad_norm": 2.3172450065612793, | |
| "learning_rate": 7.652560450073454e-08, | |
| "loss": 0.0639, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.8457912457912458, | |
| "grad_norm": 1.8119255304336548, | |
| "learning_rate": 7.152092146227806e-08, | |
| "loss": 0.0762, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.851178451178451, | |
| "grad_norm": 2.9995367527008057, | |
| "learning_rate": 6.668429165893996e-08, | |
| "loss": 0.0802, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.8565656565656568, | |
| "grad_norm": 2.5341761112213135, | |
| "learning_rate": 6.20158800120435e-08, | |
| "loss": 0.0751, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.861952861952862, | |
| "grad_norm": 2.48641300201416, | |
| "learning_rate": 5.7515845706940246e-08, | |
| "loss": 0.0678, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.8673400673400673, | |
| "grad_norm": 2.129096746444702, | |
| "learning_rate": 5.31843421875855e-08, | |
| "loss": 0.057, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.8727272727272726, | |
| "grad_norm": 1.9396432638168335, | |
| "learning_rate": 4.9021517151305875e-08, | |
| "loss": 0.0492, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.878114478114478, | |
| "grad_norm": 2.2072770595550537, | |
| "learning_rate": 4.502751254375992e-08, | |
| "loss": 0.0734, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.8835016835016836, | |
| "grad_norm": 2.1861319541931152, | |
| "learning_rate": 4.120246455410204e-08, | |
| "loss": 0.0537, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 2.1539671421051025, | |
| "learning_rate": 3.7546503610336183e-08, | |
| "loss": 0.0496, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.894276094276094, | |
| "grad_norm": 1.8679490089416504, | |
| "learning_rate": 3.405975437486997e-08, | |
| "loss": 0.0702, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8996632996633, | |
| "grad_norm": 2.585775375366211, | |
| "learning_rate": 3.074233574026087e-08, | |
| "loss": 0.0626, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.905050505050505, | |
| "grad_norm": 2.1468751430511475, | |
| "learning_rate": 2.7594360825166644e-08, | |
| "loss": 0.0575, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.9104377104377104, | |
| "grad_norm": 2.1872782707214355, | |
| "learning_rate": 2.4615936970485144e-08, | |
| "loss": 0.0712, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.915824915824916, | |
| "grad_norm": 2.4800572395324707, | |
| "learning_rate": 2.180716573569386e-08, | |
| "loss": 0.0646, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.9212121212121214, | |
| "grad_norm": 2.528630495071411, | |
| "learning_rate": 1.9168142895389376e-08, | |
| "loss": 0.075, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.9265993265993266, | |
| "grad_norm": 2.408411741256714, | |
| "learning_rate": 1.6698958436019986e-08, | |
| "loss": 0.0717, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.931986531986532, | |
| "grad_norm": 2.5206246376037598, | |
| "learning_rate": 1.4399696552816477e-08, | |
| "loss": 0.088, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.937373737373737, | |
| "grad_norm": 2.5237998962402344, | |
| "learning_rate": 1.2270435646922763e-08, | |
| "loss": 0.0667, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.942760942760943, | |
| "grad_norm": 2.4456536769866943, | |
| "learning_rate": 1.031124832272301e-08, | |
| "loss": 0.0484, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.948148148148148, | |
| "grad_norm": 2.1876487731933594, | |
| "learning_rate": 8.522201385362528e-09, | |
| "loss": 0.0683, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.9535353535353535, | |
| "grad_norm": 2.1443562507629395, | |
| "learning_rate": 6.903355838475123e-09, | |
| "loss": 0.0688, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.958922558922559, | |
| "grad_norm": 2.5573830604553223, | |
| "learning_rate": 5.454766882097007e-09, | |
| "loss": 0.0497, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.9643097643097645, | |
| "grad_norm": 2.2723801136016846, | |
| "learning_rate": 4.1764839107905074e-09, | |
| "loss": 0.0777, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.9696969696969697, | |
| "grad_norm": 2.2643778324127197, | |
| "learning_rate": 3.068550511955426e-09, | |
| "loss": 0.0725, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.975084175084175, | |
| "grad_norm": 2.395113945007324, | |
| "learning_rate": 2.131004464343556e-09, | |
| "loss": 0.0715, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.9804713804713803, | |
| "grad_norm": 2.2361373901367188, | |
| "learning_rate": 1.3638777367724898e-09, | |
| "loss": 0.0843, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.985858585858586, | |
| "grad_norm": 2.993990182876587, | |
| "learning_rate": 7.671964870337168e-10, | |
| "loss": 0.0732, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.9912457912457913, | |
| "grad_norm": 2.1068203449249268, | |
| "learning_rate": 3.4098106100166616e-10, | |
| "loss": 0.0671, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.9966329966329965, | |
| "grad_norm": 2.407553195953369, | |
| "learning_rate": 8.52459919381543e-11, | |
| "loss": 0.0774, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.9966329966329965, | |
| "step": 555, | |
| "total_flos": 9.477952550322831e+17, | |
| "train_loss": 0.30530234318193017, | |
| "train_runtime": 3941.2467, | |
| "train_samples_per_second": 4.521, | |
| "train_steps_per_second": 0.141 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 555, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.477952550322831e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
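
The state file above follows the standard `trainer_state.json` layout: a `log_history` array of per-step entries (each with `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`), a final summary entry carrying aggregates such as `train_loss` and `train_runtime`, and trailer fields like `max_steps` and `train_batch_size`. As a minimal sketch (not part of the original file), the snippet below shows how the logged loss curve and the final summary could be read back offline; the keys used all appear in the file itself, while the file path and the printing choices are assumptions for illustration.

```python
# Minimal sketch for inspecting the trainer_state.json shown above.
# Keys ("log_history", "loss", "step", "train_loss") come from the file;
# the path "trainer_state.json" is a hypothetical location.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss" key; the final summary entry instead
# carries aggregate fields (train_loss, train_runtime, ...), so filter on it.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

print(f"logged steps: {len(steps)} (last = {steps[-1]} of {state['max_steps']})")
print(f"first/last step loss: {losses[0]:.4f} -> {losses[-1]:.4f}")

# The last element of log_history is the run summary.
summary = state["log_history"][-1]
print(f"mean train loss: {summary.get('train_loss'):.4f}")
print(f"runtime (s): {summary.get('train_runtime')}")
```

Run against this file, such a script would report 555 logged steps, a per-step loss falling from roughly 1.0 at step 1 to about 0.08 by the end of epoch 3, and the recorded mean train loss of ~0.3053.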