{"train_lr": 0.00014956716896441166, "train_min_lr": 0.00014956716896441166, "train_loss": 0.3762836395136009, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.219930504854673, "epoch": 0, "n_parameters": 631477312} {"train_lr": 0.00044966335363898674, "train_min_lr": 0.00044966335363898674, "train_loss": 0.36233740786197954, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03059889937046533, "epoch": 1, "n_parameters": 631477312} {"train_lr": 0.000749759538313562, "train_min_lr": 0.000749759538313562, "train_loss": 0.3604755045559544, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016236139091257103, "epoch": 2, "n_parameters": 631477312} {"train_lr": 0.0010498557229881365, "train_min_lr": 0.0010498557229881365, "train_loss": 0.3593102954925062, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010971919848368717, "epoch": 3, "n_parameters": 631477312} {"train_lr": 0.0013499519076627113, "train_min_lr": 0.0013499519076627113, "train_loss": 0.3579025494191461, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008710255759815948, "epoch": 4, "n_parameters": 631477312} {"train_lr": 0.0016500480923372883, "train_min_lr": 0.0016500480923372883, "train_loss": 0.3555748825892806, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008093729392828371, "epoch": 5, "n_parameters": 631477312} {"train_lr": 0.0019501442770118633, "train_min_lr": 0.0019501442770118633, "train_loss": 0.35229708700297546, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008249949774322793, "epoch": 6, "n_parameters": 631477312} {"train_lr": 0.002250240461686437, "train_min_lr": 0.002250240461686437, "train_loss": 0.34795528617448723, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008780377506553076, "epoch": 7, "n_parameters": 631477312} {"train_lr": 0.0025503366463610127, "train_min_lr": 0.0025503366463610127, "train_loss": 0.3423900009246352, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00945164543391468, "epoch": 8, "n_parameters": 631477312} {"train_lr": 0.002850432831035588, "train_min_lr": 0.002850432831035588, "train_loss": 0.3361705700890758, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010530552652198821, "epoch": 9, "n_parameters": 631477312} {"train_lr": 0.0029999932048716707, "train_min_lr": 0.0029999932048716707, "train_loss": 0.32943082776947474, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009689072033581443, "epoch": 11, "n_parameters": 631477312} {"train_lr": 0.0029999815414149186, "train_min_lr": 0.0029999815414149186, "train_loss": 0.32339698032368547, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009277637595704828, "epoch": 12, "n_parameters": 631477312} {"train_lr": 0.002999964041610077, "train_min_lr": 0.002999964041610077, "train_loss": 0.3181772627300607, "train_loss_scale": 155017.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008759370822913181, "epoch": 13, "n_parameters": 631477312} {"train_lr": 0.0029999407055254644, "train_min_lr": 0.0029999407055254644, "train_loss": 0.31389754625538796, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008352689983812759, "epoch": 14, "n_parameters": 631477312} {"train_lr": 0.002999911533252189, "train_min_lr": 0.002999911533252189, "train_loss": 0.3100536342543096, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00801377543188536, "epoch": 15, "n_parameters": 631477312} {"train_lr": 0.0029998765249041335, "train_min_lr": 0.0029998765249041335, "train_loss": 0.3068192872779969, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00776898961675425, "epoch": 16, "n_parameters": 631477312} {"train_lr": 0.002999835680617969, "train_min_lr": 0.002999835680617969, "train_loss": 0.30403782358954257, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007619597525216448, "epoch": 17, "n_parameters": 631477312} {"train_lr": 0.002999789000553154, "train_min_lr": 0.002999789000553154, "train_loss": 0.30155547361414975, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007373458535697024, "epoch": 18, "n_parameters": 631477312} {"train_lr": 0.002999736484891923, "train_min_lr": 0.002999736484891923, "train_loss": 0.2994003286973263, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007231216906056477, "epoch": 19, "n_parameters": 631477312} {"train_lr": 0.0029996781338392934, "train_min_lr": 0.0029996781338392934, "train_loss": 0.2974849368612736, "train_loss_scale": 464633.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007192076712201994, "epoch": 20, "n_parameters": 631477312} {"train_lr": 0.0029996139476230668, "train_min_lr": 0.0029996139476230668, "train_loss": 0.29573119973811585, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007103971442959916, "epoch": 21, "n_parameters": 631477312} {"train_lr": 0.0029995439264938278, "train_min_lr": 0.0029995439264938278, "train_loss": 0.2941351014290912, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006924284063876631, "epoch": 22, "n_parameters": 631477312} {"train_lr": 0.002999468070724929, "train_min_lr": 0.002999468070724929, "train_loss": 0.29278824370330536, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006964788634855396, "epoch": 23, "n_parameters": 631477312} {"train_lr": 0.0029993863806125134, "train_min_lr": 0.0029993863806125134, "train_loss": 0.2915007552424541, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068425224685014635, "epoch": 24, "n_parameters": 631477312} {"train_lr": 0.0029992988564754917, "train_min_lr": 0.0029992988564754917, "train_loss": 0.2903131091346343, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006796890972719456, "epoch": 25, "n_parameters": 631477312} {"train_lr": 0.0029992054986555587, "train_min_lr": 0.0029992054986555587, "train_loss": 0.2892534056266483, "train_loss_scale": 714174.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006804759257461111, "epoch": 26, "n_parameters": 631477312} {"train_lr": 0.002999106307517179, "train_min_lr": 0.002999106307517179, "train_loss": 0.2882610055056807, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068150842353367274, "epoch": 27, "n_parameters": 631477312} {"train_lr": 0.00299900128344759, "train_min_lr": 0.00299900128344759, "train_loss": 0.28728443042685586, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067373048708153265, "epoch": 28, "n_parameters": 631477312} {"train_lr": 0.002998890426856795, "train_min_lr": 0.002998890426856795, "train_loss": 0.2863408819420072, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006663581622071946, "epoch": 29, "n_parameters": 631477312} {"train_lr": 0.002998651217865505, "train_min_lr": 0.002998651217865505, "train_loss": 0.2855846123226608, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006676359182725159, "epoch": 31, "n_parameters": 631477312} {"train_lr": 0.0029985228663988615, "train_min_lr": 0.0029985228663988615, "train_loss": 0.2848354927174604, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006686611054763676, "epoch": 32, "n_parameters": 631477312} {"train_lr": 0.002998388684278744, "train_min_lr": 0.002998388684278744, "train_loss": 0.2841375052111032, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006717834550027664, "epoch": 33, "n_parameters": 631477312} {"train_lr": 0.00299824867202899, "train_min_lr": 0.00299824867202899, "train_loss": 0.2834733720146454, "train_loss_scale": 2046739.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006685246683609409, "epoch": 34, "n_parameters": 631477312} {"train_lr": 0.0029981028301961947, "train_min_lr": 0.0029981028301961947, "train_loss": 0.28285679785916823, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006680265924212738, "epoch": 35, "n_parameters": 631477312} {"train_lr": 0.0029979511593497257, "train_min_lr": 0.0029979511593497257, "train_loss": 0.2822155895111605, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006662065454293042, "epoch": 36, "n_parameters": 631477312} {"train_lr": 0.002997793660081702, "train_min_lr": 0.002997793660081702, "train_loss": 0.28161296255301493, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00665541946177538, "epoch": 37, "n_parameters": 631477312} {"train_lr": 0.002997630333006995, "train_min_lr": 0.002997630333006995, "train_loss": 0.2811014130592155, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006605184672830196, "epoch": 38, "n_parameters": 631477312} {"train_lr": 0.002997461178763217, "train_min_lr": 0.002997461178763217, "train_loss": 0.28064428644481665, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006664670357564226, "epoch": 39, "n_parameters": 631477312} {"train_lr": 0.0029972861980107515, "train_min_lr": 0.0029972861980107515, "train_loss": 0.2801371549721807, "train_loss_scale": 3233109.3333333335, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006682938067779805, "epoch": 40, "n_parameters": 631477312} {"train_lr": 0.0029971053914327112, "train_min_lr": 0.0029971053914327112, "train_loss": 0.2796198789579555, "train_loss_scale": 2197976.6153846155, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 41, "n_parameters": 631477312} {"train_lr": 0.0029969187597349537, "train_min_lr": 0.0029969187597349537, "train_loss": 0.2791765438219628, "train_loss_scale": 1310720.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 42, "n_parameters": 631477312} {"train_lr": 0.0029967263036460904, "train_min_lr": 0.0029967263036460904, "train_loss": 0.278813859495597, "train_loss_scale": 993122.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 43, "n_parameters": 631477312} {"train_lr": 0.0029965280239174527, "train_min_lr": 0.0029965280239174527, "train_loss": 0.278404200849577, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066692087253054175, "epoch": 44, "n_parameters": 631477312} {"train_lr": 0.0029963239213231223, "train_min_lr": 0.0029963239213231223, "train_loss": 0.27794832818341464, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006589387583797081, "epoch": 45, "n_parameters": 631477312} {"train_lr": 0.002996113996659908, "train_min_lr": 0.002996113996659908, "train_loss": 0.2776522172214941, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006680978236731906, "epoch": 46, "n_parameters": 631477312} {"train_lr": 0.0029958982507473477, "train_min_lr": 0.0029958982507473477, "train_loss": 0.2772712598089129, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006648826500914322, "epoch": 47, "n_parameters": 631477312} {"train_lr": 0.0029956766844277003, "train_min_lr": 0.0029956766844277003, "train_loss": 0.27696681930086553, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006693171371574489, "epoch": 48, "n_parameters": 631477312} {"train_lr": 0.002995449298565954, "train_min_lr": 0.002995449298565954, "train_loss": 0.27659607119559765, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006642024308884851, "epoch": 49, "n_parameters": 631477312} {"train_lr": 0.0029952160940498185, "train_min_lr": 0.0029952160940498185, "train_loss": 0.27631730085704476, "train_loss_scale": 744421.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 50, "n_parameters": 631477312} {"train_lr": 0.002994977071789708, "train_min_lr": 0.002994977071789708, "train_loss": 0.2760152844204687, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006642045239762713, "epoch": 51, "n_parameters": 631477312} {"train_lr": 0.002994732232718759, "train_min_lr": 0.002994732232718759, "train_loss": 0.27568689056743795, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006648006184826581, "epoch": 52, "n_parameters": 631477312} {"train_lr": 0.0029944815777928125, "train_min_lr": 0.0029944815777928125, "train_loss": 0.27546317072119564, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006647615389038737, "epoch": 53, "n_parameters": 631477312} {"train_lr": 0.0029942251079904166, "train_min_lr": 0.0029942251079904166, "train_loss": 0.2751755064132456, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006674255887703158, "epoch": 54, "n_parameters": 631477312} {"train_lr": 0.002993962824312818, "train_min_lr": 0.002993962824312818, "train_loss": 0.27485486829223543, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006617780983782349, "epoch": 55, "n_parameters": 631477312} {"train_lr": 0.002993694727783965, "train_min_lr": 0.002993694727783965, "train_loss": 0.2746047629730012, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006597905477974564, "epoch": 56, "n_parameters": 631477312} {"train_lr": 0.0029934208194504915, "train_min_lr": 0.0029934208194504915, "train_loss": 0.2743614808662245, "train_loss_scale": 557896.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 57, "n_parameters": 631477312} {"train_lr": 0.0029931411003817263, "train_min_lr": 0.0029931411003817263, "train_loss": 0.2741395687350096, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066022517246146425, "epoch": 58, "n_parameters": 631477312} {"train_lr": 0.0029928555716696795, "train_min_lr": 0.0029928555716696795, "train_loss": 0.273888002621392, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006604071119979311, "epoch": 59, "n_parameters": 631477312} {"train_lr": 0.002992564234429045, "train_min_lr": 0.002992564234429045, "train_loss": 0.2737152803516111, "train_loss_scale": 444468.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 60, "n_parameters": 631477312} {"train_lr": 0.0029922670897972003, "train_min_lr": 0.0029922670897972003, "train_loss": 0.2735071821191993, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006586091069038957, "epoch": 61, "n_parameters": 631477312} {"train_lr": 0.002991964138934168, "train_min_lr": 0.002991964138934168, "train_loss": 0.27326867799871624, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006595374028293941, "epoch": 62, "n_parameters": 631477312} {"train_lr": 0.0029916553830226754, "train_min_lr": 0.0029916553830226754, "train_loss": 0.27306148403873426, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006576628766010682, "epoch": 63, "n_parameters": 631477312} {"train_lr": 0.0029913408232680786, "train_min_lr": 0.0029913408232680786, "train_loss": 0.2729144222700061, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00663111160377948, "epoch": 64, "n_parameters": 631477312} {"train_lr": 0.0029910204608984106, "train_min_lr": 0.0029910204608984106, "train_loss": 0.2726892998848015, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006598212620398651, "epoch": 65, "n_parameters": 631477312} {"train_lr": 0.002990694297164359, "train_min_lr": 0.002990694297164359, "train_loss": 0.27251347426910144, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006616400365228168, "epoch": 66, "n_parameters": 631477312} {"train_lr": 0.0029903623333392484, "train_min_lr": 0.0029903623333392484, "train_loss": 0.27231658806499, "train_loss_scale": 496561.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578992138831661, "epoch": 67, "n_parameters": 631477312} {"train_lr": 0.002990024570719051, "train_min_lr": 0.002990024570719051, "train_loss": 0.2721459461996953, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00662355341662008, "epoch": 68, "n_parameters": 631477312} {"train_lr": 0.002989681010622383, "train_min_lr": 0.002989681010622383, "train_loss": 0.2719804856281441, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006582943092768964, "epoch": 69, "n_parameters": 631477312} {"train_lr": 0.002989331654390483, "train_min_lr": 0.002989331654390483, "train_loss": 0.2718659131668317, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006665328354873241, "epoch": 70, "n_parameters": 631477312} {"train_lr": 0.0029889765033872333, "train_min_lr": 0.0029889765033872333, "train_loss": 0.2716690606694334, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006553877451015302, "epoch": 71, "n_parameters": 631477312} {"train_lr": 0.00298861555899912, "train_min_lr": 0.00298861555899912, "train_loss": 0.2718068308208902, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007062928182466959, "epoch": 72, "n_parameters": 631477312} {"train_lr": 0.002988248822635262, "train_min_lr": 0.002988248822635262, "train_loss": 0.2714404644898306, "train_loss_scale": 778029.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006590971644096172, "epoch": 73, "n_parameters": 631477312} {"train_lr": 0.0029878762957273783, "train_min_lr": 0.0029878762957273783, "train_loss": 0.27129766633674407, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006618544733963716, "epoch": 74, "n_parameters": 631477312} {"train_lr": 0.002987497979729805, "train_min_lr": 0.002987497979729805, "train_loss": 0.2711355872422409, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006533487847683808, "epoch": 75, "n_parameters": 631477312} {"train_lr": 0.002987113876119467, "train_min_lr": 0.002987113876119467, "train_loss": 0.2710242243274712, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066491667291698735, "epoch": 76, "n_parameters": 631477312} {"train_lr": 0.002986723986395889, "train_min_lr": 0.002986723986395889, "train_loss": 0.27080615358845067, "train_loss_scale": 873813.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 77, "n_parameters": 631477312} {"train_lr": 0.0029863283120811898, "train_min_lr": 0.0029863283120811898, "train_loss": 0.2708612179550796, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006894145325088921, "epoch": 78, "n_parameters": 631477312} {"train_lr": 0.002985926854720063, "train_min_lr": 0.002985926854720063, "train_loss": 0.2705733726780193, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006555064917148019, "epoch": 79, "n_parameters": 631477312} {"train_lr": 0.002985519615879786, "train_min_lr": 0.002985519615879786, "train_loss": 0.2704087294614277, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006573839742953005, "epoch": 80, "n_parameters": 631477312} {"train_lr": 0.002985106597150196, "train_min_lr": 0.002985106597150196, "train_loss": 0.27029253248399937, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00651747222768907, "epoch": 81, "n_parameters": 631477312} {"train_lr": 0.0029846878001437093, "train_min_lr": 0.0029846878001437093, "train_loss": 0.2701553068577479, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065847231636349205, "epoch": 82, "n_parameters": 631477312} {"train_lr": 0.002984263226495282, "train_min_lr": 0.002984263226495282, "train_loss": 0.2700448330814162, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006542500466681444, "epoch": 83, "n_parameters": 631477312} {"train_lr": 0.002983832877862442, "train_min_lr": 0.002983832877862442, "train_loss": 0.26989041476582104, "train_loss_scale": 1008246.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006538025502008027, "epoch": 84, "n_parameters": 631477312} {"train_lr": 0.002983396755925252, "train_min_lr": 0.002983396755925252, "train_loss": 0.269863588636956, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006605230031523089, "epoch": 85, "n_parameters": 631477312} {"train_lr": 0.0029829548623863107, "train_min_lr": 0.0029829548623863107, "train_loss": 0.2696658456071208, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578377321159515, "epoch": 86, "n_parameters": 631477312} {"train_lr": 0.0029825071989707597, "train_min_lr": 0.0029825071989707597, "train_loss": 0.269576303789225, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006556679909595122, "epoch": 87, "n_parameters": 631477312} {"train_lr": 0.002982053767426249, "train_min_lr": 0.002982053767426249, "train_loss": 0.26950177961351496, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006556421122812212, "epoch": 88, "n_parameters": 631477312} {"train_lr": 0.0029815945695229615, "train_min_lr": 0.0029815945695229615, "train_loss": 0.2693768112669484, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006545019753945944, "epoch": 89, "n_parameters": 631477312} {"train_lr": 0.002981129607053593, "train_min_lr": 0.002981129607053593, "train_loss": 0.2692665574707998, "train_loss_scale": 1586307.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006527053226585476, "epoch": 90, "n_parameters": 631477312} {"train_lr": 0.0029806588818333314, "train_min_lr": 0.0029806588818333314, "train_loss": 0.2691412120221708, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006544175583164757, "epoch": 91, "n_parameters": 631477312} {"train_lr": 0.002980182395699876, "train_min_lr": 0.002980182395699876, "train_loss": 0.26909733020091575, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006584652798035397, "epoch": 92, "n_parameters": 631477312} {"train_lr": 0.0029797001505133982, "train_min_lr": 0.0029797001505133982, "train_loss": 0.2689770647766403, "train_loss_scale": 1720740.1025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 93, "n_parameters": 631477312} {"train_lr": 0.002979212148156572, "train_min_lr": 0.002979212148156572, "train_loss": 0.269078544815644, "train_loss_scale": 611669.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 94, "n_parameters": 631477312} {"train_lr": 0.0029787183905345444, "train_min_lr": 0.0029787183905345444, "train_loss": 0.2688264806205646, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006624933094896663, "epoch": 95, "n_parameters": 631477312} {"train_lr": 0.0029782188795749115, "train_min_lr": 0.0029782188795749115, "train_loss": 0.2686924328758883, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006522069443375445, "epoch": 96, "n_parameters": 631477312} {"train_lr": 0.0029777136172277536, "train_min_lr": 0.0029777136172277536, "train_loss": 0.26859513404540336, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006516222482344183, "epoch": 97, "n_parameters": 631477312} {"train_lr": 0.0029772026054655956, "train_min_lr": 0.0029772026054655956, "train_loss": 0.26854229220547354, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578936520665406, "epoch": 98, "n_parameters": 631477312} {"train_lr": 0.002976685846283399, "train_min_lr": 0.002976685846283399, "train_loss": 0.2684581979500273, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006545366165646089, "epoch": 99, "n_parameters": 631477312} {"train_lr": 0.002976163341698581, "train_min_lr": 0.002976163341698581, "train_loss": 0.2683847997104749, "train_loss_scale": 746102.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006587665775814691, "epoch": 100, "n_parameters": 631477312} {"train_lr": 0.0029756350937509696, "train_min_lr": 0.0029756350937509696, "train_loss": 0.26830698334826875, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006566806644117698, "epoch": 101, "n_parameters": 631477312} {"train_lr": 0.0029751011045028287, "train_min_lr": 0.0029751011045028287, "train_loss": 0.26818879916129684, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548732578849945, "epoch": 102, "n_parameters": 631477312} {"train_lr": 0.0029745613760388313, "train_min_lr": 0.0029745613760388313, "train_loss": 0.2681491745826908, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006559299160308467, "epoch": 103, "n_parameters": 631477312} {"train_lr": 0.0029740159104660532, "train_min_lr": 0.0029740159104660532, "train_loss": 0.26806418714412034, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006567970013771302, "epoch": 104, "n_parameters": 631477312} {"train_lr": 0.00297346470991397, "train_min_lr": 0.00297346470991397, "train_loss": 0.2679767335872524, "train_loss_scale": 767947.4871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 105, "n_parameters": 631477312} {"train_lr": 0.0029729077765344473, "train_min_lr": 0.0029729077765344473, "train_loss": 0.26790684468459147, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006604876815496634, "epoch": 106, "n_parameters": 631477312} {"train_lr": 0.0029723451125017353, "train_min_lr": 0.0029723451125017353, "train_loss": 0.2677700662424263, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006519454246303305, "epoch": 107, "n_parameters": 631477312} {"train_lr": 0.002971776720012444, "train_min_lr": 0.002971776720012444, "train_loss": 0.2677516357626957, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006547636474864796, "epoch": 108, "n_parameters": 631477312} {"train_lr": 0.0029712026012855583, "train_min_lr": 0.0029712026012855583, "train_loss": 0.2676776558793604, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006536204238028194, "epoch": 109, "n_parameters": 631477312} {"train_lr": 0.002970622758562414, "train_min_lr": 0.002970622758562414, "train_loss": 0.2676659008571639, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006569338254988766, "epoch": 110, "n_parameters": 631477312} {"train_lr": 0.0029700371941066978, "train_min_lr": 0.0029700371941066978, "train_loss": 0.2675550375939705, "train_loss_scale": 589824.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006554608513863805, "epoch": 111, "n_parameters": 631477312} {"train_lr": 0.002969445910204429, "train_min_lr": 0.002969445910204429, "train_loss": 0.2674941812367298, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00654294479262227, "epoch": 112, "n_parameters": 631477312} {"train_lr": 0.002968848909163951, "train_min_lr": 0.002968848909163951, "train_loss": 0.2674549649117323, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006621804159994309, "epoch": 113, "n_parameters": 631477312} {"train_lr": 0.0029682461933159374, "train_min_lr": 0.0029682461933159374, "train_loss": 0.26735275423225874, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006565786380237207, "epoch": 114, "n_parameters": 631477312} {"train_lr": 0.0029676377650133612, "train_min_lr": 0.0029676377650133612, "train_loss": 0.2673114504312905, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006619835850030471, "epoch": 115, "n_parameters": 631477312} {"train_lr": 0.0029670236266315076, "train_min_lr": 0.0029670236266315076, "train_loss": 0.267213166392862, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065408752684911284, "epoch": 116, "n_parameters": 631477312} {"train_lr": 0.002966403780567945, "train_min_lr": 0.002966403780567945, "train_loss": 0.26718427361442876, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006525287067350478, "epoch": 117, "n_parameters": 631477312} {"train_lr": 0.002965778229242529, "train_min_lr": 0.002965778229242529, "train_loss": 0.26714012521701175, "train_loss_scale": 1798038.9743589743, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00653369948029136, "epoch": 118, "n_parameters": 631477312} {"train_lr": 0.0029651469750973905, "train_min_lr": 0.0029651469750973905, "train_loss": 0.2670196918860221, "train_loss_scale": 1660245.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 119, "n_parameters": 631477312} {"train_lr": 0.0029645100205969127, "train_min_lr": 0.0029645100205969127, "train_loss": 0.2669636393771865, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006537801893547368, "epoch": 120, "n_parameters": 631477312} {"train_lr": 0.002963867368227746, "train_min_lr": 0.002963867368227746, "train_loss": 0.2669382819858117, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00654222071917059, "epoch": 121, "n_parameters": 631477312} {"train_lr": 0.002963219020498775, "train_min_lr": 0.002963219020498775, "train_loss": 0.26686617718615496, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006583438930997195, "epoch": 122, "n_parameters": 631477312} {"train_lr": 0.002962564979941127, "train_min_lr": 0.002962564979941127, "train_loss": 0.2668010621564463, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006538377355378217, "epoch": 123, "n_parameters": 631477312} {"train_lr": 0.0029619052491081507, "train_min_lr": 0.0029619052491081507, "train_loss": 0.26675632587657905, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006563179556113214, "epoch": 124, "n_parameters": 631477312} {"train_lr": 0.0029612398305754115, "train_min_lr": 0.0029612398305754115, "train_loss": 0.26672236228254265, "train_loss_scale": 1055297.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006593003143699696, "epoch": 125, "n_parameters": 631477312} {"train_lr": 0.0029605687269406663, "train_min_lr": 0.0029605687269406663, "train_loss": 0.26660817118289953, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006534602413944996, "epoch": 126, "n_parameters": 631477312} {"train_lr": 0.0029598919408238863, "train_min_lr": 0.0029598919408238863, "train_loss": 0.2666065750542121, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006581226701084047, "epoch": 127, "n_parameters": 631477312} {"train_lr": 0.0029592094748672145, "train_min_lr": 0.0029592094748672145, "train_loss": 0.2665979409835134, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066155273652372835, "epoch": 128, "n_parameters": 631477312} {"train_lr": 0.0029585213317349685, "train_min_lr": 0.0029585213317349685, "train_loss": 0.266442861688586, "train_loss_scale": 1095627.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 129, "n_parameters": 631477312} {"train_lr": 0.002957827514113639, "train_min_lr": 0.002957827514113639, "train_loss": 0.2664560670636069, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557937226413439, "epoch": 130, "n_parameters": 631477312} {"train_lr": 0.002957128024711851, "train_min_lr": 0.002957128024711851, "train_loss": 0.2664048592201792, "train_loss_scale": 626793.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 131, "n_parameters": 631477312} {"train_lr": 0.00295642286626039, "train_min_lr": 0.00295642286626039, "train_loss": 0.2663123585247936, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006523019971493154, "epoch": 132, "n_parameters": 631477312} {"train_lr": 0.0029557120415121658, "train_min_lr": 0.0029557120415121658, "train_loss": 0.26630949886599314, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006567404389035148, "epoch": 133, "n_parameters": 631477312} {"train_lr": 0.0029549955532422074, "train_min_lr": 0.0029549955532422074, "train_loss": 0.26624190092433053, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065439123128994536, "epoch": 134, "n_parameters": 631477312} {"train_lr": 0.0029542734042476613, "train_min_lr": 0.0029542734042476613, "train_loss": 0.266160569491032, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006544911233970943, "epoch": 135, "n_parameters": 631477312} {"train_lr": 0.0029535455973477634, "train_min_lr": 0.0029535455973477634, "train_loss": 0.26613511146607405, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065292442256274326, "epoch": 136, "n_parameters": 631477312} {"train_lr": 0.0029528121353838465, "train_min_lr": 0.0029528121353838465, "train_loss": 0.2660517660387529, "train_loss_scale": 730978.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006513871415219723, "epoch": 137, "n_parameters": 631477312} {"train_lr": 0.002952073021219313, "train_min_lr": 0.002952073021219313, "train_loss": 0.2659987007411054, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065334789016140765, "epoch": 138, "n_parameters": 631477312} {"train_lr": 0.002951328257739638, "train_min_lr": 0.002951328257739638, "train_loss": 0.26601393388894695, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065724195297568655, "epoch": 139, "n_parameters": 631477312} {"train_lr": 0.002950577847852346, "train_min_lr": 0.002950577847852346, "train_loss": 0.26596894080284983, "train_loss_scale": 1028411.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 140, "n_parameters": 631477312} {"train_lr": 0.0029498217944870075, "train_min_lr": 0.0029498217944870075, "train_loss": 0.2659373104912587, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006561086614592335, "epoch": 141, "n_parameters": 631477312} {"train_lr": 0.002949060100595227, "train_min_lr": 0.002949060100595227, "train_loss": 0.265859980458537, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006547628342209814, "epoch": 142, "n_parameters": 631477312} {"train_lr": 0.0029482927691506233, "train_min_lr": 0.0029482927691506233, "train_loss": 0.26583803509875464, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006562493204210813, "epoch": 143, "n_parameters": 631477312} {"train_lr": 0.0029475198031488313, "train_min_lr": 0.0029475198031488313, "train_loss": 0.2657960658528818, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006564917251802025, "epoch": 144, "n_parameters": 631477312} {"train_lr": 0.002946741205607474, "train_min_lr": 0.002946741205607474, "train_loss": 0.26576759239049774, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006568280387466821, "epoch": 145, "n_parameters": 631477312} {"train_lr": 0.002945956979566174, "train_min_lr": 0.002945956979566174, "train_loss": 0.26569968399902183, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065513374746586075, "epoch": 146, "n_parameters": 631477312} {"train_lr": 0.0029451671280865087, "train_min_lr": 0.0029451671280865087, "train_loss": 0.2656303164220821, "train_loss_scale": 853648.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006532919451367492, "epoch": 147, "n_parameters": 631477312} {"train_lr": 0.002944371654252032, "train_min_lr": 0.002944371654252032, "train_loss": 0.2656627440084823, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065791907165056234, "epoch": 148, "n_parameters": 631477312} {"train_lr": 0.0029435705611682425, "train_min_lr": 0.0029435705611682425, "train_loss": 0.26560622982334536, "train_loss_scale": 581421.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 149, "n_parameters": 631477312} {"train_lr": 0.0029427638519625714, "train_min_lr": 0.0029427638519625714, "train_loss": 0.2655483261138822, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006547867799571787, "epoch": 150, "n_parameters": 631477312} {"train_lr": 0.002941951529784382, "train_min_lr": 0.002941951529784382, "train_loss": 0.2655112442996305, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006562162119035537, "epoch": 151, "n_parameters": 631477312} {"train_lr": 0.00294113359780495, "train_min_lr": 0.00294113359780495, "train_loss": 0.2654912523799934, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006556345320020158, "epoch": 152, "n_parameters": 631477312} {"train_lr": 0.0029403100592174437, "train_min_lr": 0.0029403100592174437, "train_loss": 0.2654626227867527, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006544733437924431, "epoch": 153, "n_parameters": 631477312} {"train_lr": 0.0029394809172369253, "train_min_lr": 0.0029394809172369253, "train_loss": 0.26540009969940936, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006572478686650403, "epoch": 154, "n_parameters": 631477312} {"train_lr": 0.002938646175100337, "train_min_lr": 0.002938646175100337, "train_loss": 0.2653151648089242, "train_loss_scale": 776349.5384615385, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006562914657525909, "epoch": 155, "n_parameters": 631477312} {"train_lr": 0.0029378058360664777, "train_min_lr": 0.0029378058360664777, "train_loss": 0.2653267325606579, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065723236244267375, "epoch": 156, "n_parameters": 631477312} {"train_lr": 0.002936959903415989, "train_min_lr": 0.002936959903415989, "train_loss": 0.26529493492741424, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548206617410939, "epoch": 157, "n_parameters": 631477312} {"train_lr": 0.0029361083804513677, "train_min_lr": 0.0029361083804513677, "train_loss": 0.26526674724673516, "train_loss_scale": 609988.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 158, "n_parameters": 631477312} {"train_lr": 0.0029352512704969233, "train_min_lr": 0.0029352512704969233, "train_loss": 0.2652391279205823, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00660322754470727, "epoch": 159, "n_parameters": 631477312} {"train_lr": 0.0029343885768987757, "train_min_lr": 0.0029343885768987757, "train_loss": 0.2651830961724791, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065480219140553325, "epoch": 160, "n_parameters": 631477312} {"train_lr": 0.002933520303024848, "train_min_lr": 0.002933520303024848, "train_loss": 0.2651266627258454, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006546541824578666, "epoch": 161, "n_parameters": 631477312} {"train_lr": 0.0029326464522648503, "train_min_lr": 0.0029326464522648503, "train_loss": 0.2651330054415247, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065535250439260825, "epoch": 162, "n_parameters": 631477312} {"train_lr": 0.0029317670280302522, "train_min_lr": 0.0029317670280302522, "train_loss": 0.26506177667910474, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006599667970126925, "epoch": 163, "n_parameters": 631477312} {"train_lr": 0.0029308820337542985, "train_min_lr": 0.0029308820337542985, "train_loss": 0.26499145216523456, "train_loss_scale": 747782.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540568564863254, "epoch": 164, "n_parameters": 631477312} {"train_lr": 0.0029299914728919654, "train_min_lr": 0.0029299914728919654, "train_loss": 0.2650001044653786, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065391178851803905, "epoch": 165, "n_parameters": 631477312} {"train_lr": 0.0029290953489199754, "train_min_lr": 0.0029290953489199754, "train_loss": 0.26492455258416253, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548815769537424, "epoch": 166, "n_parameters": 631477312} {"train_lr": 0.00292819366533675, "train_min_lr": 0.00292819366533675, "train_loss": 0.2649194178058026, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006541854254185007, "epoch": 167, "n_parameters": 631477312} {"train_lr": 0.0029272864256624344, "train_min_lr": 0.0029272864256624344, "train_loss": 0.26491341159308857, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00654745168494395, "epoch": 168, "n_parameters": 631477312} {"train_lr": 0.002926373633438852, "train_min_lr": 0.002926373633438852, "train_loss": 0.26491227283035046, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00658075941595225, "epoch": 169, "n_parameters": 631477312} {"train_lr": 0.002925455292229509, "train_min_lr": 0.002925455292229509, "train_loss": 0.264811400598727, "train_loss_scale": 1065380.1025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006576964718241913, "epoch": 170, "n_parameters": 631477312} {"train_lr": 0.0029245314056195694, "train_min_lr": 0.0029245314056195694, "train_loss": 0.26482007384765893, "train_loss_scale": 1055297.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 171, "n_parameters": 631477312} {"train_lr": 0.0029236019772158526, "train_min_lr": 0.0029236019772158526, "train_loss": 0.2647033050411548, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006549136043012811, "epoch": 172, "n_parameters": 631477312} {"train_lr": 0.0029226670106468075, "train_min_lr": 0.0029226670106468075, "train_loss": 0.2647855150579021, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006561379893145596, "epoch": 173, "n_parameters": 631477312} {"train_lr": 0.0029217265095625097, "train_min_lr": 0.0029217265095625097, "train_loss": 0.2647385492485064, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575706683529111, "epoch": 174, "n_parameters": 631477312} {"train_lr": 0.002920780477634638, "train_min_lr": 0.002920780477634638, "train_loss": 0.2647300412197812, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006570608141676833, "epoch": 175, "n_parameters": 631477312} {"train_lr": 0.002919828918556457, "train_min_lr": 0.002919828918556457, "train_loss": 0.26469876060787684, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006568999001338409, "epoch": 176, "n_parameters": 631477312} {"train_lr": 0.0029188718360428187, "train_min_lr": 0.0029188718360428187, "train_loss": 0.26460430677383184, "train_loss_scale": 1660245.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006537606300881658, "epoch": 177, "n_parameters": 631477312} {"train_lr": 0.0029179092338301395, "train_min_lr": 0.0029179092338301395, "train_loss": 0.2646068007094212, "train_loss_scale": 1629997.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 178, "n_parameters": 631477312} {"train_lr": 0.002916941115676371, "train_min_lr": 0.002916941115676371, "train_loss": 0.26459475396236837, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575506661111155, "epoch": 179, "n_parameters": 631477312} {"train_lr": 0.0029159674853610168, "train_min_lr": 0.0029159674853610168, "train_loss": 0.26455197675834197, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006573972207171699, "epoch": 180, "n_parameters": 631477312} {"train_lr": 0.0029149883466850833, "train_min_lr": 0.0029149883466850833, "train_loss": 0.2645434021251276, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006685648453780091, "epoch": 181, "n_parameters": 631477312} {"train_lr": 0.0029140037034710927, "train_min_lr": 0.0029140037034710927, "train_loss": 0.2827565395243418, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008702406150032168, "epoch": 182, "n_parameters": 631477312} {"train_lr": 0.0029130135595630516, "train_min_lr": 0.0029130135595630516, "train_loss": 0.3620699073922319, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.004341430803693724, "epoch": 183, "n_parameters": 631477312} {"train_lr": 0.0029120179188264392, "train_min_lr": 0.0029120179188264392, "train_loss": 0.27531475428706753, "train_loss_scale": 1085545.0256410257, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008762381724619236, "epoch": 184, "n_parameters": 631477312} {"train_lr": 0.002911016785148203, "train_min_lr": 0.002911016785148203, "train_loss": 0.2664563362695611, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006682236705763409, "epoch": 185, "n_parameters": 631477312} {"train_lr": 0.0029100101624367283, "train_min_lr": 0.0029100101624367283, "train_loss": 0.2656506281047582, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006537490366146159, "epoch": 186, "n_parameters": 631477312} {"train_lr": 0.002908998054621824, "train_min_lr": 0.002908998054621824, "train_loss": 0.265363519369529, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006536838461238987, "epoch": 187, "n_parameters": 631477312} {"train_lr": 0.0029079804656547237, "train_min_lr": 0.0029079804656547237, "train_loss": 0.26503121566910964, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006497113053531696, "epoch": 188, "n_parameters": 631477312} {"train_lr": 0.0029069573995080546, "train_min_lr": 0.0029069573995080546, "train_loss": 0.26488704753753084, "train_loss_scale": 1283833.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 189, "n_parameters": 631477312} {"train_lr": 0.002905928860175819, "train_min_lr": 0.002905928860175819, "train_loss": 0.2648868960961222, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006728840245602605, "epoch": 190, "n_parameters": 631477312} {"train_lr": 0.002904894851673399, "train_min_lr": 0.002904894851673399, "train_loss": 0.2646683709612355, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006469340927791423, "epoch": 191, "n_parameters": 631477312} {"train_lr": 0.002903855378037524, "train_min_lr": 0.002903855378037524, "train_loss": 0.264594364991913, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006517679578302285, "epoch": 192, "n_parameters": 631477312} {"train_lr": 0.0029028104433262503, "train_min_lr": 0.0029028104433262503, "train_loss": 0.2644641782778005, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006470264768442855, "epoch": 193, "n_parameters": 631477312} {"train_lr": 0.0029017600516189723, "train_min_lr": 0.0029017600516189723, "train_loss": 0.2644525117461737, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006498132396537142, "epoch": 194, "n_parameters": 631477312} {"train_lr": 0.0029007042070163655, "train_min_lr": 0.0029007042070163655, "train_loss": 0.2643603117396243, "train_loss_scale": 1431709.5384615385, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006502126694286767, "epoch": 195, "n_parameters": 631477312} {"train_lr": 0.002899642913640412, "train_min_lr": 0.002899642913640412, "train_loss": 0.26431960572237867, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006477582605722814, "epoch": 196, "n_parameters": 631477312} {"train_lr": 0.002898576175634354, "train_min_lr": 0.002898576175634354, "train_loss": 0.2642808115372482, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006502011126707284, "epoch": 197, "n_parameters": 631477312} {"train_lr": 0.002897503997162699, "train_min_lr": 0.002897503997162699, "train_loss": 0.264274086859913, "train_loss_scale": 1421627.076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 198, "n_parameters": 631477312} {"train_lr": 0.002896426382411189, "train_min_lr": 0.002896426382411189, "train_loss": 0.2641983444306952, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006516318829455532, "epoch": 199, "n_parameters": 631477312} {"train_lr": 0.002895343335586782, "train_min_lr": 0.002895343335586782, "train_loss": 0.26417087249827975, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006504011195666419, "epoch": 200, "n_parameters": 631477312} {"train_lr": 0.002894254860917656, "train_min_lr": 0.002894254860917656, "train_loss": 0.2641432182117103, "train_loss_scale": 937668.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 201, "n_parameters": 631477312} {"train_lr": 0.0028931609626531673, "train_min_lr": 0.0028931609626531673, "train_loss": 0.26414511802618224, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065265977934289435, "epoch": 202, "n_parameters": 631477312} {"train_lr": 0.0028920616450638596, "train_min_lr": 0.0028920616450638596, "train_loss": 0.2640827912508916, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006543165836960842, "epoch": 203, "n_parameters": 631477312} {"train_lr": 0.0028909569124414124, "train_min_lr": 0.0028909569124414124, "train_loss": 0.2641084650620961, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006550617945881991, "epoch": 204, "n_parameters": 631477312} {"train_lr": 0.002889846769098671, "train_min_lr": 0.002889846769098671, "train_loss": 0.26399114984983146, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006545340178164247, "epoch": 205, "n_parameters": 631477312} {"train_lr": 0.0028887312193695805, "train_min_lr": 0.0028887312193695805, "train_loss": 0.2640001279576562, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539751838247936, "epoch": 206, "n_parameters": 631477312} {"train_lr": 0.002887610267609204, "train_min_lr": 0.002887610267609204, "train_loss": 0.2640142569419904, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006604244945964848, "epoch": 207, "n_parameters": 631477312} {"train_lr": 0.002886483918193695, "train_min_lr": 0.002886483918193695, "train_loss": 0.2639998899241432, "train_loss_scale": 944390.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006560698712769992, "epoch": 208, "n_parameters": 631477312} {"train_lr": 0.0028853521755202796, "train_min_lr": 0.0028853521755202796, "train_loss": 0.2639470641286327, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006554047441265236, "epoch": 209, "n_parameters": 631477312} {"train_lr": 0.0028842150440072253, "train_min_lr": 0.0028842150440072253, "train_loss": 0.26390155360329515, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006561824416096967, "epoch": 210, "n_parameters": 631477312} {"train_lr": 0.0028830725280938638, "train_min_lr": 0.0028830725280938638, "train_loss": 0.2638536180393436, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006552172563552188, "epoch": 211, "n_parameters": 631477312} {"train_lr": 0.002881924632240516, "train_min_lr": 0.002881924632240516, "train_loss": 0.2638514670752323, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540830166508945, "epoch": 212, "n_parameters": 631477312} {"train_lr": 0.002880771360928527, "train_min_lr": 0.002880771360928527, "train_loss": 0.2638097742948538, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006528641361420831, "epoch": 213, "n_parameters": 631477312} {"train_lr": 0.0028796127186602205, "train_min_lr": 0.0028796127186602205, "train_loss": 0.26378852421811855, "train_loss_scale": 1458596.1025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539802687863509, "epoch": 214, "n_parameters": 631477312} {"train_lr": 0.0028784487099588964, "train_min_lr": 0.0028784487099588964, "train_loss": 0.26375675842297286, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065607952971894964, "epoch": 215, "n_parameters": 631477312} {"train_lr": 0.0028772793393687934, "train_min_lr": 0.0028772793393687934, "train_loss": 0.2636970101993006, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006537033395710377, "epoch": 216, "n_parameters": 631477312} {"train_lr": 0.002876104611455086, "train_min_lr": 0.002876104611455086, "train_loss": 0.26370543959992343, "train_loss_scale": 1297276.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 217, "n_parameters": 631477312} {"train_lr": 0.002874924530803863, "train_min_lr": 0.002874924530803863, "train_loss": 0.2636751189815979, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065465139953467325, "epoch": 218, "n_parameters": 631477312} {"train_lr": 0.002873739102022118, "train_min_lr": 0.002873739102022118, "train_loss": 0.26367801395304596, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006554891561301282, "epoch": 219, "n_parameters": 631477312} {"train_lr": 0.0028725483297377154, "train_min_lr": 0.0028725483297377154, "train_loss": 0.2636764326926846, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006573593852898249, "epoch": 220, "n_parameters": 631477312} {"train_lr": 0.0028713522185993853, "train_min_lr": 0.0028713522185993853, "train_loss": 0.26359315121021026, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006559846106355485, "epoch": 221, "n_parameters": 631477312} {"train_lr": 0.0028701507732766898, "train_min_lr": 0.0028701507732766898, "train_loss": 0.26359585516202527, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557979280702197, "epoch": 222, "n_parameters": 631477312} {"train_lr": 0.002868943998460023, "train_min_lr": 0.002868943998460023, "train_loss": 0.2635470098988989, "train_loss_scale": 1414905.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 223, "n_parameters": 631477312} {"train_lr": 0.0028677318988605963, "train_min_lr": 0.0028677318988605963, "train_loss": 0.2635368946939707, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557439944933718, "epoch": 224, "n_parameters": 631477312} {"train_lr": 0.0028665144792103924, "train_min_lr": 0.0028665144792103924, "train_loss": 0.2635211028719846, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006584299864092221, "epoch": 225, "n_parameters": 631477312} {"train_lr": 0.0028652917442621704, "train_min_lr": 0.0028652917442621704, "train_loss": 0.2635278669484437, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006599123653872177, "epoch": 226, "n_parameters": 631477312} {"train_lr": 0.0028640636987894296, "train_min_lr": 0.0028640636987894296, "train_loss": 0.26350922518087405, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065740765466426425, "epoch": 227, "n_parameters": 631477312} {"train_lr": 0.002862830347586419, "train_min_lr": 0.002862830347586419, "train_loss": 0.26342913056186473, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006551043012549575, "epoch": 228, "n_parameters": 631477312} {"train_lr": 0.002861591695468095, "train_min_lr": 0.002861591695468095, "train_loss": 0.26344874426304626, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006642756486633936, "epoch": 229, "n_parameters": 631477312} {"train_lr": 0.0028603477472700974, "train_min_lr": 0.0028603477472700974, "train_loss": 0.2634168291936079, "train_loss_scale": 1670327.7948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006587289128965961, "epoch": 230, "n_parameters": 631477312} {"train_lr": 0.002859098507848755, "train_min_lr": 0.002859098507848755, "train_loss": 0.26343479234343153, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006595612555825844, "epoch": 231, "n_parameters": 631477312} {"train_lr": 0.0028578439820810507, "train_min_lr": 0.0028578439820810507, "train_loss": 0.2633856885935156, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006561705190389871, "epoch": 232, "n_parameters": 631477312} {"train_lr": 0.0028565841748646012, "train_min_lr": 0.0028565841748646012, "train_loss": 0.2633497452106661, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006562650041021884, "epoch": 233, "n_parameters": 631477312} {"train_lr": 0.0028553190911176384, "train_min_lr": 0.0028553190911176384, "train_loss": 0.2633154574327935, "train_loss_scale": 1377936.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 234, "n_parameters": 631477312} {"train_lr": 0.0028540487357790047, "train_min_lr": 0.0028540487357790047, "train_loss": 0.26327941331188554, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006576307825385951, "epoch": 235, "n_parameters": 631477312} {"train_lr": 0.0028527731138081117, "train_min_lr": 0.0028527731138081117, "train_loss": 0.2632905230887282, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006563651915759039, "epoch": 236, "n_parameters": 631477312} {"train_lr": 0.0028514922301849416, "train_min_lr": 0.0028514922301849416, "train_loss": 0.26329306450087386, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006596927220622699, "epoch": 237, "n_parameters": 631477312} {"train_lr": 0.002850206089910009, "train_min_lr": 0.002850206089910009, "train_loss": 0.2632844497073585, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006581047039407377, "epoch": 238, "n_parameters": 631477312} {"train_lr": 0.0028489146980043545, "train_min_lr": 0.0028489146980043545, "train_loss": 0.2632326792973356, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065672558505470175, "epoch": 239, "n_parameters": 631477312} {"train_lr": 0.0028476180595095237, "train_min_lr": 0.0028476180595095237, "train_loss": 0.2631745212412893, "train_loss_scale": 1082184.2051282052, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 240, "n_parameters": 631477312} {"train_lr": 0.002846316179487536, "train_min_lr": 0.002846316179487536, "train_loss": 0.2631972414226486, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006627326478990607, "epoch": 241, "n_parameters": 631477312} {"train_lr": 0.0028450090630208814, "train_min_lr": 0.0028450090630208814, "train_loss": 0.2631154488563203, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006566858905642174, "epoch": 242, "n_parameters": 631477312} {"train_lr": 0.0028436967152124944, "train_min_lr": 0.0028436967152124944, "train_loss": 0.26310177645287836, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006530491459087875, "epoch": 243, "n_parameters": 631477312} {"train_lr": 0.0028423791411857206, "train_min_lr": 0.0028423791411857206, "train_loss": 0.26311223045325816, "train_loss_scale": 858689.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 244, "n_parameters": 631477312} {"train_lr": 0.0028410563460843246, "train_min_lr": 0.0028410563460843246, "train_loss": 0.2630877485588336, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006552842120305659, "epoch": 245, "n_parameters": 631477312} {"train_lr": 0.0028397283350724365, "train_min_lr": 0.0028397283350724365, "train_loss": 0.26310124996607787, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575683704935587, "epoch": 246, "n_parameters": 631477312} {"train_lr": 0.002838395113334564, "train_min_lr": 0.002838395113334564, "train_loss": 0.26303004637921756, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006544813348087841, "epoch": 247, "n_parameters": 631477312} {"train_lr": 0.0028370566860755502, "train_min_lr": 0.0028370566860755502, "train_loss": 0.26304915830349695, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00658981511160397, "epoch": 248, "n_parameters": 631477312} {"train_lr": 0.0028357130585205613, "train_min_lr": 0.0028357130585205613, "train_loss": 0.262998830831538, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00657280924049421, "epoch": 249, "n_parameters": 631477312} {"train_lr": 0.0028343642359150677, "train_min_lr": 0.0028343642359150677, "train_loss": 0.2630161976543231, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006607614991923747, "epoch": 250, "n_parameters": 631477312} {"train_lr": 0.002833010223524816, "train_min_lr": 0.002833010223524816, "train_loss": 0.2629789485870741, "train_loss_scale": 1023369.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00657855187697957, "epoch": 251, "n_parameters": 631477312} {"train_lr": 0.0028316510266358143, "train_min_lr": 0.0028316510266358143, "train_loss": 0.26299568502387655, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00660558216399155, "epoch": 252, "n_parameters": 631477312} {"train_lr": 0.0028302866505543183, "train_min_lr": 0.0028302866505543183, "train_loss": 0.26295352768069374, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006563909565253804, "epoch": 253, "n_parameters": 631477312} {"train_lr": 0.002828917100606794, "train_min_lr": 0.002828917100606794, "train_loss": 0.2629275294647624, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006614170365560896, "epoch": 254, "n_parameters": 631477312} {"train_lr": 0.0028275423821399106, "train_min_lr": 0.0028275423821399106, "train_loss": 0.2629558066568839, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006612658817082262, "epoch": 255, "n_parameters": 631477312} {"train_lr": 0.002826162500520514, "train_min_lr": 0.002826162500520514, "train_loss": 0.2629457742889197, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006585044372910395, "epoch": 256, "n_parameters": 631477312} {"train_lr": 0.002824777461135607, "train_min_lr": 0.002824777461135607, "train_loss": 0.26287501119375706, "train_loss_scale": 1616554.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006599269784951152, "epoch": 257, "n_parameters": 631477312} {"train_lr": 0.0028233872693923315, "train_min_lr": 0.0028233872693923315, "train_loss": 0.2628529711017528, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006592540976364547, "epoch": 258, "n_parameters": 631477312} {"train_lr": 0.0028219919307179283, "train_min_lr": 0.0028219919307179283, "train_loss": 0.2628644816965486, "train_loss_scale": 1297276.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 259, "n_parameters": 631477312} {"train_lr": 0.0028205914505597556, "train_min_lr": 0.0028205914505597556, "train_loss": 0.2627764153950967, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006569043507811447, "epoch": 260, "n_parameters": 631477312} {"train_lr": 0.002819185834385233, "train_min_lr": 0.002819185834385233, "train_loss": 0.262799058527423, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066224706018916685, "epoch": 261, "n_parameters": 631477312} {"train_lr": 0.002817775087681821, "train_min_lr": 0.002817775087681821, "train_loss": 0.26274470425843716, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006588006696592157, "epoch": 262, "n_parameters": 631477312} {"train_lr": 0.0028163592159570175, "train_min_lr": 0.0028163592159570175, "train_loss": 0.26284767744334364, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066275915283208284, "epoch": 263, "n_parameters": 631477312} {"train_lr": 0.0028149382247383407, "train_min_lr": 0.0028149382247383407, "train_loss": 0.2627495086262337, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066011779469987136, "epoch": 264, "n_parameters": 631477312} {"train_lr": 0.0028135121195732755, "train_min_lr": 0.0028135121195732755, "train_loss": 0.2627813475809466, "train_loss_scale": 1418266.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066117518566524945, "epoch": 265, "n_parameters": 631477312} {"train_lr": 0.002812080906029277, "train_min_lr": 0.002812080906029277, "train_loss": 0.26271034753582895, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066032722825184464, "epoch": 266, "n_parameters": 631477312} {"train_lr": 0.0028106445896937493, "train_min_lr": 0.0028106445896937493, "train_loss": 0.2626845704463239, "train_loss_scale": 1734183.3846153845, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 267, "n_parameters": 631477312} {"train_lr": 0.002809203176174018, "train_min_lr": 0.002809203176174018, "train_loss": 0.26269257250719535, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066403581638438385, "epoch": 268, "n_parameters": 631477312} {"train_lr": 0.0028077566710972965, "train_min_lr": 0.0028077566710972965, "train_loss": 0.26271327215139395, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006652207894382091, "epoch": 269, "n_parameters": 631477312} {"train_lr": 0.002806305080110684, "train_min_lr": 0.002806305080110684, "train_loss": 0.2626312723001226, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006585554003369254, "epoch": 270, "n_parameters": 631477312} {"train_lr": 0.002804848408881137, "train_min_lr": 0.002804848408881137, "train_loss": 0.26263986884031254, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006597586449247618, "epoch": 271, "n_parameters": 631477312} {"train_lr": 0.0028033866630954372, "train_min_lr": 0.0028033866630954372, "train_loss": 0.26262007119098246, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065997927703369316, "epoch": 272, "n_parameters": 631477312} {"train_lr": 0.0028019198484601905, "train_min_lr": 0.0028019198484601905, "train_loss": 0.2626432527949174, "train_loss_scale": 851968.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 273, "n_parameters": 631477312} {"train_lr": 0.0028004479707017717, "train_min_lr": 0.0028004479707017717, "train_loss": 0.2625883071301266, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006616538277086921, "epoch": 274, "n_parameters": 631477312} {"train_lr": 0.002798971035566338, "train_min_lr": 0.002798971035566338, "train_loss": 0.29123151544720316, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008372896618078439, "epoch": 275, "n_parameters": 631477312} {"train_lr": 0.002797489048819781, "train_min_lr": 0.002797489048819781, "train_loss": 0.3319710034948702, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00822662157625951, "epoch": 276, "n_parameters": 631477312} {"train_lr": 0.002796002016247727, "train_min_lr": 0.002796002016247727, "train_loss": 0.2654220850642723, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007030997971574275, "epoch": 277, "n_parameters": 631477312} {"train_lr": 0.0027945099436554794, "train_min_lr": 0.0027945099436554794, "train_loss": 0.26393361786643094, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006663856557749499, "epoch": 278, "n_parameters": 631477312} {"train_lr": 0.0027930128368680407, "train_min_lr": 0.0027930128368680407, "train_loss": 0.26341805662601614, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066060105932709305, "epoch": 279, "n_parameters": 631477312} {"train_lr": 0.002791510701730047, "train_min_lr": 0.002791510701730047, "train_loss": 0.26309075778147256, "train_loss_scale": 1030091.4871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006515495107473375, "epoch": 280, "n_parameters": 631477312} {"train_lr": 0.0027900035441057827, "train_min_lr": 0.0027900035441057827, "train_loss": 0.262918028410357, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006500894444075246, "epoch": 281, "n_parameters": 631477312} {"train_lr": 0.0027884913698791205, "train_min_lr": 0.0027884913698791205, "train_loss": 0.26290274824565035, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540893678529522, "epoch": 282, "n_parameters": 631477312} {"train_lr": 0.002786974184953536, "train_min_lr": 0.002786974184953536, "train_loss": 0.26273413848442334, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00653481281374414, "epoch": 283, "n_parameters": 631477312} {"train_lr": 0.0027854519952520565, "train_min_lr": 0.0027854519952520565, "train_loss": 0.2626607693427314, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006530100645389981, "epoch": 284, "n_parameters": 631477312} {"train_lr": 0.002783924806717247, "train_min_lr": 0.002783924806717247, "train_loss": 0.26263887000282127, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065428644660502095, "epoch": 285, "n_parameters": 631477312} {"train_lr": 0.0027823926253111963, "train_min_lr": 0.0027823926253111963, "train_loss": 0.26262710821659613, "train_loss_scale": 1629997.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006564782217491227, "epoch": 286, "n_parameters": 631477312} {"train_lr": 0.0027808554570154715, "train_min_lr": 0.0027808554570154715, "train_loss": 0.2625475460723138, "train_loss_scale": 1535894.9743589743, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 287, "n_parameters": 631477312} {"train_lr": 0.0027793133078311215, "train_min_lr": 0.0027793133078311215, "train_loss": 0.2625465663836505, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557151970740121, "epoch": 288, "n_parameters": 631477312} {"train_lr": 0.0027777661837786348, "train_min_lr": 0.0027777661837786348, "train_loss": 0.2625021021783304, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539210572671623, "epoch": 289, "n_parameters": 631477312} {"train_lr": 0.0027762140908979215, "train_min_lr": 0.0027762140908979215, "train_loss": 0.2624562163299953, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006582773135354121, "epoch": 290, "n_parameters": 631477312} {"train_lr": 0.002774657035248286, "train_min_lr": 0.002774657035248286, "train_loss": 0.26242388465489525, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539259250693692, "epoch": 291, "n_parameters": 631477312} {"train_lr": 0.002773095022908419, "train_min_lr": 0.002773095022908419, "train_loss": 0.2624075788503083, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00657226927488899, "epoch": 292, "n_parameters": 631477312} {"train_lr": 0.00277152805997634, "train_min_lr": 0.00277152805997634, "train_loss": 0.26235316329313296, "train_loss_scale": 1179648.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578913042083001, "epoch": 293, "n_parameters": 631477312} {"train_lr": 0.002769956152569427, "train_min_lr": 0.002769956152569427, "train_loss": 0.2809866104537669, "train_loss_scale": 1660245.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 294, "n_parameters": 631477312} {"train_lr": 0.002768379306824332, "train_min_lr": 0.002768379306824332, "train_loss": 0.26502365314473325, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007050528647736288, "epoch": 295, "n_parameters": 631477312} {"train_lr": 0.002766797528897003, "train_min_lr": 0.002766797528897003, "train_loss": 0.2634434991498263, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006609727167345297, "epoch": 296, "n_parameters": 631477312} {"train_lr": 0.0027652108249626352, "train_min_lr": 0.0027652108249626352, "train_loss": 0.2629275426017837, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00651300854037683, "epoch": 297, "n_parameters": 631477312} {"train_lr": 0.002763619201215655, "train_min_lr": 0.002763619201215655, "train_loss": 0.26267690997487175, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00650164378562178, "epoch": 298, "n_parameters": 631477312} {"train_lr": 0.002762022663869706, "train_min_lr": 0.002762022663869706, "train_loss": 0.262567362595851, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006519422936551751, "epoch": 299, "n_parameters": 631477312} {"train_lr": 0.0027604212191575973, "train_min_lr": 0.0027604212191575973, "train_loss": 0.2624827190600813, "train_loss_scale": 1055297.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006489278071034604, "epoch": 300, "n_parameters": 631477312} {"train_lr": 0.0027588148733313092, "train_min_lr": 0.0027588148733313092, "train_loss": 0.2623606871676225, "train_loss_scale": 1784595.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 301, "n_parameters": 631477312} {"train_lr": 0.002757203632661952, "train_min_lr": 0.002757203632661952, "train_loss": 0.26232638803776354, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006513039600880195, "epoch": 302, "n_parameters": 631477312} {"train_lr": 0.002755587503439741, "train_min_lr": 0.002755587503439741, "train_loss": 0.26230316175124013, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006513656230452351, "epoch": 303, "n_parameters": 631477312} {"train_lr": 0.002753966491973985, "train_min_lr": 0.002753966491973985, "train_loss": 0.2622372233099901, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540418951772153, "epoch": 304, "n_parameters": 631477312} {"train_lr": 0.0027523406045930456, "train_min_lr": 0.0027523406045930456, "train_loss": 0.2622303194097745, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006530854027145184, "epoch": 305, "n_parameters": 631477312} {"train_lr": 0.0027507098476443215, "train_min_lr": 0.0027507098476443215, "train_loss": 0.2622037911071227, "train_loss_scale": 739380.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 306, "n_parameters": 631477312} {"train_lr": 0.0027490742274942233, "train_min_lr": 0.0027490742274942233, "train_loss": 0.262187444512398, "train_loss_scale": 265504.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 307, "n_parameters": 631477312} {"train_lr": 0.002747433750528143, "train_min_lr": 0.002747433750528143, "train_loss": 0.2622121414229369, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006746895084432207, "epoch": 308, "n_parameters": 631477312} {"train_lr": 0.0027457884231504442, "train_min_lr": 0.0027457884231504442, "train_loss": 0.2621664089682058, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006605474945480147, "epoch": 309, "n_parameters": 631477312} {"train_lr": 0.002744138251784411, "train_min_lr": 0.002744138251784411, "train_loss": 0.26220044702028805, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006661541712207672, "epoch": 310, "n_parameters": 631477312} {"train_lr": 0.00274248324287225, "train_min_lr": 0.00274248324287225, "train_loss": 0.2621510202984493, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006613348749502061, "epoch": 311, "n_parameters": 631477312} {"train_lr": 0.0027408234028750557, "train_min_lr": 0.0027408234028750557, "train_loss": 0.26212055880862933, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065765205967741516, "epoch": 312, "n_parameters": 631477312} {"train_lr": 0.0027391587382727695, "train_min_lr": 0.0027391587382727695, "train_loss": 0.2620754563607849, "train_loss_scale": 413380.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578048932939195, "epoch": 313, "n_parameters": 631477312} {"train_lr": 0.0027374892555641762, "train_min_lr": 0.0027374892555641762, "train_loss": 0.2620669735673194, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006549629490249432, "epoch": 314, "n_parameters": 631477312} {"train_lr": 0.002735814961266868, "train_min_lr": 0.002735814961266868, "train_loss": 0.2620461559914148, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065976282736907406, "epoch": 315, "n_parameters": 631477312} {"train_lr": 0.002734135861917227, "train_min_lr": 0.002734135861917227, "train_loss": 0.2619965531988643, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575637380592525, "epoch": 316, "n_parameters": 631477312} {"train_lr": 0.002732451964070391, "train_min_lr": 0.002732451964070391, "train_loss": 0.2619846568604072, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006587689766326012, "epoch": 317, "n_parameters": 631477312} {"train_lr": 0.0027307632743002247, "train_min_lr": 0.0027307632743002247, "train_loss": 0.2619940447949398, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006571683047625881, "epoch": 318, "n_parameters": 631477312} {"train_lr": 0.0027290697991993085, "train_min_lr": 0.0027290697991993085, "train_loss": 0.2619834672420835, "train_loss_scale": 611669.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006611346822375288, "epoch": 319, "n_parameters": 631477312} {"train_lr": 0.0027273715453788993, "train_min_lr": 0.0027273715453788993, "train_loss": 0.2619715805631131, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006630331221132133, "epoch": 320, "n_parameters": 631477312} {"train_lr": 0.0027256685194689133, "train_min_lr": 0.0027256685194689133, "train_loss": 0.26192819966206277, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066091132660706835, "epoch": 321, "n_parameters": 631477312} {"train_lr": 0.0027239607281178977, "train_min_lr": 0.0027239607281178977, "train_loss": 0.26192835142585236, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065980590297840536, "epoch": 322, "n_parameters": 631477312} {"train_lr": 0.002722248177992999, "train_min_lr": 0.002722248177992999, "train_loss": 0.26193258985996437, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636538075653311, "epoch": 323, "n_parameters": 631477312} {"train_lr": 0.0027205308757799426, "train_min_lr": 0.0027205308757799426, "train_loss": 0.26191795909574306, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006621314579835878, "epoch": 324, "n_parameters": 631477312} {"train_lr": 0.002718808828183009, "train_min_lr": 0.002718808828183009, "train_loss": 0.2618772254820961, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006651196242250406, "epoch": 325, "n_parameters": 631477312} {"train_lr": 0.002717082041925007, "train_min_lr": 0.002717082041925007, "train_loss": 0.26185948905857426, "train_loss_scale": 1841729.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006607642480864739, "epoch": 326, "n_parameters": 631477312} {"train_lr": 0.0027153505237472383, "train_min_lr": 0.0027153505237472383, "train_loss": 0.26188063646595067, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006625565540535041, "epoch": 327, "n_parameters": 631477312} {"train_lr": 0.0027136142804094735, "train_min_lr": 0.0027136142804094735, "train_loss": 0.2618335885300229, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066255198027461, "epoch": 328, "n_parameters": 631477312} {"train_lr": 0.0027118733186899478, "train_min_lr": 0.0027118733186899478, "train_loss": 0.2618434911396784, "train_loss_scale": 1424987.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 329, "n_parameters": 631477312} {"train_lr": 0.0027101276453853035, "train_min_lr": 0.0027101276453853035, "train_loss": 0.2618416638071768, "train_loss_scale": 762906.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 330, "n_parameters": 631477312} {"train_lr": 0.0027083772673105774, "train_min_lr": 0.0027083772673105774, "train_loss": 0.26177542176670754, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006611824916221966, "epoch": 331, "n_parameters": 631477312} {"train_lr": 0.0027066221912991715, "train_min_lr": 0.0027066221912991715, "train_loss": 0.2617881037026214, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636231406353032, "epoch": 332, "n_parameters": 631477312} {"train_lr": 0.002704862424202841, "train_min_lr": 0.002704862424202841, "train_loss": 0.2617893631552131, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066422209403334325, "epoch": 333, "n_parameters": 631477312} {"train_lr": 0.0027030979728916386, "train_min_lr": 0.0027030979728916386, "train_loss": 0.2617776683943633, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006635310034303425, "epoch": 334, "n_parameters": 631477312} {"train_lr": 0.002701328844253914, "train_min_lr": 0.002701328844253914, "train_loss": 0.26174562422630304, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006621844123284786, "epoch": 335, "n_parameters": 631477312} {"train_lr": 0.0026995550451962757, "train_min_lr": 0.0026995550451962757, "train_loss": 0.27434820600319654, "train_loss_scale": 594865.2307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01246341514156367, "epoch": 336, "n_parameters": 631477312} {"train_lr": 0.002697776582643566, "train_min_lr": 0.002697776582643566, "train_loss": 0.2668208605639684, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008195882710890893, "epoch": 337, "n_parameters": 631477312} {"train_lr": 0.00269599346353883, "train_min_lr": 0.00269599346353883, "train_loss": 0.2629414137560301, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067073840510625485, "epoch": 338, "n_parameters": 631477312} {"train_lr": 0.0026942056948432926, "train_min_lr": 0.0026942056948432926, "train_loss": 0.26238987607379943, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006622615541952352, "epoch": 339, "n_parameters": 631477312} {"train_lr": 0.0026924132835363284, "train_min_lr": 0.0026924132835363284, "train_loss": 0.2621023961378691, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548529319190539, "epoch": 340, "n_parameters": 631477312} {"train_lr": 0.002690616236615441, "train_min_lr": 0.002690616236615441, "train_loss": 0.2619010190694378, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006543190924462696, "epoch": 341, "n_parameters": 631477312} {"train_lr": 0.0026888145610962336, "train_min_lr": 0.0026888145610962336, "train_loss": 0.2618150401514215, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006568998102850925, "epoch": 342, "n_parameters": 631477312} {"train_lr": 0.0026870082640123717, "train_min_lr": 0.0026870082640123717, "train_loss": 0.2617831662004718, "train_loss_scale": 1808121.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006559282121881365, "epoch": 343, "n_parameters": 631477312} {"train_lr": 0.0026851973524155666, "train_min_lr": 0.0026851973524155666, "train_loss": 0.26170988917314947, "train_loss_scale": 1818203.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 344, "n_parameters": 631477312} {"train_lr": 0.0026833818333755464, "train_min_lr": 0.0026833818333755464, "train_loss": 0.2616621806160905, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006588294961824058, "epoch": 345, "n_parameters": 631477312} {"train_lr": 0.002681561713980024, "train_min_lr": 0.002681561713980024, "train_loss": 0.26162171895460534, "train_loss_scale": 594865.2307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 346, "n_parameters": 631477312} {"train_lr": 0.002679737001334669, "train_min_lr": 0.002679737001334669, "train_loss": 0.2616051785104598, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006600707990284531, "epoch": 347, "n_parameters": 631477312} {"train_lr": 0.0026779077025630943, "train_min_lr": 0.0026779077025630943, "train_loss": 0.26158945948983997, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006601805752441765, "epoch": 348, "n_parameters": 631477312} {"train_lr": 0.002676073824806804, "train_min_lr": 0.002676073824806804, "train_loss": 0.2615474621430995, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006600164344164137, "epoch": 349, "n_parameters": 631477312} {"train_lr": 0.0026742353752251882, "train_min_lr": 0.0026742353752251882, "train_loss": 0.2615787388834481, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006620145216285705, "epoch": 350, "n_parameters": 631477312} {"train_lr": 0.002672392360995473, "train_min_lr": 0.002672392360995473, "train_loss": 0.26155120203008825, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006611581302236001, "epoch": 351, "n_parameters": 631477312} {"train_lr": 0.002670544789312714, "train_min_lr": 0.002670544789312714, "train_loss": 0.26153468768088484, "train_loss_scale": 762906.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636689860337915, "epoch": 352, "n_parameters": 631477312} {"train_lr": 0.0026686926673897596, "train_min_lr": 0.0026686926673897596, "train_loss": 0.2614977153746459, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006629014966818385, "epoch": 353, "n_parameters": 631477312} {"train_lr": 0.0026668360024572186, "train_min_lr": 0.0026668360024572186, "train_loss": 0.26146475806868136, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006634052254808828, "epoch": 354, "n_parameters": 631477312} {"train_lr": 0.0026649748017634396, "train_min_lr": 0.0026649748017634396, "train_loss": 0.2615159559899416, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006661283498546347, "epoch": 355, "n_parameters": 631477312} {"train_lr": 0.002663109072574473, "train_min_lr": 0.002663109072574473, "train_loss": 0.2614818986099309, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066440160466859546, "epoch": 356, "n_parameters": 631477312} {"train_lr": 0.00266123882217405, "train_min_lr": 0.00266123882217405, "train_loss": 0.2614506137622998, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636899590002707, "epoch": 357, "n_parameters": 631477312} {"train_lr": 0.0026593640578635516, "train_min_lr": 0.0026593640578635516, "train_loss": 0.26145325134842634, "train_loss_scale": 1095627.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006684459989162114, "epoch": 358, "n_parameters": 631477312} {"train_lr": 0.002657484786961986, "train_min_lr": 0.002657484786961986, "train_loss": 0.26143764699092853, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00664127299723287, "epoch": 359, "n_parameters": 631477312} {"train_lr": 0.0026556010168059534, "train_min_lr": 0.0026556010168059534, "train_loss": 0.2614194789113334, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006645846837319625, "epoch": 360, "n_parameters": 631477312} {"train_lr": 0.0026537127547496104, "train_min_lr": 0.0026537127547496104, "train_loss": 0.2613925660381285, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006645855869954595, "epoch": 361, "n_parameters": 631477312} {"train_lr": 0.0026518200081646626, "train_min_lr": 0.0026518200081646626, "train_loss": 0.2613680849466712, "train_loss_scale": 1717379.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 362, "n_parameters": 631477312} {"train_lr": 0.0026499227844403155, "train_min_lr": 0.0026499227844403155, "train_loss": 0.26137455835795176, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006668155296490743, "epoch": 363, "n_parameters": 631477312} {"train_lr": 0.002648021090983251, "train_min_lr": 0.002648021090983251, "train_loss": 0.26140387401056403, "train_loss_scale": 853648.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 364, "n_parameters": 631477312} {"train_lr": 0.0026461149352176097, "train_min_lr": 0.0026461149352176097, "train_loss": 0.2613885810700221, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006703173749160786, "epoch": 365, "n_parameters": 631477312} {"train_lr": 0.00264420432458494, "train_min_lr": 0.00264420432458494, "train_loss": 0.261317252772502, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066786744461681405, "epoch": 366, "n_parameters": 631477312} {"train_lr": 0.0026422892665441985, "train_min_lr": 0.0026422892665441985, "train_loss": 0.2612890552711458, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006654830727105339, "epoch": 367, "n_parameters": 631477312} {"train_lr": 0.002640369768571687, "train_min_lr": 0.002640369768571687, "train_loss": 0.26132228829336757, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006677658265778938, "epoch": 368, "n_parameters": 631477312} {"train_lr": 0.0026384458381610567, "train_min_lr": 0.0026384458381610567, "train_loss": 0.26136729368068373, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006784180719524813, "epoch": 369, "n_parameters": 631477312} {"train_lr": 0.002636517482823248, "train_min_lr": 0.002636517482823248, "train_loss": 0.2612748893383795, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067081909570007175, "epoch": 370, "n_parameters": 631477312} {"train_lr": 0.0026345847100864854, "train_min_lr": 0.0026345847100864854, "train_loss": 0.26128039322196484, "train_loss_scale": 1028411.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006695618569397201, "epoch": 371, "n_parameters": 631477312} {"train_lr": 0.002632647527496237, "train_min_lr": 0.002632647527496237, "train_loss": 0.26125409688131934, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006685809577063013, "epoch": 372, "n_parameters": 631477312} {"train_lr": 0.0026307059426151857, "train_min_lr": 0.0026307059426151857, "train_loss": 0.26123618026180434, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006670650327578187, "epoch": 373, "n_parameters": 631477312} {"train_lr": 0.002628759963023199, "train_min_lr": 0.002628759963023199, "train_loss": 0.2612074230475208, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006690865594487733, "epoch": 374, "n_parameters": 631477312} {"train_lr": 0.0026268095963173075, "train_min_lr": 0.0026268095963173075, "train_loss": 0.2612319071448814, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006704411729095647, "epoch": 375, "n_parameters": 631477312} {"train_lr": 0.0026248548501116606, "train_min_lr": 0.0026248548501116606, "train_loss": 0.26120179490102696, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006673027001595937, "epoch": 376, "n_parameters": 631477312} {"train_lr": 0.002622895732037515, "train_min_lr": 0.002622895732037515, "train_loss": 0.26118628128778, "train_loss_scale": 1388018.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 377, "n_parameters": 631477312} {"train_lr": 0.0026209322497431787, "train_min_lr": 0.0026209322497431787, "train_loss": 0.26117716666060287, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00671250834607352, "epoch": 378, "n_parameters": 631477312} {"train_lr": 0.0026189644108940142, "train_min_lr": 0.0026189644108940142, "train_loss": 0.26114153708271587, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006670846196357161, "epoch": 379, "n_parameters": 631477312} {"train_lr": 0.002616992223172395, "train_min_lr": 0.002616992223172395, "train_loss": 0.2611444439762869, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006703101941802276, "epoch": 380, "n_parameters": 631477312} {"train_lr": 0.00261501569427765, "train_min_lr": 0.00261501569427765, "train_loss": 0.261189113311374, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00675663315744784, "epoch": 381, "n_parameters": 631477312} {"train_lr": 0.002613034831926069, "train_min_lr": 0.002613034831926069, "train_loss": 0.2611489669025804, "train_loss_scale": 662081.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 382, "n_parameters": 631477312} {"train_lr": 0.002611049643850867, "train_min_lr": 0.002611049643850867, "train_loss": 0.2610852118217362, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066979309448446985, "epoch": 383, "n_parameters": 631477312} {"train_lr": 0.0026090601378021354, "train_min_lr": 0.0026090601378021354, "train_loss": 0.2610872373916209, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066926121571435565, "epoch": 384, "n_parameters": 631477312} {"train_lr": 0.0026070663215468225, "train_min_lr": 0.0026070663215468225, "train_loss": 0.26110367816037094, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006725321410414882, "epoch": 385, "n_parameters": 631477312} {"train_lr": 0.002605068202868711, "train_min_lr": 0.002605068202868711, "train_loss": 0.26107307721992046, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067120192345613856, "epoch": 386, "n_parameters": 631477312} {"train_lr": 0.0026030657895683754, "train_min_lr": 0.0026030657895683754, "train_loss": 0.2611021338066516, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006800964158267165, "epoch": 387, "n_parameters": 631477312} {"train_lr": 0.002601059089463159, "train_min_lr": 0.002601059089463159, "train_loss": 0.2610257487708273, "train_loss_scale": 695689.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006710117761917316, "epoch": 388, "n_parameters": 631477312} {"train_lr": 0.0025990481103871357, "train_min_lr": 0.0025990481103871357, "train_loss": 0.2610336430161857, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006716447291322625, "epoch": 389, "n_parameters": 631477312} {"train_lr": 0.0025970328601910876, "train_min_lr": 0.0025970328601910876, "train_loss": 0.2610358785127457, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006718340338780903, "epoch": 390, "n_parameters": 631477312} {"train_lr": 0.002595013346742474, "train_min_lr": 0.002595013346742474, "train_loss": 0.2610054210616419, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006705646215078349, "epoch": 391, "n_parameters": 631477312} {"train_lr": 0.002592989577925391, "train_min_lr": 0.002592989577925391, "train_loss": 0.2609542436563434, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006696073553309991, "epoch": 392, "n_parameters": 631477312} {"train_lr": 0.0025909615616405536, "train_min_lr": 0.0025909615616405536, "train_loss": 0.26100747040114725, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067411343543193275, "epoch": 393, "n_parameters": 631477312} {"train_lr": 0.0025889293058052524, "train_min_lr": 0.0025889293058052524, "train_loss": 0.260972180036613, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006714678190361995, "epoch": 394, "n_parameters": 631477312} {"train_lr": 0.002586892818353339, "train_min_lr": 0.002586892818353339, "train_loss": 0.26095533819021416, "train_loss_scale": 1522451.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 395, "n_parameters": 631477312} {"train_lr": 0.0025848521072351733, "train_min_lr": 0.0025848521072351733, "train_loss": 0.260919935350569, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006723743400727518, "epoch": 396, "n_parameters": 631477312} {"train_lr": 0.0025828071804176074, "train_min_lr": 0.0025828071804176074, "train_loss": 0.2608851239574739, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067142318405855736, "epoch": 397, "n_parameters": 631477312} {"train_lr": 0.0025807580458839627, "train_min_lr": 0.0025807580458839627, "train_loss": 0.2609347392064639, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067522611173514565, "epoch": 398, "n_parameters": 631477312} {"train_lr": 0.0025787047116339716, "train_min_lr": 0.0025787047116339716, "train_loss": 0.2608702408376699, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006713488721288741, "epoch": 399, "n_parameters": 631477312} {"train_lr": 0.0025766471856837757, "train_min_lr": 0.0025766471856837757, "train_loss": 0.26086656520596874, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006713744887234405, "epoch": 400, "n_parameters": 631477312} {"train_lr": 0.0025745854760658637, "train_min_lr": 0.0025745854760658637, "train_loss": 0.2608406231081925, "train_loss_scale": 1105709.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006733600166626275, "epoch": 401, "n_parameters": 631477312} {"train_lr": 0.002572519590829076, "train_min_lr": 0.002572519590829076, "train_loss": 0.2608536145088669, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006731359514957055, "epoch": 402, "n_parameters": 631477312} {"train_lr": 0.002570449538038541, "train_min_lr": 0.002570449538038541, "train_loss": 0.2608288874288496, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006730852003854055, "epoch": 403, "n_parameters": 631477312} {"train_lr": 0.0025683753257756624, "train_min_lr": 0.0025683753257756624, "train_loss": 0.2608498692100581, "train_loss_scale": 1814843.076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 404, "n_parameters": 631477312} {"train_lr": 0.0025662969621380854, "train_min_lr": 0.0025662969621380854, "train_loss": 0.26080902123179, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006754891021559253, "epoch": 405, "n_parameters": 631477312} {"train_lr": 0.0025642144552396537, "train_min_lr": 0.0025642144552396537, "train_loss": 0.26081656243425244, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006756509291545416, "epoch": 406, "n_parameters": 631477312} {"train_lr": 0.0025621278132103966, "train_min_lr": 0.0025621278132103966, "train_loss": 0.26079067903964853, "train_loss_scale": 596545.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 407, "n_parameters": 631477312} {"train_lr": 0.002560037044196476, "train_min_lr": 0.002560037044196476, "train_loss": 0.260796482480752, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006769969623583632, "epoch": 408, "n_parameters": 631477312} {"train_lr": 0.0025579421563601715, "train_min_lr": 0.0025579421563601715, "train_loss": 0.26074286796248114, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006778109599298869, "epoch": 409, "n_parameters": 631477312} {"train_lr": 0.002555843157879843, "train_min_lr": 0.002555843157879843, "train_loss": 0.2607528650207827, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006745484214311896, "epoch": 410, "n_parameters": 631477312} {"train_lr": 0.0025537400569498967, "train_min_lr": 0.0025537400569498967, "train_loss": 0.26072011129536593, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006768282155136172, "epoch": 411, "n_parameters": 631477312} {"train_lr": 0.002551632861780751, "train_min_lr": 0.002551632861780751, "train_loss": 0.26073140253086025, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00676149078973163, "epoch": 412, "n_parameters": 631477312} {"train_lr": 0.002549521580598816, "train_min_lr": 0.002549521580598816, "train_loss": 0.2606810415163636, "train_loss_scale": 761225.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067594817985828295, "epoch": 413, "n_parameters": 631477312} {"train_lr": 0.002547406221646444, "train_min_lr": 0.002547406221646444, "train_loss": 0.26070445026450145, "train_loss_scale": 784751.5897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 414, "n_parameters": 631477312} {"train_lr": 0.002545286793181916, "train_min_lr": 0.002545286793181916, "train_loss": 0.2606846690506268, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067701771622523665, "epoch": 415, "n_parameters": 631477312} {"train_lr": 0.0025431633034793937, "train_min_lr": 0.0025431633034793937, "train_loss": 0.2606677970586297, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00676345241178448, "epoch": 416, "n_parameters": 631477312} {"train_lr": 0.002541035760828894, "train_min_lr": 0.002541035760828894, "train_loss": 0.2606595623969602, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00676685564357262, "epoch": 417, "n_parameters": 631477312} {"train_lr": 0.0025389041735362663, "train_min_lr": 0.0025389041735362663, "train_loss": 0.26064972824249893, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006746157627696028, "epoch": 418, "n_parameters": 631477312} {"train_lr": 0.0025367685499231326, "train_min_lr": 0.0025367685499231326, "train_loss": 0.2606460024566891, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00679679676436652, "epoch": 419, "n_parameters": 631477312} {"train_lr": 0.0025346288983268852, "train_min_lr": 0.0025346288983268852, "train_loss": 0.26061186048751456, "train_loss_scale": 573019.8974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006755039770490466, "epoch": 420, "n_parameters": 631477312} {"train_lr": 0.0025324852271006383, "train_min_lr": 0.0025324852271006383, "train_loss": 0.2606283387211032, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067617958530974695, "epoch": 421, "n_parameters": 631477312} {"train_lr": 0.002530337544613201, "train_min_lr": 0.002530337544613201, "train_loss": 0.2605945677287542, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006753581979025442, "epoch": 422, "n_parameters": 631477312} {"train_lr": 0.0025281858592490396, "train_min_lr": 0.0025281858592490396, "train_loss": 0.2605568443138439, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067678538049595095, "epoch": 423, "n_parameters": 631477312} {"train_lr": 0.0025260301794082433, "train_min_lr": 0.0025260301794082433, "train_loss": 0.26055078199789977, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006789733415457587, "epoch": 424, "n_parameters": 631477312} {"train_lr": 0.0025238705135065017, "train_min_lr": 0.0025238705135065017, "train_loss": 0.2605513702922811, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006786386345107204, "epoch": 425, "n_parameters": 631477312} {"train_lr": 0.002521706869975065, "train_min_lr": 0.002521706869975065, "train_loss": 0.2605444497267644, "train_loss_scale": 846926.7692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 426, "n_parameters": 631477312} {"train_lr": 0.002519539257260711, "train_min_lr": 0.002519539257260711, "train_loss": 0.26049726808336204, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006773102807645232, "epoch": 427, "n_parameters": 631477312} {"train_lr": 0.0025173676838257156, "train_min_lr": 0.0025173676838257156, "train_loss": 0.2605638345106481, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007031753465222816, "epoch": 428, "n_parameters": 631477312} {"train_lr": 0.0025151921581478085, "train_min_lr": 0.0025151921581478085, "train_loss": 0.2605411779398146, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006808228251966051, "epoch": 429, "n_parameters": 631477312} {"train_lr": 0.0025130126887201593, "train_min_lr": 0.0025130126887201593, "train_loss": 0.2605010965683808, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006793593222167916, "epoch": 430, "n_parameters": 631477312} {"train_lr": 0.002510829284051327, "train_min_lr": 0.002510829284051327, "train_loss": 0.2604803545609451, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006777361502500776, "epoch": 431, "n_parameters": 631477312} {"train_lr": 0.002508641952665238, "train_min_lr": 0.002508641952665238, "train_loss": 0.27368459462797123, "train_loss_scale": 488159.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 432, "n_parameters": 631477312} {"train_lr": 0.0025064507031011445, "train_min_lr": 0.0025064507031011445, "train_loss": 0.2999196760654927, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01149084895008948, "epoch": 433, "n_parameters": 631477312} {"train_lr": 0.0025042555439135986, "train_min_lr": 0.0025042555439135986, "train_loss": 0.26236078466097706, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006952169324414661, "epoch": 434, "n_parameters": 631477312} {"train_lr": 0.002502056483672411, "train_min_lr": 0.002502056483672411, "train_loss": 0.26140170789455086, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006741380659290231, "epoch": 435, "n_parameters": 631477312} {"train_lr": 0.0024998535309626348, "train_min_lr": 0.0024998535309626348, "train_loss": 0.26104205797235364, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006714587892369868, "epoch": 436, "n_parameters": 631477312} {"train_lr": 0.0024976466943844944, "train_min_lr": 0.0024976466943844944, "train_loss": 0.2608142552825694, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006671815858377765, "epoch": 437, "n_parameters": 631477312} {"train_lr": 0.0024954359825533996, "train_min_lr": 0.0024954359825533996, "train_loss": 0.2606702542398125, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00669279017408068, "epoch": 438, "n_parameters": 631477312} {"train_lr": 0.0024932214040998806, "train_min_lr": 0.0024932214040998806, "train_loss": 0.2606147388742568, "train_loss_scale": 452870.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006686832617001178, "epoch": 439, "n_parameters": 631477312} {"train_lr": 0.0024910029676695655, "train_min_lr": 0.0024910029676695655, "train_loss": 0.2605512101167383, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00666682350073153, "epoch": 440, "n_parameters": 631477312} {"train_lr": 0.002488780681923135, "train_min_lr": 0.002488780681923135, "train_loss": 0.260500792790061, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006741959407615165, "epoch": 441, "n_parameters": 631477312} {"train_lr": 0.0024865545555363086, "train_min_lr": 0.0024865545555363086, "train_loss": 0.26046304509807855, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006710812185282031, "epoch": 442, "n_parameters": 631477312} {"train_lr": 0.002484324597199788, "train_min_lr": 0.002484324597199788, "train_loss": 0.26044548828847325, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067093271153190965, "epoch": 443, "n_parameters": 631477312} {"train_lr": 0.002482090815619252, "train_min_lr": 0.002482090815619252, "train_loss": 0.26041133377330905, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006744717656730268, "epoch": 444, "n_parameters": 631477312} {"train_lr": 0.002479853219515286, "train_min_lr": 0.002479853219515286, "train_loss": 0.260403517451591, "train_loss_scale": 690648.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066968012236966155, "epoch": 445, "n_parameters": 631477312} {"train_lr": 0.0024776118176233776, "train_min_lr": 0.0024776118176233776, "train_loss": 0.26049213490198153, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007066803339582223, "epoch": 446, "n_parameters": 631477312} {"train_lr": 0.002475366618693875, "train_min_lr": 0.002475366618693875, "train_loss": 0.26032595604789466, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006733388169847716, "epoch": 447, "n_parameters": 631477312} {"train_lr": 0.0024731176314919346, "train_min_lr": 0.0024731176314919346, "train_loss": 0.2603366192436228, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00675651513469907, "epoch": 448, "n_parameters": 631477312} {"train_lr": 0.0024708648647975245, "train_min_lr": 0.0024708648647975245, "train_loss": 0.26028723910880774, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006740055464876768, "epoch": 449, "n_parameters": 631477312} {"train_lr": 0.0024686083274053426, "train_min_lr": 0.0024686083274053426, "train_loss": 0.2602953368988939, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006753871399455537, "epoch": 450, "n_parameters": 631477312} {"train_lr": 0.0024663480281248338, "train_min_lr": 0.0024663480281248338, "train_loss": 0.2602702913244661, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006752519608618548, "epoch": 451, "n_parameters": 631477312} {"train_lr": 0.002464083975780106, "train_min_lr": 0.002464083975780106, "train_loss": 0.2602893674555115, "train_loss_scale": 1139318.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 452, "n_parameters": 631477312} {"train_lr": 0.0024618161792099333, "train_min_lr": 0.0024618161792099333, "train_loss": 0.26022003645686287, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006782944888795893, "epoch": 453, "n_parameters": 631477312} {"train_lr": 0.002459544647267703, "train_min_lr": 0.002459544647267703, "train_loss": 0.26020333107608634, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006778379514192541, "epoch": 454, "n_parameters": 631477312} {"train_lr": 0.0024572693888213837, "train_min_lr": 0.0024572693888213837, "train_loss": 0.26021276340664673, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006779856444037973, "epoch": 455, "n_parameters": 631477312} {"train_lr": 0.002454990412753494, "train_min_lr": 0.002454990412753494, "train_loss": 0.26024409212792915, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006809075546558374, "epoch": 456, "n_parameters": 631477312} {"train_lr": 0.002452707727961063, "train_min_lr": 0.002452707727961063, "train_loss": 0.2602136980664606, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006795266920772309, "epoch": 457, "n_parameters": 631477312} {"train_lr": 0.002450421343355605, "train_min_lr": 0.002450421343355605, "train_loss": 0.26016393412525457, "train_loss_scale": 1139318.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 458, "n_parameters": 631477312} {"train_lr": 0.0024481312678630704, "train_min_lr": 0.0024481312678630704, "train_loss": 0.26017267988003695, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006820336566306651, "epoch": 459, "n_parameters": 631477312} {"train_lr": 0.0024458375104238228, "train_min_lr": 0.0024458375104238228, "train_loss": 0.2601575094024436, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006825981819882798, "epoch": 460, "n_parameters": 631477312} {"train_lr": 0.002443540079992599, "train_min_lr": 0.002443540079992599, "train_loss": 0.2601714914915367, "train_loss_scale": 846926.7692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 461, "n_parameters": 631477312} {"train_lr": 0.0024412389855384803, "train_min_lr": 0.0024412389855384803, "train_loss": 0.26020096075565863, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006809132721406432, "epoch": 462, "n_parameters": 631477312} {"train_lr": 0.002438934236044838, "train_min_lr": 0.002438934236044838, "train_loss": 0.2601086346116156, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006820624804673478, "epoch": 463, "n_parameters": 631477312} {"train_lr": 0.002436625840509331, "train_min_lr": 0.002436625840509331, "train_loss": 0.2600998809883514, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068320577728371015, "epoch": 464, "n_parameters": 631477312} {"train_lr": 0.002434313807943839, "train_min_lr": 0.002434313807943839, "train_loss": 0.2600953527009831, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006869574632638922, "epoch": 465, "n_parameters": 631477312} {"train_lr": 0.002431998147374442, "train_min_lr": 0.002431998147374442, "train_loss": 0.2601050837413193, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006840905480576345, "epoch": 466, "n_parameters": 631477312} {"train_lr": 0.0024296788678413927, "train_min_lr": 0.0024296788678413927, "train_loss": 0.2600811619299631, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006843465626633798, "epoch": 467, "n_parameters": 631477312} {"train_lr": 0.0024273559783990592, "train_min_lr": 0.0024273559783990592, "train_loss": 0.26003706922683006, "train_loss_scale": 1035132.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006833669393717383, "epoch": 468, "n_parameters": 631477312} {"train_lr": 0.0024250294881159144, "train_min_lr": 0.0024250294881159144, "train_loss": 0.26004795148932874, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00684384331972792, "epoch": 469, "n_parameters": 631477312} {"train_lr": 0.0024226994060744782, "train_min_lr": 0.0024226994060744782, "train_loss": 0.2599983669501037, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006845106066276248, "epoch": 470, "n_parameters": 631477312} {"train_lr": 0.0024203657413713023, "train_min_lr": 0.0024203657413713023, "train_loss": 0.2600097898274469, "train_loss_scale": 673844.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 471, "n_parameters": 631477312} {"train_lr": 0.002418028503116915, "train_min_lr": 0.002418028503116915, "train_loss": 0.25999408898254234, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068552985187405, "epoch": 472, "n_parameters": 631477312} {"train_lr": 0.0024156877004358054, "train_min_lr": 0.0024156877004358054, "train_loss": 0.26000505826997167, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006852230301891, "epoch": 473, "n_parameters": 631477312} {"train_lr": 0.0024133433424663683, "train_min_lr": 0.0024133433424663683, "train_loss": 0.2599520635826943, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006882265416117242, "epoch": 474, "n_parameters": 631477312} {"train_lr": 0.002410995438360891, "train_min_lr": 0.002410995438360891, "train_loss": 0.2599984007374121, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006869722718898303, "epoch": 475, "n_parameters": 631477312} {"train_lr": 0.0024086439972854892, "train_min_lr": 0.0024086439972854892, "train_loss": 0.25997919809574693, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006877011006089071, "epoch": 476, "n_parameters": 631477312} {"train_lr": 0.002406289028420101, "train_min_lr": 0.002406289028420101, "train_loss": 0.25993729314993685, "train_loss_scale": 683926.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006851833913690195, "epoch": 477, "n_parameters": 631477312} {"train_lr": 0.002403930540958429, "train_min_lr": 0.002403930540958429, "train_loss": 0.2598998556391169, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006863630776747297, "epoch": 478, "n_parameters": 631477312} {"train_lr": 0.0024015685441079117, "train_min_lr": 0.0024015685441079117, "train_loss": 0.25987968335334116, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006880252777288358, "epoch": 479, "n_parameters": 631477312} {"train_lr": 0.002399203047089689, "train_min_lr": 0.002399203047089689, "train_loss": 0.2599016305250235, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006860641347698103, "epoch": 480, "n_parameters": 631477312} {"train_lr": 0.0023968340591385717, "train_min_lr": 0.0023968340591385717, "train_loss": 0.25986810696597856, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006864305663233002, "epoch": 481, "n_parameters": 631477312} {"train_lr": 0.002394461589502989, "train_min_lr": 0.002394461589502989, "train_loss": 0.25988260294513726, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006871802909956433, "epoch": 482, "n_parameters": 631477312} {"train_lr": 0.0023920856474449704, "train_min_lr": 0.0023920856474449704, "train_loss": 0.25986840330267275, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006862974534623134, "epoch": 483, "n_parameters": 631477312} {"train_lr": 0.0023897062422400975, "train_min_lr": 0.0023897062422400975, "train_loss": 0.25987956220976627, "train_loss_scale": 1129235.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 484, "n_parameters": 631477312} {"train_lr": 0.0023873233831774756, "train_min_lr": 0.0023873233831774756, "train_loss": 0.2598635393541115, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00686193066678989, "epoch": 485, "n_parameters": 631477312} {"train_lr": 0.0023849370795596837, "train_min_lr": 0.0023849370795596837, "train_loss": 0.2597599645019867, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006875170564923722, "epoch": 486, "n_parameters": 631477312} {"train_lr": 0.0023825473407027647, "train_min_lr": 0.0023825473407027647, "train_loss": 0.25981077703778654, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006899889880934587, "epoch": 487, "n_parameters": 631477312} {"train_lr": 0.0023801541759361604, "train_min_lr": 0.0023801541759361604, "train_loss": 0.25982260803441304, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006928031471593736, "epoch": 488, "n_parameters": 631477312} {"train_lr": 0.002377757594602684, "train_min_lr": 0.002377757594602684, "train_loss": 0.25977604428771883, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006868318939665094, "epoch": 489, "n_parameters": 631477312} {"train_lr": 0.0023753576060585027, "train_min_lr": 0.0023753576060585027, "train_loss": 0.25975547623760903, "train_loss_scale": 1475400.2051282052, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006893729426874182, "epoch": 490, "n_parameters": 631477312} {"train_lr": 0.0023729542196730697, "train_min_lr": 0.0023729542196730697, "train_loss": 0.25976585083676934, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00688810667130523, "epoch": 491, "n_parameters": 631477312} {"train_lr": 0.0023705474448291095, "train_min_lr": 0.0023705474448291095, "train_loss": 0.2597050526394294, "train_loss_scale": 1804760.6153846155, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 492, "n_parameters": 631477312} {"train_lr": 0.002368137290922579, "train_min_lr": 0.002368137290922579, "train_loss": 0.2597613477607807, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00695968574235359, "epoch": 493, "n_parameters": 631477312} {"train_lr": 0.002365723767362617, "train_min_lr": 0.002365723767362617, "train_loss": 0.2597161917947233, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006887851275491695, "epoch": 494, "n_parameters": 631477312} {"train_lr": 0.0023633068835715253, "train_min_lr": 0.0023633068835715253, "train_loss": 0.2597228134295736, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069400924347484344, "epoch": 495, "n_parameters": 631477312} {"train_lr": 0.0023608866489847244, "train_min_lr": 0.0023608866489847244, "train_loss": 0.2596868366892569, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006886620961058025, "epoch": 496, "n_parameters": 631477312} {"train_lr": 0.0023584630730507047, "train_min_lr": 0.0023584630730507047, "train_loss": 0.25970953467409486, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006912793891014866, "epoch": 497, "n_parameters": 631477312} {"train_lr": 0.00235603616523102, "train_min_lr": 0.00235603616523102, "train_loss": 0.25962593055700356, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006907778133888944, "epoch": 498, "n_parameters": 631477312} {"train_lr": 0.002353605935000214, "train_min_lr": 0.002353605935000214, "train_loss": 0.2596718807740567, "train_loss_scale": 1586307.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 499, "n_parameters": 631477312} {"train_lr": 0.0023511723918458135, "train_min_lr": 0.0023511723918458135, "train_loss": 0.25965143217203707, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006938896803125643, "epoch": 500, "n_parameters": 631477312} {"train_lr": 0.0023487355452682713, "train_min_lr": 0.0023487355452682713, "train_loss": 0.26255900328620696, "train_loss_scale": 1028411.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 501, "n_parameters": 631477312} {"train_lr": 0.002346295404780935, "train_min_lr": 0.002346295404780935, "train_loss": 0.28469008369407117, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012255037601929732, "epoch": 502, "n_parameters": 631477312} {"train_lr": 0.002343851979910019, "train_min_lr": 0.002343851979910019, "train_loss": 0.2610410335473716, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007142428580361108, "epoch": 503, "n_parameters": 631477312} {"train_lr": 0.002341405280194559, "train_min_lr": 0.002341405280194559, "train_loss": 0.26037645114896196, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006911094627307298, "epoch": 504, "n_parameters": 631477312} {"train_lr": 0.0023389553151863694, "train_min_lr": 0.0023389553151863694, "train_loss": 0.26009171117001617, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068265954226565855, "epoch": 505, "n_parameters": 631477312} {"train_lr": 0.0023365020944500123, "train_min_lr": 0.0023365020944500123, "train_loss": 0.2598441921932718, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006816815778923531, "epoch": 506, "n_parameters": 631477312} {"train_lr": 0.0023340456275627703, "train_min_lr": 0.0023340456275627703, "train_loss": 0.25982940764142537, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006886344665709214, "epoch": 507, "n_parameters": 631477312} {"train_lr": 0.0023315859241145867, "train_min_lr": 0.0023315859241145867, "train_loss": 0.259765103730994, "train_loss_scale": 853648.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006998711496961709, "epoch": 508, "n_parameters": 631477312} {"train_lr": 0.002329122993708048, "train_min_lr": 0.002329122993708048, "train_loss": 0.25966785750829446, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006859985474222459, "epoch": 509, "n_parameters": 631477312} {"train_lr": 0.0023266568459583315, "train_min_lr": 0.0023266568459583315, "train_loss": 0.2596393405752352, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006847008522480535, "epoch": 510, "n_parameters": 631477312} {"train_lr": 0.002324187490493184, "train_min_lr": 0.002324187490493184, "train_loss": 0.2595976479620171, "train_loss_scale": 887256.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 511, "n_parameters": 631477312} {"train_lr": 0.0023217149369528702, "train_min_lr": 0.0023217149369528702, "train_loss": 0.25956974200343186, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006891731066832271, "epoch": 512, "n_parameters": 631477312} {"train_lr": 0.0023192391949901393, "train_min_lr": 0.0023192391949901393, "train_loss": 0.25951864591572815, "train_loss_scale": 357927.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 513, "n_parameters": 631477312} {"train_lr": 0.002316760274270188, "train_min_lr": 0.002316760274270188, "train_loss": 0.25950502692602384, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006881965645171033, "epoch": 514, "n_parameters": 631477312} {"train_lr": 0.002314278184470623, "train_min_lr": 0.002314278184470623, "train_loss": 0.2594530578875819, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069144510914786505, "epoch": 515, "n_parameters": 631477312} {"train_lr": 0.0023117929352814244, "train_min_lr": 0.0023117929352814244, "train_loss": 0.2594900097136792, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006913063192489342, "epoch": 516, "n_parameters": 631477312} {"train_lr": 0.0023093045364049047, "train_min_lr": 0.0023093045364049047, "train_loss": 0.259453989401197, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006913996588152189, "epoch": 517, "n_parameters": 631477312} {"train_lr": 0.0023068129975556753, "train_min_lr": 0.0023068129975556753, "train_loss": 0.2594530913584794, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006912267319141672, "epoch": 518, "n_parameters": 631477312} {"train_lr": 0.002304318328460604, "train_min_lr": 0.002304318328460604, "train_loss": 0.2594308918580795, "train_loss_scale": 320958.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006918991693498519, "epoch": 519, "n_parameters": 631477312} {"train_lr": 0.002301820538858778, "train_min_lr": 0.002301820538858778, "train_loss": 0.25941609830046314, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006932790788153234, "epoch": 520, "n_parameters": 631477312} {"train_lr": 0.002299319638501468, "train_min_lr": 0.002299319638501468, "train_loss": 0.25943319629937506, "train_loss_scale": 292391.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 521, "n_parameters": 631477312} {"train_lr": 0.0022968156371520896, "train_min_lr": 0.0022968156371520896, "train_loss": 0.2594201855767423, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006937435759494129, "epoch": 522, "n_parameters": 631477312} {"train_lr": 0.0022943085445861614, "train_min_lr": 0.0022943085445861614, "train_loss": 0.2593944095528852, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069195427140818005, "epoch": 523, "n_parameters": 631477312} {"train_lr": 0.0022917983705912763, "train_min_lr": 0.0022917983705912763, "train_loss": 0.2593434696855883, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006947496840252708, "epoch": 524, "n_parameters": 631477312} {"train_lr": 0.0022892851249670496, "train_min_lr": 0.0022892851249670496, "train_loss": 0.259353852383076, "train_loss_scale": 189466.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 525, "n_parameters": 631477312} {"train_lr": 0.0022867688175250956, "train_min_lr": 0.0022867688175250956, "train_loss": 0.2593591528353639, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00698316174869736, "epoch": 526, "n_parameters": 631477312} {"train_lr": 0.0022842494580889716, "train_min_lr": 0.0022842494580889716, "train_loss": 0.25935578333416903, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006960340918829808, "epoch": 527, "n_parameters": 631477312} {"train_lr": 0.0022817270564941617, "train_min_lr": 0.0022817270564941617, "train_loss": 0.25935228142397815, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006967064376789121, "epoch": 528, "n_parameters": 631477312} {"train_lr": 0.002279201622588018, "train_min_lr": 0.002279201622588018, "train_loss": 0.25933887764739877, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006967933744024963, "epoch": 529, "n_parameters": 631477312} {"train_lr": 0.0022766731662297347, "train_min_lr": 0.0022766731662297347, "train_loss": 0.2592882409853001, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006981978927237483, "epoch": 530, "n_parameters": 631477312} {"train_lr": 0.002274141697290305, "train_min_lr": 0.002274141697290305, "train_loss": 0.259320571523112, "train_loss_scale": 149976.61538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006983565321813027, "epoch": 531, "n_parameters": 631477312} {"train_lr": 0.0022716072256524756, "train_min_lr": 0.0022716072256524756, "train_loss": 0.2592598363392962, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00695922831670405, "epoch": 532, "n_parameters": 631477312} {"train_lr": 0.002269069761210729, "train_min_lr": 0.002269069761210729, "train_loss": 0.2593135826755315, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006990948514356159, "epoch": 533, "n_parameters": 631477312} {"train_lr": 0.0022665293138712287, "train_min_lr": 0.0022665293138712287, "train_loss": 0.25926773145329207, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006978035599996264, "epoch": 534, "n_parameters": 631477312} {"train_lr": 0.002263985893551773, "train_min_lr": 0.002263985893551773, "train_loss": 0.25918868031257236, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00696265027195645, "epoch": 535, "n_parameters": 631477312} {"train_lr": 0.002261439510181771, "train_min_lr": 0.002261439510181771, "train_loss": 0.25917976934952325, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006991905948290458, "epoch": 536, "n_parameters": 631477312} {"train_lr": 0.002258890173702205, "train_min_lr": 0.002258890173702205, "train_loss": 0.2592326949726647, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007041170520003503, "epoch": 537, "n_parameters": 631477312} {"train_lr": 0.002256337894065582, "train_min_lr": 0.002256337894065582, "train_loss": 0.2591818631298315, "train_loss_scale": 454550.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006997276675135184, "epoch": 538, "n_parameters": 631477312} {"train_lr": 0.0022537826812358995, "train_min_lr": 0.0022537826812358995, "train_loss": 0.25922164016474897, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007027290462969969, "epoch": 539, "n_parameters": 631477312} {"train_lr": 0.002251224545188606, "train_min_lr": 0.002251224545188606, "train_loss": 0.25916706796329564, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007010154730568712, "epoch": 540, "n_parameters": 631477312} {"train_lr": 0.0022486634959105628, "train_min_lr": 0.0022486634959105628, "train_loss": 0.2591938043359476, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006997972713389362, "epoch": 541, "n_parameters": 631477312} {"train_lr": 0.002246099543400005, "train_min_lr": 0.002246099543400005, "train_loss": 0.2591944944877655, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007068199016010532, "epoch": 542, "n_parameters": 631477312} {"train_lr": 0.0022435326976664965, "train_min_lr": 0.0022435326976664965, "train_loss": 0.25916673885718083, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007006489553858932, "epoch": 543, "n_parameters": 631477312} {"train_lr": 0.0022409629687309036, "train_min_lr": 0.0022409629687309036, "train_loss": 0.2591424279261189, "train_loss_scale": 694009.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007000253113428465, "epoch": 544, "n_parameters": 631477312} {"train_lr": 0.0022383903666253436, "train_min_lr": 0.0022383903666253436, "train_loss": 0.2591186635261879, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007017964227065348, "epoch": 545, "n_parameters": 631477312} {"train_lr": 0.002235814901393154, "train_min_lr": 0.002235814901393154, "train_loss": 0.25911090198235637, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007022263849369035, "epoch": 546, "n_parameters": 631477312} {"train_lr": 0.0022332365830888467, "train_min_lr": 0.0022332365830888467, "train_loss": 0.2590916966034386, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007043239272510012, "epoch": 547, "n_parameters": 631477312} {"train_lr": 0.0022306554217780723, "train_min_lr": 0.0022306554217780723, "train_loss": 0.2590701534406831, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007023601821110321, "epoch": 548, "n_parameters": 631477312} {"train_lr": 0.002228071427537582, "train_min_lr": 0.002228071427537582, "train_loss": 0.25907065043858707, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007021768879778205, "epoch": 549, "n_parameters": 631477312} {"train_lr": 0.0022254846104551853, "train_min_lr": 0.0022254846104551853, "train_loss": 0.2768212161802997, "train_loss_scale": 573019.8974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 550, "n_parameters": 631477312} {"train_lr": 0.002222894980629715, "train_min_lr": 0.002222894980629715, "train_loss": 0.2629867660968254, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011663659432461152, "epoch": 551, "n_parameters": 631477312} {"train_lr": 0.0022203025481709825, "train_min_lr": 0.0022203025481709825, "train_loss": 0.2617700283851427, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010731485073800938, "epoch": 552, "n_parameters": 631477312} {"train_lr": 0.002217707323199736, "train_min_lr": 0.002217707323199736, "train_loss": 0.2601188912522048, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007807789268927314, "epoch": 553, "n_parameters": 631477312} {"train_lr": 0.0022151093158476296, "train_min_lr": 0.0022151093158476296, "train_loss": 0.2596345917793373, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007450427609280898, "epoch": 554, "n_parameters": 631477312} {"train_lr": 0.0022125085362571847, "train_min_lr": 0.0022125085362571847, "train_loss": 0.2597883561363396, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008305922977459163, "epoch": 555, "n_parameters": 631477312} {"train_lr": 0.0022099049945817385, "train_min_lr": 0.0022099049945817385, "train_loss": 0.25925539990660185, "train_loss_scale": 784751.5897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007125769259754377, "epoch": 556, "n_parameters": 631477312} {"train_lr": 0.0022072987009854137, "train_min_lr": 0.0022072987009854137, "train_loss": 0.2591913223463612, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070054806106222365, "epoch": 557, "n_parameters": 631477312} {"train_lr": 0.0022046896656430785, "train_min_lr": 0.0022046896656430785, "train_loss": 0.2590722610893397, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006936118485800062, "epoch": 558, "n_parameters": 631477312} {"train_lr": 0.0022020778987402964, "train_min_lr": 0.0022020778987402964, "train_loss": 0.2590531428099777, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006952145683149306, "epoch": 559, "n_parameters": 631477312} {"train_lr": 0.0021994634104733073, "train_min_lr": 0.0021994634104733073, "train_loss": 0.25900868364633656, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00693602480728609, "epoch": 560, "n_parameters": 631477312} {"train_lr": 0.0021968462110489636, "train_min_lr": 0.0021968462110489636, "train_loss": 0.25899245243412083, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006938296353790718, "epoch": 561, "n_parameters": 631477312} {"train_lr": 0.0021942263106847085, "train_min_lr": 0.0021942263106847085, "train_loss": 0.2589664453885351, "train_loss_scale": 959514.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 562, "n_parameters": 631477312} {"train_lr": 0.00219160371960853, "train_min_lr": 0.00219160371960853, "train_loss": 0.25887835206678855, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006941976737625037, "epoch": 563, "n_parameters": 631477312} {"train_lr": 0.0021889784480589184, "train_min_lr": 0.0021889784480589184, "train_loss": 0.2589046309540908, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007005778011471893, "epoch": 564, "n_parameters": 631477312} {"train_lr": 0.002186350506284827, "train_min_lr": 0.002186350506284827, "train_loss": 0.2588709225752749, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006977631786587433, "epoch": 565, "n_parameters": 631477312} {"train_lr": 0.0021837199045456382, "train_min_lr": 0.0021837199045456382, "train_loss": 0.258862611795895, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006971081172545942, "epoch": 566, "n_parameters": 631477312} {"train_lr": 0.002181086653111113, "train_min_lr": 0.002181086653111113, "train_loss": 0.2588565099644117, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006997917629539585, "epoch": 567, "n_parameters": 631477312} {"train_lr": 0.0021784507622613566, "train_min_lr": 0.0021784507622613566, "train_loss": 0.25893635031146306, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070559892725820346, "epoch": 568, "n_parameters": 631477312} {"train_lr": 0.0021758122422867888, "train_min_lr": 0.0021758122422867888, "train_loss": 0.2588309397120984, "train_loss_scale": 922545.2307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006997459566590782, "epoch": 569, "n_parameters": 631477312} {"train_lr": 0.0021731711034880846, "train_min_lr": 0.0021731711034880846, "train_loss": 0.25882113247047156, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00701829078547561, "epoch": 570, "n_parameters": 631477312} {"train_lr": 0.002170527356176138, "train_min_lr": 0.002170527356176138, "train_loss": 0.25886221253910124, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007023840914301287, "epoch": 571, "n_parameters": 631477312} {"train_lr": 0.0021678810106720405, "train_min_lr": 0.0021678810106720405, "train_loss": 0.2588008327427535, "train_loss_scale": 542772.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 572, "n_parameters": 631477312} {"train_lr": 0.0021652320773070076, "train_min_lr": 0.0021652320773070076, "train_loss": 0.25878683568085903, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007033559505660565, "epoch": 573, "n_parameters": 631477312} {"train_lr": 0.0021625805664223837, "train_min_lr": 0.0021625805664223837, "train_loss": 0.25877873469681406, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007024923079790404, "epoch": 574, "n_parameters": 631477312} {"train_lr": 0.0021599264883695505, "train_min_lr": 0.0021599264883695505, "train_loss": 0.2587390141143726, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007032622136951734, "epoch": 575, "n_parameters": 631477312} {"train_lr": 0.002157269853509928, "train_min_lr": 0.002157269853509928, "train_loss": 0.25878447748254985, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070646231731352135, "epoch": 576, "n_parameters": 631477312} {"train_lr": 0.0021546106722149095, "train_min_lr": 0.0021546106722149095, "train_loss": 0.25874988358611095, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007064044899426592, "epoch": 577, "n_parameters": 631477312} {"train_lr": 0.002151948954865835, "train_min_lr": 0.002151948954865835, "train_loss": 0.26284271830883926, "train_loss_scale": 814998.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013604633990699092, "epoch": 578, "n_parameters": 631477312} {"train_lr": 0.0021492847118539373, "train_min_lr": 0.0021492847118539373, "train_loss": 0.2593116847386297, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007288210683514197, "epoch": 579, "n_parameters": 631477312} {"train_lr": 0.002146617953580322, "train_min_lr": 0.002146617953580322, "train_loss": 0.2591074995087603, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071114833207012946, "epoch": 580, "n_parameters": 631477312} {"train_lr": 0.0021439486904558996, "train_min_lr": 0.0021439486904558996, "train_loss": 0.25884707767564136, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007090177412371701, "epoch": 581, "n_parameters": 631477312} {"train_lr": 0.0021412769329013656, "train_min_lr": 0.0021412769329013656, "train_loss": 0.258761122646646, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007029397293734245, "epoch": 582, "n_parameters": 631477312} {"train_lr": 0.0021386026913471664, "train_min_lr": 0.0021386026913471664, "train_loss": 0.2588032281730706, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007088781204611923, "epoch": 583, "n_parameters": 631477312} {"train_lr": 0.0021359259762334173, "train_min_lr": 0.0021359259762334173, "train_loss": 0.25873003541551626, "train_loss_scale": 1199812.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070588014309461685, "epoch": 584, "n_parameters": 631477312} {"train_lr": 0.0021332467980099226, "train_min_lr": 0.0021332467980099226, "train_loss": 0.2587249471107498, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007065806494583973, "epoch": 585, "n_parameters": 631477312} {"train_lr": 0.0021305651671360733, "train_min_lr": 0.0021305651671360733, "train_loss": 0.2586361433480842, "train_loss_scale": 1139318.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 586, "n_parameters": 631477312} {"train_lr": 0.0021278810940808597, "train_min_lr": 0.0021278810940808597, "train_loss": 0.2586147518834481, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070650927298582895, "epoch": 587, "n_parameters": 631477312} {"train_lr": 0.0021251945893227893, "train_min_lr": 0.0021251945893227893, "train_loss": 0.2586678796214983, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007093071522644888, "epoch": 588, "n_parameters": 631477312} {"train_lr": 0.0021225056633498727, "train_min_lr": 0.0021225056633498727, "train_loss": 0.25859436421738696, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007086682057258888, "epoch": 589, "n_parameters": 631477312} {"train_lr": 0.002119814326659565, "train_min_lr": 0.002119814326659565, "train_loss": 0.25862570392946976, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007094446209987673, "epoch": 590, "n_parameters": 631477312} {"train_lr": 0.002117120589758743, "train_min_lr": 0.002117120589758743, "train_loss": 0.2585545470520185, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00710451263988104, "epoch": 591, "n_parameters": 631477312} {"train_lr": 0.002114424463163643, "train_min_lr": 0.002114424463163643, "train_loss": 0.25855986039954215, "train_loss_scale": 899019.4871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 592, "n_parameters": 631477312} {"train_lr": 0.0021117259573998353, "train_min_lr": 0.0021117259573998353, "train_loss": 0.25857144418375516, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007102657957241321, "epoch": 593, "n_parameters": 631477312} {"train_lr": 0.002109025083002179, "train_min_lr": 0.002109025083002179, "train_loss": 0.2585845096842744, "train_loss_scale": 402458.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 594, "n_parameters": 631477312} {"train_lr": 0.0021063218505147814, "train_min_lr": 0.0021063218505147814, "train_loss": 0.2585298926300871, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007176326501230972, "epoch": 595, "n_parameters": 631477312} {"train_lr": 0.0021036162704909512, "train_min_lr": 0.0021036162704909512, "train_loss": 0.2584496463726585, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007071726364441789, "epoch": 596, "n_parameters": 631477312} {"train_lr": 0.0021009083534931686, "train_min_lr": 0.0021009083534931686, "train_loss": 0.2584624688188808, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007104336303503563, "epoch": 597, "n_parameters": 631477312} {"train_lr": 0.0020981981100930327, "train_min_lr": 0.0020981981100930327, "train_loss": 0.2585098648336358, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071609910870066434, "epoch": 598, "n_parameters": 631477312} {"train_lr": 0.0020954855508712156, "train_min_lr": 0.0020954855508712156, "train_loss": 0.2584485776829891, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007113490792480894, "epoch": 599, "n_parameters": 631477312} {"train_lr": 0.002092770686417453, "train_min_lr": 0.002092770686417453, "train_loss": 0.25846860345560485, "train_loss_scale": 276427.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007132115503713393, "epoch": 600, "n_parameters": 631477312} {"train_lr": 0.0020900535273304594, "train_min_lr": 0.0020900535273304594, "train_loss": 0.25846122447938585, "train_loss_scale": 441947.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 601, "n_parameters": 631477312} {"train_lr": 0.0020873340842179158, "train_min_lr": 0.0020873340842179158, "train_loss": 0.2583863027871419, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007120925831632354, "epoch": 602, "n_parameters": 631477312} {"train_lr": 0.0020846123676964215, "train_min_lr": 0.0020846123676964215, "train_loss": 0.2584170174355117, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071899669889647225, "epoch": 603, "n_parameters": 631477312} {"train_lr": 0.002081888388391451, "train_min_lr": 0.002081888388391451, "train_loss": 0.2584361298314224, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007167365636306409, "epoch": 604, "n_parameters": 631477312} {"train_lr": 0.0020791621569373037, "train_min_lr": 0.0020791621569373037, "train_loss": 0.25840016841016805, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007171965575812815, "epoch": 605, "n_parameters": 631477312} {"train_lr": 0.0020764336839770837, "train_min_lr": 0.0020764336839770837, "train_loss": 0.25838662097111154, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007161836205527951, "epoch": 606, "n_parameters": 631477312} {"train_lr": 0.0020737029801626366, "train_min_lr": 0.0020737029801626366, "train_loss": 0.2583828868242936, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007173360146295566, "epoch": 607, "n_parameters": 631477312} {"train_lr": 0.0020709700561545212, "train_min_lr": 0.0020709700561545212, "train_loss": 0.25838377974175203, "train_loss_scale": 499081.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00720696129466001, "epoch": 608, "n_parameters": 631477312} {"train_lr": 0.0020682349226219697, "train_min_lr": 0.0020682349226219697, "train_loss": 0.2584133283682884, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007205939297982229, "epoch": 609, "n_parameters": 631477312} {"train_lr": 0.0020654975902428246, "train_min_lr": 0.0020654975902428246, "train_loss": 0.2583835873549852, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007238034760424246, "epoch": 610, "n_parameters": 631477312} {"train_lr": 0.0020627580697035255, "train_min_lr": 0.0020627580697035255, "train_loss": 0.2583872149310385, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007230424216220108, "epoch": 611, "n_parameters": 631477312} {"train_lr": 0.0020600163716990544, "train_min_lr": 0.0020600163716990544, "train_loss": 0.25831202932526, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00717889225611893, "epoch": 612, "n_parameters": 631477312} {"train_lr": 0.002057272506932887, "train_min_lr": 0.002057272506932887, "train_loss": 0.2582809262138863, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007216978025658486, "epoch": 613, "n_parameters": 631477312} {"train_lr": 0.00205452648611696, "train_min_lr": 0.00205452648611696, "train_loss": 0.25832142688345927, "train_loss_scale": 375571.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 614, "n_parameters": 631477312} {"train_lr": 0.002051778319971633, "train_min_lr": 0.002051778319971633, "train_loss": 0.258269508745378, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007175557767578329, "epoch": 615, "n_parameters": 631477312} {"train_lr": 0.0020490280192256333, "train_min_lr": 0.0020490280192256333, "train_loss": 0.25830767799515086, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007321004581172019, "epoch": 616, "n_parameters": 631477312} {"train_lr": 0.002046275594616027, "train_min_lr": 0.002046275594616027, "train_loss": 0.2583030943597595, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007233895376348534, "epoch": 617, "n_parameters": 631477312} {"train_lr": 0.002043521056888168, "train_min_lr": 0.002043521056888168, "train_loss": 0.2582949669971967, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007233682675108027, "epoch": 618, "n_parameters": 631477312} {"train_lr": 0.002040764416795663, "train_min_lr": 0.002040764416795663, "train_loss": 0.25821148984444636, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007213412746727371, "epoch": 619, "n_parameters": 631477312} {"train_lr": 0.002038005685100323, "train_min_lr": 0.002038005685100323, "train_loss": 0.25931857105714673, "train_loss_scale": 303314.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009913841026644103, "epoch": 620, "n_parameters": 631477312} {"train_lr": 0.0020352448725721264, "train_min_lr": 0.0020352448725721264, "train_loss": 0.258393732436861, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007375012192194565, "epoch": 621, "n_parameters": 631477312} {"train_lr": 0.0020324819899891758, "train_min_lr": 0.0020324819899891758, "train_loss": 0.258324943894807, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007262087947963618, "epoch": 622, "n_parameters": 631477312} {"train_lr": 0.0020297170481376535, "train_min_lr": 0.0020297170481376535, "train_loss": 0.25822712354457533, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072358989034397286, "epoch": 623, "n_parameters": 631477312} {"train_lr": 0.0020269500578117827, "train_min_lr": 0.0020269500578117827, "train_loss": 0.2582289034841009, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007269516333531684, "epoch": 624, "n_parameters": 631477312} {"train_lr": 0.00202418102981378, "train_min_lr": 0.00202418102981378, "train_loss": 0.2581948596948328, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072547259847991746, "epoch": 625, "n_parameters": 631477312} {"train_lr": 0.002021409974953821, "train_min_lr": 0.002021409974953821, "train_loss": 0.25816618554735893, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007235182659664692, "epoch": 626, "n_parameters": 631477312} {"train_lr": 0.0020186369040499945, "train_min_lr": 0.0020186369040499945, "train_loss": 0.2581606797593383, "train_loss_scale": 781390.7692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 627, "n_parameters": 631477312} {"train_lr": 0.0020158618279282566, "train_min_lr": 0.0020158618279282566, "train_loss": 0.2581277255368873, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007258315621397625, "epoch": 628, "n_parameters": 631477312} {"train_lr": 0.0020130847574223974, "train_min_lr": 0.0020130847574223974, "train_loss": 0.2581103283655233, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007276020008318413, "epoch": 629, "n_parameters": 631477312} {"train_lr": 0.0020103057033739845, "train_min_lr": 0.0020103057033739845, "train_loss": 0.25812689238824904, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007282061690392976, "epoch": 630, "n_parameters": 631477312} {"train_lr": 0.0020075246766323336, "train_min_lr": 0.0020075246766323336, "train_loss": 0.2580784365254192, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007242796729163577, "epoch": 631, "n_parameters": 631477312} {"train_lr": 0.0020047416880544654, "train_min_lr": 0.0020047416880544654, "train_loss": 0.2580249534131816, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007220429684728002, "epoch": 632, "n_parameters": 631477312} {"train_lr": 0.0020019567485050536, "train_min_lr": 0.0020019567485050536, "train_loss": 0.2580317481456754, "train_loss_scale": 382293.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 633, "n_parameters": 631477312} {"train_lr": 0.0019991698688563877, "train_min_lr": 0.0019991698688563877, "train_loss": 0.25802282974398577, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007235463822069459, "epoch": 634, "n_parameters": 631477312} {"train_lr": 0.0019963810599883383, "train_min_lr": 0.0019963810599883383, "train_loss": 0.25801479453459764, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007284178814958208, "epoch": 635, "n_parameters": 631477312} {"train_lr": 0.001993590332788305, "train_min_lr": 0.001993590332788305, "train_loss": 0.25806435706237185, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007309536433193642, "epoch": 636, "n_parameters": 631477312} {"train_lr": 0.001990797698151171, "train_min_lr": 0.001990797698151171, "train_loss": 0.2579719069360111, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007297076075934829, "epoch": 637, "n_parameters": 631477312} {"train_lr": 0.0019880031669792728, "train_min_lr": 0.0019880031669792728, "train_loss": 0.2579685947410046, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007298785984181823, "epoch": 638, "n_parameters": 631477312} {"train_lr": 0.0019852067501823466, "train_min_lr": 0.0019852067501823466, "train_loss": 0.2579950495855883, "train_loss_scale": 296592.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007314596513214593, "epoch": 639, "n_parameters": 631477312} {"train_lr": 0.001982408458677493, "train_min_lr": 0.001982408458677493, "train_loss": 0.257979430959751, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007319157156579865, "epoch": 640, "n_parameters": 631477312} {"train_lr": 0.001979608303389129, "train_min_lr": 0.001979608303389129, "train_loss": 0.25798612198410314, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0073226786917075515, "epoch": 641, "n_parameters": 631477312} {"train_lr": 0.001976806295248947, "train_min_lr": 0.001976806295248947, "train_loss": 0.2579562472185502, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072744126604774445, "epoch": 642, "n_parameters": 631477312} {"train_lr": 0.001974002445195878, "train_min_lr": 0.001974002445195878, "train_loss": 0.25788868682530636, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007282771225361966, "epoch": 643, "n_parameters": 631477312} {"train_lr": 0.0019711967641760394, "train_min_lr": 0.0019711967641760394, "train_loss": 0.25787073803635746, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00727672935075437, "epoch": 644, "n_parameters": 631477312} {"train_lr": 0.001968389263142698, "train_min_lr": 0.001968389263142698, "train_loss": 0.2579129522767825, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007348946277768566, "epoch": 645, "n_parameters": 631477312} {"train_lr": 0.0019655799530562206, "train_min_lr": 0.0019655799530562206, "train_loss": 0.2579036822441058, "train_loss_scale": 902380.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007288955844556674, "epoch": 646, "n_parameters": 631477312} {"train_lr": 0.0019627688448840505, "train_min_lr": 0.0019627688448840505, "train_loss": 0.2579289665266585, "train_loss_scale": 778029.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 647, "n_parameters": 631477312} {"train_lr": 0.001959955949600632, "train_min_lr": 0.001959955949600632, "train_loss": 0.25791839877358425, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007349664194640728, "epoch": 648, "n_parameters": 631477312} {"train_lr": 0.0019571412781874023, "train_min_lr": 0.0019571412781874023, "train_loss": 0.2578634144529366, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007335339075861833, "epoch": 649, "n_parameters": 631477312} {"train_lr": 0.001954324841632723, "train_min_lr": 0.001954324841632723, "train_loss": 0.25780847623275643, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007339959616402689, "epoch": 650, "n_parameters": 631477312} {"train_lr": 0.0019515066509318512, "train_min_lr": 0.0019515066509318512, "train_loss": 0.2579083494418181, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0074174236237572935, "epoch": 651, "n_parameters": 631477312} {"train_lr": 0.0019486867170868882, "train_min_lr": 0.0019486867170868882, "train_loss": 0.25791565306448877, "train_loss_scale": 389014.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 652, "n_parameters": 631477312} {"train_lr": 0.0019458650511067438, "train_min_lr": 0.0019458650511067438, "train_loss": 0.25788426524302804, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007365226887393361, "epoch": 653, "n_parameters": 631477312} {"train_lr": 0.0019430416640070901, "train_min_lr": 0.0019430416640070901, "train_loss": 0.25958978108022934, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011198052478273613, "epoch": 654, "n_parameters": 631477312} {"train_lr": 0.001940216566810318, "train_min_lr": 0.001940216566810318, "train_loss": 0.2580720395124398, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007431172573258384, "epoch": 655, "n_parameters": 631477312} {"train_lr": 0.0019373897705454927, "train_min_lr": 0.0019373897705454927, "train_loss": 0.2579350516361256, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0073814162408383805, "epoch": 656, "n_parameters": 631477312} {"train_lr": 0.0019345612862483096, "train_min_lr": 0.0019345612862483096, "train_loss": 0.2578937436442058, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007382927572903916, "epoch": 657, "n_parameters": 631477312} {"train_lr": 0.0019317311249610642, "train_min_lr": 0.0019317311249610642, "train_loss": 0.2578404914014615, "train_loss_scale": 289870.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007366923746676781, "epoch": 658, "n_parameters": 631477312} {"train_lr": 0.0019288992977325938, "train_min_lr": 0.0019288992977325938, "train_loss": 0.2577574237500532, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007333662023600669, "epoch": 659, "n_parameters": 631477312} {"train_lr": 0.0019260658156182299, "train_min_lr": 0.0019260658156182299, "train_loss": 0.25773145967067623, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00734735148636481, "epoch": 660, "n_parameters": 631477312} {"train_lr": 0.0019232306896797824, "train_min_lr": 0.0019232306896797824, "train_loss": 0.2577080746832041, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007361096000143637, "epoch": 661, "n_parameters": 631477312} {"train_lr": 0.0019203939309854635, "train_min_lr": 0.0019203939309854635, "train_loss": 0.25773236894896495, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007342771173287661, "epoch": 662, "n_parameters": 631477312} {"train_lr": 0.0019175555506098707, "train_min_lr": 0.0019175555506098707, "train_loss": 0.2576478488641815, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007319876755504176, "epoch": 663, "n_parameters": 631477312} {"train_lr": 0.0019147155596339223, "train_min_lr": 0.0019147155596339223, "train_loss": 0.25764644898784655, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007355997065762774, "epoch": 664, "n_parameters": 631477312} {"train_lr": 0.0019118739691448333, "train_min_lr": 0.0019118739691448333, "train_loss": 0.2576477486693945, "train_loss_scale": 888937.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007362589128136348, "epoch": 665, "n_parameters": 631477312} {"train_lr": 0.001909030790236056, "train_min_lr": 0.001909030790236056, "train_loss": 0.2576003094884352, "train_loss_scale": 672164.1025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 666, "n_parameters": 631477312} {"train_lr": 0.0019061860340072474, "train_min_lr": 0.0019061860340072474, "train_loss": 0.2575840246505462, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007356316827309246, "epoch": 667, "n_parameters": 631477312} {"train_lr": 0.001903339711564228, "train_min_lr": 0.001903339711564228, "train_loss": 0.2576311155407427, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0074262594675812395, "epoch": 668, "n_parameters": 631477312} {"train_lr": 0.0019004918340189175, "train_min_lr": 0.0019004918340189175, "train_loss": 0.2576095945446585, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00739625646905878, "epoch": 669, "n_parameters": 631477312} {"train_lr": 0.0018976424124893226, "train_min_lr": 0.0018976424124893226, "train_loss": 0.25754934339784086, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007343021140863689, "epoch": 670, "n_parameters": 631477312} {"train_lr": 0.0018947914580994662, "train_min_lr": 0.0018947914580994662, "train_loss": 0.25760002210378075, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007383784456835248, "epoch": 671, "n_parameters": 631477312} {"train_lr": 0.0018919389819793608, "train_min_lr": 0.0018919389819793608, "train_loss": 0.25755225678081983, "train_loss_scale": 685607.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007376808300017355, "epoch": 672, "n_parameters": 631477312} {"train_lr": 0.0018890849952649664, "train_min_lr": 0.0018890849952649664, "train_loss": 0.25753313625076163, "train_loss_scale": 783071.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 673, "n_parameters": 631477312} {"train_lr": 0.0018862295090981218, "train_min_lr": 0.0018862295090981218, "train_loss": 0.25751732918732345, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007448382005513383, "epoch": 674, "n_parameters": 631477312} {"train_lr": 0.0018833725346265372, "train_min_lr": 0.0018833725346265372, "train_loss": 0.2575373830040917, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007470490918375361, "epoch": 675, "n_parameters": 631477312} {"train_lr": 0.0018805140830037252, "train_min_lr": 0.0018805140830037252, "train_loss": 0.257541626602268, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007449137845422882, "epoch": 676, "n_parameters": 631477312} {"train_lr": 0.001877654165388965, "train_min_lr": 0.001877654165388965, "train_loss": 0.25746912975759745, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007402565128778895, "epoch": 677, "n_parameters": 631477312} {"train_lr": 0.001874792792947265, "train_min_lr": 0.001874792792947265, "train_loss": 0.25748288086675203, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00740283464666647, "epoch": 678, "n_parameters": 631477312} {"train_lr": 0.0018719299768493057, "train_min_lr": 0.0018719299768493057, "train_loss": 0.2574400114886558, "train_loss_scale": 574700.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007472217209840146, "epoch": 679, "n_parameters": 631477312} {"train_lr": 0.0018690657282714014, "train_min_lr": 0.0018690657282714014, "train_loss": 0.2574725348699408, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007418991333673684, "epoch": 680, "n_parameters": 631477312} {"train_lr": 0.0018662000583954724, "train_min_lr": 0.0018662000583954724, "train_loss": 0.2574232314199878, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00746058602220355, "epoch": 681, "n_parameters": 631477312} {"train_lr": 0.0018633329784089752, "train_min_lr": 0.0018633329784089752, "train_loss": 0.25745070970748574, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007455653552777874, "epoch": 682, "n_parameters": 631477312} {"train_lr": 0.0018604644995048785, "train_min_lr": 0.0018604644995048785, "train_loss": 0.25735789612652016, "train_loss_scale": 593184.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 683, "n_parameters": 631477312} {"train_lr": 0.0018575946328816017, "train_min_lr": 0.0018575946328816017, "train_loss": 0.2573820703382341, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007444031254495853, "epoch": 684, "n_parameters": 631477312} {"train_lr": 0.0018547233897429978, "train_min_lr": 0.0018547233897429978, "train_loss": 0.26091927690849376, "train_loss_scale": 273906.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 685, "n_parameters": 631477312} {"train_lr": 0.0018518507812982775, "train_min_lr": 0.0018518507812982775, "train_loss": 0.25790713182710207, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007772482277897115, "epoch": 686, "n_parameters": 631477312} {"train_lr": 0.0018489768187619955, "train_min_lr": 0.0018489768187619955, "train_loss": 0.2576851010358391, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007482101990041347, "epoch": 687, "n_parameters": 631477312} {"train_lr": 0.0018461015133539854, "train_min_lr": 0.0018461015133539854, "train_loss": 0.2575647064950317, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00757528814033438, "epoch": 688, "n_parameters": 631477312} {"train_lr": 0.0018432248762993175, "train_min_lr": 0.0018432248762993175, "train_loss": 0.2574065612682786, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007409753331627984, "epoch": 689, "n_parameters": 631477312} {"train_lr": 0.0018403469188282779, "train_min_lr": 0.0018403469188282779, "train_loss": 0.2574357366732632, "train_loss_scale": 168881.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 690, "n_parameters": 631477312} {"train_lr": 0.0018374676521762895, "train_min_lr": 0.0018374676521762895, "train_loss": 0.2573566006329388, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007442740504092609, "epoch": 691, "n_parameters": 631477312} {"train_lr": 0.0018345870875838982, "train_min_lr": 0.0018345870875838982, "train_loss": 0.2572874967008829, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007413347257683292, "epoch": 692, "n_parameters": 631477312} {"train_lr": 0.0018317052362967102, "train_min_lr": 0.0018317052362967102, "train_loss": 0.2572918595292438, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007514535243479678, "epoch": 693, "n_parameters": 631477312} {"train_lr": 0.0018288221095653606, "train_min_lr": 0.0018288221095653606, "train_loss": 0.25732733111064404, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007511538399669986, "epoch": 694, "n_parameters": 631477312} {"train_lr": 0.0018259377186454588, "train_min_lr": 0.0018259377186454588, "train_loss": 0.25725036162811404, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007438499852119444, "epoch": 695, "n_parameters": 631477312} {"train_lr": 0.0018230520747975509, "train_min_lr": 0.0018230520747975509, "train_loss": 0.2572612075660473, "train_loss_scale": 170561.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007492894388866635, "epoch": 696, "n_parameters": 631477312} {"train_lr": 0.0018201651892870796, "train_min_lr": 0.0018201651892870796, "train_loss": 0.2572631240070153, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007486469630235568, "epoch": 697, "n_parameters": 631477312} {"train_lr": 0.0018172770733843224, "train_min_lr": 0.0018172770733843224, "train_loss": 0.25723067205100775, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00747160763384249, "epoch": 698, "n_parameters": 631477312} {"train_lr": 0.0018143877383643727, "train_min_lr": 0.0018143877383643727, "train_loss": 0.25720246778263783, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007460267483017957, "epoch": 699, "n_parameters": 631477312} {"train_lr": 0.0018114971955070808, "train_min_lr": 0.0018114971955070808, "train_loss": 0.2572053457156588, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00749504659175634, "epoch": 700, "n_parameters": 631477312} {"train_lr": 0.0018086054560970055, "train_min_lr": 0.0018086054560970055, "train_loss": 0.2571493071527817, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007502616672681119, "epoch": 701, "n_parameters": 631477312} {"train_lr": 0.0018057125314233852, "train_min_lr": 0.0018057125314233852, "train_loss": 0.2571215156125478, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007453618535342125, "epoch": 702, "n_parameters": 631477312} {"train_lr": 0.00180281843278008, "train_min_lr": 0.00180281843278008, "train_loss": 0.2571548189287289, "train_loss_scale": 495721.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007555952174768139, "epoch": 703, "n_parameters": 631477312} {"train_lr": 0.0017999231714655396, "train_min_lr": 0.0017999231714655396, "train_loss": 0.2570990022027101, "train_loss_scale": 329360.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 704, "n_parameters": 631477312} {"train_lr": 0.0017970267587827415, "train_min_lr": 0.0017970267587827415, "train_loss": 0.2570752904851897, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007512430055962445, "epoch": 705, "n_parameters": 631477312} {"train_lr": 0.0017941292060391677, "train_min_lr": 0.0017941292060391677, "train_loss": 0.257099472060919, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007559696290021141, "epoch": 706, "n_parameters": 631477312} {"train_lr": 0.001791230524546753, "train_min_lr": 0.001791230524546753, "train_loss": 0.2571026380532063, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007515765831041603, "epoch": 707, "n_parameters": 631477312} {"train_lr": 0.0017883307256218244, "train_min_lr": 0.0017883307256218244, "train_loss": 0.2570533874796895, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007508314763888335, "epoch": 708, "n_parameters": 631477312} {"train_lr": 0.001785429820585086, "train_min_lr": 0.001785429820585086, "train_loss": 0.2570564945807489, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075281328844049806, "epoch": 709, "n_parameters": 631477312} {"train_lr": 0.0017825278207615554, "train_min_lr": 0.0017825278207615554, "train_loss": 0.2570677784313328, "train_loss_scale": 349525.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00754700435582214, "epoch": 710, "n_parameters": 631477312} {"train_lr": 0.0017796247374805261, "train_min_lr": 0.0017796247374805261, "train_loss": 0.25706499154703355, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075978490938910116, "epoch": 711, "n_parameters": 631477312} {"train_lr": 0.0017767205820755137, "train_min_lr": 0.0017767205820755137, "train_loss": 0.2570464437893138, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00754668150926964, "epoch": 712, "n_parameters": 631477312} {"train_lr": 0.0017738153658842265, "train_min_lr": 0.0017738153658842265, "train_loss": 0.25706118512719583, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007574751171677445, "epoch": 713, "n_parameters": 631477312} {"train_lr": 0.0017709091002485128, "train_min_lr": 0.0017709091002485128, "train_loss": 0.2570616727342638, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007599354427582465, "epoch": 714, "n_parameters": 631477312} {"train_lr": 0.0017680017965143165, "train_min_lr": 0.0017680017965143165, "train_loss": 0.25745675388950473, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010088411149365876, "epoch": 715, "n_parameters": 631477312} {"train_lr": 0.001765093466031638, "train_min_lr": 0.001765093466031638, "train_loss": 0.25814377848142517, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009642421597770104, "epoch": 716, "n_parameters": 631477312} {"train_lr": 0.0017621841201544786, "train_min_lr": 0.0017621841201544786, "train_loss": 0.2571322832566996, "train_loss_scale": 947751.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 717, "n_parameters": 631477312} {"train_lr": 0.0017592737702408106, "train_min_lr": 0.0017592737702408106, "train_loss": 0.2570317384118262, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007600862010477636, "epoch": 718, "n_parameters": 631477312} {"train_lr": 0.001756362427652523, "train_min_lr": 0.001756362427652523, "train_loss": 0.25698344198425704, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007597850029691098, "epoch": 719, "n_parameters": 631477312} {"train_lr": 0.0017534501037553814, "train_min_lr": 0.0017534501037553814, "train_loss": 0.25703518962165195, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007656922610178112, "epoch": 720, "n_parameters": 631477312} {"train_lr": 0.0017505368099189806, "train_min_lr": 0.0017505368099189806, "train_loss": 0.25695177614509773, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007564479283666095, "epoch": 721, "n_parameters": 631477312} {"train_lr": 0.0017476225575167052, "train_min_lr": 0.0017476225575167052, "train_loss": 0.25688593761804396, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007537394381450633, "epoch": 722, "n_parameters": 631477312} {"train_lr": 0.0017447073579256777, "train_min_lr": 0.0017447073579256777, "train_loss": 0.2569131267627176, "train_loss_scale": 472195.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 723, "n_parameters": 631477312} {"train_lr": 0.0017417912225267211, "train_min_lr": 0.0017417912225267211, "train_loss": 0.2568792189972905, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007580616620357315, "epoch": 724, "n_parameters": 631477312} {"train_lr": 0.0017388741627043104, "train_min_lr": 0.0017388741627043104, "train_loss": 0.2568080995816929, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007562037629003708, "epoch": 725, "n_parameters": 631477312} {"train_lr": 0.0017359561898465316, "train_min_lr": 0.0017359561898465316, "train_loss": 0.25682357552115065, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007595708690738927, "epoch": 726, "n_parameters": 631477312} {"train_lr": 0.0017330373153450322, "train_min_lr": 0.0017330373153450322, "train_loss": 0.2568161610322885, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007610809425106988, "epoch": 727, "n_parameters": 631477312} {"train_lr": 0.001730117550594988, "train_min_lr": 0.001730117550594988, "train_loss": 0.2567907425019747, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007559453889441032, "epoch": 728, "n_parameters": 631477312} {"train_lr": 0.0017271969069950334, "train_min_lr": 0.0017271969069950334, "train_loss": 0.2567945331072387, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007594541985338601, "epoch": 729, "n_parameters": 631477312} {"train_lr": 0.001724275395947252, "train_min_lr": 0.001724275395947252, "train_loss": 0.2567865290493967, "train_loss_scale": 468834.46153846156, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007594902017631401, "epoch": 730, "n_parameters": 631477312} {"train_lr": 0.0017213530288571037, "train_min_lr": 0.0017213530288571037, "train_loss": 0.2567874080417917, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007589983453866667, "epoch": 731, "n_parameters": 631477312} {"train_lr": 0.0017184298171333916, "train_min_lr": 0.0017184298171333916, "train_loss": 0.25671074992845744, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007604810762971353, "epoch": 732, "n_parameters": 631477312} {"train_lr": 0.0017155057721882157, "train_min_lr": 0.0017155057721882157, "train_loss": 0.2567418664186381, "train_loss_scale": 456231.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 733, "n_parameters": 631477312} {"train_lr": 0.0017125809054369357, "train_min_lr": 0.0017125809054369357, "train_loss": 0.256718642773847, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007666008097704691, "epoch": 734, "n_parameters": 631477312} {"train_lr": 0.0017096552282981094, "train_min_lr": 0.0017096552282981094, "train_loss": 0.2567215479301432, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076320802446645805, "epoch": 735, "n_parameters": 631477312} {"train_lr": 0.001706728752193467, "train_min_lr": 0.001706728752193467, "train_loss": 0.2567655823450201, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007641878252168401, "epoch": 736, "n_parameters": 631477312} {"train_lr": 0.0017038014885478482, "train_min_lr": 0.0017038014885478482, "train_loss": 0.25668728972474736, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007628966966130508, "epoch": 737, "n_parameters": 631477312} {"train_lr": 0.0017008734487891795, "train_min_lr": 0.0017008734487891795, "train_loss": 0.2566274256021596, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076050828434885125, "epoch": 738, "n_parameters": 631477312} {"train_lr": 0.0016979446443484076, "train_min_lr": 0.0016979446443484076, "train_loss": 0.2566217400897772, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007662888932734346, "epoch": 739, "n_parameters": 631477312} {"train_lr": 0.0016950150866594704, "train_min_lr": 0.0016950150866594704, "train_loss": 0.2566369346444471, "train_loss_scale": 484798.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007652579660479648, "epoch": 740, "n_parameters": 631477312} {"train_lr": 0.0016920847871592394, "train_min_lr": 0.0016920847871592394, "train_loss": 0.2566115236393391, "train_loss_scale": 368009.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 741, "n_parameters": 631477312} {"train_lr": 0.001689153757287491, "train_min_lr": 0.001689153757287491, "train_loss": 0.25667869427110046, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007745648141747388, "epoch": 742, "n_parameters": 631477312} {"train_lr": 0.0016862220084868456, "train_min_lr": 0.0016862220084868456, "train_loss": 0.25658819996393645, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007694901594032462, "epoch": 743, "n_parameters": 631477312} {"train_lr": 0.0016832895522027335, "train_min_lr": 0.0016832895522027335, "train_loss": 0.25655338935291344, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00769354521523779, "epoch": 744, "n_parameters": 631477312} {"train_lr": 0.001680356399883348, "train_min_lr": 0.001680356399883348, "train_loss": 0.25661438338171977, "train_loss_scale": 162579.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 745, "n_parameters": 631477312} {"train_lr": 0.001677422562979598, "train_min_lr": 0.001677422562979598, "train_loss": 0.25656328169712556, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00768687860825314, "epoch": 746, "n_parameters": 631477312} {"train_lr": 0.0016744880529450644, "train_min_lr": 0.0016744880529450644, "train_loss": 0.2565850848570848, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007694118657710556, "epoch": 747, "n_parameters": 631477312} {"train_lr": 0.0016715528812359585, "train_min_lr": 0.0016715528812359585, "train_loss": 0.25653584947726, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007718794234395505, "epoch": 748, "n_parameters": 631477312} {"train_lr": 0.0016686170593110696, "train_min_lr": 0.0016686170593110696, "train_loss": 0.2565452972761331, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076879869462946095, "epoch": 749, "n_parameters": 631477312} {"train_lr": 0.0016656805986317326, "train_min_lr": 0.0016656805986317326, "train_loss": 0.25649577032368726, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007710304778391639, "epoch": 750, "n_parameters": 631477312} {"train_lr": 0.001662743510661771, "train_min_lr": 0.001662743510661771, "train_loss": 0.2565122539840209, "train_loss_scale": 176863.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007721566775730118, "epoch": 751, "n_parameters": 631477312} {"train_lr": 0.0016598058068674597, "train_min_lr": 0.0016598058068674597, "train_loss": 0.2564709611917631, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007734012640773868, "epoch": 752, "n_parameters": 631477312} {"train_lr": 0.0016568674987174749, "train_min_lr": 0.0016568674987174749, "train_loss": 0.2564787741046017, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007700320476522813, "epoch": 753, "n_parameters": 631477312} {"train_lr": 0.001653928597682856, "train_min_lr": 0.001653928597682856, "train_loss": 0.25641250499607754, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007713962689591333, "epoch": 754, "n_parameters": 631477312} {"train_lr": 0.0016509891152369522, "train_min_lr": 0.0016509891152369522, "train_loss": 0.2564860203016836, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007796949699509124, "epoch": 755, "n_parameters": 631477312} {"train_lr": 0.0016480490628553886, "train_min_lr": 0.0016480490628553886, "train_loss": 0.25718122001032895, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01003718726002635, "epoch": 756, "n_parameters": 631477312} {"train_lr": 0.0016451084520160096, "train_min_lr": 0.0016451084520160096, "train_loss": 0.2564473483675661, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007813235086639626, "epoch": 757, "n_parameters": 631477312} {"train_lr": 0.001642167294198843, "train_min_lr": 0.001642167294198843, "train_loss": 0.25846680620303136, "train_loss_scale": 508324.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014312639314597711, "epoch": 758, "n_parameters": 631477312} {"train_lr": 0.0016392256008860519, "train_min_lr": 0.0016392256008860519, "train_loss": 0.25785990057286257, "train_loss_scale": 315076.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 759, "n_parameters": 631477312} {"train_lr": 0.0016362833835618874, "train_min_lr": 0.0016362833835618874, "train_loss": 0.2566134316416887, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008168111901539259, "epoch": 760, "n_parameters": 631477312} {"train_lr": 0.00163334065371265, "train_min_lr": 0.00163334065371265, "train_loss": 0.2564766756706656, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007757416142759701, "epoch": 761, "n_parameters": 631477312} {"train_lr": 0.0016303974228266398, "train_min_lr": 0.0016303974228266398, "train_loss": 0.25642267044167966, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0078081772886574845, "epoch": 762, "n_parameters": 631477312} {"train_lr": 0.0016274537023941095, "train_min_lr": 0.0016274537023941095, "train_loss": 0.2563271929915899, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007719287133883112, "epoch": 763, "n_parameters": 631477312} {"train_lr": 0.0016245095039072266, "train_min_lr": 0.0016245095039072266, "train_loss": 0.2562973007159785, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007696097131627493, "epoch": 764, "n_parameters": 631477312} {"train_lr": 0.0016215648388600266, "train_min_lr": 0.0016215648388600266, "train_loss": 0.25626438106887806, "train_loss_scale": 363808.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007697437765996139, "epoch": 765, "n_parameters": 631477312} {"train_lr": 0.001618619718748361, "train_min_lr": 0.001618619718748361, "train_loss": 0.2562424623801445, "train_loss_scale": 338602.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 766, "n_parameters": 631477312} {"train_lr": 0.0016156741550698618, "train_min_lr": 0.0016156741550698618, "train_loss": 0.25624114729893893, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007795029831751703, "epoch": 767, "n_parameters": 631477312} {"train_lr": 0.0016127281593238927, "train_min_lr": 0.0016127281593238927, "train_loss": 0.25624067690641356, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007737873057918385, "epoch": 768, "n_parameters": 631477312} {"train_lr": 0.0016097817430115046, "train_min_lr": 0.0016097817430115046, "train_loss": 0.25614627432197523, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00768194082700528, "epoch": 769, "n_parameters": 631477312} {"train_lr": 0.0016068349176353834, "train_min_lr": 0.0016068349176353834, "train_loss": 0.25616983310856783, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007733783208562109, "epoch": 770, "n_parameters": 631477312} {"train_lr": 0.001603887694699822, "train_min_lr": 0.001603887694699822, "train_loss": 0.25616898094542706, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007791684625538973, "epoch": 771, "n_parameters": 631477312} {"train_lr": 0.0016009400857106583, "train_min_lr": 0.0016009400857106583, "train_loss": 0.2562235125567382, "train_loss_scale": 340283.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007814682226102704, "epoch": 772, "n_parameters": 631477312} {"train_lr": 0.0015979921021752421, "train_min_lr": 0.0015979921021752421, "train_loss": 0.2561963401012457, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007810014852274878, "epoch": 773, "n_parameters": 631477312} {"train_lr": 0.001595043755602381, "train_min_lr": 0.001595043755602381, "train_loss": 0.2561943457724574, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007858961144498048, "epoch": 774, "n_parameters": 631477312} {"train_lr": 0.001592095057502303, "train_min_lr": 0.001592095057502303, "train_loss": 0.25614846692695165, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007809885225306528, "epoch": 775, "n_parameters": 631477312} {"train_lr": 0.0015891460193866087, "train_min_lr": 0.0015891460193866087, "train_loss": 0.2561280397274412, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007765035121104656, "epoch": 776, "n_parameters": 631477312} {"train_lr": 0.0015861966527682212, "train_min_lr": 0.0015861966527682212, "train_loss": 0.2561115845112512, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007764679805124895, "epoch": 777, "n_parameters": 631477312} {"train_lr": 0.0015832469691613538, "train_min_lr": 0.0015832469691613538, "train_loss": 0.25611543651813495, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00784239901576, "epoch": 778, "n_parameters": 631477312} {"train_lr": 0.00158029698008145, "train_min_lr": 0.00158029698008145, "train_loss": 0.2561007391136044, "train_loss_scale": 564617.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 779, "n_parameters": 631477312} {"train_lr": 0.0015773466970451506, "train_min_lr": 0.0015773466970451506, "train_loss": 0.2560985390699875, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00784403330927046, "epoch": 780, "n_parameters": 631477312} {"train_lr": 0.0015743961315702415, "train_min_lr": 0.0015743961315702415, "train_loss": 0.25608801484430355, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007846210228028493, "epoch": 781, "n_parameters": 631477312} {"train_lr": 0.001571445295175614, "train_min_lr": 0.001571445295175614, "train_loss": 0.2560333179202504, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007812439774473508, "epoch": 782, "n_parameters": 631477312} {"train_lr": 0.0015684941993812142, "train_min_lr": 0.0015684941993812142, "train_loss": 0.2560575618677271, "train_loss_scale": 433545.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 783, "n_parameters": 631477312} {"train_lr": 0.0015655428557080017, "train_min_lr": 0.0015655428557080017, "train_loss": 0.2560986585270327, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007937778853094922, "epoch": 784, "n_parameters": 631477312} {"train_lr": 0.0015625912756779038, "train_min_lr": 0.0015625912756779038, "train_loss": 0.2560270179469043, "train_loss_scale": 218873.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 785, "n_parameters": 631477312} {"train_lr": 0.0015596394708137726, "train_min_lr": 0.0015596394708137726, "train_loss": 0.25610795280096144, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008022013417361543, "epoch": 786, "n_parameters": 631477312} {"train_lr": 0.0015566874526393342, "train_min_lr": 0.0015566874526393342, "train_loss": 0.25621682695913106, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009665096199545914, "epoch": 787, "n_parameters": 631477312} {"train_lr": 0.0015537352326791501, "train_min_lr": 0.0015537352326791501, "train_loss": 0.2561679302732675, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00818339258712979, "epoch": 788, "n_parameters": 631477312} {"train_lr": 0.0015507828224585658, "train_min_lr": 0.0015507828224585658, "train_loss": 0.25603614654093504, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00797753191093365, "epoch": 789, "n_parameters": 631477312} {"train_lr": 0.0015478302335036798, "train_min_lr": 0.0015478302335036798, "train_loss": 0.2560512650644598, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00798003918503244, "epoch": 790, "n_parameters": 631477312} {"train_lr": 0.0015448774773412749, "train_min_lr": 0.0015448774773412749, "train_loss": 0.2559587513514532, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007870538730341464, "epoch": 791, "n_parameters": 631477312} {"train_lr": 0.001541924565498795, "train_min_lr": 0.001541924565498795, "train_loss": 0.255932859533156, "train_loss_scale": 251641.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007886180495962692, "epoch": 792, "n_parameters": 631477312} {"train_lr": 0.0015389715095042898, "train_min_lr": 0.0015389715095042898, "train_loss": 0.25587465270207477, "train_loss_scale": 241138.87179487178, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 793, "n_parameters": 631477312} {"train_lr": 0.0015360183208863727, "train_min_lr": 0.0015360183208863727, "train_loss": 0.2558826216054746, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007887949552148199, "epoch": 794, "n_parameters": 631477312} {"train_lr": 0.0015330650111741698, "train_min_lr": 0.0015330650111741698, "train_loss": 0.25584216530506426, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00790105751888731, "epoch": 795, "n_parameters": 631477312} {"train_lr": 0.0015301115918972897, "train_min_lr": 0.0015301115918972897, "train_loss": 0.2558873697506407, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007957006906135341, "epoch": 796, "n_parameters": 631477312} {"train_lr": 0.001527158074585758, "train_min_lr": 0.001527158074585758, "train_loss": 0.2558301482260084, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007891196475471728, "epoch": 797, "n_parameters": 631477312} {"train_lr": 0.0015242044707699904, "train_min_lr": 0.0015242044707699904, "train_loss": 0.255772762433387, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007912265695034502, "epoch": 798, "n_parameters": 631477312} {"train_lr": 0.0015212507919807395, "train_min_lr": 0.0015212507919807395, "train_loss": 0.2558128456674659, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00792710761127707, "epoch": 799, "n_parameters": 631477312} {"train_lr": 0.0015182970497490454, "train_min_lr": 0.0015182970497490454, "train_loss": 0.2558172753480717, "train_loss_scale": 229376.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00800112716984959, "epoch": 800, "n_parameters": 631477312} {"train_lr": 0.0015153432556062026, "train_min_lr": 0.0015153432556062026, "train_loss": 0.2558753298422417, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008056320290331943, "epoch": 801, "n_parameters": 631477312} {"train_lr": 0.0015123894210837038, "train_min_lr": 0.0015123894210837038, "train_loss": 0.2557741494443363, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008033122177808903, "epoch": 802, "n_parameters": 631477312} {"train_lr": 0.0015094355577131994, "train_min_lr": 0.0015094355577131994, "train_loss": 0.2558154263384401, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008008071710952582, "epoch": 803, "n_parameters": 631477312} {"train_lr": 0.0015064816770264577, "train_min_lr": 0.0015064816770264577, "train_loss": 0.25572708481624246, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007938636138509864, "epoch": 804, "n_parameters": 631477312} {"train_lr": 0.0015035277905553067, "train_min_lr": 0.0015035277905553067, "train_loss": 0.2557705420290287, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007997489474618282, "epoch": 805, "n_parameters": 631477312} {"train_lr": 0.0015005739098316025, "train_min_lr": 0.0015005739098316025, "train_loss": 0.2557194930716203, "train_loss_scale": 351205.74358974356, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007993398880502448, "epoch": 806, "n_parameters": 631477312} {"train_lr": 0.001497620046387179, "train_min_lr": 0.001497620046387179, "train_loss": 0.2556881928094066, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007959304437136803, "epoch": 807, "n_parameters": 631477312} {"train_lr": 0.001494666211753796, "train_min_lr": 0.001494666211753796, "train_loss": 0.2556873252466082, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007958929099214192, "epoch": 808, "n_parameters": 631477312} {"train_lr": 0.0014917124174631104, "train_min_lr": 0.0014917124174631104, "train_loss": 0.2556371877226644, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007935887296648266, "epoch": 809, "n_parameters": 631477312} {"train_lr": 0.001488758675046614, "train_min_lr": 0.001488758675046614, "train_loss": 0.2556318110601308, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007979829988191621, "epoch": 810, "n_parameters": 631477312} {"train_lr": 0.0014858049960356009, "train_min_lr": 0.0014858049960356009, "train_loss": 0.25559947427469665, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007999973813406168, "epoch": 811, "n_parameters": 631477312} {"train_lr": 0.0014828513919611134, "train_min_lr": 0.0014828513919611134, "train_loss": 0.25566186852609885, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008064273023942055, "epoch": 812, "n_parameters": 631477312} {"train_lr": 0.0014798978743539074, "train_min_lr": 0.0014798978743539074, "train_loss": 0.25561975894603306, "train_loss_scale": 1011606.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00803271302422031, "epoch": 813, "n_parameters": 631477312} {"train_lr": 0.001476944454744393, "train_min_lr": 0.001476944454744393, "train_loss": 0.25558595582902527, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008031238418203803, "epoch": 814, "n_parameters": 631477312} {"train_lr": 0.0014739911446626046, "train_min_lr": 0.0014739911446626046, "train_loss": 0.25556915063554275, "train_loss_scale": 673844.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 815, "n_parameters": 631477312} {"train_lr": 0.0014710379556381466, "train_min_lr": 0.0014710379556381466, "train_loss": 0.2556159754301636, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00809595918802258, "epoch": 816, "n_parameters": 631477312} {"train_lr": 0.001468084899200151, "train_min_lr": 0.001468084899200151, "train_loss": 0.25557673275052833, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008077837174567275, "epoch": 817, "n_parameters": 631477312} {"train_lr": 0.0014651319868772296, "train_min_lr": 0.0014651319868772296, "train_loss": 0.2556173726558112, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008131990815178515, "epoch": 818, "n_parameters": 631477312} {"train_lr": 0.001462179230197436, "train_min_lr": 0.001462179230197436, "train_loss": 0.2556037210489217, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00816527785261711, "epoch": 819, "n_parameters": 631477312} {"train_lr": 0.0014592266406882124, "train_min_lr": 0.0014592266406882124, "train_loss": 0.2556096405829661, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008130907957167484, "epoch": 820, "n_parameters": 631477312} {"train_lr": 0.0014562742298763521, "train_min_lr": 0.0014562742298763521, "train_loss": 0.25553469678375107, "train_loss_scale": 420102.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 821, "n_parameters": 631477312} {"train_lr": 0.0014533220092879473, "train_min_lr": 0.0014533220092879473, "train_loss": 0.2555002860026434, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00806266184335049, "epoch": 822, "n_parameters": 631477312} {"train_lr": 0.0014503699904483498, "train_min_lr": 0.0014503699904483498, "train_loss": 0.25550505507868737, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00804400604773456, "epoch": 823, "n_parameters": 631477312} {"train_lr": 0.0014474181848821218, "train_min_lr": 0.0014474181848821218, "train_loss": 0.25545413888250595, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008090687341068704, "epoch": 824, "n_parameters": 631477312} {"train_lr": 0.0014444666041129952, "train_min_lr": 0.0014444666041129952, "train_loss": 0.2555076971482963, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008156930232265344, "epoch": 825, "n_parameters": 631477312} {"train_lr": 0.0014415152596638215, "train_min_lr": 0.0014415152596638215, "train_loss": 0.2554991159808989, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008175783699605232, "epoch": 826, "n_parameters": 631477312} {"train_lr": 0.0014385641630565349, "train_min_lr": 0.0014385641630565349, "train_loss": 0.255472906677124, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008158591413237631, "epoch": 827, "n_parameters": 631477312} {"train_lr": 0.001435613325812093, "train_min_lr": 0.001435613325812093, "train_loss": 0.25548856508416623, "train_loss_scale": 287350.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 828, "n_parameters": 631477312} {"train_lr": 0.001432662759450452, "train_min_lr": 0.001432662759450452, "train_loss": 0.2554334717599723, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0081257131920817, "epoch": 829, "n_parameters": 631477312} {"train_lr": 0.0014297124754905023, "train_min_lr": 0.0014297124754905023, "train_loss": 0.25544956441705996, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008217413764960395, "epoch": 830, "n_parameters": 631477312} {"train_lr": 0.0014267624854500333, "train_min_lr": 0.0014267624854500333, "train_loss": 0.25537624577872264, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008133471307631295, "epoch": 831, "n_parameters": 631477312} {"train_lr": 0.0014238128008456877, "train_min_lr": 0.0014238128008456877, "train_loss": 0.25539724002831066, "train_loss_scale": 222654.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 832, "n_parameters": 631477312} {"train_lr": 0.00142086343319292, "train_min_lr": 0.00142086343319292, "train_loss": 0.25542222468170506, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008273453596465958, "epoch": 833, "n_parameters": 631477312} {"train_lr": 0.0014179143940059404, "train_min_lr": 0.0014179143940059404, "train_loss": 0.25538445621406514, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008162741250812244, "epoch": 834, "n_parameters": 631477312} {"train_lr": 0.001414965694797677, "train_min_lr": 0.001414965694797677, "train_loss": 0.25584735515085644, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01077321111314142, "epoch": 835, "n_parameters": 631477312} {"train_lr": 0.00141201734707974, "train_min_lr": 0.00141201734707974, "train_loss": 0.25529418057069564, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008182140603028715, "epoch": 836, "n_parameters": 631477312} {"train_lr": 0.0014090693623623558, "train_min_lr": 0.0014090693623623558, "train_loss": 0.25533771093409413, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008213330612470133, "epoch": 837, "n_parameters": 631477312} {"train_lr": 0.001406121752154341, "train_min_lr": 0.001406121752154341, "train_loss": 0.25535918293723786, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0082423762516835, "epoch": 838, "n_parameters": 631477312} {"train_lr": 0.0014031745279630477, "train_min_lr": 0.0014031745279630477, "train_loss": 0.25528912115185404, "train_loss_scale": 247860.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008188652195549833, "epoch": 839, "n_parameters": 631477312} {"train_lr": 0.0014002277012943208, "train_min_lr": 0.0014002277012943208, "train_loss": 0.2552596958807837, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008169909880962223, "epoch": 840, "n_parameters": 631477312} {"train_lr": 0.0013972812836524518, "train_min_lr": 0.0013972812836524518, "train_loss": 0.255224554029365, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008206915590935985, "epoch": 841, "n_parameters": 631477312} {"train_lr": 0.0013943352865401412, "train_min_lr": 0.0013943352865401412, "train_loss": 0.2552252079700478, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008167289688478773, "epoch": 842, "n_parameters": 631477312} {"train_lr": 0.0013913897214584403, "train_min_lr": 0.0013913897214584403, "train_loss": 0.25524069204066807, "train_loss_scale": 143675.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 843, "n_parameters": 631477312} {"train_lr": 0.0013884445999067208, "train_min_lr": 0.0013884445999067208, "train_loss": 0.2551910594487802, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008181253154105388, "epoch": 844, "n_parameters": 631477312} {"train_lr": 0.0013854999333826144, "train_min_lr": 0.0013854999333826144, "train_loss": 0.2551603532867888, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0081756999835563, "epoch": 845, "n_parameters": 631477312} {"train_lr": 0.0013825557333819853, "train_min_lr": 0.0013825557333819853, "train_loss": 0.25509779950759065, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00812019811023791, "epoch": 846, "n_parameters": 631477312} {"train_lr": 0.0013796120113988711, "train_min_lr": 0.0013796120113988711, "train_loss": 0.2551322208257774, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008219059532842575, "epoch": 847, "n_parameters": 631477312} {"train_lr": 0.001376668778925445, "train_min_lr": 0.001376668778925445, "train_loss": 0.2551037336562354, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008214789681518689, "epoch": 848, "n_parameters": 631477312} {"train_lr": 0.0013737260474519669, "train_min_lr": 0.0013737260474519669, "train_loss": 0.25515796433990967, "train_loss_scale": 68476.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 849, "n_parameters": 631477312} {"train_lr": 0.0013707838284667446, "train_min_lr": 0.0013707838284667446, "train_loss": 0.25516906769599956, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008350664018414533, "epoch": 850, "n_parameters": 631477312} {"train_lr": 0.0013678421334560834, "train_min_lr": 0.0013678421334560834, "train_loss": 0.2551007809427877, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00829840992959455, "epoch": 851, "n_parameters": 631477312} {"train_lr": 0.0013649009739042414, "train_min_lr": 0.0013649009739042414, "train_loss": 0.25515919971542483, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008436504673833648, "epoch": 852, "n_parameters": 631477312} {"train_lr": 0.00136196036129339, "train_min_lr": 0.00136196036129339, "train_loss": 0.2557772326862248, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01135765069958348, "epoch": 853, "n_parameters": 631477312} {"train_lr": 0.0013590203071035607, "train_min_lr": 0.0013590203071035607, "train_loss": 0.25509236972982013, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008359283784953639, "epoch": 854, "n_parameters": 631477312} {"train_lr": 0.0013560808228126077, "train_min_lr": 0.0013560808228126077, "train_loss": 0.2549975152658776, "train_loss_scale": 101244.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008203320973851264, "epoch": 855, "n_parameters": 631477312} {"train_lr": 0.0013531419198961616, "train_min_lr": 0.0013531419198961616, "train_loss": 0.254941803146488, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008187939487517072, "epoch": 856, "n_parameters": 631477312} {"train_lr": 0.0013502036098275803, "train_min_lr": 0.0013502036098275803, "train_loss": 0.25497263092368555, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008283047254782362, "epoch": 857, "n_parameters": 631477312} {"train_lr": 0.0013472659040779105, "train_min_lr": 0.0013472659040779105, "train_loss": 0.2550554224827255, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008423383189484669, "epoch": 858, "n_parameters": 631477312} {"train_lr": 0.0013443288141158347, "train_min_lr": 0.0013443288141158347, "train_loss": 0.255002414715142, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008360915866572984, "epoch": 859, "n_parameters": 631477312} {"train_lr": 0.0013413923514076363, "train_min_lr": 0.0013413923514076363, "train_loss": 0.25495953124780685, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008348848484158039, "epoch": 860, "n_parameters": 631477312} {"train_lr": 0.0013384565274171493, "train_min_lr": 0.0013384565274171493, "train_loss": 0.25491373076473767, "train_loss_scale": 148716.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008298868749242945, "epoch": 861, "n_parameters": 631477312} {"train_lr": 0.001335521353605712, "train_min_lr": 0.001335521353605712, "train_loss": 0.25486045871347857, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008247338395076206, "epoch": 862, "n_parameters": 631477312} {"train_lr": 0.0013325868414321238, "train_min_lr": 0.0013325868414321238, "train_loss": 0.2548773963935673, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008271555417778496, "epoch": 863, "n_parameters": 631477312} {"train_lr": 0.0013296530023526054, "train_min_lr": 0.0013296530023526054, "train_loss": 0.25484387492402816, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008246300689792499, "epoch": 864, "n_parameters": 631477312} {"train_lr": 0.0013267198478207453, "train_min_lr": 0.0013267198478207453, "train_loss": 0.254838540762042, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008330577788934207, "epoch": 865, "n_parameters": 631477312} {"train_lr": 0.0013237873892874622, "train_min_lr": 0.0013237873892874622, "train_loss": 0.25484217644537777, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008339752514774982, "epoch": 866, "n_parameters": 631477312} {"train_lr": 0.0013208556382009557, "train_min_lr": 0.0013208556382009557, "train_loss": 0.2547942867042879, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008381432658121085, "epoch": 867, "n_parameters": 631477312} {"train_lr": 0.0013179246060066679, "train_min_lr": 0.0013179246060066679, "train_loss": 0.2547723563770071, "train_loss_scale": 452030.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00833039660490333, "epoch": 868, "n_parameters": 631477312} {"train_lr": 0.0013149943041472286, "train_min_lr": 0.0013149943041472286, "train_loss": 0.25471133467418927, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00827043597252132, "epoch": 869, "n_parameters": 631477312} {"train_lr": 0.0013120647440624184, "train_min_lr": 0.0013120647440624184, "train_loss": 0.2547568638259784, "train_loss_scale": 383133.53846153844, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 870, "n_parameters": 631477312} {"train_lr": 0.0013091359371891224, "train_min_lr": 0.0013091359371891224, "train_loss": 0.25468482862775904, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008249234499182934, "epoch": 871, "n_parameters": 631477312} {"train_lr": 0.0013062078949612887, "train_min_lr": 0.0013062078949612887, "train_loss": 0.2547126785249043, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008304080170674775, "epoch": 872, "n_parameters": 631477312} {"train_lr": 0.0013032806288098726, "train_min_lr": 0.0013032806288098726, "train_loss": 0.25462820906586087, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008274298046345416, "epoch": 873, "n_parameters": 631477312} {"train_lr": 0.001300354150162807, "train_min_lr": 0.001300354150162807, "train_loss": 0.25477007012933683, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008550467850186694, "epoch": 874, "n_parameters": 631477312} {"train_lr": 0.0012974284704449469, "train_min_lr": 0.0012974284704449469, "train_loss": 0.2547187624403682, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008407257524175713, "epoch": 875, "n_parameters": 631477312} {"train_lr": 0.0012945036010780276, "train_min_lr": 0.0012945036010780276, "train_loss": 0.2546855138304333, "train_loss_scale": 295752.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008409619537325433, "epoch": 876, "n_parameters": 631477312} {"train_lr": 0.0012915795534806228, "train_min_lr": 0.0012915795534806228, "train_loss": 0.2546666903475013, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008379416889511049, "epoch": 877, "n_parameters": 631477312} {"train_lr": 0.0012886563390680972, "train_min_lr": 0.0012886563390680972, "train_loss": 0.25466325793128747, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008400450302407337, "epoch": 878, "n_parameters": 631477312} {"train_lr": 0.0012857339692525628, "train_min_lr": 0.0012857339692525628, "train_loss": 0.25456682321293134, "train_loss_scale": 430185.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 879, "n_parameters": 631477312} {"train_lr": 0.0012828124554428338, "train_min_lr": 0.0012828124554428338, "train_loss": 0.2545887759516541, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008389626571442932, "epoch": 880, "n_parameters": 631477312} {"train_lr": 0.0012798918090443838, "train_min_lr": 0.0012798918090443838, "train_loss": 0.25460107137675947, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00840815247143977, "epoch": 881, "n_parameters": 631477312} {"train_lr": 0.0012769720414592995, "train_min_lr": 0.0012769720414592995, "train_loss": 0.2545981101411132, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00844201787810725, "epoch": 882, "n_parameters": 631477312} {"train_lr": 0.0012740531640862353, "train_min_lr": 0.0012740531640862353, "train_loss": 0.2545482052060274, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008442035141902475, "epoch": 883, "n_parameters": 631477312} {"train_lr": 0.0012711351883203758, "train_min_lr": 0.0012711351883203758, "train_loss": 0.25455350611800664, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008424574424190303, "epoch": 884, "n_parameters": 631477312} {"train_lr": 0.0012682181255533767, "train_min_lr": 0.0012682181255533767, "train_loss": 0.2545140428552165, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008380959108949471, "epoch": 885, "n_parameters": 631477312} {"train_lr": 0.0012653019871733367, "train_min_lr": 0.0012653019871733367, "train_loss": 0.25450889049217296, "train_loss_scale": 453710.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 886, "n_parameters": 631477312} {"train_lr": 0.0012623867845647414, "train_min_lr": 0.0012623867845647414, "train_loss": 0.25451746112249124, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00849286433404837, "epoch": 887, "n_parameters": 631477312} {"train_lr": 0.0012594725291084278, "train_min_lr": 0.0012594725291084278, "train_loss": 0.2545198857730541, "train_loss_scale": 197448.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 888, "n_parameters": 631477312} {"train_lr": 0.0012565592321815298, "train_min_lr": 0.0012565592321815298, "train_loss": 0.25449211879048306, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008445022532191033, "epoch": 889, "n_parameters": 631477312} {"train_lr": 0.001253646905157445, "train_min_lr": 0.001253646905157445, "train_loss": 0.2545069740959801, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008507078785138825, "epoch": 890, "n_parameters": 631477312} {"train_lr": 0.0012507355594057786, "train_min_lr": 0.0012507355594057786, "train_loss": 0.25440536248676765, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00844967246760065, "epoch": 891, "n_parameters": 631477312} {"train_lr": 0.001247825206292309, "train_min_lr": 0.001247825206292309, "train_loss": 0.2544101470669445, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008449760686534528, "epoch": 892, "n_parameters": 631477312} {"train_lr": 0.0012449158571789395, "train_min_lr": 0.0012449158571789395, "train_loss": 0.2544534149317023, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008537985327749107, "epoch": 893, "n_parameters": 631477312} {"train_lr": 0.0012420075234236513, "train_min_lr": 0.0012420075234236513, "train_loss": 0.2549086176187325, "train_loss_scale": 141994.66666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010865063448317157, "epoch": 894, "n_parameters": 631477312} {"train_lr": 0.0012391002163804619, "train_min_lr": 0.0012391002163804619, "train_loss": 0.25441963022921044, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008590417418581171, "epoch": 895, "n_parameters": 631477312} {"train_lr": 0.0012361939473993834, "train_min_lr": 0.0012361939473993834, "train_loss": 0.25442807264148426, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008564170208186485, "epoch": 896, "n_parameters": 631477312} {"train_lr": 0.0012332887278263727, "train_min_lr": 0.0012332887278263727, "train_loss": 0.2543452354834582, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008575152453536598, "epoch": 897, "n_parameters": 631477312} {"train_lr": 0.0012303845690032904, "train_min_lr": 0.0012303845690032904, "train_loss": 0.2543051538762087, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008451128315825302, "epoch": 898, "n_parameters": 631477312} {"train_lr": 0.0012274814822678575, "train_min_lr": 0.0012274814822678575, "train_loss": 0.25426690621922415, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008426477595304068, "epoch": 899, "n_parameters": 631477312} {"train_lr": 0.0012245794789536078, "train_min_lr": 0.0012245794789536078, "train_loss": 0.25425722910007703, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008474019194881503, "epoch": 900, "n_parameters": 631477312} {"train_lr": 0.0012216785703898449, "train_min_lr": 0.0012216785703898449, "train_loss": 0.2542314749563304, "train_loss_scale": 438587.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008525851919936637, "epoch": 901, "n_parameters": 631477312} {"train_lr": 0.0012187787679016017, "train_min_lr": 0.0012187787679016017, "train_loss": 0.2542390162632681, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008571850989634791, "epoch": 902, "n_parameters": 631477312} {"train_lr": 0.001215880082809589, "train_min_lr": 0.001215880082809589, "train_loss": 0.25428014528901816, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00866173098508555, "epoch": 903, "n_parameters": 631477312} {"train_lr": 0.0012129825264301601, "train_min_lr": 0.0012129825264301601, "train_loss": 0.25423939544588137, "train_loss_scale": 435226.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 904, "n_parameters": 631477312} {"train_lr": 0.0012100861100752568, "train_min_lr": 0.0012100861100752568, "train_loss": 0.25421732071285635, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008564393242033055, "epoch": 905, "n_parameters": 631477312} {"train_lr": 0.0012071908450523715, "train_min_lr": 0.0012071908450523715, "train_loss": 0.25416398510480154, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008506486132645454, "epoch": 906, "n_parameters": 631477312} {"train_lr": 0.0012042967426645064, "train_min_lr": 0.0012042967426645064, "train_loss": 0.25411066422776246, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008546583593273774, "epoch": 907, "n_parameters": 631477312} {"train_lr": 0.0012014038142101181, "train_min_lr": 0.0012014038142101181, "train_loss": 0.25422728251224047, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008736210830438022, "epoch": 908, "n_parameters": 631477312} {"train_lr": 0.0011985120709830882, "train_min_lr": 0.0011985120709830882, "train_loss": 0.2542217310326986, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00868859246540337, "epoch": 909, "n_parameters": 631477312} {"train_lr": 0.0011956215242726605, "train_min_lr": 0.0011956215242726605, "train_loss": 0.25415881649734307, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008644963169876391, "epoch": 910, "n_parameters": 631477312} {"train_lr": 0.0011927321853634168, "train_min_lr": 0.0011927321853634168, "train_loss": 0.25413959255764407, "train_loss_scale": 505803.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008579611778259277, "epoch": 911, "n_parameters": 631477312} {"train_lr": 0.001189844065535221, "train_min_lr": 0.001189844065535221, "train_loss": 0.2541504244600685, "train_loss_scale": 331040.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 912, "n_parameters": 631477312} {"train_lr": 0.0011869571760631749, "train_min_lr": 0.0011869571760631749, "train_loss": 0.254162630922973, "train_loss_scale": 252901.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 913, "n_parameters": 631477312} {"train_lr": 0.0011840715282175822, "train_min_lr": 0.0011840715282175822, "train_loss": 0.2540678565438168, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008669072621836303, "epoch": 914, "n_parameters": 631477312} {"train_lr": 0.0011811871332638945, "train_min_lr": 0.0011811871332638945, "train_loss": 0.25431935568578923, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010508245534788912, "epoch": 915, "n_parameters": 631477312} {"train_lr": 0.001178304002462676, "train_min_lr": 0.001178304002462676, "train_loss": 0.25407923826685125, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00873673641278098, "epoch": 916, "n_parameters": 631477312} {"train_lr": 0.0011754221470695527, "train_min_lr": 0.0011754221470695527, "train_loss": 0.25405494808458173, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0086672374059279, "epoch": 917, "n_parameters": 631477312} {"train_lr": 0.0011725415783351723, "train_min_lr": 0.0011725415783351723, "train_loss": 0.2539945905759501, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008663305714248847, "epoch": 918, "n_parameters": 631477312} {"train_lr": 0.0011696623075051608, "train_min_lr": 0.0011696623075051608, "train_loss": 0.2540162994663637, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008706971510456732, "epoch": 919, "n_parameters": 631477312} {"train_lr": 0.0011667843458200756, "train_min_lr": 0.0011667843458200756, "train_loss": 0.2539421134980587, "train_loss_scale": 217613.12820512822, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00859430425752623, "epoch": 920, "n_parameters": 631477312} {"train_lr": 0.001163907704515365, "train_min_lr": 0.001163907704515365, "train_loss": 0.25391498404161, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008608087891330704, "epoch": 921, "n_parameters": 631477312} {"train_lr": 0.001161032394821319, "train_min_lr": 0.001161032394821319, "train_loss": 0.25392474458576775, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00874813944578935, "epoch": 922, "n_parameters": 631477312} {"train_lr": 0.0011581584279630355, "train_min_lr": 0.0011581584279630355, "train_loss": 0.2539189765170121, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008680165422937045, "epoch": 923, "n_parameters": 631477312} {"train_lr": 0.0011552858151603633, "train_min_lr": 0.0011552858151603633, "train_loss": 0.2539190163817973, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008720539626665413, "epoch": 924, "n_parameters": 631477312} {"train_lr": 0.0011524145676278675, "train_min_lr": 0.0011524145676278675, "train_loss": 0.25385632244452166, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008711659650986966, "epoch": 925, "n_parameters": 631477312} {"train_lr": 0.001149544696574784, "train_min_lr": 0.001149544696574784, "train_loss": 0.2541440449571476, "train_loss_scale": 327680.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010501054811100356, "epoch": 926, "n_parameters": 631477312} {"train_lr": 0.0011466762132049761, "train_min_lr": 0.0011466762132049761, "train_loss": 0.25487418772760206, "train_loss_scale": 440267.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 927, "n_parameters": 631477312} {"train_lr": 0.0011438091287168863, "train_min_lr": 0.0011438091287168863, "train_loss": 0.25386103105325347, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00890723096791846, "epoch": 928, "n_parameters": 631477312} {"train_lr": 0.001140943454303497, "train_min_lr": 0.001140943454303497, "train_loss": 0.25386090960222274, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008741136264199248, "epoch": 929, "n_parameters": 631477312} {"train_lr": 0.001138079201152288, "train_min_lr": 0.001138079201152288, "train_loss": 0.25381542896577275, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008761370939632447, "epoch": 930, "n_parameters": 631477312} {"train_lr": 0.0011352163804451891, "train_min_lr": 0.0011352163804451891, "train_loss": 0.2538185105330717, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00874924888380636, "epoch": 931, "n_parameters": 631477312} {"train_lr": 0.0011323550033585377, "train_min_lr": 0.0011323550033585377, "train_loss": 0.25381474380787367, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008824351718291067, "epoch": 932, "n_parameters": 631477312} {"train_lr": 0.0011294950810630336, "train_min_lr": 0.0011294950810630336, "train_loss": 0.25372379056464595, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00872549808357293, "epoch": 933, "n_parameters": 631477312} {"train_lr": 0.001126636624723699, "train_min_lr": 0.001126636624723699, "train_loss": 0.25373683411042947, "train_loss_scale": 500762.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008819013484753668, "epoch": 934, "n_parameters": 631477312} {"train_lr": 0.001123779645499835, "train_min_lr": 0.001123779645499835, "train_loss": 0.25374164826606804, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008836091484110326, "epoch": 935, "n_parameters": 631477312} {"train_lr": 0.0011209241545449753, "train_min_lr": 0.0011209241545449753, "train_loss": 0.25370899393247104, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008859401407579962, "epoch": 936, "n_parameters": 631477312} {"train_lr": 0.001118070163006838, "train_min_lr": 0.001118070163006838, "train_loss": 0.25370546590942794, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008802721542545045, "epoch": 937, "n_parameters": 631477312} {"train_lr": 0.0011152176820272937, "train_min_lr": 0.0011152176820272937, "train_loss": 0.2537029311568357, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008914970109263102, "epoch": 938, "n_parameters": 631477312} {"train_lr": 0.0011123667227423146, "train_min_lr": 0.0011123667227423146, "train_loss": 0.25362189751990044, "train_loss_scale": 270546.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 939, "n_parameters": 631477312} {"train_lr": 0.0011095172962819328, "train_min_lr": 0.0011095172962819328, "train_loss": 0.25365876250613767, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008863526677044157, "epoch": 940, "n_parameters": 631477312} {"train_lr": 0.0011066694137701917, "train_min_lr": 0.0011066694137701917, "train_loss": 0.253621916179187, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008900047629140317, "epoch": 941, "n_parameters": 631477312} {"train_lr": 0.001103823086325113, "train_min_lr": 0.001103823086325113, "train_loss": 0.2536453116798582, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008914631194411179, "epoch": 942, "n_parameters": 631477312} {"train_lr": 0.0011009783250586459, "train_min_lr": 0.0011009783250586459, "train_loss": 0.253585697683052, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008820718467737047, "epoch": 943, "n_parameters": 631477312} {"train_lr": 0.001098135141076621, "train_min_lr": 0.001098135141076621, "train_loss": 0.2536256403948825, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008982658377275445, "epoch": 944, "n_parameters": 631477312} {"train_lr": 0.0010952935454787188, "train_min_lr": 0.0010952935454787188, "train_loss": 0.25359451066917527, "train_loss_scale": 395736.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 945, "n_parameters": 631477312} {"train_lr": 0.0010924535493584122, "train_min_lr": 0.0010924535493584122, "train_loss": 0.2535410594696609, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008881555768685082, "epoch": 946, "n_parameters": 631477312} {"train_lr": 0.0010896151638029327, "train_min_lr": 0.0010896151638029327, "train_loss": 0.2535768325906247, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008918108660369538, "epoch": 947, "n_parameters": 631477312} {"train_lr": 0.0010867783998932247, "train_min_lr": 0.0010867783998932247, "train_loss": 0.25354973341708475, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0089732221662043, "epoch": 948, "n_parameters": 631477312} {"train_lr": 0.0010839432687039, "train_min_lr": 0.0010839432687039, "train_loss": 0.2534764259008882, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008820888108741015, "epoch": 949, "n_parameters": 631477312} {"train_lr": 0.0010811097813031988, "train_min_lr": 0.0010811097813031988, "train_loss": 0.25347924446508, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008927462607598266, "epoch": 950, "n_parameters": 631477312} {"train_lr": 0.0010782779487529402, "train_min_lr": 0.0010782779487529402, "train_loss": 0.2535168281905592, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009048223877564454, "epoch": 951, "n_parameters": 631477312} {"train_lr": 0.0010754477821084867, "train_min_lr": 0.0010754477821084867, "train_loss": 0.253475275902579, "train_loss_scale": 429344.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008930304878725646, "epoch": 952, "n_parameters": 631477312} {"train_lr": 0.0010726192924186942, "train_min_lr": 0.0010726192924186942, "train_loss": 0.25342927278222466, "train_loss_scale": 433545.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 953, "n_parameters": 631477312} {"train_lr": 0.001069792490725876, "train_min_lr": 0.001069792490725876, "train_loss": 0.2534151575856436, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00890166302008602, "epoch": 954, "n_parameters": 631477312} {"train_lr": 0.0010669673880657497, "train_min_lr": 0.0010669673880657497, "train_loss": 0.25335723405572563, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008877484366679803, "epoch": 955, "n_parameters": 631477312} {"train_lr": 0.0010641439954674056, "train_min_lr": 0.0010641439954674056, "train_loss": 0.2533486806966651, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008845416578249289, "epoch": 956, "n_parameters": 631477312} {"train_lr": 0.0010613223239532518, "train_min_lr": 0.0010613223239532518, "train_loss": 0.2532785577215971, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008854206585779022, "epoch": 957, "n_parameters": 631477312} {"train_lr": 0.001058502384538984, "train_min_lr": 0.001058502384538984, "train_loss": 0.2533204483930977, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008949608330089504, "epoch": 958, "n_parameters": 631477312} {"train_lr": 0.0010556841882335324, "train_min_lr": 0.0010556841882335324, "train_loss": 0.2533177071316645, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00897566182538867, "epoch": 959, "n_parameters": 631477312} {"train_lr": 0.0010528677460390219, "train_min_lr": 0.0010528677460390219, "train_loss": 0.2532696037505491, "train_loss_scale": 507483.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008935327561070712, "epoch": 960, "n_parameters": 631477312} {"train_lr": 0.001050053068950731, "train_min_lr": 0.001050053068950731, "train_loss": 0.25327038136012375, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00896232818456319, "epoch": 961, "n_parameters": 631477312} {"train_lr": 0.0010472401679570446, "train_min_lr": 0.0010472401679570446, "train_loss": 0.2532570702859607, "train_loss_scale": 457071.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 962, "n_parameters": 631477312} {"train_lr": 0.0010444290540394176, "train_min_lr": 0.0010444290540394176, "train_loss": 0.25326937672276145, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009019791820229819, "epoch": 963, "n_parameters": 631477312} {"train_lr": 0.0010416197381723248, "train_min_lr": 0.0010416197381723248, "train_loss": 0.2532410403050912, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009054529095570056, "epoch": 964, "n_parameters": 631477312} {"train_lr": 0.001038812231323222, "train_min_lr": 0.001038812231323222, "train_loss": 0.253193597536152, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00899610357084431, "epoch": 965, "n_parameters": 631477312} {"train_lr": 0.0010360065444525053, "train_min_lr": 0.0010360065444525053, "train_loss": 0.25311304924364847, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008912689634598792, "epoch": 966, "n_parameters": 631477312} {"train_lr": 0.0010332026885134641, "train_min_lr": 0.0010332026885134641, "train_loss": 0.2531121178942088, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008970327732057717, "epoch": 967, "n_parameters": 631477312} {"train_lr": 0.0010304006744522387, "train_min_lr": 0.0010304006744522387, "train_loss": 0.25309690615783137, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008979360107332468, "epoch": 968, "n_parameters": 631477312} {"train_lr": 0.00102760051320778, "train_min_lr": 0.00102760051320778, "train_loss": 0.2530792780423489, "train_loss_scale": 483958.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008982828221259972, "epoch": 969, "n_parameters": 631477312} {"train_lr": 0.0010248022157118056, "train_min_lr": 0.0010248022157118056, "train_loss": 0.2530786310358403, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00900336874063867, "epoch": 970, "n_parameters": 631477312} {"train_lr": 0.0010220057928887576, "train_min_lr": 0.0010220057928887576, "train_loss": 0.2530784587202689, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009000812540762126, "epoch": 971, "n_parameters": 631477312} {"train_lr": 0.001019211255655757, "train_min_lr": 0.001019211255655757, "train_loss": 0.25308446555087966, "train_loss_scale": 362128.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 972, "n_parameters": 631477312} {"train_lr": 0.0010164186149225658, "train_min_lr": 0.0010164186149225658, "train_loss": 0.25302332959388596, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009021119599660428, "epoch": 973, "n_parameters": 631477312} {"train_lr": 0.0010136278815915433, "train_min_lr": 0.0010136278815915433, "train_loss": 0.25301306963695264, "train_loss_scale": 210051.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 974, "n_parameters": 631477312} {"train_lr": 0.0010108390665575985, "train_min_lr": 0.0010108390665575985, "train_loss": 0.25309649985940313, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009439854306550935, "epoch": 975, "n_parameters": 631477312} {"train_lr": 0.0010080521807081556, "train_min_lr": 0.0010080521807081556, "train_loss": 0.2531326726252118, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009231363966440162, "epoch": 976, "n_parameters": 631477312} {"train_lr": 0.0010052672349231044, "train_min_lr": 0.0010052672349231044, "train_loss": 0.25302149287651843, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009148435344776282, "epoch": 977, "n_parameters": 631477312} {"train_lr": 0.001002484240074762, "train_min_lr": 0.001002484240074762, "train_loss": 0.25302862502240503, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009208686539354041, "epoch": 978, "n_parameters": 631477312} {"train_lr": 0.0009997032070278265, "train_min_lr": 0.0009997032070278265, "train_loss": 0.25302560394629836, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009202607761089427, "epoch": 979, "n_parameters": 631477312} {"train_lr": 0.000996924146639344, "train_min_lr": 0.000996924146639344, "train_loss": 0.25302954533925426, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009292309778408172, "epoch": 980, "n_parameters": 631477312} {"train_lr": 0.0009941470697586525, "train_min_lr": 0.0009941470697586525, "train_loss": 0.25295941098831976, "train_loss_scale": 260463.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009237193603976032, "epoch": 981, "n_parameters": 631477312} {"train_lr": 0.0009913719872273496, "train_min_lr": 0.0009913719872273496, "train_loss": 0.2529507960783891, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009201765454445895, "epoch": 982, "n_parameters": 631477312} {"train_lr": 0.000988598909879245, "train_min_lr": 0.000988598909879245, "train_loss": 0.252904886761919, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009147875775129367, "epoch": 983, "n_parameters": 631477312} {"train_lr": 0.0009858278485403227, "train_min_lr": 0.0009858278485403227, "train_loss": 0.2529140717606657, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009250576261622019, "epoch": 984, "n_parameters": 631477312} {"train_lr": 0.000983058814028695, "train_min_lr": 0.000983058814028695, "train_loss": 0.2529235329473009, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009220468847510906, "epoch": 985, "n_parameters": 631477312} {"train_lr": 0.0009802918171545627, "train_min_lr": 0.0009802918171545627, "train_loss": 0.25286329509272504, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009216561226938397, "epoch": 986, "n_parameters": 631477312} {"train_lr": 0.0009775268687201692, "train_min_lr": 0.0009775268687201692, "train_loss": 0.2527668413509113, "train_loss_scale": 413380.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009116259919933211, "epoch": 987, "n_parameters": 631477312} {"train_lr": 0.0009747639795197641, "train_min_lr": 0.0009747639795197641, "train_loss": 0.25279594080833095, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00920779283086841, "epoch": 988, "n_parameters": 631477312} {"train_lr": 0.0009720031603395551, "train_min_lr": 0.0009720031603395551, "train_loss": 0.2527870539760886, "train_loss_scale": 318437.74358974356, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 989, "n_parameters": 631477312} {"train_lr": 0.0009692444219576709, "train_min_lr": 0.0009692444219576709, "train_loss": 0.2527995104334341, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009308512194249302, "epoch": 990, "n_parameters": 631477312} {"train_lr": 0.0009664877751441156, "train_min_lr": 0.0009664877751441156, "train_loss": 0.25282733456100315, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009501659269564046, "epoch": 991, "n_parameters": 631477312} {"train_lr": 0.0009637332306607262, "train_min_lr": 0.0009637332306607262, "train_loss": 0.2528685078036804, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00953625523759864, "epoch": 992, "n_parameters": 631477312} {"train_lr": 0.0009609807992611344, "train_min_lr": 0.0009609807992611344, "train_loss": 0.25275702768853175, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009221981413877355, "epoch": 993, "n_parameters": 631477312} {"train_lr": 0.0009582304916907242, "train_min_lr": 0.0009582304916907242, "train_loss": 0.25266386627350956, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009227262926287949, "epoch": 994, "n_parameters": 631477312} {"train_lr": 0.0009554823186865848, "train_min_lr": 0.0009554823186865848, "train_loss": 0.2527214808520885, "train_loss_scale": 265504.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 995, "n_parameters": 631477312} {"train_lr": 0.0009527362909774747, "train_min_lr": 0.0009527362909774747, "train_loss": 0.25269275399235386, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009439873494183978, "epoch": 996, "n_parameters": 631477312} {"train_lr": 0.0009499924192837747, "train_min_lr": 0.0009499924192837747, "train_loss": 0.2527403672918295, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00940637107198246, "epoch": 997, "n_parameters": 631477312} {"train_lr": 0.0009472507143174505, "train_min_lr": 0.0009472507143174505, "train_loss": 0.2526749070130814, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00947463238098396, "epoch": 998, "n_parameters": 631477312} {"train_lr": 0.0009445111867820084, "train_min_lr": 0.0009445111867820084, "train_loss": 0.2526199144269459, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009322218692455536, "epoch": 999, "n_parameters": 631477312} {"train_lr": 0.0009417738473724552, "train_min_lr": 0.0009417738473724552, "train_loss": 0.25260419603317785, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009306267533714, "epoch": 1000, "n_parameters": 631477312} {"train_lr": 0.0009390387067752538, "train_min_lr": 0.0009390387067752538, "train_loss": 0.25252057171355075, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009242446045391262, "epoch": 1001, "n_parameters": 631477312} {"train_lr": 0.000936305775668283, "train_min_lr": 0.000936305775668283, "train_loss": 0.2525207072120303, "train_loss_scale": 511684.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0092929272303501, "epoch": 1002, "n_parameters": 631477312} {"train_lr": 0.0009335750647207968, "train_min_lr": 0.0009335750647207968, "train_loss": 0.2525517691929753, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009434185277384061, "epoch": 1003, "n_parameters": 631477312} {"train_lr": 0.0009308465845933817, "train_min_lr": 0.0009308465845933817, "train_loss": 0.25292817946487606, "train_loss_scale": 263824.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1004, "n_parameters": 631477312} {"train_lr": 0.0009281203459379158, "train_min_lr": 0.0009281203459379158, "train_loss": 0.2525450735567854, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009501360926944286, "epoch": 1005, "n_parameters": 631477312} {"train_lr": 0.0009253963593975229, "train_min_lr": 0.0009253963593975229, "train_loss": 0.2527951843415697, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011350871813005935, "epoch": 1006, "n_parameters": 631477312} {"train_lr": 0.0009226746356065399, "train_min_lr": 0.0009226746356065399, "train_loss": 0.2524587587865356, "train_loss_scale": 192827.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1007, "n_parameters": 631477312} {"train_lr": 0.0009199551851904667, "train_min_lr": 0.0009199551851904667, "train_loss": 0.2535286251700316, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015109979135629076, "epoch": 1008, "n_parameters": 631477312} {"train_lr": 0.0009172380187659294, "train_min_lr": 0.0009172380187659294, "train_loss": 0.2524448147443577, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009440051132025054, "epoch": 1009, "n_parameters": 631477312} {"train_lr": 0.000914523146940636, "train_min_lr": 0.000914523146940636, "train_loss": 0.25247620885821587, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009529417246365203, "epoch": 1010, "n_parameters": 631477312} {"train_lr": 0.0009118105803133375, "train_min_lr": 0.0009118105803133375, "train_loss": 0.25240383544960654, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00942075933711842, "epoch": 1011, "n_parameters": 631477312} {"train_lr": 0.000909100329473786, "train_min_lr": 0.000909100329473786, "train_loss": 0.25233691935845387, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00937366456641123, "epoch": 1012, "n_parameters": 631477312} {"train_lr": 0.0009063924050026917, "train_min_lr": 0.0009063924050026917, "train_loss": 0.2524035920191986, "train_loss_scale": 146615.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009479374782994198, "epoch": 1013, "n_parameters": 631477312} {"train_lr": 0.0009036868174716841, "train_min_lr": 0.0009036868174716841, "train_loss": 0.2523178764505503, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009364690371932318, "epoch": 1014, "n_parameters": 631477312} {"train_lr": 0.0009009835774432676, "train_min_lr": 0.0009009835774432676, "train_loss": 0.25228880688798827, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00946259062187985, "epoch": 1015, "n_parameters": 631477312} {"train_lr": 0.000898282695470784, "train_min_lr": 0.000898282695470784, "train_loss": 0.25227501631403965, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009436631008075217, "epoch": 1016, "n_parameters": 631477312} {"train_lr": 0.0008955841820983682, "train_min_lr": 0.0008955841820983682, "train_loss": 0.2522403878870253, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009467322874862032, "epoch": 1017, "n_parameters": 631477312} {"train_lr": 0.0008928880478609084, "train_min_lr": 0.0008928880478609084, "train_loss": 0.252258770672294, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009517164500907827, "epoch": 1018, "n_parameters": 631477312} {"train_lr": 0.000890194303284004, "train_min_lr": 0.000890194303284004, "train_loss": 0.25227701815310866, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009533570595802022, "epoch": 1019, "n_parameters": 631477312} {"train_lr": 0.0008875029588839267, "train_min_lr": 0.0008875029588839267, "train_loss": 0.25227960914027137, "train_loss_scale": 347844.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1020, "n_parameters": 631477312} {"train_lr": 0.0008848140251675762, "train_min_lr": 0.0008848140251675762, "train_loss": 0.25220656074965614, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009498651700619703, "epoch": 1021, "n_parameters": 631477312} {"train_lr": 0.0008821275126324441, "train_min_lr": 0.0008821275126324441, "train_loss": 0.25221950840503454, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009590362106115581, "epoch": 1022, "n_parameters": 631477312} {"train_lr": 0.0008794434317665664, "train_min_lr": 0.0008794434317665664, "train_loss": 0.25222201908950526, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00968120478762266, "epoch": 1023, "n_parameters": 631477312} {"train_lr": 0.0008767617930484874, "train_min_lr": 0.0008767617930484874, "train_loss": 0.2522141203666345, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009610875742509961, "epoch": 1024, "n_parameters": 631477312} {"train_lr": 0.000874082606947218, "train_min_lr": 0.000874082606947218, "train_loss": 0.2522124616303839, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009618049397921333, "epoch": 1025, "n_parameters": 631477312} {"train_lr": 0.0008714058839221914, "train_min_lr": 0.0008714058839221914, "train_loss": 0.2521530216356787, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009608132940812562, "epoch": 1026, "n_parameters": 631477312} {"train_lr": 0.0008687316344232313, "train_min_lr": 0.0008687316344232313, "train_loss": 0.252145002139374, "train_loss_scale": 516726.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009576580585696949, "epoch": 1027, "n_parameters": 631477312} {"train_lr": 0.0008660598688904959, "train_min_lr": 0.0008660598688904959, "train_loss": 0.2520941774844407, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009600328836733332, "epoch": 1028, "n_parameters": 631477312} {"train_lr": 0.0008633905977544545, "train_min_lr": 0.0008633905977544545, "train_loss": 0.2520583920413628, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009582091307936188, "epoch": 1029, "n_parameters": 631477312} {"train_lr": 0.0008607238314358315, "train_min_lr": 0.0008607238314358315, "train_loss": 0.2520577943471905, "train_loss_scale": 339442.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1030, "n_parameters": 631477312} {"train_lr": 0.0008580595803455788, "train_min_lr": 0.0008580595803455788, "train_loss": 0.2522429567791569, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011680154631344171, "epoch": 1031, "n_parameters": 631477312} {"train_lr": 0.0008553978548848254, "train_min_lr": 0.0008553978548848254, "train_loss": 0.25200399132863355, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00973451317837223, "epoch": 1032, "n_parameters": 631477312} {"train_lr": 0.0008527386654448397, "train_min_lr": 0.0008527386654448397, "train_loss": 0.25204428339985985, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00968169052906048, "epoch": 1033, "n_parameters": 631477312} {"train_lr": 0.0008500820224069921, "train_min_lr": 0.0008500820224069921, "train_loss": 0.2519956907758919, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009670740575529635, "epoch": 1034, "n_parameters": 631477312} {"train_lr": 0.0008474279361427082, "train_min_lr": 0.0008474279361427082, "train_loss": 0.25195035110347164, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009698133658952056, "epoch": 1035, "n_parameters": 631477312} {"train_lr": 0.0008447764170134383, "train_min_lr": 0.0008447764170134383, "train_loss": 0.25192531956156766, "train_loss_scale": 339442.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009726599452849954, "epoch": 1036, "n_parameters": 631477312} {"train_lr": 0.0008421274753706043, "train_min_lr": 0.0008421274753706043, "train_loss": 0.25189864728301287, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009647041720409807, "epoch": 1037, "n_parameters": 631477312} {"train_lr": 0.0008394811215555701, "train_min_lr": 0.0008394811215555701, "train_loss": 0.2518939113495155, "train_loss_scale": 265504.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1038, "n_parameters": 631477312} {"train_lr": 0.000836837365899592, "train_min_lr": 0.000836837365899592, "train_loss": 0.2518646571522531, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009666957026825119, "epoch": 1039, "n_parameters": 631477312} {"train_lr": 0.0008341962187237897, "train_min_lr": 0.0008341962187237897, "train_loss": 0.2518445648975336, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009732289463639833, "epoch": 1040, "n_parameters": 631477312} {"train_lr": 0.0008315576903390954, "train_min_lr": 0.0008315576903390954, "train_loss": 0.2518557600157622, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009718525321939243, "epoch": 1041, "n_parameters": 631477312} {"train_lr": 0.000828921791046216, "train_min_lr": 0.000828921791046216, "train_loss": 0.2517740358777631, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009642341893572265, "epoch": 1042, "n_parameters": 631477312} {"train_lr": 0.0008262885311355988, "train_min_lr": 0.0008262885311355988, "train_loss": 0.2518042479193984, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009687567732702846, "epoch": 1043, "n_parameters": 631477312} {"train_lr": 0.0008236579208873839, "train_min_lr": 0.0008236579208873839, "train_loss": 0.2517228500278762, "train_loss_scale": 413380.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009715238765168648, "epoch": 1044, "n_parameters": 631477312} {"train_lr": 0.00082102997057137, "train_min_lr": 0.00082102997057137, "train_loss": 0.2517730460400717, "train_loss_scale": 457911.7948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1045, "n_parameters": 631477312} {"train_lr": 0.0008184046904469691, "train_min_lr": 0.0008184046904469691, "train_loss": 0.25169777543021316, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009774296054950891, "epoch": 1046, "n_parameters": 631477312} {"train_lr": 0.0008157820907631688, "train_min_lr": 0.0008157820907631688, "train_loss": 0.2516968859425292, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009727895462837739, "epoch": 1047, "n_parameters": 631477312} {"train_lr": 0.0008131621817584946, "train_min_lr": 0.0008131621817584946, "train_loss": 0.2516514574870085, "train_loss_scale": 180224.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1048, "n_parameters": 631477312} {"train_lr": 0.000810544973660965, "train_min_lr": 0.000810544973660965, "train_loss": 0.2516472889558197, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009789321893969407, "epoch": 1049, "n_parameters": 631477312} {"train_lr": 0.0008079304766880569, "train_min_lr": 0.0008079304766880569, "train_loss": 0.25166581634682816, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00983467315717672, "epoch": 1050, "n_parameters": 631477312} {"train_lr": 0.0008053187010466622, "train_min_lr": 0.0008053187010466622, "train_loss": 0.2516407660590724, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00987966651789462, "epoch": 1051, "n_parameters": 631477312} {"train_lr": 0.0008027096569330467, "train_min_lr": 0.0008027096569330467, "train_loss": 0.25162749115723926, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00984348454930557, "epoch": 1052, "n_parameters": 631477312} {"train_lr": 0.0008001033545328149, "train_min_lr": 0.0008001033545328149, "train_loss": 0.2516078141768678, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009815185503938641, "epoch": 1053, "n_parameters": 631477312} {"train_lr": 0.0007974998040208676, "train_min_lr": 0.0007974998040208676, "train_loss": 0.2515890420271227, "train_loss_scale": 159218.87179487178, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009861256207864827, "epoch": 1054, "n_parameters": 631477312} {"train_lr": 0.0007948990155613614, "train_min_lr": 0.0007948990155613614, "train_loss": 0.25154175587559646, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009870431830700582, "epoch": 1055, "n_parameters": 631477312} {"train_lr": 0.0007923009993076708, "train_min_lr": 0.0007923009993076708, "train_loss": 0.25160386173341137, "train_loss_scale": 256682.66666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1056, "n_parameters": 631477312} {"train_lr": 0.0007897057654023456, "train_min_lr": 0.0007897057654023456, "train_loss": 0.251552470962708, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009894096650756322, "epoch": 1057, "n_parameters": 631477312} {"train_lr": 0.0007871133239770778, "train_min_lr": 0.0007871133239770778, "train_loss": 0.2515182973196109, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009950426395218342, "epoch": 1058, "n_parameters": 631477312} {"train_lr": 0.0007845236851526527, "train_min_lr": 0.0007845236851526527, "train_loss": 0.25147943914187354, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009839462504411737, "epoch": 1059, "n_parameters": 631477312} {"train_lr": 0.0007819368590389165, "train_min_lr": 0.0007819368590389165, "train_loss": 0.25148961794240254, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009974652405780477, "epoch": 1060, "n_parameters": 631477312} {"train_lr": 0.0007793528557347355, "train_min_lr": 0.0007793528557347355, "train_loss": 0.251462474396715, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009964036850784069, "epoch": 1061, "n_parameters": 631477312} {"train_lr": 0.000776771685327956, "train_min_lr": 0.000776771685327956, "train_loss": 0.2515054315221138, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010171207229200846, "epoch": 1062, "n_parameters": 631477312} {"train_lr": 0.0007741933578953627, "train_min_lr": 0.0007741933578953627, "train_loss": 0.2514947739232761, "train_loss_scale": 213832.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010038032718838599, "epoch": 1063, "n_parameters": 631477312} {"train_lr": 0.0007716178835026435, "train_min_lr": 0.0007716178835026435, "train_loss": 0.2514206263391922, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010047389346604737, "epoch": 1064, "n_parameters": 631477312} {"train_lr": 0.0007690452722043463, "train_min_lr": 0.0007690452722043463, "train_loss": 0.251398383332297, "train_loss_scale": 191986.87179487178, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1065, "n_parameters": 631477312} {"train_lr": 0.000766475534043844, "train_min_lr": 0.000766475534043844, "train_loss": 0.2513959532370791, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010048558544486953, "epoch": 1066, "n_parameters": 631477312} {"train_lr": 0.0007639086790532912, "train_min_lr": 0.0007639086790532912, "train_loss": 0.2514469555447785, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010166740098681588, "epoch": 1067, "n_parameters": 631477312} {"train_lr": 0.0007613447172535847, "train_min_lr": 0.0007613447172535847, "train_loss": 0.25135248980950564, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010034870111550659, "epoch": 1068, "n_parameters": 631477312} {"train_lr": 0.0007587836586543333, "train_min_lr": 0.0007587836586543333, "train_loss": 0.25132943825044024, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010124068056495909, "epoch": 1069, "n_parameters": 631477312} {"train_lr": 0.0007562255132538018, "train_min_lr": 0.0007562255132538018, "train_loss": 0.25133467360566825, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010189596450147338, "epoch": 1070, "n_parameters": 631477312} {"train_lr": 0.000753670291038892, "train_min_lr": 0.000753670291038892, "train_loss": 0.2513134380898032, "train_loss_scale": 147456.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01005030054754267, "epoch": 1071, "n_parameters": 631477312} {"train_lr": 0.0007511180019850862, "train_min_lr": 0.0007511180019850862, "train_loss": 0.251272225074876, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01021851756526396, "epoch": 1072, "n_parameters": 631477312} {"train_lr": 0.0007485686560564195, "train_min_lr": 0.0007485686560564195, "train_loss": 0.25131645395408553, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010175739134399172, "epoch": 1073, "n_parameters": 631477312} {"train_lr": 0.0007460222632054375, "train_min_lr": 0.0007460222632054375, "train_loss": 0.2511916648065194, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01010327012791561, "epoch": 1074, "n_parameters": 631477312} {"train_lr": 0.0007434788333731559, "train_min_lr": 0.0007434788333731559, "train_loss": 0.251274266336949, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010219869135210339, "epoch": 1075, "n_parameters": 631477312} {"train_lr": 0.0007409383764890203, "train_min_lr": 0.0007409383764890203, "train_loss": 0.2512305066831267, "train_loss_scale": 155858.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1076, "n_parameters": 631477312} {"train_lr": 0.0007384009024708765, "train_min_lr": 0.0007384009024708765, "train_loss": 0.2512247279787866, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010210685772248186, "epoch": 1077, "n_parameters": 631477312} {"train_lr": 0.000735866421224917, "train_min_lr": 0.000735866421224917, "train_loss": 0.2512321031730001, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010289092888482489, "epoch": 1078, "n_parameters": 631477312} {"train_lr": 0.0007333349426456595, "train_min_lr": 0.0007333349426456595, "train_loss": 0.2511679114361938, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010137662428837175, "epoch": 1079, "n_parameters": 631477312} {"train_lr": 0.0007308064766158923, "train_min_lr": 0.0007308064766158923, "train_loss": 0.25112280040048063, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010147863419917531, "epoch": 1080, "n_parameters": 631477312} {"train_lr": 0.0007282810330066472, "train_min_lr": 0.0007282810330066472, "train_loss": 0.2511467456471366, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0102132256471146, "epoch": 1081, "n_parameters": 631477312} {"train_lr": 0.0007257586216771538, "train_min_lr": 0.0007257586216771538, "train_loss": 0.2510702962724444, "train_loss_scale": 161319.38461538462, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1082, "n_parameters": 631477312} {"train_lr": 0.0007232392524748043, "train_min_lr": 0.0007232392524748043, "train_loss": 0.2511290244859619, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01038968798596985, "epoch": 1083, "n_parameters": 631477312} {"train_lr": 0.0007207229352351171, "train_min_lr": 0.0007207229352351171, "train_loss": 0.25104141767877036, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010223168652886763, "epoch": 1084, "n_parameters": 631477312} {"train_lr": 0.0007182096797816934, "train_min_lr": 0.0007182096797816934, "train_loss": 0.2509798961930359, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010157446150118724, "epoch": 1085, "n_parameters": 631477312} {"train_lr": 0.0007156994959261803, "train_min_lr": 0.0007156994959261803, "train_loss": 0.25101264152162445, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0102891820238139, "epoch": 1086, "n_parameters": 631477312} {"train_lr": 0.0007131923934682372, "train_min_lr": 0.0007131923934682372, "train_loss": 0.25101083373794186, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010337965171903564, "epoch": 1087, "n_parameters": 631477312} {"train_lr": 0.0007106883821954903, "train_min_lr": 0.0007106883821954903, "train_loss": 0.25100561674051464, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010499019235468064, "epoch": 1088, "n_parameters": 631477312} {"train_lr": 0.0007081874718835006, "train_min_lr": 0.0007081874718835006, "train_loss": 0.2510336918445925, "train_loss_scale": 75198.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1089, "n_parameters": 631477312} {"train_lr": 0.0007056896722957201, "train_min_lr": 0.0007056896722957201, "train_loss": 0.250944422178854, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01023444003210618, "epoch": 1090, "n_parameters": 631477312} {"train_lr": 0.0007031949931834597, "train_min_lr": 0.0007031949931834597, "train_loss": 0.2508778474950351, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01020200598017814, "epoch": 1091, "n_parameters": 631477312} {"train_lr": 0.0007007034442858478, "train_min_lr": 0.0007007034442858478, "train_loss": 0.25089702323879093, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010331626053159244, "epoch": 1092, "n_parameters": 631477312} {"train_lr": 0.0006982150353297889, "train_min_lr": 0.0006982150353297889, "train_loss": 0.2510811367866177, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01285375569624683, "epoch": 1093, "n_parameters": 631477312} {"train_lr": 0.0006957297760299335, "train_min_lr": 0.0006957297760299335, "train_loss": 0.25078051986519057, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010405030853759784, "epoch": 1094, "n_parameters": 631477312} {"train_lr": 0.0006932476760886348, "train_min_lr": 0.0006932476760886348, "train_loss": 0.25084366793266666, "train_loss_scale": 94523.07692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010352992506411213, "epoch": 1095, "n_parameters": 631477312} {"train_lr": 0.0006907687451959105, "train_min_lr": 0.0006907687451959105, "train_loss": 0.2507702886598567, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010285461043867353, "epoch": 1096, "n_parameters": 631477312} {"train_lr": 0.0006882929930294079, "train_min_lr": 0.0006882929930294079, "train_loss": 0.25081592097353095, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01056651631071686, "epoch": 1097, "n_parameters": 631477312} {"train_lr": 0.0006858204292543649, "train_min_lr": 0.0006858204292543649, "train_loss": 0.25078101431091243, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010473029132789144, "epoch": 1098, "n_parameters": 631477312} {"train_lr": 0.0006833510635235713, "train_min_lr": 0.0006833510635235713, "train_loss": 0.2507538135127666, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010528089944273233, "epoch": 1099, "n_parameters": 631477312} {"train_lr": 0.0006808849054773309, "train_min_lr": 0.0006808849054773309, "train_loss": 0.2507946001341901, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01053886016448721, "epoch": 1100, "n_parameters": 631477312} {"train_lr": 0.0006784219647434278, "train_min_lr": 0.0006784219647434278, "train_loss": 0.25073720446119135, "train_loss_scale": 108386.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1101, "n_parameters": 631477312} {"train_lr": 0.0006759622509370837, "train_min_lr": 0.0006759622509370837, "train_loss": 0.2507434574325975, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010618227450415874, "epoch": 1102, "n_parameters": 631477312} {"train_lr": 0.0006735057736609214, "train_min_lr": 0.0006735057736609214, "train_loss": 0.25073048743443227, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010528453386937961, "epoch": 1103, "n_parameters": 631477312} {"train_lr": 0.0006710525425049303, "train_min_lr": 0.0006710525425049303, "train_loss": 0.25071514578751075, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010544591526596401, "epoch": 1104, "n_parameters": 631477312} {"train_lr": 0.0006686025670464282, "train_min_lr": 0.0006686025670464282, "train_loss": 0.250614715134128, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010425250705642005, "epoch": 1105, "n_parameters": 631477312} {"train_lr": 0.0006661558568500193, "train_min_lr": 0.0006661558568500193, "train_loss": 0.2506479492190127, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012096506985238729, "epoch": 1106, "n_parameters": 631477312} {"train_lr": 0.0006637124214675638, "train_min_lr": 0.0006637124214675638, "train_loss": 0.25056153662108743, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010558901097959815, "epoch": 1107, "n_parameters": 631477312} {"train_lr": 0.0006612722704381341, "train_min_lr": 0.0006612722704381341, "train_loss": 0.2505378771800166, "train_loss_scale": 126870.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010526477368190311, "epoch": 1108, "n_parameters": 631477312} {"train_lr": 0.000658835413287983, "train_min_lr": 0.000658835413287983, "train_loss": 0.25057602929584205, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010730732671128443, "epoch": 1109, "n_parameters": 631477312} {"train_lr": 0.0006564018595305038, "train_min_lr": 0.0006564018595305038, "train_loss": 0.2505611506296704, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010551863564894749, "epoch": 1110, "n_parameters": 631477312} {"train_lr": 0.0006539716186661912, "train_min_lr": 0.0006539716186661912, "train_loss": 0.25053575247203785, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010772743384735897, "epoch": 1111, "n_parameters": 631477312} {"train_lr": 0.0006515447001826097, "train_min_lr": 0.0006515447001826097, "train_loss": 0.2505286266872038, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010543850253527172, "epoch": 1112, "n_parameters": 631477312} {"train_lr": 0.000649121113554352, "train_min_lr": 0.000649121113554352, "train_loss": 0.25045802026616937, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010551022750755342, "epoch": 1113, "n_parameters": 631477312} {"train_lr": 0.0006467008682430024, "train_min_lr": 0.0006467008682430024, "train_loss": 0.25051768595137847, "train_loss_scale": 199968.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010891891221921796, "epoch": 1114, "n_parameters": 631477312} {"train_lr": 0.0006442839736971013, "train_min_lr": 0.0006442839736971013, "train_loss": 0.25045954084728295, "train_loss_scale": 152917.33333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1115, "n_parameters": 631477312} {"train_lr": 0.0006418704393521103, "train_min_lr": 0.0006418704393521103, "train_loss": 0.2504033263301095, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01058484576582813, "epoch": 1116, "n_parameters": 631477312} {"train_lr": 0.0006394602746303688, "train_min_lr": 0.0006394602746303688, "train_loss": 0.2504144596497122, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010711678318703214, "epoch": 1117, "n_parameters": 631477312} {"train_lr": 0.0006370534889410649, "train_min_lr": 0.0006370534889410649, "train_loss": 0.25030961620191544, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010692157454263324, "epoch": 1118, "n_parameters": 631477312} {"train_lr": 0.0006346500916801923, "train_min_lr": 0.0006346500916801923, "train_loss": 0.25036837608040047, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01068089582109585, "epoch": 1119, "n_parameters": 631477312} {"train_lr": 0.0006322500922305184, "train_min_lr": 0.0006322500922305184, "train_loss": 0.2503447534623914, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010713831584255856, "epoch": 1120, "n_parameters": 631477312} {"train_lr": 0.0006298534999615448, "train_min_lr": 0.0006298534999615448, "train_loss": 0.2502510624442202, "train_loss_scale": 186525.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010617448781163264, "epoch": 1121, "n_parameters": 631477312} {"train_lr": 0.0006274603242294721, "train_min_lr": 0.0006274603242294721, "train_loss": 0.2502731273297029, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010749480907972425, "epoch": 1122, "n_parameters": 631477312} {"train_lr": 0.0006250705743771617, "train_min_lr": 0.0006250705743771617, "train_loss": 0.25026201010227966, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010827244638513105, "epoch": 1123, "n_parameters": 631477312} {"train_lr": 0.000622684259734102, "train_min_lr": 0.000622684259734102, "train_loss": 0.2501906638021748, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010569695768376382, "epoch": 1124, "n_parameters": 631477312} {"train_lr": 0.0006203013896163704, "train_min_lr": 0.0006203013896163704, "train_loss": 0.25017854735517925, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010720812685739918, "epoch": 1125, "n_parameters": 631477312} {"train_lr": 0.0006179219733265951, "train_min_lr": 0.0006179219733265951, "train_loss": 0.2501598293146787, "train_loss_scale": 188205.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1126, "n_parameters": 631477312} {"train_lr": 0.0006155460201539221, "train_min_lr": 0.0006155460201539221, "train_loss": 0.25013630703772205, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01070631755133852, "epoch": 1127, "n_parameters": 631477312} {"train_lr": 0.0006131735393739788, "train_min_lr": 0.0006131735393739788, "train_loss": 0.25013871351853967, "train_loss_scale": 123720.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1128, "n_parameters": 631477312} {"train_lr": 0.0006108045402488355, "train_min_lr": 0.0006108045402488355, "train_loss": 0.2501375108288649, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010856024911985375, "epoch": 1129, "n_parameters": 631477312} {"train_lr": 0.000608439032026968, "train_min_lr": 0.000608439032026968, "train_loss": 0.25013368631689215, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01094169319810298, "epoch": 1130, "n_parameters": 631477312} {"train_lr": 0.0006060770239432257, "train_min_lr": 0.0006060770239432257, "train_loss": 0.2501084651165188, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010963692362659061, "epoch": 1131, "n_parameters": 631477312} {"train_lr": 0.0006037185252187955, "train_min_lr": 0.0006037185252187955, "train_loss": 0.2500465113597994, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010809868475637184, "epoch": 1132, "n_parameters": 631477312} {"train_lr": 0.0006013635450611617, "train_min_lr": 0.0006013635450611617, "train_loss": 0.25006432802332806, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010955295600116445, "epoch": 1133, "n_parameters": 631477312} {"train_lr": 0.0005990120926640702, "train_min_lr": 0.0005990120926640702, "train_loss": 0.25007675683054215, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011039540879070185, "epoch": 1134, "n_parameters": 631477312} {"train_lr": 0.0005966641772074993, "train_min_lr": 0.0005966641772074993, "train_loss": 0.25007324654441804, "train_loss_scale": 111537.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011149859694668498, "epoch": 1135, "n_parameters": 631477312} {"train_lr": 0.0005943198078576163, "train_min_lr": 0.0005943198078576163, "train_loss": 0.2500981119795678, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011133214587178558, "epoch": 1136, "n_parameters": 631477312} {"train_lr": 0.0005919789937667458, "train_min_lr": 0.0005919789937667458, "train_loss": 0.25001990244102973, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010973696441700062, "epoch": 1137, "n_parameters": 631477312} {"train_lr": 0.0005896417440733318, "train_min_lr": 0.0005896417440733318, "train_loss": 0.25022444481221146, "train_loss_scale": 77718.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1138, "n_parameters": 631477312} {"train_lr": 0.0005873080679019029, "train_min_lr": 0.0005873080679019029, "train_loss": 0.24996654299469903, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011123164637515752, "epoch": 1139, "n_parameters": 631477312} {"train_lr": 0.0005849779743630389, "train_min_lr": 0.0005849779743630389, "train_loss": 0.2504967019284287, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01690504236672169, "epoch": 1140, "n_parameters": 631477312} {"train_lr": 0.00058265147255333, "train_min_lr": 0.00058265147255333, "train_loss": 0.24993930297652975, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011185811778817039, "epoch": 1141, "n_parameters": 631477312} {"train_lr": 0.0005803285715553476, "train_min_lr": 0.0005803285715553476, "train_loss": 0.2499286724278369, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01105892249287512, "epoch": 1142, "n_parameters": 631477312} {"train_lr": 0.0005780092804376041, "train_min_lr": 0.0005780092804376041, "train_loss": 0.24987922995112455, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011009643761775432, "epoch": 1143, "n_parameters": 631477312} {"train_lr": 0.0005756936082545175, "train_min_lr": 0.0005756936082545175, "train_loss": 0.2498998364720207, "train_loss_scale": 92002.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011271096855545273, "epoch": 1144, "n_parameters": 631477312} {"train_lr": 0.0005733815640463811, "train_min_lr": 0.0005733815640463811, "train_loss": 0.249793249152553, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011049061267374035, "epoch": 1145, "n_parameters": 631477312} {"train_lr": 0.0005710731568393219, "train_min_lr": 0.0005710731568393219, "train_loss": 0.2498030659641163, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011017225984221276, "epoch": 1146, "n_parameters": 631477312} {"train_lr": 0.0005687683956452703, "train_min_lr": 0.0005687683956452703, "train_loss": 0.24975583254574582, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011034189759252163, "epoch": 1147, "n_parameters": 631477312} {"train_lr": 0.0005664672894619201, "train_min_lr": 0.0005664672894619201, "train_loss": 0.2496975381494476, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011118293820450513, "epoch": 1148, "n_parameters": 631477312} {"train_lr": 0.0005641698472727003, "train_min_lr": 0.0005641698472727003, "train_loss": 0.24977762912674686, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011326475776373766, "epoch": 1149, "n_parameters": 631477312} {"train_lr": 0.0005618760780467304, "train_min_lr": 0.0005618760780467304, "train_loss": 0.2496925777881048, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01106820953412889, "epoch": 1150, "n_parameters": 631477312} {"train_lr": 0.0005595859907387952, "train_min_lr": 0.0005595859907387952, "train_loss": 0.2495947297399816, "train_loss_scale": 261303.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01090429985943513, "epoch": 1151, "n_parameters": 631477312} {"train_lr": 0.0005572995942893032, "train_min_lr": 0.0005572995942893032, "train_loss": 0.24961433783830264, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011141624250926841, "epoch": 1152, "n_parameters": 631477312} {"train_lr": 0.0005550168976242548, "train_min_lr": 0.0005550168976242548, "train_loss": 0.24962605755382145, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011282517735917982, "epoch": 1153, "n_parameters": 631477312} {"train_lr": 0.0005527379096552076, "train_min_lr": 0.0005527379096552076, "train_loss": 0.2495973773079757, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011198716599327058, "epoch": 1154, "n_parameters": 631477312} {"train_lr": 0.000550462639279237, "train_min_lr": 0.000550462639279237, "train_loss": 0.249492954591122, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011023196782797383, "epoch": 1155, "n_parameters": 631477312} {"train_lr": 0.0005481910953789097, "train_min_lr": 0.0005481910953789097, "train_loss": 0.24948308143454292, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011104895452706095, "epoch": 1156, "n_parameters": 631477312} {"train_lr": 0.0005459232868222405, "train_min_lr": 0.0005459232868222405, "train_loss": 0.24954078072467104, "train_loss_scale": 415061.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011264188507667337, "epoch": 1157, "n_parameters": 631477312} {"train_lr": 0.0005436592224626639, "train_min_lr": 0.0005436592224626639, "train_loss": 0.2494432957789216, "train_loss_scale": 332721.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1158, "n_parameters": 631477312} {"train_lr": 0.0005413989111389974, "train_min_lr": 0.0005413989111389974, "train_loss": 0.2494985470936323, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011296535579439921, "epoch": 1159, "n_parameters": 631477312} {"train_lr": 0.0005391423616754045, "train_min_lr": 0.0005391423616754045, "train_loss": 0.249355360516347, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011100786532132098, "epoch": 1160, "n_parameters": 631477312} {"train_lr": 0.0005368895828813646, "train_min_lr": 0.0005368895828813646, "train_loss": 0.24939023528415233, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011159672867506742, "epoch": 1161, "n_parameters": 631477312} {"train_lr": 0.0005346405835516359, "train_min_lr": 0.0005346405835516359, "train_loss": 0.24941131782132941, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01150630910295802, "epoch": 1162, "n_parameters": 631477312} {"train_lr": 0.0005323953724662217, "train_min_lr": 0.0005323953724662217, "train_loss": 0.24941368968500635, "train_loss_scale": 228115.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1163, "n_parameters": 631477312} {"train_lr": 0.0005301539583903355, "train_min_lr": 0.0005301539583903355, "train_loss": 0.2493393968175858, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011367812919884156, "epoch": 1164, "n_parameters": 631477312} {"train_lr": 0.00052791635007437, "train_min_lr": 0.00052791635007437, "train_loss": 0.24938628523168752, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01145173662688392, "epoch": 1165, "n_parameters": 631477312} {"train_lr": 0.0005256825562538566, "train_min_lr": 0.0005256825562538566, "train_loss": 0.24934093061034593, "train_loss_scale": 76878.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1166, "n_parameters": 631477312} {"train_lr": 0.0005234525856494381, "train_min_lr": 0.0005234525856494381, "train_loss": 0.2493435452119089, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011488600708663655, "epoch": 1167, "n_parameters": 631477312} {"train_lr": 0.0005212264469668297, "train_min_lr": 0.0005212264469668297, "train_loss": 0.24932595487469092, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01168613342078737, "epoch": 1168, "n_parameters": 631477312} {"train_lr": 0.0005190041488967883, "train_min_lr": 0.0005190041488967883, "train_loss": 0.24923752153065437, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011262588868610179, "epoch": 1169, "n_parameters": 631477312} {"train_lr": 0.0005167857001150759, "train_min_lr": 0.0005167857001150759, "train_loss": 0.24921095735118845, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01128572228555687, "epoch": 1170, "n_parameters": 631477312} {"train_lr": 0.0005145711092824282, "train_min_lr": 0.0005145711092824282, "train_loss": 0.2491919771523382, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01147306926596241, "epoch": 1171, "n_parameters": 631477312} {"train_lr": 0.0005123603850445193, "train_min_lr": 0.0005123603850445193, "train_loss": 0.2492079517672746, "train_loss_scale": 92842.66666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011465582617510779, "epoch": 1172, "n_parameters": 631477312} {"train_lr": 0.0005101535360319284, "train_min_lr": 0.0005101535360319284, "train_loss": 0.2491355870002642, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011247185843948944, "epoch": 1173, "n_parameters": 631477312} {"train_lr": 0.0005079505708601042, "train_min_lr": 0.0005079505708601042, "train_loss": 0.24909501649940816, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011439687993305806, "epoch": 1174, "n_parameters": 631477312} {"train_lr": 0.000505751498129336, "train_min_lr": 0.000505751498129336, "train_loss": 0.24916609968297565, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01166893898222882, "epoch": 1175, "n_parameters": 631477312} {"train_lr": 0.0005035563264247157, "train_min_lr": 0.0005035563264247157, "train_loss": 0.2491277824096286, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011660509939806966, "epoch": 1176, "n_parameters": 631477312} {"train_lr": 0.0005013650643161046, "train_min_lr": 0.0005013650643161046, "train_loss": 0.24915184191643044, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01167334673794894, "epoch": 1177, "n_parameters": 631477312} {"train_lr": 0.0004991777203581037, "train_min_lr": 0.0004991777203581037, "train_loss": 0.24911436369009793, "train_loss_scale": 131912.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011902467282011341, "epoch": 1178, "n_parameters": 631477312} {"train_lr": 0.0004969943030900144, "train_min_lr": 0.0004969943030900144, "train_loss": 0.24915414383539405, "train_loss_scale": 231056.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1179, "n_parameters": 631477312} {"train_lr": 0.0004948148210358123, "train_min_lr": 0.0004948148210358123, "train_loss": 0.24903069792661625, "train_loss_scale": 78769.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1180, "n_parameters": 631477312} {"train_lr": 0.000492639282704107, "train_min_lr": 0.000492639282704107, "train_loss": 0.24904146728714785, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011651716433847562, "epoch": 1181, "n_parameters": 631477312} {"train_lr": 0.0004904676965881128, "train_min_lr": 0.0004904676965881128, "train_loss": 0.2490344263660984, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011609397052476803, "epoch": 1182, "n_parameters": 631477312} {"train_lr": 0.0004883000711656163, "train_min_lr": 0.0004883000711656163, "train_loss": 0.24896038761541534, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011602353325519616, "epoch": 1183, "n_parameters": 631477312} {"train_lr": 0.000486136414898939, "train_min_lr": 0.000486136414898939, "train_loss": 0.24895113268091032, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011823543502638737, "epoch": 1184, "n_parameters": 631477312} {"train_lr": 0.00048397673623490903, "train_min_lr": 0.00048397673623490903, "train_loss": 0.24895070130841282, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011801908646399776, "epoch": 1185, "n_parameters": 631477312} {"train_lr": 0.0004818210436048244, "train_min_lr": 0.0004818210436048244, "train_loss": 0.24890026161208367, "train_loss_scale": 90952.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011633543306603454, "epoch": 1186, "n_parameters": 631477312} {"train_lr": 0.00047966934542442426, "train_min_lr": 0.00047966934542442426, "train_loss": 0.24887925687914667, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011690770750507139, "epoch": 1187, "n_parameters": 631477312} {"train_lr": 0.000477521650093852, "train_min_lr": 0.000477521650093852, "train_loss": 0.2488165979172127, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011659056733314617, "epoch": 1188, "n_parameters": 631477312} {"train_lr": 0.000475377965997623, "train_min_lr": 0.000475377965997623, "train_loss": 0.24884031299286738, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01160259645145673, "epoch": 1189, "n_parameters": 631477312} {"train_lr": 0.00047323830150459544, "train_min_lr": 0.00047323830150459544, "train_loss": 0.24876938839084828, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011734168678044509, "epoch": 1190, "n_parameters": 631477312} {"train_lr": 0.000471102664967933, "train_min_lr": 0.000471102664967933, "train_loss": 0.24873525184543374, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011664506853916325, "epoch": 1191, "n_parameters": 631477312} {"train_lr": 0.00046897106472507544, "train_min_lr": 0.00046897106472507544, "train_loss": 0.2486818689488782, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011550449055786699, "epoch": 1192, "n_parameters": 631477312} {"train_lr": 0.00046684350909770566, "train_min_lr": 0.00046684350909770566, "train_loss": 0.24874296080834496, "train_loss_scale": 170561.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1193, "n_parameters": 631477312} {"train_lr": 0.00046472000639171394, "train_min_lr": 0.00046472000639171394, "train_loss": 0.2487528545047658, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011961683451842803, "epoch": 1194, "n_parameters": 631477312} {"train_lr": 0.00046260056489717095, "train_min_lr": 0.00046260056489717095, "train_loss": 0.24867809197316185, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011735508558698572, "epoch": 1195, "n_parameters": 631477312} {"train_lr": 0.0004604851928882911, "train_min_lr": 0.0004604851928882911, "train_loss": 0.24862771177401718, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011752535380088748, "epoch": 1196, "n_parameters": 631477312} {"train_lr": 0.0004583738986234033, "train_min_lr": 0.0004583738986234033, "train_loss": 0.24857783596365687, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011794447934684845, "epoch": 1197, "n_parameters": 631477312} {"train_lr": 0.0004562666903449135, "train_min_lr": 0.0004562666903449135, "train_loss": 0.24858071794136402, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011794199644683454, "epoch": 1198, "n_parameters": 631477312} {"train_lr": 0.0004541635762792799, "train_min_lr": 0.0004541635762792799, "train_loss": 0.24857171166998646, "train_loss_scale": 165940.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011819718644405022, "epoch": 1199, "n_parameters": 631477312} {"train_lr": 0.00045206456463697597, "train_min_lr": 0.00045206456463697597, "train_loss": 0.24852898473648402, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01186298488233334, "epoch": 1200, "n_parameters": 631477312} {"train_lr": 0.000449969663612458, "train_min_lr": 0.000449969663612458, "train_loss": 0.24851546692064938, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011998713279190736, "epoch": 1201, "n_parameters": 631477312} {"train_lr": 0.00044787888138413627, "train_min_lr": 0.00044787888138413627, "train_loss": 0.24851429660338908, "train_loss_scale": 212992.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1202, "n_parameters": 631477312} {"train_lr": 0.00044579222611434153, "train_min_lr": 0.00044579222611434153, "train_loss": 0.24843236189693785, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011808817059947895, "epoch": 1203, "n_parameters": 631477312} {"train_lr": 0.0004437097059492909, "train_min_lr": 0.0004437097059492909, "train_loss": 0.24842380054701024, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011787009888137572, "epoch": 1204, "n_parameters": 631477312} {"train_lr": 0.00044163132901906124, "train_min_lr": 0.00044163132901906124, "train_loss": 0.2483955112954554, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011863820607033677, "epoch": 1205, "n_parameters": 631477312} {"train_lr": 0.00043955710343755196, "train_min_lr": 0.00043955710343755196, "train_loss": 0.24831632010710353, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011814028457499659, "epoch": 1206, "n_parameters": 631477312} {"train_lr": 0.0004374870373024571, "train_min_lr": 0.0004374870373024571, "train_loss": 0.24829768331554264, "train_loss_scale": 103345.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1207, "n_parameters": 631477312} {"train_lr": 0.0004354211386952321, "train_min_lr": 0.0004354211386952321, "train_loss": 0.24825372490494585, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01199029441564702, "epoch": 1208, "n_parameters": 631477312} {"train_lr": 0.00043335941568106186, "train_min_lr": 0.00043335941568106186, "train_loss": 0.24830710121955818, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012072294753474684, "epoch": 1209, "n_parameters": 631477312} {"train_lr": 0.0004313018763088307, "train_min_lr": 0.0004313018763088307, "train_loss": 0.24834431780991742, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012118901738013403, "epoch": 1210, "n_parameters": 631477312} {"train_lr": 0.0004292485286110903, "train_min_lr": 0.0004292485286110903, "train_loss": 0.2483825258259924, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01234800096314687, "epoch": 1211, "n_parameters": 631477312} {"train_lr": 0.0004271993806040275, "train_min_lr": 0.0004271993806040275, "train_loss": 0.24828227891777763, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012141575520404447, "epoch": 1212, "n_parameters": 631477312} {"train_lr": 0.00042515444028743435, "train_min_lr": 0.00042515444028743435, "train_loss": 0.248275685750951, "train_loss_scale": 66376.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012261994069633193, "epoch": 1213, "n_parameters": 631477312} {"train_lr": 0.00042311371564467587, "train_min_lr": 0.00042311371564467587, "train_loss": 0.24832848338290858, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012587310098565351, "epoch": 1214, "n_parameters": 631477312} {"train_lr": 0.00042107721464265945, "train_min_lr": 0.00042107721464265945, "train_loss": 0.24824086845649454, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012238159489173155, "epoch": 1215, "n_parameters": 631477312} {"train_lr": 0.000419044945231803, "train_min_lr": 0.000419044945231803, "train_loss": 0.24819144030185178, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012298838163797673, "epoch": 1216, "n_parameters": 631477312} {"train_lr": 0.00041701691534600573, "train_min_lr": 0.00041701691534600573, "train_loss": 0.24820833436499995, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012377580326313201, "epoch": 1217, "n_parameters": 631477312} {"train_lr": 0.0004149931329026143, "train_min_lr": 0.0004149931329026143, "train_loss": 0.2481644817550356, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012437684861465525, "epoch": 1218, "n_parameters": 631477312} {"train_lr": 0.00041297360580239503, "train_min_lr": 0.00041297360580239503, "train_loss": 0.24812141419328654, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01238824982339373, "epoch": 1219, "n_parameters": 631477312} {"train_lr": 0.00041095834192950083, "train_min_lr": 0.00041095834192950083, "train_loss": 0.24814560449120995, "train_loss_scale": 180224.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1220, "n_parameters": 631477312} {"train_lr": 0.00040894734915144056, "train_min_lr": 0.00040894734915144056, "train_loss": 0.24817359611953202, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014757712034341425, "epoch": 1221, "n_parameters": 631477312} {"train_lr": 0.0004069406353190497, "train_min_lr": 0.0004069406353190497, "train_loss": 0.24806457013787273, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012543450130555684, "epoch": 1222, "n_parameters": 631477312} {"train_lr": 0.000404938208266459, "train_min_lr": 0.000404938208266459, "train_loss": 0.2481086679125348, "train_loss_scale": 111117.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1223, "n_parameters": 631477312} {"train_lr": 0.0004029400758110638, "train_min_lr": 0.0004029400758110638, "train_loss": 0.2481475849277698, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012739814426869346, "epoch": 1224, "n_parameters": 631477312} {"train_lr": 0.0004009462457534931, "train_min_lr": 0.0004009462457534931, "train_loss": 0.2480388885626617, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012437508414642742, "epoch": 1225, "n_parameters": 631477312} {"train_lr": 0.0003989567258775788, "train_min_lr": 0.0003989567258775788, "train_loss": 0.24810520718948773, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01283616449039143, "epoch": 1226, "n_parameters": 631477312} {"train_lr": 0.0003969715239503275, "train_min_lr": 0.0003969715239503275, "train_loss": 0.24804398944923797, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01262409381198291, "epoch": 1227, "n_parameters": 631477312} {"train_lr": 0.00039499064772188796, "train_min_lr": 0.00039499064772188796, "train_loss": 0.24802525610460016, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012972564287244892, "epoch": 1228, "n_parameters": 631477312} {"train_lr": 0.0003930141049255215, "train_min_lr": 0.0003930141049255215, "train_loss": 0.24796770112768102, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01252283043681811, "epoch": 1229, "n_parameters": 631477312} {"train_lr": 0.0003910419032775726, "train_min_lr": 0.0003910419032775726, "train_loss": 0.24793977889184576, "train_loss_scale": 124140.30769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012675689052766522, "epoch": 1230, "n_parameters": 631477312} {"train_lr": 0.0003890740504774367, "train_min_lr": 0.0003890740504774367, "train_loss": 0.24790641732322863, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012591877558196966, "epoch": 1231, "n_parameters": 631477312} {"train_lr": 0.00038711055420753357, "train_min_lr": 0.00038711055420753357, "train_loss": 0.24788158716192135, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012499964130349839, "epoch": 1232, "n_parameters": 631477312} {"train_lr": 0.00038515142213327275, "train_min_lr": 0.00038515142213327275, "train_loss": 0.24792480466660494, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01303815825854261, "epoch": 1233, "n_parameters": 631477312} {"train_lr": 0.0003831966619030283, "train_min_lr": 0.0003831966619030283, "train_loss": 0.247955938657889, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013051032034966808, "epoch": 1234, "n_parameters": 631477312} {"train_lr": 0.0003812462811481052, "train_min_lr": 0.0003812462811481052, "train_loss": 0.24792787665799737, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01302269685607499, "epoch": 1235, "n_parameters": 631477312} {"train_lr": 0.00037930028748271266, "train_min_lr": 0.00037930028748271266, "train_loss": 0.24782160328080258, "train_loss_scale": 191566.76923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1236, "n_parameters": 631477312} {"train_lr": 0.0003773586885039312, "train_min_lr": 0.0003773586885039312, "train_loss": 0.24775692196383786, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012571551317635637, "epoch": 1237, "n_parameters": 631477312} {"train_lr": 0.00037542149179168607, "train_min_lr": 0.00037542149179168607, "train_loss": 0.24773188848167849, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01261330773242009, "epoch": 1238, "n_parameters": 631477312} {"train_lr": 0.00037348870490871565, "train_min_lr": 0.00037348870490871565, "train_loss": 0.24770233006431505, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012784813059111819, "epoch": 1239, "n_parameters": 631477312} {"train_lr": 0.0003715603354005422, "train_min_lr": 0.0003715603354005422, "train_loss": 0.24770623712967604, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012750581447154475, "epoch": 1240, "n_parameters": 631477312} {"train_lr": 0.00036963639079544305, "train_min_lr": 0.00036963639079544305, "train_loss": 0.24764778412794933, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012712646359338975, "epoch": 1241, "n_parameters": 631477312} {"train_lr": 0.00036771687860442183, "train_min_lr": 0.00036771687860442183, "train_loss": 0.24762241411297461, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012738662891280957, "epoch": 1242, "n_parameters": 631477312} {"train_lr": 0.000365801806321176, "train_min_lr": 0.000365801806321176, "train_loss": 0.24758308225024778, "train_loss_scale": 211311.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012744449228287125, "epoch": 1243, "n_parameters": 631477312} {"train_lr": 0.00036389118142207233, "train_min_lr": 0.00036389118142207233, "train_loss": 0.2475649222212199, "train_loss_scale": 251641.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1244, "n_parameters": 631477312} {"train_lr": 0.00036198501136611275, "train_min_lr": 0.00036198501136611275, "train_loss": 0.2476239369925446, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013082558292155273, "epoch": 1245, "n_parameters": 631477312} {"train_lr": 0.0003600833035949099, "train_min_lr": 0.0003600833035949099, "train_loss": 0.2475147448324909, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01290817829207159, "epoch": 1246, "n_parameters": 631477312} {"train_lr": 0.0003581860655326535, "train_min_lr": 0.0003581860655326535, "train_loss": 0.24753464942571157, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012944723691385334, "epoch": 1247, "n_parameters": 631477312} {"train_lr": 0.0003562933045860865, "train_min_lr": 0.0003562933045860865, "train_loss": 0.24743564176158264, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012643366308811192, "epoch": 1248, "n_parameters": 631477312} {"train_lr": 0.0003544050281444713, "train_min_lr": 0.0003544050281444713, "train_loss": 0.24739717004092363, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012736209826424526, "epoch": 1249, "n_parameters": 631477312} {"train_lr": 0.00035252124357956267, "train_min_lr": 0.00035252124357956267, "train_loss": 0.24738015717146203, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012784489170791438, "epoch": 1250, "n_parameters": 631477312} {"train_lr": 0.0003506419582455813, "train_min_lr": 0.0003506419582455813, "train_loss": 0.247355701145119, "train_loss_scale": 218873.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01267578410438429, "epoch": 1251, "n_parameters": 631477312} {"train_lr": 0.00034876717947918156, "train_min_lr": 0.00034876717947918156, "train_loss": 0.24728693186830825, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012783170173255106, "epoch": 1252, "n_parameters": 631477312} {"train_lr": 0.00034689691459942405, "train_min_lr": 0.00034689691459942405, "train_loss": 0.247316908571893, "train_loss_scale": 161739.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1253, "n_parameters": 631477312} {"train_lr": 0.0003450311709077491, "train_min_lr": 0.0003450311709077491, "train_loss": 0.24731554101125744, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01313806391464403, "epoch": 1254, "n_parameters": 631477312} {"train_lr": 0.00034316995568794414, "train_min_lr": 0.00034316995568794414, "train_loss": 0.24729787189967167, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01327962016997238, "epoch": 1255, "n_parameters": 631477312} {"train_lr": 0.00034131327620612003, "train_min_lr": 0.00034131327620612003, "train_loss": 0.2472588861755167, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013220498078645995, "epoch": 1256, "n_parameters": 631477312} {"train_lr": 0.00033946113971067857, "train_min_lr": 0.00033946113971067857, "train_loss": 0.24727238831707302, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01318703740178488, "epoch": 1257, "n_parameters": 631477312} {"train_lr": 0.0003376135534322866, "train_min_lr": 0.0003376135534322866, "train_loss": 0.24723242782056332, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013045894161153298, "epoch": 1258, "n_parameters": 631477312} {"train_lr": 0.0003357705245838467, "train_min_lr": 0.0003357705245838467, "train_loss": 0.247166585481654, "train_loss_scale": 90742.15384615384, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1259, "n_parameters": 631477312} {"train_lr": 0.00033393206036047053, "train_min_lr": 0.00033393206036047053, "train_loss": 0.24715066796992546, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013066560546389949, "epoch": 1260, "n_parameters": 631477312} {"train_lr": 0.0003320981679394479, "train_min_lr": 0.0003320981679394479, "train_loss": 0.2470829725587884, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012911410993323304, "epoch": 1261, "n_parameters": 631477312} {"train_lr": 0.0003302688544802215, "train_min_lr": 0.0003302688544802215, "train_loss": 0.24708037399865973, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013014882271631788, "epoch": 1262, "n_parameters": 631477312} {"train_lr": 0.0003284441271243585, "train_min_lr": 0.0003284441271243585, "train_loss": 0.24703658356044728, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013307682202699093, "epoch": 1263, "n_parameters": 631477312} {"train_lr": 0.0003266239929955209, "train_min_lr": 0.0003266239929955209, "train_loss": 0.24705995190351343, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013339018204416603, "epoch": 1264, "n_parameters": 631477312} {"train_lr": 0.00032480845919943997, "train_min_lr": 0.00032480845919943997, "train_loss": 0.24706982042138967, "train_loss_scale": 78979.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013428419950800255, "epoch": 1265, "n_parameters": 631477312} {"train_lr": 0.0003229975328238862, "train_min_lr": 0.0003229975328238862, "train_loss": 0.24701669602953374, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013421995248884344, "epoch": 1266, "n_parameters": 631477312} {"train_lr": 0.00032119122093864414, "train_min_lr": 0.00032119122093864414, "train_loss": 0.24697295118135232, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013270090471427793, "epoch": 1267, "n_parameters": 631477312} {"train_lr": 0.00031938953059548313, "train_min_lr": 0.00031938953059548313, "train_loss": 0.24697955042267075, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013378019184948733, "epoch": 1268, "n_parameters": 631477312} {"train_lr": 0.0003175924688281298, "train_min_lr": 0.0003175924688281298, "train_loss": 0.24696947880184802, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013708459780527612, "epoch": 1269, "n_parameters": 631477312} {"train_lr": 0.00031580004265224147, "train_min_lr": 0.00031580004265224147, "train_loss": 0.24697440668033102, "train_loss_scale": 116158.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1270, "n_parameters": 631477312} {"train_lr": 0.0003140122590653787, "train_min_lr": 0.0003140122590653787, "train_loss": 0.24695643543815002, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013662346889479803, "epoch": 1271, "n_parameters": 631477312} {"train_lr": 0.0003122291250469768, "train_min_lr": 0.0003122291250469768, "train_loss": 0.24698823720861512, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014115584742588302, "epoch": 1272, "n_parameters": 631477312} {"train_lr": 0.00031045064755831896, "train_min_lr": 0.00031045064755831896, "train_loss": 0.24693886443483046, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013806978930146076, "epoch": 1273, "n_parameters": 631477312} {"train_lr": 0.0003086768335425105, "train_min_lr": 0.0003086768335425105, "train_loss": 0.2469074781267689, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013748041658590619, "epoch": 1274, "n_parameters": 631477312} {"train_lr": 0.00030690768992445067, "train_min_lr": 0.00030690768992445067, "train_loss": 0.2468744523715801, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013599873654759275, "epoch": 1275, "n_parameters": 631477312} {"train_lr": 0.00030514322361080464, "train_min_lr": 0.00030514322361080464, "train_loss": 0.24683862601896414, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013774043752047688, "epoch": 1276, "n_parameters": 631477312} {"train_lr": 0.0003033834414899792, "train_min_lr": 0.0003033834414899792, "train_loss": 0.246747371298858, "train_loss_scale": 119099.07692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01329838082445069, "epoch": 1277, "n_parameters": 631477312} {"train_lr": 0.0003016283504320918, "train_min_lr": 0.0003016283504320918, "train_loss": 0.24675788276064664, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013412257614664925, "epoch": 1278, "n_parameters": 631477312} {"train_lr": 0.00029987795728894943, "train_min_lr": 0.00029987795728894943, "train_loss": 0.24674881601001686, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013611698645549133, "epoch": 1279, "n_parameters": 631477312} {"train_lr": 0.0002981322688940158, "train_min_lr": 0.0002981322688940158, "train_loss": 0.24667195157291225, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013567422334558498, "epoch": 1280, "n_parameters": 631477312} {"train_lr": 0.0002963912920623888, "train_min_lr": 0.0002963912920623888, "train_loss": 0.24666028131002513, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013662643539599882, "epoch": 1281, "n_parameters": 631477312} {"train_lr": 0.0002946550335907722, "train_min_lr": 0.0002946550335907722, "train_loss": 0.24662201656088328, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013873134187470453, "epoch": 1282, "n_parameters": 631477312} {"train_lr": 0.0002929235002574496, "train_min_lr": 0.0002929235002574496, "train_loss": 0.24660901612160394, "train_loss_scale": 178963.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1283, "n_parameters": 631477312} {"train_lr": 0.0002911966988222576, "train_min_lr": 0.0002911966988222576, "train_loss": 0.24659812225339314, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013759527164391981, "epoch": 1284, "n_parameters": 631477312} {"train_lr": 0.00028947463602656026, "train_min_lr": 0.00028947463602656026, "train_loss": 0.2465688842617405, "train_loss_scale": 81920.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1285, "n_parameters": 631477312} {"train_lr": 0.0002877573185932216, "train_min_lr": 0.0002877573185932216, "train_loss": 0.2465283868726916, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014039069679804528, "epoch": 1286, "n_parameters": 631477312} {"train_lr": 0.0002860447532265804, "train_min_lr": 0.0002860447532265804, "train_loss": 0.24656583742202762, "train_loss_scale": 43060.51282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1287, "n_parameters": 631477312} {"train_lr": 0.00028433694661242245, "train_min_lr": 0.00028433694661242245, "train_loss": 0.24653037852094245, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014239348387584472, "epoch": 1288, "n_parameters": 631477312} {"train_lr": 0.0002826339054179573, "train_min_lr": 0.0002826339054179573, "train_loss": 0.24653022141697314, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013993097657266144, "epoch": 1289, "n_parameters": 631477312} {"train_lr": 0.00028093563629178934, "train_min_lr": 0.00028093563629178934, "train_loss": 0.24644810744883636, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013734355542020729, "epoch": 1290, "n_parameters": 631477312} {"train_lr": 0.00027924214586389314, "train_min_lr": 0.00027924214586389314, "train_loss": 0.24641145098333558, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01370591761615987, "epoch": 1291, "n_parameters": 631477312} {"train_lr": 0.00027755344074558737, "train_min_lr": 0.00027755344074558737, "train_loss": 0.24636759146307716, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013727718641838202, "epoch": 1292, "n_parameters": 631477312} {"train_lr": 0.00027586952752950975, "train_min_lr": 0.00027586952752950975, "train_loss": 0.24629969750710118, "train_loss_scale": 41800.205128205125, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013832515872752246, "epoch": 1293, "n_parameters": 631477312} {"train_lr": 0.0002741904127895894, "train_min_lr": 0.0002741904127895894, "train_loss": 0.24631840323145765, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013841765052161347, "epoch": 1294, "n_parameters": 631477312} {"train_lr": 0.00027251610308102385, "train_min_lr": 0.00027251610308102385, "train_loss": 0.24623273389760214, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013752018364194112, "epoch": 1295, "n_parameters": 631477312} {"train_lr": 0.00027084660494025017, "train_min_lr": 0.00027084660494025017, "train_loss": 0.2462297521579342, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013893299637577282, "epoch": 1296, "n_parameters": 631477312} {"train_lr": 0.00026918192488492327, "train_min_lr": 0.00026918192488492327, "train_loss": 0.24617874896070227, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013852537740977148, "epoch": 1297, "n_parameters": 631477312} {"train_lr": 0.0002675220694138866, "train_min_lr": 0.0002675220694138866, "train_loss": 0.24615587124553245, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01395730778742104, "epoch": 1298, "n_parameters": 631477312} {"train_lr": 0.0002658670450071499, "train_min_lr": 0.0002658670450071499, "train_loss": 0.24615683742404842, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013938257240881331, "epoch": 1299, "n_parameters": 631477312} {"train_lr": 0.00026421685812586204, "train_min_lr": 0.00026421685812586204, "train_loss": 0.24613228650130808, "train_loss_scale": 122249.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01397918408903747, "epoch": 1300, "n_parameters": 631477312} {"train_lr": 0.00026257151521228675, "train_min_lr": 0.00026257151521228675, "train_loss": 0.24608538578598735, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014066937657144781, "epoch": 1301, "n_parameters": 631477312} {"train_lr": 0.0002609310226897767, "train_min_lr": 0.0002609310226897767, "train_loss": 0.24609500708249518, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014157730724829704, "epoch": 1302, "n_parameters": 631477312} {"train_lr": 0.0002592953869627493, "train_min_lr": 0.0002592953869627493, "train_loss": 0.2460786622757904, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014110053570057528, "epoch": 1303, "n_parameters": 631477312} {"train_lr": 0.0002576646144166603, "train_min_lr": 0.0002576646144166603, "train_loss": 0.2460673205817166, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014243069955577644, "epoch": 1304, "n_parameters": 631477312} {"train_lr": 0.0002560387114179813, "train_min_lr": 0.0002560387114179813, "train_loss": 0.24602024422851032, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01425627428584565, "epoch": 1305, "n_parameters": 631477312} {"train_lr": 0.0002544176843141719, "train_min_lr": 0.0002544176843141719, "train_loss": 0.2460029226136752, "train_loss_scale": 190726.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014334717995296113, "epoch": 1306, "n_parameters": 631477312} {"train_lr": 0.0002528015394336573, "train_min_lr": 0.0002528015394336573, "train_loss": 0.2460126167652794, "train_loss_scale": 153757.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1307, "n_parameters": 631477312} {"train_lr": 0.00025119028308580186, "train_min_lr": 0.00025119028308580186, "train_loss": 0.2460146083866652, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014669207250102399, "epoch": 1308, "n_parameters": 631477312} {"train_lr": 0.00024958392156088685, "train_min_lr": 0.00024958392156088685, "train_loss": 0.24602150621835905, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014816531502546217, "epoch": 1309, "n_parameters": 631477312} {"train_lr": 0.0002479824611300827, "train_min_lr": 0.0002479824611300827, "train_loss": 0.24598089759297764, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014787043182131572, "epoch": 1310, "n_parameters": 631477312} {"train_lr": 0.00024638590804542716, "train_min_lr": 0.00024638590804542716, "train_loss": 0.24588267805759245, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014466573594687268, "epoch": 1311, "n_parameters": 631477312} {"train_lr": 0.0002447942685397993, "train_min_lr": 0.0002447942685397993, "train_loss": 0.24588993699468958, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014713892489313506, "epoch": 1312, "n_parameters": 631477312} {"train_lr": 0.00024320754882689558, "train_min_lr": 0.00024320754882689558, "train_loss": 0.24582058485322753, "train_loss_scale": 185685.33333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014289190900177719, "epoch": 1313, "n_parameters": 631477312} {"train_lr": 0.00024162575510120723, "train_min_lr": 0.00024162575510120723, "train_loss": 0.2457742557132569, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014418486231126083, "epoch": 1314, "n_parameters": 631477312} {"train_lr": 0.00024004889353799303, "train_min_lr": 0.00024004889353799303, "train_loss": 0.24576718142877022, "train_loss_scale": 258363.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1315, "n_parameters": 631477312} {"train_lr": 0.00023847697029325722, "train_min_lr": 0.00023847697029325722, "train_loss": 0.2458088311384647, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01464996109298693, "epoch": 1316, "n_parameters": 631477312} {"train_lr": 0.00023690999150372558, "train_min_lr": 0.00023690999150372558, "train_loss": 0.24576165820531642, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014537609334533604, "epoch": 1317, "n_parameters": 631477312} {"train_lr": 0.00023534796328682044, "train_min_lr": 0.00023534796328682044, "train_loss": 0.24566732864014995, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014491454501134845, "epoch": 1318, "n_parameters": 631477312} {"train_lr": 0.0002337908917406379, "train_min_lr": 0.0002337908917406379, "train_loss": 0.245698524122604, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014820748671459464, "epoch": 1319, "n_parameters": 631477312} {"train_lr": 0.0002322387829439219, "train_min_lr": 0.0002322387829439219, "train_loss": 0.24570490531503963, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01524234318640083, "epoch": 1320, "n_parameters": 631477312} {"train_lr": 0.00023069164295604397, "train_min_lr": 0.00023069164295604397, "train_loss": 0.24562116155926234, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014680350062031394, "epoch": 1321, "n_parameters": 631477312} {"train_lr": 0.00022914947781697628, "train_min_lr": 0.00022914947781697628, "train_loss": 0.2456532890597979, "train_loss_scale": 139894.15384615384, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1322, "n_parameters": 631477312} {"train_lr": 0.0002276122935472699, "train_min_lr": 0.0002276122935472699, "train_loss": 0.24560776230198547, "train_loss_scale": 116578.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1323, "n_parameters": 631477312} {"train_lr": 0.0002260800961480308, "train_min_lr": 0.0002260800961480308, "train_loss": 0.24553439691841888, "train_loss_scale": 49887.179487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1324, "n_parameters": 631477312} {"train_lr": 0.00022455289160089586, "train_min_lr": 0.00022455289160089586, "train_loss": 0.24552671637875625, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014834952322790066, "epoch": 1325, "n_parameters": 631477312} {"train_lr": 0.0002230306858680111, "train_min_lr": 0.0002230306858680111, "train_loss": 0.24549025882226536, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015007679380524235, "epoch": 1326, "n_parameters": 631477312} {"train_lr": 0.0002215134848920061, "train_min_lr": 0.0002215134848920061, "train_loss": 0.24549043532580328, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01502053453348195, "epoch": 1327, "n_parameters": 631477312} {"train_lr": 0.00022000129459597318, "train_min_lr": 0.00022000129459597318, "train_loss": 0.24550028906788868, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015165497319032557, "epoch": 1328, "n_parameters": 631477312} {"train_lr": 0.00021849412088344262, "train_min_lr": 0.00021849412088344262, "train_loss": 0.24549506278410077, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015562484993073994, "epoch": 1329, "n_parameters": 631477312} {"train_lr": 0.00021699196963836007, "train_min_lr": 0.00021699196963836007, "train_loss": 0.24542872292192605, "train_loss_scale": 34973.53846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01535931135265109, "epoch": 1330, "n_parameters": 631477312} {"train_lr": 0.0002154948467250644, "train_min_lr": 0.0002154948467250644, "train_loss": 0.24541275427402118, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015197534982759792, "epoch": 1331, "n_parameters": 631477312} {"train_lr": 0.00021400275798826295, "train_min_lr": 0.00021400275798826295, "train_loss": 0.2453559695934065, "train_loss_scale": 49467.07692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1332, "n_parameters": 631477312} {"train_lr": 0.00021251570925301055, "train_min_lr": 0.00021251570925301055, "train_loss": 0.24539880546180007, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015620227082250401, "epoch": 1333, "n_parameters": 631477312} {"train_lr": 0.00021103370632468622, "train_min_lr": 0.00021103370632468622, "train_loss": 0.24532469122921333, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015409440112610659, "epoch": 1334, "n_parameters": 631477312} {"train_lr": 0.00020955675498896944, "train_min_lr": 0.00020955675498896944, "train_loss": 0.24530188019614285, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015230055241726147, "epoch": 1335, "n_parameters": 631477312} {"train_lr": 0.00020808486101181987, "train_min_lr": 0.00020808486101181987, "train_loss": 0.24525545239269447, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015096686849107917, "epoch": 1336, "n_parameters": 631477312} {"train_lr": 0.00020661803013945218, "train_min_lr": 0.00020661803013945218, "train_loss": 0.24519446675773138, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01490050217268081, "epoch": 1337, "n_parameters": 631477312} {"train_lr": 0.00020515626809831545, "train_min_lr": 0.00020515626809831545, "train_loss": 0.24516761736669698, "train_loss_scale": 35393.64102564102, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014975562130506987, "epoch": 1338, "n_parameters": 631477312} {"train_lr": 0.00020369958059507004, "train_min_lr": 0.00020369958059507004, "train_loss": 0.24512855374278167, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015186965838671686, "epoch": 1339, "n_parameters": 631477312} {"train_lr": 0.00020224797331656628, "train_min_lr": 0.00020224797331656628, "train_loss": 0.24510772262986463, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015401788724538608, "epoch": 1340, "n_parameters": 631477312} {"train_lr": 0.00020080145192982, "train_min_lr": 0.00020080145192982, "train_loss": 0.2451214130866365, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015686830995270074, "epoch": 1341, "n_parameters": 631477312} {"train_lr": 0.00019936002208199326, "train_min_lr": 0.00019936002208199326, "train_loss": 0.2450507812303467, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015666443296373844, "epoch": 1342, "n_parameters": 631477312} {"train_lr": 0.00019792368940037044, "train_min_lr": 0.00019792368940037044, "train_loss": 0.24507434385565993, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015828386856577337, "epoch": 1343, "n_parameters": 631477312} {"train_lr": 0.00019649245949233696, "train_min_lr": 0.00019649245949233696, "train_loss": 0.2450506463796139, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015458730350917157, "epoch": 1344, "n_parameters": 631477312} {"train_lr": 0.0001950663379453567, "train_min_lr": 0.0001950663379453567, "train_loss": 0.24501565898147723, "train_loss_scale": 109436.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015490326194617992, "epoch": 1345, "n_parameters": 631477312} {"train_lr": 0.00019364533032695125, "train_min_lr": 0.00019364533032695125, "train_loss": 0.24496804397756186, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01556226957995349, "epoch": 1346, "n_parameters": 631477312} {"train_lr": 0.00019222944218467777, "train_min_lr": 0.00019222944218467777, "train_loss": 0.24491173940567443, "train_loss_scale": 75198.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1347, "n_parameters": 631477312} {"train_lr": 0.00019081867904610655, "train_min_lr": 0.00019081867904610655, "train_loss": 0.2448919067481676, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015381137538963977, "epoch": 1348, "n_parameters": 631477312} {"train_lr": 0.0001894130464188002, "train_min_lr": 0.0001894130464188002, "train_loss": 0.2448805397740589, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015505720204554306, "epoch": 1349, "n_parameters": 631477312} {"train_lr": 0.00018801254979029276, "train_min_lr": 0.00018801254979029276, "train_loss": 0.24484963932683548, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015609364544686217, "epoch": 1350, "n_parameters": 631477312} {"train_lr": 0.00018661719462806676, "train_min_lr": 0.00018661719462806676, "train_loss": 0.2448075501493011, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015430012105916364, "epoch": 1351, "n_parameters": 631477312} {"train_lr": 0.000185226986379533, "train_min_lr": 0.000185226986379533, "train_loss": 0.24477640511945653, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015450712738152688, "epoch": 1352, "n_parameters": 631477312} {"train_lr": 0.00018384193047200835, "train_min_lr": 0.00018384193047200835, "train_loss": 0.24476401639791825, "train_loss_scale": 94523.07692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015641089051197737, "epoch": 1353, "n_parameters": 631477312} {"train_lr": 0.00018246203231269632, "train_min_lr": 0.00018246203231269632, "train_loss": 0.24471932154101056, "train_loss_scale": 107756.30769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1354, "n_parameters": 631477312} {"train_lr": 0.00018108729728866365, "train_min_lr": 0.00018108729728866365, "train_loss": 0.24470702986209056, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015608255181294411, "epoch": 1355, "n_parameters": 631477312} {"train_lr": 0.00017971773076682075, "train_min_lr": 0.00017971773076682075, "train_loss": 0.24465172725896805, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01568044877002159, "epoch": 1356, "n_parameters": 631477312} {"train_lr": 0.0001783533380939009, "train_min_lr": 0.0001783533380939009, "train_loss": 0.24464110267133668, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015690615982748568, "epoch": 1357, "n_parameters": 631477312} {"train_lr": 0.00017699412459643834, "train_min_lr": 0.00017699412459643834, "train_loss": 0.24458918598396942, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01584640889464376, "epoch": 1358, "n_parameters": 631477312} {"train_lr": 0.00017564009558074763, "train_min_lr": 0.00017564009558074763, "train_loss": 0.24464393788888955, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016400183076397158, "epoch": 1359, "n_parameters": 631477312} {"train_lr": 0.0001742912563329047, "train_min_lr": 0.0001742912563329047, "train_loss": 0.2445722030242905, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015919067347661044, "epoch": 1360, "n_parameters": 631477312} {"train_lr": 0.00017294761211872326, "train_min_lr": 0.00017294761211872326, "train_loss": 0.24453693956960565, "train_loss_scale": 127501.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015821443351272207, "epoch": 1361, "n_parameters": 631477312} {"train_lr": 0.00017160916818373685, "train_min_lr": 0.00017160916818373685, "train_loss": 0.2445232498143107, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01616800381527402, "epoch": 1362, "n_parameters": 631477312} {"train_lr": 0.00017027592975317707, "train_min_lr": 0.00017027592975317707, "train_loss": 0.24448307199427524, "train_loss_scale": 96833.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1363, "n_parameters": 631477312} {"train_lr": 0.0001689479020319532, "train_min_lr": 0.0001689479020319532, "train_loss": 0.24445785461471248, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01622009140331871, "epoch": 1364, "n_parameters": 631477312} {"train_lr": 0.0001676250902046324, "train_min_lr": 0.0001676250902046324, "train_loss": 0.24443987323544347, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016529831283081036, "epoch": 1365, "n_parameters": 631477312} {"train_lr": 0.00016630749943541908, "train_min_lr": 0.00016630749943541908, "train_loss": 0.24442301333272973, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01641868889176597, "epoch": 1366, "n_parameters": 631477312} {"train_lr": 0.00016499513486813497, "train_min_lr": 0.00016499513486813497, "train_loss": 0.24440405031558698, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016653232297334723, "epoch": 1367, "n_parameters": 631477312} {"train_lr": 0.00016368800162619838, "train_min_lr": 0.00016368800162619838, "train_loss": 0.24438613867208075, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016705239108071115, "epoch": 1368, "n_parameters": 631477312} {"train_lr": 0.0001623861048126056, "train_min_lr": 0.0001623861048126056, "train_loss": 0.2443569137577493, "train_loss_scale": 72887.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016598826996647775, "epoch": 1369, "n_parameters": 631477312} {"train_lr": 0.0001610894495099096, "train_min_lr": 0.0001610894495099096, "train_loss": 0.24429612949442786, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01635569954911868, "epoch": 1370, "n_parameters": 631477312} {"train_lr": 0.00015979804078020056, "train_min_lr": 0.00015979804078020056, "train_loss": 0.2443311098867502, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01655484453285447, "epoch": 1371, "n_parameters": 631477312} {"train_lr": 0.00015851188366508654, "train_min_lr": 0.00015851188366508654, "train_loss": 0.24433670262507617, "train_loss_scale": 80029.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1372, "n_parameters": 631477312} {"train_lr": 0.00015723098318567354, "train_min_lr": 0.00015723098318567354, "train_loss": 0.24425276208262986, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016370938541606452, "epoch": 1373, "n_parameters": 631477312} {"train_lr": 0.00015595534434254574, "train_min_lr": 0.00015595534434254574, "train_loss": 0.24420518732856578, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01655351320723406, "epoch": 1374, "n_parameters": 631477312} {"train_lr": 0.000154684972115746, "train_min_lr": 0.000154684972115746, "train_loss": 0.24418652170481017, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016278679563234057, "epoch": 1375, "n_parameters": 631477312} {"train_lr": 0.00015341987146475668, "train_min_lr": 0.00015341987146475668, "train_loss": 0.24416379448181638, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016566383353888225, "epoch": 1376, "n_parameters": 631477312} {"train_lr": 0.00015216004732847995, "train_min_lr": 0.00015216004732847995, "train_loss": 0.2441528341315018, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017068704799151957, "epoch": 1377, "n_parameters": 631477312} {"train_lr": 0.00015090550462521928, "train_min_lr": 0.00015090550462521928, "train_loss": 0.24414346471894532, "train_loss_scale": 49572.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1378, "n_parameters": 631477312} {"train_lr": 0.00014965624825265868, "train_min_lr": 0.00014965624825265868, "train_loss": 0.24410061599710622, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017138796959979795, "epoch": 1379, "n_parameters": 631477312} {"train_lr": 0.00014841228308784527, "train_min_lr": 0.00014841228308784527, "train_loss": 0.2440534547980254, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01706422374953922, "epoch": 1380, "n_parameters": 631477312} {"train_lr": 0.00014717361398716892, "train_min_lr": 0.00014717361398716892, "train_loss": 0.2440201218198173, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01675831627411147, "epoch": 1381, "n_parameters": 631477312} {"train_lr": 0.00014594024578634438, "train_min_lr": 0.00014594024578634438, "train_loss": 0.24400120780755502, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01684943576140377, "epoch": 1382, "n_parameters": 631477312} {"train_lr": 0.0001447121833003921, "train_min_lr": 0.0001447121833003921, "train_loss": 0.2439512531631268, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016950967166429527, "epoch": 1383, "n_parameters": 631477312} {"train_lr": 0.00014348943132361824, "train_min_lr": 0.00014348943132361824, "train_loss": 0.24392452839917192, "train_loss_scale": 35288.61538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01681948149911104, "epoch": 1384, "n_parameters": 631477312} {"train_lr": 0.00014227199462959794, "train_min_lr": 0.00014227199462959794, "train_loss": 0.24394001406080162, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017189814607636668, "epoch": 1385, "n_parameters": 631477312} {"train_lr": 0.00014105987797115546, "train_min_lr": 0.00014105987797115546, "train_loss": 0.24392642685546514, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01722927520182939, "epoch": 1386, "n_parameters": 631477312} {"train_lr": 0.00013985308608034525, "train_min_lr": 0.00013985308608034525, "train_loss": 0.2439115031902941, "train_loss_scale": 62910.35897435898, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1387, "n_parameters": 631477312} {"train_lr": 0.00013865162366843504, "train_min_lr": 0.00013865162366843504, "train_loss": 0.2439077782134215, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017703867386071347, "epoch": 1388, "n_parameters": 631477312} {"train_lr": 0.0001374554954258855, "train_min_lr": 0.0001374554954258855, "train_loss": 0.24389815747212523, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01783566536883322, "epoch": 1389, "n_parameters": 631477312} {"train_lr": 0.00013626470602233357, "train_min_lr": 0.00013626470602233357, "train_loss": 0.24385475292383957, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017613066187223945, "epoch": 1390, "n_parameters": 631477312} {"train_lr": 0.00013507926010657354, "train_min_lr": 0.00013507926010657354, "train_loss": 0.24382591913513935, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017830039201399837, "epoch": 1391, "n_parameters": 631477312} {"train_lr": 0.00013389916230653877, "train_min_lr": 0.00013389916230653877, "train_loss": 0.24382216021872294, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01792314562958498, "epoch": 1392, "n_parameters": 631477312} {"train_lr": 0.00013272441722928392, "train_min_lr": 0.00013272441722928392, "train_loss": 0.2437470514082517, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018012146561000593, "epoch": 1393, "n_parameters": 631477312} {"train_lr": 0.00013155502946096624, "train_min_lr": 0.00013155502946096624, "train_loss": 0.24377899766207123, "train_loss_scale": 54718.35897435898, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017848484427071154, "epoch": 1394, "n_parameters": 631477312} {"train_lr": 0.0001303910035668295, "train_min_lr": 0.0001303910035668295, "train_loss": 0.24372914421539277, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017810867621730536, "epoch": 1395, "n_parameters": 631477312} {"train_lr": 0.00012923234409118378, "train_min_lr": 0.00012923234409118378, "train_loss": 0.24369076908064577, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017796533827025156, "epoch": 1396, "n_parameters": 631477312} {"train_lr": 0.00012807905555738917, "train_min_lr": 0.00012807905555738917, "train_loss": 0.2436551171928071, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017302684390392058, "epoch": 1397, "n_parameters": 631477312} {"train_lr": 0.0001269311424678383, "train_min_lr": 0.0001269311424678383, "train_loss": 0.24359877606352362, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017503370507023275, "epoch": 1398, "n_parameters": 631477312} {"train_lr": 0.00012578860930393768, "train_min_lr": 0.00012578860930393768, "train_loss": 0.24361081324064005, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017852102854074195, "epoch": 1399, "n_parameters": 631477312} {"train_lr": 0.00012465146052609096, "train_min_lr": 0.00012465146052609096, "train_loss": 0.2435568798190126, "train_loss_scale": 82550.15384615384, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017428136520231, "epoch": 1400, "n_parameters": 631477312} {"train_lr": 0.0001235197005736816, "train_min_lr": 0.0001235197005736816, "train_loss": 0.2434943163086875, "train_loss_scale": 75198.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1401, "n_parameters": 631477312} {"train_lr": 0.00012239333386505511, "train_min_lr": 0.00012239333386505511, "train_loss": 0.24347840963552395, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01746706903959887, "epoch": 1402, "n_parameters": 631477312} {"train_lr": 0.00012127236479750209, "train_min_lr": 0.00012127236479750209, "train_loss": 0.24346184396507362, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017419330560817167, "epoch": 1403, "n_parameters": 631477312} {"train_lr": 0.00012015679774724091, "train_min_lr": 0.00012015679774724091, "train_loss": 0.24342751354934314, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017477498228590075, "epoch": 1404, "n_parameters": 631477312} {"train_lr": 0.00011904663706940038, "train_min_lr": 0.00011904663706940038, "train_loss": 0.243390184859984, "train_loss_scale": 54403.282051282054, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1405, "n_parameters": 631477312} {"train_lr": 0.00011794188709800375, "train_min_lr": 0.00011794188709800375, "train_loss": 0.24342472358153033, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0178456662210803, "epoch": 1406, "n_parameters": 631477312} {"train_lr": 0.0001168425521459504, "train_min_lr": 0.0001168425521459504, "train_loss": 0.2433591993400254, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01796663605655806, "epoch": 1407, "n_parameters": 631477312} {"train_lr": 0.000115748636505, "train_min_lr": 0.000115748636505, "train_loss": 0.24335200196872345, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01771483366162731, "epoch": 1408, "n_parameters": 631477312} {"train_lr": 0.00011466014444575561, "train_min_lr": 0.00011466014444575561, "train_loss": 0.24326546506311458, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017554113760781594, "epoch": 1409, "n_parameters": 631477312} {"train_lr": 0.00011357708021764657, "train_min_lr": 0.00011357708021764657, "train_loss": 0.24325606878548384, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017866508700908758, "epoch": 1410, "n_parameters": 631477312} {"train_lr": 0.00011249944804891208, "train_min_lr": 0.00011249944804891208, "train_loss": 0.24324193158939195, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01837738419476992, "epoch": 1411, "n_parameters": 631477312} {"train_lr": 0.00011142725214658523, "train_min_lr": 0.00011142725214658523, "train_loss": 0.24321328267120781, "train_loss_scale": 36758.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1412, "n_parameters": 631477312} {"train_lr": 0.00011036049669647565, "train_min_lr": 0.00011036049669647565, "train_loss": 0.24317161643650764, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017852098119851105, "epoch": 1413, "n_parameters": 631477312} {"train_lr": 0.0001092991858631544, "train_min_lr": 0.0001092991858631544, "train_loss": 0.2431403441199412, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017625233439059976, "epoch": 1414, "n_parameters": 631477312} {"train_lr": 0.00010824332378993593, "train_min_lr": 0.00010824332378993593, "train_loss": 0.2431125362815622, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01780446142388078, "epoch": 1415, "n_parameters": 631477312} {"train_lr": 0.00010719291459886363, "train_min_lr": 0.00010719291459886363, "train_loss": 0.24305819865399733, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017612638304201074, "epoch": 1416, "n_parameters": 631477312} {"train_lr": 0.0001061479623906931, "train_min_lr": 0.0001061479623906931, "train_loss": 0.24307390202421886, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017909782114796914, "epoch": 1417, "n_parameters": 631477312} {"train_lr": 0.0001051084712448757, "train_min_lr": 0.0001051084712448757, "train_loss": 0.242998120530198, "train_loss_scale": 45791.179487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01794160244604334, "epoch": 1418, "n_parameters": 631477312} {"train_lr": 0.00010407444521954368, "train_min_lr": 0.00010407444521954368, "train_loss": 0.24298080971213773, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0179657211037687, "epoch": 1419, "n_parameters": 631477312} {"train_lr": 0.00010304588835149287, "train_min_lr": 0.00010304588835149287, "train_loss": 0.2429565706451495, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017944264768933255, "epoch": 1420, "n_parameters": 631477312} {"train_lr": 0.00010202280465616816, "train_min_lr": 0.00010202280465616816, "train_loss": 0.24291573236517322, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01769981896265959, "epoch": 1421, "n_parameters": 631477312} {"train_lr": 0.00010100519812764733, "train_min_lr": 0.00010100519812764733, "train_loss": 0.2429137651462299, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018135211227509457, "epoch": 1422, "n_parameters": 631477312} {"train_lr": 9.999307273862563e-05, "train_min_lr": 9.999307273862563e-05, "train_loss": 0.2428346184673361, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017952312578041203, "epoch": 1423, "n_parameters": 631477312} {"train_lr": 9.898643244039997e-05, "train_min_lr": 9.898643244039997e-05, "train_loss": 0.24281152892105567, "train_loss_scale": 51882.666666666664, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1424, "n_parameters": 631477312} {"train_lr": 9.79852811628539e-05, "train_min_lr": 9.79852811628539e-05, "train_loss": 0.2428134265415466, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01801580669453893, "epoch": 1425, "n_parameters": 631477312} {"train_lr": 9.698962281444164e-05, "train_min_lr": 9.698962281444164e-05, "train_loss": 0.24278371607084784, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018131321606536705, "epoch": 1426, "n_parameters": 631477312} {"train_lr": 9.599946128217389e-05, "train_min_lr": 9.599946128217389e-05, "train_loss": 0.2427813041364201, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01838722128349428, "epoch": 1427, "n_parameters": 631477312} {"train_lr": 9.501480043160137e-05, "train_min_lr": 9.501480043160137e-05, "train_loss": 0.24271200699373507, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018131498229474977, "epoch": 1428, "n_parameters": 631477312} {"train_lr": 9.403564410680083e-05, "train_min_lr": 9.403564410680083e-05, "train_loss": 0.24270869209431112, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018327382494671605, "epoch": 1429, "n_parameters": 631477312} {"train_lr": 9.306199613035916e-05, "train_min_lr": 9.306199613035916e-05, "train_loss": 0.24267287594743836, "train_loss_scale": 32978.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01831156691798988, "epoch": 1430, "n_parameters": 631477312} {"train_lr": 9.209386030335916e-05, "train_min_lr": 9.209386030335916e-05, "train_loss": 0.24264452344355866, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01832526246420084, "epoch": 1431, "n_parameters": 631477312} {"train_lr": 9.113124040536432e-05, "train_min_lr": 9.113124040536432e-05, "train_loss": 0.24261778316651592, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01857546807075731, "epoch": 1432, "n_parameters": 631477312} {"train_lr": 9.01741401944042e-05, "train_min_lr": 9.01741401944042e-05, "train_loss": 0.24257070742822132, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018417663854331925, "epoch": 1433, "n_parameters": 631477312} {"train_lr": 8.922256340695968e-05, "train_min_lr": 8.922256340695968e-05, "train_loss": 0.2425532129306633, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0184702933109246, "epoch": 1434, "n_parameters": 631477312} {"train_lr": 8.82765137579486e-05, "train_min_lr": 8.82765137579486e-05, "train_loss": 0.24257958976802632, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018642200688377786, "epoch": 1435, "n_parameters": 631477312} {"train_lr": 8.733599494071077e-05, "train_min_lr": 8.733599494071077e-05, "train_loss": 0.2425261736035538, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018860206610929124, "epoch": 1436, "n_parameters": 631477312} {"train_lr": 8.640101062699404e-05, "train_min_lr": 8.640101062699404e-05, "train_loss": 0.24248369242279574, "train_loss_scale": 94733.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1437, "n_parameters": 631477312} {"train_lr": 8.547156446693963e-05, "train_min_lr": 8.547156446693963e-05, "train_loss": 0.2425145473695384, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018777117419701356, "epoch": 1438, "n_parameters": 631477312} {"train_lr": 8.454766008906833e-05, "train_min_lr": 8.454766008906833e-05, "train_loss": 0.24242649438420835, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01870070282035531, "epoch": 1439, "n_parameters": 631477312} {"train_lr": 8.362930110026567e-05, "train_min_lr": 8.362930110026567e-05, "train_loss": 0.2423915265736958, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01852338985563853, "epoch": 1440, "n_parameters": 631477312} {"train_lr": 8.271649108576848e-05, "train_min_lr": 8.271649108576848e-05, "train_loss": 0.2424089760841945, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018474600647982113, "epoch": 1441, "n_parameters": 631477312} {"train_lr": 8.180923360915051e-05, "train_min_lr": 8.180923360915051e-05, "train_loss": 0.24236549008398867, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01856977130787877, "epoch": 1442, "n_parameters": 631477312} {"train_lr": 8.090753221230857e-05, "train_min_lr": 8.090753221230857e-05, "train_loss": 0.24235144197546804, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018695484112518337, "epoch": 1443, "n_parameters": 631477312} {"train_lr": 8.00113904154489e-05, "train_min_lr": 8.00113904154489e-05, "train_loss": 0.2422663983859074, "train_loss_scale": 114057.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018392955597776633, "epoch": 1444, "n_parameters": 631477312} {"train_lr": 7.912081171707306e-05, "train_min_lr": 7.912081171707306e-05, "train_loss": 0.24226907725469807, "train_loss_scale": 122039.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1445, "n_parameters": 631477312} {"train_lr": 7.82357995939648e-05, "train_min_lr": 7.82357995939648e-05, "train_loss": 0.24225664792809254, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01898060159948774, "epoch": 1446, "n_parameters": 631477312} {"train_lr": 7.735635750117588e-05, "train_min_lr": 7.735635750117588e-05, "train_loss": 0.24225508073607507, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019648813135110032, "epoch": 1447, "n_parameters": 631477312} {"train_lr": 7.648248887201305e-05, "train_min_lr": 7.648248887201305e-05, "train_loss": 0.24221863683301192, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019245618231928885, "epoch": 1448, "n_parameters": 631477312} {"train_lr": 7.561419711802458e-05, "train_min_lr": 7.561419711802458e-05, "train_loss": 0.24217100105642414, "train_loss_scale": 55033.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1449, "n_parameters": 631477312} {"train_lr": 7.47514856289866e-05, "train_min_lr": 7.47514856289866e-05, "train_loss": 0.24217925285204098, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019422620976678073, "epoch": 1450, "n_parameters": 631477312} {"train_lr": 7.389435777289031e-05, "train_min_lr": 7.389435777289031e-05, "train_loss": 0.24213509193549937, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019514478331145186, "epoch": 1451, "n_parameters": 631477312} {"train_lr": 7.304281689592842e-05, "train_min_lr": 7.304281689592842e-05, "train_loss": 0.24212525799297369, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01934949044759075, "epoch": 1452, "n_parameters": 631477312} {"train_lr": 7.219686632248242e-05, "train_min_lr": 7.219686632248242e-05, "train_loss": 0.24211801403763297, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019742239218873855, "epoch": 1453, "n_parameters": 631477312} {"train_lr": 7.13565093551097e-05, "train_min_lr": 7.13565093551097e-05, "train_loss": 0.24208032358616877, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018910809348408993, "epoch": 1454, "n_parameters": 631477312} {"train_lr": 7.052174927452995e-05, "train_min_lr": 7.052174927452995e-05, "train_loss": 0.24204304633148682, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019361799487318747, "epoch": 1455, "n_parameters": 631477312} {"train_lr": 6.969258933961333e-05, "train_min_lr": 6.969258933961333e-05, "train_loss": 0.24200743972025335, "train_loss_scale": 62595.282051282054, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019427023875789765, "epoch": 1456, "n_parameters": 631477312} {"train_lr": 6.886903278736681e-05, "train_min_lr": 6.886903278736681e-05, "train_loss": 0.2419920703694702, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019745802357554052, "epoch": 1457, "n_parameters": 631477312} {"train_lr": 6.805108283292237e-05, "train_min_lr": 6.805108283292237e-05, "train_loss": 0.24194103924151605, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018964690120460894, "epoch": 1458, "n_parameters": 631477312} {"train_lr": 6.723874266952386e-05, "train_min_lr": 6.723874266952386e-05, "train_loss": 0.24191725882403076, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019396321286853306, "epoch": 1459, "n_parameters": 631477312} {"train_lr": 6.643201546851466e-05, "train_min_lr": 6.643201546851466e-05, "train_loss": 0.24188790907366917, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019555448076854914, "epoch": 1460, "n_parameters": 631477312} {"train_lr": 6.563090437932561e-05, "train_min_lr": 6.563090437932561e-05, "train_loss": 0.24189448231556573, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01950119014304036, "epoch": 1461, "n_parameters": 631477312} {"train_lr": 6.483541252946215e-05, "train_min_lr": 6.483541252946215e-05, "train_loss": 0.24185125768566743, "train_loss_scale": 98304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01999081366767104, "epoch": 1462, "n_parameters": 631477312} {"train_lr": 6.40455430244928e-05, "train_min_lr": 6.40455430244928e-05, "train_loss": 0.2418520285968836, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019741996937694076, "epoch": 1463, "n_parameters": 631477312} {"train_lr": 6.32612989480364e-05, "train_min_lr": 6.32612989480364e-05, "train_loss": 0.2417990539718467, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019755395803934872, "epoch": 1464, "n_parameters": 631477312} {"train_lr": 6.248268336175046e-05, "train_min_lr": 6.248268336175046e-05, "train_loss": 0.24179691486717322, "train_loss_scale": 109436.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1465, "n_parameters": 631477312} {"train_lr": 6.170969930531892e-05, "train_min_lr": 6.170969930531892e-05, "train_loss": 0.24178272754085275, "train_loss_scale": 37809.230769230766, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1466, "n_parameters": 631477312} {"train_lr": 6.0942349796440837e-05, "train_min_lr": 6.0942349796440837e-05, "train_loss": 0.2417369968114564, "train_loss_scale": 26624.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1467, "n_parameters": 631477312} {"train_lr": 6.0180637830817734e-05, "train_min_lr": 6.0180637830817734e-05, "train_loss": 0.24171133145081022, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020400464898930527, "epoch": 1468, "n_parameters": 631477312} {"train_lr": 5.942456638214276e-05, "train_min_lr": 5.942456638214276e-05, "train_loss": 0.24171443263558337, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020216218888377532, "epoch": 1469, "n_parameters": 631477312} {"train_lr": 5.867413840208859e-05, "train_min_lr": 5.867413840208859e-05, "train_loss": 0.24162837439097273, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020265386260759372, "epoch": 1470, "n_parameters": 631477312} {"train_lr": 5.7929356820295953e-05, "train_min_lr": 5.7929356820295953e-05, "train_loss": 0.2416731690068562, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021567185278026722, "epoch": 1471, "n_parameters": 631477312} {"train_lr": 5.719022454436236e-05, "train_min_lr": 5.719022454436236e-05, "train_loss": 0.24168632892318642, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020794921668055348, "epoch": 1472, "n_parameters": 631477312} {"train_lr": 5.645674445983068e-05, "train_min_lr": 5.645674445983068e-05, "train_loss": 0.24160562607591066, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020465288472433504, "epoch": 1473, "n_parameters": 631477312} {"train_lr": 5.572891943017771e-05, "train_min_lr": 5.572891943017771e-05, "train_loss": 0.24161359277637437, "train_loss_scale": 32190.358974358973, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020883992272548568, "epoch": 1474, "n_parameters": 631477312} {"train_lr": 5.500675229680325e-05, "train_min_lr": 5.500675229680325e-05, "train_loss": 0.24156699888706684, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02061782238813929, "epoch": 1475, "n_parameters": 631477312} {"train_lr": 5.42902458790189e-05, "train_min_lr": 5.42902458790189e-05, "train_loss": 0.2415300020470451, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020531953897518225, "epoch": 1476, "n_parameters": 631477312} {"train_lr": 5.357940297403706e-05, "train_min_lr": 5.357940297403706e-05, "train_loss": 0.2414984914999551, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02074988831121188, "epoch": 1477, "n_parameters": 631477312} {"train_lr": 5.287422635695986e-05, "train_min_lr": 5.287422635695986e-05, "train_loss": 0.24151619292127016, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020507546147713676, "epoch": 1478, "n_parameters": 631477312} {"train_lr": 5.217471878076868e-05, "train_min_lr": 5.217471878076868e-05, "train_loss": 0.24147243481666708, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020357148429283347, "epoch": 1479, "n_parameters": 631477312} {"train_lr": 5.148088297631303e-05, "train_min_lr": 5.148088297631303e-05, "train_loss": 0.24143546246266803, "train_loss_scale": 50937.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020528729903535582, "epoch": 1480, "n_parameters": 631477312} {"train_lr": 5.079272165230025e-05, "train_min_lr": 5.079272165230025e-05, "train_loss": 0.24143835814156306, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02047812047963723, "epoch": 1481, "n_parameters": 631477312} {"train_lr": 5.0110237495284404e-05, "train_min_lr": 5.0110237495284404e-05, "train_loss": 0.24139257358351293, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020584954533917017, "epoch": 1482, "n_parameters": 631477312} {"train_lr": 4.943343316965651e-05, "train_min_lr": 4.943343316965651e-05, "train_loss": 0.2413748528730745, "train_loss_scale": 51567.58974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1483, "n_parameters": 631477312} {"train_lr": 4.8762311317633326e-05, "train_min_lr": 4.8762311317633326e-05, "train_loss": 0.241306582149357, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02082551936380183, "epoch": 1484, "n_parameters": 631477312} {"train_lr": 4.809687455924794e-05, "train_min_lr": 4.809687455924794e-05, "train_loss": 0.2413202563438039, "train_loss_scale": 24208.410256410258, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1485, "n_parameters": 631477312} {"train_lr": 4.743712549233872e-05, "train_min_lr": 4.743712549233872e-05, "train_loss": 0.2413102724822238, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021585746799619533, "epoch": 1486, "n_parameters": 631477312} {"train_lr": 4.678306669253953e-05, "train_min_lr": 4.678306669253953e-05, "train_loss": 0.24130965299450624, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02125620745265713, "epoch": 1487, "n_parameters": 631477312} {"train_lr": 4.6134700713269854e-05, "train_min_lr": 4.6134700713269854e-05, "train_loss": 0.24126808128307742, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021905849048963342, "epoch": 1488, "n_parameters": 631477312} {"train_lr": 4.549203008572446e-05, "train_min_lr": 4.549203008572446e-05, "train_loss": 0.24125661459178305, "train_loss_scale": 12918.153846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1489, "n_parameters": 631477312} {"train_lr": 4.485505731886384e-05, "train_min_lr": 4.485505731886384e-05, "train_loss": 0.2412089101689605, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021416394917580944, "epoch": 1490, "n_parameters": 631477312} {"train_lr": 4.4223784899403976e-05, "train_min_lr": 4.4223784899403976e-05, "train_loss": 0.24119962395753902, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02111248389029732, "epoch": 1491, "n_parameters": 631477312} {"train_lr": 4.3598215291807275e-05, "train_min_lr": 4.3598215291807275e-05, "train_loss": 0.2411493971102083, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021483743575234443, "epoch": 1492, "n_parameters": 631477312} {"train_lr": 4.2978350938272475e-05, "train_min_lr": 4.2978350938272475e-05, "train_loss": 0.24112790098222783, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021146242482922017, "epoch": 1493, "n_parameters": 631477312} {"train_lr": 4.23641942587251e-05, "train_min_lr": 4.23641942587251e-05, "train_loss": 0.24112215663533276, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021238064733692087, "epoch": 1494, "n_parameters": 631477312} {"train_lr": 4.175574765080827e-05, "train_min_lr": 4.175574765080827e-05, "train_loss": 0.24111097474666074, "train_loss_scale": 8297.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02105184707742853, "epoch": 1495, "n_parameters": 631477312} {"train_lr": 4.1153013489873176e-05, "train_min_lr": 4.1153013489873176e-05, "train_loss": 0.24106610366083586, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02102648791594383, "epoch": 1496, "n_parameters": 631477312} {"train_lr": 4.055599412896989e-05, "train_min_lr": 4.055599412896989e-05, "train_loss": 0.2410308277586666, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020784885324060153, "epoch": 1497, "n_parameters": 631477312} {"train_lr": 3.9964691898838054e-05, "train_min_lr": 3.9964691898838054e-05, "train_loss": 0.2410359209498916, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020978278134209223, "epoch": 1498, "n_parameters": 631477312} {"train_lr": 3.937910910789793e-05, "train_min_lr": 3.937910910789793e-05, "train_loss": 0.24100691644259944, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021150173004477833, "epoch": 1499, "n_parameters": 631477312} {"train_lr": 3.8799248042241287e-05, "train_min_lr": 3.8799248042241287e-05, "train_loss": 0.2410071267035957, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021074204962366283, "epoch": 1500, "n_parameters": 631477312} {"train_lr": 3.822511096562247e-05, "train_min_lr": 3.822511096562247e-05, "train_loss": 0.24095228016794396, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021214547017828014, "epoch": 1501, "n_parameters": 631477312} {"train_lr": 3.7656700119449665e-05, "train_min_lr": 3.7656700119449665e-05, "train_loss": 0.24096593866721752, "train_loss_scale": 26256.410256410258, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02209171522050523, "epoch": 1502, "n_parameters": 631477312} {"train_lr": 3.709401772277616e-05, "train_min_lr": 3.709401772277616e-05, "train_loss": 0.2409157515473616, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021691193063862812, "epoch": 1503, "n_parameters": 631477312} {"train_lr": 3.6537065972291316e-05, "train_min_lr": 3.6537065972291316e-05, "train_loss": 0.24093505153313088, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022039610576123383, "epoch": 1504, "n_parameters": 631477312} {"train_lr": 3.598584704231254e-05, "train_min_lr": 3.598584704231254e-05, "train_loss": 0.24091944745622385, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021475173747883394, "epoch": 1505, "n_parameters": 631477312} {"train_lr": 3.544036308477659e-05, "train_min_lr": 3.544036308477659e-05, "train_loss": 0.24084886306455025, "train_loss_scale": 29984.82051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1506, "n_parameters": 631477312} {"train_lr": 3.4900616229230846e-05, "train_min_lr": 3.4900616229230846e-05, "train_loss": 0.2409031685704413, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021825498053565238, "epoch": 1507, "n_parameters": 631477312} {"train_lr": 3.436660858282553e-05, "train_min_lr": 3.436660858282553e-05, "train_loss": 0.2408467801884772, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0219577826702824, "epoch": 1508, "n_parameters": 631477312} {"train_lr": 3.383834223030501e-05, "train_min_lr": 3.383834223030501e-05, "train_loss": 0.2408235436394954, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021535298894517697, "epoch": 1509, "n_parameters": 631477312} {"train_lr": 3.331581923400004e-05, "train_min_lr": 3.331581923400004e-05, "train_loss": 0.2408527091670877, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021861549568338655, "epoch": 1510, "n_parameters": 631477312} {"train_lr": 3.2799041633819495e-05, "train_min_lr": 3.2799041633819495e-05, "train_loss": 0.24076392648455042, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021846090729993124, "epoch": 1511, "n_parameters": 631477312} {"train_lr": 3.228801144724241e-05, "train_min_lr": 3.228801144724241e-05, "train_loss": 0.2407919394885166, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022387643840211708, "epoch": 1512, "n_parameters": 631477312} {"train_lr": 3.178273066931021e-05, "train_min_lr": 3.178273066931021e-05, "train_loss": 0.2407709645996921, "train_loss_scale": 28829.53846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022267742667538233, "epoch": 1513, "n_parameters": 631477312} {"train_lr": 3.12832012726187e-05, "train_min_lr": 3.12832012726187e-05, "train_loss": 0.24071028409824252, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02163576195613505, "epoch": 1514, "n_parameters": 631477312} {"train_lr": 3.078942520731082e-05, "train_min_lr": 3.078942520731082e-05, "train_loss": 0.24069280602419987, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021620826109145314, "epoch": 1515, "n_parameters": 631477312} {"train_lr": 3.030140440106846e-05, "train_min_lr": 3.030140440106846e-05, "train_loss": 0.2406819090456105, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02224822014044875, "epoch": 1516, "n_parameters": 631477312} {"train_lr": 2.981914075910532e-05, "train_min_lr": 2.981914075910532e-05, "train_loss": 0.24069544088799888, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02236230309622792, "epoch": 1517, "n_parameters": 631477312} {"train_lr": 2.93426361641594e-05, "train_min_lr": 2.93426361641594e-05, "train_loss": 0.2406548812466029, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022037555147201206, "epoch": 1518, "n_parameters": 631477312} {"train_lr": 2.8871892476485508e-05, "train_min_lr": 2.8871892476485508e-05, "train_loss": 0.24062126585079405, "train_loss_scale": 44215.794871794875, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022334728079537552, "epoch": 1519, "n_parameters": 631477312} {"train_lr": 2.8406911533848164e-05, "train_min_lr": 2.8406911533848164e-05, "train_loss": 0.24058947635169786, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022537406390676133, "epoch": 1520, "n_parameters": 631477312} {"train_lr": 2.794769515151437e-05, "train_min_lr": 2.794769515151437e-05, "train_loss": 0.24063650981332055, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022957226052545965, "epoch": 1521, "n_parameters": 631477312} {"train_lr": 2.7494245122246477e-05, "train_min_lr": 2.7494245122246477e-05, "train_loss": 0.24059530039234325, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022969706354137413, "epoch": 1522, "n_parameters": 631477312} {"train_lr": 2.7046563216295282e-05, "train_min_lr": 2.7046563216295282e-05, "train_loss": 0.24057610358935422, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02243921432333688, "epoch": 1523, "n_parameters": 631477312} {"train_lr": 2.660465118139297e-05, "train_min_lr": 2.660465118139297e-05, "train_loss": 0.24055812903082904, "train_loss_scale": 41695.179487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1524, "n_parameters": 631477312} {"train_lr": 2.6168510742746464e-05, "train_min_lr": 2.6168510742746464e-05, "train_loss": 0.24051981998714977, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023257542329912003, "epoch": 1525, "n_parameters": 631477312} {"train_lr": 2.573814360303059e-05, "train_min_lr": 2.573814360303059e-05, "train_loss": 0.2405406364854664, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02266831496037925, "epoch": 1526, "n_parameters": 631477312} {"train_lr": 2.5313551442381402e-05, "train_min_lr": 2.5313551442381402e-05, "train_loss": 0.2405278481519184, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02299289937274387, "epoch": 1527, "n_parameters": 631477312} {"train_lr": 2.489473591838974e-05, "train_min_lr": 2.489473591838974e-05, "train_loss": 0.24049442449811465, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02299800810093681, "epoch": 1528, "n_parameters": 631477312} {"train_lr": 2.4481698666094585e-05, "train_min_lr": 2.4481698666094585e-05, "train_loss": 0.24049454323278788, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02334285725075274, "epoch": 1529, "n_parameters": 631477312} {"train_lr": 2.4074441297976873e-05, "train_min_lr": 2.4074441297976873e-05, "train_loss": 0.24047217113324082, "train_loss_scale": 43165.53846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023290148273540232, "epoch": 1530, "n_parameters": 631477312} {"train_lr": 2.3672965403953075e-05, "train_min_lr": 2.3672965403953075e-05, "train_loss": 0.2404032514179842, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022358944221662406, "epoch": 1531, "n_parameters": 631477312} {"train_lr": 2.327727255136899e-05, "train_min_lr": 2.327727255136899e-05, "train_loss": 0.24040645510197067, "train_loss_scale": 39174.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1532, "n_parameters": 631477312} {"train_lr": 2.2887364284993653e-05, "train_min_lr": 2.2887364284993653e-05, "train_loss": 0.24041876197159767, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022509067116352994, "epoch": 1533, "n_parameters": 631477312} {"train_lr": 2.2503242127013326e-05, "train_min_lr": 2.2503242127013326e-05, "train_loss": 0.24039293270116338, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022432140695552032, "epoch": 1534, "n_parameters": 631477312} {"train_lr": 2.2124907577025608e-05, "train_min_lr": 2.2124907577025608e-05, "train_loss": 0.24034100816023943, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023070791461624395, "epoch": 1535, "n_parameters": 631477312} {"train_lr": 2.175236211203337e-05, "train_min_lr": 2.175236211203337e-05, "train_loss": 0.24033301833193177, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02255523428082084, "epoch": 1536, "n_parameters": 631477312} {"train_lr": 2.13856071864392e-05, "train_min_lr": 2.13856071864392e-05, "train_loss": 0.24035506300592366, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022977611276870355, "epoch": 1537, "n_parameters": 631477312} {"train_lr": 2.1024644232039657e-05, "train_min_lr": 2.1024644232039657e-05, "train_loss": 0.24032698684813789, "train_loss_scale": 42325.333333333336, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1538, "n_parameters": 631477312} {"train_lr": 2.0669474658019664e-05, "train_min_lr": 2.0669474658019664e-05, "train_loss": 0.2403154901545256, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022674398156456076, "epoch": 1539, "n_parameters": 631477312} {"train_lr": 2.032009985094699e-05, "train_min_lr": 2.032009985094699e-05, "train_loss": 0.24027225222044551, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02384686555999976, "epoch": 1540, "n_parameters": 631477312} {"train_lr": 1.9976521174766968e-05, "train_min_lr": 1.9976521174766968e-05, "train_loss": 0.24028622672761765, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023339617955617797, "epoch": 1541, "n_parameters": 631477312} {"train_lr": 1.963873997079691e-05, "train_min_lr": 1.963873997079691e-05, "train_loss": 0.24024776067250433, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023257827487750314, "epoch": 1542, "n_parameters": 631477312} {"train_lr": 1.930675755772116e-05, "train_min_lr": 1.930675755772116e-05, "train_loss": 0.24023581738285243, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023528775731579233, "epoch": 1543, "n_parameters": 631477312} {"train_lr": 1.8980575231585747e-05, "train_min_lr": 1.8980575231585747e-05, "train_loss": 0.24026281814365527, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023821528111465085, "epoch": 1544, "n_parameters": 631477312} {"train_lr": 1.8660194265793465e-05, "train_min_lr": 1.8660194265793465e-05, "train_loss": 0.24024831439750508, "train_loss_scale": 55453.53846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02285631330540547, "epoch": 1545, "n_parameters": 631477312} {"train_lr": 1.8345615911098684e-05, "train_min_lr": 1.8345615911098684e-05, "train_loss": 0.2402354769844514, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022903598647994492, "epoch": 1546, "n_parameters": 631477312} {"train_lr": 1.803684139560281e-05, "train_min_lr": 1.803684139560281e-05, "train_loss": 0.24018913698203576, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02339497818133961, "epoch": 1547, "n_parameters": 631477312} {"train_lr": 1.773387192474912e-05, "train_min_lr": 1.773387192474912e-05, "train_loss": 0.24017269626104584, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02337078897592922, "epoch": 1548, "n_parameters": 631477312} {"train_lr": 1.743670868131832e-05, "train_min_lr": 1.743670868131832e-05, "train_loss": 0.24014775934091842, "train_loss_scale": 47681.64102564102, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1549, "n_parameters": 631477312} {"train_lr": 1.7145352825423828e-05, "train_min_lr": 1.7145352825423828e-05, "train_loss": 0.24018237442793086, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022995078763088737, "epoch": 1550, "n_parameters": 631477312} {"train_lr": 1.68598054945072e-05, "train_min_lr": 1.68598054945072e-05, "train_loss": 0.2401306682284205, "train_loss_scale": 28619.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1551, "n_parameters": 631477312} {"train_lr": 1.6580067803333854e-05, "train_min_lr": 1.6580067803333854e-05, "train_loss": 0.24013621238872218, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023699210920872595, "epoch": 1552, "n_parameters": 631477312} {"train_lr": 1.6306140843988466e-05, "train_min_lr": 1.6306140843988466e-05, "train_loss": 0.24013255608196443, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023367154167200893, "epoch": 1553, "n_parameters": 631477312} {"train_lr": 1.6038025685870983e-05, "train_min_lr": 1.6038025685870983e-05, "train_loss": 0.24011598210722113, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02387243699736129, "epoch": 1554, "n_parameters": 631477312} {"train_lr": 1.5775723375692182e-05, "train_min_lr": 1.5775723375692182e-05, "train_loss": 0.24011006246273142, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02328169258693472, "epoch": 1555, "n_parameters": 631477312} {"train_lr": 1.5519234937469837e-05, "train_min_lr": 1.5519234937469837e-05, "train_loss": 0.24008366766457373, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023476738817034624, "epoch": 1556, "n_parameters": 631477312} {"train_lr": 1.5268561372524495e-05, "train_min_lr": 1.5268561372524495e-05, "train_loss": 0.24008175631901488, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024202283238992095, "epoch": 1557, "n_parameters": 631477312} {"train_lr": 1.5023703659475781e-05, "train_min_lr": 1.5023703659475781e-05, "train_loss": 0.24007433840336326, "train_loss_scale": 30194.871794871793, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023345689868363433, "epoch": 1558, "n_parameters": 631477312} {"train_lr": 1.478466275423835e-05, "train_min_lr": 1.478466275423835e-05, "train_loss": 0.24004465541265047, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024145755200431898, "epoch": 1559, "n_parameters": 631477312} {"train_lr": 1.455143959001833e-05, "train_min_lr": 1.455143959001833e-05, "train_loss": 0.24005011826132736, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02387196975401961, "epoch": 1560, "n_parameters": 631477312} {"train_lr": 1.432403507730965e-05, "train_min_lr": 1.432403507730965e-05, "train_loss": 0.2400532806895148, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02340775451813944, "epoch": 1561, "n_parameters": 631477312} {"train_lr": 1.41024501038904e-05, "train_min_lr": 1.41024501038904e-05, "train_loss": 0.24005330055474472, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023719056139294155, "epoch": 1562, "n_parameters": 631477312} {"train_lr": 1.388668553481944e-05, "train_min_lr": 1.388668553481944e-05, "train_loss": 0.24001527646293816, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02424181475996589, "epoch": 1563, "n_parameters": 631477312} {"train_lr": 1.3676742212433047e-05, "train_min_lr": 1.3676742212433047e-05, "train_loss": 0.24001829558983445, "train_loss_scale": 46946.46153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02450146852658154, "epoch": 1564, "n_parameters": 631477312} {"train_lr": 1.3472620956341499e-05, "train_min_lr": 1.3472620956341499e-05, "train_loss": 0.2400382162632946, "train_loss_scale": 38334.35897435898, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1565, "n_parameters": 631477312} {"train_lr": 1.3274322563426021e-05, "train_min_lr": 1.3274322563426021e-05, "train_loss": 0.2399912046519323, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02398966180566603, "epoch": 1566, "n_parameters": 631477312} {"train_lr": 1.3081847807835623e-05, "train_min_lr": 1.3081847807835623e-05, "train_loss": 0.2399900472866228, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0241573963385935, "epoch": 1567, "n_parameters": 631477312} {"train_lr": 1.2895197440984016e-05, "train_min_lr": 1.2895197440984016e-05, "train_loss": 0.23997007879333046, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024712662439411268, "epoch": 1568, "n_parameters": 631477312} {"train_lr": 1.2714372191546779e-05, "train_min_lr": 1.2714372191546779e-05, "train_loss": 0.23994478524829715, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024512801432791047, "epoch": 1569, "n_parameters": 631477312} {"train_lr": 1.2539372765458446e-05, "train_min_lr": 1.2539372765458446e-05, "train_loss": 0.2400156905134328, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024127616093326837, "epoch": 1570, "n_parameters": 631477312} {"train_lr": 1.2370199845909771e-05, "train_min_lr": 1.2370199845909771e-05, "train_loss": 0.23995738612332693, "train_loss_scale": 39804.717948717946, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1571, "n_parameters": 631477312} {"train_lr": 1.2206854093345032e-05, "train_min_lr": 1.2206854093345032e-05, "train_loss": 0.23991876477017426, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02427295313622707, "epoch": 1572, "n_parameters": 631477312} {"train_lr": 1.2049336145459547e-05, "train_min_lr": 1.2049336145459547e-05, "train_loss": 0.23993458372887033, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024349766329695016, "epoch": 1573, "n_parameters": 631477312} {"train_lr": 1.1897646617197056e-05, "train_min_lr": 1.1897646617197056e-05, "train_loss": 0.23995681538377914, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02416210397719764, "epoch": 1574, "n_parameters": 631477312} {"train_lr": 1.1751786100747415e-05, "train_min_lr": 1.1751786100747415e-05, "train_loss": 0.23990944323416513, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024121917431983046, "epoch": 1575, "n_parameters": 631477312} {"train_lr": 1.1611755165544217e-05, "train_min_lr": 1.1611755165544217e-05, "train_loss": 0.23991744428013378, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024494273528361168, "epoch": 1576, "n_parameters": 631477312} {"train_lr": 1.147755435826266e-05, "train_min_lr": 1.147755435826266e-05, "train_loss": 0.2398924898917381, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024669101073717076, "epoch": 1577, "n_parameters": 631477312} {"train_lr": 1.1349184202817314e-05, "train_min_lr": 1.1349184202817314e-05, "train_loss": 0.23992675059558585, "train_loss_scale": 58814.35897435898, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024973171745212033, "epoch": 1578, "n_parameters": 631477312} {"train_lr": 1.1226645200360109e-05, "train_min_lr": 1.1226645200360109e-05, "train_loss": 0.23987673102125812, "train_loss_scale": 36758.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1579, "n_parameters": 631477312} {"train_lr": 1.1109937829278423e-05, "train_min_lr": 1.1109937829278423e-05, "train_loss": 0.23986366828187153, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02407960853396127, "epoch": 1580, "n_parameters": 631477312} {"train_lr": 1.0999062545193157e-05, "train_min_lr": 1.0999062545193157e-05, "train_loss": 0.23988507319098482, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024138571246742055, "epoch": 1581, "n_parameters": 631477312} {"train_lr": 1.0894019780956976e-05, "train_min_lr": 1.0894019780956976e-05, "train_loss": 0.2398651703230989, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023990898554284986, "epoch": 1582, "n_parameters": 631477312} {"train_lr": 1.0794809946652626e-05, "train_min_lr": 1.0794809946652626e-05, "train_loss": 0.23986896706553987, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02402492801252848, "epoch": 1583, "n_parameters": 631477312} {"train_lr": 1.0701433429591352e-05, "train_min_lr": 1.0701433429591352e-05, "train_loss": 0.2398587398034019, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02383798385898654, "epoch": 1584, "n_parameters": 631477312} {"train_lr": 1.0613890594311302e-05, "train_min_lr": 1.0613890594311302e-05, "train_loss": 0.23986641267457834, "train_loss_scale": 34133.333333333336, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1585, "n_parameters": 631477312} {"train_lr": 1.05321817825762e-05, "train_min_lr": 1.05321817825762e-05, "train_loss": 0.23983695938025051, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024358323625981424, "epoch": 1586, "n_parameters": 631477312} {"train_lr": 1.0456307313374012e-05, "train_min_lr": 1.0456307313374012e-05, "train_loss": 0.23981253161298063, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02409187058965938, "epoch": 1587, "n_parameters": 631477312} {"train_lr": 1.0386267482915607e-05, "train_min_lr": 1.0386267482915607e-05, "train_loss": 0.23984499300758427, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024048328722039096, "epoch": 1588, "n_parameters": 631477312} {"train_lr": 1.032206256463369e-05, "train_min_lr": 1.032206256463369e-05, "train_loss": 0.2398228617802931, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024055798639519475, "epoch": 1589, "n_parameters": 631477312} {"train_lr": 1.0263692809181657e-05, "train_min_lr": 1.0263692809181657e-05, "train_loss": 0.23980856503252512, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02403906255793304, "epoch": 1590, "n_parameters": 631477312} {"train_lr": 1.0211158444432734e-05, "train_min_lr": 1.0211158444432734e-05, "train_loss": 0.23981452309705603, "train_loss_scale": 21687.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1591, "n_parameters": 631477312} {"train_lr": 1.0164459675478965e-05, "train_min_lr": 1.0164459675478965e-05, "train_loss": 0.23980695888614997, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024274518844695427, "epoch": 1592, "n_parameters": 631477312} {"train_lr": 1.0123596684630482e-05, "train_min_lr": 1.0123596684630482e-05, "train_loss": 0.2397757187867776, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024267403456645135, "epoch": 1593, "n_parameters": 631477312} {"train_lr": 1.008856963141474e-05, "train_min_lr": 1.008856963141474e-05, "train_loss": 0.2397646751278677, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024071091445736013, "epoch": 1594, "n_parameters": 631477312} {"train_lr": 1.0059378652576001e-05, "train_min_lr": 1.0059378652576001e-05, "train_loss": 0.23973416548687965, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024058164730190467, "epoch": 1595, "n_parameters": 631477312} {"train_lr": 1.0036023862074616e-05, "train_min_lr": 1.0036023862074616e-05, "train_loss": 0.23980102334350634, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024686067681520794, "epoch": 1596, "n_parameters": 631477312} {"train_lr": 1.0018505351086794e-05, "train_min_lr": 1.0018505351086794e-05, "train_loss": 0.23979092416508743, "train_loss_scale": 20742.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024084271158640966, "epoch": 1597, "n_parameters": 631477312} {"train_lr": 1.00068231880041e-05, "train_min_lr": 1.00068231880041e-05, "train_loss": 0.23974433441001636, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024339405700373344, "epoch": 1598, "n_parameters": 631477312} {"train_lr": 1.0000977418433219e-05, "train_min_lr": 1.0000977418433219e-05, "train_loss": 0.2397724284688966, "train_loss_scale": 28409.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1599, "n_parameters": 631477312}