{"train_lr": 0.00014956716896441166, "train_min_lr": 0.00014956716896441166, "train_loss": 0.31573240488218385, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.3474371629791, "epoch": 0, "n_parameters": 86059856} {"train_lr": 0.00044966335363898674, "train_min_lr": 0.00044966335363898674, "train_loss": 0.2935541342848387, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05501143070749748, "epoch": 1, "n_parameters": 86059856} {"train_lr": 0.000749759538313562, "train_min_lr": 0.000749759538313562, "train_loss": 0.28991386467495406, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.029223842173217773, "epoch": 2, "n_parameters": 86059856} {"train_lr": 0.0010498557229881365, "train_min_lr": 0.0010498557229881365, "train_loss": 0.28705685286639404, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020486167506673016, "epoch": 3, "n_parameters": 86059856} {"train_lr": 0.0013499519076627113, "train_min_lr": 0.0013499519076627113, "train_loss": 0.284713162551037, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0163278897286942, "epoch": 4, "n_parameters": 86059856} {"train_lr": 0.0016500480923372883, "train_min_lr": 0.0016500480923372883, "train_loss": 0.28070299360208595, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01496015458057324, "epoch": 5, "n_parameters": 86059856} {"train_lr": 0.0019501442770118633, "train_min_lr": 0.0019501442770118633, "train_loss": 0.27680526299473757, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014661775367597166, "epoch": 6, "n_parameters": 86059856} {"train_lr": 0.002250240461686437, "train_min_lr": 0.002250240461686437, "train_loss": 0.2716342757145564, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01601458446851048, "epoch": 7, "n_parameters": 86059856} {"train_lr": 0.002550336646361013, "train_min_lr": 0.002550336646361013, "train_loss": 0.2653637401616344, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017171296264188222, "epoch": 8, "n_parameters": 86059856} {"train_lr": 0.002850432831035588, "train_min_lr": 0.002850432831035588, "train_loss": 0.2599303722799493, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016726993469115443, "epoch": 9, "n_parameters": 86059856} {"train_lr": 0.0029999990319348056, "train_min_lr": 0.0029999990319348056, "train_loss": 0.2548549122726306, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01586304659035821, "epoch": 10, "n_parameters": 86059856} {"train_lr": 0.0029999932048716707, "train_min_lr": 0.0029999932048716707, "train_loss": 0.24984971343133694, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01430609054529132, "epoch": 11, "n_parameters": 86059856} {"train_lr": 0.002999981541414918, "train_min_lr": 0.002999981541414918, "train_loss": 0.24692193052182212, "train_loss_scale": 155017.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014363429846409231, "epoch": 12, "n_parameters": 86059856} {"train_lr": 0.002999964041610077, "train_min_lr": 0.002999964041610077, "train_loss": 0.24266431785116976, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012374150668247005, "epoch": 13, "n_parameters": 86059856} {"train_lr": 0.0029999407055254644, "train_min_lr": 0.0029999407055254644, "train_loss": 0.2392409691790071, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011695089370895846, "epoch": 14, "n_parameters": 86059856} {"train_lr": 0.002999911533252189, "train_min_lr": 0.002999911533252189, "train_loss": 0.2366573536911836, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011513333376616431, "epoch": 15, "n_parameters": 86059856} {"train_lr": 0.0029998765249041335, "train_min_lr": 0.0029998765249041335, "train_loss": 0.23408901812031102, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010812793738948993, "epoch": 16, "n_parameters": 86059856} {"train_lr": 0.0029998356806179693, "train_min_lr": 0.0029998356806179693, "train_loss": 0.23180920822927967, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010604220749929739, "epoch": 17, "n_parameters": 86059856} {"train_lr": 0.002999789000553154, "train_min_lr": 0.002999789000553154, "train_loss": 0.23003239311779347, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010514047966959575, "epoch": 18, "n_parameters": 86059856} {"train_lr": 0.0029997364848919233, "train_min_lr": 0.0029997364848919233, "train_loss": 0.22820251455339483, "train_loss_scale": 464633.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010191592599790638, "epoch": 19, "n_parameters": 86059856} {"train_lr": 0.0029996781338392934, "train_min_lr": 0.0029996781338392934, "train_loss": 0.2265936968406328, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00988989056368621, "epoch": 20, "n_parameters": 86059856} {"train_lr": 0.002999613947623067, "train_min_lr": 0.002999613947623067, "train_loss": 0.22507181694993797, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009568790582796702, "epoch": 21, "n_parameters": 86059856} {"train_lr": 0.0029995439264938278, "train_min_lr": 0.0029995439264938278, "train_loss": 0.22374759414472067, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009570154159938773, "epoch": 22, "n_parameters": 86059856} {"train_lr": 0.002999468070724929, "train_min_lr": 0.002999468070724929, "train_loss": 0.2224636992188887, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009333757016485414, "epoch": 23, "n_parameters": 86059856} {"train_lr": 0.0029993863806125134, "train_min_lr": 0.0029993863806125134, "train_loss": 0.2215926418833148, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009563545996430688, "epoch": 24, "n_parameters": 86059856} {"train_lr": 0.0029992988564754917, "train_min_lr": 0.0029992988564754917, "train_loss": 0.22040328598963335, "train_loss_scale": 457071.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 25, "n_parameters": 86059856} {"train_lr": 0.0029992054986555587, "train_min_lr": 0.0029992054986555587, "train_loss": 0.21952202446305025, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009186872485607194, "epoch": 26, "n_parameters": 86059856} {"train_lr": 0.002999106307517179, "train_min_lr": 0.002999106307517179, "train_loss": 0.2185941311554649, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009042250696951762, "epoch": 27, "n_parameters": 86059856} {"train_lr": 0.00299900128344759, "train_min_lr": 0.00299900128344759, "train_loss": 0.21777202931638712, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008880081596987274, "epoch": 28, "n_parameters": 86059856} {"train_lr": 0.0029988904268567944, "train_min_lr": 0.0029988904268567944, "train_loss": 0.21693920253967056, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008837273853341451, "epoch": 29, "n_parameters": 86059856} {"train_lr": 0.0029987737381775878, "train_min_lr": 0.0029987737381775878, "train_loss": 0.21617827016430405, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008750826386042321, "epoch": 30, "n_parameters": 86059856} {"train_lr": 0.0029986512178655045, "train_min_lr": 0.0029986512178655045, "train_loss": 0.21549493202175468, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008815150312446535, "epoch": 31, "n_parameters": 86059856} {"train_lr": 0.0029985228663988615, "train_min_lr": 0.0029985228663988615, "train_loss": 0.21471064385528174, "train_loss_scale": 485638.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00858157296748593, "epoch": 32, "n_parameters": 86059856} {"train_lr": 0.002998388684278744, "train_min_lr": 0.002998388684278744, "train_loss": 0.21411820867051107, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 33, "n_parameters": 86059856} {"train_lr": 0.00299824867202899, "train_min_lr": 0.00299824867202899, "train_loss": 0.21349418685079002, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008545203238892823, "epoch": 34, "n_parameters": 86059856} {"train_lr": 0.0029981028301961947, "train_min_lr": 0.0029981028301961947, "train_loss": 0.21292088315702784, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008670000550456537, "epoch": 35, "n_parameters": 86059856} {"train_lr": 0.0029979511593497257, "train_min_lr": 0.0029979511593497257, "train_loss": 0.21233382489150152, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008525685398266293, "epoch": 36, "n_parameters": 86059856} {"train_lr": 0.002997793660081702, "train_min_lr": 0.002997793660081702, "train_loss": 0.2117847291370615, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008478295013236884, "epoch": 37, "n_parameters": 86059856} {"train_lr": 0.0029976303330069946, "train_min_lr": 0.0029976303330069946, "train_loss": 0.2113675460076103, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008530321966808958, "epoch": 38, "n_parameters": 86059856} {"train_lr": 0.002997461178763217, "train_min_lr": 0.002997461178763217, "train_loss": 0.21091477085764593, "train_loss_scale": 310875.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 39, "n_parameters": 86059856} {"train_lr": 0.0029972861980107515, "train_min_lr": 0.0029972861980107515, "train_loss": 0.21048183749931362, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008541965127313653, "epoch": 40, "n_parameters": 86059856} {"train_lr": 0.0029971053914327112, "train_min_lr": 0.0029971053914327112, "train_loss": 0.2099627052577069, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008399996413992574, "epoch": 41, "n_parameters": 86059856} {"train_lr": 0.0029969187597349537, "train_min_lr": 0.0029969187597349537, "train_loss": 0.20959021909257922, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008421571780509578, "epoch": 42, "n_parameters": 86059856} {"train_lr": 0.0029967263036460904, "train_min_lr": 0.0029967263036460904, "train_loss": 0.20917038487580916, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0083197313405645, "epoch": 43, "n_parameters": 86059856} {"train_lr": 0.0029965280239174527, "train_min_lr": 0.0029965280239174527, "train_loss": 0.20878022453055167, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008396849243674809, "epoch": 44, "n_parameters": 86059856} {"train_lr": 0.0029963239213231223, "train_min_lr": 0.0029963239213231223, "train_loss": 0.20851442998705003, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008328220332399584, "epoch": 45, "n_parameters": 86059856} {"train_lr": 0.002996113996659908, "train_min_lr": 0.002996113996659908, "train_loss": 0.2079972828595111, "train_loss_scale": 333561.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 46, "n_parameters": 86059856} {"train_lr": 0.0029958982507473477, "train_min_lr": 0.0029958982507473477, "train_loss": 0.21530612029182988, "train_loss_scale": 109856.82051282052, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 47, "n_parameters": 86059856} {"train_lr": 0.0029956766844277003, "train_min_lr": 0.0029956766844277003, "train_loss": 0.20834716926448238, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008739362842439173, "epoch": 48, "n_parameters": 86059856} {"train_lr": 0.002995449298565954, "train_min_lr": 0.002995449298565954, "train_loss": 0.20759233981609726, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00815408362989099, "epoch": 49, "n_parameters": 86059856} {"train_lr": 0.0029952160940498185, "train_min_lr": 0.0029952160940498185, "train_loss": 0.2071502569059913, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008064348290925128, "epoch": 50, "n_parameters": 86059856} {"train_lr": 0.002994977071789708, "train_min_lr": 0.002994977071789708, "train_loss": 0.2067080588103869, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007935636196261605, "epoch": 51, "n_parameters": 86059856} {"train_lr": 0.002994732232718759, "train_min_lr": 0.002994732232718759, "train_loss": 0.20635936636095628, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007972663689333085, "epoch": 52, "n_parameters": 86059856} {"train_lr": 0.0029944815777928125, "train_min_lr": 0.0029944815777928125, "train_loss": 0.20607972462685445, "train_loss_scale": 89271.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007960914653570702, "epoch": 53, "n_parameters": 86059856} {"train_lr": 0.0029942251079904166, "train_min_lr": 0.0029942251079904166, "train_loss": 0.2061175380845387, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008498971604216749, "epoch": 54, "n_parameters": 86059856} {"train_lr": 0.002993962824312818, "train_min_lr": 0.002993962824312818, "train_loss": 0.20559412582467, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007895578373557864, "epoch": 55, "n_parameters": 86059856} {"train_lr": 0.002993694727783965, "train_min_lr": 0.002993694727783965, "train_loss": 0.20527691025143632, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007899807878017712, "epoch": 56, "n_parameters": 86059856} {"train_lr": 0.0029934208194504915, "train_min_lr": 0.0029934208194504915, "train_loss": 0.20508669341245714, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007952366112313496, "epoch": 57, "n_parameters": 86059856} {"train_lr": 0.0029931411003817263, "train_min_lr": 0.0029931411003817263, "train_loss": 0.20482559557048938, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007957366929473117, "epoch": 58, "n_parameters": 86059856} {"train_lr": 0.0029928555716696795, "train_min_lr": 0.0029928555716696795, "train_loss": 0.20461326607097036, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007980367350869644, "epoch": 59, "n_parameters": 86059856} {"train_lr": 0.002992564234429045, "train_min_lr": 0.002992564234429045, "train_loss": 0.20442438256353712, "train_loss_scale": 255842.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007965219451878697, "epoch": 60, "n_parameters": 86059856} {"train_lr": 0.0029922670897972003, "train_min_lr": 0.0029922670897972003, "train_loss": 0.2041821708687796, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008001087840873366, "epoch": 61, "n_parameters": 86059856} {"train_lr": 0.002991964138934168, "train_min_lr": 0.002991964138934168, "train_loss": 0.20404146467216122, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008063921931748971, "epoch": 62, "n_parameters": 86059856} {"train_lr": 0.0029916553830226754, "train_min_lr": 0.0029916553830226754, "train_loss": 0.2037894204020118, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007954846661633406, "epoch": 63, "n_parameters": 86059856} {"train_lr": 0.0029913408232680786, "train_min_lr": 0.0029913408232680786, "train_loss": 0.2035744497910715, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007954553278604856, "epoch": 64, "n_parameters": 86059856} {"train_lr": 0.002991020460898411, "train_min_lr": 0.002991020460898411, "train_loss": 0.20338759232216921, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007880848274231913, "epoch": 65, "n_parameters": 86059856} {"train_lr": 0.002990694297164359, "train_min_lr": 0.002990694297164359, "train_loss": 0.20340880986231452, "train_loss_scale": 404138.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008396166926011061, "epoch": 66, "n_parameters": 86059856} {"train_lr": 0.0029903623333392484, "train_min_lr": 0.0029903623333392484, "train_loss": 0.20304128447643074, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007849697413173718, "epoch": 67, "n_parameters": 86059856} {"train_lr": 0.002990024570719051, "train_min_lr": 0.002990024570719051, "train_loss": 0.2029143046730986, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008044099727741037, "epoch": 68, "n_parameters": 86059856} {"train_lr": 0.0029896810106223833, "train_min_lr": 0.0029896810106223833, "train_loss": 0.20267826742612016, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007872947181264559, "epoch": 69, "n_parameters": 86059856} {"train_lr": 0.002989331654390483, "train_min_lr": 0.002989331654390483, "train_loss": 0.20253367559650007, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007939966449227471, "epoch": 70, "n_parameters": 86059856} {"train_lr": 0.0029889765033872333, "train_min_lr": 0.0029889765033872333, "train_loss": 0.20232502510174152, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007888957989425996, "epoch": 71, "n_parameters": 86059856} {"train_lr": 0.00298861555899912, "train_min_lr": 0.00298861555899912, "train_loss": 0.20218967258309326, "train_loss_scale": 593184.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00804077583597973, "epoch": 72, "n_parameters": 86059856} {"train_lr": 0.0029882488226352618, "train_min_lr": 0.0029882488226352618, "train_loss": 0.20208985325044546, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00805168239793812, "epoch": 73, "n_parameters": 86059856} {"train_lr": 0.0029878762957273783, "train_min_lr": 0.0029878762957273783, "train_loss": 0.20192582675446877, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007983798668600427, "epoch": 74, "n_parameters": 86059856} {"train_lr": 0.0029874979797298046, "train_min_lr": 0.0029874979797298046, "train_loss": 0.20173080097167537, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007898389887160216, "epoch": 75, "n_parameters": 86059856} {"train_lr": 0.002987113876119467, "train_min_lr": 0.002987113876119467, "train_loss": 0.20155376915891582, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007936696278659675, "epoch": 76, "n_parameters": 86059856} {"train_lr": 0.002986723986395889, "train_min_lr": 0.002986723986395889, "train_loss": 0.20140663800665584, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007847062380721744, "epoch": 77, "n_parameters": 86059856} {"train_lr": 0.0029863283120811898, "train_min_lr": 0.0029863283120811898, "train_loss": 0.20124472591739434, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007883177441843331, "epoch": 78, "n_parameters": 86059856} {"train_lr": 0.002985926854720063, "train_min_lr": 0.002985926854720063, "train_loss": 0.2011579563602423, "train_loss_scale": 1115792.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 79, "n_parameters": 86059856} {"train_lr": 0.0029855196158797863, "train_min_lr": 0.0029855196158797863, "train_loss": 0.20103420318367007, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007921836161329292, "epoch": 80, "n_parameters": 86059856} {"train_lr": 0.002985106597150196, "train_min_lr": 0.002985106597150196, "train_loss": 0.2008597368362527, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00783742062944489, "epoch": 81, "n_parameters": 86059856} {"train_lr": 0.0029846878001437093, "train_min_lr": 0.0029846878001437093, "train_loss": 0.2008361088183637, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007998285601691654, "epoch": 82, "n_parameters": 86059856} {"train_lr": 0.002984263226495282, "train_min_lr": 0.002984263226495282, "train_loss": 0.20059434808671284, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007897290145942511, "epoch": 83, "n_parameters": 86059856} {"train_lr": 0.002983832877862442, "train_min_lr": 0.002983832877862442, "train_loss": 0.20052861707070127, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007959383963726653, "epoch": 84, "n_parameters": 86059856} {"train_lr": 0.0029833967559252514, "train_min_lr": 0.0029833967559252514, "train_loss": 0.2004486926867125, "train_loss_scale": 1058658.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 85, "n_parameters": 86059856} {"train_lr": 0.0029829548623863107, "train_min_lr": 0.0029829548623863107, "train_loss": 0.20028483201988423, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007839641159173483, "epoch": 86, "n_parameters": 86059856} {"train_lr": 0.0029825071989707597, "train_min_lr": 0.0029825071989707597, "train_loss": 0.2001323476732255, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007882031735976059, "epoch": 87, "n_parameters": 86059856} {"train_lr": 0.002982053767426249, "train_min_lr": 0.002982053767426249, "train_loss": 0.2000554662066488, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007906674677565789, "epoch": 88, "n_parameters": 86059856} {"train_lr": 0.0029815945695229615, "train_min_lr": 0.0029815945695229615, "train_loss": 0.19994973520246836, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007855244867432002, "epoch": 89, "n_parameters": 86059856} {"train_lr": 0.002981129607053593, "train_min_lr": 0.002981129607053593, "train_loss": 0.19983373425351694, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007896935152343642, "epoch": 90, "n_parameters": 86059856} {"train_lr": 0.002980658881833332, "train_min_lr": 0.002980658881833332, "train_loss": 0.19977861405230868, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008066122114168814, "epoch": 91, "n_parameters": 86059856} {"train_lr": 0.002980182395699876, "train_min_lr": 0.002980182395699876, "train_loss": 0.19961308244353113, "train_loss_scale": 1428348.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 92, "n_parameters": 86059856} {"train_lr": 0.0029797001505133982, "train_min_lr": 0.0029797001505133982, "train_loss": 0.19956266026920041, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0078548172620149, "epoch": 93, "n_parameters": 86059856} {"train_lr": 0.002979212148156572, "train_min_lr": 0.002979212148156572, "train_loss": 0.1994195063896955, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007883284767815031, "epoch": 94, "n_parameters": 86059856} {"train_lr": 0.002978718390534544, "train_min_lr": 0.002978718390534544, "train_loss": 0.19934945012060687, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007917513887290485, "epoch": 95, "n_parameters": 86059856} {"train_lr": 0.0029782188795749115, "train_min_lr": 0.0029782188795749115, "train_loss": 0.1992001315423598, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007799205353829819, "epoch": 96, "n_parameters": 86059856} {"train_lr": 0.0029777136172277536, "train_min_lr": 0.0029777136172277536, "train_loss": 0.1991212516784286, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007855228397648973, "epoch": 97, "n_parameters": 86059856} {"train_lr": 0.002977202605465595, "train_min_lr": 0.002977202605465595, "train_loss": 0.19903438459508693, "train_loss_scale": 1105709.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007832033967133611, "epoch": 98, "n_parameters": 86059856} {"train_lr": 0.002976685846283399, "train_min_lr": 0.002976685846283399, "train_loss": 0.19897953752213374, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008022070893695435, "epoch": 99, "n_parameters": 86059856} {"train_lr": 0.002976163341698581, "train_min_lr": 0.002976163341698581, "train_loss": 0.19881459569725662, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007810219067136924, "epoch": 100, "n_parameters": 86059856} {"train_lr": 0.0029756350937509696, "train_min_lr": 0.0029756350937509696, "train_loss": 0.19874754428672484, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007842405166369505, "epoch": 101, "n_parameters": 86059856} {"train_lr": 0.0029751011045028282, "train_min_lr": 0.0029751011045028282, "train_loss": 0.19863684615717295, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007810652536602738, "epoch": 102, "n_parameters": 86059856} {"train_lr": 0.0029745613760388317, "train_min_lr": 0.0029745613760388317, "train_loss": 0.19865888312984353, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00794380196991066, "epoch": 103, "n_parameters": 86059856} {"train_lr": 0.0029740159104660532, "train_min_lr": 0.0029740159104660532, "train_loss": 0.19852872563000673, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007833578035784647, "epoch": 104, "n_parameters": 86059856} {"train_lr": 0.0029734647099139695, "train_min_lr": 0.0029734647099139695, "train_loss": 0.19841930892270726, "train_loss_scale": 2117316.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 105, "n_parameters": 86059856} {"train_lr": 0.0029729077765344473, "train_min_lr": 0.0029729077765344473, "train_loss": 0.19837625062045378, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007863891996049251, "epoch": 106, "n_parameters": 86059856} {"train_lr": 0.0029723451125017353, "train_min_lr": 0.0029723451125017353, "train_loss": 0.19825894406471306, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00784870908738902, "epoch": 107, "n_parameters": 86059856} {"train_lr": 0.002971776720012444, "train_min_lr": 0.002971776720012444, "train_loss": 0.1982696535758292, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008015455009463506, "epoch": 108, "n_parameters": 86059856} {"train_lr": 0.0029712026012855583, "train_min_lr": 0.0029712026012855583, "train_loss": 0.19813771398427585, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007935059137451343, "epoch": 109, "n_parameters": 86059856} {"train_lr": 0.002970622758562414, "train_min_lr": 0.002970622758562414, "train_loss": 0.19803142893868378, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007806166879629764, "epoch": 110, "n_parameters": 86059856} {"train_lr": 0.0029700371941066978, "train_min_lr": 0.0029700371941066978, "train_loss": 0.21402605377639142, "train_loss_scale": 935988.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 111, "n_parameters": 86059856} {"train_lr": 0.0029694459102044294, "train_min_lr": 0.0029694459102044294, "train_loss": 0.19943419802719012, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008291636129471067, "epoch": 112, "n_parameters": 86059856} {"train_lr": 0.002968848909163951, "train_min_lr": 0.002968848909163951, "train_loss": 0.19856517252703318, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007864895806564257, "epoch": 113, "n_parameters": 86059856} {"train_lr": 0.0029682461933159374, "train_min_lr": 0.0029682461933159374, "train_loss": 0.19816715175954577, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007707234752627137, "epoch": 114, "n_parameters": 86059856} {"train_lr": 0.0029676377650133612, "train_min_lr": 0.0029676377650133612, "train_loss": 0.19794038658698973, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007669978869111778, "epoch": 115, "n_parameters": 86059856} {"train_lr": 0.0029670236266315076, "train_min_lr": 0.0029670236266315076, "train_loss": 0.19779871969531554, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007721437472635164, "epoch": 116, "n_parameters": 86059856} {"train_lr": 0.002966403780567945, "train_min_lr": 0.002966403780567945, "train_loss": 0.19770477435742626, "train_loss_scale": 319277.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007842482827329196, "epoch": 117, "n_parameters": 86059856} {"train_lr": 0.002965778229242529, "train_min_lr": 0.002965778229242529, "train_loss": 0.19757212915171224, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007739021207014911, "epoch": 118, "n_parameters": 86059856} {"train_lr": 0.0029651469750973905, "train_min_lr": 0.0029651469750973905, "train_loss": 0.19747472412358874, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007723153809097429, "epoch": 119, "n_parameters": 86059856} {"train_lr": 0.0029645100205969127, "train_min_lr": 0.0029645100205969127, "train_loss": 0.19749176864607784, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007907322509429203, "epoch": 120, "n_parameters": 86059856} {"train_lr": 0.002963867368227746, "train_min_lr": 0.002963867368227746, "train_loss": 0.1973565316412789, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007709448013860636, "epoch": 121, "n_parameters": 86059856} {"train_lr": 0.002963219020498775, "train_min_lr": 0.002963219020498775, "train_loss": 0.19732610346415105, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007802817892904083, "epoch": 122, "n_parameters": 86059856} {"train_lr": 0.002962564979941127, "train_min_lr": 0.002962564979941127, "train_loss": 0.19716987499179175, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007686571501649152, "epoch": 123, "n_parameters": 86059856} {"train_lr": 0.0029619052491081507, "train_min_lr": 0.0029619052491081507, "train_loss": 0.19717893635448164, "train_loss_scale": 947751.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007780255146849996, "epoch": 124, "n_parameters": 86059856} {"train_lr": 0.0029612398305754115, "train_min_lr": 0.0029612398305754115, "train_loss": 0.1971385793354458, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007845834388624495, "epoch": 125, "n_parameters": 86059856} {"train_lr": 0.0029605687269406663, "train_min_lr": 0.0029605687269406663, "train_loss": 0.1970699176323624, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007792751309282791, "epoch": 126, "n_parameters": 86059856} {"train_lr": 0.0029598919408238863, "train_min_lr": 0.0029598919408238863, "train_loss": 0.19698238875478125, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007749465016576533, "epoch": 127, "n_parameters": 86059856} {"train_lr": 0.0029592094748672145, "train_min_lr": 0.0029592094748672145, "train_loss": 0.19697373665463275, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007895252443491839, "epoch": 128, "n_parameters": 86059856} {"train_lr": 0.0029585213317349685, "train_min_lr": 0.0029585213317349685, "train_loss": 0.1969445614526287, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00786814228661406, "epoch": 129, "n_parameters": 86059856} {"train_lr": 0.002957827514113639, "train_min_lr": 0.002957827514113639, "train_loss": 0.1968766293452623, "train_loss_scale": 1465317.7435897435, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007823267594899218, "epoch": 130, "n_parameters": 86059856} {"train_lr": 0.002957128024711851, "train_min_lr": 0.002957128024711851, "train_loss": 0.19682448860019064, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007866117574919302, "epoch": 131, "n_parameters": 86059856} {"train_lr": 0.00295642286626039, "train_min_lr": 0.00295642286626039, "train_loss": 0.1967706942680077, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007817265989545446, "epoch": 132, "n_parameters": 86059856} {"train_lr": 0.0029557120415121658, "train_min_lr": 0.0029557120415121658, "train_loss": 0.1967034379306894, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007829511309794795, "epoch": 133, "n_parameters": 86059856} {"train_lr": 0.0029549955532422074, "train_min_lr": 0.0029549955532422074, "train_loss": 0.19667374844161364, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007849700114606187, "epoch": 134, "n_parameters": 86059856} {"train_lr": 0.0029542734042476613, "train_min_lr": 0.0029542734042476613, "train_loss": 0.1965532867799107, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007805205398024275, "epoch": 135, "n_parameters": 86059856} {"train_lr": 0.0029535455973477634, "train_min_lr": 0.0029535455973477634, "train_loss": 0.1965371611325118, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007861449274246413, "epoch": 136, "n_parameters": 86059856} {"train_lr": 0.0029528121353838465, "train_min_lr": 0.0029528121353838465, "train_loss": 0.19651240376660076, "train_loss_scale": 3582634.6666666665, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 137, "n_parameters": 86059856} {"train_lr": 0.002952073021219313, "train_min_lr": 0.002952073021219313, "train_loss": 0.19651788387758037, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007918345482231906, "epoch": 138, "n_parameters": 86059856} {"train_lr": 0.0029513282577396374, "train_min_lr": 0.0029513282577396374, "train_loss": 0.19646520044606847, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00787709367264492, "epoch": 139, "n_parameters": 86059856} {"train_lr": 0.002950577847852346, "train_min_lr": 0.002950577847852346, "train_loss": 0.19650939361860928, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008073207320800671, "epoch": 140, "n_parameters": 86059856} {"train_lr": 0.0029498217944870075, "train_min_lr": 0.0029498217944870075, "train_loss": 0.19635604561951298, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077839004090772225, "epoch": 141, "n_parameters": 86059856} {"train_lr": 0.002949060100595227, "train_min_lr": 0.002949060100595227, "train_loss": 0.19634833023883402, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007945114361623732, "epoch": 142, "n_parameters": 86059856} {"train_lr": 0.0029482927691506233, "train_min_lr": 0.0029482927691506233, "train_loss": 0.19627154431043145, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00781391778936944, "epoch": 143, "n_parameters": 86059856} {"train_lr": 0.002947519803148831, "train_min_lr": 0.002947519803148831, "train_loss": 0.19615536835044622, "train_loss_scale": 3918716.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007805299569064608, "epoch": 144, "n_parameters": 86059856} {"train_lr": 0.002946741205607474, "train_min_lr": 0.002946741205607474, "train_loss": 0.19618759648158, "train_loss_scale": 3118841.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 145, "n_parameters": 86059856} {"train_lr": 0.002945956979566174, "train_min_lr": 0.002945956979566174, "train_loss": 0.19611969874359858, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007793868830702148, "epoch": 146, "n_parameters": 86059856} {"train_lr": 0.0029451671280865083, "train_min_lr": 0.0029451671280865083, "train_loss": 0.19608818520553029, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007787728547835006, "epoch": 147, "n_parameters": 86059856} {"train_lr": 0.002944371654252032, "train_min_lr": 0.002944371654252032, "train_loss": 0.19693280714683425, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00909091472530212, "epoch": 148, "n_parameters": 86059856} {"train_lr": 0.002943570561168243, "train_min_lr": 0.002943570561168243, "train_loss": 0.19615533835708332, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007956589324375948, "epoch": 149, "n_parameters": 86059856} {"train_lr": 0.0029427638519625714, "train_min_lr": 0.0029427638519625714, "train_loss": 0.19599393705049387, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007782425356801981, "epoch": 150, "n_parameters": 86059856} {"train_lr": 0.002941951529784382, "train_min_lr": 0.002941951529784382, "train_loss": 0.19593221104393402, "train_loss_scale": 2312244.512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007778805953701242, "epoch": 151, "n_parameters": 86059856} {"train_lr": 0.00294113359780495, "train_min_lr": 0.00294113359780495, "train_loss": 0.19584987263601178, "train_loss_scale": 3777562.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 152, "n_parameters": 86059856} {"train_lr": 0.0029403100592174437, "train_min_lr": 0.0029403100592174437, "train_loss": 0.1958545989010674, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007805404860753184, "epoch": 153, "n_parameters": 86059856} {"train_lr": 0.0029394809172369253, "train_min_lr": 0.0029394809172369253, "train_loss": 0.1958026487248926, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007786717534495087, "epoch": 154, "n_parameters": 86059856} {"train_lr": 0.002938646175100337, "train_min_lr": 0.002938646175100337, "train_loss": 0.19579595975721112, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007859916388928795, "epoch": 155, "n_parameters": 86059856} {"train_lr": 0.0029378058360664777, "train_min_lr": 0.0029378058360664777, "train_loss": 0.19573493847527948, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007838867137578722, "epoch": 156, "n_parameters": 86059856} {"train_lr": 0.0029369599034159886, "train_min_lr": 0.0029369599034159886, "train_loss": 0.19570939489401495, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007804770453964384, "epoch": 157, "n_parameters": 86059856} {"train_lr": 0.0029361083804513677, "train_min_lr": 0.0029361083804513677, "train_loss": 0.20586685586171463, "train_loss_scale": 1683771.076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 158, "n_parameters": 86059856} {"train_lr": 0.0029352512704969233, "train_min_lr": 0.0029352512704969233, "train_loss": 0.19721043577346092, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008365785025466138, "epoch": 159, "n_parameters": 86059856} {"train_lr": 0.0029343885768987757, "train_min_lr": 0.0029343885768987757, "train_loss": 0.19632784394810024, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008034703163549496, "epoch": 160, "n_parameters": 86059856} {"train_lr": 0.0029335203030248476, "train_min_lr": 0.0029335203030248476, "train_loss": 0.19595701644459787, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007764223934664654, "epoch": 161, "n_parameters": 86059856} {"train_lr": 0.0029326464522648503, "train_min_lr": 0.0029326464522648503, "train_loss": 0.19572721931558007, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007655991835352511, "epoch": 162, "n_parameters": 86059856} {"train_lr": 0.0029317670280302522, "train_min_lr": 0.0029317670280302522, "train_loss": 0.19559837756559062, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00773795588592736, "epoch": 163, "n_parameters": 86059856} {"train_lr": 0.0029308820337542985, "train_min_lr": 0.0029308820337542985, "train_loss": 0.1955783596763817, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007754936253126615, "epoch": 164, "n_parameters": 86059856} {"train_lr": 0.0029299914728919654, "train_min_lr": 0.0029299914728919654, "train_loss": 0.1954811910728518, "train_loss_scale": 2080347.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007737865281971888, "epoch": 165, "n_parameters": 86059856} {"train_lr": 0.0029290953489199754, "train_min_lr": 0.0029290953489199754, "train_loss": 0.19539032343966076, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007712600219887323, "epoch": 166, "n_parameters": 86059856} {"train_lr": 0.00292819366533675, "train_min_lr": 0.00292819366533675, "train_loss": 0.19619733977537507, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008989110572609859, "epoch": 167, "n_parameters": 86059856} {"train_lr": 0.0029272864256624344, "train_min_lr": 0.0029272864256624344, "train_loss": 0.19544175412290946, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007727048510554222, "epoch": 168, "n_parameters": 86059856} {"train_lr": 0.0029263736334388525, "train_min_lr": 0.0029263736334388525, "train_loss": 0.19536104161913195, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00784388345439369, "epoch": 169, "n_parameters": 86059856} {"train_lr": 0.002925455292229509, "train_min_lr": 0.002925455292229509, "train_loss": 0.19528836318148443, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007761825405908987, "epoch": 170, "n_parameters": 86059856} {"train_lr": 0.0029245314056195694, "train_min_lr": 0.0029245314056195694, "train_loss": 0.1952766972558143, "train_loss_scale": 3300325.7435897435, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007799819510322637, "epoch": 171, "n_parameters": 86059856} {"train_lr": 0.0029236019772158526, "train_min_lr": 0.0029236019772158526, "train_loss": 0.19520752799386779, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007728866355207104, "epoch": 172, "n_parameters": 86059856} {"train_lr": 0.0029226670106468075, "train_min_lr": 0.0029226670106468075, "train_loss": 0.19517162301314947, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077493087275144765, "epoch": 173, "n_parameters": 86059856} {"train_lr": 0.0029217265095625097, "train_min_lr": 0.0029217265095625097, "train_loss": 0.1951736833136051, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007850218706059819, "epoch": 174, "n_parameters": 86059856} {"train_lr": 0.002920780477634638, "train_min_lr": 0.002920780477634638, "train_loss": 0.1951254515681798, "train_loss_scale": 3616242.871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 175, "n_parameters": 86059856} {"train_lr": 0.002919828918556457, "train_min_lr": 0.002919828918556457, "train_loss": 0.19508225709581986, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007828659730223127, "epoch": 176, "n_parameters": 86059856} {"train_lr": 0.002918871836042819, "train_min_lr": 0.002918871836042819, "train_loss": 0.19506399205718666, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007850777569752282, "epoch": 177, "n_parameters": 86059856} {"train_lr": 0.0029179092338301395, "train_min_lr": 0.0029179092338301395, "train_loss": 0.19508389687428299, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007831260087326743, "epoch": 178, "n_parameters": 86059856} {"train_lr": 0.002916941115676371, "train_min_lr": 0.002916941115676371, "train_loss": 0.19499503754270384, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007820514850139331, "epoch": 179, "n_parameters": 86059856} {"train_lr": 0.0029159674853610168, "train_min_lr": 0.0029159674853610168, "train_loss": 0.1950158948580233, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007832522933872847, "epoch": 180, "n_parameters": 86059856} {"train_lr": 0.0029149883466850837, "train_min_lr": 0.0029149883466850837, "train_loss": 0.19499170373026758, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007870003624329677, "epoch": 181, "n_parameters": 86059856} {"train_lr": 0.0029140037034710922, "train_min_lr": 0.0029140037034710922, "train_loss": 0.19495263874817353, "train_loss_scale": 3911995.076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00789359897130336, "epoch": 182, "n_parameters": 86059856} {"train_lr": 0.0029130135595630516, "train_min_lr": 0.0029130135595630516, "train_loss": 0.19495521717442152, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007857036415654689, "epoch": 183, "n_parameters": 86059856} {"train_lr": 0.0029120179188264392, "train_min_lr": 0.0029120179188264392, "train_loss": 0.19487752221548596, "train_loss_scale": 2802924.3076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 184, "n_parameters": 86059856} {"train_lr": 0.002911016785148203, "train_min_lr": 0.002911016785148203, "train_loss": 0.1948985963438948, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007900712650436431, "epoch": 185, "n_parameters": 86059856} {"train_lr": 0.0029100101624367283, "train_min_lr": 0.0029100101624367283, "train_loss": 0.19485442667530897, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007846398474588895, "epoch": 186, "n_parameters": 86059856} {"train_lr": 0.002908998054621824, "train_min_lr": 0.002908998054621824, "train_loss": 0.19503472183639994, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008272101039974352, "epoch": 187, "n_parameters": 86059856} {"train_lr": 0.0029079804656547237, "train_min_lr": 0.0029079804656547237, "train_loss": 0.1948320892137977, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007889239833905147, "epoch": 188, "n_parameters": 86059856} {"train_lr": 0.0029069573995080546, "train_min_lr": 0.0029069573995080546, "train_loss": 0.1947586104237976, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007839268782379057, "epoch": 189, "n_parameters": 86059856} {"train_lr": 0.002905928860175819, "train_min_lr": 0.002905928860175819, "train_loss": 0.19473169128506038, "train_loss_scale": 2628161.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007833749461823549, "epoch": 190, "n_parameters": 86059856} {"train_lr": 0.0029048948516733992, "train_min_lr": 0.0029048948516733992, "train_loss": 0.19467661086207208, "train_loss_scale": 2184533.3333333335, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 191, "n_parameters": 86059856} {"train_lr": 0.002903855378037524, "train_min_lr": 0.002903855378037524, "train_loss": 0.19463496426574123, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007812464239601142, "epoch": 192, "n_parameters": 86059856} {"train_lr": 0.0029028104433262503, "train_min_lr": 0.0029028104433262503, "train_loss": 0.19467864110946465, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007884191676007153, "epoch": 193, "n_parameters": 86059856} {"train_lr": 0.0029017600516189723, "train_min_lr": 0.0029017600516189723, "train_loss": 0.19464191143663648, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00784868413570504, "epoch": 194, "n_parameters": 86059856} {"train_lr": 0.0029007042070163655, "train_min_lr": 0.0029007042070163655, "train_loss": 0.19460624356706363, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007834175943385046, "epoch": 195, "n_parameters": 86059856} {"train_lr": 0.002899642913640412, "train_min_lr": 0.002899642913640412, "train_loss": 0.19502870010164303, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008615802955598785, "epoch": 196, "n_parameters": 86059856} {"train_lr": 0.0028985761756343538, "train_min_lr": 0.0028985761756343538, "train_loss": 0.19466592965480417, "train_loss_scale": 3246552.6153846155, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007850606686984882, "epoch": 197, "n_parameters": 86059856} {"train_lr": 0.0028975039971626984, "train_min_lr": 0.0028975039971626984, "train_loss": 0.19456663955814946, "train_loss_scale": 2628161.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 198, "n_parameters": 86059856} {"train_lr": 0.002896426382411189, "train_min_lr": 0.002896426382411189, "train_loss": 0.19455646549781355, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007910126176937364, "epoch": 199, "n_parameters": 86059856} {"train_lr": 0.002895343335586782, "train_min_lr": 0.002895343335586782, "train_loss": 0.1945127767856973, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007854977445020221, "epoch": 200, "n_parameters": 86059856} {"train_lr": 0.002894254860917656, "train_min_lr": 0.002894254860917656, "train_loss": 0.19445297727361321, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007793256505511892, "epoch": 201, "n_parameters": 86059856} {"train_lr": 0.002893160962653167, "train_min_lr": 0.002893160962653167, "train_loss": 0.1944748071523813, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0079042515060745, "epoch": 202, "n_parameters": 86059856} {"train_lr": 0.0028920616450638596, "train_min_lr": 0.0028920616450638596, "train_loss": 0.1943982454816787, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007847458601762086, "epoch": 203, "n_parameters": 86059856} {"train_lr": 0.002890956912441413, "train_min_lr": 0.002890956912441413, "train_loss": 0.19440631352317256, "train_loss_scale": 2802924.3076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007862331590448052, "epoch": 204, "n_parameters": 86059856} {"train_lr": 0.0028898467690986708, "train_min_lr": 0.0028898467690986708, "train_loss": 0.19436534272673994, "train_loss_scale": 2103873.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 205, "n_parameters": 86059856} {"train_lr": 0.0028887312193695805, "train_min_lr": 0.0028887312193695805, "train_loss": 0.19435458354508647, "train_loss_scale": 1609833.0256410257, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 206, "n_parameters": 86059856} {"train_lr": 0.002887610267609204, "train_min_lr": 0.002887610267609204, "train_loss": 0.19436614060153565, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007902196012974646, "epoch": 207, "n_parameters": 86059856} {"train_lr": 0.002886483918193695, "train_min_lr": 0.002886483918193695, "train_loss": 0.1943659541877703, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007890535593152238, "epoch": 208, "n_parameters": 86059856} {"train_lr": 0.0028853521755202796, "train_min_lr": 0.0028853521755202796, "train_loss": 0.1942985042129667, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007896976387247635, "epoch": 209, "n_parameters": 86059856} {"train_lr": 0.0028842150440072253, "train_min_lr": 0.0028842150440072253, "train_loss": 0.19421702893809056, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007851467749927767, "epoch": 210, "n_parameters": 86059856} {"train_lr": 0.0028830725280938638, "train_min_lr": 0.0028830725280938638, "train_loss": 0.19422196289405036, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00789691014842202, "epoch": 211, "n_parameters": 86059856} {"train_lr": 0.002881924632240516, "train_min_lr": 0.002881924632240516, "train_loss": 0.19421509443185267, "train_loss_scale": 1105709.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007916815740127977, "epoch": 212, "n_parameters": 86059856} {"train_lr": 0.0028807713609285265, "train_min_lr": 0.0028807713609285265, "train_loss": 0.19422818589000365, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007894984277813002, "epoch": 213, "n_parameters": 86059856} {"train_lr": 0.0028796127186602205, "train_min_lr": 0.0028796127186602205, "train_loss": 0.19419107283465564, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00790775255807556, "epoch": 214, "n_parameters": 86059856} {"train_lr": 0.002878448709958897, "train_min_lr": 0.002878448709958897, "train_loss": 0.19414969851286748, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007779714739463555, "epoch": 215, "n_parameters": 86059856} {"train_lr": 0.002877279339368794, "train_min_lr": 0.002877279339368794, "train_loss": 0.1941525504912417, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008133669500239193, "epoch": 216, "n_parameters": 86059856} {"train_lr": 0.002876104611455086, "train_min_lr": 0.002876104611455086, "train_loss": 0.19425035933725154, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00812483469221311, "epoch": 217, "n_parameters": 86059856} {"train_lr": 0.002874924530803863, "train_min_lr": 0.002874924530803863, "train_loss": 0.19412492980989507, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00785325088341219, "epoch": 218, "n_parameters": 86059856} {"train_lr": 0.002873739102022118, "train_min_lr": 0.002873739102022118, "train_loss": 0.1940683373130667, "train_loss_scale": 3448201.846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007901382484795669, "epoch": 219, "n_parameters": 86059856} {"train_lr": 0.0028725483297377154, "train_min_lr": 0.0028725483297377154, "train_loss": 0.19403837046299416, "train_loss_scale": 3374263.794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 220, "n_parameters": 86059856} {"train_lr": 0.0028713522185993853, "train_min_lr": 0.0028713522185993853, "train_loss": 0.19404942229485664, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007933143936754324, "epoch": 221, "n_parameters": 86059856} {"train_lr": 0.0028701507732766898, "train_min_lr": 0.0028701507732766898, "train_loss": 0.19399804272091922, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007906227653177503, "epoch": 222, "n_parameters": 86059856} {"train_lr": 0.002868943998460023, "train_min_lr": 0.002868943998460023, "train_loss": 0.19400056887967274, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007906185504860984, "epoch": 223, "n_parameters": 86059856} {"train_lr": 0.0028677318988605963, "train_min_lr": 0.0028677318988605963, "train_loss": 0.19398144689890054, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007869669769066744, "epoch": 224, "n_parameters": 86059856} {"train_lr": 0.0028665144792103924, "train_min_lr": 0.0028665144792103924, "train_loss": 0.19393037976577687, "train_loss_scale": 1861894.564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 225, "n_parameters": 86059856} {"train_lr": 0.0028652917442621704, "train_min_lr": 0.0028652917442621704, "train_loss": 0.1939015207418169, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007861255202442408, "epoch": 226, "n_parameters": 86059856} {"train_lr": 0.0028640636987894296, "train_min_lr": 0.0028640636987894296, "train_loss": 0.1939174199106697, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00792481556821328, "epoch": 227, "n_parameters": 86059856} {"train_lr": 0.002862830347586419, "train_min_lr": 0.002862830347586419, "train_loss": 0.19391001851703876, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007894331838589353, "epoch": 228, "n_parameters": 86059856} {"train_lr": 0.002861591695468095, "train_min_lr": 0.002861591695468095, "train_loss": 0.19384721006290653, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007959125481414584, "epoch": 229, "n_parameters": 86059856} {"train_lr": 0.002860347747270098, "train_min_lr": 0.002860347747270098, "train_loss": 0.19386328232044783, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007858097941304246, "epoch": 230, "n_parameters": 86059856} {"train_lr": 0.002859098507848755, "train_min_lr": 0.002859098507848755, "train_loss": 0.19386013353673312, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007935096642587526, "epoch": 231, "n_parameters": 86059856} {"train_lr": 0.0028578439820810507, "train_min_lr": 0.0028578439820810507, "train_loss": 0.19379874979121944, "train_loss_scale": 1902224.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007812606952845668, "epoch": 232, "n_parameters": 86059856} {"train_lr": 0.0028565841748646017, "train_min_lr": 0.0028565841748646017, "train_loss": 0.19391794294978565, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008126309205694363, "epoch": 233, "n_parameters": 86059856} {"train_lr": 0.002855319091117638, "train_min_lr": 0.002855319091117638, "train_loss": 0.19379994328706884, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00787923068781264, "epoch": 234, "n_parameters": 86059856} {"train_lr": 0.0028540487357790047, "train_min_lr": 0.0028540487357790047, "train_loss": 0.19378593352097923, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007846370743862234, "epoch": 235, "n_parameters": 86059856} {"train_lr": 0.0028527731138081117, "train_min_lr": 0.0028527731138081117, "train_loss": 0.19372297027028906, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007879853590379637, "epoch": 236, "n_parameters": 86059856} {"train_lr": 0.0028514922301849416, "train_min_lr": 0.0028514922301849416, "train_loss": 0.19369754737887818, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007852072005148213, "epoch": 237, "n_parameters": 86059856} {"train_lr": 0.002850206089910009, "train_min_lr": 0.002850206089910009, "train_loss": 0.1937278983159325, "train_loss_scale": 2944078.769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007964921127168987, "epoch": 238, "n_parameters": 86059856} {"train_lr": 0.002848914698004355, "train_min_lr": 0.002848914698004355, "train_loss": 0.19367111229504913, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007915616486197671, "epoch": 239, "n_parameters": 86059856} {"train_lr": 0.002847618059509524, "train_min_lr": 0.002847618059509524, "train_loss": 0.19365161983893278, "train_loss_scale": 2177811.6923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 240, "n_parameters": 86059856} {"train_lr": 0.002846316179487536, "train_min_lr": 0.002846316179487536, "train_loss": 0.19367775103698173, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008031938726512285, "epoch": 241, "n_parameters": 86059856} {"train_lr": 0.0028450090630208814, "train_min_lr": 0.0028450090630208814, "train_loss": 0.1936241730581969, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00787172248461642, "epoch": 242, "n_parameters": 86059856} {"train_lr": 0.0028436967152124944, "train_min_lr": 0.0028436967152124944, "train_loss": 0.19390975393784735, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00855071479991938, "epoch": 243, "n_parameters": 86059856} {"train_lr": 0.0028423791411857206, "train_min_lr": 0.0028423791411857206, "train_loss": 0.193610546626867, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00786437414204463, "epoch": 244, "n_parameters": 86059856} {"train_lr": 0.0028410563460843246, "train_min_lr": 0.0028410563460843246, "train_loss": 0.19356712029100612, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00788217709841541, "epoch": 245, "n_parameters": 86059856} {"train_lr": 0.002839728335072437, "train_min_lr": 0.002839728335072437, "train_loss": 0.19356535985850945, "train_loss_scale": 2453398.9743589745, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 246, "n_parameters": 86059856} {"train_lr": 0.002838395113334564, "train_min_lr": 0.002838395113334564, "train_loss": 0.19352349835949448, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007838334222562993, "epoch": 247, "n_parameters": 86059856} {"train_lr": 0.0028370566860755502, "train_min_lr": 0.0028370566860755502, "train_loss": 0.19353763674958968, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007945110625885904, "epoch": 248, "n_parameters": 86059856} {"train_lr": 0.002835713058520561, "train_min_lr": 0.002835713058520561, "train_loss": 0.19349966414320546, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007913309328544598, "epoch": 249, "n_parameters": 86059856} {"train_lr": 0.0028343642359150677, "train_min_lr": 0.0028343642359150677, "train_loss": 0.1935000783369805, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007945042952763824, "epoch": 250, "n_parameters": 86059856} {"train_lr": 0.002833010223524816, "train_min_lr": 0.002833010223524816, "train_loss": 0.19347086865216112, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007880122700813584, "epoch": 251, "n_parameters": 86059856} {"train_lr": 0.0028316510266358143, "train_min_lr": 0.0028316510266358143, "train_loss": 0.193510607475988, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00803137697183933, "epoch": 252, "n_parameters": 86059856} {"train_lr": 0.0028302866505543183, "train_min_lr": 0.0028302866505543183, "train_loss": 0.19339027596064484, "train_loss_scale": 2728986.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 253, "n_parameters": 86059856} {"train_lr": 0.002828917100606794, "train_min_lr": 0.002828917100606794, "train_loss": 0.19345721900940704, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007896482474349726, "epoch": 254, "n_parameters": 86059856} {"train_lr": 0.0028275423821399106, "train_min_lr": 0.0028275423821399106, "train_loss": 0.19342925721325743, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007900151655448075, "epoch": 255, "n_parameters": 86059856} {"train_lr": 0.002826162500520514, "train_min_lr": 0.002826162500520514, "train_loss": 0.1933812357318134, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007876672316342592, "epoch": 256, "n_parameters": 86059856} {"train_lr": 0.0028247774611356072, "train_min_lr": 0.0028247774611356072, "train_loss": 0.19333517057701755, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007892157066052254, "epoch": 257, "n_parameters": 86059856} {"train_lr": 0.0028233872693923315, "train_min_lr": 0.0028233872693923315, "train_loss": 0.1933675864353203, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00789092159203182, "epoch": 258, "n_parameters": 86059856} {"train_lr": 0.0028219919307179283, "train_min_lr": 0.0028219919307179283, "train_loss": 0.19329899550081256, "train_loss_scale": 2641604.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00788881591348073, "epoch": 259, "n_parameters": 86059856} {"train_lr": 0.0028205914505597556, "train_min_lr": 0.0028205914505597556, "train_loss": 0.19333657438460833, "train_loss_scale": 2298801.230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 260, "n_parameters": 86059856} {"train_lr": 0.002819185834385233, "train_min_lr": 0.002819185834385233, "train_loss": 0.19332141457841948, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008002080112838974, "epoch": 261, "n_parameters": 86059856} {"train_lr": 0.0028177750876818212, "train_min_lr": 0.0028177750876818212, "train_loss": 0.19324771434259722, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007899406254112434, "epoch": 262, "n_parameters": 86059856} {"train_lr": 0.0028163592159570175, "train_min_lr": 0.0028163592159570175, "train_loss": 0.19327124950881952, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007900070216960441, "epoch": 263, "n_parameters": 86059856} {"train_lr": 0.0028149382247383403, "train_min_lr": 0.0028149382247383403, "train_loss": 0.19327316852286458, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007967458399108205, "epoch": 264, "n_parameters": 86059856} {"train_lr": 0.0028135121195732755, "train_min_lr": 0.0028135121195732755, "train_loss": 0.19326831990829071, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007933020860386582, "epoch": 265, "n_parameters": 86059856} {"train_lr": 0.002812080906029277, "train_min_lr": 0.002812080906029277, "train_loss": 0.1932386811524152, "train_loss_scale": 3024738.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 266, "n_parameters": 86059856} {"train_lr": 0.0028106445896937493, "train_min_lr": 0.0028106445896937493, "train_loss": 0.19351011625706002, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008580761218371872, "epoch": 267, "n_parameters": 86059856} {"train_lr": 0.002809203176174018, "train_min_lr": 0.002809203176174018, "train_loss": 0.19324461352199507, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007826171858677974, "epoch": 268, "n_parameters": 86059856} {"train_lr": 0.0028077566710972965, "train_min_lr": 0.0028077566710972965, "train_loss": 0.19319713456580082, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007980570183672871, "epoch": 269, "n_parameters": 86059856} {"train_lr": 0.002806305080110684, "train_min_lr": 0.002806305080110684, "train_loss": 0.19316592058525062, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007902302553889174, "epoch": 270, "n_parameters": 86059856} {"train_lr": 0.0028048484088811373, "train_min_lr": 0.0028048484088811373, "train_loss": 0.19310727599076927, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007838643545535607, "epoch": 271, "n_parameters": 86059856} {"train_lr": 0.0028033866630954372, "train_min_lr": 0.0028033866630954372, "train_loss": 0.193103038914836, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007878860115455702, "epoch": 272, "n_parameters": 86059856} {"train_lr": 0.00280191984846019, "train_min_lr": 0.00280191984846019, "train_loss": 0.19310234226764011, "train_loss_scale": 2439955.6923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 273, "n_parameters": 86059856} {"train_lr": 0.0028004479707017717, "train_min_lr": 0.0028004479707017717, "train_loss": 0.19307106146469522, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00786363823303523, "epoch": 274, "n_parameters": 86059856} {"train_lr": 0.002798971035566338, "train_min_lr": 0.002798971035566338, "train_loss": 0.1930855691062812, "train_loss_scale": 1468678.564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 275, "n_parameters": 86059856} {"train_lr": 0.002797489048819781, "train_min_lr": 0.002797489048819781, "train_loss": 0.1930849353532092, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007907862383991672, "epoch": 276, "n_parameters": 86059856} {"train_lr": 0.0027960020162477272, "train_min_lr": 0.0027960020162477272, "train_loss": 0.19302263853546137, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007840008876370028, "epoch": 277, "n_parameters": 86059856} {"train_lr": 0.0027945099436554794, "train_min_lr": 0.0027945099436554794, "train_loss": 0.19305928899811056, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00796668425662061, "epoch": 278, "n_parameters": 86059856} {"train_lr": 0.002793012836868041, "train_min_lr": 0.002793012836868041, "train_loss": 0.19302850323490417, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007983834279748874, "epoch": 279, "n_parameters": 86059856} {"train_lr": 0.002791510701730047, "train_min_lr": 0.002791510701730047, "train_loss": 0.19299136604874945, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007889606506348802, "epoch": 280, "n_parameters": 86059856} {"train_lr": 0.0027900035441057827, "train_min_lr": 0.0027900035441057827, "train_loss": 0.19306996687410924, "train_loss_scale": 1246864.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007970742081315855, "epoch": 281, "n_parameters": 86059856} {"train_lr": 0.0027884913698791205, "train_min_lr": 0.0027884913698791205, "train_loss": 0.19300584366712242, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007934191120931735, "epoch": 282, "n_parameters": 86059856} {"train_lr": 0.002786974184953536, "train_min_lr": 0.002786974184953536, "train_loss": 0.19295709896593904, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007892338926187502, "epoch": 283, "n_parameters": 86059856} {"train_lr": 0.0027854519952520565, "train_min_lr": 0.0027854519952520565, "train_loss": 0.19294247574483356, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007858958729626372, "epoch": 284, "n_parameters": 86059856} {"train_lr": 0.0027839248067172465, "train_min_lr": 0.0027839248067172465, "train_loss": 0.192972978302397, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008014821787722982, "epoch": 285, "n_parameters": 86059856} {"train_lr": 0.0027823926253111968, "train_min_lr": 0.0027823926253111968, "train_loss": 0.19293699408164963, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007920328967977697, "epoch": 286, "n_parameters": 86059856} {"train_lr": 0.0027808554570154715, "train_min_lr": 0.0027808554570154715, "train_loss": 0.1929167190578599, "train_loss_scale": 1663606.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 287, "n_parameters": 86059856} {"train_lr": 0.0027793133078311215, "train_min_lr": 0.0027793133078311215, "train_loss": 0.19289674610496524, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00792564309011094, "epoch": 288, "n_parameters": 86059856} {"train_lr": 0.0027777661837786343, "train_min_lr": 0.0027777661837786343, "train_loss": 0.19291234185369924, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00797143519575445, "epoch": 289, "n_parameters": 86059856} {"train_lr": 0.0027762140908979215, "train_min_lr": 0.0027762140908979215, "train_loss": 0.19286551057265547, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0079397970604567, "epoch": 290, "n_parameters": 86059856} {"train_lr": 0.002774657035248286, "train_min_lr": 0.002774657035248286, "train_loss": 0.19281446074063963, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007886141237134162, "epoch": 291, "n_parameters": 86059856} {"train_lr": 0.002773095022908419, "train_min_lr": 0.002773095022908419, "train_loss": 0.19288986115358198, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007918316923165455, "epoch": 292, "n_parameters": 86059856} {"train_lr": 0.00277152805997634, "train_min_lr": 0.00277152805997634, "train_loss": 0.19284050934351024, "train_loss_scale": 1051936.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008110835194551887, "epoch": 293, "n_parameters": 86059856} {"train_lr": 0.002769956152569427, "train_min_lr": 0.002769956152569427, "train_loss": 0.19285968577680299, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007946489874321299, "epoch": 294, "n_parameters": 86059856} {"train_lr": 0.002768379306824332, "train_min_lr": 0.002768379306824332, "train_loss": 0.19279302121140063, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007866215753333213, "epoch": 295, "n_parameters": 86059856} {"train_lr": 0.002766797528897003, "train_min_lr": 0.002766797528897003, "train_loss": 0.1927535440414571, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007905151965156293, "epoch": 296, "n_parameters": 86059856} {"train_lr": 0.0027652108249626352, "train_min_lr": 0.0027652108249626352, "train_loss": 0.1927932570151125, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008031498157204343, "epoch": 297, "n_parameters": 86059856} {"train_lr": 0.002763619201215655, "train_min_lr": 0.002763619201215655, "train_loss": 0.19272706129301626, "train_loss_scale": 1146039.7948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 298, "n_parameters": 86059856} {"train_lr": 0.002762022663869706, "train_min_lr": 0.002762022663869706, "train_loss": 0.19276049323021793, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007971007118706042, "epoch": 299, "n_parameters": 86059856} {"train_lr": 0.0027604212191575978, "train_min_lr": 0.0027604212191575978, "train_loss": 0.19273210583756176, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0079150671327415, "epoch": 300, "n_parameters": 86059856} {"train_lr": 0.0027588148733313092, "train_min_lr": 0.0027588148733313092, "train_loss": 0.19269773399887177, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007950918013170266, "epoch": 301, "n_parameters": 86059856} {"train_lr": 0.0027572036326619525, "train_min_lr": 0.0027572036326619525, "train_loss": 0.19281765010852653, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008278007457892481, "epoch": 302, "n_parameters": 86059856} {"train_lr": 0.002755587503439741, "train_min_lr": 0.002755587503439741, "train_loss": 0.19270126301890764, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00799488683995337, "epoch": 303, "n_parameters": 86059856} {"train_lr": 0.002753966491973985, "train_min_lr": 0.002753966491973985, "train_loss": 0.1926976069031904, "train_loss_scale": 1569503.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007936842873907434, "epoch": 304, "n_parameters": 86059856} {"train_lr": 0.002752340604593045, "train_min_lr": 0.002752340604593045, "train_loss": 0.1926697606746203, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007902560592927516, "epoch": 305, "n_parameters": 86059856} {"train_lr": 0.0027507098476443215, "train_min_lr": 0.0027507098476443215, "train_loss": 0.1926423238339619, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0079560040955981, "epoch": 306, "n_parameters": 86059856} {"train_lr": 0.0027490742274942233, "train_min_lr": 0.0027490742274942233, "train_loss": 0.19270513896257258, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00810565786042179, "epoch": 307, "n_parameters": 86059856} {"train_lr": 0.0027474337505281428, "train_min_lr": 0.0027474337505281428, "train_loss": 0.19259294235290816, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007962240242005255, "epoch": 308, "n_parameters": 86059856} {"train_lr": 0.0027457884231504442, "train_min_lr": 0.0027457884231504442, "train_loss": 0.1926456554733121, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007987179369546281, "epoch": 309, "n_parameters": 86059856} {"train_lr": 0.002744138251784411, "train_min_lr": 0.002744138251784411, "train_loss": 0.1925951876044751, "train_loss_scale": 2204698.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 310, "n_parameters": 86059856} {"train_lr": 0.00274248324287225, "train_min_lr": 0.00274248324287225, "train_loss": 0.19261071813674882, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008003532828297466, "epoch": 311, "n_parameters": 86059856} {"train_lr": 0.0027408234028750557, "train_min_lr": 0.0027408234028750557, "train_loss": 0.19254356392253286, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007931898812202211, "epoch": 312, "n_parameters": 86059856} {"train_lr": 0.0027391587382727695, "train_min_lr": 0.0027391587382727695, "train_loss": 0.19255001568354857, "train_loss_scale": 1835008.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 313, "n_parameters": 86059856} {"train_lr": 0.0027374892555641767, "train_min_lr": 0.0027374892555641767, "train_loss": 0.1925092588047473, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007918050112680364, "epoch": 314, "n_parameters": 86059856} {"train_lr": 0.002735814961266868, "train_min_lr": 0.002735814961266868, "train_loss": 0.1925266241021932, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008005347717792178, "epoch": 315, "n_parameters": 86059856} {"train_lr": 0.002734135861917227, "train_min_lr": 0.002734135861917227, "train_loss": 0.19248025794513524, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008010857499753818, "epoch": 316, "n_parameters": 86059856} {"train_lr": 0.002732451964070391, "train_min_lr": 0.002732451964070391, "train_loss": 0.19247778097334772, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00796918747558569, "epoch": 317, "n_parameters": 86059856} {"train_lr": 0.0027307632743002247, "train_min_lr": 0.0027307632743002247, "train_loss": 0.1924508075014903, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008017836107263485, "epoch": 318, "n_parameters": 86059856} {"train_lr": 0.002729069799199308, "train_min_lr": 0.002729069799199308, "train_loss": 0.19241351576951835, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007970914343157066, "epoch": 319, "n_parameters": 86059856} {"train_lr": 0.0027273715453788997, "train_min_lr": 0.0027273715453788997, "train_loss": 0.19249597153005502, "train_loss_scale": 1929110.9743589743, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008047194839514887, "epoch": 320, "n_parameters": 86059856} {"train_lr": 0.0027256685194689133, "train_min_lr": 0.0027256685194689133, "train_loss": 0.19244912263149253, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007944201796840971, "epoch": 321, "n_parameters": 86059856} {"train_lr": 0.0027239607281178977, "train_min_lr": 0.0027239607281178977, "train_loss": 0.19264273992620218, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008449452832185974, "epoch": 322, "n_parameters": 86059856} {"train_lr": 0.002722248177992999, "train_min_lr": 0.002722248177992999, "train_loss": 0.19246405077716097, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007989100834283117, "epoch": 323, "n_parameters": 86059856} {"train_lr": 0.0027205308757799426, "train_min_lr": 0.0027205308757799426, "train_loss": 0.1924073666226692, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007953305939176621, "epoch": 324, "n_parameters": 86059856} {"train_lr": 0.002718808828183009, "train_min_lr": 0.002718808828183009, "train_loss": 0.19237605885315973, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007985984190152241, "epoch": 325, "n_parameters": 86059856} {"train_lr": 0.002717082041925007, "train_min_lr": 0.002717082041925007, "train_loss": 0.19236003924519396, "train_loss_scale": 2171090.0512820515, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 326, "n_parameters": 86059856} {"train_lr": 0.0027153505237472383, "train_min_lr": 0.0027153505237472383, "train_loss": 0.1923423664799581, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007958120964945126, "epoch": 327, "n_parameters": 86059856} {"train_lr": 0.0027136142804094735, "train_min_lr": 0.0027136142804094735, "train_loss": 0.19239038778827167, "train_loss_scale": 1791317.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 328, "n_parameters": 86059856} {"train_lr": 0.0027118733186899478, "train_min_lr": 0.0027118733186899478, "train_loss": 0.19231865955636096, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007975642730553564, "epoch": 329, "n_parameters": 86059856} {"train_lr": 0.0027101276453853035, "train_min_lr": 0.0027101276453853035, "train_loss": 0.192358583999941, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008007262721478652, "epoch": 330, "n_parameters": 86059856} {"train_lr": 0.0027083772673105774, "train_min_lr": 0.0027083772673105774, "train_loss": 0.19232464999629137, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007983919939038176, "epoch": 331, "n_parameters": 86059856} {"train_lr": 0.002706622191299172, "train_min_lr": 0.002706622191299172, "train_loss": 0.19232691226837537, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008090941393628525, "epoch": 332, "n_parameters": 86059856} {"train_lr": 0.002704862424202841, "train_min_lr": 0.002704862424202841, "train_loss": 0.19228800042317465, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008030064193567691, "epoch": 333, "n_parameters": 86059856} {"train_lr": 0.002703097972891639, "train_min_lr": 0.002703097972891639, "train_loss": 0.1922887910503703, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008052796038846748, "epoch": 334, "n_parameters": 86059856} {"train_lr": 0.0027013288442539144, "train_min_lr": 0.0027013288442539144, "train_loss": 0.19230291751834253, "train_loss_scale": 1972801.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008041228412483364, "epoch": 335, "n_parameters": 86059856} {"train_lr": 0.0026995550451962757, "train_min_lr": 0.0026995550451962757, "train_loss": 0.1922992208841233, "train_loss_scale": 1579585.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 336, "n_parameters": 86059856} {"train_lr": 0.002697776582643566, "train_min_lr": 0.002697776582643566, "train_loss": 0.19224204315254703, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008026084319377938, "epoch": 337, "n_parameters": 86059856} {"train_lr": 0.00269599346353883, "train_min_lr": 0.00269599346353883, "train_loss": 0.19223459818973565, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008058669581376493, "epoch": 338, "n_parameters": 86059856} {"train_lr": 0.002694205694843292, "train_min_lr": 0.002694205694843292, "train_loss": 0.1921852788864038, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007982925422346363, "epoch": 339, "n_parameters": 86059856} {"train_lr": 0.002692413283536328, "train_min_lr": 0.002692413283536328, "train_loss": 0.19217740307347134, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008027941019883236, "epoch": 340, "n_parameters": 86059856} {"train_lr": 0.002690616236615441, "train_min_lr": 0.002690616236615441, "train_loss": 0.19215506891934916, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008049313596879633, "epoch": 341, "n_parameters": 86059856} {"train_lr": 0.0026888145610962336, "train_min_lr": 0.0026888145610962336, "train_loss": 0.19216657801268575, "train_loss_scale": 1135957.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008040469472642796, "epoch": 342, "n_parameters": 86059856} {"train_lr": 0.002687008264012372, "train_min_lr": 0.002687008264012372, "train_loss": 0.19218230737635913, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008048300350753542, "epoch": 343, "n_parameters": 86059856} {"train_lr": 0.0026851973524155666, "train_min_lr": 0.0026851973524155666, "train_loss": 0.1921723273057395, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008056093878948536, "epoch": 344, "n_parameters": 86059856} {"train_lr": 0.0026833818333755464, "train_min_lr": 0.0026833818333755464, "train_loss": 0.19212284966562995, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008015882255187115, "epoch": 345, "n_parameters": 86059856} {"train_lr": 0.002681561713980024, "train_min_lr": 0.002681561713980024, "train_loss": 0.1921260331530506, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008061004335430857, "epoch": 346, "n_parameters": 86059856} {"train_lr": 0.002679737001334669, "train_min_lr": 0.002679737001334669, "train_loss": 0.1920960321365736, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008069109171628952, "epoch": 347, "n_parameters": 86059856} {"train_lr": 0.0026779077025630943, "train_min_lr": 0.0026779077025630943, "train_loss": 0.1921160691477454, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008043353496573102, "epoch": 348, "n_parameters": 86059856} {"train_lr": 0.002676073824806804, "train_min_lr": 0.002676073824806804, "train_loss": 0.19228018000602531, "train_loss_scale": 2419790.769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 349, "n_parameters": 86059856} {"train_lr": 0.0026742353752251882, "train_min_lr": 0.0026742353752251882, "train_loss": 0.19207408839765078, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007980547290152082, "epoch": 350, "n_parameters": 86059856} {"train_lr": 0.002672392360995473, "train_min_lr": 0.002672392360995473, "train_loss": 0.19206310397324464, "train_loss_scale": 1892141.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 351, "n_parameters": 86059856} {"train_lr": 0.002670544789312714, "train_min_lr": 0.002670544789312714, "train_loss": 0.19204678184066254, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007979801596285632, "epoch": 352, "n_parameters": 86059856} {"train_lr": 0.0026686926673897596, "train_min_lr": 0.0026686926673897596, "train_loss": 0.19207186451874292, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008091320361321172, "epoch": 353, "n_parameters": 86059856} {"train_lr": 0.0026668360024572186, "train_min_lr": 0.0026668360024572186, "train_loss": 0.19203593455947554, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008101366891657026, "epoch": 354, "n_parameters": 86059856} {"train_lr": 0.0026649748017634396, "train_min_lr": 0.0026649748017634396, "train_loss": 0.1920260380093868, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008024132912023326, "epoch": 355, "n_parameters": 86059856} {"train_lr": 0.002663109072574473, "train_min_lr": 0.002663109072574473, "train_loss": 0.19203428883487597, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008060606889044628, "epoch": 356, "n_parameters": 86059856} {"train_lr": 0.00266123882217405, "train_min_lr": 0.00266123882217405, "train_loss": 0.19204988528807193, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008305526099717006, "epoch": 357, "n_parameters": 86059856} {"train_lr": 0.0026593640578635516, "train_min_lr": 0.0026593640578635516, "train_loss": 0.1919819721414779, "train_loss_scale": 1871977.0256410257, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008025721616995258, "epoch": 358, "n_parameters": 86059856} {"train_lr": 0.0026574847869619855, "train_min_lr": 0.0026574847869619855, "train_loss": 0.19200948840126586, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008095461228945985, "epoch": 359, "n_parameters": 86059856} {"train_lr": 0.0026537127547496104, "train_min_lr": 0.0026537127547496104, "train_loss": 0.19198991560663742, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008061889009789014, "epoch": 361, "n_parameters": 86059856} {"train_lr": 0.0026518200081646626, "train_min_lr": 0.0026518200081646626, "train_loss": 0.19197371615192446, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008075010534435606, "epoch": 362, "n_parameters": 86059856} {"train_lr": 0.002649922784440315, "train_min_lr": 0.002649922784440315, "train_loss": 0.19191724984930494, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008083945734259218, "epoch": 363, "n_parameters": 86059856} {"train_lr": 0.002648021090983251, "train_min_lr": 0.002648021090983251, "train_loss": 0.19193179566317645, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008138826561685747, "epoch": 364, "n_parameters": 86059856} {"train_lr": 0.0026461149352176093, "train_min_lr": 0.0026461149352176093, "train_loss": 0.19193026973292804, "train_loss_scale": 2171090.0512820515, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 365, "n_parameters": 86059856} {"train_lr": 0.00264420432458494, "train_min_lr": 0.00264420432458494, "train_loss": 0.19189663764495307, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008111223172491942, "epoch": 366, "n_parameters": 86059856} {"train_lr": 0.0026422892665441985, "train_min_lr": 0.0026422892665441985, "train_loss": 0.1919142431758631, "train_loss_scale": 1835008.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 367, "n_parameters": 86059856} {"train_lr": 0.002640369768571687, "train_min_lr": 0.002640369768571687, "train_loss": 0.19190056114576948, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0081411297686804, "epoch": 368, "n_parameters": 86059856} {"train_lr": 0.0026384458381610567, "train_min_lr": 0.0026384458381610567, "train_loss": 0.19188018668538484, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007990110567054497, "epoch": 369, "n_parameters": 86059856} {"train_lr": 0.002636517482823248, "train_min_lr": 0.002636517482823248, "train_loss": 0.1918732061600074, "train_loss_scale": 821720.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 370, "n_parameters": 86059856} {"train_lr": 0.0026345847100864854, "train_min_lr": 0.0026345847100864854, "train_loss": 0.19183542671947715, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00808598052483434, "epoch": 371, "n_parameters": 86059856} {"train_lr": 0.002632647527496237, "train_min_lr": 0.002632647527496237, "train_loss": 0.19185724808500174, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008037861433620445, "epoch": 372, "n_parameters": 86059856} {"train_lr": 0.0026307059426151857, "train_min_lr": 0.0026307059426151857, "train_loss": 0.1918271580675187, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008095603337726342, "epoch": 373, "n_parameters": 86059856} {"train_lr": 0.002628759963023199, "train_min_lr": 0.002628759963023199, "train_loss": 0.19176367627313504, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008038079215344997, "epoch": 374, "n_parameters": 86059856} {"train_lr": 0.0026268095963173075, "train_min_lr": 0.0026268095963173075, "train_loss": 0.1918182757217437, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008067703147669537, "epoch": 375, "n_parameters": 86059856} {"train_lr": 0.0026248548501116606, "train_min_lr": 0.0026248548501116606, "train_loss": 0.191778685634908, "train_loss_scale": 536050.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008055457293103712, "epoch": 376, "n_parameters": 86059856} {"train_lr": 0.002622895732037515, "train_min_lr": 0.002622895732037515, "train_loss": 0.1917857398183491, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008136365558199871, "epoch": 377, "n_parameters": 86059856} {"train_lr": 0.0026209322497431783, "train_min_lr": 0.0026209322497431783, "train_loss": 0.19176879710255143, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008146563470244216, "epoch": 378, "n_parameters": 86059856} {"train_lr": 0.0026189644108940142, "train_min_lr": 0.0026189644108940142, "train_loss": 0.1917964671499645, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00807256437241076, "epoch": 379, "n_parameters": 86059856} {"train_lr": 0.002616992223172395, "train_min_lr": 0.002616992223172395, "train_loss": 0.19176303106360137, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008117306686472148, "epoch": 380, "n_parameters": 86059856} {"train_lr": 0.00261501569427765, "train_min_lr": 0.00261501569427765, "train_loss": 0.19173050687934917, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008124876674861671, "epoch": 381, "n_parameters": 86059856} {"train_lr": 0.002613034831926069, "train_min_lr": 0.002613034831926069, "train_loss": 0.19173571843510637, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008121991377825348, "epoch": 382, "n_parameters": 86059856} {"train_lr": 0.002611049643850867, "train_min_lr": 0.002611049643850867, "train_loss": 0.19170944033286128, "train_loss_scale": 1690492.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008122965886006849, "epoch": 383, "n_parameters": 86059856} {"train_lr": 0.0026090601378021354, "train_min_lr": 0.0026090601378021354, "train_loss": 0.19273817311160457, "train_loss_scale": 1717379.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 384, "n_parameters": 86059856} {"train_lr": 0.002607066321546822, "train_min_lr": 0.002607066321546822, "train_loss": 0.19187634293801892, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008209834951393975, "epoch": 385, "n_parameters": 86059856} {"train_lr": 0.002605068202868711, "train_min_lr": 0.002605068202868711, "train_loss": 0.1917786986256639, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008074731733172368, "epoch": 386, "n_parameters": 86059856} {"train_lr": 0.0026030657895683754, "train_min_lr": 0.0026030657895683754, "train_loss": 0.1917066219358299, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008052465141726991, "epoch": 387, "n_parameters": 86059856} {"train_lr": 0.002601059089463159, "train_min_lr": 0.002601059089463159, "train_loss": 0.19165522729954085, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008063009317713575, "epoch": 388, "n_parameters": 86059856} {"train_lr": 0.0025990481103871357, "train_min_lr": 0.0025990481103871357, "train_loss": 0.1916809187999043, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008095585502302036, "epoch": 389, "n_parameters": 86059856} {"train_lr": 0.002597032860191087, "train_min_lr": 0.002597032860191087, "train_loss": 0.19165835280019122, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008067215240608232, "epoch": 390, "n_parameters": 86059856} {"train_lr": 0.002595013346742474, "train_min_lr": 0.002595013346742474, "train_loss": 0.1916446129504878, "train_loss_scale": 2046739.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008138345638051247, "epoch": 391, "n_parameters": 86059856} {"train_lr": 0.002592989577925391, "train_min_lr": 0.002592989577925391, "train_loss": 0.19161143330618358, "train_loss_scale": 2080347.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 392, "n_parameters": 86059856} {"train_lr": 0.0025909615616405536, "train_min_lr": 0.0025909615616405536, "train_loss": 0.19162841273758274, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008112611840120874, "epoch": 393, "n_parameters": 86059856} {"train_lr": 0.0025889293058052524, "train_min_lr": 0.0025889293058052524, "train_loss": 0.19159256145716286, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008102031702247377, "epoch": 394, "n_parameters": 86059856} {"train_lr": 0.0025868928183533394, "train_min_lr": 0.0025868928183533394, "train_loss": 0.1915693873217186, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008091411780183896, "epoch": 395, "n_parameters": 86059856} {"train_lr": 0.0025848521072351733, "train_min_lr": 0.0025848521072351733, "train_loss": 0.19156052210392097, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00808044705981723, "epoch": 396, "n_parameters": 86059856} {"train_lr": 0.0025828071804176074, "train_min_lr": 0.0025828071804176074, "train_loss": 0.19154875037164834, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008088662039834814, "epoch": 397, "n_parameters": 86059856} {"train_lr": 0.0025807580458839627, "train_min_lr": 0.0025807580458839627, "train_loss": 0.1915462410184913, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008144952988144584, "epoch": 398, "n_parameters": 86059856} {"train_lr": 0.0025787047116339716, "train_min_lr": 0.0025787047116339716, "train_loss": 0.19155426267295694, "train_loss_scale": 1640080.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 399, "n_parameters": 86059856} {"train_lr": 0.002574585476065864, "train_min_lr": 0.002574585476065864, "train_loss": 0.1915023549197194, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008124143035951046, "epoch": 401, "n_parameters": 86059856} {"train_lr": 0.002572519590829076, "train_min_lr": 0.002572519590829076, "train_loss": 0.19152605719864368, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00815923629143538, "epoch": 402, "n_parameters": 86059856} {"train_lr": 0.002570449538038541, "train_min_lr": 0.002570449538038541, "train_loss": 0.1914997603983069, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00820333373028403, "epoch": 403, "n_parameters": 86059856} {"train_lr": 0.0025683753257756624, "train_min_lr": 0.0025683753257756624, "train_loss": 0.1914733309442034, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008077647804151265, "epoch": 404, "n_parameters": 86059856} {"train_lr": 0.0025662969621380854, "train_min_lr": 0.0025662969621380854, "train_loss": 0.19168220385192677, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008718957246925969, "epoch": 405, "n_parameters": 86059856} {"train_lr": 0.0025642144552396537, "train_min_lr": 0.0025642144552396537, "train_loss": 0.19151627533257198, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00814302951300469, "epoch": 406, "n_parameters": 86059856} {"train_lr": 0.002562127813210396, "train_min_lr": 0.002562127813210396, "train_loss": 0.19149311700573143, "train_loss_scale": 1710657.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008184132633360628, "epoch": 407, "n_parameters": 86059856} {"train_lr": 0.002560037044196476, "train_min_lr": 0.002560037044196476, "train_loss": 0.19145591816721627, "train_loss_scale": 1283833.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 408, "n_parameters": 86059856} {"train_lr": 0.0025579421563601715, "train_min_lr": 0.0025579421563601715, "train_loss": 0.19142025696615186, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00814465328436106, "epoch": 409, "n_parameters": 86059856} {"train_lr": 0.002555843157879843, "train_min_lr": 0.002555843157879843, "train_loss": 0.1914015721099881, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008125104574271692, "epoch": 410, "n_parameters": 86059856} {"train_lr": 0.0025537400569498967, "train_min_lr": 0.0025537400569498967, "train_loss": 0.19139128125821933, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008111549275688445, "epoch": 411, "n_parameters": 86059856} {"train_lr": 0.002551632861780751, "train_min_lr": 0.002551632861780751, "train_loss": 0.1914291717291165, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008186451829551982, "epoch": 412, "n_parameters": 86059856} {"train_lr": 0.002549521580598816, "train_min_lr": 0.002549521580598816, "train_loss": 0.19138712922517115, "train_loss_scale": 562937.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 413, "n_parameters": 86059856} {"train_lr": 0.002547406221646444, "train_min_lr": 0.002547406221646444, "train_loss": 0.19137547792007142, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008150476278761068, "epoch": 414, "n_parameters": 86059856} {"train_lr": 0.0025452867931819155, "train_min_lr": 0.0025452867931819155, "train_loss": 0.1916391503674766, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008908035384359745, "epoch": 415, "n_parameters": 86059856} {"train_lr": 0.0025431633034793937, "train_min_lr": 0.0025431633034793937, "train_loss": 0.19144728067569816, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00815798913110764, "epoch": 416, "n_parameters": 86059856} {"train_lr": 0.002541035760828894, "train_min_lr": 0.002541035760828894, "train_loss": 0.1913937400572766, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008134907392116312, "epoch": 417, "n_parameters": 86059856} {"train_lr": 0.002538904173536266, "train_min_lr": 0.002538904173536266, "train_loss": 0.1913403850287581, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008195261302320525, "epoch": 418, "n_parameters": 86059856} {"train_lr": 0.0025367685499231326, "train_min_lr": 0.0025367685499231326, "train_loss": 0.19135472854265037, "train_loss_scale": 794834.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008170021499375789, "epoch": 419, "n_parameters": 86059856} {"train_lr": 0.0025346288983268852, "train_min_lr": 0.0025346288983268852, "train_loss": 0.19134523742832243, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008168281082851956, "epoch": 420, "n_parameters": 86059856} {"train_lr": 0.002532485227100638, "train_min_lr": 0.002532485227100638, "train_loss": 0.19130983425734135, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008179394602894975, "epoch": 421, "n_parameters": 86059856} {"train_lr": 0.002530337544613201, "train_min_lr": 0.002530337544613201, "train_loss": 0.19130665171915331, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008158911956856266, "epoch": 422, "n_parameters": 86059856} {"train_lr": 0.0025281858592490396, "train_min_lr": 0.0025281858592490396, "train_loss": 0.19129063673198032, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008120734396820458, "epoch": 423, "n_parameters": 86059856} {"train_lr": 0.0025260301794082433, "train_min_lr": 0.0025260301794082433, "train_loss": 0.1912763060655636, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00828003169497093, "epoch": 424, "n_parameters": 86059856} {"train_lr": 0.0025238705135065017, "train_min_lr": 0.0025238705135065017, "train_loss": 0.19126909349758464, "train_loss_scale": 1159483.076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00815050174535897, "epoch": 425, "n_parameters": 86059856} {"train_lr": 0.002521706869975065, "train_min_lr": 0.002521706869975065, "train_loss": 0.19128098744803515, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00818414268686842, "epoch": 426, "n_parameters": 86059856} {"train_lr": 0.0025195392572607113, "train_min_lr": 0.0025195392572607113, "train_loss": 0.19124349002511457, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008245423200349204, "epoch": 427, "n_parameters": 86059856} {"train_lr": 0.002517367683825716, "train_min_lr": 0.002517367683825716, "train_loss": 0.19123818478785837, "train_loss_scale": 1976162.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 428, "n_parameters": 86059856} {"train_lr": 0.002515192158147809, "train_min_lr": 0.002515192158147809, "train_loss": 0.19124287331046966, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008206361998768093, "epoch": 429, "n_parameters": 86059856} {"train_lr": 0.0025130126887201593, "train_min_lr": 0.0025130126887201593, "train_loss": 0.19120526036772972, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008239872584967181, "epoch": 430, "n_parameters": 86059856} {"train_lr": 0.002510829284051327, "train_min_lr": 0.002510829284051327, "train_loss": 0.1912346604363754, "train_loss_scale": 957833.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 431, "n_parameters": 86059856} {"train_lr": 0.002508641952665238, "train_min_lr": 0.002508641952665238, "train_loss": 0.19119695221814206, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00821006600297271, "epoch": 432, "n_parameters": 86059856} {"train_lr": 0.0025064507031011445, "train_min_lr": 0.0025064507031011445, "train_loss": 0.19121489650569856, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00827264862630564, "epoch": 433, "n_parameters": 86059856} {"train_lr": 0.0025042555439135986, "train_min_lr": 0.0025042555439135986, "train_loss": 0.19123767343206474, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008284913025235232, "epoch": 434, "n_parameters": 86059856} {"train_lr": 0.0025020564836724106, "train_min_lr": 0.0025020564836724106, "train_loss": 0.1915590913596157, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009217058732293738, "epoch": 435, "n_parameters": 86059856} {"train_lr": 0.0024998535309626348, "train_min_lr": 0.0024998535309626348, "train_loss": 0.19119832573386913, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00822423265214102, "epoch": 436, "n_parameters": 86059856} {"train_lr": 0.0024976466943844944, "train_min_lr": 0.0024976466943844944, "train_loss": 0.1911459615609298, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008180415771232966, "epoch": 437, "n_parameters": 86059856} {"train_lr": 0.0024954359825534, "train_min_lr": 0.0024954359825534, "train_loss": 0.19110852625603095, "train_loss_scale": 924225.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008168234089867044, "epoch": 438, "n_parameters": 86059856} {"train_lr": 0.002493221404099881, "train_min_lr": 0.002493221404099881, "train_loss": 0.19113598671407464, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008278399034259984, "epoch": 439, "n_parameters": 86059856} {"train_lr": 0.0024910029676695655, "train_min_lr": 0.0024910029676695655, "train_loss": 0.19111471324681473, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008282748246505761, "epoch": 440, "n_parameters": 86059856} {"train_lr": 0.0024887806819231355, "train_min_lr": 0.0024887806819231355, "train_loss": 0.19112288862323532, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008281076665764723, "epoch": 441, "n_parameters": 86059856} {"train_lr": 0.0024865545555363086, "train_min_lr": 0.0024865545555363086, "train_loss": 0.19109025064449853, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008237738396602277, "epoch": 442, "n_parameters": 86059856} {"train_lr": 0.002484324597199788, "train_min_lr": 0.002484324597199788, "train_loss": 0.1910416234212999, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008225385965301823, "epoch": 443, "n_parameters": 86059856} {"train_lr": 0.002482090815619252, "train_min_lr": 0.002482090815619252, "train_loss": 0.19107566605536982, "train_loss_scale": 1418266.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00825773883694544, "epoch": 444, "n_parameters": 86059856} {"train_lr": 0.002479853219515286, "train_min_lr": 0.002479853219515286, "train_loss": 0.1910206651709114, "train_loss_scale": 2040018.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 445, "n_parameters": 86059856} {"train_lr": 0.0024776118176233776, "train_min_lr": 0.0024776118176233776, "train_loss": 0.1910358501287798, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008230661840524333, "epoch": 446, "n_parameters": 86059856} {"train_lr": 0.002475366618693875, "train_min_lr": 0.002475366618693875, "train_loss": 0.19100930209820852, "train_loss_scale": 814998.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 447, "n_parameters": 86059856} {"train_lr": 0.0024731176314919346, "train_min_lr": 0.0024731176314919346, "train_loss": 0.19100902968635544, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00828045491541091, "epoch": 448, "n_parameters": 86059856} {"train_lr": 0.002470864864797525, "train_min_lr": 0.002470864864797525, "train_loss": 0.1910588435637645, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008342114324944142, "epoch": 449, "n_parameters": 86059856} {"train_lr": 0.0024686083274053426, "train_min_lr": 0.0024686083274053426, "train_loss": 0.19104658416472375, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00826684448074024, "epoch": 450, "n_parameters": 86059856} {"train_lr": 0.0024663480281248338, "train_min_lr": 0.0024663480281248338, "train_loss": 0.1910085387002581, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008310813114990313, "epoch": 451, "n_parameters": 86059856} {"train_lr": 0.002464083975780106, "train_min_lr": 0.002464083975780106, "train_loss": 0.1910050549926475, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008356289705261588, "epoch": 452, "n_parameters": 86059856} {"train_lr": 0.0024618161792099333, "train_min_lr": 0.0024618161792099333, "train_loss": 0.19097333776955613, "train_loss_scale": 542772.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00832296911185273, "epoch": 453, "n_parameters": 86059856} {"train_lr": 0.0024595446472677027, "train_min_lr": 0.0024595446472677027, "train_loss": 0.1909606181897032, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008261589154314537, "epoch": 454, "n_parameters": 86059856} {"train_lr": 0.0024572693888213837, "train_min_lr": 0.0024572693888213837, "train_loss": 0.19101375842896792, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008367498764971223, "epoch": 455, "n_parameters": 86059856} {"train_lr": 0.002454990412753494, "train_min_lr": 0.002454990412753494, "train_loss": 0.19098466089090857, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00827017719618594, "epoch": 456, "n_parameters": 86059856} {"train_lr": 0.002452707727961063, "train_min_lr": 0.002452707727961063, "train_loss": 0.19092972294236413, "train_loss_scale": 720896.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 457, "n_parameters": 86059856} {"train_lr": 0.002450421343355605, "train_min_lr": 0.002450421343355605, "train_loss": 0.19093091279650345, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008302223050775819, "epoch": 458, "n_parameters": 86059856} {"train_lr": 0.00244813126786307, "train_min_lr": 0.00244813126786307, "train_loss": 0.1909790394636683, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008417364626299016, "epoch": 459, "n_parameters": 86059856} {"train_lr": 0.002445837510423823, "train_min_lr": 0.002445837510423823, "train_loss": 0.19093678695842242, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008376049152456032, "epoch": 460, "n_parameters": 86059856} {"train_lr": 0.002443540079992599, "train_min_lr": 0.002443540079992599, "train_loss": 0.19088170024303672, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008320079650729895, "epoch": 461, "n_parameters": 86059856} {"train_lr": 0.00244123898553848, "train_min_lr": 0.00244123898553848, "train_loss": 0.19089741783383757, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00833825952707766, "epoch": 462, "n_parameters": 86059856} {"train_lr": 0.002438934236044838, "train_min_lr": 0.002438934236044838, "train_loss": 0.19087578121644372, "train_loss_scale": 636875.4871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008319184790031077, "epoch": 463, "n_parameters": 86059856} {"train_lr": 0.002436625840509331, "train_min_lr": 0.002436625840509331, "train_loss": 0.19084664902243859, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008352114853317825, "epoch": 464, "n_parameters": 86059856} {"train_lr": 0.0024343138079438395, "train_min_lr": 0.0024343138079438395, "train_loss": 0.19087230707876957, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 465, "n_parameters": 86059856} {"train_lr": 0.0024319981473744415, "train_min_lr": 0.0024319981473744415, "train_loss": 0.19085143211608133, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008463953926156346, "epoch": 466, "n_parameters": 86059856} {"train_lr": 0.0024296788678413927, "train_min_lr": 0.0024296788678413927, "train_loss": 0.1908548642606594, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008357463634381883, "epoch": 467, "n_parameters": 86059856} {"train_lr": 0.0024273559783990592, "train_min_lr": 0.0024273559783990592, "train_loss": 0.19082482801511502, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00838192665609173, "epoch": 468, "n_parameters": 86059856} {"train_lr": 0.0024250294881159144, "train_min_lr": 0.0024250294881159144, "train_loss": 0.19080275651783898, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008453629669160224, "epoch": 469, "n_parameters": 86059856} {"train_lr": 0.0024226994060744782, "train_min_lr": 0.0024226994060744782, "train_loss": 0.19081565473849574, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008401337540887583, "epoch": 470, "n_parameters": 86059856} {"train_lr": 0.0024203657413713023, "train_min_lr": 0.0024203657413713023, "train_loss": 0.1908247385126276, "train_loss_scale": 833483.4871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008449293728261136, "epoch": 471, "n_parameters": 86059856} {"train_lr": 0.002418028503116915, "train_min_lr": 0.002418028503116915, "train_loss": 0.19077727233823866, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008358134458271356, "epoch": 472, "n_parameters": 86059856} {"train_lr": 0.002415687700435805, "train_min_lr": 0.002415687700435805, "train_loss": 0.19075531688614342, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008426646255732825, "epoch": 473, "n_parameters": 86059856} {"train_lr": 0.0024133433424663683, "train_min_lr": 0.0024133433424663683, "train_loss": 0.190793991381398, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008459976635491237, "epoch": 474, "n_parameters": 86059856} {"train_lr": 0.002410995438360891, "train_min_lr": 0.002410995438360891, "train_loss": 0.19075634618265888, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008362878615466448, "epoch": 475, "n_parameters": 86059856} {"train_lr": 0.0024086439972854892, "train_min_lr": 0.0024086439972854892, "train_loss": 0.19076156223383853, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008455364136562612, "epoch": 476, "n_parameters": 86059856} {"train_lr": 0.002406289028420101, "train_min_lr": 0.002406289028420101, "train_loss": 0.19073300002118906, "train_loss_scale": 561257.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 477, "n_parameters": 86059856} {"train_lr": 0.002403930540958429, "train_min_lr": 0.002403930540958429, "train_loss": 0.1907303491487908, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008438964107205184, "epoch": 478, "n_parameters": 86059856} {"train_lr": 0.0024015685441079117, "train_min_lr": 0.0024015685441079117, "train_loss": 0.1906731470117871, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00841504823601542, "epoch": 479, "n_parameters": 86059856} {"train_lr": 0.002399203047089689, "train_min_lr": 0.002399203047089689, "train_loss": 0.19076175895782235, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00861023519283686, "epoch": 480, "n_parameters": 86059856} {"train_lr": 0.0023968340591385717, "train_min_lr": 0.0023968340591385717, "train_loss": 0.19072795753629926, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008560672314324154, "epoch": 481, "n_parameters": 86059856} {"train_lr": 0.002394461589502989, "train_min_lr": 0.002394461589502989, "train_loss": 0.1906728506541978, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00850254350538867, "epoch": 482, "n_parameters": 86059856} {"train_lr": 0.0023920856474449704, "train_min_lr": 0.0023920856474449704, "train_loss": 0.19065680638409388, "train_loss_scale": 796514.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008400128193151874, "epoch": 483, "n_parameters": 86059856} {"train_lr": 0.0023897062422400975, "train_min_lr": 0.0023897062422400975, "train_loss": 0.1906878974658843, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008539378092301866, "epoch": 484, "n_parameters": 86059856} {"train_lr": 0.0023873233831774756, "train_min_lr": 0.0023873233831774756, "train_loss": 0.19070019140744057, "train_loss_scale": 806596.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 485, "n_parameters": 86059856} {"train_lr": 0.0023849370795596837, "train_min_lr": 0.0023849370795596837, "train_loss": 0.1910882675841164, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00981902260070619, "epoch": 486, "n_parameters": 86059856} {"train_lr": 0.0023825473407027647, "train_min_lr": 0.0023825473407027647, "train_loss": 0.1906946093405191, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008474190862706074, "epoch": 487, "n_parameters": 86059856} {"train_lr": 0.0023801541759361604, "train_min_lr": 0.0023801541759361604, "train_loss": 0.19070712139662832, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008566360829624897, "epoch": 488, "n_parameters": 86059856} {"train_lr": 0.002377757594602684, "train_min_lr": 0.002377757594602684, "train_loss": 0.1906513299387044, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008469738987071488, "epoch": 489, "n_parameters": 86059856} {"train_lr": 0.0023753576060585027, "train_min_lr": 0.0023753576060585027, "train_loss": 0.19061622563271952, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008444748920364639, "epoch": 490, "n_parameters": 86059856} {"train_lr": 0.0023729542196730697, "train_min_lr": 0.0023729542196730697, "train_loss": 0.1912457436287346, "train_loss_scale": 551174.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010131513247660432, "epoch": 491, "n_parameters": 86059856} {"train_lr": 0.0023705474448291095, "train_min_lr": 0.0023705474448291095, "train_loss": 0.19067303417250514, "train_loss_scale": 930947.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 492, "n_parameters": 86059856} {"train_lr": 0.002368137290922579, "train_min_lr": 0.002368137290922579, "train_loss": 0.1906491658793619, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008447788881126028, "epoch": 493, "n_parameters": 86059856} {"train_lr": 0.002365723767362617, "train_min_lr": 0.002365723767362617, "train_loss": 0.19060747917646018, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008564509809590302, "epoch": 494, "n_parameters": 86059856} {"train_lr": 0.0023633068835715253, "train_min_lr": 0.0023633068835715253, "train_loss": 0.19060050895533118, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008524705176778998, "epoch": 495, "n_parameters": 86059856} {"train_lr": 0.0023608866489847244, "train_min_lr": 0.0023608866489847244, "train_loss": 0.19052369154106158, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008464051697116632, "epoch": 496, "n_parameters": 86059856} {"train_lr": 0.0023584630730507047, "train_min_lr": 0.0023584630730507047, "train_loss": 0.1905290350700036, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008628166529935045, "epoch": 497, "n_parameters": 86059856} {"train_lr": 0.00235603616523102, "train_min_lr": 0.00235603616523102, "train_loss": 0.19053459691647917, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00850326877582866, "epoch": 498, "n_parameters": 86059856} {"train_lr": 0.002353605935000214, "train_min_lr": 0.002353605935000214, "train_loss": 0.19050237914929405, "train_loss_scale": 951112.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008557774113097157, "epoch": 499, "n_parameters": 86059856} {"train_lr": 0.0023511723918458135, "train_min_lr": 0.0023511723918458135, "train_loss": 0.19048981665251538, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008424358155864935, "epoch": 500, "n_parameters": 86059856} {"train_lr": 0.0023487355452682713, "train_min_lr": 0.0023487355452682713, "train_loss": 0.19048584069913396, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008560785352109144, "epoch": 501, "n_parameters": 86059856} {"train_lr": 0.002346295404780935, "train_min_lr": 0.002346295404780935, "train_loss": 0.19050155109606492, "train_loss_scale": 880534.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 502, "n_parameters": 86059856} {"train_lr": 0.002343851979910019, "train_min_lr": 0.002343851979910019, "train_loss": 0.19050679557646313, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00864383237860285, "epoch": 503, "n_parameters": 86059856} {"train_lr": 0.002341405280194559, "train_min_lr": 0.002341405280194559, "train_loss": 0.19044200910661274, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008572472466860348, "epoch": 504, "n_parameters": 86059856} {"train_lr": 0.0023389553151863694, "train_min_lr": 0.0023389553151863694, "train_loss": 0.19048055331222713, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00862063029452633, "epoch": 505, "n_parameters": 86059856} {"train_lr": 0.0023365020944500123, "train_min_lr": 0.0023365020944500123, "train_loss": 0.1904589419838232, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00863748679582316, "epoch": 506, "n_parameters": 86059856} {"train_lr": 0.00233404562756277, "train_min_lr": 0.00233404562756277, "train_loss": 0.19048301104265145, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008704461043891616, "epoch": 507, "n_parameters": 86059856} {"train_lr": 0.0023315859241145867, "train_min_lr": 0.0023315859241145867, "train_loss": 0.19038068992682758, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008638201826192344, "epoch": 508, "n_parameters": 86059856} {"train_lr": 0.0023291229937080476, "train_min_lr": 0.0023291229937080476, "train_loss": 0.19041730378730556, "train_loss_scale": 1001524.5128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008604890250493415, "epoch": 509, "n_parameters": 86059856} {"train_lr": 0.0023266568459583315, "train_min_lr": 0.0023266568459583315, "train_loss": 0.19040235283020407, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008780304638024133, "epoch": 510, "n_parameters": 86059856} {"train_lr": 0.002324187490493184, "train_min_lr": 0.002324187490493184, "train_loss": 0.19046836422207072, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00893128138835518, "epoch": 511, "n_parameters": 86059856} {"train_lr": 0.0023217149369528702, "train_min_lr": 0.0023217149369528702, "train_loss": 0.190397312064679, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008612175279547675, "epoch": 512, "n_parameters": 86059856} {"train_lr": 0.0023192391949901393, "train_min_lr": 0.0023192391949901393, "train_loss": 0.19039886730770844, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008594405317965608, "epoch": 513, "n_parameters": 86059856} {"train_lr": 0.002316760274270188, "train_min_lr": 0.002316760274270188, "train_loss": 0.19036357136419377, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008727708153832609, "epoch": 514, "n_parameters": 86059856} {"train_lr": 0.002314278184470623, "train_min_lr": 0.002314278184470623, "train_loss": 0.19037593954887527, "train_loss_scale": 606628.1025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 515, "n_parameters": 86059856} {"train_lr": 0.0023117929352814244, "train_min_lr": 0.0023117929352814244, "train_loss": 0.19033674742930975, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008742970302521896, "epoch": 516, "n_parameters": 86059856} {"train_lr": 0.0023093045364049047, "train_min_lr": 0.0023093045364049047, "train_loss": 0.19034215855674866, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008604120078066794, "epoch": 517, "n_parameters": 86059856} {"train_lr": 0.0023068129975556753, "train_min_lr": 0.0023068129975556753, "train_loss": 0.19042800118525824, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009013036965953711, "epoch": 518, "n_parameters": 86059856} {"train_lr": 0.002304318328460604, "train_min_lr": 0.002304318328460604, "train_loss": 0.19033800062532416, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008689984067295415, "epoch": 519, "n_parameters": 86059856} {"train_lr": 0.002301820538858778, "train_min_lr": 0.002301820538858778, "train_loss": 0.19031430157617882, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00870811844166989, "epoch": 520, "n_parameters": 86059856} {"train_lr": 0.002299319638501468, "train_min_lr": 0.002299319638501468, "train_loss": 0.19031056513985953, "train_loss_scale": 751143.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008668187032573117, "epoch": 521, "n_parameters": 86059856} {"train_lr": 0.0022968156371520896, "train_min_lr": 0.0022968156371520896, "train_loss": 0.19029427660653034, "train_loss_scale": 672164.1025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 522, "n_parameters": 86059856} {"train_lr": 0.0022943085445861614, "train_min_lr": 0.0022943085445861614, "train_loss": 0.19023242799374154, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00869487471675548, "epoch": 523, "n_parameters": 86059856} {"train_lr": 0.0022917983705912763, "train_min_lr": 0.0022917983705912763, "train_loss": 0.19029529032727274, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008740625708387831, "epoch": 524, "n_parameters": 86059856} {"train_lr": 0.0022892851249670496, "train_min_lr": 0.0022892851249670496, "train_loss": 0.19025911977037024, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008751221808592956, "epoch": 525, "n_parameters": 86059856} {"train_lr": 0.002286768817525095, "train_min_lr": 0.002286768817525095, "train_loss": 0.19028864737050846, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008703216171572702, "epoch": 526, "n_parameters": 86059856} {"train_lr": 0.0022842494580889716, "train_min_lr": 0.0022842494580889716, "train_loss": 0.19023246033929098, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008791232757712117, "epoch": 527, "n_parameters": 86059856} {"train_lr": 0.0022817270564941617, "train_min_lr": 0.0022817270564941617, "train_loss": 0.19022452656346828, "train_loss_scale": 685607.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008764956094241008, "epoch": 528, "n_parameters": 86059856} {"train_lr": 0.002279201622588018, "train_min_lr": 0.002279201622588018, "train_loss": 0.19025670507779488, "train_loss_scale": 729298.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 529, "n_parameters": 86059856} {"train_lr": 0.0022766731662297347, "train_min_lr": 0.0022766731662297347, "train_loss": 0.19021063324852058, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008797090205077369, "epoch": 530, "n_parameters": 86059856} {"train_lr": 0.002274141697290305, "train_min_lr": 0.002274141697290305, "train_loss": 0.1901956520652255, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008685312002527121, "epoch": 531, "n_parameters": 86059856} {"train_lr": 0.0022716072256524756, "train_min_lr": 0.0022716072256524756, "train_loss": 0.19017239310181674, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008782463519869802, "epoch": 532, "n_parameters": 86059856} {"train_lr": 0.002269069761210729, "train_min_lr": 0.002269069761210729, "train_loss": 0.19020935032779399, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008811504986041632, "epoch": 533, "n_parameters": 86059856} {"train_lr": 0.0022665293138712287, "train_min_lr": 0.0022665293138712287, "train_loss": 0.1901970178915713, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008806660443019027, "epoch": 534, "n_parameters": 86059856} {"train_lr": 0.0022639858935517725, "train_min_lr": 0.0022639858935517725, "train_loss": 0.19017407117793575, "train_loss_scale": 628473.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008789009135514975, "epoch": 535, "n_parameters": 86059856} {"train_lr": 0.002261439510181771, "train_min_lr": 0.002261439510181771, "train_loss": 0.19013829020043024, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008762782799282994, "epoch": 536, "n_parameters": 86059856} {"train_lr": 0.0022588901737022053, "train_min_lr": 0.0022588901737022053, "train_loss": 0.1901290663338911, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008912115229460865, "epoch": 537, "n_parameters": 86059856} {"train_lr": 0.002256337894065582, "train_min_lr": 0.002256337894065582, "train_loss": 0.19011737234914342, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008783331351318898, "epoch": 538, "n_parameters": 86059856} {"train_lr": 0.0022537826812358995, "train_min_lr": 0.0022537826812358995, "train_loss": 0.19011806750383514, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008806404535276577, "epoch": 539, "n_parameters": 86059856} {"train_lr": 0.002251224545188606, "train_min_lr": 0.002251224545188606, "train_loss": 0.190125751380737, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008778574431721026, "epoch": 540, "n_parameters": 86059856} {"train_lr": 0.0022486634959105628, "train_min_lr": 0.0022486634959105628, "train_loss": 0.19009906829645237, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00888510975532998, "epoch": 541, "n_parameters": 86059856} {"train_lr": 0.0022460995434000056, "train_min_lr": 0.0022460995434000056, "train_loss": 0.19012783497619706, "train_loss_scale": 1068740.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 542, "n_parameters": 86059856} {"train_lr": 0.0022435326976664965, "train_min_lr": 0.0022435326976664965, "train_loss": 0.19006203728704116, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00887949245934112, "epoch": 543, "n_parameters": 86059856} {"train_lr": 0.0022409629687309036, "train_min_lr": 0.0022409629687309036, "train_loss": 0.1900312441866845, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008894357621335449, "epoch": 544, "n_parameters": 86059856} {"train_lr": 0.0022383903666253436, "train_min_lr": 0.0022383903666253436, "train_loss": 0.19005612768710423, "train_loss_scale": 788112.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 545, "n_parameters": 86059856} {"train_lr": 0.0022358149013931546, "train_min_lr": 0.0022358149013931546, "train_loss": 0.19004837365653843, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008951910946052521, "epoch": 546, "n_parameters": 86059856} {"train_lr": 0.0022332365830888467, "train_min_lr": 0.0022332365830888467, "train_loss": 0.19004913272622687, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008903852543041397, "epoch": 547, "n_parameters": 86059856} {"train_lr": 0.0022306554217780723, "train_min_lr": 0.0022306554217780723, "train_loss": 0.19001172622665763, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008779463788064627, "epoch": 548, "n_parameters": 86059856} {"train_lr": 0.002228071427537582, "train_min_lr": 0.002228071427537582, "train_loss": 0.19002103224659386, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008920495377447551, "epoch": 549, "n_parameters": 86059856} {"train_lr": 0.0022254846104551853, "train_min_lr": 0.0022254846104551853, "train_loss": 0.1900201578063365, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008929861306499403, "epoch": 550, "n_parameters": 86059856} {"train_lr": 0.002222894980629715, "train_min_lr": 0.002222894980629715, "train_loss": 0.19003313453868032, "train_loss_scale": 569659.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008926022342609193, "epoch": 551, "n_parameters": 86059856} {"train_lr": 0.0022203025481709825, "train_min_lr": 0.0022203025481709825, "train_loss": 0.1899849818661236, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008921457411279568, "epoch": 552, "n_parameters": 86059856} {"train_lr": 0.002217707323199736, "train_min_lr": 0.002217707323199736, "train_loss": 0.18999308322627956, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008931261918638857, "epoch": 553, "n_parameters": 86059856} {"train_lr": 0.0022151093158476296, "train_min_lr": 0.0022151093158476296, "train_loss": 0.18998719375126827, "train_loss_scale": 734339.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 554, "n_parameters": 86059856} {"train_lr": 0.0022125085362571847, "train_min_lr": 0.0022125085362571847, "train_loss": 0.18997769073869747, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009060267561998887, "epoch": 555, "n_parameters": 86059856} {"train_lr": 0.0022099049945817385, "train_min_lr": 0.0022099049945817385, "train_loss": 0.1899887326782426, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009034510127280671, "epoch": 556, "n_parameters": 86059856} {"train_lr": 0.0022072987009854137, "train_min_lr": 0.0022072987009854137, "train_loss": 0.189939611698859, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008947234575409824, "epoch": 557, "n_parameters": 86059856} {"train_lr": 0.002204689665643079, "train_min_lr": 0.002204689665643079, "train_loss": 0.1899169816289288, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008978360971340384, "epoch": 558, "n_parameters": 86059856} {"train_lr": 0.0022020778987402964, "train_min_lr": 0.0022020778987402964, "train_loss": 0.18992893820485243, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009011004935018718, "epoch": 559, "n_parameters": 86059856} {"train_lr": 0.0021994634104733073, "train_min_lr": 0.0021994634104733073, "train_loss": 0.18992444582116336, "train_loss_scale": 623432.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00891197700268374, "epoch": 560, "n_parameters": 86059856} {"train_lr": 0.0021968462110489636, "train_min_lr": 0.0021968462110489636, "train_loss": 0.18991156676425958, "train_loss_scale": 988081.2307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 561, "n_parameters": 86059856} {"train_lr": 0.0021942263106847085, "train_min_lr": 0.0021942263106847085, "train_loss": 0.18992309734965554, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009059643708706763, "epoch": 562, "n_parameters": 86059856} {"train_lr": 0.00219160371960853, "train_min_lr": 0.00219160371960853, "train_loss": 0.18989751304690847, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008950035359400014, "epoch": 563, "n_parameters": 86059856} {"train_lr": 0.0021889784480589184, "train_min_lr": 0.0021889784480589184, "train_loss": 0.1898670737954, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009055602994078819, "epoch": 564, "n_parameters": 86059856} {"train_lr": 0.002186350506284827, "train_min_lr": 0.002186350506284827, "train_loss": 0.1898769181376944, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009012050583409384, "epoch": 565, "n_parameters": 86059856} {"train_lr": 0.0021837199045456382, "train_min_lr": 0.0021837199045456382, "train_loss": 0.18984622359634018, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009084313760141436, "epoch": 566, "n_parameters": 86059856} {"train_lr": 0.002181086653111113, "train_min_lr": 0.002181086653111113, "train_loss": 0.18989312660713226, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009117524941057827, "epoch": 567, "n_parameters": 86059856} {"train_lr": 0.0021784507622613566, "train_min_lr": 0.0021784507622613566, "train_loss": 0.18986018255161932, "train_loss_scale": 675524.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 568, "n_parameters": 86059856} {"train_lr": 0.002175812242286789, "train_min_lr": 0.002175812242286789, "train_loss": 0.18984841129694802, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009035372066323478, "epoch": 569, "n_parameters": 86059856} {"train_lr": 0.0021731711034880846, "train_min_lr": 0.0021731711034880846, "train_loss": 0.18986813905529487, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009186736640866654, "epoch": 570, "n_parameters": 86059856} {"train_lr": 0.002170527356176138, "train_min_lr": 0.002170527356176138, "train_loss": 0.18978324079981598, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008999070239205582, "epoch": 571, "n_parameters": 86059856} {"train_lr": 0.0021678810106720405, "train_min_lr": 0.0021678810106720405, "train_loss": 0.18976660608313978, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008992164503209867, "epoch": 572, "n_parameters": 86059856} {"train_lr": 0.002165232077307008, "train_min_lr": 0.002165232077307008, "train_loss": 0.1897349882309731, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009040591889061034, "epoch": 573, "n_parameters": 86059856} {"train_lr": 0.0021625805664223837, "train_min_lr": 0.0021625805664223837, "train_loss": 0.18976990821269843, "train_loss_scale": 527648.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009024924634454342, "epoch": 574, "n_parameters": 86059856} {"train_lr": 0.0021599264883695505, "train_min_lr": 0.0021599264883695505, "train_loss": 0.1897944659412576, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00905956106278329, "epoch": 575, "n_parameters": 86059856} {"train_lr": 0.002157269853509928, "train_min_lr": 0.002157269853509928, "train_loss": 0.18977234083920336, "train_loss_scale": 658720.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 576, "n_parameters": 86059856} {"train_lr": 0.00215461067221491, "train_min_lr": 0.00215461067221491, "train_loss": 0.18972860129239658, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009037120763260202, "epoch": 577, "n_parameters": 86059856} {"train_lr": 0.002151948954865835, "train_min_lr": 0.002151948954865835, "train_loss": 0.18974699946239781, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009199143391854774, "epoch": 578, "n_parameters": 86059856} {"train_lr": 0.0021492847118539373, "train_min_lr": 0.0021492847118539373, "train_loss": 0.18974683217656535, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009020532733605554, "epoch": 579, "n_parameters": 86059856} {"train_lr": 0.002146617953580322, "train_min_lr": 0.002146617953580322, "train_loss": 0.1897504020553942, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009224099168279327, "epoch": 580, "n_parameters": 86059856} {"train_lr": 0.0021439486904558996, "train_min_lr": 0.0021439486904558996, "train_loss": 0.1896834751794067, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009053847467443213, "epoch": 581, "n_parameters": 86059856} {"train_lr": 0.0021412769329013656, "train_min_lr": 0.0021412769329013656, "train_loss": 0.18971037654540476, "train_loss_scale": 699050.6666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009196058052997941, "epoch": 582, "n_parameters": 86059856} {"train_lr": 0.0021386026913471664, "train_min_lr": 0.0021386026913471664, "train_loss": 0.18966870691078022, "train_loss_scale": 983040.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 583, "n_parameters": 86059856} {"train_lr": 0.0021359259762334173, "train_min_lr": 0.0021359259762334173, "train_loss": 0.1898640217498327, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009812319461698046, "epoch": 584, "n_parameters": 86059856} {"train_lr": 0.0021332467980099226, "train_min_lr": 0.0021332467980099226, "train_loss": 0.18966397245486194, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009114082737001948, "epoch": 585, "n_parameters": 86059856} {"train_lr": 0.0021305651671360733, "train_min_lr": 0.0021305651671360733, "train_loss": 0.18966236352944413, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009199478345600745, "epoch": 586, "n_parameters": 86059856} {"train_lr": 0.0021278810940808597, "train_min_lr": 0.0021278810940808597, "train_loss": 0.1896338704913759, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009168303968647543, "epoch": 587, "n_parameters": 86059856} {"train_lr": 0.0021251945893227893, "train_min_lr": 0.0021251945893227893, "train_loss": 0.18968218757818717, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009140264753323909, "epoch": 588, "n_parameters": 86059856} {"train_lr": 0.0021225056633498727, "train_min_lr": 0.0021225056633498727, "train_loss": 0.18964818662677246, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009189193892197158, "epoch": 589, "n_parameters": 86059856} {"train_lr": 0.002119814326659565, "train_min_lr": 0.002119814326659565, "train_loss": 0.18964743045851207, "train_loss_scale": 801555.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 590, "n_parameters": 86059856} {"train_lr": 0.002117120589758743, "train_min_lr": 0.002117120589758743, "train_loss": 0.18960750341797486, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00913726940500335, "epoch": 591, "n_parameters": 86059856} {"train_lr": 0.002114424463163643, "train_min_lr": 0.002114424463163643, "train_loss": 0.1895706464942449, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009349824453238398, "epoch": 592, "n_parameters": 86059856} {"train_lr": 0.0021117259573998353, "train_min_lr": 0.0021117259573998353, "train_loss": 0.1895885085567641, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009287070833707754, "epoch": 593, "n_parameters": 86059856} {"train_lr": 0.002109025083002179, "train_min_lr": 0.002109025083002179, "train_loss": 0.18956353806126386, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009285658054674665, "epoch": 594, "n_parameters": 86059856} {"train_lr": 0.0021063218505147814, "train_min_lr": 0.0021063218505147814, "train_loss": 0.1896064633037895, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00920339262423416, "epoch": 595, "n_parameters": 86059856} {"train_lr": 0.002103616270490951, "train_min_lr": 0.002103616270490951, "train_loss": 0.18955095303961292, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009161673441457633, "epoch": 596, "n_parameters": 86059856} {"train_lr": 0.0021009083534931686, "train_min_lr": 0.0021009083534931686, "train_loss": 0.18953619755279177, "train_loss_scale": 557896.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 597, "n_parameters": 86059856} {"train_lr": 0.0020981981100930327, "train_min_lr": 0.0020981981100930327, "train_loss": 0.18950789343589583, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009329352059210531, "epoch": 598, "n_parameters": 86059856} {"train_lr": 0.0020954855508712156, "train_min_lr": 0.0020954855508712156, "train_loss": 0.1895227586098302, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009225811413787782, "epoch": 599, "n_parameters": 86059856} {"train_lr": 0.002092770686417453, "train_min_lr": 0.002092770686417453, "train_loss": 0.18951997870746523, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009250027991723843, "epoch": 600, "n_parameters": 86059856} {"train_lr": 0.0020900535273304594, "train_min_lr": 0.0020900535273304594, "train_loss": 0.18948278019133097, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009315953506395603, "epoch": 601, "n_parameters": 86059856} {"train_lr": 0.0020873340842179158, "train_min_lr": 0.0020873340842179158, "train_loss": 0.18946049209588614, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009449730260082735, "epoch": 602, "n_parameters": 86059856} {"train_lr": 0.0020846123676964215, "train_min_lr": 0.0020846123676964215, "train_loss": 0.18947025457134423, "train_loss_scale": 682246.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009382462672268351, "epoch": 603, "n_parameters": 86059856} {"train_lr": 0.002081888388391451, "train_min_lr": 0.002081888388391451, "train_loss": 0.18951579774968708, "train_loss_scale": 536050.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 604, "n_parameters": 86059856} {"train_lr": 0.0020791621569373037, "train_min_lr": 0.0020791621569373037, "train_loss": 0.18946474254465637, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00923239896282888, "epoch": 605, "n_parameters": 86059856} {"train_lr": 0.002076433683977084, "train_min_lr": 0.002076433683977084, "train_loss": 0.1894615726569333, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009268119814805686, "epoch": 606, "n_parameters": 86059856} {"train_lr": 0.0020737029801626366, "train_min_lr": 0.0020737029801626366, "train_loss": 0.18946445064069942, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009328731313311996, "epoch": 607, "n_parameters": 86059856} {"train_lr": 0.0020709700561545212, "train_min_lr": 0.0020709700561545212, "train_loss": 0.18943241507244798, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009309531989483496, "epoch": 608, "n_parameters": 86059856} {"train_lr": 0.0020682349226219697, "train_min_lr": 0.0020682349226219697, "train_loss": 0.1894167254272944, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009386941451782312, "epoch": 609, "n_parameters": 86059856} {"train_lr": 0.0020654975902428246, "train_min_lr": 0.0020654975902428246, "train_loss": 0.18944291233753738, "train_loss_scale": 650318.7692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 610, "n_parameters": 86059856} {"train_lr": 0.0020627580697035255, "train_min_lr": 0.0020627580697035255, "train_loss": 0.18942565815403867, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0093760493865165, "epoch": 611, "n_parameters": 86059856} {"train_lr": 0.002060016371699054, "train_min_lr": 0.002060016371699054, "train_loss": 0.18941056631051767, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009542936865144815, "epoch": 612, "n_parameters": 86059856} {"train_lr": 0.0020572725069328873, "train_min_lr": 0.0020572725069328873, "train_loss": 0.1893950287754146, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009419833251740783, "epoch": 613, "n_parameters": 86059856} {"train_lr": 0.0020545264861169606, "train_min_lr": 0.0020545264861169606, "train_loss": 0.1893948579374223, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009399707824624598, "epoch": 614, "n_parameters": 86059856} {"train_lr": 0.002051778319971633, "train_min_lr": 0.002051778319971633, "train_loss": 0.18936039149188078, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00952971534570679, "epoch": 615, "n_parameters": 86059856} {"train_lr": 0.0020490280192256333, "train_min_lr": 0.0020490280192256333, "train_loss": 0.1893687955617236, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00944754512508949, "epoch": 616, "n_parameters": 86059856} {"train_lr": 0.0020462755946160267, "train_min_lr": 0.0020462755946160267, "train_loss": 0.18935929845946914, "train_loss_scale": 1004885.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009364667269460952, "epoch": 617, "n_parameters": 86059856} {"train_lr": 0.002043521056888168, "train_min_lr": 0.002043521056888168, "train_loss": 0.18933955140602896, "train_loss_scale": 742741.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 618, "n_parameters": 86059856} {"train_lr": 0.002040764416795663, "train_min_lr": 0.002040764416795663, "train_loss": 0.1893314950597974, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009384307555125978, "epoch": 619, "n_parameters": 86059856} {"train_lr": 0.002038005685100323, "train_min_lr": 0.002038005685100323, "train_loss": 0.189355900296225, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009636698795172075, "epoch": 620, "n_parameters": 86059856} {"train_lr": 0.0020352448725721264, "train_min_lr": 0.0020352448725721264, "train_loss": 0.18931108097641322, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009607555582008969, "epoch": 621, "n_parameters": 86059856} {"train_lr": 0.0020324819899891758, "train_min_lr": 0.0020324819899891758, "train_loss": 0.18928717931087774, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00952048594943988, "epoch": 622, "n_parameters": 86059856} {"train_lr": 0.002029717048137654, "train_min_lr": 0.002029717048137654, "train_loss": 0.18931418660884866, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009701840603389801, "epoch": 623, "n_parameters": 86059856} {"train_lr": 0.0020269500578117827, "train_min_lr": 0.0020269500578117827, "train_loss": 0.18931653074776897, "train_loss_scale": 559576.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 624, "n_parameters": 86059856} {"train_lr": 0.0020241810298137794, "train_min_lr": 0.0020241810298137794, "train_loss": 0.18927193151261562, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00951637208270721, "epoch": 625, "n_parameters": 86059856} {"train_lr": 0.002021409974953821, "train_min_lr": 0.002021409974953821, "train_loss": 0.18928897667986652, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009465744032357365, "epoch": 626, "n_parameters": 86059856} {"train_lr": 0.0020186369040499945, "train_min_lr": 0.0020186369040499945, "train_loss": 0.18924658993282953, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009584043040656699, "epoch": 627, "n_parameters": 86059856} {"train_lr": 0.0020158618279282566, "train_min_lr": 0.0020158618279282566, "train_loss": 0.1892173236104636, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00954321040533101, "epoch": 628, "n_parameters": 86059856} {"train_lr": 0.002013084757422398, "train_min_lr": 0.002013084757422398, "train_loss": 0.189259333225588, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009618061020050962, "epoch": 629, "n_parameters": 86059856} {"train_lr": 0.0020103057033739845, "train_min_lr": 0.0020103057033739845, "train_loss": 0.1892119612222394, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00955308902149017, "epoch": 630, "n_parameters": 86059856} {"train_lr": 0.002007524676632334, "train_min_lr": 0.002007524676632334, "train_loss": 0.18924387287682828, "train_loss_scale": 672164.1025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 631, "n_parameters": 86059856} {"train_lr": 0.002004741688054465, "train_min_lr": 0.002004741688054465, "train_loss": 0.18919870937362504, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009657479903230874, "epoch": 632, "n_parameters": 86059856} {"train_lr": 0.002001956748505053, "train_min_lr": 0.002001956748505053, "train_loss": 0.18939855937750485, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010308774304576218, "epoch": 633, "n_parameters": 86059856} {"train_lr": 0.0019991698688563877, "train_min_lr": 0.0019991698688563877, "train_loss": 0.1891971368354578, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009510529317105046, "epoch": 634, "n_parameters": 86059856} {"train_lr": 0.0019963810599883383, "train_min_lr": 0.0019963810599883383, "train_loss": 0.18921313304692888, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00970675773285807, "epoch": 635, "n_parameters": 86059856} {"train_lr": 0.001993590332788305, "train_min_lr": 0.001993590332788305, "train_loss": 0.18919417150438023, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009589412204849605, "epoch": 636, "n_parameters": 86059856} {"train_lr": 0.001990797698151171, "train_min_lr": 0.001990797698151171, "train_loss": 0.18918764025856477, "train_loss_scale": 525968.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009635722398972856, "epoch": 637, "n_parameters": 86059856} {"train_lr": 0.0019880031669792728, "train_min_lr": 0.0019880031669792728, "train_loss": 0.1891209586427953, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009576160783091417, "epoch": 638, "n_parameters": 86059856} {"train_lr": 0.0019852067501823466, "train_min_lr": 0.0019852067501823466, "train_loss": 0.18916590261058167, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009707144683955286, "epoch": 639, "n_parameters": 86059856} {"train_lr": 0.001982408458677493, "train_min_lr": 0.001982408458677493, "train_loss": 0.1891338239925412, "train_loss_scale": 927586.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 640, "n_parameters": 86059856} {"train_lr": 0.001979608303389129, "train_min_lr": 0.001979608303389129, "train_loss": 0.1891566612280141, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009769998023656603, "epoch": 641, "n_parameters": 86059856} {"train_lr": 0.001976806295248947, "train_min_lr": 0.001976806295248947, "train_loss": 0.18910009060532618, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009532011061823234, "epoch": 642, "n_parameters": 86059856} {"train_lr": 0.001974002445195878, "train_min_lr": 0.001974002445195878, "train_loss": 0.1890884791679967, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009717536748398859, "epoch": 643, "n_parameters": 86059856} {"train_lr": 0.0019711967641760394, "train_min_lr": 0.0019711967641760394, "train_loss": 0.18908888736787516, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009707152136028387, "epoch": 644, "n_parameters": 86059856} {"train_lr": 0.001968389263142698, "train_min_lr": 0.001968389263142698, "train_loss": 0.1890420651075263, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009671128426606838, "epoch": 645, "n_parameters": 86059856} {"train_lr": 0.0019655799530562206, "train_min_lr": 0.0019655799530562206, "train_loss": 0.1892253125850589, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010300341253288282, "epoch": 646, "n_parameters": 86059856} {"train_lr": 0.0019627688448840505, "train_min_lr": 0.0019627688448840505, "train_loss": 0.1890552299730002, "train_loss_scale": 611669.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 647, "n_parameters": 86059856} {"train_lr": 0.0019599559496006323, "train_min_lr": 0.0019599559496006323, "train_loss": 0.18905648961066243, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00982825354171487, "epoch": 648, "n_parameters": 86059856} {"train_lr": 0.0019571412781874023, "train_min_lr": 0.0019571412781874023, "train_loss": 0.18903830970446459, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009578519106770936, "epoch": 649, "n_parameters": 86059856} {"train_lr": 0.0019543248416327226, "train_min_lr": 0.0019543248416327226, "train_loss": 0.18902172901643774, "train_loss_scale": 509164.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 650, "n_parameters": 86059856} {"train_lr": 0.0019515066509318512, "train_min_lr": 0.0019515066509318512, "train_loss": 0.18903337718130878, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009850891459231766, "epoch": 651, "n_parameters": 86059856} {"train_lr": 0.0019486867170868882, "train_min_lr": 0.0019486867170868882, "train_loss": 0.18903580164680114, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009749785182066262, "epoch": 652, "n_parameters": 86059856} {"train_lr": 0.0019458650511067438, "train_min_lr": 0.0019458650511067438, "train_loss": 0.18901152157176954, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009851138286579113, "epoch": 653, "n_parameters": 86059856} {"train_lr": 0.0019430416640070901, "train_min_lr": 0.0019430416640070901, "train_loss": 0.18899511431272215, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009948178716839697, "epoch": 654, "n_parameters": 86059856} {"train_lr": 0.001940216566810318, "train_min_lr": 0.001940216566810318, "train_loss": 0.18901363785903996, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009906518389470875, "epoch": 655, "n_parameters": 86059856} {"train_lr": 0.0019373897705454927, "train_min_lr": 0.0019373897705454927, "train_loss": 0.1889929639068074, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00982251494543818, "epoch": 656, "n_parameters": 86059856} {"train_lr": 0.0019345612862483098, "train_min_lr": 0.0019345612862483098, "train_loss": 0.18893610942177474, "train_loss_scale": 431865.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009751118774800442, "epoch": 657, "n_parameters": 86059856} {"train_lr": 0.0019317311249610646, "train_min_lr": 0.0019317311249610646, "train_loss": 0.1889283944410869, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009840337535700737, "epoch": 658, "n_parameters": 86059856} {"train_lr": 0.0019288992977325935, "train_min_lr": 0.0019288992977325935, "train_loss": 0.18898147899800768, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010042304758173533, "epoch": 659, "n_parameters": 86059856} {"train_lr": 0.0019260658156182303, "train_min_lr": 0.0019260658156182303, "train_loss": 0.18891451024235442, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010009694116142316, "epoch": 660, "n_parameters": 86059856} {"train_lr": 0.0019232306896797824, "train_min_lr": 0.0019232306896797824, "train_loss": 0.18893076924201196, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009788541851613002, "epoch": 661, "n_parameters": 86059856} {"train_lr": 0.001920393930985464, "train_min_lr": 0.001920393930985464, "train_loss": 0.18893670855472103, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010031418215172986, "epoch": 662, "n_parameters": 86059856} {"train_lr": 0.0019175555506098707, "train_min_lr": 0.0019175555506098707, "train_loss": 0.18890000983642843, "train_loss_scale": 648638.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009886830289943669, "epoch": 663, "n_parameters": 86059856} {"train_lr": 0.0019147155596339227, "train_min_lr": 0.0019147155596339227, "train_loss": 0.18891075022446996, "train_loss_scale": 1036813.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 664, "n_parameters": 86059856} {"train_lr": 0.0019118739691448329, "train_min_lr": 0.0019118739691448329, "train_loss": 0.1888890163316272, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009756531776824536, "epoch": 665, "n_parameters": 86059856} {"train_lr": 0.0019090307902360558, "train_min_lr": 0.0019090307902360558, "train_loss": 0.18883902387908444, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010032398878441503, "epoch": 666, "n_parameters": 86059856} {"train_lr": 0.0019061860340072474, "train_min_lr": 0.0019061860340072474, "train_loss": 0.18887190056893116, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00993631222548011, "epoch": 667, "n_parameters": 86059856} {"train_lr": 0.0019033397115642285, "train_min_lr": 0.0019033397115642285, "train_loss": 0.18886706361976954, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010048504712848136, "epoch": 668, "n_parameters": 86059856} {"train_lr": 0.0019004918340189175, "train_min_lr": 0.0019004918340189175, "train_loss": 0.1888270810341988, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009989108541049063, "epoch": 669, "n_parameters": 86059856} {"train_lr": 0.0018976424124893222, "train_min_lr": 0.0018976424124893222, "train_loss": 0.18879100975270072, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009888533052080909, "epoch": 670, "n_parameters": 86059856} {"train_lr": 0.0018947914580994664, "train_min_lr": 0.0018947914580994664, "train_loss": 0.18884443755572042, "train_loss_scale": 638555.8974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 671, "n_parameters": 86059856} {"train_lr": 0.0018919389819793612, "train_min_lr": 0.0018919389819793612, "train_loss": 0.18880819668993354, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009976644603272852, "epoch": 672, "n_parameters": 86059856} {"train_lr": 0.0018890849952649664, "train_min_lr": 0.0018890849952649664, "train_loss": 0.18877797836485583, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010156624311676774, "epoch": 673, "n_parameters": 86059856} {"train_lr": 0.0018862295090981214, "train_min_lr": 0.0018862295090981214, "train_loss": 0.1887776748253367, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01007001878371319, "epoch": 674, "n_parameters": 86059856} {"train_lr": 0.0018833725346265372, "train_min_lr": 0.0018833725346265372, "train_loss": 0.18877626333433467, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009927956667991403, "epoch": 675, "n_parameters": 86059856} {"train_lr": 0.0018805140830037252, "train_min_lr": 0.0018805140830037252, "train_loss": 0.1887364306140882, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010007302014109416, "epoch": 676, "n_parameters": 86059856} {"train_lr": 0.0018776541653889654, "train_min_lr": 0.0018776541653889654, "train_loss": 0.18874880784931472, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010098210082222253, "epoch": 677, "n_parameters": 86059856} {"train_lr": 0.001874792792947265, "train_min_lr": 0.001874792792947265, "train_loss": 0.1890830516946526, "train_loss_scale": 136953.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 678, "n_parameters": 86059856} {"train_lr": 0.0018719299768493057, "train_min_lr": 0.0018719299768493057, "train_loss": 0.18875879120344344, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010009243657502035, "epoch": 679, "n_parameters": 86059856} {"train_lr": 0.0018690657282714011, "train_min_lr": 0.0018690657282714011, "train_loss": 0.1887362609939793, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009979080178965934, "epoch": 680, "n_parameters": 86059856} {"train_lr": 0.0018662000583954724, "train_min_lr": 0.0018662000583954724, "train_loss": 0.18870303478354636, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009906691236373706, "epoch": 681, "n_parameters": 86059856} {"train_lr": 0.0018633329784089757, "train_min_lr": 0.0018633329784089757, "train_loss": 0.1886843617181652, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009910171096308682, "epoch": 682, "n_parameters": 86059856} {"train_lr": 0.0018604644995048785, "train_min_lr": 0.0018604644995048785, "train_loss": 0.1889991917862342, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011143693500437224, "epoch": 683, "n_parameters": 86059856} {"train_lr": 0.0018575946328816017, "train_min_lr": 0.0018575946328816017, "train_loss": 0.18870716477529362, "train_loss_scale": 205850.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010172005175636748, "epoch": 684, "n_parameters": 86059856} {"train_lr": 0.0018547233897429978, "train_min_lr": 0.0018547233897429978, "train_loss": 0.1886789873122978, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01004031686207805, "epoch": 685, "n_parameters": 86059856} {"train_lr": 0.0018518507812982775, "train_min_lr": 0.0018518507812982775, "train_loss": 0.18868201114953712, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010273207328282297, "epoch": 686, "n_parameters": 86059856} {"train_lr": 0.0018489768187619953, "train_min_lr": 0.0018489768187619953, "train_loss": 0.1886414206909159, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010027886204158839, "epoch": 687, "n_parameters": 86059856} {"train_lr": 0.0018461015133539854, "train_min_lr": 0.0018461015133539854, "train_loss": 0.18862716319899148, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010000529233366251, "epoch": 688, "n_parameters": 86059856} {"train_lr": 0.0018432248762993175, "train_min_lr": 0.0018432248762993175, "train_loss": 0.18864035717426583, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010205090917551365, "epoch": 689, "n_parameters": 86059856} {"train_lr": 0.0018403469188282779, "train_min_lr": 0.0018403469188282779, "train_loss": 0.18861002123986298, "train_loss_scale": 304154.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009974252887798522, "epoch": 690, "n_parameters": 86059856} {"train_lr": 0.00183746765217629, "train_min_lr": 0.00183746765217629, "train_loss": 0.1886271320653554, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010131339360482227, "epoch": 691, "n_parameters": 86059856} {"train_lr": 0.0018345870875838986, "train_min_lr": 0.0018345870875838986, "train_loss": 0.18862976072929227, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010181642510593893, "epoch": 692, "n_parameters": 86059856} {"train_lr": 0.0018317052362967102, "train_min_lr": 0.0018317052362967102, "train_loss": 0.18859469662539852, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010214733553883165, "epoch": 693, "n_parameters": 86059856} {"train_lr": 0.0018288221095653606, "train_min_lr": 0.0018288221095653606, "train_loss": 0.1886046391684944, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010297825467438461, "epoch": 694, "n_parameters": 86059856} {"train_lr": 0.0018259377186454588, "train_min_lr": 0.0018259377186454588, "train_loss": 0.18854990063641125, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010247109863811579, "epoch": 695, "n_parameters": 86059856} {"train_lr": 0.0018230520747975505, "train_min_lr": 0.0018230520747975505, "train_loss": 0.18854047595642698, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010266583538256012, "epoch": 696, "n_parameters": 86059856} {"train_lr": 0.0018201651892870796, "train_min_lr": 0.0018201651892870796, "train_loss": 0.18855608529888856, "train_loss_scale": 917504.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01020666795412604, "epoch": 697, "n_parameters": 86059856} {"train_lr": 0.0018172770733843229, "train_min_lr": 0.0018172770733843229, "train_loss": 0.18853198597804668, "train_loss_scale": 665442.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 698, "n_parameters": 86059856} {"train_lr": 0.0018143877383643727, "train_min_lr": 0.0018143877383643727, "train_loss": 0.18853105881060353, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010221905758580526, "epoch": 699, "n_parameters": 86059856} {"train_lr": 0.0018114971955070804, "train_min_lr": 0.0018114971955070804, "train_loss": 0.18851594956448445, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010323825859440824, "epoch": 700, "n_parameters": 86059856} {"train_lr": 0.0018086054560970055, "train_min_lr": 0.0018086054560970055, "train_loss": 0.18850617845638248, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010163807904180616, "epoch": 701, "n_parameters": 86059856} {"train_lr": 0.001805712531423385, "train_min_lr": 0.001805712531423385, "train_loss": 0.18851814427389166, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010260565355658913, "epoch": 702, "n_parameters": 86059856} {"train_lr": 0.00180281843278008, "train_min_lr": 0.00180281843278008, "train_loss": 0.18848393760764828, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01020147280098918, "epoch": 703, "n_parameters": 86059856} {"train_lr": 0.0017999231714655396, "train_min_lr": 0.0017999231714655396, "train_loss": 0.18910712204897442, "train_loss_scale": 86961.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 704, "n_parameters": 86059856} {"train_lr": 0.0017970267587827415, "train_min_lr": 0.0017970267587827415, "train_loss": 0.18851594604217473, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010118045115795655, "epoch": 705, "n_parameters": 86059856} {"train_lr": 0.0017941292060391677, "train_min_lr": 0.0017941292060391677, "train_loss": 0.1884989701390553, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01012179960245983, "epoch": 706, "n_parameters": 86059856} {"train_lr": 0.001791230524546753, "train_min_lr": 0.001791230524546753, "train_loss": 0.18848020506377977, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010267160516471053, "epoch": 707, "n_parameters": 86059856} {"train_lr": 0.0017883307256218246, "train_min_lr": 0.0017883307256218246, "train_loss": 0.18843215920675832, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01014459084194058, "epoch": 708, "n_parameters": 86059856} {"train_lr": 0.001785429820585086, "train_min_lr": 0.001785429820585086, "train_loss": 0.18843788536599812, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010278519075841475, "epoch": 709, "n_parameters": 86059856} {"train_lr": 0.0017825278207615554, "train_min_lr": 0.0017825278207615554, "train_loss": 0.18837645299470004, "train_loss_scale": 100824.61538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010298514010336919, "epoch": 710, "n_parameters": 86059856} {"train_lr": 0.0017796247374805261, "train_min_lr": 0.0017796247374805261, "train_loss": 0.18838678302171713, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010260095485509971, "epoch": 711, "n_parameters": 86059856} {"train_lr": 0.0017767205820755137, "train_min_lr": 0.0017767205820755137, "train_loss": 0.18839408260865662, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01061160864833838, "epoch": 712, "n_parameters": 86059856} {"train_lr": 0.0017738153658842265, "train_min_lr": 0.0017738153658842265, "train_loss": 0.18837046611289948, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010371844496172017, "epoch": 713, "n_parameters": 86059856} {"train_lr": 0.0017709091002485128, "train_min_lr": 0.0017709091002485128, "train_loss": 0.18836103229281995, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010275336937644543, "epoch": 714, "n_parameters": 86059856} {"train_lr": 0.0017680017965143165, "train_min_lr": 0.0017680017965143165, "train_loss": 0.18839157877179483, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010429765184362156, "epoch": 715, "n_parameters": 86059856} {"train_lr": 0.0017650934660316377, "train_min_lr": 0.0017650934660316377, "train_loss": 0.18835345985821617, "train_loss_scale": 147876.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010442861421511341, "epoch": 716, "n_parameters": 86059856} {"train_lr": 0.0017621841201544786, "train_min_lr": 0.0017621841201544786, "train_loss": 0.18832114641554654, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010342835305401912, "epoch": 717, "n_parameters": 86059856} {"train_lr": 0.0017592737702408108, "train_min_lr": 0.0017592737702408108, "train_loss": 0.18833718182614598, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010458042116787953, "epoch": 718, "n_parameters": 86059856} {"train_lr": 0.001756362427652523, "train_min_lr": 0.001756362427652523, "train_loss": 0.1883370563901292, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01053478203725834, "epoch": 719, "n_parameters": 86059856} {"train_lr": 0.0017534501037553814, "train_min_lr": 0.0017534501037553814, "train_loss": 0.18834969817469707, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010344495298340917, "epoch": 720, "n_parameters": 86059856} {"train_lr": 0.001750536809918981, "train_min_lr": 0.001750536809918981, "train_loss": 0.18833916053438607, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01043840549671306, "epoch": 721, "n_parameters": 86059856} {"train_lr": 0.0017476225575167052, "train_min_lr": 0.0017476225575167052, "train_loss": 0.18830692248108485, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010658470406316413, "epoch": 722, "n_parameters": 86059856} {"train_lr": 0.0017447073579256772, "train_min_lr": 0.0017447073579256772, "train_loss": 0.1883058184160827, "train_loss_scale": 450349.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010420782246794075, "epoch": 723, "n_parameters": 86059856} {"train_lr": 0.0017417912225267207, "train_min_lr": 0.0017417912225267207, "train_loss": 0.1882891957886899, "train_loss_scale": 447829.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 724, "n_parameters": 86059856} {"train_lr": 0.0017388741627043104, "train_min_lr": 0.0017388741627043104, "train_loss": 0.1882479960600344, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010754495868101142, "epoch": 725, "n_parameters": 86059856} {"train_lr": 0.0017359561898465316, "train_min_lr": 0.0017359561898465316, "train_loss": 0.18827811998721117, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01053343658359387, "epoch": 726, "n_parameters": 86059856} {"train_lr": 0.0017330373153450322, "train_min_lr": 0.0017330373153450322, "train_loss": 0.18824971246962938, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010708727318650255, "epoch": 727, "n_parameters": 86059856} {"train_lr": 0.001730117550594988, "train_min_lr": 0.001730117550594988, "train_loss": 0.1881884252294325, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010409528592553658, "epoch": 728, "n_parameters": 86059856} {"train_lr": 0.0017271969069950334, "train_min_lr": 0.0017271969069950334, "train_loss": 0.18822738506162587, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010594656231263891, "epoch": 729, "n_parameters": 86059856} {"train_lr": 0.001724275395947252, "train_min_lr": 0.001724275395947252, "train_loss": 0.18820482424985713, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010646567796356976, "epoch": 730, "n_parameters": 86059856} {"train_lr": 0.0017213530288571032, "train_min_lr": 0.0017213530288571032, "train_loss": 0.1882037244593868, "train_loss_scale": 493200.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010463387436734943, "epoch": 731, "n_parameters": 86059856} {"train_lr": 0.0017184298171333912, "train_min_lr": 0.0017184298171333912, "train_loss": 0.18817407741115835, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010473836783486873, "epoch": 732, "n_parameters": 86059856} {"train_lr": 0.0017155057721882157, "train_min_lr": 0.0017155057721882157, "train_loss": 0.1881559678198149, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010461101344285103, "epoch": 733, "n_parameters": 86059856} {"train_lr": 0.0017125809054369357, "train_min_lr": 0.0017125809054369357, "train_loss": 0.18821365595795214, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010874369017517146, "epoch": 734, "n_parameters": 86059856} {"train_lr": 0.0017096552282981094, "train_min_lr": 0.0017096552282981094, "train_loss": 0.18814444749687725, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010514471578435639, "epoch": 735, "n_parameters": 86059856} {"train_lr": 0.0017067287521934666, "train_min_lr": 0.0017067287521934666, "train_loss": 0.1881324937268614, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010553205732065134, "epoch": 736, "n_parameters": 86059856} {"train_lr": 0.0017038014885478482, "train_min_lr": 0.0017038014885478482, "train_loss": 0.18810898607047513, "train_loss_scale": 724256.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 737, "n_parameters": 86059856} {"train_lr": 0.0017008734487891795, "train_min_lr": 0.0017008734487891795, "train_loss": 0.18811567288298064, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010709110799675377, "epoch": 738, "n_parameters": 86059856} {"train_lr": 0.0016979446443484076, "train_min_lr": 0.0016979446443484076, "train_loss": 0.1880994579062248, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010622150730341673, "epoch": 739, "n_parameters": 86059856} {"train_lr": 0.0016920847871592394, "train_min_lr": 0.0016920847871592394, "train_loss": 0.18809426067253718, "train_loss_scale": 408339.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 741, "n_parameters": 86059856} {"train_lr": 0.001689153757287491, "train_min_lr": 0.001689153757287491, "train_loss": 0.18807281590759373, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01062836630556446, "epoch": 742, "n_parameters": 86059856} {"train_lr": 0.0016862220084868456, "train_min_lr": 0.0016862220084868456, "train_loss": 0.18808629997194004, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010683952687451473, "epoch": 743, "n_parameters": 86059856} {"train_lr": 0.0016832895522027335, "train_min_lr": 0.0016832895522027335, "train_loss": 0.18807702660799408, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0109728337593902, "epoch": 744, "n_parameters": 86059856} {"train_lr": 0.001680356399883348, "train_min_lr": 0.001680356399883348, "train_loss": 0.1880308367873136, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010764658230189711, "epoch": 745, "n_parameters": 86059856} {"train_lr": 0.001677422562979598, "train_min_lr": 0.001677422562979598, "train_loss": 0.18803339542295688, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01083766704556556, "epoch": 746, "n_parameters": 86059856} {"train_lr": 0.0016744880529450644, "train_min_lr": 0.0016744880529450644, "train_loss": 0.18803314024654144, "train_loss_scale": 270546.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010774987364689318, "epoch": 747, "n_parameters": 86059856} {"train_lr": 0.0016715528812359585, "train_min_lr": 0.0016715528812359585, "train_loss": 0.18800818169107422, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010847805241027322, "epoch": 748, "n_parameters": 86059856} {"train_lr": 0.0016686170593110696, "train_min_lr": 0.0016686170593110696, "train_loss": 0.1880097500203798, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010807206582588455, "epoch": 749, "n_parameters": 86059856} {"train_lr": 0.0016656805986317326, "train_min_lr": 0.0016656805986317326, "train_loss": 0.18798609082109463, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010530423787900079, "epoch": 750, "n_parameters": 86059856} {"train_lr": 0.001662743510661771, "train_min_lr": 0.001662743510661771, "train_loss": 0.18798715888689727, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010933585479879418, "epoch": 751, "n_parameters": 86059856} {"train_lr": 0.0016598058068674592, "train_min_lr": 0.0016598058068674592, "train_loss": 0.18801246073025352, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010844343413527194, "epoch": 752, "n_parameters": 86059856} {"train_lr": 0.0016568674987174749, "train_min_lr": 0.0016568674987174749, "train_loss": 0.18798172515100584, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010979768906672223, "epoch": 753, "n_parameters": 86059856} {"train_lr": 0.001653928597682856, "train_min_lr": 0.001653928597682856, "train_loss": 0.18793447404646146, "train_loss_scale": 525968.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 754, "n_parameters": 86059856} {"train_lr": 0.0016509891152369522, "train_min_lr": 0.0016509891152369522, "train_loss": 0.18795619618434173, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010757484724028753, "epoch": 755, "n_parameters": 86059856} {"train_lr": 0.0016480490628553884, "train_min_lr": 0.0016480490628553884, "train_loss": 0.1878944867147276, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010861959711361963, "epoch": 756, "n_parameters": 86059856} {"train_lr": 0.0016451084520160096, "train_min_lr": 0.0016451084520160096, "train_loss": 0.18792516044460428, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010899618888894716, "epoch": 757, "n_parameters": 86059856} {"train_lr": 0.001642167294198843, "train_min_lr": 0.001642167294198843, "train_loss": 0.18792037475400436, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01106643662131272, "epoch": 758, "n_parameters": 86059856} {"train_lr": 0.0016392256008860519, "train_min_lr": 0.0016392256008860519, "train_loss": 0.18795407119875726, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011175946141473757, "epoch": 759, "n_parameters": 86059856} {"train_lr": 0.0016362833835618874, "train_min_lr": 0.0016362833835618874, "train_loss": 0.1879067168188974, "train_loss_scale": 567978.6666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 760, "n_parameters": 86059856} {"train_lr": 0.00163334065371265, "train_min_lr": 0.00163334065371265, "train_loss": 0.1880082388360722, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011612877455683282, "epoch": 761, "n_parameters": 86059856} {"train_lr": 0.0016303974228266398, "train_min_lr": 0.0016303974228266398, "train_loss": 0.1879020683013667, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010960298109775743, "epoch": 762, "n_parameters": 86059856} {"train_lr": 0.0016274537023941095, "train_min_lr": 0.0016274537023941095, "train_loss": 0.1878415806374202, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011040974820319276, "epoch": 763, "n_parameters": 86059856} {"train_lr": 0.0016245095039072266, "train_min_lr": 0.0016245095039072266, "train_loss": 0.18783619161695242, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01089882243794795, "epoch": 764, "n_parameters": 86059856} {"train_lr": 0.0016215648388600266, "train_min_lr": 0.0016215648388600266, "train_loss": 0.1878773183740962, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011039032114263719, "epoch": 765, "n_parameters": 86059856} {"train_lr": 0.001618619718748361, "train_min_lr": 0.001618619718748361, "train_loss": 0.18785006653230923, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010913378009811426, "epoch": 766, "n_parameters": 86059856} {"train_lr": 0.0016156741550698618, "train_min_lr": 0.0016156741550698618, "train_loss": 0.18781552961072287, "train_loss_scale": 798194.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 767, "n_parameters": 86059856} {"train_lr": 0.0016127281593238927, "train_min_lr": 0.0016127281593238927, "train_loss": 0.18783137973589012, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011140054438072138, "epoch": 768, "n_parameters": 86059856} {"train_lr": 0.0016097817430115041, "train_min_lr": 0.0016097817430115041, "train_loss": 0.18777734044199, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011015370903082956, "epoch": 769, "n_parameters": 86059856} {"train_lr": 0.0016068349176353834, "train_min_lr": 0.0016068349176353834, "train_loss": 0.18779321192190623, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01120031819654963, "epoch": 770, "n_parameters": 86059856} {"train_lr": 0.001603887694699822, "train_min_lr": 0.001603887694699822, "train_loss": 0.1878038627656702, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0113160183133844, "epoch": 771, "n_parameters": 86059856} {"train_lr": 0.0016009400857106583, "train_min_lr": 0.0016009400857106583, "train_loss": 0.1877737111495569, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010870984552475886, "epoch": 772, "n_parameters": 86059856} {"train_lr": 0.0015979921021752421, "train_min_lr": 0.0015979921021752421, "train_loss": 0.18773808440742776, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011100912117399275, "epoch": 773, "n_parameters": 86059856} {"train_lr": 0.001595043755602381, "train_min_lr": 0.001595043755602381, "train_loss": 0.18771422126640877, "train_loss_scale": 448669.53846153844, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011040822686388707, "epoch": 774, "n_parameters": 86059856} {"train_lr": 0.001592095057502303, "train_min_lr": 0.001592095057502303, "train_loss": 0.18771456964672184, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011210155789740384, "epoch": 775, "n_parameters": 86059856} {"train_lr": 0.0015891460193866087, "train_min_lr": 0.0015891460193866087, "train_loss": 0.18770646890744758, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011250246438579872, "epoch": 776, "n_parameters": 86059856} {"train_lr": 0.0015861966527682212, "train_min_lr": 0.0015861966527682212, "train_loss": 0.18770468227254847, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011153344805232989, "epoch": 777, "n_parameters": 86059856} {"train_lr": 0.0015832469691613538, "train_min_lr": 0.0015832469691613538, "train_loss": 0.18768018200539816, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011098834548670894, "epoch": 778, "n_parameters": 86059856} {"train_lr": 0.00158029698008145, "train_min_lr": 0.00158029698008145, "train_loss": 0.1876829570839898, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011115152245530715, "epoch": 779, "n_parameters": 86059856} {"train_lr": 0.0015773466970451506, "train_min_lr": 0.0015773466970451506, "train_loss": 0.18773724410969478, "train_loss_scale": 620071.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 780, "n_parameters": 86059856} {"train_lr": 0.0015743961315702415, "train_min_lr": 0.0015743961315702415, "train_loss": 0.1876845580394356, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011087512481026351, "epoch": 781, "n_parameters": 86059856} {"train_lr": 0.0015714452951756139, "train_min_lr": 0.0015714452951756139, "train_loss": 0.18763762393082756, "train_loss_scale": 378932.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 782, "n_parameters": 86059856} {"train_lr": 0.0015684941993812142, "train_min_lr": 0.0015684941993812142, "train_loss": 0.18762710621843162, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011348904525407422, "epoch": 783, "n_parameters": 86059856} {"train_lr": 0.0015655428557080015, "train_min_lr": 0.0015655428557080015, "train_loss": 0.18764712711652884, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011223130468398524, "epoch": 784, "n_parameters": 86059856} {"train_lr": 0.001562591275677904, "train_min_lr": 0.001562591275677904, "train_loss": 0.18761833022253063, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01125025576673066, "epoch": 785, "n_parameters": 86059856} {"train_lr": 0.0015596394708137726, "train_min_lr": 0.0015596394708137726, "train_loss": 0.18762936337421146, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011294554939899499, "epoch": 786, "n_parameters": 86059856} {"train_lr": 0.0015566874526393342, "train_min_lr": 0.0015566874526393342, "train_loss": 0.18760116110579708, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011542310218852108, "epoch": 787, "n_parameters": 86059856} {"train_lr": 0.0015537352326791501, "train_min_lr": 0.0015537352326791501, "train_loss": 0.187603759048029, "train_loss_scale": 299953.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011248845807037866, "epoch": 788, "n_parameters": 86059856} {"train_lr": 0.0015507828224585656, "train_min_lr": 0.0015507828224585656, "train_loss": 0.18757154670161888, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011300300683778448, "epoch": 789, "n_parameters": 86059856} {"train_lr": 0.0015478302335036798, "train_min_lr": 0.0015478302335036798, "train_loss": 0.18758477485523775, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011419179792611454, "epoch": 790, "n_parameters": 86059856} {"train_lr": 0.0015448774773412749, "train_min_lr": 0.0015448774773412749, "train_loss": 0.18757368716148612, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011410794241759831, "epoch": 791, "n_parameters": 86059856} {"train_lr": 0.001541924565498795, "train_min_lr": 0.001541924565498795, "train_loss": 0.18797077088115308, "train_loss_scale": 437746.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 792, "n_parameters": 86059856} {"train_lr": 0.0015389715095042895, "train_min_lr": 0.0015389715095042895, "train_loss": 0.18759383395529136, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011502158353463389, "epoch": 793, "n_parameters": 86059856} {"train_lr": 0.0015360183208863727, "train_min_lr": 0.0015360183208863727, "train_loss": 0.18760460804407605, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011321821405241886, "epoch": 794, "n_parameters": 86059856} {"train_lr": 0.0015330650111741698, "train_min_lr": 0.0015330650111741698, "train_loss": 0.18753919063900143, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011280111517781058, "epoch": 795, "n_parameters": 86059856} {"train_lr": 0.0015301115918972897, "train_min_lr": 0.0015301115918972897, "train_loss": 0.18751634533803624, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01150923429761464, "epoch": 796, "n_parameters": 86059856} {"train_lr": 0.001527158074585758, "train_min_lr": 0.001527158074585758, "train_loss": 0.18748926208354533, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01116304312689373, "epoch": 797, "n_parameters": 86059856} {"train_lr": 0.0015242044707699904, "train_min_lr": 0.0015242044707699904, "train_loss": 0.18748583810594985, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01120604867509638, "epoch": 798, "n_parameters": 86059856} {"train_lr": 0.0015212507919807395, "train_min_lr": 0.0015212507919807395, "train_loss": 0.18759413579932582, "train_loss_scale": 236097.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011543382902462512, "epoch": 799, "n_parameters": 86059856} {"train_lr": 0.0015182970497490454, "train_min_lr": 0.0015182970497490454, "train_loss": 0.18749074929226667, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011416414128735853, "epoch": 800, "n_parameters": 86059856} {"train_lr": 0.0015153432556062026, "train_min_lr": 0.0015153432556062026, "train_loss": 0.18744478368749604, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011308111560841402, "epoch": 801, "n_parameters": 86059856} {"train_lr": 0.0015123894210837038, "train_min_lr": 0.0015123894210837038, "train_loss": 0.18745800498156592, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011378722563863564, "epoch": 802, "n_parameters": 86059856} {"train_lr": 0.0015094355577131994, "train_min_lr": 0.0015094355577131994, "train_loss": 0.18743028558599642, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011588734052239511, "epoch": 803, "n_parameters": 86059856} {"train_lr": 0.0015064816770264575, "train_min_lr": 0.0015064816770264575, "train_loss": 0.18743299560931823, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011239207131812015, "epoch": 804, "n_parameters": 86059856} {"train_lr": 0.0015035277905553067, "train_min_lr": 0.0015035277905553067, "train_loss": 0.18741384399935412, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01115923651601546, "epoch": 805, "n_parameters": 86059856} {"train_lr": 0.0015005739098316025, "train_min_lr": 0.0015005739098316025, "train_loss": 0.18738221132960647, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011595940936762745, "epoch": 806, "n_parameters": 86059856} {"train_lr": 0.001497620046387179, "train_min_lr": 0.001497620046387179, "train_loss": 0.18735940243380192, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011683717295408057, "epoch": 807, "n_parameters": 86059856} {"train_lr": 0.001494666211753796, "train_min_lr": 0.001494666211753796, "train_loss": 0.1874175321382399, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011723666744402204, "epoch": 808, "n_parameters": 86059856} {"train_lr": 0.0014917124174631104, "train_min_lr": 0.0014917124174631104, "train_loss": 0.18734820248582998, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011605668377178984, "epoch": 809, "n_parameters": 86059856} {"train_lr": 0.001488758675046614, "train_min_lr": 0.001488758675046614, "train_loss": 0.1873496147708442, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011544728929248568, "epoch": 810, "n_parameters": 86059856} {"train_lr": 0.0014858049960356009, "train_min_lr": 0.0014858049960356009, "train_loss": 0.1873549041696466, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01182472650319911, "epoch": 811, "n_parameters": 86059856} {"train_lr": 0.0014828513919611134, "train_min_lr": 0.0014828513919611134, "train_loss": 0.18734853422770706, "train_loss_scale": 1013287.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 812, "n_parameters": 86059856} {"train_lr": 0.0014798978743539074, "train_min_lr": 0.0014798978743539074, "train_loss": 0.1872680768645249, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011461918009445071, "epoch": 813, "n_parameters": 86059856} {"train_lr": 0.001476944454744393, "train_min_lr": 0.001476944454744393, "train_loss": 0.18729543540841684, "train_loss_scale": 317597.53846153844, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 814, "n_parameters": 86059856} {"train_lr": 0.0014739911446626046, "train_min_lr": 0.0014739911446626046, "train_loss": 0.18733287218193978, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011625308077782393, "epoch": 815, "n_parameters": 86059856} {"train_lr": 0.0014710379556381466, "train_min_lr": 0.0014710379556381466, "train_loss": 0.1872787486272267, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011503726285762893, "epoch": 816, "n_parameters": 86059856} {"train_lr": 0.001468084899200151, "train_min_lr": 0.001468084899200151, "train_loss": 0.18725135251999092, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011545104508956846, "epoch": 817, "n_parameters": 86059856} {"train_lr": 0.0014651319868772296, "train_min_lr": 0.0014651319868772296, "train_loss": 0.18727952620993632, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011813954700930761, "epoch": 818, "n_parameters": 86059856} {"train_lr": 0.001462179230197436, "train_min_lr": 0.001462179230197436, "train_loss": 0.18726677730536231, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011722388901174642, "epoch": 819, "n_parameters": 86059856} {"train_lr": 0.0014592266406882122, "train_min_lr": 0.0014592266406882122, "train_loss": 0.18727068145138523, "train_loss_scale": 361288.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011746501994844621, "epoch": 820, "n_parameters": 86059856} {"train_lr": 0.0014562742298763524, "train_min_lr": 0.0014562742298763524, "train_loss": 0.1872216646786397, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011866780186000351, "epoch": 821, "n_parameters": 86059856} {"train_lr": 0.0014533220092879473, "train_min_lr": 0.0014533220092879473, "train_loss": 0.18729499714353529, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012066029218168786, "epoch": 822, "n_parameters": 86059856} {"train_lr": 0.0014503699904483498, "train_min_lr": 0.0014503699904483498, "train_loss": 0.1872096554053804, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011756204158210984, "epoch": 823, "n_parameters": 86059856} {"train_lr": 0.0014474181848821218, "train_min_lr": 0.0014474181848821218, "train_loss": 0.1872240035639455, "train_loss_scale": 369690.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 824, "n_parameters": 86059856} {"train_lr": 0.0014444666041129952, "train_min_lr": 0.0014444666041129952, "train_loss": 0.1871674407321291, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011720298750636479, "epoch": 825, "n_parameters": 86059856} {"train_lr": 0.0014415152596638215, "train_min_lr": 0.0014415152596638215, "train_loss": 0.18717787625530782, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011762083388673954, "epoch": 826, "n_parameters": 86059856} {"train_lr": 0.0014385641630565349, "train_min_lr": 0.0014385641630565349, "train_loss": 0.18719149489576617, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01174239878459141, "epoch": 827, "n_parameters": 86059856} {"train_lr": 0.001435613325812093, "train_min_lr": 0.001435613325812093, "train_loss": 0.18716628795179227, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012003904417491494, "epoch": 828, "n_parameters": 86059856} {"train_lr": 0.001432662759450452, "train_min_lr": 0.001432662759450452, "train_loss": 0.18713861657306552, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01213434851095558, "epoch": 829, "n_parameters": 86059856} {"train_lr": 0.0014297124754905023, "train_min_lr": 0.0014297124754905023, "train_loss": 0.18712044627859425, "train_loss_scale": 309195.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011586047729883248, "epoch": 830, "n_parameters": 86059856} {"train_lr": 0.0014267624854500333, "train_min_lr": 0.0014267624854500333, "train_loss": 0.18709576583634585, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011770033062650608, "epoch": 831, "n_parameters": 86059856} {"train_lr": 0.0014238128008456877, "train_min_lr": 0.0014238128008456877, "train_loss": 0.18711407830676016, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012085533930729024, "epoch": 832, "n_parameters": 86059856} {"train_lr": 0.00142086343319292, "train_min_lr": 0.00142086343319292, "train_loss": 0.18707120049601564, "train_loss_scale": 316757.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 833, "n_parameters": 86059856} {"train_lr": 0.0014179143940059404, "train_min_lr": 0.0014179143940059404, "train_loss": 0.18707379315478298, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011604701622556418, "epoch": 834, "n_parameters": 86059856} {"train_lr": 0.001414965694797677, "train_min_lr": 0.001414965694797677, "train_loss": 0.18705485811313757, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011982961157814432, "epoch": 835, "n_parameters": 86059856} {"train_lr": 0.00141201734707974, "train_min_lr": 0.00141201734707974, "train_loss": 0.18710456530038172, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011887931957458839, "epoch": 836, "n_parameters": 86059856} {"train_lr": 0.0014090693623623558, "train_min_lr": 0.0014090693623623558, "train_loss": 0.1870628618515837, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012008561272150239, "epoch": 837, "n_parameters": 86059856} {"train_lr": 0.001406121752154341, "train_min_lr": 0.001406121752154341, "train_loss": 0.1870391421700613, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012053862052897994, "epoch": 838, "n_parameters": 86059856} {"train_lr": 0.0014031745279630477, "train_min_lr": 0.0014031745279630477, "train_loss": 0.18705488960497463, "train_loss_scale": 362128.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01179242057678027, "epoch": 839, "n_parameters": 86059856} {"train_lr": 0.0014002277012943208, "train_min_lr": 0.0014002277012943208, "train_loss": 0.18701150739947572, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011747960078840455, "epoch": 840, "n_parameters": 86059856} {"train_lr": 0.0013972812836524518, "train_min_lr": 0.0013972812836524518, "train_loss": 0.18699519280105448, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012206785252484947, "epoch": 841, "n_parameters": 86059856} {"train_lr": 0.001394335286540141, "train_min_lr": 0.001394335286540141, "train_loss": 0.18697786949670467, "train_loss_scale": 362968.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 842, "n_parameters": 86059856} {"train_lr": 0.0013913897214584406, "train_min_lr": 0.0013913897214584406, "train_loss": 0.1870178428108398, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01222188143322292, "epoch": 843, "n_parameters": 86059856} {"train_lr": 0.0013884445999067208, "train_min_lr": 0.0013884445999067208, "train_loss": 0.18694983394697118, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011932523085329777, "epoch": 844, "n_parameters": 86059856} {"train_lr": 0.0013854999333826144, "train_min_lr": 0.0013854999333826144, "train_loss": 0.18695404301755703, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011965065707977956, "epoch": 845, "n_parameters": 86059856} {"train_lr": 0.0013825557333819853, "train_min_lr": 0.0013825557333819853, "train_loss": 0.1869628490605511, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01186356998872585, "epoch": 846, "n_parameters": 86059856} {"train_lr": 0.001379612011398871, "train_min_lr": 0.001379612011398871, "train_loss": 0.18694925501059073, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012015068476709226, "epoch": 847, "n_parameters": 86059856} {"train_lr": 0.001376668778925445, "train_min_lr": 0.001376668778925445, "train_loss": 0.18691897127801219, "train_loss_scale": 315917.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011870865536949191, "epoch": 848, "n_parameters": 86059856} {"train_lr": 0.0013737260474519669, "train_min_lr": 0.0013737260474519669, "train_loss": 0.18690589679858816, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011770537081293954, "epoch": 849, "n_parameters": 86059856} {"train_lr": 0.0013707838284667446, "train_min_lr": 0.0013707838284667446, "train_loss": 0.1868765808773251, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01217354293494748, "epoch": 850, "n_parameters": 86059856} {"train_lr": 0.0013678421334560834, "train_min_lr": 0.0013678421334560834, "train_loss": 0.18690394785685036, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012163937775394281, "epoch": 851, "n_parameters": 86059856} {"train_lr": 0.0013649009739042414, "train_min_lr": 0.0013649009739042414, "train_loss": 0.1868662423490045, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012197674637158902, "epoch": 852, "n_parameters": 86059856} {"train_lr": 0.0013619603612933897, "train_min_lr": 0.0013619603612933897, "train_loss": 0.1868487473589201, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011893475031217512, "epoch": 853, "n_parameters": 86059856} {"train_lr": 0.0013590203071035607, "train_min_lr": 0.0013590203071035607, "train_loss": 0.18685642822096363, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01230551906897185, "epoch": 854, "n_parameters": 86059856} {"train_lr": 0.0013560808228126077, "train_min_lr": 0.0013560808228126077, "train_loss": 0.1868345533223011, "train_loss_scale": 532690.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 855, "n_parameters": 86059856} {"train_lr": 0.0013531419198961616, "train_min_lr": 0.0013531419198961616, "train_loss": 0.18683683208357066, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012037036749414908, "epoch": 856, "n_parameters": 86059856} {"train_lr": 0.0013502036098275803, "train_min_lr": 0.0013502036098275803, "train_loss": 0.18693589466289642, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012741770195321012, "epoch": 857, "n_parameters": 86059856} {"train_lr": 0.0013472659040779105, "train_min_lr": 0.0013472659040779105, "train_loss": 0.18681770708794013, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01219183466850947, "epoch": 858, "n_parameters": 86059856} {"train_lr": 0.0013443288141158347, "train_min_lr": 0.0013443288141158347, "train_loss": 0.18677005840417665, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012059181695804, "epoch": 859, "n_parameters": 86059856} {"train_lr": 0.0013413923514076363, "train_min_lr": 0.0013413923514076363, "train_loss": 0.18681003970619386, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012408316413012262, "epoch": 860, "n_parameters": 86059856} {"train_lr": 0.0013384565274171493, "train_min_lr": 0.0013384565274171493, "train_loss": 0.1867722265291004, "train_loss_scale": 567978.6666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 861, "n_parameters": 86059856} {"train_lr": 0.001335521353605712, "train_min_lr": 0.001335521353605712, "train_loss": 0.18676764524589556, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012477269954382418, "epoch": 862, "n_parameters": 86059856} {"train_lr": 0.0013325868414321238, "train_min_lr": 0.0013325868414321238, "train_loss": 0.18677478219764546, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012096517617838124, "epoch": 863, "n_parameters": 86059856} {"train_lr": 0.0013296530023526054, "train_min_lr": 0.0013296530023526054, "train_loss": 0.18674777677425972, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012147468762496153, "epoch": 864, "n_parameters": 86059856} {"train_lr": 0.0013267198478207455, "train_min_lr": 0.0013267198478207455, "train_loss": 0.18672915948077273, "train_loss_scale": 485638.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 865, "n_parameters": 86059856} {"train_lr": 0.0013237873892874622, "train_min_lr": 0.0013237873892874622, "train_loss": 0.1867384419609339, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01251761513786056, "epoch": 866, "n_parameters": 86059856} {"train_lr": 0.0013208556382009557, "train_min_lr": 0.0013208556382009557, "train_loss": 0.18669038692799708, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012345940200014947, "epoch": 867, "n_parameters": 86059856} {"train_lr": 0.0013179246060066679, "train_min_lr": 0.0013179246060066679, "train_loss": 0.18672773975114793, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01261982917546844, "epoch": 868, "n_parameters": 86059856} {"train_lr": 0.0013149943041472286, "train_min_lr": 0.0013149943041472286, "train_loss": 0.18671045472057393, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012204236822584882, "epoch": 869, "n_parameters": 86059856} {"train_lr": 0.0013120647440624186, "train_min_lr": 0.0013120647440624186, "train_loss": 0.1866687673370903, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012312859183368392, "epoch": 870, "n_parameters": 86059856} {"train_lr": 0.0013091359371891224, "train_min_lr": 0.0013091359371891224, "train_loss": 0.18669717540391362, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012386643002406718, "epoch": 871, "n_parameters": 86059856} {"train_lr": 0.0013062078949612887, "train_min_lr": 0.0013062078949612887, "train_loss": 0.18664841609409985, "train_loss_scale": 455391.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012175947499389831, "epoch": 872, "n_parameters": 86059856} {"train_lr": 0.0013032806288098726, "train_min_lr": 0.0013032806288098726, "train_loss": 0.18665993045299098, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012551944099892026, "epoch": 873, "n_parameters": 86059856} {"train_lr": 0.001300354150162807, "train_min_lr": 0.001300354150162807, "train_loss": 0.18664655095945376, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012272897991351783, "epoch": 874, "n_parameters": 86059856} {"train_lr": 0.0012974284704449469, "train_min_lr": 0.0012974284704449469, "train_loss": 0.18663195650188777, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0123217216012283, "epoch": 875, "n_parameters": 86059856} {"train_lr": 0.0012945036010780276, "train_min_lr": 0.0012945036010780276, "train_loss": 0.18657084356826276, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012525808588506129, "epoch": 876, "n_parameters": 86059856} {"train_lr": 0.0012915795534806228, "train_min_lr": 0.0012915795534806228, "train_loss": 0.18659591419287982, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012546620612295393, "epoch": 877, "n_parameters": 86059856} {"train_lr": 0.0012886563390680972, "train_min_lr": 0.0012886563390680972, "train_loss": 0.18658277838347623, "train_loss_scale": 434386.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 878, "n_parameters": 86059856} {"train_lr": 0.0012857339692525628, "train_min_lr": 0.0012857339692525628, "train_loss": 0.18659566761925817, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012576386257886695, "epoch": 879, "n_parameters": 86059856} {"train_lr": 0.0012828124554428338, "train_min_lr": 0.0012828124554428338, "train_loss": 0.18655313993207154, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012408220579322332, "epoch": 880, "n_parameters": 86059856} {"train_lr": 0.0012798918090443838, "train_min_lr": 0.0012798918090443838, "train_loss": 0.1865725222772035, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012509021821479576, "epoch": 881, "n_parameters": 86059856} {"train_lr": 0.0012769720414592995, "train_min_lr": 0.0012769720414592995, "train_loss": 0.18650894058056366, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012200842435376193, "epoch": 882, "n_parameters": 86059856} {"train_lr": 0.0012740531640862353, "train_min_lr": 0.0012740531640862353, "train_loss": 0.1864929006446917, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012538847934383038, "epoch": 883, "n_parameters": 86059856} {"train_lr": 0.0012711351883203758, "train_min_lr": 0.0012711351883203758, "train_loss": 0.18650771730627197, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012385922567680096, "epoch": 884, "n_parameters": 86059856} {"train_lr": 0.001268218125553377, "train_min_lr": 0.001268218125553377, "train_loss": 0.18648534308139902, "train_loss_scale": 506643.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012822813536518086, "epoch": 885, "n_parameters": 86059856} {"train_lr": 0.0012653019871733367, "train_min_lr": 0.0012653019871733367, "train_loss": 0.18649748224430743, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01260277753074964, "epoch": 886, "n_parameters": 86059856} {"train_lr": 0.0012623867845647414, "train_min_lr": 0.0012623867845647414, "train_loss": 0.18647391605787936, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012678422284527468, "epoch": 887, "n_parameters": 86059856} {"train_lr": 0.0012594725291084278, "train_min_lr": 0.0012594725291084278, "train_loss": 0.18643776291551498, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 888, "n_parameters": 86059856} {"train_lr": 0.0012565592321815298, "train_min_lr": 0.0012565592321815298, "train_loss": 0.18645210502645335, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012797491630921379, "epoch": 889, "n_parameters": 86059856} {"train_lr": 0.0012536469051574448, "train_min_lr": 0.0012536469051574448, "train_loss": 0.18642350418779713, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012400007150016533, "epoch": 890, "n_parameters": 86059856} {"train_lr": 0.0012507355594057784, "train_min_lr": 0.0012507355594057784, "train_loss": 0.1864273051241747, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012736745671225855, "epoch": 891, "n_parameters": 86059856} {"train_lr": 0.001247825206292309, "train_min_lr": 0.001247825206292309, "train_loss": 0.18642165821093398, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012533823398347849, "epoch": 892, "n_parameters": 86059856} {"train_lr": 0.0012449158571789395, "train_min_lr": 0.0012449158571789395, "train_loss": 0.1864078766785753, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013134041678709671, "epoch": 893, "n_parameters": 86059856} {"train_lr": 0.0012420075234236513, "train_min_lr": 0.0012420075234236513, "train_loss": 0.1863921264735743, "train_loss_scale": 314236.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012346694917561343, "epoch": 894, "n_parameters": 86059856} {"train_lr": 0.0012391002163804619, "train_min_lr": 0.0012391002163804619, "train_loss": 0.18636782445873207, "train_loss_scale": 458752.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 895, "n_parameters": 86059856} {"train_lr": 0.0012361939473993834, "train_min_lr": 0.0012361939473993834, "train_loss": 0.18636852111189794, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012972510715301793, "epoch": 896, "n_parameters": 86059856} {"train_lr": 0.0012332887278263727, "train_min_lr": 0.0012332887278263727, "train_loss": 0.18637935045318535, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01285011354738321, "epoch": 897, "n_parameters": 86059856} {"train_lr": 0.0012303845690032904, "train_min_lr": 0.0012303845690032904, "train_loss": 0.18635459003659585, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01255812412366653, "epoch": 898, "n_parameters": 86059856} {"train_lr": 0.0012274814822678573, "train_min_lr": 0.0012274814822678573, "train_loss": 0.1863397983714747, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012756119715049863, "epoch": 899, "n_parameters": 86059856} {"train_lr": 0.0012245794789536078, "train_min_lr": 0.0012245794789536078, "train_loss": 0.18631683301944763, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012649493930765834, "epoch": 900, "n_parameters": 86059856} {"train_lr": 0.0012216785703898449, "train_min_lr": 0.0012216785703898449, "train_loss": 0.18629838689827385, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012890897631549682, "epoch": 901, "n_parameters": 86059856} {"train_lr": 0.0012187787679016017, "train_min_lr": 0.0012187787679016017, "train_loss": 0.18630585408148667, "train_loss_scale": 482277.74358974356, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01276383583004085, "epoch": 902, "n_parameters": 86059856} {"train_lr": 0.0012158800828095892, "train_min_lr": 0.0012158800828095892, "train_loss": 0.18628536349234098, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012698879981867205, "epoch": 903, "n_parameters": 86059856} {"train_lr": 0.0012129825264301601, "train_min_lr": 0.0012129825264301601, "train_loss": 0.18630039635806894, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012603298949006084, "epoch": 904, "n_parameters": 86059856} {"train_lr": 0.001210086110075257, "train_min_lr": 0.001210086110075257, "train_loss": 0.18624731046386445, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012817471550825315, "epoch": 905, "n_parameters": 86059856} {"train_lr": 0.0012071908450523715, "train_min_lr": 0.0012071908450523715, "train_loss": 0.18623621065694934, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012763593304090394, "epoch": 906, "n_parameters": 86059856} {"train_lr": 0.0012042967426645064, "train_min_lr": 0.0012042967426645064, "train_loss": 0.18625285094365096, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012885901017281681, "epoch": 907, "n_parameters": 86059856} {"train_lr": 0.0012014038142101181, "train_min_lr": 0.0012014038142101181, "train_loss": 0.18625482936533025, "train_loss_scale": 529329.2307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 908, "n_parameters": 86059856} {"train_lr": 0.0011985120709830882, "train_min_lr": 0.0011985120709830882, "train_loss": 0.18622115234104106, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012843068420457153, "epoch": 909, "n_parameters": 86059856} {"train_lr": 0.0011956215242726607, "train_min_lr": 0.0011956215242726607, "train_loss": 0.18623738792032385, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012991603675823754, "epoch": 910, "n_parameters": 86059856} {"train_lr": 0.0011927321853634168, "train_min_lr": 0.0011927321853634168, "train_loss": 0.18619332877465355, "train_loss_scale": 421782.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 911, "n_parameters": 86059856} {"train_lr": 0.001189844065535221, "train_min_lr": 0.001189844065535221, "train_loss": 0.18619351200043008, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012990307203159692, "epoch": 912, "n_parameters": 86059856} {"train_lr": 0.0011869571760631749, "train_min_lr": 0.0011869571760631749, "train_loss": 0.18617287398769689, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013295201044410275, "epoch": 913, "n_parameters": 86059856} {"train_lr": 0.0011840715282175822, "train_min_lr": 0.0011840715282175822, "train_loss": 0.18611968982702073, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013078882642544042, "epoch": 914, "n_parameters": 86059856} {"train_lr": 0.0011811871332638945, "train_min_lr": 0.0011811871332638945, "train_loss": 0.1861313398127468, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012887880510578934, "epoch": 915, "n_parameters": 86059856} {"train_lr": 0.001178304002462676, "train_min_lr": 0.001178304002462676, "train_loss": 0.18612515064887702, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013318298345742127, "epoch": 916, "n_parameters": 86059856} {"train_lr": 0.001175422147069553, "train_min_lr": 0.001175422147069553, "train_loss": 0.18613512311369562, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01283065122194015, "epoch": 917, "n_parameters": 86059856} {"train_lr": 0.0011725415783351723, "train_min_lr": 0.0011725415783351723, "train_loss": 0.18610204607606506, "train_loss_scale": 333561.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 918, "n_parameters": 86059856} {"train_lr": 0.0011696623075051608, "train_min_lr": 0.0011696623075051608, "train_loss": 0.18609533252385566, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013099396004317662, "epoch": 919, "n_parameters": 86059856} {"train_lr": 0.0011667843458200756, "train_min_lr": 0.0011667843458200756, "train_loss": 0.1860859775557541, "train_loss_scale": 185265.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 920, "n_parameters": 86059856} {"train_lr": 0.001163907704515365, "train_min_lr": 0.001163907704515365, "train_loss": 0.18605981805385688, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013401542933514485, "epoch": 921, "n_parameters": 86059856} {"train_lr": 0.001161032394821319, "train_min_lr": 0.001161032394821319, "train_loss": 0.18607663869475707, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012902329225117007, "epoch": 922, "n_parameters": 86059856} {"train_lr": 0.0011581584279630355, "train_min_lr": 0.0011581584279630355, "train_loss": 0.18605167259318897, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012760349155332033, "epoch": 923, "n_parameters": 86059856} {"train_lr": 0.0011552858151603633, "train_min_lr": 0.0011552858151603633, "train_loss": 0.18603451129120702, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012996320496313274, "epoch": 924, "n_parameters": 86059856} {"train_lr": 0.0011524145676278675, "train_min_lr": 0.0011524145676278675, "train_loss": 0.18598027015701893, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013137610339655135, "epoch": 925, "n_parameters": 86059856} {"train_lr": 0.0011495446965747841, "train_min_lr": 0.0011495446965747841, "train_loss": 0.1860254887992946, "train_loss_scale": 154177.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013621960154686792, "epoch": 926, "n_parameters": 86059856} {"train_lr": 0.0011466762132049761, "train_min_lr": 0.0011466762132049761, "train_loss": 0.18599839110333377, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013081193313551828, "epoch": 927, "n_parameters": 86059856} {"train_lr": 0.0011438091287168863, "train_min_lr": 0.0011438091287168863, "train_loss": 0.18600404500746384, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01321541912889538, "epoch": 928, "n_parameters": 86059856} {"train_lr": 0.001140943454303497, "train_min_lr": 0.001140943454303497, "train_loss": 0.1859999125620398, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013265320661071783, "epoch": 929, "n_parameters": 86059856} {"train_lr": 0.001138079201152288, "train_min_lr": 0.001138079201152288, "train_loss": 0.18597360930811518, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013125733350021526, "epoch": 930, "n_parameters": 86059856} {"train_lr": 0.0011352163804451891, "train_min_lr": 0.0011352163804451891, "train_loss": 0.18598634615325585, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013465786636329424, "epoch": 931, "n_parameters": 86059856} {"train_lr": 0.0011323550033585377, "train_min_lr": 0.0011323550033585377, "train_loss": 0.18594828117877626, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013611718501824025, "epoch": 932, "n_parameters": 86059856} {"train_lr": 0.0011294950810630338, "train_min_lr": 0.0011294950810630338, "train_loss": 0.18589447333047596, "train_loss_scale": 462953.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01311026910881106, "epoch": 933, "n_parameters": 86059856} {"train_lr": 0.001126636624723699, "train_min_lr": 0.001126636624723699, "train_loss": 0.185929658357054, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01313708131858267, "epoch": 934, "n_parameters": 86059856} {"train_lr": 0.001123779645499835, "train_min_lr": 0.001123779645499835, "train_loss": 0.18593404181779194, "train_loss_scale": 492360.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 935, "n_parameters": 86059856} {"train_lr": 0.0011209241545449753, "train_min_lr": 0.0011209241545449753, "train_loss": 0.1859232025961272, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01299143588850991, "epoch": 936, "n_parameters": 86059856} {"train_lr": 0.001118070163006838, "train_min_lr": 0.001118070163006838, "train_loss": 0.18586774775161383, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01342296772576773, "epoch": 937, "n_parameters": 86059856} {"train_lr": 0.0011152176820272937, "train_min_lr": 0.0011152176820272937, "train_loss": 0.18588228463434064, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012794331883271344, "epoch": 938, "n_parameters": 86059856} {"train_lr": 0.0011123667227423146, "train_min_lr": 0.0011123667227423146, "train_loss": 0.1858291948476854, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013291479285376577, "epoch": 939, "n_parameters": 86059856} {"train_lr": 0.0011066694137701917, "train_min_lr": 0.0011066694137701917, "train_loss": 0.18582752730864555, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013376044622885112, "epoch": 941, "n_parameters": 86059856} {"train_lr": 0.001103823086325113, "train_min_lr": 0.001103823086325113, "train_loss": 0.1858263379559876, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01348097668065188, "epoch": 942, "n_parameters": 86059856} {"train_lr": 0.0011009783250586459, "train_min_lr": 0.0011009783250586459, "train_loss": 0.18582012322253716, "train_loss_scale": 448669.53846153844, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013221699932518486, "epoch": 943, "n_parameters": 86059856} {"train_lr": 0.0010981351410766207, "train_min_lr": 0.0010981351410766207, "train_loss": 0.18579725918766016, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013365223221719647, "epoch": 944, "n_parameters": 86059856} {"train_lr": 0.001095293545478719, "train_min_lr": 0.001095293545478719, "train_loss": 0.1858087403114694, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013274003432180064, "epoch": 945, "n_parameters": 86059856} {"train_lr": 0.0010924535493584122, "train_min_lr": 0.0010924535493584122, "train_loss": 0.18576867159647056, "train_loss_scale": 455391.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 946, "n_parameters": 86059856} {"train_lr": 0.0010896151638029327, "train_min_lr": 0.0010896151638029327, "train_loss": 0.18576672392037624, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013299465615254564, "epoch": 947, "n_parameters": 86059856} {"train_lr": 0.0010867783998932247, "train_min_lr": 0.0010867783998932247, "train_loss": 0.18575090064237323, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013773917766789405, "epoch": 948, "n_parameters": 86059856} {"train_lr": 0.0010839432687039, "train_min_lr": 0.0010839432687039, "train_loss": 0.18574103642589387, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013621785713789554, "epoch": 949, "n_parameters": 86059856} {"train_lr": 0.0010811097813031988, "train_min_lr": 0.0010811097813031988, "train_loss": 0.18573756320760226, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013273792977158267, "epoch": 950, "n_parameters": 86059856} {"train_lr": 0.0010782779487529402, "train_min_lr": 0.0010782779487529402, "train_loss": 0.18566622361779594, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013604517193296207, "epoch": 951, "n_parameters": 86059856} {"train_lr": 0.0010754477821084867, "train_min_lr": 0.0010754477821084867, "train_loss": 0.18569348317284423, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013551082285956886, "epoch": 952, "n_parameters": 86059856} {"train_lr": 0.0010726192924186942, "train_min_lr": 0.0010726192924186942, "train_loss": 0.18570543875774512, "train_loss_scale": 277267.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 953, "n_parameters": 86059856} {"train_lr": 0.0010697924907258757, "train_min_lr": 0.0010697924907258757, "train_loss": 0.18567187048327655, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013811387539578553, "epoch": 954, "n_parameters": 86059856} {"train_lr": 0.0010669673880657497, "train_min_lr": 0.0010669673880657497, "train_loss": 0.18568501020304096, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013696479115587397, "epoch": 955, "n_parameters": 86059856} {"train_lr": 0.0010641439954674056, "train_min_lr": 0.0010641439954674056, "train_loss": 0.18566424432449424, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014034761515899729, "epoch": 956, "n_parameters": 86059856} {"train_lr": 0.0010613223239532518, "train_min_lr": 0.0010613223239532518, "train_loss": 0.18561845262630436, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013378902738436293, "epoch": 957, "n_parameters": 86059856} {"train_lr": 0.0010585023845389838, "train_min_lr": 0.0010585023845389838, "train_loss": 0.1856500839527983, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014022546671092128, "epoch": 958, "n_parameters": 86059856} {"train_lr": 0.0010556841882335324, "train_min_lr": 0.0010556841882335324, "train_loss": 0.18559989216140446, "train_loss_scale": 362968.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013760090527984385, "epoch": 959, "n_parameters": 86059856} {"train_lr": 0.0010528677460390219, "train_min_lr": 0.0010528677460390219, "train_loss": 0.185618185175535, "train_loss_scale": 420942.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 960, "n_parameters": 86059856} {"train_lr": 0.0010500530689507312, "train_min_lr": 0.0010500530689507312, "train_loss": 0.18554142873901397, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01340694749692025, "epoch": 961, "n_parameters": 86059856} {"train_lr": 0.0010472401679570446, "train_min_lr": 0.0010472401679570446, "train_loss": 0.1855911914175615, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013791985162056219, "epoch": 962, "n_parameters": 86059856} {"train_lr": 0.0010444290540394176, "train_min_lr": 0.0010444290540394176, "train_loss": 0.1855207487451247, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013395884963803185, "epoch": 963, "n_parameters": 86059856} {"train_lr": 0.0010416197381723248, "train_min_lr": 0.0010416197381723248, "train_loss": 0.18555150623433292, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013949428607399264, "epoch": 964, "n_parameters": 86059856} {"train_lr": 0.0010388122313232221, "train_min_lr": 0.0010388122313232221, "train_loss": 0.18556719456011286, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013925393694080412, "epoch": 965, "n_parameters": 86059856} {"train_lr": 0.0010360065444525053, "train_min_lr": 0.0010360065444525053, "train_loss": 0.18549004629946864, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013739814731077505, "epoch": 966, "n_parameters": 86059856} {"train_lr": 0.0010332026885134643, "train_min_lr": 0.0010332026885134643, "train_loss": 0.1855192078420749, "train_loss_scale": 520086.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013953401101156114, "epoch": 967, "n_parameters": 86059856} {"train_lr": 0.0010304006744522387, "train_min_lr": 0.0010304006744522387, "train_loss": 0.185534288157494, "train_loss_scale": 309195.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 968, "n_parameters": 86059856} {"train_lr": 0.00102760051320778, "train_min_lr": 0.00102760051320778, "train_loss": 0.1854691139768618, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01389838177830172, "epoch": 969, "n_parameters": 86059856} {"train_lr": 0.0010248022157118056, "train_min_lr": 0.0010248022157118056, "train_loss": 0.1855005307827527, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013535887319355821, "epoch": 970, "n_parameters": 86059856} {"train_lr": 0.0010220057928887573, "train_min_lr": 0.0010220057928887573, "train_loss": 0.18550104791155228, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01369796062592799, "epoch": 971, "n_parameters": 86059856} {"train_lr": 0.001019211255655757, "train_min_lr": 0.001019211255655757, "train_loss": 0.1854405540566987, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013840979526344782, "epoch": 972, "n_parameters": 86059856} {"train_lr": 0.0010164186149225658, "train_min_lr": 0.0010164186149225658, "train_loss": 0.18544109671925887, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013738586749427784, "epoch": 973, "n_parameters": 86059856} {"train_lr": 0.0010136278815915433, "train_min_lr": 0.0010136278815915433, "train_loss": 0.18544242449868947, "train_loss_scale": 369690.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014039937287378006, "epoch": 974, "n_parameters": 86059856} {"train_lr": 0.0010108390665575985, "train_min_lr": 0.0010108390665575985, "train_loss": 0.18543374231562781, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01374888977406021, "epoch": 975, "n_parameters": 86059856} {"train_lr": 0.0010080521807081556, "train_min_lr": 0.0010080521807081556, "train_loss": 0.18541701109363481, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014278438825828906, "epoch": 976, "n_parameters": 86059856} {"train_lr": 0.0010052672349231044, "train_min_lr": 0.0010052672349231044, "train_loss": 0.18537994641930056, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01391464247344396, "epoch": 977, "n_parameters": 86059856} {"train_lr": 0.001002484240074762, "train_min_lr": 0.001002484240074762, "train_loss": 0.1853894245607826, "train_loss_scale": 282308.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 978, "n_parameters": 86059856} {"train_lr": 0.0009997032070278265, "train_min_lr": 0.0009997032070278265, "train_loss": 0.1853455001106247, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0139496740556728, "epoch": 979, "n_parameters": 86059856} {"train_lr": 0.000996924146639344, "train_min_lr": 0.000996924146639344, "train_loss": 0.1853576814636397, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01369215515203392, "epoch": 980, "n_parameters": 86059856} {"train_lr": 0.0009941470697586525, "train_min_lr": 0.0009941470697586525, "train_loss": 0.18536002307127303, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01393441311441935, "epoch": 981, "n_parameters": 86059856} {"train_lr": 0.0009913719872273494, "train_min_lr": 0.0009913719872273494, "train_loss": 0.18535063994857362, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013847881241534382, "epoch": 982, "n_parameters": 86059856} {"train_lr": 0.000988598909879245, "train_min_lr": 0.000988598909879245, "train_loss": 0.18533253331238833, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014657343107944306, "epoch": 983, "n_parameters": 86059856} {"train_lr": 0.0009858278485403225, "train_min_lr": 0.0009858278485403225, "train_loss": 0.18529644295071754, "train_loss_scale": 396576.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014077255896364268, "epoch": 984, "n_parameters": 86059856} {"train_lr": 0.0009830588140286947, "train_min_lr": 0.0009830588140286947, "train_loss": 0.1852907748439182, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01440525373730522, "epoch": 985, "n_parameters": 86059856} {"train_lr": 0.0009802918171545627, "train_min_lr": 0.0009802918171545627, "train_loss": 0.18530408697775924, "train_loss_scale": 352045.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 986, "n_parameters": 86059856} {"train_lr": 0.0009775268687201692, "train_min_lr": 0.0009775268687201692, "train_loss": 0.18525640219330597, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013946466786691394, "epoch": 987, "n_parameters": 86059856} {"train_lr": 0.0009747639795197641, "train_min_lr": 0.0009747639795197641, "train_loss": 0.18523349600414243, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014501061281547524, "epoch": 988, "n_parameters": 86059856} {"train_lr": 0.0009720031603395554, "train_min_lr": 0.0009720031603395554, "train_loss": 0.18520301858034843, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014044882807259759, "epoch": 989, "n_parameters": 86059856} {"train_lr": 0.000969244421957671, "train_min_lr": 0.000969244421957671, "train_loss": 0.18521524589652053, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014309941495482165, "epoch": 990, "n_parameters": 86059856} {"train_lr": 0.0009664877751441156, "train_min_lr": 0.0009664877751441156, "train_loss": 0.18519694570046014, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014227828064646859, "epoch": 991, "n_parameters": 86059856} {"train_lr": 0.0009637332306607262, "train_min_lr": 0.0009637332306607262, "train_loss": 0.1852190952974921, "train_loss_scale": 326839.7948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014088439450670894, "epoch": 992, "n_parameters": 86059856} {"train_lr": 0.0009609807992611344, "train_min_lr": 0.0009609807992611344, "train_loss": 0.1852099404682238, "train_loss_scale": 408339.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 993, "n_parameters": 86059856} {"train_lr": 0.0009582304916907244, "train_min_lr": 0.0009582304916907244, "train_loss": 0.1851631857168216, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014216807455970691, "epoch": 994, "n_parameters": 86059856} {"train_lr": 0.0009554823186865848, "train_min_lr": 0.0009554823186865848, "train_loss": 0.18517456136237925, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014461579884832295, "epoch": 995, "n_parameters": 86059856} {"train_lr": 0.0009527362909774747, "train_min_lr": 0.0009527362909774747, "train_loss": 0.18517363503073844, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014374740788928019, "epoch": 996, "n_parameters": 86059856} {"train_lr": 0.0009499924192837745, "train_min_lr": 0.0009499924192837745, "train_loss": 0.1851130769086572, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014291233756245138, "epoch": 997, "n_parameters": 86059856} {"train_lr": 0.0009472507143174505, "train_min_lr": 0.0009472507143174505, "train_loss": 0.18511868505858076, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013720070220864354, "epoch": 998, "n_parameters": 86059856} {"train_lr": 0.0009445111867820084, "train_min_lr": 0.0009445111867820084, "train_loss": 0.18509430247239578, "train_loss_scale": 270546.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014653006938692087, "epoch": 999, "n_parameters": 86059856} {"train_lr": 0.0009417738473724554, "train_min_lr": 0.0009417738473724554, "train_loss": 0.18511771519954962, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014614826674238803, "epoch": 1000, "n_parameters": 86059856} {"train_lr": 0.0009390387067752538, "train_min_lr": 0.0009390387067752538, "train_loss": 0.1850877169113702, "train_loss_scale": 456231.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1001, "n_parameters": 86059856} {"train_lr": 0.000936305775668283, "train_min_lr": 0.000936305775668283, "train_loss": 0.1850965568342079, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014444436952591134, "epoch": 1002, "n_parameters": 86059856} {"train_lr": 0.0009335750647207968, "train_min_lr": 0.0009335750647207968, "train_loss": 0.18508790827427918, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014360869243645515, "epoch": 1003, "n_parameters": 86059856} {"train_lr": 0.0009308465845933817, "train_min_lr": 0.0009308465845933817, "train_loss": 0.1850427211716007, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014615529575026952, "epoch": 1004, "n_parameters": 86059856} {"train_lr": 0.0009281203459379158, "train_min_lr": 0.0009281203459379158, "train_loss": 0.18499890620557544, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014399845466519205, "epoch": 1005, "n_parameters": 86059856} {"train_lr": 0.0009253963593975229, "train_min_lr": 0.0009253963593975229, "train_loss": 0.18500259279822692, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014408924542248057, "epoch": 1006, "n_parameters": 86059856} {"train_lr": 0.0009226746356065399, "train_min_lr": 0.0009226746356065399, "train_loss": 0.185032724074494, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014944217905091742, "epoch": 1007, "n_parameters": 86059856} {"train_lr": 0.0009199551851904669, "train_min_lr": 0.0009199551851904669, "train_loss": 0.18497946629157433, "train_loss_scale": 484798.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014365351985757932, "epoch": 1008, "n_parameters": 86059856} {"train_lr": 0.0009172380187659294, "train_min_lr": 0.0009172380187659294, "train_loss": 0.1849823770029709, "train_loss_scale": 266345.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1009, "n_parameters": 86059856} {"train_lr": 0.000914523146940636, "train_min_lr": 0.000914523146940636, "train_loss": 0.18499868581644618, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014885639986739708, "epoch": 1010, "n_parameters": 86059856} {"train_lr": 0.0009118105803133375, "train_min_lr": 0.0009118105803133375, "train_loss": 0.18496386419074276, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0145391130921407, "epoch": 1011, "n_parameters": 86059856} {"train_lr": 0.000909100329473786, "train_min_lr": 0.000909100329473786, "train_loss": 0.18490834281636545, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0140082577500158, "epoch": 1012, "n_parameters": 86059856} {"train_lr": 0.0009063924050026917, "train_min_lr": 0.0009063924050026917, "train_loss": 0.18493132298000348, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014809293720202569, "epoch": 1013, "n_parameters": 86059856} {"train_lr": 0.0009036868174716841, "train_min_lr": 0.0009036868174716841, "train_loss": 0.1848872938694862, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014897680489155345, "epoch": 1014, "n_parameters": 86059856} {"train_lr": 0.0009009835774432676, "train_min_lr": 0.0009009835774432676, "train_loss": 0.1849216734453176, "train_loss_scale": 286509.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1015, "n_parameters": 86059856} {"train_lr": 0.000898282695470784, "train_min_lr": 0.000898282695470784, "train_loss": 0.1849125844414513, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014215204470719282, "epoch": 1016, "n_parameters": 86059856} {"train_lr": 0.0008955841820983682, "train_min_lr": 0.0008955841820983682, "train_loss": 0.18485792350764266, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014692962811423991, "epoch": 1017, "n_parameters": 86059856} {"train_lr": 0.0008928880478609086, "train_min_lr": 0.0008928880478609086, "train_loss": 0.18486333064114055, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014493209739717154, "epoch": 1018, "n_parameters": 86059856} {"train_lr": 0.000890194303284004, "train_min_lr": 0.000890194303284004, "train_loss": 0.18487215941198742, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014889277162579581, "epoch": 1019, "n_parameters": 86059856} {"train_lr": 0.0008875029588839267, "train_min_lr": 0.0008875029588839267, "train_loss": 0.1848413508015279, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015083639100432778, "epoch": 1020, "n_parameters": 86059856} {"train_lr": 0.0008848140251675762, "train_min_lr": 0.0008848140251675762, "train_loss": 0.1848256605414626, "train_loss_scale": 280628.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014364343730565637, "epoch": 1021, "n_parameters": 86059856} {"train_lr": 0.0008821275126324441, "train_min_lr": 0.0008821275126324441, "train_loss": 0.18484199342007437, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014561133826366411, "epoch": 1022, "n_parameters": 86059856} {"train_lr": 0.0008794434317665664, "train_min_lr": 0.0008794434317665664, "train_loss": 0.18478056140100727, "train_loss_scale": 362968.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1023, "n_parameters": 86059856} {"train_lr": 0.0008767617930484874, "train_min_lr": 0.0008767617930484874, "train_loss": 0.1847711594429058, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014795418279484296, "epoch": 1024, "n_parameters": 86059856} {"train_lr": 0.000874082606947218, "train_min_lr": 0.000874082606947218, "train_loss": 0.18478370102074665, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014487334777815985, "epoch": 1025, "n_parameters": 86059856} {"train_lr": 0.0008714058839221914, "train_min_lr": 0.0008714058839221914, "train_loss": 0.1848077931596587, "train_loss_scale": 223914.66666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1026, "n_parameters": 86059856} {"train_lr": 0.0008687316344232313, "train_min_lr": 0.0008687316344232313, "train_loss": 0.18479095965337294, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014321295248989303, "epoch": 1027, "n_parameters": 86059856} {"train_lr": 0.0008660598688904959, "train_min_lr": 0.0008660598688904959, "train_loss": 0.18470197851554707, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015092535577236842, "epoch": 1028, "n_parameters": 86059856} {"train_lr": 0.0008633905977544545, "train_min_lr": 0.0008633905977544545, "train_loss": 0.1847229176141226, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015345432503053393, "epoch": 1029, "n_parameters": 86059856} {"train_lr": 0.0008607238314358315, "train_min_lr": 0.0008607238314358315, "train_loss": 0.1846818290053843, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01469265512871341, "epoch": 1030, "n_parameters": 86059856} {"train_lr": 0.0008580595803455788, "train_min_lr": 0.0008580595803455788, "train_loss": 0.18470594928695414, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01482482875088373, "epoch": 1031, "n_parameters": 86059856} {"train_lr": 0.0008553978548848254, "train_min_lr": 0.0008553978548848254, "train_loss": 0.18467560135878813, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015278603051765224, "epoch": 1032, "n_parameters": 86059856} {"train_lr": 0.0008527386654448397, "train_min_lr": 0.0008527386654448397, "train_loss": 0.1846372116691409, "train_loss_scale": 246600.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015034164961737892, "epoch": 1033, "n_parameters": 86059856} {"train_lr": 0.0008500820224069921, "train_min_lr": 0.0008500820224069921, "train_loss": 0.18466670972963747, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014837132125066068, "epoch": 1034, "n_parameters": 86059856} {"train_lr": 0.0008474279361427084, "train_min_lr": 0.0008474279361427084, "train_loss": 0.18464493463771084, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014991487668922696, "epoch": 1035, "n_parameters": 86059856} {"train_lr": 0.0008447764170134383, "train_min_lr": 0.0008447764170134383, "train_loss": 0.18463839709352797, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015056967317389373, "epoch": 1036, "n_parameters": 86059856} {"train_lr": 0.0008421274753706043, "train_min_lr": 0.0008421274753706043, "train_loss": 0.18458328352500805, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01480208117610369, "epoch": 1037, "n_parameters": 86059856} {"train_lr": 0.0008394811215555701, "train_min_lr": 0.0008394811215555701, "train_loss": 0.18464906686821428, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014880657521410821, "epoch": 1038, "n_parameters": 86059856} {"train_lr": 0.000836837365899592, "train_min_lr": 0.000836837365899592, "train_loss": 0.18458851059086812, "train_loss_scale": 330200.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1039, "n_parameters": 86059856} {"train_lr": 0.0008341962187237897, "train_min_lr": 0.0008341962187237897, "train_loss": 0.18454382041445336, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014715595749947123, "epoch": 1040, "n_parameters": 86059856} {"train_lr": 0.0008315576903390954, "train_min_lr": 0.0008315576903390954, "train_loss": 0.18456318449133483, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01514158800889093, "epoch": 1041, "n_parameters": 86059856} {"train_lr": 0.000828921791046216, "train_min_lr": 0.000828921791046216, "train_loss": 0.18457002638695905, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015114172803572355, "epoch": 1042, "n_parameters": 86059856} {"train_lr": 0.0008262885311355988, "train_min_lr": 0.0008262885311355988, "train_loss": 0.1845213358875555, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015218014673640331, "epoch": 1043, "n_parameters": 86059856} {"train_lr": 0.0008236579208873839, "train_min_lr": 0.0008236579208873839, "train_loss": 0.18447788659101114, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015297512577560086, "epoch": 1044, "n_parameters": 86059856} {"train_lr": 0.00082102997057137, "train_min_lr": 0.00082102997057137, "train_loss": 0.18450812751857135, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015150305525495265, "epoch": 1045, "n_parameters": 86059856} {"train_lr": 0.0008184046904469691, "train_min_lr": 0.0008184046904469691, "train_loss": 0.18448303320492881, "train_loss_scale": 472195.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015177974602780663, "epoch": 1046, "n_parameters": 86059856} {"train_lr": 0.0008157820907631688, "train_min_lr": 0.0008157820907631688, "train_loss": 0.1844809598599871, "train_loss_scale": 513365.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1047, "n_parameters": 86059856} {"train_lr": 0.0008131621817584946, "train_min_lr": 0.0008131621817584946, "train_loss": 0.1844744546016535, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014754626488026518, "epoch": 1048, "n_parameters": 86059856} {"train_lr": 0.000810544973660965, "train_min_lr": 0.000810544973660965, "train_loss": 0.1844633491590428, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015453383639956323, "epoch": 1049, "n_parameters": 86059856} {"train_lr": 0.0008079304766880569, "train_min_lr": 0.0008079304766880569, "train_loss": 0.18444765680350172, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015587003961301003, "epoch": 1050, "n_parameters": 86059856} {"train_lr": 0.0008053187010466622, "train_min_lr": 0.0008053187010466622, "train_loss": 0.18443063536706644, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015401648497805955, "epoch": 1051, "n_parameters": 86059856} {"train_lr": 0.0008027096569330467, "train_min_lr": 0.0008027096569330467, "train_loss": 0.18442202469286245, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01527676175838002, "epoch": 1052, "n_parameters": 86059856} {"train_lr": 0.0008001033545328149, "train_min_lr": 0.0008001033545328149, "train_loss": 0.18444073473055583, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015071164959898362, "epoch": 1053, "n_parameters": 86059856} {"train_lr": 0.0007974998040208676, "train_min_lr": 0.0007974998040208676, "train_loss": 0.18437794229994792, "train_loss_scale": 427664.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015146245347121013, "epoch": 1054, "n_parameters": 86059856} {"train_lr": 0.0007948990155613614, "train_min_lr": 0.0007948990155613614, "train_loss": 0.18438823230397433, "train_loss_scale": 425984.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1055, "n_parameters": 86059856} {"train_lr": 0.0007923009993076708, "train_min_lr": 0.0007923009993076708, "train_loss": 0.18435949355793688, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015074494008261424, "epoch": 1056, "n_parameters": 86059856} {"train_lr": 0.0007897057654023456, "train_min_lr": 0.0007897057654023456, "train_loss": 0.18435081179874638, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015687438441464342, "epoch": 1057, "n_parameters": 86059856} {"train_lr": 0.0007871133239770778, "train_min_lr": 0.0007871133239770778, "train_loss": 0.18434986220792127, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015402448885978606, "epoch": 1058, "n_parameters": 86059856} {"train_lr": 0.0007845236851526527, "train_min_lr": 0.0007845236851526527, "train_loss": 0.1843411698937416, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016017223636691388, "epoch": 1059, "n_parameters": 86059856} {"train_lr": 0.0007819368590389165, "train_min_lr": 0.0007819368590389165, "train_loss": 0.18432524452248636, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01563319852217459, "epoch": 1060, "n_parameters": 86059856} {"train_lr": 0.0007793528557347355, "train_min_lr": 0.0007793528557347355, "train_loss": 0.184304360837604, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015703558572568, "epoch": 1061, "n_parameters": 86059856} {"train_lr": 0.000776771685327956, "train_min_lr": 0.000776771685327956, "train_loss": 0.18427614548291343, "train_loss_scale": 281468.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1062, "n_parameters": 86059856} {"train_lr": 0.0007741933578953627, "train_min_lr": 0.0007741933578953627, "train_loss": 0.18425508253037548, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015488601588190366, "epoch": 1063, "n_parameters": 86059856} {"train_lr": 0.0007716178835026436, "train_min_lr": 0.0007716178835026436, "train_loss": 0.18430493942175347, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015231946241468765, "epoch": 1064, "n_parameters": 86059856} {"train_lr": 0.0007690452722043463, "train_min_lr": 0.0007690452722043463, "train_loss": 0.18427133154816544, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015598986820222285, "epoch": 1065, "n_parameters": 86059856} {"train_lr": 0.000766475534043844, "train_min_lr": 0.000766475534043844, "train_loss": 0.1841883639064737, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015624932732839042, "epoch": 1066, "n_parameters": 86059856} {"train_lr": 0.0007639086790532912, "train_min_lr": 0.0007639086790532912, "train_loss": 0.18420836739600277, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015765014382150885, "epoch": 1067, "n_parameters": 86059856} {"train_lr": 0.0007613447172535847, "train_min_lr": 0.0007613447172535847, "train_loss": 0.1842135628625655, "train_loss_scale": 388174.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015581639331335632, "epoch": 1068, "n_parameters": 86059856} {"train_lr": 0.0007587836586543333, "train_min_lr": 0.0007587836586543333, "train_loss": 0.1842059158684256, "train_loss_scale": 340283.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1069, "n_parameters": 86059856} {"train_lr": 0.0007562255132538018, "train_min_lr": 0.0007562255132538018, "train_loss": 0.18416748443045294, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01546223528492145, "epoch": 1070, "n_parameters": 86059856} {"train_lr": 0.000753670291038892, "train_min_lr": 0.000753670291038892, "train_loss": 0.1841758026395184, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015769243753777865, "epoch": 1071, "n_parameters": 86059856} {"train_lr": 0.0007511180019850861, "train_min_lr": 0.0007511180019850861, "train_loss": 0.18414456623558623, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01615771626193936, "epoch": 1072, "n_parameters": 86059856} {"train_lr": 0.0007485686560564195, "train_min_lr": 0.0007485686560564195, "train_loss": 0.18409460346596554, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015418791039011035, "epoch": 1073, "n_parameters": 86059856} {"train_lr": 0.0007460222632054375, "train_min_lr": 0.0007460222632054375, "train_loss": 0.1841226129267269, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015013695730326267, "epoch": 1074, "n_parameters": 86059856} {"train_lr": 0.0007434788333731559, "train_min_lr": 0.0007434788333731559, "train_loss": 0.18408706970512867, "train_loss_scale": 267185.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1075, "n_parameters": 86059856} {"train_lr": 0.0007409383764890203, "train_min_lr": 0.0007409383764890203, "train_loss": 0.1841156736601335, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0160992983280896, "epoch": 1076, "n_parameters": 86059856} {"train_lr": 0.0007384009024708765, "train_min_lr": 0.0007384009024708765, "train_loss": 0.18405969993163568, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016225287741503846, "epoch": 1077, "n_parameters": 86059856} {"train_lr": 0.000735866421224917, "train_min_lr": 0.000735866421224917, "train_loss": 0.18407033767718345, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015834035927979037, "epoch": 1078, "n_parameters": 86059856} {"train_lr": 0.0007333349426456595, "train_min_lr": 0.0007333349426456595, "train_loss": 0.18404377078733, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015768283172152363, "epoch": 1079, "n_parameters": 86059856} {"train_lr": 0.0007308064766158923, "train_min_lr": 0.0007308064766158923, "train_loss": 0.1840355657518674, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01628230389756843, "epoch": 1080, "n_parameters": 86059856} {"train_lr": 0.0007282810330066472, "train_min_lr": 0.0007282810330066472, "train_loss": 0.18406169363058722, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01587379694235726, "epoch": 1081, "n_parameters": 86059856} {"train_lr": 0.0007257586216771538, "train_min_lr": 0.0007257586216771538, "train_loss": 0.1839923697332732, "train_loss_scale": 326839.7948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1082, "n_parameters": 86059856} {"train_lr": 0.0007232392524748043, "train_min_lr": 0.0007232392524748043, "train_loss": 0.18398091071046507, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01596817367662413, "epoch": 1083, "n_parameters": 86059856} {"train_lr": 0.0007207229352351171, "train_min_lr": 0.0007207229352351171, "train_loss": 0.1839864247968086, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015609971922225295, "epoch": 1084, "n_parameters": 86059856} {"train_lr": 0.0007182096797816934, "train_min_lr": 0.0007182096797816934, "train_loss": 0.1839686958835675, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016042172368496466, "epoch": 1085, "n_parameters": 86059856} {"train_lr": 0.0007156994959261803, "train_min_lr": 0.0007156994959261803, "train_loss": 0.18392660175091946, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015999119230307255, "epoch": 1086, "n_parameters": 86059856} {"train_lr": 0.0007131923934682372, "train_min_lr": 0.0007131923934682372, "train_loss": 0.18396277006309575, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016043275185765173, "epoch": 1087, "n_parameters": 86059856} {"train_lr": 0.0007106883821954903, "train_min_lr": 0.0007106883821954903, "train_loss": 0.18394905837396017, "train_loss_scale": 315917.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016082790369788807, "epoch": 1088, "n_parameters": 86059856} {"train_lr": 0.0007081874718835006, "train_min_lr": 0.0007081874718835006, "train_loss": 0.18395010071496168, "train_loss_scale": 410860.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1089, "n_parameters": 86059856} {"train_lr": 0.00070568967229572, "train_min_lr": 0.00070568967229572, "train_loss": 0.18389945795449117, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01595495473474073, "epoch": 1090, "n_parameters": 86059856} {"train_lr": 0.0007031949931834597, "train_min_lr": 0.0007031949931834597, "train_loss": 0.1838842717369493, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016161723731037896, "epoch": 1091, "n_parameters": 86059856} {"train_lr": 0.0007007034442858479, "train_min_lr": 0.0007007034442858479, "train_loss": 0.18388448435908708, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016458721150859043, "epoch": 1092, "n_parameters": 86059856} {"train_lr": 0.0006982150353297889, "train_min_lr": 0.0006982150353297889, "train_loss": 0.18385139212179452, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0161576484156868, "epoch": 1093, "n_parameters": 86059856} {"train_lr": 0.0006957297760299336, "train_min_lr": 0.0006957297760299336, "train_loss": 0.18381335457357076, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01594647518000924, "epoch": 1094, "n_parameters": 86059856} {"train_lr": 0.0006932476760886348, "train_min_lr": 0.0006932476760886348, "train_loss": 0.18383354079933503, "train_loss_scale": 268025.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016262765970821373, "epoch": 1095, "n_parameters": 86059856} {"train_lr": 0.0006907687451959105, "train_min_lr": 0.0006907687451959105, "train_loss": 0.18384233239488915, "train_loss_scale": 319277.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1096, "n_parameters": 86059856} {"train_lr": 0.0006882929930294079, "train_min_lr": 0.0006882929930294079, "train_loss": 0.18380819572708929, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016198748510736875, "epoch": 1097, "n_parameters": 86059856} {"train_lr": 0.000685820429254365, "train_min_lr": 0.000685820429254365, "train_loss": 0.1837851484783758, "train_loss_scale": 237357.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1098, "n_parameters": 86059856} {"train_lr": 0.0006833510635235713, "train_min_lr": 0.0006833510635235713, "train_loss": 0.18374175569997767, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01650019884348298, "epoch": 1099, "n_parameters": 86059856} {"train_lr": 0.0006808849054773309, "train_min_lr": 0.0006808849054773309, "train_loss": 0.18379721752344033, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0166678196655061, "epoch": 1100, "n_parameters": 86059856} {"train_lr": 0.0006784219647434278, "train_min_lr": 0.0006784219647434278, "train_loss": 0.18376472579816786, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01623530420534408, "epoch": 1101, "n_parameters": 86059856} {"train_lr": 0.0006759622509370837, "train_min_lr": 0.0006759622509370837, "train_loss": 0.18374989643239248, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015857733484620277, "epoch": 1102, "n_parameters": 86059856} {"train_lr": 0.0006735057736609213, "train_min_lr": 0.0006735057736609213, "train_loss": 0.18374508049768898, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01689406438396336, "epoch": 1103, "n_parameters": 86059856} {"train_lr": 0.0006710525425049303, "train_min_lr": 0.0006710525425049303, "train_loss": 0.1837382069370972, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01664962871179271, "epoch": 1104, "n_parameters": 86059856} {"train_lr": 0.0006686025670464282, "train_min_lr": 0.0006686025670464282, "train_loss": 0.18368307921366814, "train_loss_scale": 233156.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016486258389284976, "epoch": 1105, "n_parameters": 86059856} {"train_lr": 0.0006661558568500193, "train_min_lr": 0.0006661558568500193, "train_loss": 0.18366222039987454, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016336812551181096, "epoch": 1106, "n_parameters": 86059856} {"train_lr": 0.0006637124214675638, "train_min_lr": 0.0006637124214675638, "train_loss": 0.18363613672912693, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017079000561259303, "epoch": 1107, "n_parameters": 86059856} {"train_lr": 0.000661272270438134, "train_min_lr": 0.000661272270438134, "train_loss": 0.1836303697588543, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016721310207429223, "epoch": 1108, "n_parameters": 86059856} {"train_lr": 0.000658835413287983, "train_min_lr": 0.000658835413287983, "train_loss": 0.18366788413065174, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016499512712471187, "epoch": 1109, "n_parameters": 86059856} {"train_lr": 0.0006564018595305038, "train_min_lr": 0.0006564018595305038, "train_loss": 0.18364044611390012, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016692248047687687, "epoch": 1110, "n_parameters": 86059856} {"train_lr": 0.0006539716186661912, "train_min_lr": 0.0006539716186661912, "train_loss": 0.1835827519520162, "train_loss_scale": 358767.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01698760687110898, "epoch": 1111, "n_parameters": 86059856} {"train_lr": 0.0006515447001826097, "train_min_lr": 0.0006515447001826097, "train_loss": 0.1836190627923665, "train_loss_scale": 290710.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1112, "n_parameters": 86059856} {"train_lr": 0.000649121113554352, "train_min_lr": 0.000649121113554352, "train_loss": 0.18358934864115256, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01700196784431449, "epoch": 1113, "n_parameters": 86059856} {"train_lr": 0.0006467008682430024, "train_min_lr": 0.0006467008682430024, "train_loss": 0.18355741136325285, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01621745175156647, "epoch": 1114, "n_parameters": 86059856} {"train_lr": 0.0006442839736971013, "train_min_lr": 0.0006442839736971013, "train_loss": 0.1835547507298776, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017040813034280945, "epoch": 1115, "n_parameters": 86059856} {"train_lr": 0.0006418704393521104, "train_min_lr": 0.0006418704393521104, "train_loss": 0.18353905602215, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016563770328409586, "epoch": 1116, "n_parameters": 86059856} {"train_lr": 0.0006394602746303688, "train_min_lr": 0.0006394602746303688, "train_loss": 0.18353565571566996, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01641772328637158, "epoch": 1117, "n_parameters": 86059856} {"train_lr": 0.0006370534889410649, "train_min_lr": 0.0006370534889410649, "train_loss": 0.18349511383507305, "train_loss_scale": 266345.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1118, "n_parameters": 86059856} {"train_lr": 0.0006346500916801923, "train_min_lr": 0.0006346500916801923, "train_loss": 0.1834829209300761, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01689743126432101, "epoch": 1119, "n_parameters": 86059856} {"train_lr": 0.0006322500922305184, "train_min_lr": 0.0006322500922305184, "train_loss": 0.18350878443258503, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01704612801161905, "epoch": 1120, "n_parameters": 86059856} {"train_lr": 0.0006298534999615448, "train_min_lr": 0.0006298534999615448, "train_loss": 0.18347411413485995, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016141253326518033, "epoch": 1121, "n_parameters": 86059856} {"train_lr": 0.0006274603242294722, "train_min_lr": 0.0006274603242294722, "train_loss": 0.18346502927418512, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017105861468455538, "epoch": 1122, "n_parameters": 86059856} {"train_lr": 0.0006250705743771617, "train_min_lr": 0.0006250705743771617, "train_loss": 0.18344990749700138, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017303463358145494, "epoch": 1123, "n_parameters": 86059856} {"train_lr": 0.000622684259734102, "train_min_lr": 0.000622684259734102, "train_loss": 0.18346693784070128, "train_loss_scale": 276427.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01661737944978552, "epoch": 1124, "n_parameters": 86059856} {"train_lr": 0.0006203013896163704, "train_min_lr": 0.0006203013896163704, "train_loss": 0.18342446477319568, "train_loss_scale": 328520.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1125, "n_parameters": 86059856} {"train_lr": 0.0006179219733265951, "train_min_lr": 0.0006179219733265951, "train_loss": 0.18335507145652977, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017269829329915155, "epoch": 1126, "n_parameters": 86059856} {"train_lr": 0.0006155460201539222, "train_min_lr": 0.0006155460201539222, "train_loss": 0.18338561444901502, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016994261066429317, "epoch": 1127, "n_parameters": 86059856} {"train_lr": 0.0006131735393739788, "train_min_lr": 0.0006131735393739788, "train_loss": 0.18336389124250183, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016781118611852903, "epoch": 1128, "n_parameters": 86059856} {"train_lr": 0.0006108045402488355, "train_min_lr": 0.0006108045402488355, "train_loss": 0.18332167304097077, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017531092257764287, "epoch": 1129, "n_parameters": 86059856} {"train_lr": 0.0006084390320269679, "train_min_lr": 0.0006084390320269679, "train_loss": 0.18333360028620332, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01752500786768416, "epoch": 1130, "n_parameters": 86059856} {"train_lr": 0.0006060770239432257, "train_min_lr": 0.0006060770239432257, "train_loss": 0.18328778337663373, "train_loss_scale": 275587.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1131, "n_parameters": 86059856} {"train_lr": 0.0006037185252187954, "train_min_lr": 0.0006037185252187954, "train_loss": 0.1833261516637718, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0167203018686567, "epoch": 1132, "n_parameters": 86059856} {"train_lr": 0.0006013635450611616, "train_min_lr": 0.0006013635450611616, "train_loss": 0.18331635075334746, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016842587573382143, "epoch": 1133, "n_parameters": 86059856} {"train_lr": 0.0005990120926640701, "train_min_lr": 0.0005990120926640701, "train_loss": 0.18326917339641696, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017412795977762494, "epoch": 1134, "n_parameters": 86059856} {"train_lr": 0.0005966641772074993, "train_min_lr": 0.0005966641772074993, "train_loss": 0.1832876643524147, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017480647003349777, "epoch": 1135, "n_parameters": 86059856} {"train_lr": 0.0005943198078576163, "train_min_lr": 0.0005943198078576163, "train_loss": 0.18327531655533955, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016955803408144184, "epoch": 1136, "n_parameters": 86059856} {"train_lr": 0.0005919789937667458, "train_min_lr": 0.0005919789937667458, "train_loss": 0.18323636977957228, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017111669781092458, "epoch": 1137, "n_parameters": 86059856} {"train_lr": 0.0005896417440733318, "train_min_lr": 0.0005896417440733318, "train_loss": 0.18323131871576875, "train_loss_scale": 328520.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1138, "n_parameters": 86059856} {"train_lr": 0.0005873080679019029, "train_min_lr": 0.0005873080679019029, "train_loss": 0.18318595779605973, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01737138179715914, "epoch": 1139, "n_parameters": 86059856} {"train_lr": 0.0005849779743630389, "train_min_lr": 0.0005849779743630389, "train_loss": 0.18321579014166042, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0172196893612496, "epoch": 1140, "n_parameters": 86059856} {"train_lr": 0.00058265147255333, "train_min_lr": 0.00058265147255333, "train_loss": 0.18317333706177008, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017209257814101875, "epoch": 1141, "n_parameters": 86059856} {"train_lr": 0.0005803285715553476, "train_min_lr": 0.0005803285715553476, "train_loss": 0.183179095453129, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017189122300642807, "epoch": 1142, "n_parameters": 86059856} {"train_lr": 0.0005780092804376041, "train_min_lr": 0.0005780092804376041, "train_loss": 0.18314190537859806, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016870761081242025, "epoch": 1143, "n_parameters": 86059856} {"train_lr": 0.0005756936082545176, "train_min_lr": 0.0005756936082545176, "train_loss": 0.18314328460763088, "train_loss_scale": 297432.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1144, "n_parameters": 86059856} {"train_lr": 0.0005733815640463811, "train_min_lr": 0.0005733815640463811, "train_loss": 0.1830954427818935, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01736068596932082, "epoch": 1145, "n_parameters": 86059856} {"train_lr": 0.0005710731568393219, "train_min_lr": 0.0005710731568393219, "train_loss": 0.1831068244929879, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017723005921699297, "epoch": 1146, "n_parameters": 86059856} {"train_lr": 0.0005687683956452703, "train_min_lr": 0.0005687683956452703, "train_loss": 0.18307114557888454, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017260532453059196, "epoch": 1147, "n_parameters": 86059856} {"train_lr": 0.0005664672894619201, "train_min_lr": 0.0005664672894619201, "train_loss": 0.18309234875517014, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017143926860836264, "epoch": 1148, "n_parameters": 86059856} {"train_lr": 0.0005641698472727003, "train_min_lr": 0.0005641698472727003, "train_loss": 0.1830519350985877, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017522358022128735, "epoch": 1149, "n_parameters": 86059856} {"train_lr": 0.0005618760780467304, "train_min_lr": 0.0005618760780467304, "train_loss": 0.18304505549633923, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017898191608345278, "epoch": 1150, "n_parameters": 86059856} {"train_lr": 0.0005595859907387952, "train_min_lr": 0.0005595859907387952, "train_loss": 0.18303296650544956, "train_loss_scale": 304994.46153846156, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1151, "n_parameters": 86059856} {"train_lr": 0.0005572995942893032, "train_min_lr": 0.0005572995942893032, "train_loss": 0.18302573866019836, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0181121239796854, "epoch": 1152, "n_parameters": 86059856} {"train_lr": 0.0005550168976242548, "train_min_lr": 0.0005550168976242548, "train_loss": 0.18303388216867086, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01706682372157677, "epoch": 1153, "n_parameters": 86059856} {"train_lr": 0.0005527379096552076, "train_min_lr": 0.0005527379096552076, "train_loss": 0.1829828409036287, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017874109170710046, "epoch": 1154, "n_parameters": 86059856} {"train_lr": 0.000550462639279237, "train_min_lr": 0.000550462639279237, "train_loss": 0.18296606250059527, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016992609318083104, "epoch": 1155, "n_parameters": 86059856} {"train_lr": 0.0005481910953789097, "train_min_lr": 0.0005481910953789097, "train_loss": 0.18296391413045618, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018077958945352107, "epoch": 1156, "n_parameters": 86059856} {"train_lr": 0.0005459232868222406, "train_min_lr": 0.0005459232868222406, "train_loss": 0.1829756930207786, "train_loss_scale": 286509.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01771699572996929, "epoch": 1157, "n_parameters": 86059856} {"train_lr": 0.000543659222462664, "train_min_lr": 0.000543659222462664, "train_loss": 0.18292798082200953, "train_loss_scale": 275587.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1158, "n_parameters": 86059856} {"train_lr": 0.0005413989111389974, "train_min_lr": 0.0005413989111389974, "train_loss": 0.182906768260858, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01753257002490453, "epoch": 1159, "n_parameters": 86059856} {"train_lr": 0.0005391423616754045, "train_min_lr": 0.0005391423616754045, "train_loss": 0.1828877421704909, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01725023545814344, "epoch": 1160, "n_parameters": 86059856} {"train_lr": 0.0005368895828813646, "train_min_lr": 0.0005368895828813646, "train_loss": 0.18288265676715243, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017132744407997683, "epoch": 1161, "n_parameters": 86059856} {"train_lr": 0.0005346405835516359, "train_min_lr": 0.0005346405835516359, "train_loss": 0.18288008857948276, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017604772652236696, "epoch": 1162, "n_parameters": 86059856} {"train_lr": 0.0005323953724662217, "train_min_lr": 0.0005323953724662217, "train_loss": 0.18284986479780996, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017486880593097363, "epoch": 1163, "n_parameters": 86059856} {"train_lr": 0.0005301539583903355, "train_min_lr": 0.0005301539583903355, "train_loss": 0.18285385655979505, "train_loss_scale": 290710.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1164, "n_parameters": 86059856} {"train_lr": 0.0005279163500743699, "train_min_lr": 0.0005279163500743699, "train_loss": 0.18279924789348093, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017742085786990058, "epoch": 1165, "n_parameters": 86059856} {"train_lr": 0.0005256825562538566, "train_min_lr": 0.0005256825562538566, "train_loss": 0.18280482769776613, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01778342176037721, "epoch": 1166, "n_parameters": 86059856} {"train_lr": 0.0005234525856494381, "train_min_lr": 0.0005234525856494381, "train_loss": 0.18280994376310936, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017742290492885962, "epoch": 1167, "n_parameters": 86059856} {"train_lr": 0.0005212264469668297, "train_min_lr": 0.0005212264469668297, "train_loss": 0.18278483782584468, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018093091949151877, "epoch": 1168, "n_parameters": 86059856} {"train_lr": 0.0005190041488967883, "train_min_lr": 0.0005190041488967883, "train_loss": 0.1827694922267722, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017659708735150978, "epoch": 1169, "n_parameters": 86059856} {"train_lr": 0.0005167857001150759, "train_min_lr": 0.0005167857001150759, "train_loss": 0.1827699196560738, "train_loss_scale": 267185.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01802271289918094, "epoch": 1170, "n_parameters": 86059856} {"train_lr": 0.0005145711092824282, "train_min_lr": 0.0005145711092824282, "train_loss": 0.18274695115784803, "train_loss_scale": 315917.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1171, "n_parameters": 86059856} {"train_lr": 0.0005123603850445193, "train_min_lr": 0.0005123603850445193, "train_loss": 0.1827023807519044, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017787056712385934, "epoch": 1172, "n_parameters": 86059856} {"train_lr": 0.0005101535360319283, "train_min_lr": 0.0005101535360319283, "train_loss": 0.18270649552010956, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017620933433182728, "epoch": 1173, "n_parameters": 86059856} {"train_lr": 0.0005079505708601042, "train_min_lr": 0.0005079505708601042, "train_loss": 0.18272104255783445, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018062033514396694, "epoch": 1174, "n_parameters": 86059856} {"train_lr": 0.000505751498129336, "train_min_lr": 0.000505751498129336, "train_loss": 0.1826904058480301, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018324275915988553, "epoch": 1175, "n_parameters": 86059856} {"train_lr": 0.0005035563264247157, "train_min_lr": 0.0005035563264247157, "train_loss": 0.1826993003487587, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017713772240453042, "epoch": 1176, "n_parameters": 86059856} {"train_lr": 0.0005013650643161046, "train_min_lr": 0.0005013650643161046, "train_loss": 0.18266529061544973, "train_loss_scale": 339442.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1177, "n_parameters": 86059856} {"train_lr": 0.0004991777203581036, "train_min_lr": 0.0004991777203581036, "train_loss": 0.18264630034518164, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017951138619070824, "epoch": 1178, "n_parameters": 86059856} {"train_lr": 0.0004969943030900144, "train_min_lr": 0.0004969943030900144, "train_loss": 0.18264855391298157, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01816511169099846, "epoch": 1179, "n_parameters": 86059856} {"train_lr": 0.0004948148210358124, "train_min_lr": 0.0004948148210358124, "train_loss": 0.18261020809698564, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018435630571240418, "epoch": 1180, "n_parameters": 86059856} {"train_lr": 0.000492639282704107, "train_min_lr": 0.000492639282704107, "train_loss": 0.18258205386929405, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01865553895298105, "epoch": 1181, "n_parameters": 86059856} {"train_lr": 0.0004904676965881128, "train_min_lr": 0.0004904676965881128, "train_loss": 0.18258269912658784, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01879939106770624, "epoch": 1182, "n_parameters": 86059856} {"train_lr": 0.0004883000711656161, "train_min_lr": 0.0004883000711656161, "train_loss": 0.18252505621132561, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017863559897821873, "epoch": 1183, "n_parameters": 86059856} {"train_lr": 0.0004861364148989391, "train_min_lr": 0.0004861364148989391, "train_loss": 0.1825773604393292, "train_loss_scale": 262984.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1184, "n_parameters": 86059856} {"train_lr": 0.00048397673623490903, "train_min_lr": 0.00048397673623490903, "train_loss": 0.1825582173246986, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018836039930391006, "epoch": 1185, "n_parameters": 86059856} {"train_lr": 0.00048182104360482435, "train_min_lr": 0.00048182104360482435, "train_loss": 0.18252888657391453, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01835171901023923, "epoch": 1186, "n_parameters": 86059856} {"train_lr": 0.00047966934542442426, "train_min_lr": 0.00047966934542442426, "train_loss": 0.18249675336007315, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018007373195499755, "epoch": 1187, "n_parameters": 86059856} {"train_lr": 0.000477521650093852, "train_min_lr": 0.000477521650093852, "train_loss": 0.1824888699174596, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018512389667403813, "epoch": 1188, "n_parameters": 86059856} {"train_lr": 0.00047537796599762306, "train_min_lr": 0.00047537796599762306, "train_loss": 0.1824609881075911, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018447206230261005, "epoch": 1189, "n_parameters": 86059856} {"train_lr": 0.00047323830150459555, "train_min_lr": 0.00047323830150459555, "train_loss": 0.1824661296290847, "train_loss_scale": 293231.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1190, "n_parameters": 86059856} {"train_lr": 0.000471102664967933, "train_min_lr": 0.000471102664967933, "train_loss": 0.1824389132969559, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01837907180500527, "epoch": 1191, "n_parameters": 86059856} {"train_lr": 0.00046897106472507544, "train_min_lr": 0.00046897106472507544, "train_loss": 0.18240947186803588, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018878397445242185, "epoch": 1192, "n_parameters": 86059856} {"train_lr": 0.00046684350909770577, "train_min_lr": 0.00046684350909770577, "train_loss": 0.18241433140176994, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019310003391299874, "epoch": 1193, "n_parameters": 86059856} {"train_lr": 0.000464720006391714, "train_min_lr": 0.000464720006391714, "train_loss": 0.18242025571182752, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018380649232616026, "epoch": 1194, "n_parameters": 86059856} {"train_lr": 0.00046260056489717106, "train_min_lr": 0.00046260056489717106, "train_loss": 0.18239934593200302, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01870087095990013, "epoch": 1195, "n_parameters": 86059856} {"train_lr": 0.00046048519288829123, "train_min_lr": 0.00046048519288829123, "train_loss": 0.18235177643453845, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018773056680145554, "epoch": 1196, "n_parameters": 86059856} {"train_lr": 0.0004583738986234033, "train_min_lr": 0.0004583738986234033, "train_loss": 0.18233261556507876, "train_loss_scale": 265504.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1197, "n_parameters": 86059856} {"train_lr": 0.0004562666903449135, "train_min_lr": 0.0004562666903449135, "train_loss": 0.18234070521206236, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018203751004945774, "epoch": 1198, "n_parameters": 86059856} {"train_lr": 0.0004541635762792799, "train_min_lr": 0.0004541635762792799, "train_loss": 0.18233094319629556, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018978787135356702, "epoch": 1199, "n_parameters": 86059856} {"train_lr": 0.00045206456463697597, "train_min_lr": 0.00045206456463697597, "train_loss": 0.1822862873235956, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018970650602848485, "epoch": 1200, "n_parameters": 86059856} {"train_lr": 0.000449969663612458, "train_min_lr": 0.000449969663612458, "train_loss": 0.18227872087691838, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018925131026965875, "epoch": 1201, "n_parameters": 86059856} {"train_lr": 0.0004478788813841364, "train_min_lr": 0.0004478788813841364, "train_loss": 0.18226313026836857, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018595940612543087, "epoch": 1202, "n_parameters": 86059856} {"train_lr": 0.0004457922261143414, "train_min_lr": 0.0004457922261143414, "train_loss": 0.18227630247099277, "train_loss_scale": 325999.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1203, "n_parameters": 86059856} {"train_lr": 0.0004437097059492909, "train_min_lr": 0.0004437097059492909, "train_loss": 0.18224785522295114, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01791358743615162, "epoch": 1204, "n_parameters": 86059856} {"train_lr": 0.00044163132901906124, "train_min_lr": 0.00044163132901906124, "train_loss": 0.18223271477155578, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019144623062740534, "epoch": 1205, "n_parameters": 86059856} {"train_lr": 0.00043955710343755196, "train_min_lr": 0.00043955710343755196, "train_loss": 0.18223800021820727, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01903592651256193, "epoch": 1206, "n_parameters": 86059856} {"train_lr": 0.0004374870373024571, "train_min_lr": 0.0004374870373024571, "train_loss": 0.18220925867820206, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018388899673039142, "epoch": 1207, "n_parameters": 86059856} {"train_lr": 0.00043542113869523197, "train_min_lr": 0.00043542113869523197, "train_loss": 0.18218817943945909, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01886560482223733, "epoch": 1208, "n_parameters": 86059856} {"train_lr": 0.00043335941568106186, "train_min_lr": 0.00043335941568106186, "train_loss": 0.18218626675960153, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019022099217041753, "epoch": 1209, "n_parameters": 86059856} {"train_lr": 0.0004313018763088307, "train_min_lr": 0.0004313018763088307, "train_loss": 0.18217045355301636, "train_loss_scale": 275587.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1210, "n_parameters": 86059856} {"train_lr": 0.0004292485286110903, "train_min_lr": 0.0004292485286110903, "train_loss": 0.18213414527380314, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019066705058018368, "epoch": 1211, "n_parameters": 86059856} {"train_lr": 0.0004271993806040275, "train_min_lr": 0.0004271993806040275, "train_loss": 0.18215976765331549, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019072348613124818, "epoch": 1212, "n_parameters": 86059856} {"train_lr": 0.00042515444028743435, "train_min_lr": 0.00042515444028743435, "train_loss": 0.18210312973660153, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019105112001013298, "epoch": 1213, "n_parameters": 86059856} {"train_lr": 0.00042311371564467587, "train_min_lr": 0.00042311371564467587, "train_loss": 0.18210305512333527, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01918346543187419, "epoch": 1214, "n_parameters": 86059856} {"train_lr": 0.00042107721464265945, "train_min_lr": 0.00042107721464265945, "train_loss": 0.18206867434156054, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019763763671597608, "epoch": 1215, "n_parameters": 86059856} {"train_lr": 0.000419044945231803, "train_min_lr": 0.000419044945231803, "train_loss": 0.1820773987027888, "train_loss_scale": 268025.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1216, "n_parameters": 86059856} {"train_lr": 0.00041701691534600573, "train_min_lr": 0.00041701691534600573, "train_loss": 0.18202263206983796, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01985763857821719, "epoch": 1217, "n_parameters": 86059856} {"train_lr": 0.0004149931329026143, "train_min_lr": 0.0004149931329026143, "train_loss": 0.18203174681641543, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019343740118739124, "epoch": 1218, "n_parameters": 86059856} {"train_lr": 0.00041297360580239503, "train_min_lr": 0.00041297360580239503, "train_loss": 0.18202089758303303, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01939879044985924, "epoch": 1219, "n_parameters": 86059856} {"train_lr": 0.00041095834192950083, "train_min_lr": 0.00041095834192950083, "train_loss": 0.18198207198666075, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01973609013769489, "epoch": 1220, "n_parameters": 86059856} {"train_lr": 0.00040894734915144056, "train_min_lr": 0.00040894734915144056, "train_loss": 0.18198656814937025, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02007678024864827, "epoch": 1221, "n_parameters": 86059856} {"train_lr": 0.0004069406353190497, "train_min_lr": 0.0004069406353190497, "train_loss": 0.18197480865563145, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018844923646284793, "epoch": 1222, "n_parameters": 86059856} {"train_lr": 0.000404938208266459, "train_min_lr": 0.000404938208266459, "train_loss": 0.181955427934344, "train_loss_scale": 288190.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1223, "n_parameters": 86059856} {"train_lr": 0.0004029400758110638, "train_min_lr": 0.0004029400758110638, "train_loss": 0.18192231292143846, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01931222417475417, "epoch": 1224, "n_parameters": 86059856} {"train_lr": 0.0004009462457534931, "train_min_lr": 0.0004009462457534931, "train_loss": 0.1819014746027115, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019464647340683792, "epoch": 1225, "n_parameters": 86059856} {"train_lr": 0.00039895672587757875, "train_min_lr": 0.00039895672587757875, "train_loss": 0.18191819934126657, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020054736401503667, "epoch": 1226, "n_parameters": 86059856} {"train_lr": 0.0003969715239503275, "train_min_lr": 0.0003969715239503275, "train_loss": 0.18188715135105527, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01974468343019581, "epoch": 1227, "n_parameters": 86059856} {"train_lr": 0.00039499064772188796, "train_min_lr": 0.00039499064772188796, "train_loss": 0.18189222766802862, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02008168482615684, "epoch": 1228, "n_parameters": 86059856} {"train_lr": 0.0003930141049255215, "train_min_lr": 0.0003930141049255215, "train_loss": 0.1818391300427417, "train_loss_scale": 307515.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1229, "n_parameters": 86059856} {"train_lr": 0.00039104190327757254, "train_min_lr": 0.00039104190327757254, "train_loss": 0.18184847719609165, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019679793646224797, "epoch": 1230, "n_parameters": 86059856} {"train_lr": 0.0003890740504774367, "train_min_lr": 0.0003890740504774367, "train_loss": 0.18182891880711302, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020050133139683075, "epoch": 1231, "n_parameters": 86059856} {"train_lr": 0.0003871105542075335, "train_min_lr": 0.0003871105542075335, "train_loss": 0.18182175226796132, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01946275191417394, "epoch": 1232, "n_parameters": 86059856} {"train_lr": 0.00038515142213327275, "train_min_lr": 0.00038515142213327275, "train_loss": 0.1817717069807725, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019733546865889087, "epoch": 1233, "n_parameters": 86059856} {"train_lr": 0.0003831966619030283, "train_min_lr": 0.0003831966619030283, "train_loss": 0.18175066715010849, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020313136822854478, "epoch": 1234, "n_parameters": 86059856} {"train_lr": 0.0003812462811481052, "train_min_lr": 0.0003812462811481052, "train_loss": 0.1817630161292469, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01973157862905795, "epoch": 1235, "n_parameters": 86059856} {"train_lr": 0.00037930028748271266, "train_min_lr": 0.00037930028748271266, "train_loss": 0.18172606919557813, "train_loss_scale": 291551.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1236, "n_parameters": 86059856} {"train_lr": 0.0003773586885039312, "train_min_lr": 0.0003773586885039312, "train_loss": 0.18173949010908985, "train_loss_scale": 219713.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1237, "n_parameters": 86059856} {"train_lr": 0.0003754214917916861, "train_min_lr": 0.0003754214917916861, "train_loss": 0.18169248221107784, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019970976878912784, "epoch": 1238, "n_parameters": 86059856} {"train_lr": 0.00037348870490871565, "train_min_lr": 0.00037348870490871565, "train_loss": 0.181686046324336, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020072283918778293, "epoch": 1239, "n_parameters": 86059856} {"train_lr": 0.00036963639079544305, "train_min_lr": 0.00036963639079544305, "train_loss": 0.18166389771235678, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020336987632804383, "epoch": 1241, "n_parameters": 86059856} {"train_lr": 0.00036771687860442183, "train_min_lr": 0.00036771687860442183, "train_loss": 0.1816663785288349, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02014517702926428, "epoch": 1242, "n_parameters": 86059856} {"train_lr": 0.0003658018063211761, "train_min_lr": 0.0003658018063211761, "train_loss": 0.18163284328049764, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01946092879650398, "epoch": 1243, "n_parameters": 86059856} {"train_lr": 0.00036389118142207233, "train_min_lr": 0.00036389118142207233, "train_loss": 0.18162982906692493, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020486796197171014, "epoch": 1244, "n_parameters": 86059856} {"train_lr": 0.00036198501136611275, "train_min_lr": 0.00036198501136611275, "train_loss": 0.18160905898548663, "train_loss_scale": 250801.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02077518636957766, "epoch": 1245, "n_parameters": 86059856} {"train_lr": 0.0003600833035949099, "train_min_lr": 0.0003600833035949099, "train_loss": 0.1815755680346718, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020256087112312134, "epoch": 1246, "n_parameters": 86059856} {"train_lr": 0.0003581860655326535, "train_min_lr": 0.0003581860655326535, "train_loss": 0.18155732601045224, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020405805938375685, "epoch": 1247, "n_parameters": 86059856} {"train_lr": 0.0003562933045860865, "train_min_lr": 0.0003562933045860865, "train_loss": 0.18158849326368326, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020114575960458472, "epoch": 1248, "n_parameters": 86059856} {"train_lr": 0.0003544050281444713, "train_min_lr": 0.0003544050281444713, "train_loss": 0.18153984346188223, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020262504897963923, "epoch": 1249, "n_parameters": 86059856} {"train_lr": 0.00035252124357956267, "train_min_lr": 0.00035252124357956267, "train_loss": 0.18150175908484903, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020569023858302105, "epoch": 1250, "n_parameters": 86059856} {"train_lr": 0.0003506419582455813, "train_min_lr": 0.0003506419582455813, "train_loss": 0.18148577912376287, "train_loss_scale": 304154.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1251, "n_parameters": 86059856} {"train_lr": 0.0003487671794791816, "train_min_lr": 0.0003487671794791816, "train_loss": 0.1814887936298664, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021024727068531018, "epoch": 1252, "n_parameters": 86059856} {"train_lr": 0.00034689691459942405, "train_min_lr": 0.00034689691459942405, "train_loss": 0.1814869134925688, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01965378607229258, "epoch": 1253, "n_parameters": 86059856} {"train_lr": 0.0003450311709077491, "train_min_lr": 0.0003450311709077491, "train_loss": 0.18145708951096123, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020125546716511823, "epoch": 1254, "n_parameters": 86059856} {"train_lr": 0.00034316995568794414, "train_min_lr": 0.00034316995568794414, "train_loss": 0.18149961814738047, "train_loss_scale": 252061.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1255, "n_parameters": 86059856} {"train_lr": 0.00034131327620612003, "train_min_lr": 0.00034131327620612003, "train_loss": 0.1814198095458918, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021156520334979854, "epoch": 1256, "n_parameters": 86059856} {"train_lr": 0.00033946113971067857, "train_min_lr": 0.00033946113971067857, "train_loss": 0.18140447004817617, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019446807179170158, "epoch": 1257, "n_parameters": 86059856} {"train_lr": 0.0003376135534322866, "train_min_lr": 0.0003376135534322866, "train_loss": 0.18139834876339406, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02041329698672948, "epoch": 1258, "n_parameters": 86059856} {"train_lr": 0.0003357705245838467, "train_min_lr": 0.0003357705245838467, "train_loss": 0.18138130819902587, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02006612144983732, "epoch": 1259, "n_parameters": 86059856} {"train_lr": 0.0003320981679394479, "train_min_lr": 0.0003320981679394479, "train_loss": 0.18135934238895202, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02100441495899875, "epoch": 1261, "n_parameters": 86059856} {"train_lr": 0.00033026885448022154, "train_min_lr": 0.00033026885448022154, "train_loss": 0.1813762231897085, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02037079019460063, "epoch": 1262, "n_parameters": 86059856} {"train_lr": 0.0003284441271243585, "train_min_lr": 0.0003284441271243585, "train_loss": 0.181317301813322, "train_loss_scale": 140314.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1263, "n_parameters": 86059856} {"train_lr": 0.0003266239929955209, "train_min_lr": 0.0003266239929955209, "train_loss": 0.18129290706024337, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019781085149122354, "epoch": 1264, "n_parameters": 86059856} {"train_lr": 0.00032480845919943997, "train_min_lr": 0.00032480845919943997, "train_loss": 0.1812486452444528, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02086591045372188, "epoch": 1265, "n_parameters": 86059856} {"train_lr": 0.0003229975328238862, "train_min_lr": 0.0003229975328238862, "train_loss": 0.18126952675027916, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019892138264810618, "epoch": 1266, "n_parameters": 86059856} {"train_lr": 0.00032119122093864414, "train_min_lr": 0.00032119122093864414, "train_loss": 0.18123865507256526, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021911171730607748, "epoch": 1267, "n_parameters": 86059856} {"train_lr": 0.00031938953059548313, "train_min_lr": 0.00031938953059548313, "train_loss": 0.1812518760203742, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021110131321713712, "epoch": 1268, "n_parameters": 86059856} {"train_lr": 0.0003175924688281298, "train_min_lr": 0.0003175924688281298, "train_loss": 0.18124985445338565, "train_loss_scale": 155437.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020621425818460874, "epoch": 1269, "n_parameters": 86059856} {"train_lr": 0.00031580004265224147, "train_min_lr": 0.00031580004265224147, "train_loss": 0.18119114677373988, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020859489736791987, "epoch": 1270, "n_parameters": 86059856} {"train_lr": 0.0003140122590653787, "train_min_lr": 0.0003140122590653787, "train_loss": 0.18118543843499935, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020993480870786767, "epoch": 1271, "n_parameters": 86059856} {"train_lr": 0.0003122291250469768, "train_min_lr": 0.0003122291250469768, "train_loss": 0.18118680490610692, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020389454430327393, "epoch": 1272, "n_parameters": 86059856} {"train_lr": 0.00031045064755831896, "train_min_lr": 0.00031045064755831896, "train_loss": 0.18114829638925117, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020962222022577547, "epoch": 1273, "n_parameters": 86059856} {"train_lr": 0.0003086768335425105, "train_min_lr": 0.0003086768335425105, "train_loss": 0.1811310818944222, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021396308125426564, "epoch": 1274, "n_parameters": 86059856} {"train_lr": 0.00030690768992445067, "train_min_lr": 0.00030690768992445067, "train_loss": 0.1811241262878936, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021161433580952387, "epoch": 1275, "n_parameters": 86059856} {"train_lr": 0.00030514322361080464, "train_min_lr": 0.00030514322361080464, "train_loss": 0.18113458138078642, "train_loss_scale": 274747.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1276, "n_parameters": 86059856} {"train_lr": 0.0003033834414899792, "train_min_lr": 0.0003033834414899792, "train_loss": 0.18112209996280187, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02048865097980851, "epoch": 1277, "n_parameters": 86059856} {"train_lr": 0.0003016283504320918, "train_min_lr": 0.0003016283504320918, "train_loss": 0.18109641784133437, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021518554320582785, "epoch": 1278, "n_parameters": 86059856} {"train_lr": 0.00029987795728894943, "train_min_lr": 0.00029987795728894943, "train_loss": 0.18106221163478226, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021501250684452362, "epoch": 1279, "n_parameters": 86059856} {"train_lr": 0.0002981322688940158, "train_min_lr": 0.0002981322688940158, "train_loss": 0.18105285158619666, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021363627911808018, "epoch": 1280, "n_parameters": 86059856} {"train_lr": 0.0002963912920623888, "train_min_lr": 0.0002963912920623888, "train_loss": 0.1810443608675343, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022163190714752253, "epoch": 1281, "n_parameters": 86059856} {"train_lr": 0.00029465503359077215, "train_min_lr": 0.00029465503359077215, "train_loss": 0.18103138860673285, "train_loss_scale": 322638.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1282, "n_parameters": 86059856} {"train_lr": 0.0002929235002574496, "train_min_lr": 0.0002929235002574496, "train_loss": 0.18098827530271733, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02051711424540442, "epoch": 1283, "n_parameters": 86059856} {"train_lr": 0.0002911966988222576, "train_min_lr": 0.0002911966988222576, "train_loss": 0.18098374014386, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020798171658474855, "epoch": 1284, "n_parameters": 86059856} {"train_lr": 0.00028947463602656026, "train_min_lr": 0.00028947463602656026, "train_loss": 0.18095093455690986, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02178786463366869, "epoch": 1285, "n_parameters": 86059856} {"train_lr": 0.0002877573185932216, "train_min_lr": 0.0002877573185932216, "train_loss": 0.1809298069968533, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0209104872863883, "epoch": 1286, "n_parameters": 86059856} {"train_lr": 0.0002860447532265804, "train_min_lr": 0.0002860447532265804, "train_loss": 0.1809397160421866, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021928657482091624, "epoch": 1287, "n_parameters": 86059856} {"train_lr": 0.00028433694661242245, "train_min_lr": 0.00028433694661242245, "train_loss": 0.18093949428592354, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02129670287757061, "epoch": 1288, "n_parameters": 86059856} {"train_lr": 0.0002826339054179573, "train_min_lr": 0.0002826339054179573, "train_loss": 0.1808739822680274, "train_loss_scale": 263824.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1289, "n_parameters": 86059856} {"train_lr": 0.00028093563629178934, "train_min_lr": 0.00028093563629178934, "train_loss": 0.1808678696397692, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021507459310575936, "epoch": 1290, "n_parameters": 86059856} {"train_lr": 0.00027924214586389314, "train_min_lr": 0.00027924214586389314, "train_loss": 0.180839158481178, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021592608927629698, "epoch": 1291, "n_parameters": 86059856} {"train_lr": 0.00027755344074558737, "train_min_lr": 0.00027755344074558737, "train_loss": 0.18085592369644496, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021738294338902984, "epoch": 1292, "n_parameters": 86059856} {"train_lr": 0.00027586952752950975, "train_min_lr": 0.00027586952752950975, "train_loss": 0.18085453965557882, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021719094371805206, "epoch": 1293, "n_parameters": 86059856} {"train_lr": 0.00027419041278958946, "train_min_lr": 0.00027419041278958946, "train_loss": 0.18077731794582155, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02217133534857287, "epoch": 1294, "n_parameters": 86059856} {"train_lr": 0.00027251610308102385, "train_min_lr": 0.00027251610308102385, "train_loss": 0.1807784866183423, "train_loss_scale": 293231.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1295, "n_parameters": 86059856} {"train_lr": 0.00027084660494025017, "train_min_lr": 0.00027084660494025017, "train_loss": 0.1807827360044496, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021171813424772177, "epoch": 1296, "n_parameters": 86059856} {"train_lr": 0.00026918192488492327, "train_min_lr": 0.00026918192488492327, "train_loss": 0.18075127579056874, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02158874963135578, "epoch": 1297, "n_parameters": 86059856} {"train_lr": 0.0002675220694138866, "train_min_lr": 0.0002675220694138866, "train_loss": 0.18073501385366306, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02232629682820959, "epoch": 1298, "n_parameters": 86059856} {"train_lr": 0.0002658670450071499, "train_min_lr": 0.0002658670450071499, "train_loss": 0.18073408887721598, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02110848469265665, "epoch": 1299, "n_parameters": 86059856} {"train_lr": 0.00026421685812586204, "train_min_lr": 0.00026421685812586204, "train_loss": 0.18070972097130159, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021298235673337985, "epoch": 1300, "n_parameters": 86059856} {"train_lr": 0.00026257151521228675, "train_min_lr": 0.00026257151521228675, "train_loss": 0.18070558388717473, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02215033820949686, "epoch": 1301, "n_parameters": 86059856} {"train_lr": 0.0002609310226897767, "train_min_lr": 0.0002609310226897767, "train_loss": 0.1806836317245586, "train_loss_scale": 293231.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1302, "n_parameters": 86059856} {"train_lr": 0.0002592953869627493, "train_min_lr": 0.0002592953869627493, "train_loss": 0.18065402229340413, "train_loss_scale": 132332.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1303, "n_parameters": 86059856} {"train_lr": 0.00025766461441666024, "train_min_lr": 0.00025766461441666024, "train_loss": 0.18067260996366924, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022074921877505496, "epoch": 1304, "n_parameters": 86059856} {"train_lr": 0.0002560387114179814, "train_min_lr": 0.0002560387114179814, "train_loss": 0.18064027580504233, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02237330643663135, "epoch": 1305, "n_parameters": 86059856} {"train_lr": 0.0002544176843141719, "train_min_lr": 0.0002544176843141719, "train_loss": 0.18062327531739497, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02289063826454087, "epoch": 1306, "n_parameters": 86059856} {"train_lr": 0.0002528015394336573, "train_min_lr": 0.0002528015394336573, "train_loss": 0.180555493499224, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021849876215967994, "epoch": 1307, "n_parameters": 86059856} {"train_lr": 0.0002511902830858019, "train_min_lr": 0.0002511902830858019, "train_loss": 0.18062214082321867, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022475920910111222, "epoch": 1308, "n_parameters": 86059856} {"train_lr": 0.00024958392156088685, "train_min_lr": 0.00024958392156088685, "train_loss": 0.1805699234529852, "train_loss_scale": 207110.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02246542165402132, "epoch": 1309, "n_parameters": 86059856} {"train_lr": 0.0002479824611300827, "train_min_lr": 0.0002479824611300827, "train_loss": 0.18052126857667014, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02226276222902995, "epoch": 1310, "n_parameters": 86059856} {"train_lr": 0.0002463859080454272, "train_min_lr": 0.0002463859080454272, "train_loss": 0.1805279818841089, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021124122771792687, "epoch": 1311, "n_parameters": 86059856} {"train_lr": 0.0002447942685397993, "train_min_lr": 0.0002447942685397993, "train_loss": 0.18049897977676338, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021924973510277387, "epoch": 1312, "n_parameters": 86059856} {"train_lr": 0.00024320754882689558, "train_min_lr": 0.00024320754882689558, "train_loss": 0.1805615958232337, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022039855585600704, "epoch": 1313, "n_parameters": 86059856} {"train_lr": 0.00024162575510120723, "train_min_lr": 0.00024162575510120723, "train_loss": 0.1804867630633406, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023028413571106892, "epoch": 1314, "n_parameters": 86059856} {"train_lr": 0.00024004889353799303, "train_min_lr": 0.00024004889353799303, "train_loss": 0.18047533196229965, "train_loss_scale": 266345.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1315, "n_parameters": 86059856} {"train_lr": 0.00023847697029325722, "train_min_lr": 0.00023847697029325722, "train_loss": 0.18045889685312524, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02247769749150253, "epoch": 1316, "n_parameters": 86059856} {"train_lr": 0.00023690999150372558, "train_min_lr": 0.00023690999150372558, "train_loss": 0.18043395397492135, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023282536224891934, "epoch": 1317, "n_parameters": 86059856} {"train_lr": 0.00023534796328682044, "train_min_lr": 0.00023534796328682044, "train_loss": 0.18043493991120693, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021919420454651117, "epoch": 1318, "n_parameters": 86059856} {"train_lr": 0.00023379089174063795, "train_min_lr": 0.00023379089174063795, "train_loss": 0.1804204034881714, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021957699263181824, "epoch": 1319, "n_parameters": 86059856} {"train_lr": 0.0002322387829439219, "train_min_lr": 0.0002322387829439219, "train_loss": 0.18040848912623447, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022876847427911483, "epoch": 1320, "n_parameters": 86059856} {"train_lr": 0.00023069164295604397, "train_min_lr": 0.00023069164295604397, "train_loss": 0.18036613708887345, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023063950124196708, "epoch": 1321, "n_parameters": 86059856} {"train_lr": 0.00022914947781697628, "train_min_lr": 0.00022914947781697628, "train_loss": 0.18037781504412684, "train_loss_scale": 212151.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1322, "n_parameters": 86059856} {"train_lr": 0.0002276122935472699, "train_min_lr": 0.0002276122935472699, "train_loss": 0.1803406914934898, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023696546395046588, "epoch": 1323, "n_parameters": 86059856} {"train_lr": 0.00022608009614803076, "train_min_lr": 0.00022608009614803076, "train_loss": 0.18032067775344238, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022746044067809217, "epoch": 1324, "n_parameters": 86059856} {"train_lr": 0.00022455289160089586, "train_min_lr": 0.00022455289160089586, "train_loss": 0.180281291077009, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022710947958656993, "epoch": 1325, "n_parameters": 86059856} {"train_lr": 0.00022303068586801116, "train_min_lr": 0.00022303068586801116, "train_loss": 0.18028906929211166, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02230815884943765, "epoch": 1326, "n_parameters": 86059856} {"train_lr": 0.0002215134848920061, "train_min_lr": 0.0002215134848920061, "train_loss": 0.1802674013022811, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0233998520192332, "epoch": 1327, "n_parameters": 86059856} {"train_lr": 0.00022000129459597324, "train_min_lr": 0.00022000129459597324, "train_loss": 0.18022733000823513, "train_loss_scale": 132332.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022743395565507505, "epoch": 1328, "n_parameters": 86059856} {"train_lr": 0.00021849412088344262, "train_min_lr": 0.00021849412088344262, "train_loss": 0.18021172041503283, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022948164176434662, "epoch": 1329, "n_parameters": 86059856} {"train_lr": 0.00021699196963836007, "train_min_lr": 0.00021699196963836007, "train_loss": 0.18021377983192602, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02203034044792637, "epoch": 1330, "n_parameters": 86059856} {"train_lr": 0.0002154948467250644, "train_min_lr": 0.0002154948467250644, "train_loss": 0.18020449973977146, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023272021470639188, "epoch": 1331, "n_parameters": 86059856} {"train_lr": 0.00021400275798826295, "train_min_lr": 0.00021400275798826295, "train_loss": 0.180180027269018, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023034396617171857, "epoch": 1332, "n_parameters": 86059856} {"train_lr": 0.00021251570925301055, "train_min_lr": 0.00021251570925301055, "train_loss": 0.18016640411522716, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022442834607015055, "epoch": 1333, "n_parameters": 86059856} {"train_lr": 0.00021103370632468617, "train_min_lr": 0.00021103370632468617, "train_loss": 0.18015519537939093, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022119804198901433, "epoch": 1334, "n_parameters": 86059856} {"train_lr": 0.00020955675498896944, "train_min_lr": 0.00020955675498896944, "train_loss": 0.1801342367946815, "train_loss_scale": 273906.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1335, "n_parameters": 86059856} {"train_lr": 0.00020808486101181987, "train_min_lr": 0.00020808486101181987, "train_loss": 0.18010155839296296, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022637867422487874, "epoch": 1336, "n_parameters": 86059856} {"train_lr": 0.00020661803013945218, "train_min_lr": 0.00020661803013945218, "train_loss": 0.18010833552584815, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023239855024700936, "epoch": 1337, "n_parameters": 86059856} {"train_lr": 0.00020515626809831545, "train_min_lr": 0.00020515626809831545, "train_loss": 0.18008970943852687, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022670555567273345, "epoch": 1338, "n_parameters": 86059856} {"train_lr": 0.00020369958059507004, "train_min_lr": 0.00020369958059507004, "train_loss": 0.1800823344443089, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023376503499606863, "epoch": 1339, "n_parameters": 86059856} {"train_lr": 0.0002022479733165663, "train_min_lr": 0.0002022479733165663, "train_loss": 0.18007329132598945, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02285345856888363, "epoch": 1340, "n_parameters": 86059856} {"train_lr": 0.00020080145192982, "train_min_lr": 0.00020080145192982, "train_loss": 0.1800074138177129, "train_loss_scale": 268865.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1341, "n_parameters": 86059856} {"train_lr": 0.00019936002208199326, "train_min_lr": 0.00019936002208199326, "train_loss": 0.17997420857994792, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0232718790260454, "epoch": 1342, "n_parameters": 86059856} {"train_lr": 0.00019792368940037044, "train_min_lr": 0.00019792368940037044, "train_loss": 0.18002158926967055, "train_loss_scale": 246600.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1343, "n_parameters": 86059856} {"train_lr": 0.00019649245949233696, "train_min_lr": 0.00019649245949233696, "train_loss": 0.17997983923086372, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022539997515149224, "epoch": 1344, "n_parameters": 86059856} {"train_lr": 0.0001950663379453567, "train_min_lr": 0.0001950663379453567, "train_loss": 0.17999795955032682, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02334720379887865, "epoch": 1345, "n_parameters": 86059856} {"train_lr": 0.00019364533032695125, "train_min_lr": 0.00019364533032695125, "train_loss": 0.17995660722207946, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023365471189698346, "epoch": 1346, "n_parameters": 86059856} {"train_lr": 0.00019222944218467774, "train_min_lr": 0.00019222944218467774, "train_loss": 0.17997422981529665, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023543305504016388, "epoch": 1347, "n_parameters": 86059856} {"train_lr": 0.00019081867904610655, "train_min_lr": 0.00019081867904610655, "train_loss": 0.17991722277246225, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024419592060626317, "epoch": 1348, "n_parameters": 86059856} {"train_lr": 0.0001894130464188002, "train_min_lr": 0.0001894130464188002, "train_loss": 0.17990382025854137, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024214863036878597, "epoch": 1349, "n_parameters": 86059856} {"train_lr": 0.00018801254979029276, "train_min_lr": 0.00018801254979029276, "train_loss": 0.1798796684254343, "train_loss_scale": 223914.66666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022887748355666798, "epoch": 1350, "n_parameters": 86059856} {"train_lr": 0.00018661719462806676, "train_min_lr": 0.00018661719462806676, "train_loss": 0.17983563212104714, "train_loss_scale": 168461.12820512822, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1351, "n_parameters": 86059856} {"train_lr": 0.000185226986379533, "train_min_lr": 0.000185226986379533, "train_loss": 0.17986804880321217, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02334714274948988, "epoch": 1352, "n_parameters": 86059856} {"train_lr": 0.00018384193047200835, "train_min_lr": 0.00018384193047200835, "train_loss": 0.17982634358131924, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023274735762522772, "epoch": 1353, "n_parameters": 86059856} {"train_lr": 0.00018246203231269634, "train_min_lr": 0.00018246203231269634, "train_loss": 0.1798211185572048, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023242824597475238, "epoch": 1354, "n_parameters": 86059856} {"train_lr": 0.00018108729728866365, "train_min_lr": 0.00018108729728866365, "train_loss": 0.17979056989320386, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02330455471140643, "epoch": 1355, "n_parameters": 86059856} {"train_lr": 0.00017971773076682078, "train_min_lr": 0.00017971773076682078, "train_loss": 0.17976622300174755, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023656804693671755, "epoch": 1356, "n_parameters": 86059856} {"train_lr": 0.0001783533380939009, "train_min_lr": 0.0001783533380939009, "train_loss": 0.17974392043498272, "train_loss_scale": 170981.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023989420539389055, "epoch": 1357, "n_parameters": 86059856} {"train_lr": 0.00017699412459643834, "train_min_lr": 0.00017699412459643834, "train_loss": 0.17976237753501687, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02356960062080851, "epoch": 1358, "n_parameters": 86059856} {"train_lr": 0.00017564009558074763, "train_min_lr": 0.00017564009558074763, "train_loss": 0.17970001219938964, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02321364765222638, "epoch": 1359, "n_parameters": 86059856} {"train_lr": 0.00017294761211872326, "train_min_lr": 0.00017294761211872326, "train_loss": 0.17970905983104155, "train_loss_scale": 142414.76923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1361, "n_parameters": 86059856} {"train_lr": 0.00017160916818373685, "train_min_lr": 0.00017160916818373685, "train_loss": 0.17969048206801885, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024334214926243592, "epoch": 1362, "n_parameters": 86059856} {"train_lr": 0.00017027592975317707, "train_min_lr": 0.00017027592975317707, "train_loss": 0.17965637407719326, "train_loss_scale": 126660.92307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1363, "n_parameters": 86059856} {"train_lr": 0.0001689479020319532, "train_min_lr": 0.0001689479020319532, "train_loss": 0.1796414983452847, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02412608071254232, "epoch": 1364, "n_parameters": 86059856} {"train_lr": 0.0001676250902046324, "train_min_lr": 0.0001676250902046324, "train_loss": 0.17961982041537666, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024182793015661914, "epoch": 1365, "n_parameters": 86059856} {"train_lr": 0.00016630749943541908, "train_min_lr": 0.00016630749943541908, "train_loss": 0.1796033915866596, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02413100917608692, "epoch": 1366, "n_parameters": 86059856} {"train_lr": 0.00016499513486813497, "train_min_lr": 0.00016499513486813497, "train_loss": 0.17961902047495526, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02463426038575096, "epoch": 1367, "n_parameters": 86059856} {"train_lr": 0.00016368800162619838, "train_min_lr": 0.00016368800162619838, "train_loss": 0.17956996048526028, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023838203800364565, "epoch": 1368, "n_parameters": 86059856} {"train_lr": 0.0001623861048126056, "train_min_lr": 0.0001623861048126056, "train_loss": 0.17958184079123804, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023415132180954784, "epoch": 1369, "n_parameters": 86059856} {"train_lr": 0.0001610894495099096, "train_min_lr": 0.0001610894495099096, "train_loss": 0.1795297734146842, "train_loss_scale": 108596.51282051283, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023361456831200764, "epoch": 1370, "n_parameters": 86059856} {"train_lr": 0.00015979804078020056, "train_min_lr": 0.00015979804078020056, "train_loss": 0.17953766668692994, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024149373675195072, "epoch": 1371, "n_parameters": 86059856} {"train_lr": 0.00015851188366508654, "train_min_lr": 0.00015851188366508654, "train_loss": 0.17952862435963768, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024190003965766385, "epoch": 1372, "n_parameters": 86059856} {"train_lr": 0.00015723098318567354, "train_min_lr": 0.00015723098318567354, "train_loss": 0.17951877902632055, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02368438416399444, "epoch": 1373, "n_parameters": 86059856} {"train_lr": 0.00015595534434254574, "train_min_lr": 0.00015595534434254574, "train_loss": 0.17948707625962412, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025074457803454537, "epoch": 1374, "n_parameters": 86059856} {"train_lr": 0.000154684972115746, "train_min_lr": 0.000154684972115746, "train_loss": 0.17947068163611662, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02491613389709248, "epoch": 1375, "n_parameters": 86059856} {"train_lr": 0.00015341987146475668, "train_min_lr": 0.00015341987146475668, "train_loss": 0.17947446870414588, "train_loss_scale": 163419.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023969841756834052, "epoch": 1376, "n_parameters": 86059856} {"train_lr": 0.00015216004732847995, "train_min_lr": 0.00015216004732847995, "train_loss": 0.1794018864315242, "train_loss_scale": 152497.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1377, "n_parameters": 86059856} {"train_lr": 0.00015090550462521928, "train_min_lr": 0.00015090550462521928, "train_loss": 0.17940628435761213, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024617479260390002, "epoch": 1378, "n_parameters": 86059856} {"train_lr": 0.00014965624825265868, "train_min_lr": 0.00014965624825265868, "train_loss": 0.17938886743743354, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024893604995061953, "epoch": 1379, "n_parameters": 86059856} {"train_lr": 0.00014841228308784527, "train_min_lr": 0.00014841228308784527, "train_loss": 0.1793341534301782, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023790175507131677, "epoch": 1380, "n_parameters": 86059856} {"train_lr": 0.00014717361398716892, "train_min_lr": 0.00014717361398716892, "train_loss": 0.1794011148606212, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02351031752112202, "epoch": 1381, "n_parameters": 86059856} {"train_lr": 0.00014594024578634438, "train_min_lr": 0.00014594024578634438, "train_loss": 0.1793696166893754, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02504685738434394, "epoch": 1382, "n_parameters": 86059856} {"train_lr": 0.0001447121833003921, "train_min_lr": 0.0001447121833003921, "train_loss": 0.17932892654938862, "train_loss_scale": 186945.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02603248672750898, "epoch": 1383, "n_parameters": 86059856} {"train_lr": 0.00014348943132361824, "train_min_lr": 0.00014348943132361824, "train_loss": 0.17928935388903108, "train_loss_scale": 134012.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1384, "n_parameters": 86059856} {"train_lr": 0.00014227199462959794, "train_min_lr": 0.00014227199462959794, "train_loss": 0.17929415253755182, "train_loss_scale": 90532.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1385, "n_parameters": 86059856} {"train_lr": 0.00014105987797115546, "train_min_lr": 0.00014105987797115546, "train_loss": 0.17932052499590775, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024258286190720704, "epoch": 1386, "n_parameters": 86059856} {"train_lr": 0.00013985308608034525, "train_min_lr": 0.00013985308608034525, "train_loss": 0.1792523096082732, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025656021128480252, "epoch": 1387, "n_parameters": 86059856} {"train_lr": 0.00013865162366843504, "train_min_lr": 0.00013865162366843504, "train_loss": 0.17924385225006306, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024392466580208678, "epoch": 1388, "n_parameters": 86059856} {"train_lr": 0.0001374554954258855, "train_min_lr": 0.0001374554954258855, "train_loss": 0.17922136754107973, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024369861303757016, "epoch": 1389, "n_parameters": 86059856} {"train_lr": 0.00013626470602233357, "train_min_lr": 0.00013626470602233357, "train_loss": 0.17919078161522078, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025362908852119476, "epoch": 1390, "n_parameters": 86059856} {"train_lr": 0.00013507926010657354, "train_min_lr": 0.00013507926010657354, "train_loss": 0.17921774374703184, "train_loss_scale": 79189.33333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024925335967101347, "epoch": 1391, "n_parameters": 86059856} {"train_lr": 0.00013389916230653877, "train_min_lr": 0.00013389916230653877, "train_loss": 0.1791553187321346, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02466773188028198, "epoch": 1392, "n_parameters": 86059856} {"train_lr": 0.00013272441722928392, "train_min_lr": 0.00013272441722928392, "train_loss": 0.17917513388555306, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02497731620230927, "epoch": 1393, "n_parameters": 86059856} {"train_lr": 0.00013155502946096624, "train_min_lr": 0.00013155502946096624, "train_loss": 0.17915321717289492, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02608709808630057, "epoch": 1394, "n_parameters": 86059856} {"train_lr": 0.0001303910035668295, "train_min_lr": 0.0001303910035668295, "train_loss": 0.17912004836118564, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024922760639291924, "epoch": 1395, "n_parameters": 86059856} {"train_lr": 0.00012923234409118378, "train_min_lr": 0.00012923234409118378, "train_loss": 0.17911988365141532, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025300912051581036, "epoch": 1396, "n_parameters": 86059856} {"train_lr": 0.00012807905555738917, "train_min_lr": 0.00012807905555738917, "train_loss": 0.17909874688559332, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025473870993711244, "epoch": 1397, "n_parameters": 86059856} {"train_lr": 0.0001269311424678383, "train_min_lr": 0.0001269311424678383, "train_loss": 0.17910376219795301, "train_loss_scale": 135693.12820512822, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1398, "n_parameters": 86059856} {"train_lr": 0.00012578860930393768, "train_min_lr": 0.00012578860930393768, "train_loss": 0.17906915090488604, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025971547992995534, "epoch": 1399, "n_parameters": 86059856} {"train_lr": 0.00012465146052609096, "train_min_lr": 0.00012465146052609096, "train_loss": 0.17904227730841973, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025240081410186414, "epoch": 1400, "n_parameters": 86059856} {"train_lr": 0.0001235197005736816, "train_min_lr": 0.0001235197005736816, "train_loss": 0.1789971616698238, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024703001400503594, "epoch": 1401, "n_parameters": 86059856} {"train_lr": 0.00012239333386505511, "train_min_lr": 0.00012239333386505511, "train_loss": 0.17903068150954846, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02529076023552662, "epoch": 1402, "n_parameters": 86059856} {"train_lr": 0.00012127236479750209, "train_min_lr": 0.00012127236479750209, "train_loss": 0.17898347049067992, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02479254352884033, "epoch": 1403, "n_parameters": 86059856} {"train_lr": 0.00012015679774724091, "train_min_lr": 0.00012015679774724091, "train_loss": 0.17893921801498017, "train_loss_scale": 99144.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1404, "n_parameters": 86059856} {"train_lr": 0.00011904663706940038, "train_min_lr": 0.00011904663706940038, "train_loss": 0.17895159086224455, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02571620169477776, "epoch": 1405, "n_parameters": 86059856} {"train_lr": 0.00011794188709800375, "train_min_lr": 0.00011794188709800375, "train_loss": 0.1789501366796545, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025373617461763132, "epoch": 1406, "n_parameters": 86059856} {"train_lr": 0.0001168425521459504, "train_min_lr": 0.0001168425521459504, "train_loss": 0.17893445690890822, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025676225347874258, "epoch": 1407, "n_parameters": 86059856} {"train_lr": 0.000115748636505, "train_min_lr": 0.000115748636505, "train_loss": 0.1789100558664172, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02522854026980125, "epoch": 1408, "n_parameters": 86059856} {"train_lr": 0.00011466014444575561, "train_min_lr": 0.00011466014444575561, "train_loss": 0.178893762273499, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02601807365098443, "epoch": 1409, "n_parameters": 86059856} {"train_lr": 0.00011357708021764657, "train_min_lr": 0.00011357708021764657, "train_loss": 0.17888243520298067, "train_loss_scale": 70577.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026113895126260243, "epoch": 1410, "n_parameters": 86059856} {"train_lr": 0.00011249944804891208, "train_min_lr": 0.00011249944804891208, "train_loss": 0.17888935009101167, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025542394192411732, "epoch": 1411, "n_parameters": 86059856} {"train_lr": 0.00011142725214658523, "train_min_lr": 0.00011142725214658523, "train_loss": 0.1788547175894611, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027336830052379996, "epoch": 1412, "n_parameters": 86059856} {"train_lr": 0.00011036049669647565, "train_min_lr": 0.00011036049669647565, "train_loss": 0.1787813050338091, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024664585436813723, "epoch": 1413, "n_parameters": 86059856} {"train_lr": 0.0001092991858631544, "train_min_lr": 0.0001092991858631544, "train_loss": 0.17879852926872003, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026692987676375568, "epoch": 1414, "n_parameters": 86059856} {"train_lr": 0.00010824332378993593, "train_min_lr": 0.00010824332378993593, "train_loss": 0.1787711033054317, "train_loss_scale": 68056.61538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1415, "n_parameters": 86059856} {"train_lr": 0.00010719291459886363, "train_min_lr": 0.00010719291459886363, "train_loss": 0.1787695698081874, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0262534748762846, "epoch": 1416, "n_parameters": 86059856} {"train_lr": 0.0001061479623906931, "train_min_lr": 0.0001061479623906931, "train_loss": 0.17877468023485002, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0261534599479861, "epoch": 1417, "n_parameters": 86059856} {"train_lr": 0.0001051084712448757, "train_min_lr": 0.0001051084712448757, "train_loss": 0.17875474679152456, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026817122116111793, "epoch": 1418, "n_parameters": 86059856} {"train_lr": 0.00010407444521954368, "train_min_lr": 0.00010407444521954368, "train_loss": 0.17875268147327006, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025486155526521496, "epoch": 1419, "n_parameters": 86059856} {"train_lr": 0.00010304588835149287, "train_min_lr": 0.00010304588835149287, "train_loss": 0.17871860014453816, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025997093460785273, "epoch": 1420, "n_parameters": 86059856} {"train_lr": 0.00010202280465616816, "train_min_lr": 0.00010202280465616816, "train_loss": 0.17871561082294926, "train_loss_scale": 101664.82051282052, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02671665700080876, "epoch": 1421, "n_parameters": 86059856} {"train_lr": 0.00010100519812764733, "train_min_lr": 0.00010100519812764733, "train_loss": 0.17867379371100703, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026117989799389854, "epoch": 1422, "n_parameters": 86059856} {"train_lr": 9.999307273862563e-05, "train_min_lr": 9.999307273862563e-05, "train_loss": 0.17868846148634568, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026286675952948056, "epoch": 1423, "n_parameters": 86059856} {"train_lr": 9.898643244039997e-05, "train_min_lr": 9.898643244039997e-05, "train_loss": 0.1786461284664722, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025662464238942053, "epoch": 1424, "n_parameters": 86059856} {"train_lr": 9.79852811628539e-05, "train_min_lr": 9.79852811628539e-05, "train_loss": 0.1786412970515159, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025817195299821787, "epoch": 1425, "n_parameters": 86059856} {"train_lr": 9.698962281444164e-05, "train_min_lr": 9.698962281444164e-05, "train_loss": 0.17861896499286956, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025860928112449937, "epoch": 1426, "n_parameters": 86059856} {"train_lr": 9.599946128217389e-05, "train_min_lr": 9.599946128217389e-05, "train_loss": 0.17860183520361972, "train_loss_scale": 131492.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1427, "n_parameters": 86059856} {"train_lr": 9.501480043160137e-05, "train_min_lr": 9.501480043160137e-05, "train_loss": 0.1785678474489265, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026078925038186405, "epoch": 1428, "n_parameters": 86059856} {"train_lr": 9.403564410680083e-05, "train_min_lr": 9.403564410680083e-05, "train_loss": 0.17857472961231208, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02778318474212518, "epoch": 1429, "n_parameters": 86059856} {"train_lr": 9.306199613035916e-05, "train_min_lr": 9.306199613035916e-05, "train_loss": 0.17858962420648775, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02630045916288136, "epoch": 1430, "n_parameters": 86059856} {"train_lr": 9.209386030335916e-05, "train_min_lr": 9.209386030335916e-05, "train_loss": 0.1785115512768523, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025627640798353613, "epoch": 1431, "n_parameters": 86059856} {"train_lr": 9.113124040536432e-05, "train_min_lr": 9.113124040536432e-05, "train_loss": 0.17852575408641058, "train_loss_scale": 76878.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1432, "n_parameters": 86059856} {"train_lr": 9.01741401944042e-05, "train_min_lr": 9.01741401944042e-05, "train_loss": 0.17851212216863552, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026882514793378037, "epoch": 1433, "n_parameters": 86059856} {"train_lr": 8.922256340695968e-05, "train_min_lr": 8.922256340695968e-05, "train_loss": 0.17850798471735266, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025770664937459886, "epoch": 1434, "n_parameters": 86059856} {"train_lr": 8.82765137579486e-05, "train_min_lr": 8.82765137579486e-05, "train_loss": 0.1784553518339705, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026691082841119707, "epoch": 1435, "n_parameters": 86059856} {"train_lr": 8.733599494071077e-05, "train_min_lr": 8.733599494071077e-05, "train_loss": 0.17847085988316208, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02631393223045728, "epoch": 1436, "n_parameters": 86059856} {"train_lr": 8.640101062699404e-05, "train_min_lr": 8.640101062699404e-05, "train_loss": 0.1784496151466066, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026003297107915085, "epoch": 1437, "n_parameters": 86059856} {"train_lr": 8.547156446693963e-05, "train_min_lr": 8.547156446693963e-05, "train_loss": 0.17842071036844012, "train_loss_scale": 92842.66666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02577475697781222, "epoch": 1438, "n_parameters": 86059856} {"train_lr": 8.454766008906833e-05, "train_min_lr": 8.454766008906833e-05, "train_loss": 0.17839640683124366, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027013087883973733, "epoch": 1439, "n_parameters": 86059856} {"train_lr": 8.362930110026567e-05, "train_min_lr": 8.362930110026567e-05, "train_loss": 0.1783953883822482, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025874215273711927, "epoch": 1440, "n_parameters": 86059856} {"train_lr": 8.271649108576848e-05, "train_min_lr": 8.271649108576848e-05, "train_loss": 0.17838597210804716, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025973716541790426, "epoch": 1441, "n_parameters": 86059856} {"train_lr": 8.180923360915051e-05, "train_min_lr": 8.180923360915051e-05, "train_loss": 0.1783704889448694, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027267748119834907, "epoch": 1442, "n_parameters": 86059856} {"train_lr": 8.090753221230857e-05, "train_min_lr": 8.090753221230857e-05, "train_loss": 0.178332194697876, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02627022237254259, "epoch": 1443, "n_parameters": 86059856} {"train_lr": 8.00113904154489e-05, "train_min_lr": 8.00113904154489e-05, "train_loss": 0.17835647035211039, "train_loss_scale": 125610.66666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1444, "n_parameters": 86059856} {"train_lr": 7.912081171707306e-05, "train_min_lr": 7.912081171707306e-05, "train_loss": 0.1782818337281545, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026090319310195554, "epoch": 1445, "n_parameters": 86059856} {"train_lr": 7.82357995939648e-05, "train_min_lr": 7.82357995939648e-05, "train_loss": 0.1782990710851617, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027584934016116537, "epoch": 1446, "n_parameters": 86059856} {"train_lr": 7.735635750117588e-05, "train_min_lr": 7.735635750117588e-05, "train_loss": 0.17832799314951095, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027204252463072922, "epoch": 1447, "n_parameters": 86059856} {"train_lr": 7.648248887201305e-05, "train_min_lr": 7.648248887201305e-05, "train_loss": 0.17826538260142583, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02757288484523694, "epoch": 1448, "n_parameters": 86059856} {"train_lr": 7.561419711802458e-05, "train_min_lr": 7.561419711802458e-05, "train_loss": 0.17826621393517902, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026250698442499224, "epoch": 1449, "n_parameters": 86059856} {"train_lr": 7.47514856289866e-05, "train_min_lr": 7.47514856289866e-05, "train_loss": 0.17825937383354473, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026317352518582575, "epoch": 1450, "n_parameters": 86059856} {"train_lr": 7.389435777289031e-05, "train_min_lr": 7.389435777289031e-05, "train_loss": 0.17823949962173805, "train_loss_scale": 109646.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027209508835744016, "epoch": 1451, "n_parameters": 86059856} {"train_lr": 7.304281689592842e-05, "train_min_lr": 7.304281689592842e-05, "train_loss": 0.17822543754934883, "train_loss_scale": 106075.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1452, "n_parameters": 86059856} {"train_lr": 7.219686632248242e-05, "train_min_lr": 7.219686632248242e-05, "train_loss": 0.17821562606602526, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02644375977345193, "epoch": 1453, "n_parameters": 86059856} {"train_lr": 7.13565093551097e-05, "train_min_lr": 7.13565093551097e-05, "train_loss": 0.17817313869137508, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02722067645010658, "epoch": 1454, "n_parameters": 86059856} {"train_lr": 7.052174927452995e-05, "train_min_lr": 7.052174927452995e-05, "train_loss": 0.17817213546293667, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026854980265339598, "epoch": 1455, "n_parameters": 86059856} {"train_lr": 6.969258933961333e-05, "train_min_lr": 6.969258933961333e-05, "train_loss": 0.17813660862604871, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028069758334029943, "epoch": 1456, "n_parameters": 86059856} {"train_lr": 6.886903278736681e-05, "train_min_lr": 6.886903278736681e-05, "train_loss": 0.17816413876911005, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026467622777160544, "epoch": 1457, "n_parameters": 86059856} {"train_lr": 6.805108283292237e-05, "train_min_lr": 6.805108283292237e-05, "train_loss": 0.17813301242243212, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027449159112830575, "epoch": 1458, "n_parameters": 86059856} {"train_lr": 6.723874266952386e-05, "train_min_lr": 6.723874266952386e-05, "train_loss": 0.17812498841876498, "train_loss_scale": 129181.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02692083703974883, "epoch": 1459, "n_parameters": 86059856} {"train_lr": 6.643201546851466e-05, "train_min_lr": 6.643201546851466e-05, "train_loss": 0.17810994445179135, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026900605459769186, "epoch": 1460, "n_parameters": 86059856} {"train_lr": 6.563090437932561e-05, "train_min_lr": 6.563090437932561e-05, "train_loss": 0.1780728789535948, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026685535543574355, "epoch": 1461, "n_parameters": 86059856} {"train_lr": 6.483541252946215e-05, "train_min_lr": 6.483541252946215e-05, "train_loss": 0.17806742553455898, "train_loss_scale": 91792.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1462, "n_parameters": 86059856} {"train_lr": 6.40455430244928e-05, "train_min_lr": 6.40455430244928e-05, "train_loss": 0.17805266747466073, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02723633294375852, "epoch": 1463, "n_parameters": 86059856} {"train_lr": 6.32612989480364e-05, "train_min_lr": 6.32612989480364e-05, "train_loss": 0.17802711978602487, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027335967504395507, "epoch": 1464, "n_parameters": 86059856} {"train_lr": 6.248268336175046e-05, "train_min_lr": 6.248268336175046e-05, "train_loss": 0.1780429974061031, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02725467073898285, "epoch": 1465, "n_parameters": 86059856} {"train_lr": 6.170969930531892e-05, "train_min_lr": 6.170969930531892e-05, "train_loss": 0.1780165989931004, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027390544834093023, "epoch": 1466, "n_parameters": 86059856} {"train_lr": 6.0942349796440837e-05, "train_min_lr": 6.0942349796440837e-05, "train_loss": 0.17800836574930984, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027735535562850345, "epoch": 1467, "n_parameters": 86059856} {"train_lr": 6.0180637830817734e-05, "train_min_lr": 6.0180637830817734e-05, "train_loss": 0.17796690622643113, "train_loss_scale": 77929.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028435654806880616, "epoch": 1468, "n_parameters": 86059856} {"train_lr": 5.942456638214276e-05, "train_min_lr": 5.942456638214276e-05, "train_loss": 0.17796265384636056, "train_loss_scale": 129811.69230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1469, "n_parameters": 86059856} {"train_lr": 5.867413840208859e-05, "train_min_lr": 5.867413840208859e-05, "train_loss": 0.17797384498258814, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026920980964906704, "epoch": 1470, "n_parameters": 86059856} {"train_lr": 5.7929356820295953e-05, "train_min_lr": 5.7929356820295953e-05, "train_loss": 0.17793177760158402, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027329297706437033, "epoch": 1471, "n_parameters": 86059856} {"train_lr": 5.719022454436236e-05, "train_min_lr": 5.719022454436236e-05, "train_loss": 0.17791651163059166, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027594824596188772, "epoch": 1472, "n_parameters": 86059856} {"train_lr": 5.645674445983068e-05, "train_min_lr": 5.645674445983068e-05, "train_loss": 0.1779165206453166, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027303888181893107, "epoch": 1473, "n_parameters": 86059856} {"train_lr": 5.572891943017771e-05, "train_min_lr": 5.572891943017771e-05, "train_loss": 0.1779107921398603, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0264248198722131, "epoch": 1474, "n_parameters": 86059856} {"train_lr": 5.500675229680325e-05, "train_min_lr": 5.500675229680325e-05, "train_loss": 0.17786180498436666, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02661536268603343, "epoch": 1475, "n_parameters": 86059856} {"train_lr": 5.42902458790189e-05, "train_min_lr": 5.42902458790189e-05, "train_loss": 0.17788953736472207, "train_loss_scale": 105445.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026281881665524382, "epoch": 1476, "n_parameters": 86059856} {"train_lr": 5.357940297403706e-05, "train_min_lr": 5.357940297403706e-05, "train_loss": 0.17786920162884948, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026783200392786127, "epoch": 1477, "n_parameters": 86059856} {"train_lr": 5.287422635695986e-05, "train_min_lr": 5.287422635695986e-05, "train_loss": 0.17784036004032272, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02907135184758749, "epoch": 1478, "n_parameters": 86059856} {"train_lr": 5.217471878076868e-05, "train_min_lr": 5.217471878076868e-05, "train_loss": 0.17781099829022798, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027891503372349035, "epoch": 1479, "n_parameters": 86059856} {"train_lr": 5.148088297631303e-05, "train_min_lr": 5.148088297631303e-05, "train_loss": 0.17784756808899915, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02711120128440551, "epoch": 1480, "n_parameters": 86059856} {"train_lr": 5.079272165230025e-05, "train_min_lr": 5.079272165230025e-05, "train_loss": 0.1778257955970147, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02660751550530012, "epoch": 1481, "n_parameters": 86059856} {"train_lr": 5.0110237495284404e-05, "train_min_lr": 5.0110237495284404e-05, "train_loss": 0.17778512730131832, "train_loss_scale": 134012.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1482, "n_parameters": 86059856} {"train_lr": 4.943343316965651e-05, "train_min_lr": 4.943343316965651e-05, "train_loss": 0.17777019028122035, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02803819400903124, "epoch": 1483, "n_parameters": 86059856} {"train_lr": 4.8762311317633326e-05, "train_min_lr": 4.8762311317633326e-05, "train_loss": 0.17777003264293456, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02740421964046665, "epoch": 1484, "n_parameters": 86059856} {"train_lr": 4.809687455924794e-05, "train_min_lr": 4.809687455924794e-05, "train_loss": 0.17773308251829198, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028184038754075002, "epoch": 1485, "n_parameters": 86059856} {"train_lr": 4.743712549233872e-05, "train_min_lr": 4.743712549233872e-05, "train_loss": 0.17777182661199895, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027450515494610254, "epoch": 1486, "n_parameters": 86059856} {"train_lr": 4.678306669253953e-05, "train_min_lr": 4.678306669253953e-05, "train_loss": 0.1777276884800253, "train_loss_scale": 130021.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1487, "n_parameters": 86059856} {"train_lr": 4.6134700713269854e-05, "train_min_lr": 4.6134700713269854e-05, "train_loss": 0.177717117813583, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026981823432904024, "epoch": 1488, "n_parameters": 86059856} {"train_lr": 4.549203008572446e-05, "train_min_lr": 4.549203008572446e-05, "train_loss": 0.1777494556706351, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0278884943944808, "epoch": 1489, "n_parameters": 86059856} {"train_lr": 4.485505731886384e-05, "train_min_lr": 4.485505731886384e-05, "train_loss": 0.1776864881364581, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02733797085686372, "epoch": 1490, "n_parameters": 86059856} {"train_lr": 4.4223784899403976e-05, "train_min_lr": 4.4223784899403976e-05, "train_loss": 0.17766563232558277, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02770693883753549, "epoch": 1491, "n_parameters": 86059856} {"train_lr": 4.3598215291807275e-05, "train_min_lr": 4.3598215291807275e-05, "train_loss": 0.1776655404141937, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02700084765465596, "epoch": 1492, "n_parameters": 86059856} {"train_lr": 4.2978350938272475e-05, "train_min_lr": 4.2978350938272475e-05, "train_loss": 0.17767662746318352, "train_loss_scale": 36023.794871794875, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1493, "n_parameters": 86059856} {"train_lr": 4.23641942587251e-05, "train_min_lr": 4.23641942587251e-05, "train_loss": 0.17765517346859455, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02820186201745692, "epoch": 1494, "n_parameters": 86059856} {"train_lr": 4.175574765080827e-05, "train_min_lr": 4.175574765080827e-05, "train_loss": 0.1776435338318921, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027923068151068993, "epoch": 1495, "n_parameters": 86059856} {"train_lr": 4.1153013489873176e-05, "train_min_lr": 4.1153013489873176e-05, "train_loss": 0.17765260910173544, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02927287616647589, "epoch": 1496, "n_parameters": 86059856} {"train_lr": 4.055599412896989e-05, "train_min_lr": 4.055599412896989e-05, "train_loss": 0.177624715378699, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02769807148247193, "epoch": 1497, "n_parameters": 86059856} {"train_lr": 3.9964691898838054e-05, "train_min_lr": 3.9964691898838054e-05, "train_loss": 0.17762317323985582, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026687464605157193, "epoch": 1498, "n_parameters": 86059856} {"train_lr": 3.937910910789793e-05, "train_min_lr": 3.937910910789793e-05, "train_loss": 0.17758217993646097, "train_loss_scale": 48836.92307692308, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02751251428316419, "epoch": 1499, "n_parameters": 86059856} {"train_lr": 3.8799248042241287e-05, "train_min_lr": 3.8799248042241287e-05, "train_loss": 0.17757999033141786, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028516191971273377, "epoch": 1500, "n_parameters": 86059856} {"train_lr": 3.822511096562247e-05, "train_min_lr": 3.822511096562247e-05, "train_loss": 0.17755203736193764, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02791956330362994, "epoch": 1501, "n_parameters": 86059856} {"train_lr": 3.7656700119449665e-05, "train_min_lr": 3.7656700119449665e-05, "train_loss": 0.17756447102277514, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0270675306315892, "epoch": 1502, "n_parameters": 86059856} {"train_lr": 3.709401772277616e-05, "train_min_lr": 3.709401772277616e-05, "train_loss": 0.17754549108660564, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027647719611055575, "epoch": 1503, "n_parameters": 86059856} {"train_lr": 3.6537065972291316e-05, "train_min_lr": 3.6537065972291316e-05, "train_loss": 0.17751512540659556, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02786923376007531, "epoch": 1504, "n_parameters": 86059856} {"train_lr": 3.598584704231254e-05, "train_min_lr": 3.598584704231254e-05, "train_loss": 0.1775246227476507, "train_loss_scale": 70787.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027352660518282883, "epoch": 1505, "n_parameters": 86059856} {"train_lr": 3.544036308477659e-05, "train_min_lr": 3.544036308477659e-05, "train_loss": 0.17749759819633207, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02710786012884898, "epoch": 1506, "n_parameters": 86059856} {"train_lr": 3.4900616229230846e-05, "train_min_lr": 3.4900616229230846e-05, "train_loss": 0.17747415096248284, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02694242900141921, "epoch": 1507, "n_parameters": 86059856} {"train_lr": 3.436660858282553e-05, "train_min_lr": 3.436660858282553e-05, "train_loss": 0.17749050962858093, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02805270059392429, "epoch": 1508, "n_parameters": 86059856} {"train_lr": 3.383834223030501e-05, "train_min_lr": 3.383834223030501e-05, "train_loss": 0.1774547894168884, "train_loss_scale": 84650.66666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1509, "n_parameters": 86059856} {"train_lr": 3.331581923400004e-05, "train_min_lr": 3.331581923400004e-05, "train_loss": 0.1774396367627793, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027386195671099883, "epoch": 1510, "n_parameters": 86059856} {"train_lr": 3.2799041633819495e-05, "train_min_lr": 3.2799041633819495e-05, "train_loss": 0.17740307439667866, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027512296407411877, "epoch": 1511, "n_parameters": 86059856} {"train_lr": 3.228801144724241e-05, "train_min_lr": 3.228801144724241e-05, "train_loss": 0.1774336755848848, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027115395137419302, "epoch": 1512, "n_parameters": 86059856} {"train_lr": 3.178273066931021e-05, "train_min_lr": 3.178273066931021e-05, "train_loss": 0.1774503361696425, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027438244824178327, "epoch": 1513, "n_parameters": 86059856} {"train_lr": 3.12832012726187e-05, "train_min_lr": 3.12832012726187e-05, "train_loss": 0.17741743441468152, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02900087074018442, "epoch": 1514, "n_parameters": 86059856} {"train_lr": 3.078942520731082e-05, "train_min_lr": 3.078942520731082e-05, "train_loss": 0.17740618948263523, "train_loss_scale": 74148.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1515, "n_parameters": 86059856} {"train_lr": 3.030140440106846e-05, "train_min_lr": 3.030140440106846e-05, "train_loss": 0.17740035399448317, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028511209425349265, "epoch": 1516, "n_parameters": 86059856} {"train_lr": 2.981914075910532e-05, "train_min_lr": 2.981914075910532e-05, "train_loss": 0.17738886335446763, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028371436348280463, "epoch": 1517, "n_parameters": 86059856} {"train_lr": 2.93426361641594e-05, "train_min_lr": 2.93426361641594e-05, "train_loss": 0.17738384080644792, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027661695085370388, "epoch": 1518, "n_parameters": 86059856} {"train_lr": 2.8871892476485508e-05, "train_min_lr": 2.8871892476485508e-05, "train_loss": 0.1773534894395524, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02781679526043053, "epoch": 1519, "n_parameters": 86059856} {"train_lr": 2.8406911533848164e-05, "train_min_lr": 2.8406911533848164e-05, "train_loss": 0.17736193675320977, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027994448898169093, "epoch": 1520, "n_parameters": 86059856} {"train_lr": 2.794769515151437e-05, "train_min_lr": 2.794769515151437e-05, "train_loss": 0.17732711032761309, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027978953189001635, "epoch": 1521, "n_parameters": 86059856} {"train_lr": 2.7494245122246477e-05, "train_min_lr": 2.7494245122246477e-05, "train_loss": 0.1773424723949761, "train_loss_scale": 115108.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02793472801717237, "epoch": 1522, "n_parameters": 86059856} {"train_lr": 2.7046563216295282e-05, "train_min_lr": 2.7046563216295282e-05, "train_loss": 0.1773083847380267, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027271578297353327, "epoch": 1523, "n_parameters": 86059856} {"train_lr": 2.660465118139297e-05, "train_min_lr": 2.660465118139297e-05, "train_loss": 0.17728295841385636, "train_loss_scale": 76668.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1524, "n_parameters": 86059856} {"train_lr": 2.6168510742746464e-05, "train_min_lr": 2.6168510742746464e-05, "train_loss": 0.17730411799409643, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027236356829794552, "epoch": 1525, "n_parameters": 86059856} {"train_lr": 2.573814360303059e-05, "train_min_lr": 2.573814360303059e-05, "train_loss": 0.17730953488260126, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027032267421675034, "epoch": 1526, "n_parameters": 86059856} {"train_lr": 2.5313551442381402e-05, "train_min_lr": 2.5313551442381402e-05, "train_loss": 0.1772828356971821, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02814968493886483, "epoch": 1527, "n_parameters": 86059856} {"train_lr": 2.489473591838974e-05, "train_min_lr": 2.489473591838974e-05, "train_loss": 0.17728620532971734, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027582717378838703, "epoch": 1528, "n_parameters": 86059856} {"train_lr": 2.4481698666094585e-05, "train_min_lr": 2.4481698666094585e-05, "train_loss": 0.1772528664322379, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027846650978645835, "epoch": 1529, "n_parameters": 86059856} {"train_lr": 2.4074441297976873e-05, "train_min_lr": 2.4074441297976873e-05, "train_loss": 0.17728505354935828, "train_loss_scale": 93052.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027279328232487805, "epoch": 1530, "n_parameters": 86059856} {"train_lr": 2.3672965403953075e-05, "train_min_lr": 2.3672965403953075e-05, "train_loss": 0.17725040530188915, "train_loss_scale": 70997.33333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1531, "n_parameters": 86059856} {"train_lr": 2.327727255136899e-05, "train_min_lr": 2.327727255136899e-05, "train_loss": 0.17726215947037324, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026874973038689066, "epoch": 1532, "n_parameters": 86059856} {"train_lr": 2.2887364284993653e-05, "train_min_lr": 2.2887364284993653e-05, "train_loss": 0.17724702975306755, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028212128314547814, "epoch": 1533, "n_parameters": 86059856} {"train_lr": 2.2503242127013326e-05, "train_min_lr": 2.2503242127013326e-05, "train_loss": 0.17721076796237284, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027320200637078438, "epoch": 1534, "n_parameters": 86059856} {"train_lr": 2.2124907577025608e-05, "train_min_lr": 2.2124907577025608e-05, "train_loss": 0.17721324868930075, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02684170355160649, "epoch": 1535, "n_parameters": 86059856} {"train_lr": 2.175236211203337e-05, "train_min_lr": 2.175236211203337e-05, "train_loss": 0.17718591737763909, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028736004916330177, "epoch": 1536, "n_parameters": 86059856} {"train_lr": 2.13856071864392e-05, "train_min_lr": 2.13856071864392e-05, "train_loss": 0.17715169921612892, "train_loss_scale": 94943.17948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1537, "n_parameters": 86059856} {"train_lr": 2.1024644232039657e-05, "train_min_lr": 2.1024644232039657e-05, "train_loss": 0.1772150471299075, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027429010910101425, "epoch": 1538, "n_parameters": 86059856} {"train_lr": 2.0669474658019664e-05, "train_min_lr": 2.0669474658019664e-05, "train_loss": 0.17720053978383732, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027452233826550536, "epoch": 1539, "n_parameters": 86059856} {"train_lr": 2.032009985094699e-05, "train_min_lr": 2.032009985094699e-05, "train_loss": 0.1771762476583274, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02849484691157555, "epoch": 1540, "n_parameters": 86059856} {"train_lr": 1.9976521174766968e-05, "train_min_lr": 1.9976521174766968e-05, "train_loss": 0.1771609249841183, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02875298772675869, "epoch": 1541, "n_parameters": 86059856} {"train_lr": 1.963873997079691e-05, "train_min_lr": 1.963873997079691e-05, "train_loss": 0.17717870469921485, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02772394331315389, "epoch": 1542, "n_parameters": 86059856} {"train_lr": 1.930675755772116e-05, "train_min_lr": 1.930675755772116e-05, "train_loss": 0.17715892389130133, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02769445367038059, "epoch": 1543, "n_parameters": 86059856} {"train_lr": 1.8980575231585747e-05, "train_min_lr": 1.8980575231585747e-05, "train_loss": 0.17712548388860738, "train_loss_scale": 107966.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028517630082578994, "epoch": 1544, "n_parameters": 86059856} {"train_lr": 1.8660194265793465e-05, "train_min_lr": 1.8660194265793465e-05, "train_loss": 0.17715904895419207, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027768883066108592, "epoch": 1545, "n_parameters": 86059856} {"train_lr": 1.8345615911098684e-05, "train_min_lr": 1.8345615911098684e-05, "train_loss": 0.17709454198857436, "train_loss_scale": 71837.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1546, "n_parameters": 86059856} {"train_lr": 1.803684139560281e-05, "train_min_lr": 1.803684139560281e-05, "train_loss": 0.17711829682752395, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027847167808944598, "epoch": 1547, "n_parameters": 86059856} {"train_lr": 1.773387192474912e-05, "train_min_lr": 1.773387192474912e-05, "train_loss": 0.17711386932298923, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028318302490963385, "epoch": 1548, "n_parameters": 86059856} {"train_lr": 1.743670868131832e-05, "train_min_lr": 1.743670868131832e-05, "train_loss": 0.17710314669873184, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028060585224571135, "epoch": 1549, "n_parameters": 86059856} {"train_lr": 1.7145352825423828e-05, "train_min_lr": 1.7145352825423828e-05, "train_loss": 0.17711151310672554, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028274078954918645, "epoch": 1550, "n_parameters": 86059856} {"train_lr": 1.68598054945072e-05, "train_min_lr": 1.68598054945072e-05, "train_loss": 0.17708129928005525, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02823343954813213, "epoch": 1551, "n_parameters": 86059856} {"train_lr": 1.6580067803333854e-05, "train_min_lr": 1.6580067803333854e-05, "train_loss": 0.17705771546416843, "train_loss_scale": 97883.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028010701826319862, "epoch": 1552, "n_parameters": 86059856} {"train_lr": 1.6306140843988466e-05, "train_min_lr": 1.6306140843988466e-05, "train_loss": 0.17708456533877417, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028347898316450227, "epoch": 1553, "n_parameters": 86059856} {"train_lr": 1.6038025685870983e-05, "train_min_lr": 1.6038025685870983e-05, "train_loss": 0.17706976767080143, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028317265481186602, "epoch": 1554, "n_parameters": 86059856} {"train_lr": 1.5775723375692182e-05, "train_min_lr": 1.5775723375692182e-05, "train_loss": 0.17706174804912642, "train_loss_scale": 77298.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1555, "n_parameters": 86059856} {"train_lr": 1.5519234937469837e-05, "train_min_lr": 1.5519234937469837e-05, "train_loss": 0.17709695586922747, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02785104935248502, "epoch": 1556, "n_parameters": 86059856} {"train_lr": 1.5268561372524495e-05, "train_min_lr": 1.5268561372524495e-05, "train_loss": 0.1770492898992812, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028601254981297713, "epoch": 1557, "n_parameters": 86059856} {"train_lr": 1.5023703659475781e-05, "train_min_lr": 1.5023703659475781e-05, "train_loss": 0.1770439539850952, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02844306584805823, "epoch": 1558, "n_parameters": 86059856} {"train_lr": 1.478466275423835e-05, "train_min_lr": 1.478466275423835e-05, "train_loss": 0.17708232255282405, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02812439976976468, "epoch": 1559, "n_parameters": 86059856} {"train_lr": 1.455143959001833e-05, "train_min_lr": 1.455143959001833e-05, "train_loss": 0.17704072176442984, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02807639824011578, "epoch": 1560, "n_parameters": 86059856} {"train_lr": 1.432403507730965e-05, "train_min_lr": 1.432403507730965e-05, "train_loss": 0.17705666082791793, "train_loss_scale": 92422.56410256411, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02849397239370797, "epoch": 1561, "n_parameters": 86059856} {"train_lr": 1.41024501038904e-05, "train_min_lr": 1.41024501038904e-05, "train_loss": 0.17705078462126833, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028440561169424117, "epoch": 1562, "n_parameters": 86059856} {"train_lr": 1.388668553481944e-05, "train_min_lr": 1.388668553481944e-05, "train_loss": 0.17701454881507045, "train_loss_scale": 94943.17948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1563, "n_parameters": 86059856} {"train_lr": 1.3676742212433047e-05, "train_min_lr": 1.3676742212433047e-05, "train_loss": 0.1769935717501749, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028222648844791528, "epoch": 1564, "n_parameters": 86059856} {"train_lr": 1.3472620956341499e-05, "train_min_lr": 1.3472620956341499e-05, "train_loss": 0.17701628889578083, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028266787182730742, "epoch": 1565, "n_parameters": 86059856} {"train_lr": 1.3274322563426021e-05, "train_min_lr": 1.3274322563426021e-05, "train_loss": 0.1770120131639907, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028646863585051436, "epoch": 1566, "n_parameters": 86059856} {"train_lr": 1.3081847807835623e-05, "train_min_lr": 1.3081847807835623e-05, "train_loss": 0.17700027828462994, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02845929607223624, "epoch": 1567, "n_parameters": 86059856} {"train_lr": 1.2895197440984016e-05, "train_min_lr": 1.2895197440984016e-05, "train_loss": 0.17699359541830534, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028518652262834784, "epoch": 1568, "n_parameters": 86059856} {"train_lr": 1.2714372191546779e-05, "train_min_lr": 1.2714372191546779e-05, "train_loss": 0.17700131827941498, "train_loss_scale": 74778.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02824892954996381, "epoch": 1569, "n_parameters": 86059856} {"train_lr": 1.2539372765458446e-05, "train_min_lr": 1.2539372765458446e-05, "train_loss": 0.17698726496504, "train_loss_scale": 123300.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1570, "n_parameters": 86059856} {"train_lr": 1.2370199845909771e-05, "train_min_lr": 1.2370199845909771e-05, "train_loss": 0.17697615878811726, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028532591212588627, "epoch": 1571, "n_parameters": 86059856} {"train_lr": 1.2206854093345032e-05, "train_min_lr": 1.2206854093345032e-05, "train_loss": 0.17695296729783502, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028028575727381766, "epoch": 1572, "n_parameters": 86059856} {"train_lr": 1.2049336145459547e-05, "train_min_lr": 1.2049336145459547e-05, "train_loss": 0.17696531026815185, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027972571026438322, "epoch": 1573, "n_parameters": 86059856} {"train_lr": 1.1897646617197056e-05, "train_min_lr": 1.1897646617197056e-05, "train_loss": 0.17695117282728928, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02931207801716832, "epoch": 1574, "n_parameters": 86059856} {"train_lr": 1.1751786100747415e-05, "train_min_lr": 1.1751786100747415e-05, "train_loss": 0.17697576090741235, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02836842122129523, "epoch": 1575, "n_parameters": 86059856} {"train_lr": 1.1611755165544217e-05, "train_min_lr": 1.1611755165544217e-05, "train_loss": 0.17696220504466253, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.029204507268821962, "epoch": 1576, "n_parameters": 86059856} {"train_lr": 1.147755435826266e-05, "train_min_lr": 1.147755435826266e-05, "train_loss": 0.1769823709067005, "train_loss_scale": 111957.33333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028348748178149644, "epoch": 1577, "n_parameters": 86059856} {"train_lr": 1.1349184202817314e-05, "train_min_lr": 1.1349184202817314e-05, "train_loss": 0.17692130690333074, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02825575944585487, "epoch": 1578, "n_parameters": 86059856} {"train_lr": 1.1226645200360109e-05, "train_min_lr": 1.1226645200360109e-05, "train_loss": 0.1769640251623992, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02871152230251867, "epoch": 1579, "n_parameters": 86059856} {"train_lr": 1.1109937829278423e-05, "train_min_lr": 1.1109937829278423e-05, "train_loss": 0.17692478020221758, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.029040187943535738, "epoch": 1580, "n_parameters": 86059856} {"train_lr": 1.0999062545193157e-05, "train_min_lr": 1.0999062545193157e-05, "train_loss": 0.17695325682871044, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027948032324321758, "epoch": 1581, "n_parameters": 86059856} {"train_lr": 1.0894019780956976e-05, "train_min_lr": 1.0894019780956976e-05, "train_loss": 0.17694880599036622, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1582, "n_parameters": 86059856} {"train_lr": 1.0794809946652626e-05, "train_min_lr": 1.0794809946652626e-05, "train_loss": 0.17694528096427137, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028933998537608065, "epoch": 1583, "n_parameters": 86059856} {"train_lr": 1.0701433429591352e-05, "train_min_lr": 1.0701433429591352e-05, "train_loss": 0.1769298811753591, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.029394752220608868, "epoch": 1584, "n_parameters": 86059856} {"train_lr": 1.0613890594311302e-05, "train_min_lr": 1.0613890594311302e-05, "train_loss": 0.1769544959354859, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02879538996001849, "epoch": 1585, "n_parameters": 86059856} {"train_lr": 1.05321817825762e-05, "train_min_lr": 1.05321817825762e-05, "train_loss": 0.17694326161215893, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02855625551026792, "epoch": 1586, "n_parameters": 86059856} {"train_lr": 1.0456307313374012e-05, "train_min_lr": 1.0456307313374012e-05, "train_loss": 0.17692473830165675, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02865586327150082, "epoch": 1587, "n_parameters": 86059856} {"train_lr": 1.0386267482915607e-05, "train_min_lr": 1.0386267482915607e-05, "train_loss": 0.17690122507226008, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02890958096115635, "epoch": 1588, "n_parameters": 86059856} {"train_lr": 1.032206256463369e-05, "train_min_lr": 1.032206256463369e-05, "train_loss": 0.17693900821121553, "train_loss_scale": 96833.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 1589, "n_parameters": 86059856} {"train_lr": 1.0263692809181657e-05, "train_min_lr": 1.0263692809181657e-05, "train_loss": 0.17694689248067638, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028821322660988722, "epoch": 1590, "n_parameters": 86059856} {"train_lr": 1.0211158444432734e-05, "train_min_lr": 1.0211158444432734e-05, "train_loss": 0.17691589633110338, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02892069316182572, "epoch": 1591, "n_parameters": 86059856} {"train_lr": 1.0164459675478965e-05, "train_min_lr": 1.0164459675478965e-05, "train_loss": 0.17690497054121432, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.029086721368516102, "epoch": 1592, "n_parameters": 86059856} {"train_lr": 1.0123596684630482e-05, "train_min_lr": 1.0123596684630482e-05, "train_loss": 0.17689213230429837, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028606293430456366, "epoch": 1593, "n_parameters": 86059856} {"train_lr": 1.008856963141474e-05, "train_min_lr": 1.008856963141474e-05, "train_loss": 0.1769320870576522, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02973642130382359, "epoch": 1594, "n_parameters": 86059856} {"train_lr": 1.0059378652576001e-05, "train_min_lr": 1.0059378652576001e-05, "train_loss": 0.17689297753625, "train_loss_scale": 72677.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02963116858751537, "epoch": 1595, "n_parameters": 86059856} {"train_lr": 1.0036023862074616e-05, "train_min_lr": 1.0036023862074616e-05, "train_loss": 0.17692221594580376, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02861682880216111, "epoch": 1596, "n_parameters": 86059856} {"train_lr": 1.0018505351086794e-05, "train_min_lr": 1.0018505351086794e-05, "train_loss": 0.17690475225054586, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.028415874536268603, "epoch": 1597, "n_parameters": 86059856} {"train_lr": 1.00068231880041e-05, "train_min_lr": 1.00068231880041e-05, "train_loss": 0.17690716511032617, "train_loss_scale": 81709.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1598, "n_parameters": 86059856} {"train_lr": 1.0000977418433219e-05, "train_min_lr": 1.0000977418433219e-05, "train_loss": 0.17689578268581477, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02879136753602861, "epoch": 1599, "n_parameters": 86059856}