{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 3780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009259259259259259, "grad_norm": 19.112235093128195, "learning_rate": 4.232804232804233e-07, "loss": 0.8275, "loss_nan_ranks": 0, "loss_rank_avg": 0.803094744682312, "step": 5, "valid_targets_mean": 3444.3, "valid_targets_min": 828 }, { "epoch": 0.018518518518518517, "grad_norm": 17.940707174937973, "learning_rate": 9.523809523809525e-07, "loss": 0.7963, "loss_nan_ranks": 0, "loss_rank_avg": 0.8306764960289001, "step": 10, "valid_targets_mean": 4536.1, "valid_targets_min": 1670 }, { "epoch": 0.027777777777777776, "grad_norm": 15.664620071655396, "learning_rate": 1.4814814814814815e-06, "loss": 0.7903, "loss_nan_ranks": 0, "loss_rank_avg": 0.8159181475639343, "step": 15, "valid_targets_mean": 3574.7, "valid_targets_min": 435 }, { "epoch": 0.037037037037037035, "grad_norm": 10.770635827936216, "learning_rate": 2.0105820105820108e-06, "loss": 0.7797, "loss_nan_ranks": 0, "loss_rank_avg": 0.7561392188072205, "step": 20, "valid_targets_mean": 4583.4, "valid_targets_min": 577 }, { "epoch": 0.046296296296296294, "grad_norm": 7.426416040771228, "learning_rate": 2.53968253968254e-06, "loss": 0.7281, "loss_nan_ranks": 0, "loss_rank_avg": 0.7589821815490723, "step": 25, "valid_targets_mean": 3544.4, "valid_targets_min": 763 }, { "epoch": 0.05555555555555555, "grad_norm": 3.713460240688579, "learning_rate": 3.068783068783069e-06, "loss": 0.669, "loss_nan_ranks": 0, "loss_rank_avg": 0.6719295978546143, "step": 30, "valid_targets_mean": 3903.3, "valid_targets_min": 697 }, { "epoch": 0.06481481481481481, "grad_norm": 2.1703955297067186, "learning_rate": 3.597883597883598e-06, "loss": 0.5837, "loss_nan_ranks": 0, "loss_rank_avg": 0.5826764106750488, "step": 35, "valid_targets_mean": 3806.2, "valid_targets_min": 1937 }, { "epoch": 0.07407407407407407, "grad_norm": 1.8165232850431967, "learning_rate": 4.126984126984127e-06, "loss": 0.6208, "loss_nan_ranks": 0, "loss_rank_avg": 0.6975480318069458, "step": 40, "valid_targets_mean": 3366.0, "valid_targets_min": 878 }, { "epoch": 0.08333333333333333, "grad_norm": 1.3433617476391144, "learning_rate": 4.656084656084656e-06, "loss": 0.5655, "loss_nan_ranks": 0, "loss_rank_avg": 0.5773887634277344, "step": 45, "valid_targets_mean": 4220.3, "valid_targets_min": 724 }, { "epoch": 0.09259259259259259, "grad_norm": 1.1527914357135887, "learning_rate": 5.185185185185185e-06, "loss": 0.5912, "loss_nan_ranks": 0, "loss_rank_avg": 0.6037584543228149, "step": 50, "valid_targets_mean": 4641.8, "valid_targets_min": 2175 }, { "epoch": 0.10185185185185185, "grad_norm": 0.9246044784239899, "learning_rate": 5.7142857142857145e-06, "loss": 0.601, "loss_nan_ranks": 0, "loss_rank_avg": 0.5939217805862427, "step": 55, "valid_targets_mean": 4679.5, "valid_targets_min": 663 }, { "epoch": 0.1111111111111111, "grad_norm": 0.8146470542392132, "learning_rate": 6.243386243386243e-06, "loss": 0.5341, "loss_nan_ranks": 0, "loss_rank_avg": 0.5055123567581177, "step": 60, "valid_targets_mean": 3902.7, "valid_targets_min": 923 }, { "epoch": 0.12037037037037036, "grad_norm": 0.7976083063634939, "learning_rate": 6.772486772486773e-06, "loss": 0.532, "loss_nan_ranks": 0, "loss_rank_avg": 0.4894016683101654, "step": 65, "valid_targets_mean": 3446.8, "valid_targets_min": 556 }, { "epoch": 0.12962962962962962, "grad_norm": 0.7321082473641598, "learning_rate": 7.301587301587301e-06, "loss": 0.5207, "loss_nan_ranks": 0, "loss_rank_avg": 0.46321815252304077, "step": 70, "valid_targets_mean": 4182.4, "valid_targets_min": 2033 }, { "epoch": 0.1388888888888889, "grad_norm": 0.7291697059938048, "learning_rate": 7.830687830687831e-06, "loss": 0.512, "loss_nan_ranks": 0, "loss_rank_avg": 0.5047429800033569, "step": 75, "valid_targets_mean": 3302.7, "valid_targets_min": 627 }, { "epoch": 0.14814814814814814, "grad_norm": 0.7739549159047908, "learning_rate": 8.35978835978836e-06, "loss": 0.4734, "loss_nan_ranks": 0, "loss_rank_avg": 0.47949308156967163, "step": 80, "valid_targets_mean": 3213.7, "valid_targets_min": 1752 }, { "epoch": 0.1574074074074074, "grad_norm": 0.7111733388587708, "learning_rate": 8.888888888888888e-06, "loss": 0.4823, "loss_nan_ranks": 0, "loss_rank_avg": 0.44027847051620483, "step": 85, "valid_targets_mean": 3505.9, "valid_targets_min": 1994 }, { "epoch": 0.16666666666666666, "grad_norm": 0.6831309764350063, "learning_rate": 9.417989417989418e-06, "loss": 0.4392, "loss_nan_ranks": 0, "loss_rank_avg": 0.40418338775634766, "step": 90, "valid_targets_mean": 3209.4, "valid_targets_min": 512 }, { "epoch": 0.17592592592592593, "grad_norm": 0.6213729941542423, "learning_rate": 9.947089947089947e-06, "loss": 0.4356, "loss_nan_ranks": 0, "loss_rank_avg": 0.43431776762008667, "step": 95, "valid_targets_mean": 3941.1, "valid_targets_min": 723 }, { "epoch": 0.18518518518518517, "grad_norm": 0.7201560667011954, "learning_rate": 1.0476190476190477e-05, "loss": 0.4769, "loss_nan_ranks": 0, "loss_rank_avg": 0.5052137970924377, "step": 100, "valid_targets_mean": 3591.6, "valid_targets_min": 880 }, { "epoch": 0.19444444444444445, "grad_norm": 0.6092157523393694, "learning_rate": 1.1005291005291006e-05, "loss": 0.4378, "loss_nan_ranks": 0, "loss_rank_avg": 0.41137802600860596, "step": 105, "valid_targets_mean": 3784.0, "valid_targets_min": 546 }, { "epoch": 0.2037037037037037, "grad_norm": 0.6962357662281705, "learning_rate": 1.1534391534391536e-05, "loss": 0.4424, "loss_nan_ranks": 0, "loss_rank_avg": 0.5407352447509766, "step": 110, "valid_targets_mean": 4280.3, "valid_targets_min": 764 }, { "epoch": 0.21296296296296297, "grad_norm": 0.6281974355729566, "learning_rate": 1.2063492063492064e-05, "loss": 0.4527, "loss_nan_ranks": 0, "loss_rank_avg": 0.4453981816768646, "step": 115, "valid_targets_mean": 3953.9, "valid_targets_min": 1669 }, { "epoch": 0.2222222222222222, "grad_norm": 0.618723801537215, "learning_rate": 1.2592592592592593e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.3996565639972687, "step": 120, "valid_targets_mean": 4443.4, "valid_targets_min": 1748 }, { "epoch": 0.23148148148148148, "grad_norm": 0.6419954427627002, "learning_rate": 1.3121693121693123e-05, "loss": 0.4195, "loss_nan_ranks": 0, "loss_rank_avg": 0.4261256754398346, "step": 125, "valid_targets_mean": 4298.9, "valid_targets_min": 2102 }, { "epoch": 0.24074074074074073, "grad_norm": 0.8161990345008757, "learning_rate": 1.3650793650793652e-05, "loss": 0.4461, "loss_nan_ranks": 0, "loss_rank_avg": 0.5341705083847046, "step": 130, "valid_targets_mean": 4825.4, "valid_targets_min": 1620 }, { "epoch": 0.25, "grad_norm": 0.8509610334596335, "learning_rate": 1.417989417989418e-05, "loss": 0.436, "loss_nan_ranks": 0, "loss_rank_avg": 0.44645223021507263, "step": 135, "valid_targets_mean": 4780.1, "valid_targets_min": 789 }, { "epoch": 0.25925925925925924, "grad_norm": 0.5904975210645568, "learning_rate": 1.470899470899471e-05, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.40643012523651123, "step": 140, "valid_targets_mean": 4096.2, "valid_targets_min": 541 }, { "epoch": 0.26851851851851855, "grad_norm": 0.6739039533538748, "learning_rate": 1.523809523809524e-05, "loss": 0.389, "loss_nan_ranks": 0, "loss_rank_avg": 0.4152180552482605, "step": 145, "valid_targets_mean": 4372.8, "valid_targets_min": 1808 }, { "epoch": 0.2777777777777778, "grad_norm": 0.6677022093982549, "learning_rate": 1.576719576719577e-05, "loss": 0.3913, "loss_nan_ranks": 0, "loss_rank_avg": 0.36584043502807617, "step": 150, "valid_targets_mean": 3181.2, "valid_targets_min": 892 }, { "epoch": 0.28703703703703703, "grad_norm": 0.6028158751437971, "learning_rate": 1.6296296296296297e-05, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.3777807652950287, "step": 155, "valid_targets_mean": 3913.8, "valid_targets_min": 1488 }, { "epoch": 0.2962962962962963, "grad_norm": 0.5663817438154167, "learning_rate": 1.6825396825396828e-05, "loss": 0.433, "loss_nan_ranks": 0, "loss_rank_avg": 0.42696279287338257, "step": 160, "valid_targets_mean": 4789.0, "valid_targets_min": 1714 }, { "epoch": 0.3055555555555556, "grad_norm": 0.7015646909300565, "learning_rate": 1.7354497354497356e-05, "loss": 0.4013, "loss_nan_ranks": 0, "loss_rank_avg": 0.37903106212615967, "step": 165, "valid_targets_mean": 4173.1, "valid_targets_min": 520 }, { "epoch": 0.3148148148148148, "grad_norm": 0.6234945858741571, "learning_rate": 1.7883597883597884e-05, "loss": 0.3997, "loss_nan_ranks": 0, "loss_rank_avg": 0.3800799548625946, "step": 170, "valid_targets_mean": 4546.9, "valid_targets_min": 1768 }, { "epoch": 0.32407407407407407, "grad_norm": 0.598847110558081, "learning_rate": 1.8412698412698415e-05, "loss": 0.3828, "loss_nan_ranks": 0, "loss_rank_avg": 0.3486472964286804, "step": 175, "valid_targets_mean": 5024.6, "valid_targets_min": 1684 }, { "epoch": 0.3333333333333333, "grad_norm": 0.850399133293019, "learning_rate": 1.8941798941798943e-05, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.3398283123970032, "step": 180, "valid_targets_mean": 4075.1, "valid_targets_min": 1613 }, { "epoch": 0.3425925925925926, "grad_norm": 0.6301928805898117, "learning_rate": 1.947089947089947e-05, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.28791162371635437, "step": 185, "valid_targets_mean": 3739.5, "valid_targets_min": 790 }, { "epoch": 0.35185185185185186, "grad_norm": 0.6696380315329734, "learning_rate": 2e-05, "loss": 0.3974, "loss_nan_ranks": 0, "loss_rank_avg": 0.4022134840488434, "step": 190, "valid_targets_mean": 4268.4, "valid_targets_min": 1592 }, { "epoch": 0.3611111111111111, "grad_norm": 0.707972099980987, "learning_rate": 2.0529100529100533e-05, "loss": 0.4258, "loss_nan_ranks": 0, "loss_rank_avg": 0.407692015171051, "step": 195, "valid_targets_mean": 3323.2, "valid_targets_min": 721 }, { "epoch": 0.37037037037037035, "grad_norm": 0.6176399796412881, "learning_rate": 2.105820105820106e-05, "loss": 0.3572, "loss_nan_ranks": 0, "loss_rank_avg": 0.32153958082199097, "step": 200, "valid_targets_mean": 3555.6, "valid_targets_min": 840 }, { "epoch": 0.37962962962962965, "grad_norm": 0.625992536194421, "learning_rate": 2.158730158730159e-05, "loss": 0.3921, "loss_nan_ranks": 0, "loss_rank_avg": 0.49598437547683716, "step": 205, "valid_targets_mean": 5771.6, "valid_targets_min": 675 }, { "epoch": 0.3888888888888889, "grad_norm": 0.6818922510280604, "learning_rate": 2.211640211640212e-05, "loss": 0.3721, "loss_nan_ranks": 0, "loss_rank_avg": 0.39517930150032043, "step": 210, "valid_targets_mean": 3803.9, "valid_targets_min": 523 }, { "epoch": 0.39814814814814814, "grad_norm": 0.5873676150067556, "learning_rate": 2.2645502645502648e-05, "loss": 0.3945, "loss_nan_ranks": 0, "loss_rank_avg": 0.3694095015525818, "step": 215, "valid_targets_mean": 4309.9, "valid_targets_min": 1808 }, { "epoch": 0.4074074074074074, "grad_norm": 0.5984897505965682, "learning_rate": 2.317460317460318e-05, "loss": 0.3735, "loss_nan_ranks": 0, "loss_rank_avg": 0.3190292418003082, "step": 220, "valid_targets_mean": 4658.9, "valid_targets_min": 1714 }, { "epoch": 0.4166666666666667, "grad_norm": 0.6652241547692664, "learning_rate": 2.3703703703703703e-05, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.3782808184623718, "step": 225, "valid_targets_mean": 4637.1, "valid_targets_min": 1662 }, { "epoch": 0.42592592592592593, "grad_norm": 0.6773269861053436, "learning_rate": 2.4232804232804234e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.3852205276489258, "step": 230, "valid_targets_mean": 3920.9, "valid_targets_min": 1620 }, { "epoch": 0.4351851851851852, "grad_norm": 0.6294988371468406, "learning_rate": 2.4761904761904766e-05, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.38655975461006165, "step": 235, "valid_targets_mean": 4567.7, "valid_targets_min": 2256 }, { "epoch": 0.4444444444444444, "grad_norm": 0.7066702282132421, "learning_rate": 2.5291005291005294e-05, "loss": 0.3836, "loss_nan_ranks": 0, "loss_rank_avg": 0.42079806327819824, "step": 240, "valid_targets_mean": 3376.9, "valid_targets_min": 708 }, { "epoch": 0.4537037037037037, "grad_norm": 0.6424177932025107, "learning_rate": 2.582010582010582e-05, "loss": 0.3315, "loss_nan_ranks": 0, "loss_rank_avg": 0.3230123519897461, "step": 245, "valid_targets_mean": 3249.8, "valid_targets_min": 551 }, { "epoch": 0.46296296296296297, "grad_norm": 0.6455853042915125, "learning_rate": 2.6349206349206353e-05, "loss": 0.3953, "loss_nan_ranks": 0, "loss_rank_avg": 0.4580186605453491, "step": 250, "valid_targets_mean": 4673.4, "valid_targets_min": 1703 }, { "epoch": 0.4722222222222222, "grad_norm": 0.5916132534087772, "learning_rate": 2.687830687830688e-05, "loss": 0.3799, "loss_nan_ranks": 0, "loss_rank_avg": 0.36162281036376953, "step": 255, "valid_targets_mean": 3973.9, "valid_targets_min": 2418 }, { "epoch": 0.48148148148148145, "grad_norm": 0.5577346524553856, "learning_rate": 2.740740740740741e-05, "loss": 0.3443, "loss_nan_ranks": 0, "loss_rank_avg": 0.2668542265892029, "step": 260, "valid_targets_mean": 4436.5, "valid_targets_min": 591 }, { "epoch": 0.49074074074074076, "grad_norm": 0.6414361548008731, "learning_rate": 2.7936507936507936e-05, "loss": 0.3806, "loss_nan_ranks": 0, "loss_rank_avg": 0.3715950846672058, "step": 265, "valid_targets_mean": 4098.6, "valid_targets_min": 2238 }, { "epoch": 0.5, "grad_norm": 0.5856940202703296, "learning_rate": 2.8465608465608467e-05, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.4181647002696991, "step": 270, "valid_targets_mean": 5686.6, "valid_targets_min": 932 }, { "epoch": 0.5092592592592593, "grad_norm": 0.632615688558668, "learning_rate": 2.8994708994709e-05, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.33307045698165894, "step": 275, "valid_targets_mean": 4060.1, "valid_targets_min": 595 }, { "epoch": 0.5185185185185185, "grad_norm": 0.6599712643815261, "learning_rate": 2.9523809523809526e-05, "loss": 0.3474, "loss_nan_ranks": 0, "loss_rank_avg": 0.40192732214927673, "step": 280, "valid_targets_mean": 3505.5, "valid_targets_min": 721 }, { "epoch": 0.5277777777777778, "grad_norm": 0.7871824525334413, "learning_rate": 3.0052910052910054e-05, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.37350624799728394, "step": 285, "valid_targets_mean": 4021.2, "valid_targets_min": 480 }, { "epoch": 0.5370370370370371, "grad_norm": 0.5941203905483521, "learning_rate": 3.058201058201058e-05, "loss": 0.3806, "loss_nan_ranks": 0, "loss_rank_avg": 0.34662193059921265, "step": 290, "valid_targets_mean": 4111.9, "valid_targets_min": 849 }, { "epoch": 0.5462962962962963, "grad_norm": 0.6832466030128974, "learning_rate": 3.111111111111112e-05, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.35143667459487915, "step": 295, "valid_targets_mean": 3446.2, "valid_targets_min": 567 }, { "epoch": 0.5555555555555556, "grad_norm": 0.5652043089590578, "learning_rate": 3.1640211640211645e-05, "loss": 0.4166, "loss_nan_ranks": 0, "loss_rank_avg": 0.39690762758255005, "step": 300, "valid_targets_mean": 4713.9, "valid_targets_min": 772 }, { "epoch": 0.5648148148148148, "grad_norm": 0.7212363216251159, "learning_rate": 3.216931216931217e-05, "loss": 0.3699, "loss_nan_ranks": 0, "loss_rank_avg": 0.3936673402786255, "step": 305, "valid_targets_mean": 3249.8, "valid_targets_min": 528 }, { "epoch": 0.5740740740740741, "grad_norm": 0.5825429131593575, "learning_rate": 3.26984126984127e-05, "loss": 0.3738, "loss_nan_ranks": 0, "loss_rank_avg": 0.37310439348220825, "step": 310, "valid_targets_mean": 5273.2, "valid_targets_min": 496 }, { "epoch": 0.5833333333333334, "grad_norm": 0.5872360398885519, "learning_rate": 3.322751322751323e-05, "loss": 0.3407, "loss_nan_ranks": 0, "loss_rank_avg": 0.35499951243400574, "step": 315, "valid_targets_mean": 5564.2, "valid_targets_min": 2044 }, { "epoch": 0.5925925925925926, "grad_norm": 0.7151624805661932, "learning_rate": 3.375661375661376e-05, "loss": 0.3616, "loss_nan_ranks": 0, "loss_rank_avg": 0.3740341365337372, "step": 320, "valid_targets_mean": 4010.5, "valid_targets_min": 1007 }, { "epoch": 0.6018518518518519, "grad_norm": 0.7663314912022873, "learning_rate": 3.4285714285714284e-05, "loss": 0.3501, "loss_nan_ranks": 0, "loss_rank_avg": 0.37295448780059814, "step": 325, "valid_targets_mean": 3384.2, "valid_targets_min": 803 }, { "epoch": 0.6111111111111112, "grad_norm": 0.7089838515813951, "learning_rate": 3.481481481481482e-05, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.33399441838264465, "step": 330, "valid_targets_mean": 5579.9, "valid_targets_min": 1870 }, { "epoch": 0.6203703703703703, "grad_norm": 0.5776773939398085, "learning_rate": 3.5343915343915346e-05, "loss": 0.3353, "loss_nan_ranks": 0, "loss_rank_avg": 0.3364108204841614, "step": 335, "valid_targets_mean": 4555.3, "valid_targets_min": 1137 }, { "epoch": 0.6296296296296297, "grad_norm": 0.6642731154701934, "learning_rate": 3.5873015873015874e-05, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.3789205551147461, "step": 340, "valid_targets_mean": 3312.5, "valid_targets_min": 943 }, { "epoch": 0.6388888888888888, "grad_norm": 0.6054739369110539, "learning_rate": 3.64021164021164e-05, "loss": 0.3283, "loss_nan_ranks": 0, "loss_rank_avg": 0.3263542652130127, "step": 345, "valid_targets_mean": 4468.6, "valid_targets_min": 2434 }, { "epoch": 0.6481481481481481, "grad_norm": 0.6874656720878914, "learning_rate": 3.6931216931216936e-05, "loss": 0.3563, "loss_nan_ranks": 0, "loss_rank_avg": 0.3052312731742859, "step": 350, "valid_targets_mean": 3007.6, "valid_targets_min": 702 }, { "epoch": 0.6574074074074074, "grad_norm": 0.6507760256018081, "learning_rate": 3.7460317460317464e-05, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.3832956552505493, "step": 355, "valid_targets_mean": 3906.8, "valid_targets_min": 726 }, { "epoch": 0.6666666666666666, "grad_norm": 0.5435451628510831, "learning_rate": 3.798941798941799e-05, "loss": 0.3757, "loss_nan_ranks": 0, "loss_rank_avg": 0.39122748374938965, "step": 360, "valid_targets_mean": 5430.8, "valid_targets_min": 327 }, { "epoch": 0.6759259259259259, "grad_norm": 0.5644022877423189, "learning_rate": 3.851851851851852e-05, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.32652783393859863, "step": 365, "valid_targets_mean": 4897.6, "valid_targets_min": 1720 }, { "epoch": 0.6851851851851852, "grad_norm": 0.6380722458933044, "learning_rate": 3.904761904761905e-05, "loss": 0.3161, "loss_nan_ranks": 0, "loss_rank_avg": 0.340414822101593, "step": 370, "valid_targets_mean": 3580.2, "valid_targets_min": 757 }, { "epoch": 0.6944444444444444, "grad_norm": 0.5617778297985487, "learning_rate": 3.957671957671958e-05, "loss": 0.3299, "loss_nan_ranks": 0, "loss_rank_avg": 0.2890230715274811, "step": 375, "valid_targets_mean": 3849.2, "valid_targets_min": 1728 }, { "epoch": 0.7037037037037037, "grad_norm": 0.7391134176193144, "learning_rate": 3.999999147231606e-05, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.3627009093761444, "step": 380, "valid_targets_mean": 3590.0, "valid_targets_min": 1700 }, { "epoch": 0.7129629629629629, "grad_norm": 0.585798655714323, "learning_rate": 3.9999693004141615e-05, "loss": 0.3378, "loss_nan_ranks": 0, "loss_rank_avg": 0.33226558566093445, "step": 385, "valid_targets_mean": 3770.4, "valid_targets_min": 490 }, { "epoch": 0.7222222222222222, "grad_norm": 0.517353669888523, "learning_rate": 3.999896815904212e-05, "loss": 0.3454, "loss_nan_ranks": 0, "loss_rank_avg": 0.36117538809776306, "step": 390, "valid_targets_mean": 5560.3, "valid_targets_min": 1959 }, { "epoch": 0.7314814814814815, "grad_norm": 0.6107363197274304, "learning_rate": 3.999781695247067e-05, "loss": 0.358, "loss_nan_ranks": 0, "loss_rank_avg": 0.38827258348464966, "step": 395, "valid_targets_mean": 4259.4, "valid_targets_min": 572 }, { "epoch": 0.7407407407407407, "grad_norm": 0.6359653698753834, "learning_rate": 3.999623940897003e-05, "loss": 0.3114, "loss_nan_ranks": 0, "loss_rank_avg": 0.32796719670295715, "step": 400, "valid_targets_mean": 3816.1, "valid_targets_min": 698 }, { "epoch": 0.75, "grad_norm": 0.6435556727086547, "learning_rate": 3.9994235562172135e-05, "loss": 0.2972, "loss_nan_ranks": 0, "loss_rank_avg": 0.3033405542373657, "step": 405, "valid_targets_mean": 3801.8, "valid_targets_min": 605 }, { "epoch": 0.7592592592592593, "grad_norm": 0.5397509110893886, "learning_rate": 3.999180545479734e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.3159610331058502, "step": 410, "valid_targets_mean": 4451.7, "valid_targets_min": 558 }, { "epoch": 0.7685185185185185, "grad_norm": 0.6224527962247686, "learning_rate": 3.998894913865352e-05, "loss": 0.3595, "loss_nan_ranks": 0, "loss_rank_avg": 0.3543645441532135, "step": 415, "valid_targets_mean": 3635.9, "valid_targets_min": 1754 }, { "epoch": 0.7777777777777778, "grad_norm": 0.6426499523396512, "learning_rate": 3.9985666674634976e-05, "loss": 0.3405, "loss_nan_ranks": 0, "loss_rank_avg": 0.339733749628067, "step": 420, "valid_targets_mean": 3655.6, "valid_targets_min": 783 }, { "epoch": 0.7870370370370371, "grad_norm": 0.5232489603174455, "learning_rate": 3.998195813272113e-05, "loss": 0.3453, "loss_nan_ranks": 0, "loss_rank_avg": 0.3885164260864258, "step": 425, "valid_targets_mean": 6157.9, "valid_targets_min": 871 }, { "epoch": 0.7962962962962963, "grad_norm": 0.5645900335897053, "learning_rate": 3.997782359197503e-05, "loss": 0.322, "loss_nan_ranks": 0, "loss_rank_avg": 0.34212440252304077, "step": 430, "valid_targets_mean": 4778.2, "valid_targets_min": 584 }, { "epoch": 0.8055555555555556, "grad_norm": 0.5962980381815696, "learning_rate": 3.997326314054167e-05, "loss": 0.3163, "loss_nan_ranks": 0, "loss_rank_avg": 0.3020142614841461, "step": 435, "valid_targets_mean": 5573.9, "valid_targets_min": 845 }, { "epoch": 0.8148148148148148, "grad_norm": 0.6887712552325551, "learning_rate": 3.9968276875646095e-05, "loss": 0.3244, "loss_nan_ranks": 0, "loss_rank_avg": 0.33892735838890076, "step": 440, "valid_targets_mean": 4693.2, "valid_targets_min": 886 }, { "epoch": 0.8240740740740741, "grad_norm": 0.7056735839532887, "learning_rate": 3.9962864903591375e-05, "loss": 0.3434, "loss_nan_ranks": 0, "loss_rank_avg": 0.4029475450515747, "step": 445, "valid_targets_mean": 3450.6, "valid_targets_min": 930 }, { "epoch": 0.8333333333333334, "grad_norm": 0.5753803712883945, "learning_rate": 3.995702733975625e-05, "loss": 0.2982, "loss_nan_ranks": 0, "loss_rank_avg": 0.27287542819976807, "step": 450, "valid_targets_mean": 3599.7, "valid_targets_min": 1569 }, { "epoch": 0.8425925925925926, "grad_norm": 0.6413965772420983, "learning_rate": 3.9950764308592783e-05, "loss": 0.3417, "loss_nan_ranks": 0, "loss_rank_avg": 0.3798047602176666, "step": 455, "valid_targets_mean": 3774.1, "valid_targets_min": 317 }, { "epoch": 0.8518518518518519, "grad_norm": 0.645787380121332, "learning_rate": 3.9944075943623605e-05, "loss": 0.3172, "loss_nan_ranks": 0, "loss_rank_avg": 0.34351015090942383, "step": 460, "valid_targets_mean": 3503.0, "valid_targets_min": 526 }, { "epoch": 0.8611111111111112, "grad_norm": 0.5184110306882624, "learning_rate": 3.9936962387439135e-05, "loss": 0.3171, "loss_nan_ranks": 0, "loss_rank_avg": 0.3216972351074219, "step": 465, "valid_targets_mean": 5171.2, "valid_targets_min": 1926 }, { "epoch": 0.8703703703703703, "grad_norm": 0.6014751168851044, "learning_rate": 3.992942379169452e-05, "loss": 0.3339, "loss_nan_ranks": 0, "loss_rank_avg": 0.32453712821006775, "step": 470, "valid_targets_mean": 4786.2, "valid_targets_min": 629 }, { "epoch": 0.8796296296296297, "grad_norm": 0.6616582269145059, "learning_rate": 3.992146031710637e-05, "loss": 0.345, "loss_nan_ranks": 0, "loss_rank_avg": 0.35846132040023804, "step": 475, "valid_targets_mean": 3561.0, "valid_targets_min": 949 }, { "epoch": 0.8888888888888888, "grad_norm": 0.49969303426311373, "learning_rate": 3.99130721334494e-05, "loss": 0.3502, "loss_nan_ranks": 0, "loss_rank_avg": 0.3090265095233917, "step": 480, "valid_targets_mean": 4930.3, "valid_targets_min": 491 }, { "epoch": 0.8981481481481481, "grad_norm": 0.6240367506941321, "learning_rate": 3.9904259419552744e-05, "loss": 0.3365, "loss_nan_ranks": 0, "loss_rank_avg": 0.35694044828414917, "step": 485, "valid_targets_mean": 3573.7, "valid_targets_min": 2118 }, { "epoch": 0.9074074074074074, "grad_norm": 0.570990856646238, "learning_rate": 3.989502236329618e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.32826316356658936, "step": 490, "valid_targets_mean": 4897.8, "valid_targets_min": 970 }, { "epoch": 0.9166666666666666, "grad_norm": 0.5453511061498877, "learning_rate": 3.988536116160612e-05, "loss": 0.3359, "loss_nan_ranks": 0, "loss_rank_avg": 0.3527144193649292, "step": 495, "valid_targets_mean": 4487.1, "valid_targets_min": 1709 }, { "epoch": 0.9259259259259259, "grad_norm": 0.6304542974915351, "learning_rate": 3.987527602045139e-05, "loss": 0.3406, "loss_nan_ranks": 0, "loss_rank_avg": 0.34708958864212036, "step": 500, "valid_targets_mean": 3580.7, "valid_targets_min": 894 }, { "epoch": 0.9351851851851852, "grad_norm": 0.625911017517709, "learning_rate": 3.9864767154838864e-05, "loss": 0.3331, "loss_nan_ranks": 0, "loss_rank_avg": 0.3413323760032654, "step": 505, "valid_targets_mean": 3402.6, "valid_targets_min": 698 }, { "epoch": 0.9444444444444444, "grad_norm": 0.5749197484139668, "learning_rate": 3.985383478880887e-05, "loss": 0.3338, "loss_nan_ranks": 0, "loss_rank_avg": 0.3760027587413788, "step": 510, "valid_targets_mean": 4457.2, "valid_targets_min": 194 }, { "epoch": 0.9537037037037037, "grad_norm": 0.6836107942264785, "learning_rate": 3.984247915543043e-05, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.3548537492752075, "step": 515, "valid_targets_mean": 3044.6, "valid_targets_min": 736 }, { "epoch": 0.9629629629629629, "grad_norm": 0.639879100311534, "learning_rate": 3.9830700496796246e-05, "loss": 0.3251, "loss_nan_ranks": 0, "loss_rank_avg": 0.3742837905883789, "step": 520, "valid_targets_mean": 3597.6, "valid_targets_min": 2209 }, { "epoch": 0.9722222222222222, "grad_norm": 0.789794647068884, "learning_rate": 3.98184990640176e-05, "loss": 0.3275, "loss_nan_ranks": 0, "loss_rank_avg": 0.34361645579338074, "step": 525, "valid_targets_mean": 2886.4, "valid_targets_min": 275 }, { "epoch": 0.9814814814814815, "grad_norm": 0.549167670180771, "learning_rate": 3.9805875117218934e-05, "loss": 0.3573, "loss_nan_ranks": 0, "loss_rank_avg": 0.3770226836204529, "step": 530, "valid_targets_mean": 4474.1, "valid_targets_min": 1026 }, { "epoch": 0.9907407407407407, "grad_norm": 0.499937106224937, "learning_rate": 3.9792828925532376e-05, "loss": 0.3325, "loss_nan_ranks": 0, "loss_rank_avg": 0.31070464849472046, "step": 535, "valid_targets_mean": 4791.1, "valid_targets_min": 1838 }, { "epoch": 1.0, "grad_norm": 0.5379069079693097, "learning_rate": 3.977936076709195e-05, "loss": 0.3367, "loss_nan_ranks": 0, "loss_rank_avg": 0.313912570476532, "step": 540, "valid_targets_mean": 4855.9, "valid_targets_min": 778 }, { "epoch": 1.0092592592592593, "grad_norm": 0.5872678514369906, "learning_rate": 3.976547092902765e-05, "loss": 0.3107, "loss_nan_ranks": 0, "loss_rank_avg": 0.3828044831752777, "step": 545, "valid_targets_mean": 4668.6, "valid_targets_min": 422 }, { "epoch": 1.0185185185185186, "grad_norm": 0.622573855977736, "learning_rate": 3.9751159707459354e-05, "loss": 0.3296, "loss_nan_ranks": 0, "loss_rank_avg": 0.30190443992614746, "step": 550, "valid_targets_mean": 3399.8, "valid_targets_min": 480 }, { "epoch": 1.0277777777777777, "grad_norm": 0.6028687336159148, "learning_rate": 3.973642740749048e-05, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.30583667755126953, "step": 555, "valid_targets_mean": 3726.7, "valid_targets_min": 629 }, { "epoch": 1.037037037037037, "grad_norm": 0.46880468225849076, "learning_rate": 3.972127434320148e-05, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.2561293840408325, "step": 560, "valid_targets_mean": 4991.6, "valid_targets_min": 1598 }, { "epoch": 1.0462962962962963, "grad_norm": 0.5498139372111231, "learning_rate": 3.970570083764316e-05, "loss": 0.3393, "loss_nan_ranks": 0, "loss_rank_avg": 0.27668747305870056, "step": 565, "valid_targets_mean": 3853.1, "valid_targets_min": 573 }, { "epoch": 1.0555555555555556, "grad_norm": 0.7107012818618696, "learning_rate": 3.968970722282979e-05, "loss": 0.3129, "loss_nan_ranks": 0, "loss_rank_avg": 0.35108423233032227, "step": 570, "valid_targets_mean": 3513.9, "valid_targets_min": 1733 }, { "epoch": 1.0648148148148149, "grad_norm": 0.5944855386113417, "learning_rate": 3.9673293839732024e-05, "loss": 0.2998, "loss_nan_ranks": 0, "loss_rank_avg": 0.239329993724823, "step": 575, "valid_targets_mean": 3587.9, "valid_targets_min": 584 }, { "epoch": 1.074074074074074, "grad_norm": 0.5366706705366189, "learning_rate": 3.965646103826962e-05, "loss": 0.3161, "loss_nan_ranks": 0, "loss_rank_avg": 0.27880698442459106, "step": 580, "valid_targets_mean": 3620.6, "valid_targets_min": 496 }, { "epoch": 1.0833333333333333, "grad_norm": 0.6078514718903982, "learning_rate": 3.963920917730399e-05, "loss": 0.3293, "loss_nan_ranks": 0, "loss_rank_avg": 0.3750278353691101, "step": 585, "valid_targets_mean": 4482.4, "valid_targets_min": 1680 }, { "epoch": 1.0925925925925926, "grad_norm": 0.6207808313685298, "learning_rate": 3.9621538624630546e-05, "loss": 0.3127, "loss_nan_ranks": 0, "loss_rank_avg": 0.31572356820106506, "step": 590, "valid_targets_mean": 3113.2, "valid_targets_min": 635 }, { "epoch": 1.1018518518518519, "grad_norm": 0.49606649278990134, "learning_rate": 3.9603449756970877e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.2611154019832611, "step": 595, "valid_targets_mean": 4295.1, "valid_targets_min": 545 }, { "epoch": 1.1111111111111112, "grad_norm": 0.6073469095682115, "learning_rate": 3.9584942959964695e-05, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.28560468554496765, "step": 600, "valid_targets_mean": 3647.1, "valid_targets_min": 1445 }, { "epoch": 1.1203703703703705, "grad_norm": 0.5950597164351814, "learning_rate": 3.9566018628161595e-05, "loss": 0.3327, "loss_nan_ranks": 0, "loss_rank_avg": 0.32758647203445435, "step": 605, "valid_targets_mean": 3512.6, "valid_targets_min": 435 }, { "epoch": 1.1296296296296295, "grad_norm": 0.6075429397094143, "learning_rate": 3.9546677165012714e-05, "loss": 0.2825, "loss_nan_ranks": 0, "loss_rank_avg": 0.27764448523521423, "step": 610, "valid_targets_mean": 4952.8, "valid_targets_min": 535 }, { "epoch": 1.1388888888888888, "grad_norm": 0.5550360366103939, "learning_rate": 3.9526918982862045e-05, "loss": 0.2956, "loss_nan_ranks": 0, "loss_rank_avg": 0.3022664785385132, "step": 615, "valid_targets_mean": 3905.1, "valid_targets_min": 831 }, { "epoch": 1.1481481481481481, "grad_norm": 0.6050232035267888, "learning_rate": 3.950674450293771e-05, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.32235607504844666, "step": 620, "valid_targets_mean": 3641.3, "valid_targets_min": 563 }, { "epoch": 1.1574074074074074, "grad_norm": 0.5959296142535176, "learning_rate": 3.948615415534294e-05, "loss": 0.3268, "loss_nan_ranks": 0, "loss_rank_avg": 0.39365071058273315, "step": 625, "valid_targets_mean": 3973.9, "valid_targets_min": 626 }, { "epoch": 1.1666666666666667, "grad_norm": 0.5533298682994967, "learning_rate": 3.946514837904693e-05, "loss": 0.2983, "loss_nan_ranks": 0, "loss_rank_avg": 0.2971389889717102, "step": 630, "valid_targets_mean": 4080.9, "valid_targets_min": 834 }, { "epoch": 1.175925925925926, "grad_norm": 0.6106998081873015, "learning_rate": 3.944372762187547e-05, "loss": 0.3329, "loss_nan_ranks": 0, "loss_rank_avg": 0.3265891671180725, "step": 635, "valid_targets_mean": 3942.2, "valid_targets_min": 446 }, { "epoch": 1.1851851851851851, "grad_norm": 0.5794193483906891, "learning_rate": 3.9421892340501405e-05, "loss": 0.3145, "loss_nan_ranks": 0, "loss_rank_avg": 0.3371884822845459, "step": 640, "valid_targets_mean": 4098.7, "valid_targets_min": 295 }, { "epoch": 1.1944444444444444, "grad_norm": 0.5448020606518212, "learning_rate": 3.939964300043487e-05, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.27047258615493774, "step": 645, "valid_targets_mean": 4001.4, "valid_targets_min": 592 }, { "epoch": 1.2037037037037037, "grad_norm": 0.5233238220020286, "learning_rate": 3.9376980076013426e-05, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.30770355463027954, "step": 650, "valid_targets_mean": 4120.2, "valid_targets_min": 779 }, { "epoch": 1.212962962962963, "grad_norm": 0.5883380365500736, "learning_rate": 3.9353904050391874e-05, "loss": 0.3377, "loss_nan_ranks": 0, "loss_rank_avg": 0.36008432507514954, "step": 655, "valid_targets_mean": 4483.6, "valid_targets_min": 2162 }, { "epoch": 1.2222222222222223, "grad_norm": 0.8824261790418245, "learning_rate": 3.933041541553202e-05, "loss": 0.2975, "loss_nan_ranks": 0, "loss_rank_avg": 0.2799769937992096, "step": 660, "valid_targets_mean": 3285.0, "valid_targets_min": 599 }, { "epoch": 1.2314814814814814, "grad_norm": 0.5442814396188268, "learning_rate": 3.930651467219214e-05, "loss": 0.3, "loss_nan_ranks": 0, "loss_rank_avg": 0.2907693386077881, "step": 665, "valid_targets_mean": 4048.6, "valid_targets_min": 669 }, { "epoch": 1.2407407407407407, "grad_norm": 0.5827496169253467, "learning_rate": 3.928220232991633e-05, "loss": 0.3124, "loss_nan_ranks": 0, "loss_rank_avg": 0.29981476068496704, "step": 670, "valid_targets_mean": 3801.6, "valid_targets_min": 1767 }, { "epoch": 1.25, "grad_norm": 0.5462062382083914, "learning_rate": 3.925747890702363e-05, "loss": 0.3041, "loss_nan_ranks": 0, "loss_rank_avg": 0.2990304231643677, "step": 675, "valid_targets_mean": 4142.3, "valid_targets_min": 321 }, { "epoch": 1.2592592592592593, "grad_norm": 0.6206083513162505, "learning_rate": 3.9232344930596983e-05, "loss": 0.2986, "loss_nan_ranks": 0, "loss_rank_avg": 0.2708045244216919, "step": 680, "valid_targets_mean": 3426.8, "valid_targets_min": 2009 }, { "epoch": 1.2685185185185186, "grad_norm": 0.5745410057483962, "learning_rate": 3.9206800936472e-05, "loss": 0.2947, "loss_nan_ranks": 0, "loss_rank_avg": 0.2561905086040497, "step": 685, "valid_targets_mean": 3190.8, "valid_targets_min": 871 }, { "epoch": 1.2777777777777777, "grad_norm": 0.5537138179650384, "learning_rate": 3.9180847469225514e-05, "loss": 0.3227, "loss_nan_ranks": 0, "loss_rank_avg": 0.359304279088974, "step": 690, "valid_targets_mean": 4509.7, "valid_targets_min": 595 }, { "epoch": 1.287037037037037, "grad_norm": 0.5205868768170611, "learning_rate": 3.9154485082164e-05, "loss": 0.2835, "loss_nan_ranks": 0, "loss_rank_avg": 0.28854691982269287, "step": 695, "valid_targets_mean": 4476.4, "valid_targets_min": 421 }, { "epoch": 1.2962962962962963, "grad_norm": 0.5809148768460918, "learning_rate": 3.912771433731176e-05, "loss": 0.32, "loss_nan_ranks": 0, "loss_rank_avg": 0.33331120014190674, "step": 700, "valid_targets_mean": 4712.1, "valid_targets_min": 2303 }, { "epoch": 1.3055555555555556, "grad_norm": 0.6151605514518709, "learning_rate": 3.910053580539896e-05, "loss": 0.3347, "loss_nan_ranks": 0, "loss_rank_avg": 0.3627372980117798, "step": 705, "valid_targets_mean": 4119.8, "valid_targets_min": 698 }, { "epoch": 1.3148148148148149, "grad_norm": 0.5988192996988059, "learning_rate": 3.907295006584941e-05, "loss": 0.317, "loss_nan_ranks": 0, "loss_rank_avg": 0.34967589378356934, "step": 710, "valid_targets_mean": 4211.2, "valid_targets_min": 888 }, { "epoch": 1.324074074074074, "grad_norm": 0.5533738940567987, "learning_rate": 3.904495770676831e-05, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.2595583200454712, "step": 715, "valid_targets_mean": 3652.5, "valid_targets_min": 772 }, { "epoch": 1.3333333333333333, "grad_norm": 0.677729485934007, "learning_rate": 3.9016559324929594e-05, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.28384801745414734, "step": 720, "valid_targets_mean": 3426.2, "valid_targets_min": 2319 }, { "epoch": 1.3425925925925926, "grad_norm": 0.5229228302802816, "learning_rate": 3.8987755525763315e-05, "loss": 0.316, "loss_nan_ranks": 0, "loss_rank_avg": 0.3147448003292084, "step": 725, "valid_targets_mean": 4087.8, "valid_targets_min": 567 }, { "epoch": 1.3518518518518519, "grad_norm": 0.5839814638980791, "learning_rate": 3.895854692334264e-05, "loss": 0.3029, "loss_nan_ranks": 0, "loss_rank_avg": 0.25805115699768066, "step": 730, "valid_targets_mean": 4818.6, "valid_targets_min": 1703 }, { "epoch": 1.3611111111111112, "grad_norm": 0.567166024329898, "learning_rate": 3.892893414037084e-05, "loss": 0.3099, "loss_nan_ranks": 0, "loss_rank_avg": 0.2491036057472229, "step": 735, "valid_targets_mean": 3994.6, "valid_targets_min": 2004 }, { "epoch": 1.3703703703703702, "grad_norm": 0.5547431269439685, "learning_rate": 3.889891780816799e-05, "loss": 0.3056, "loss_nan_ranks": 0, "loss_rank_avg": 0.31328123807907104, "step": 740, "valid_targets_mean": 3948.7, "valid_targets_min": 628 }, { "epoch": 1.3796296296296298, "grad_norm": 0.5671900268720954, "learning_rate": 3.886849856665746e-05, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.3374163508415222, "step": 745, "valid_targets_mean": 3923.8, "valid_targets_min": 892 }, { "epoch": 1.3888888888888888, "grad_norm": 0.53885923431837, "learning_rate": 3.8837677064352345e-05, "loss": 0.2992, "loss_nan_ranks": 0, "loss_rank_avg": 0.31161177158355713, "step": 750, "valid_targets_mean": 3956.8, "valid_targets_min": 1684 }, { "epoch": 1.3981481481481481, "grad_norm": 0.5062852118800623, "learning_rate": 3.8806453958341615e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.23768174648284912, "step": 755, "valid_targets_mean": 3670.1, "valid_targets_min": 840 }, { "epoch": 1.4074074074074074, "grad_norm": 0.5980684056816908, "learning_rate": 3.877482991427607e-05, "loss": 0.3279, "loss_nan_ranks": 0, "loss_rank_avg": 0.3120206594467163, "step": 760, "valid_targets_mean": 3676.0, "valid_targets_min": 1026 }, { "epoch": 1.4166666666666667, "grad_norm": 0.5263249865978269, "learning_rate": 3.874280560635418e-05, "loss": 0.2727, "loss_nan_ranks": 0, "loss_rank_avg": 0.24431112408638, "step": 765, "valid_targets_mean": 4316.4, "valid_targets_min": 1753 }, { "epoch": 1.425925925925926, "grad_norm": 0.5371674118999251, "learning_rate": 3.871038171730775e-05, "loss": 0.322, "loss_nan_ranks": 0, "loss_rank_avg": 0.3550418019294739, "step": 770, "valid_targets_mean": 5343.2, "valid_targets_min": 1034 }, { "epoch": 1.4351851851851851, "grad_norm": 0.5747486969945653, "learning_rate": 3.8677558938387276e-05, "loss": 0.2928, "loss_nan_ranks": 0, "loss_rank_avg": 0.2658607065677643, "step": 775, "valid_targets_mean": 3501.0, "valid_targets_min": 490 }, { "epoch": 1.4444444444444444, "grad_norm": 0.5593639327643428, "learning_rate": 3.864433796934728e-05, "loss": 0.317, "loss_nan_ranks": 0, "loss_rank_avg": 0.2963068187236786, "step": 780, "valid_targets_mean": 3459.8, "valid_targets_min": 1596 }, { "epoch": 1.4537037037037037, "grad_norm": 0.49539758416732715, "learning_rate": 3.861071951843137e-05, "loss": 0.2967, "loss_nan_ranks": 0, "loss_rank_avg": 0.3172762393951416, "step": 785, "valid_targets_mean": 4932.9, "valid_targets_min": 2031 }, { "epoch": 1.462962962962963, "grad_norm": 0.7233453222225057, "learning_rate": 3.8576704302357135e-05, "loss": 0.2987, "loss_nan_ranks": 0, "loss_rank_avg": 0.31079697608947754, "step": 790, "valid_targets_mean": 2923.9, "valid_targets_min": 368 }, { "epoch": 1.4722222222222223, "grad_norm": 0.5304815079906486, "learning_rate": 3.854229304630086e-05, "loss": 0.3058, "loss_nan_ranks": 0, "loss_rank_avg": 0.2519688010215759, "step": 795, "valid_targets_mean": 3894.1, "valid_targets_min": 1015 }, { "epoch": 1.4814814814814814, "grad_norm": 0.5912953728210274, "learning_rate": 3.8507486483882084e-05, "loss": 0.3187, "loss_nan_ranks": 0, "loss_rank_avg": 0.31321465969085693, "step": 800, "valid_targets_mean": 3528.2, "valid_targets_min": 641 }, { "epoch": 1.4907407407407407, "grad_norm": 0.676659645331013, "learning_rate": 3.8472285357147966e-05, "loss": 0.2899, "loss_nan_ranks": 0, "loss_rank_avg": 0.34123727679252625, "step": 805, "valid_targets_mean": 4592.4, "valid_targets_min": 697 }, { "epoch": 1.5, "grad_norm": 0.4442677722852739, "learning_rate": 3.843669041655741e-05, "loss": 0.3091, "loss_nan_ranks": 0, "loss_rank_avg": 0.30493301153182983, "step": 810, "valid_targets_mean": 5845.2, "valid_targets_min": 255 }, { "epoch": 1.5092592592592593, "grad_norm": 0.5682057669159459, "learning_rate": 3.840070242096514e-05, "loss": 0.3152, "loss_nan_ranks": 0, "loss_rank_avg": 0.3426523804664612, "step": 815, "valid_targets_mean": 4269.9, "valid_targets_min": 317 }, { "epoch": 1.5185185185185186, "grad_norm": 0.5718791042050542, "learning_rate": 3.8364322137605484e-05, "loss": 0.2951, "loss_nan_ranks": 0, "loss_rank_avg": 0.2769266366958618, "step": 820, "valid_targets_mean": 3698.8, "valid_targets_min": 665 }, { "epoch": 1.5277777777777777, "grad_norm": 0.5536227502501987, "learning_rate": 3.832755034207601e-05, "loss": 0.2956, "loss_nan_ranks": 0, "loss_rank_avg": 0.30558109283447266, "step": 825, "valid_targets_mean": 4033.0, "valid_targets_min": 585 }, { "epoch": 1.5370370370370372, "grad_norm": 0.5252844534173842, "learning_rate": 3.8290387818321e-05, "loss": 0.2996, "loss_nan_ranks": 0, "loss_rank_avg": 0.3152919411659241, "step": 830, "valid_targets_mean": 4217.6, "valid_targets_min": 1788 }, { "epoch": 1.5462962962962963, "grad_norm": 0.6143178973257938, "learning_rate": 3.825283535861476e-05, "loss": 0.3175, "loss_nan_ranks": 0, "loss_rank_avg": 0.2816426157951355, "step": 835, "valid_targets_mean": 5267.9, "valid_targets_min": 2440 }, { "epoch": 1.5555555555555556, "grad_norm": 0.5986189313818777, "learning_rate": 3.8214893763544684e-05, "loss": 0.3122, "loss_nan_ranks": 0, "loss_rank_avg": 0.27623170614242554, "step": 840, "valid_targets_mean": 3365.1, "valid_targets_min": 474 }, { "epoch": 1.5648148148148149, "grad_norm": 0.44155638321379925, "learning_rate": 3.817656384199422e-05, "loss": 0.2748, "loss_nan_ranks": 0, "loss_rank_avg": 0.2690165340900421, "step": 845, "valid_targets_mean": 5673.1, "valid_targets_min": 2925 }, { "epoch": 1.574074074074074, "grad_norm": 0.5395718733919299, "learning_rate": 3.813784641112563e-05, "loss": 0.3036, "loss_nan_ranks": 0, "loss_rank_avg": 0.29078513383865356, "step": 850, "valid_targets_mean": 5001.7, "valid_targets_min": 2123 }, { "epoch": 1.5833333333333335, "grad_norm": 0.5091833646708693, "learning_rate": 3.8098742296362506e-05, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.3604724407196045, "step": 855, "valid_targets_mean": 5089.8, "valid_targets_min": 663 }, { "epoch": 1.5925925925925926, "grad_norm": 0.6087696649809414, "learning_rate": 3.805925233137229e-05, "loss": 0.3232, "loss_nan_ranks": 0, "loss_rank_avg": 0.3626618981361389, "step": 860, "valid_targets_mean": 3525.3, "valid_targets_min": 950 }, { "epoch": 1.6018518518518519, "grad_norm": 0.5643870271264605, "learning_rate": 3.801937735804838e-05, "loss": 0.3133, "loss_nan_ranks": 0, "loss_rank_avg": 0.28040584921836853, "step": 865, "valid_targets_mean": 3399.7, "valid_targets_min": 1903 }, { "epoch": 1.6111111111111112, "grad_norm": 0.5439012864299764, "learning_rate": 3.7979118226492266e-05, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.29787707328796387, "step": 870, "valid_targets_mean": 3488.6, "valid_targets_min": 637 }, { "epoch": 1.6203703703703702, "grad_norm": 0.49165947496754964, "learning_rate": 3.793847579499534e-05, "loss": 0.2931, "loss_nan_ranks": 0, "loss_rank_avg": 0.34809091687202454, "step": 875, "valid_targets_mean": 5268.2, "valid_targets_min": 1908 }, { "epoch": 1.6296296296296298, "grad_norm": 0.5140250546410033, "learning_rate": 3.789745093002065e-05, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.2461247444152832, "step": 880, "valid_targets_mean": 3590.8, "valid_targets_min": 541 }, { "epoch": 1.6388888888888888, "grad_norm": 0.5054561634272887, "learning_rate": 3.785604450618443e-05, "loss": 0.2708, "loss_nan_ranks": 0, "loss_rank_avg": 0.21256114542484283, "step": 885, "valid_targets_mean": 3803.2, "valid_targets_min": 397 }, { "epoch": 1.6481481481481481, "grad_norm": 0.5046967079101841, "learning_rate": 3.781425740623739e-05, "loss": 0.3127, "loss_nan_ranks": 0, "loss_rank_avg": 0.2555197477340698, "step": 890, "valid_targets_mean": 5367.1, "valid_targets_min": 2149 }, { "epoch": 1.6574074074074074, "grad_norm": 0.5031111391748204, "learning_rate": 3.777209052104598e-05, "loss": 0.309, "loss_nan_ranks": 0, "loss_rank_avg": 0.27394580841064453, "step": 895, "valid_targets_mean": 4603.8, "valid_targets_min": 1982 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5200593833614563, "learning_rate": 3.7729544749573335e-05, "loss": 0.3088, "loss_nan_ranks": 0, "loss_rank_avg": 0.30838844180107117, "step": 900, "valid_targets_mean": 4181.4, "valid_targets_min": 739 }, { "epoch": 1.675925925925926, "grad_norm": 0.5108208774796558, "learning_rate": 3.768662099886014e-05, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.35393011569976807, "step": 905, "valid_targets_mean": 5152.1, "valid_targets_min": 910 }, { "epoch": 1.6851851851851851, "grad_norm": 0.563904222236046, "learning_rate": 3.7643320184005284e-05, "loss": 0.2958, "loss_nan_ranks": 0, "loss_rank_avg": 0.2836383879184723, "step": 910, "valid_targets_mean": 3775.8, "valid_targets_min": 1926 }, { "epoch": 1.6944444444444444, "grad_norm": 0.6370969191435905, "learning_rate": 3.7599643228146355e-05, "loss": 0.319, "loss_nan_ranks": 0, "loss_rank_avg": 0.34479522705078125, "step": 915, "valid_targets_mean": 3473.2, "valid_targets_min": 479 }, { "epoch": 1.7037037037037037, "grad_norm": 0.4489327078730733, "learning_rate": 3.755559106243994e-05, "loss": 0.2783, "loss_nan_ranks": 0, "loss_rank_avg": 0.2346605360507965, "step": 920, "valid_targets_mean": 4684.8, "valid_targets_min": 1007 }, { "epoch": 1.7129629629629628, "grad_norm": 0.5284546797799259, "learning_rate": 3.7511164626041823e-05, "loss": 0.3133, "loss_nan_ranks": 0, "loss_rank_avg": 0.3440583646297455, "step": 925, "valid_targets_mean": 4763.8, "valid_targets_min": 2487 }, { "epoch": 1.7222222222222223, "grad_norm": 0.5116796474023507, "learning_rate": 3.746636486608689e-05, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.31446945667266846, "step": 930, "valid_targets_mean": 4609.9, "valid_targets_min": 2063 }, { "epoch": 1.7314814814814814, "grad_norm": 0.5614968969786471, "learning_rate": 3.7421192737669005e-05, "loss": 0.3301, "loss_nan_ranks": 0, "loss_rank_avg": 0.32294750213623047, "step": 935, "valid_targets_mean": 3850.6, "valid_targets_min": 610 }, { "epoch": 1.7407407407407407, "grad_norm": 0.5657690526577215, "learning_rate": 3.737564920382061e-05, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.31011059880256653, "step": 940, "valid_targets_mean": 3671.0, "valid_targets_min": 2036 }, { "epoch": 1.75, "grad_norm": 0.5783601802984916, "learning_rate": 3.732973523549221e-05, "loss": 0.3106, "loss_nan_ranks": 0, "loss_rank_avg": 0.29496774077415466, "step": 945, "valid_targets_mean": 3423.9, "valid_targets_min": 494 }, { "epoch": 1.7592592592592593, "grad_norm": 0.43290336789364203, "learning_rate": 3.728345181153165e-05, "loss": 0.2913, "loss_nan_ranks": 0, "loss_rank_avg": 0.2509721517562866, "step": 950, "valid_targets_mean": 4465.2, "valid_targets_min": 1717 }, { "epoch": 1.7685185185185186, "grad_norm": 0.4912129854091696, "learning_rate": 3.7236799918663284e-05, "loss": 0.3068, "loss_nan_ranks": 0, "loss_rank_avg": 0.28017228841781616, "step": 955, "valid_targets_mean": 4293.8, "valid_targets_min": 2343 }, { "epoch": 1.7777777777777777, "grad_norm": 0.6332365406111699, "learning_rate": 3.7189780551466905e-05, "loss": 0.2727, "loss_nan_ranks": 0, "loss_rank_avg": 0.26829952001571655, "step": 960, "valid_targets_mean": 2642.8, "valid_targets_min": 1700 }, { "epoch": 1.7870370370370372, "grad_norm": 0.5262914650173831, "learning_rate": 3.714239471235657e-05, "loss": 0.2959, "loss_nan_ranks": 0, "loss_rank_avg": 0.3247262239456177, "step": 965, "valid_targets_mean": 5274.5, "valid_targets_min": 802 }, { "epoch": 1.7962962962962963, "grad_norm": 0.55145501931712, "learning_rate": 3.7094643411559194e-05, "loss": 0.2746, "loss_nan_ranks": 0, "loss_rank_avg": 0.314953088760376, "step": 970, "valid_targets_mean": 3900.6, "valid_targets_min": 1945 }, { "epoch": 1.8055555555555556, "grad_norm": 0.5213565964214443, "learning_rate": 3.704652766709305e-05, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.3098965287208557, "step": 975, "valid_targets_mean": 4299.4, "valid_targets_min": 1835 }, { "epoch": 1.8148148148148149, "grad_norm": 0.5142744749508426, "learning_rate": 3.699804850474603e-05, "loss": 0.2846, "loss_nan_ranks": 0, "loss_rank_avg": 0.28077518939971924, "step": 980, "valid_targets_mean": 4123.2, "valid_targets_min": 322 }, { "epoch": 1.824074074074074, "grad_norm": 0.6109944754762814, "learning_rate": 3.6949206958053825e-05, "loss": 0.3212, "loss_nan_ranks": 0, "loss_rank_avg": 0.29146653413772583, "step": 985, "valid_targets_mean": 3814.9, "valid_targets_min": 1464 }, { "epoch": 1.8333333333333335, "grad_norm": 0.6418924924446736, "learning_rate": 3.690000406827783e-05, "loss": 0.3001, "loss_nan_ranks": 0, "loss_rank_avg": 0.2858262360095978, "step": 990, "valid_targets_mean": 2490.6, "valid_targets_min": 559 }, { "epoch": 1.8425925925925926, "grad_norm": 0.5742545688665202, "learning_rate": 3.685044088438299e-05, "loss": 0.3013, "loss_nan_ranks": 0, "loss_rank_avg": 0.2759723961353302, "step": 995, "valid_targets_mean": 4008.8, "valid_targets_min": 1779 }, { "epoch": 1.8518518518518519, "grad_norm": 0.5254418262167269, "learning_rate": 3.680051846301543e-05, "loss": 0.2562, "loss_nan_ranks": 0, "loss_rank_avg": 0.2492840737104416, "step": 1000, "valid_targets_mean": 3808.9, "valid_targets_min": 1535 }, { "epoch": 1.8611111111111112, "grad_norm": 0.44300703016797877, "learning_rate": 3.675023786847991e-05, "loss": 0.2984, "loss_nan_ranks": 0, "loss_rank_avg": 0.27016231417655945, "step": 1005, "valid_targets_mean": 4913.9, "valid_targets_min": 2688 }, { "epoch": 1.8703703703703702, "grad_norm": 0.5313397881548159, "learning_rate": 3.6699600172717137e-05, "loss": 0.2956, "loss_nan_ranks": 0, "loss_rank_avg": 0.27664732933044434, "step": 1010, "valid_targets_mean": 3364.4, "valid_targets_min": 654 }, { "epoch": 1.8796296296296298, "grad_norm": 0.7092703491214453, "learning_rate": 3.6648606455280944e-05, "loss": 0.2968, "loss_nan_ranks": 0, "loss_rank_avg": 0.3045133352279663, "step": 1015, "valid_targets_mean": 3802.8, "valid_targets_min": 313 }, { "epoch": 1.8888888888888888, "grad_norm": 0.5263165468712829, "learning_rate": 3.659725780331524e-05, "loss": 0.3094, "loss_nan_ranks": 0, "loss_rank_avg": 0.3496822714805603, "step": 1020, "valid_targets_mean": 4270.3, "valid_targets_min": 2039 }, { "epoch": 1.8981481481481481, "grad_norm": 0.5704220397986243, "learning_rate": 3.654555531153084e-05, "loss": 0.3112, "loss_nan_ranks": 0, "loss_rank_avg": 0.25687843561172485, "step": 1025, "valid_targets_mean": 3186.1, "valid_targets_min": 310 }, { "epoch": 1.9074074074074074, "grad_norm": 0.4827843955838589, "learning_rate": 3.649350008218214e-05, "loss": 0.2874, "loss_nan_ranks": 0, "loss_rank_avg": 0.3169630169868469, "step": 1030, "valid_targets_mean": 4576.8, "valid_targets_min": 2306 }, { "epoch": 1.9166666666666665, "grad_norm": 0.49931307758205035, "learning_rate": 3.64410932250436e-05, "loss": 0.3212, "loss_nan_ranks": 0, "loss_rank_avg": 0.36018311977386475, "step": 1035, "valid_targets_mean": 4679.8, "valid_targets_min": 896 }, { "epoch": 1.925925925925926, "grad_norm": 0.5372589737787232, "learning_rate": 3.638833585738611e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.2810884714126587, "step": 1040, "valid_targets_mean": 3718.3, "valid_targets_min": 1706 }, { "epoch": 1.9351851851851851, "grad_norm": 0.5448044758731123, "learning_rate": 3.633522910395314e-05, "loss": 0.2798, "loss_nan_ranks": 0, "loss_rank_avg": 0.2674560546875, "step": 1045, "valid_targets_mean": 3725.8, "valid_targets_min": 299 }, { "epoch": 1.9444444444444444, "grad_norm": 0.49024688080681106, "learning_rate": 3.628177409693677e-05, "loss": 0.2917, "loss_nan_ranks": 0, "loss_rank_avg": 0.31284260749816895, "step": 1050, "valid_targets_mean": 4673.3, "valid_targets_min": 1880 }, { "epoch": 1.9537037037037037, "grad_norm": 0.4596588910916473, "learning_rate": 3.622797197595359e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.25262099504470825, "step": 1055, "valid_targets_mean": 4529.9, "valid_targets_min": 943 }, { "epoch": 1.9629629629629628, "grad_norm": 0.556506113366241, "learning_rate": 3.6173823888020335e-05, "loss": 0.3142, "loss_nan_ranks": 0, "loss_rank_avg": 0.3350547254085541, "step": 1060, "valid_targets_mean": 3848.2, "valid_targets_min": 247 }, { "epoch": 1.9722222222222223, "grad_norm": 0.5988985101293695, "learning_rate": 3.611933098752949e-05, "loss": 0.2992, "loss_nan_ranks": 0, "loss_rank_avg": 0.2744201123714447, "step": 1065, "valid_targets_mean": 2717.9, "valid_targets_min": 661 }, { "epoch": 1.9814814814814814, "grad_norm": 0.5027204774148215, "learning_rate": 3.6064494436224655e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.2839767336845398, "step": 1070, "valid_targets_mean": 4282.0, "valid_targets_min": 1624 }, { "epoch": 1.9907407407407407, "grad_norm": 0.5609086397412129, "learning_rate": 3.6009315403175786e-05, "loss": 0.2909, "loss_nan_ranks": 0, "loss_rank_avg": 0.3083677589893341, "step": 1075, "valid_targets_mean": 3896.8, "valid_targets_min": 821 }, { "epoch": 2.0, "grad_norm": 0.5064761754287327, "learning_rate": 3.595379506475426e-05, "loss": 0.3159, "loss_nan_ranks": 0, "loss_rank_avg": 0.30033770203590393, "step": 1080, "valid_targets_mean": 3941.4, "valid_targets_min": 446 }, { "epoch": 2.009259259259259, "grad_norm": 0.5512471121936553, "learning_rate": 3.5897934604607795e-05, "loss": 0.2752, "loss_nan_ranks": 0, "loss_rank_avg": 0.31047865748405457, "step": 1085, "valid_targets_mean": 4005.3, "valid_targets_min": 667 }, { "epoch": 2.0185185185185186, "grad_norm": 0.5160950707981314, "learning_rate": 3.584173521363525e-05, "loss": 0.2628, "loss_nan_ranks": 0, "loss_rank_avg": 0.23959210515022278, "step": 1090, "valid_targets_mean": 4089.5, "valid_targets_min": 393 }, { "epoch": 2.0277777777777777, "grad_norm": 0.5035622460268815, "learning_rate": 3.578519808996117e-05, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.2824031114578247, "step": 1095, "valid_targets_mean": 4900.2, "valid_targets_min": 2631 }, { "epoch": 2.037037037037037, "grad_norm": 0.5944018512965265, "learning_rate": 3.572832443891033e-05, "loss": 0.2674, "loss_nan_ranks": 0, "loss_rank_avg": 0.2954099178314209, "step": 1100, "valid_targets_mean": 3287.0, "valid_targets_min": 828 }, { "epoch": 2.0462962962962963, "grad_norm": 0.56938852512268, "learning_rate": 3.567111547298194e-05, "loss": 0.2806, "loss_nan_ranks": 0, "loss_rank_avg": 0.3027660846710205, "step": 1105, "valid_targets_mean": 4170.7, "valid_targets_min": 1762 }, { "epoch": 2.0555555555555554, "grad_norm": 0.5871826431417995, "learning_rate": 3.561357241182388e-05, "loss": 0.2935, "loss_nan_ranks": 0, "loss_rank_avg": 0.3108934164047241, "step": 1110, "valid_targets_mean": 4158.6, "valid_targets_min": 814 }, { "epoch": 2.064814814814815, "grad_norm": 0.5385231227592268, "learning_rate": 3.555569648220666e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.2445867955684662, "step": 1115, "valid_targets_mean": 3935.2, "valid_targets_min": 772 }, { "epoch": 2.074074074074074, "grad_norm": 0.6485921558298788, "learning_rate": 3.549748891799726e-05, "loss": 0.2934, "loss_nan_ranks": 0, "loss_rank_avg": 0.25209206342697144, "step": 1120, "valid_targets_mean": 3367.2, "valid_targets_min": 496 }, { "epoch": 2.0833333333333335, "grad_norm": 0.5022623495987496, "learning_rate": 3.543895096013284e-05, "loss": 0.2828, "loss_nan_ranks": 0, "loss_rank_avg": 0.2579001188278198, "step": 1125, "valid_targets_mean": 4011.8, "valid_targets_min": 771 }, { "epoch": 2.0925925925925926, "grad_norm": 0.47266461256045833, "learning_rate": 3.538008385659427e-05, "loss": 0.3019, "loss_nan_ranks": 0, "loss_rank_avg": 0.30898475646972656, "step": 1130, "valid_targets_mean": 5338.7, "valid_targets_min": 466 }, { "epoch": 2.1018518518518516, "grad_norm": 0.474270085961415, "learning_rate": 3.532088886237956e-05, "loss": 0.2427, "loss_nan_ranks": 0, "loss_rank_avg": 0.24021559953689575, "step": 1135, "valid_targets_mean": 5083.8, "valid_targets_min": 1865 }, { "epoch": 2.111111111111111, "grad_norm": 0.4625964336857986, "learning_rate": 3.5261367239477055e-05, "loss": 0.2749, "loss_nan_ranks": 0, "loss_rank_avg": 0.22822529077529907, "step": 1140, "valid_targets_mean": 4619.4, "valid_targets_min": 2010 }, { "epoch": 2.1203703703703702, "grad_norm": 0.48948627688717905, "learning_rate": 3.520152025683856e-05, "loss": 0.2924, "loss_nan_ranks": 0, "loss_rank_avg": 0.3005768656730652, "step": 1145, "valid_targets_mean": 4932.8, "valid_targets_min": 2461 }, { "epoch": 2.1296296296296298, "grad_norm": 0.53742697060363, "learning_rate": 3.514134919035229e-05, "loss": 0.2515, "loss_nan_ranks": 0, "loss_rank_avg": 0.257113516330719, "step": 1150, "valid_targets_mean": 3710.9, "valid_targets_min": 1530 }, { "epoch": 2.138888888888889, "grad_norm": 0.5503512629168121, "learning_rate": 3.5080855322815635e-05, "loss": 0.2788, "loss_nan_ranks": 0, "loss_rank_avg": 0.27239710092544556, "step": 1155, "valid_targets_mean": 4531.2, "valid_targets_min": 845 }, { "epoch": 2.148148148148148, "grad_norm": 0.6889338655609255, "learning_rate": 3.5020039943907855e-05, "loss": 0.2938, "loss_nan_ranks": 0, "loss_rank_avg": 0.3204009532928467, "step": 1160, "valid_targets_mean": 3625.0, "valid_targets_min": 404 }, { "epoch": 2.1574074074074074, "grad_norm": 0.5579386971063519, "learning_rate": 3.495890435016258e-05, "loss": 0.2861, "loss_nan_ranks": 0, "loss_rank_avg": 0.31176429986953735, "step": 1165, "valid_targets_mean": 3857.6, "valid_targets_min": 1893 }, { "epoch": 2.1666666666666665, "grad_norm": 0.4892660944704105, "learning_rate": 3.489744984494012e-05, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.2628619968891144, "step": 1170, "valid_targets_mean": 4434.8, "valid_targets_min": 863 }, { "epoch": 2.175925925925926, "grad_norm": 0.5387503793875826, "learning_rate": 3.4835677738399745e-05, "loss": 0.2899, "loss_nan_ranks": 0, "loss_rank_avg": 0.2822778820991516, "step": 1175, "valid_targets_mean": 4123.1, "valid_targets_min": 733 }, { "epoch": 2.185185185185185, "grad_norm": 0.45394010508846994, "learning_rate": 3.477358934747172e-05, "loss": 0.2703, "loss_nan_ranks": 0, "loss_rank_avg": 0.22972646355628967, "step": 1180, "valid_targets_mean": 4582.6, "valid_targets_min": 549 }, { "epoch": 2.1944444444444446, "grad_norm": 0.5572233307223254, "learning_rate": 3.47111859958292e-05, "loss": 0.2931, "loss_nan_ranks": 0, "loss_rank_avg": 0.3954271078109741, "step": 1185, "valid_targets_mean": 5509.8, "valid_targets_min": 1607 }, { "epoch": 2.2037037037037037, "grad_norm": 0.5791570229327255, "learning_rate": 3.464846901386008e-05, "loss": 0.2783, "loss_nan_ranks": 0, "loss_rank_avg": 0.25588738918304443, "step": 1190, "valid_targets_mean": 3254.6, "valid_targets_min": 1942 }, { "epoch": 2.212962962962963, "grad_norm": 0.5529120478362382, "learning_rate": 3.458543973863859e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.31822943687438965, "step": 1195, "valid_targets_mean": 4516.6, "valid_targets_min": 1521 }, { "epoch": 2.2222222222222223, "grad_norm": 0.5903216004748315, "learning_rate": 3.452209951389677e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.27697065472602844, "step": 1200, "valid_targets_mean": 3290.7, "valid_targets_min": 767 }, { "epoch": 2.2314814814814814, "grad_norm": 0.6691604327279813, "learning_rate": 3.445844968999586e-05, "loss": 0.263, "loss_nan_ranks": 0, "loss_rank_avg": 0.27134162187576294, "step": 1205, "valid_targets_mean": 3421.6, "valid_targets_min": 554 }, { "epoch": 2.240740740740741, "grad_norm": 0.4881557721381399, "learning_rate": 3.4394491623897506e-05, "loss": 0.2725, "loss_nan_ranks": 0, "loss_rank_avg": 0.2903343737125397, "step": 1210, "valid_targets_mean": 4894.1, "valid_targets_min": 2191 }, { "epoch": 2.25, "grad_norm": 0.47094056474807733, "learning_rate": 3.4330226679134805e-05, "loss": 0.2653, "loss_nan_ranks": 0, "loss_rank_avg": 0.23896852135658264, "step": 1215, "valid_targets_mean": 4323.1, "valid_targets_min": 1904 }, { "epoch": 2.259259259259259, "grad_norm": 0.5276120851649831, "learning_rate": 3.426565622578327e-05, "loss": 0.2917, "loss_nan_ranks": 0, "loss_rank_avg": 0.2607136368751526, "step": 1220, "valid_targets_mean": 4749.9, "valid_targets_min": 1003 }, { "epoch": 2.2685185185185186, "grad_norm": 0.5849502666887231, "learning_rate": 3.420078164043161e-05, "loss": 0.2911, "loss_nan_ranks": 0, "loss_rank_avg": 0.3107423186302185, "step": 1225, "valid_targets_mean": 4094.4, "valid_targets_min": 708 }, { "epoch": 2.2777777777777777, "grad_norm": 0.5217159399236517, "learning_rate": 3.413560430615235e-05, "loss": 0.3058, "loss_nan_ranks": 0, "loss_rank_avg": 0.26841235160827637, "step": 1230, "valid_targets_mean": 4422.4, "valid_targets_min": 321 }, { "epoch": 2.287037037037037, "grad_norm": 0.6178611619364681, "learning_rate": 3.407012561247239e-05, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.25904977321624756, "step": 1235, "valid_targets_mean": 2980.8, "valid_targets_min": 835 }, { "epoch": 2.2962962962962963, "grad_norm": 0.5079532786534702, "learning_rate": 3.400434695534337e-05, "loss": 0.2699, "loss_nan_ranks": 0, "loss_rank_avg": 0.2537451386451721, "step": 1240, "valid_targets_mean": 4113.8, "valid_targets_min": 735 }, { "epoch": 2.3055555555555554, "grad_norm": 0.532875433175585, "learning_rate": 3.393826973711189e-05, "loss": 0.3025, "loss_nan_ranks": 0, "loss_rank_avg": 0.2632405757904053, "step": 1245, "valid_targets_mean": 4773.4, "valid_targets_min": 513 }, { "epoch": 2.314814814814815, "grad_norm": 0.5803442874340493, "learning_rate": 3.3871895366489624e-05, "loss": 0.2906, "loss_nan_ranks": 0, "loss_rank_avg": 0.2636168301105499, "step": 1250, "valid_targets_mean": 3658.9, "valid_targets_min": 480 }, { "epoch": 2.324074074074074, "grad_norm": 0.5173410175649708, "learning_rate": 3.38052252585233e-05, "loss": 0.2525, "loss_nan_ranks": 0, "loss_rank_avg": 0.27228376269340515, "step": 1255, "valid_targets_mean": 4343.1, "valid_targets_min": 1467 }, { "epoch": 2.3333333333333335, "grad_norm": 0.514166475299944, "learning_rate": 3.373826083456451e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.27561166882514954, "step": 1260, "valid_targets_mean": 4768.9, "valid_targets_min": 1369 }, { "epoch": 2.3425925925925926, "grad_norm": 0.5438892930868031, "learning_rate": 3.367100352223944e-05, "loss": 0.2903, "loss_nan_ranks": 0, "loss_rank_avg": 0.2975728511810303, "step": 1265, "valid_targets_mean": 4511.4, "valid_targets_min": 1311 }, { "epoch": 2.351851851851852, "grad_norm": 0.5714498435745807, "learning_rate": 3.360345475541839e-05, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.28769245743751526, "step": 1270, "valid_targets_mean": 3497.2, "valid_targets_min": 317 }, { "epoch": 2.361111111111111, "grad_norm": 0.5680794410701885, "learning_rate": 3.353561597418524e-05, "loss": 0.3389, "loss_nan_ranks": 0, "loss_rank_avg": 0.3808847665786743, "step": 1275, "valid_targets_mean": 4961.8, "valid_targets_min": 887 }, { "epoch": 2.3703703703703702, "grad_norm": 0.5542270863328363, "learning_rate": 3.346748862480674e-05, "loss": 0.2691, "loss_nan_ranks": 0, "loss_rank_avg": 0.26409125328063965, "step": 1280, "valid_targets_mean": 3505.4, "valid_targets_min": 597 }, { "epoch": 2.3796296296296298, "grad_norm": 0.5979782495601111, "learning_rate": 3.339907415970168e-05, "loss": 0.2859, "loss_nan_ranks": 0, "loss_rank_avg": 0.22898399829864502, "step": 1285, "valid_targets_mean": 2800.9, "valid_targets_min": 541 }, { "epoch": 2.388888888888889, "grad_norm": 0.5281486790881847, "learning_rate": 3.333037403740989e-05, "loss": 0.2677, "loss_nan_ranks": 0, "loss_rank_avg": 0.23821385204792023, "step": 1290, "valid_targets_mean": 3827.2, "valid_targets_min": 736 }, { "epoch": 2.398148148148148, "grad_norm": 0.5811780027704674, "learning_rate": 3.326138972256121e-05, "loss": 0.2568, "loss_nan_ranks": 0, "loss_rank_avg": 0.27474355697631836, "step": 1295, "valid_targets_mean": 3347.3, "valid_targets_min": 647 }, { "epoch": 2.4074074074074074, "grad_norm": 0.5062441112163106, "learning_rate": 3.3192122685844214e-05, "loss": 0.2594, "loss_nan_ranks": 0, "loss_rank_avg": 0.2150622010231018, "step": 1300, "valid_targets_mean": 3436.5, "valid_targets_min": 596 }, { "epoch": 2.4166666666666665, "grad_norm": 0.5298315558070248, "learning_rate": 3.312257440397488e-05, "loss": 0.2515, "loss_nan_ranks": 0, "loss_rank_avg": 0.27570849657058716, "step": 1305, "valid_targets_mean": 4040.8, "valid_targets_min": 474 }, { "epoch": 2.425925925925926, "grad_norm": 0.5014670497202479, "learning_rate": 3.305274635966509e-05, "loss": 0.2686, "loss_nan_ranks": 0, "loss_rank_avg": 0.2778550386428833, "step": 1310, "valid_targets_mean": 4388.4, "valid_targets_min": 1561 }, { "epoch": 2.435185185185185, "grad_norm": 0.4941899546980937, "learning_rate": 3.298264004159104e-05, "loss": 0.2669, "loss_nan_ranks": 0, "loss_rank_avg": 0.2897128760814667, "step": 1315, "valid_targets_mean": 4620.4, "valid_targets_min": 764 }, { "epoch": 2.4444444444444446, "grad_norm": 0.5845810037932724, "learning_rate": 3.2912256944361484e-05, "loss": 0.2502, "loss_nan_ranks": 0, "loss_rank_avg": 0.2573606073856354, "step": 1320, "valid_targets_mean": 3208.6, "valid_targets_min": 762 }, { "epoch": 2.4537037037037037, "grad_norm": 0.5433250444755526, "learning_rate": 3.284159856848589e-05, "loss": 0.253, "loss_nan_ranks": 0, "loss_rank_avg": 0.28917592763900757, "step": 1325, "valid_targets_mean": 4059.9, "valid_targets_min": 869 }, { "epoch": 2.462962962962963, "grad_norm": 0.5684951550508175, "learning_rate": 3.2770666420342426e-05, "loss": 0.2975, "loss_nan_ranks": 0, "loss_rank_avg": 0.33171623945236206, "step": 1330, "valid_targets_mean": 3700.9, "valid_targets_min": 908 }, { "epoch": 2.4722222222222223, "grad_norm": 0.46312664746277976, "learning_rate": 3.269946201214586e-05, "loss": 0.2816, "loss_nan_ranks": 0, "loss_rank_avg": 0.275724321603775, "step": 1335, "valid_targets_mean": 4910.4, "valid_targets_min": 566 }, { "epoch": 2.4814814814814814, "grad_norm": 0.4467189932583639, "learning_rate": 3.262798686191533e-05, "loss": 0.291, "loss_nan_ranks": 0, "loss_rank_avg": 0.24208518862724304, "step": 1340, "valid_targets_mean": 4536.0, "valid_targets_min": 2578 }, { "epoch": 2.490740740740741, "grad_norm": 0.4991364949244441, "learning_rate": 3.255624249344198e-05, "loss": 0.2742, "loss_nan_ranks": 0, "loss_rank_avg": 0.2625071406364441, "step": 1345, "valid_targets_mean": 3629.2, "valid_targets_min": 628 }, { "epoch": 2.5, "grad_norm": 0.5133071061700047, "learning_rate": 3.248423043625642e-05, "loss": 0.2688, "loss_nan_ranks": 0, "loss_rank_avg": 0.21916022896766663, "step": 1350, "valid_targets_mean": 3302.8, "valid_targets_min": 894 }, { "epoch": 2.5092592592592595, "grad_norm": 0.49266442205021715, "learning_rate": 3.241195222559621e-05, "loss": 0.2877, "loss_nan_ranks": 0, "loss_rank_avg": 0.27607983350753784, "step": 1355, "valid_targets_mean": 4676.6, "valid_targets_min": 1984 }, { "epoch": 2.5185185185185186, "grad_norm": 0.6009290708819502, "learning_rate": 3.2339409402373056e-05, "loss": 0.3102, "loss_nan_ranks": 0, "loss_rank_avg": 0.383029580116272, "step": 1360, "valid_targets_mean": 3892.7, "valid_targets_min": 572 }, { "epoch": 2.5277777777777777, "grad_norm": 0.5505025180156696, "learning_rate": 3.2266603513139995e-05, "loss": 0.2757, "loss_nan_ranks": 0, "loss_rank_avg": 0.2777920961380005, "step": 1365, "valid_targets_mean": 3982.9, "valid_targets_min": 2418 }, { "epoch": 2.537037037037037, "grad_norm": 0.5111646149007892, "learning_rate": 3.2193536110058414e-05, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.28832823038101196, "step": 1370, "valid_targets_mean": 4848.4, "valid_targets_min": 2127 }, { "epoch": 2.5462962962962963, "grad_norm": 0.5040882202849286, "learning_rate": 3.212020875086495e-05, "loss": 0.288, "loss_nan_ranks": 0, "loss_rank_avg": 0.31644129753112793, "step": 1375, "valid_targets_mean": 4976.4, "valid_targets_min": 1046 }, { "epoch": 2.5555555555555554, "grad_norm": 0.5313850282519446, "learning_rate": 3.20466229988383e-05, "loss": 0.2742, "loss_nan_ranks": 0, "loss_rank_avg": 0.2689250111579895, "step": 1380, "valid_targets_mean": 3821.2, "valid_targets_min": 435 }, { "epoch": 2.564814814814815, "grad_norm": 0.5402024194211922, "learning_rate": 3.197278042276587e-05, "loss": 0.2743, "loss_nan_ranks": 0, "loss_rank_avg": 0.2658718228340149, "step": 1385, "valid_targets_mean": 3890.8, "valid_targets_min": 542 }, { "epoch": 2.574074074074074, "grad_norm": 0.4747076430784914, "learning_rate": 3.189868259691036e-05, "loss": 0.258, "loss_nan_ranks": 0, "loss_rank_avg": 0.23981155455112457, "step": 1390, "valid_targets_mean": 4119.1, "valid_targets_min": 654 }, { "epoch": 2.5833333333333335, "grad_norm": 0.5999323348223499, "learning_rate": 3.182433110097618e-05, "loss": 0.2896, "loss_nan_ranks": 0, "loss_rank_avg": 0.2491762787103653, "step": 1395, "valid_targets_mean": 3585.1, "valid_targets_min": 1770 }, { "epoch": 2.5925925925925926, "grad_norm": 0.46430831925350374, "learning_rate": 3.174972752007577e-05, "loss": 0.2702, "loss_nan_ranks": 0, "loss_rank_avg": 0.25592419505119324, "step": 1400, "valid_targets_mean": 4477.0, "valid_targets_min": 1644 }, { "epoch": 2.601851851851852, "grad_norm": 0.4807452208290434, "learning_rate": 3.1674873444695804e-05, "loss": 0.25, "loss_nan_ranks": 0, "loss_rank_avg": 0.18888458609580994, "step": 1405, "valid_targets_mean": 3690.0, "valid_targets_min": 381 }, { "epoch": 2.611111111111111, "grad_norm": 0.5482835702850894, "learning_rate": 3.15997704706633e-05, "loss": 0.2789, "loss_nan_ranks": 0, "loss_rank_avg": 0.2206830382347107, "step": 1410, "valid_targets_mean": 3195.4, "valid_targets_min": 1566 }, { "epoch": 2.6203703703703702, "grad_norm": 0.4939467467540788, "learning_rate": 3.152442019911161e-05, "loss": 0.2773, "loss_nan_ranks": 0, "loss_rank_avg": 0.2876463532447815, "step": 1415, "valid_targets_mean": 4438.0, "valid_targets_min": 2557 }, { "epoch": 2.6296296296296298, "grad_norm": 0.563321666654457, "learning_rate": 3.144882423644623e-05, "loss": 0.2805, "loss_nan_ranks": 0, "loss_rank_avg": 0.2687886357307434, "step": 1420, "valid_targets_mean": 3560.2, "valid_targets_min": 506 }, { "epoch": 2.638888888888889, "grad_norm": 0.4771594676809964, "learning_rate": 3.1372984194310614e-05, "loss": 0.283, "loss_nan_ranks": 0, "loss_rank_avg": 0.2669658064842224, "step": 1425, "valid_targets_mean": 4496.8, "valid_targets_min": 1034 }, { "epoch": 2.648148148148148, "grad_norm": 0.5701553331661079, "learning_rate": 3.1296901689551766e-05, "loss": 0.2669, "loss_nan_ranks": 0, "loss_rank_avg": 0.2383461445569992, "step": 1430, "valid_targets_mean": 3213.2, "valid_targets_min": 479 }, { "epoch": 2.6574074074074074, "grad_norm": 0.4996052044828777, "learning_rate": 3.122057834418582e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.22803762555122375, "step": 1435, "valid_targets_mean": 4339.8, "valid_targets_min": 1876 }, { "epoch": 2.6666666666666665, "grad_norm": 0.4762272371218332, "learning_rate": 3.1144015785363405e-05, "loss": 0.2534, "loss_nan_ranks": 0, "loss_rank_avg": 0.25022315979003906, "step": 1440, "valid_targets_mean": 4088.6, "valid_targets_min": 1840 }, { "epoch": 2.675925925925926, "grad_norm": 0.5320018695023948, "learning_rate": 3.1067215645335e-05, "loss": 0.2788, "loss_nan_ranks": 0, "loss_rank_avg": 0.2924705445766449, "step": 1445, "valid_targets_mean": 3703.2, "valid_targets_min": 1026 }, { "epoch": 2.685185185185185, "grad_norm": 0.48946462823360576, "learning_rate": 3.0990179561416124e-05, "loss": 0.2475, "loss_nan_ranks": 0, "loss_rank_avg": 0.24994626641273499, "step": 1450, "valid_targets_mean": 4758.8, "valid_targets_min": 2745 }, { "epoch": 2.6944444444444446, "grad_norm": 0.5536914814420201, "learning_rate": 3.0912909175952404e-05, "loss": 0.2802, "loss_nan_ranks": 0, "loss_rank_avg": 0.28184986114501953, "step": 1455, "valid_targets_mean": 4326.1, "valid_targets_min": 2025 }, { "epoch": 2.7037037037037037, "grad_norm": 0.5678388519721984, "learning_rate": 3.08354061362846e-05, "loss": 0.2707, "loss_nan_ranks": 0, "loss_rank_avg": 0.2809412181377411, "step": 1460, "valid_targets_mean": 3676.1, "valid_targets_min": 2257 }, { "epoch": 2.712962962962963, "grad_norm": 0.5566129052217905, "learning_rate": 3.075767209471345e-05, "loss": 0.2774, "loss_nan_ranks": 0, "loss_rank_avg": 0.2666032314300537, "step": 1465, "valid_targets_mean": 3426.1, "valid_targets_min": 1705 }, { "epoch": 2.7222222222222223, "grad_norm": 0.6101327463343764, "learning_rate": 3.06797087084645e-05, "loss": 0.3039, "loss_nan_ranks": 0, "loss_rank_avg": 0.28413182497024536, "step": 1470, "valid_targets_mean": 3345.7, "valid_targets_min": 231 }, { "epoch": 2.7314814814814814, "grad_norm": 0.4725939229507694, "learning_rate": 3.060151763965267e-05, "loss": 0.2732, "loss_nan_ranks": 0, "loss_rank_avg": 0.21915774047374725, "step": 1475, "valid_targets_mean": 3618.2, "valid_targets_min": 755 }, { "epoch": 2.7407407407407405, "grad_norm": 0.4395930184494261, "learning_rate": 3.052310055524696e-05, "loss": 0.2566, "loss_nan_ranks": 0, "loss_rank_avg": 0.2695322036743164, "step": 1480, "valid_targets_mean": 4727.2, "valid_targets_min": 1598 }, { "epoch": 2.75, "grad_norm": 0.4706905685407168, "learning_rate": 3.044445912703477e-05, "loss": 0.2838, "loss_nan_ranks": 0, "loss_rank_avg": 0.32568082213401794, "step": 1485, "valid_targets_mean": 5001.6, "valid_targets_min": 665 }, { "epoch": 2.7592592592592595, "grad_norm": 0.4969657932430498, "learning_rate": 3.036559503158637e-05, "loss": 0.2783, "loss_nan_ranks": 0, "loss_rank_avg": 0.23848196864128113, "step": 1490, "valid_targets_mean": 3659.8, "valid_targets_min": 1529 }, { "epoch": 2.7685185185185186, "grad_norm": 0.6292698817704557, "learning_rate": 3.0286509950219077e-05, "loss": 0.2523, "loss_nan_ranks": 0, "loss_rank_avg": 0.26195579767227173, "step": 1495, "valid_targets_mean": 3502.5, "valid_targets_min": 923 }, { "epoch": 2.7777777777777777, "grad_norm": 0.5618365463665778, "learning_rate": 3.020720556896147e-05, "loss": 0.293, "loss_nan_ranks": 0, "loss_rank_avg": 0.3372756242752075, "step": 1500, "valid_targets_mean": 4311.9, "valid_targets_min": 812 }, { "epoch": 2.787037037037037, "grad_norm": 1.0844578159877276, "learning_rate": 3.0127683578517418e-05, "loss": 0.2599, "loss_nan_ranks": 0, "loss_rank_avg": 0.2966684401035309, "step": 1505, "valid_targets_mean": 6157.9, "valid_targets_min": 871 }, { "epoch": 2.7962962962962963, "grad_norm": 0.5031399470373651, "learning_rate": 3.004794567423002e-05, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.2614865303039551, "step": 1510, "valid_targets_mean": 4778.2, "valid_targets_min": 584 }, { "epoch": 2.8055555555555554, "grad_norm": 0.5041806681514034, "learning_rate": 2.9967993556045504e-05, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.21226972341537476, "step": 1515, "valid_targets_mean": 5573.9, "valid_targets_min": 845 }, { "epoch": 2.814814814814815, "grad_norm": 0.4997735272169362, "learning_rate": 2.988782892847694e-05, "loss": 0.2442, "loss_nan_ranks": 0, "loss_rank_avg": 0.24511948227882385, "step": 1520, "valid_targets_mean": 4693.2, "valid_targets_min": 886 }, { "epoch": 2.824074074074074, "grad_norm": 0.5995647736513411, "learning_rate": 2.9807453500567937e-05, "loss": 0.2597, "loss_nan_ranks": 0, "loss_rank_avg": 0.3068762421607971, "step": 1525, "valid_targets_mean": 3450.6, "valid_targets_min": 930 }, { "epoch": 2.8333333333333335, "grad_norm": 0.6054440225826818, "learning_rate": 2.9726868985856186e-05, "loss": 0.2232, "loss_nan_ranks": 0, "loss_rank_avg": 0.21026404201984406, "step": 1530, "valid_targets_mean": 3599.7, "valid_targets_min": 1569 }, { "epoch": 2.8425925925925926, "grad_norm": 0.5741889496658287, "learning_rate": 2.9646077102336933e-05, "loss": 0.2634, "loss_nan_ranks": 0, "loss_rank_avg": 0.2920532524585724, "step": 1535, "valid_targets_mean": 3774.1, "valid_targets_min": 317 }, { "epoch": 2.851851851851852, "grad_norm": 0.6588428000456348, "learning_rate": 2.956507957242637e-05, "loss": 0.2457, "loss_nan_ranks": 0, "loss_rank_avg": 0.2671510875225067, "step": 1540, "valid_targets_mean": 3503.0, "valid_targets_min": 526 }, { "epoch": 2.861111111111111, "grad_norm": 0.4491076760542966, "learning_rate": 2.9483878122924874e-05, "loss": 0.2372, "loss_nan_ranks": 0, "loss_rank_avg": 0.23452128469944, "step": 1545, "valid_targets_mean": 5171.2, "valid_targets_min": 1926 }, { "epoch": 2.8703703703703702, "grad_norm": 0.5018537144302465, "learning_rate": 2.940247448498025e-05, "loss": 0.2579, "loss_nan_ranks": 0, "loss_rank_avg": 0.25421342253685, "step": 1550, "valid_targets_mean": 4786.2, "valid_targets_min": 629 }, { "epoch": 2.8796296296296298, "grad_norm": 0.5716877920658499, "learning_rate": 2.9320870394050783e-05, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.2664939761161804, "step": 1555, "valid_targets_mean": 3561.0, "valid_targets_min": 949 }, { "epoch": 2.888888888888889, "grad_norm": 0.481235284252179, "learning_rate": 2.9239067589868228e-05, "loss": 0.2721, "loss_nan_ranks": 0, "loss_rank_avg": 0.2406879961490631, "step": 1560, "valid_targets_mean": 4930.3, "valid_targets_min": 491 }, { "epoch": 2.898148148148148, "grad_norm": 0.5819300944807342, "learning_rate": 2.9157067816400765e-05, "loss": 0.2586, "loss_nan_ranks": 0, "loss_rank_avg": 0.27452024817466736, "step": 1565, "valid_targets_mean": 3573.7, "valid_targets_min": 2118 }, { "epoch": 2.9074074074074074, "grad_norm": 0.5048566931560609, "learning_rate": 2.90748728218158e-05, "loss": 0.2327, "loss_nan_ranks": 0, "loss_rank_avg": 0.25580722093582153, "step": 1570, "valid_targets_mean": 4897.8, "valid_targets_min": 970 }, { "epoch": 2.9166666666666665, "grad_norm": 0.49150215780244894, "learning_rate": 2.8992484358442673e-05, "loss": 0.2562, "loss_nan_ranks": 0, "loss_rank_avg": 0.2693978548049927, "step": 1575, "valid_targets_mean": 4487.1, "valid_targets_min": 1709 }, { "epoch": 2.925925925925926, "grad_norm": 0.5708812131347851, "learning_rate": 2.8909904182735337e-05, "loss": 0.2655, "loss_nan_ranks": 0, "loss_rank_avg": 0.2671920955181122, "step": 1580, "valid_targets_mean": 3580.7, "valid_targets_min": 894 }, { "epoch": 2.935185185185185, "grad_norm": 0.5601556537304803, "learning_rate": 2.8827134055234883e-05, "loss": 0.2579, "loss_nan_ranks": 0, "loss_rank_avg": 0.26261967420578003, "step": 1585, "valid_targets_mean": 3402.6, "valid_targets_min": 698 }, { "epoch": 2.9444444444444446, "grad_norm": 0.5319230530321215, "learning_rate": 2.874417574053202e-05, "loss": 0.2536, "loss_nan_ranks": 0, "loss_rank_avg": 0.3018246591091156, "step": 1590, "valid_targets_mean": 4457.2, "valid_targets_min": 194 }, { "epoch": 2.9537037037037037, "grad_norm": 0.6399002771658464, "learning_rate": 2.8661031007229443e-05, "loss": 0.2392, "loss_nan_ranks": 0, "loss_rank_avg": 0.27771127223968506, "step": 1595, "valid_targets_mean": 3044.6, "valid_targets_min": 736 }, { "epoch": 2.962962962962963, "grad_norm": 0.5987506953116827, "learning_rate": 2.857770162790416e-05, "loss": 0.2548, "loss_nan_ranks": 0, "loss_rank_avg": 0.2965382933616638, "step": 1600, "valid_targets_mean": 3597.6, "valid_targets_min": 2209 }, { "epoch": 2.9722222222222223, "grad_norm": 0.6380121418721786, "learning_rate": 2.8494189379069662e-05, "loss": 0.2507, "loss_nan_ranks": 0, "loss_rank_avg": 0.2618336081504822, "step": 1605, "valid_targets_mean": 2886.4, "valid_targets_min": 275 }, { "epoch": 2.9814814814814814, "grad_norm": 0.5586710883725614, "learning_rate": 2.8410496041138067e-05, "loss": 0.2811, "loss_nan_ranks": 0, "loss_rank_avg": 0.2990414500236511, "step": 1610, "valid_targets_mean": 4474.1, "valid_targets_min": 1026 }, { "epoch": 2.9907407407407405, "grad_norm": 0.46644132738739846, "learning_rate": 2.8326623398382174e-05, "loss": 0.2564, "loss_nan_ranks": 0, "loss_rank_avg": 0.24458184838294983, "step": 1615, "valid_targets_mean": 4791.1, "valid_targets_min": 1838 }, { "epoch": 3.0, "grad_norm": 0.4761359461180495, "learning_rate": 2.8242573238897395e-05, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.24625471234321594, "step": 1620, "valid_targets_mean": 4855.9, "valid_targets_min": 778 }, { "epoch": 3.009259259259259, "grad_norm": 0.5607956275569781, "learning_rate": 2.815834735456367e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.2913566827774048, "step": 1625, "valid_targets_mean": 3792.2, "valid_targets_min": 1421 }, { "epoch": 3.0185185185185186, "grad_norm": 0.5200885636493907, "learning_rate": 2.8073947541007246e-05, "loss": 0.2864, "loss_nan_ranks": 0, "loss_rank_avg": 0.2609402537345886, "step": 1630, "valid_targets_mean": 4228.2, "valid_targets_min": 1388 }, { "epoch": 3.0277777777777777, "grad_norm": 0.5464229397916962, "learning_rate": 2.7989375597562386e-05, "loss": 0.253, "loss_nan_ranks": 0, "loss_rank_avg": 0.287320077419281, "step": 1635, "valid_targets_mean": 3905.6, "valid_targets_min": 1626 }, { "epoch": 3.037037037037037, "grad_norm": 0.48953693297086637, "learning_rate": 2.7904633327233016e-05, "loss": 0.2464, "loss_nan_ranks": 0, "loss_rank_avg": 0.23558908700942993, "step": 1640, "valid_targets_mean": 4453.9, "valid_targets_min": 1973 }, { "epoch": 3.0462962962962963, "grad_norm": 0.5636981651244154, "learning_rate": 2.781972253665431e-05, "loss": 0.2445, "loss_nan_ranks": 0, "loss_rank_avg": 0.22617077827453613, "step": 1645, "valid_targets_mean": 4146.0, "valid_targets_min": 1473 }, { "epoch": 3.0555555555555554, "grad_norm": 0.5346016865896094, "learning_rate": 2.773464503605414e-05, "loss": 0.2623, "loss_nan_ranks": 0, "loss_rank_avg": 0.20911234617233276, "step": 1650, "valid_targets_mean": 3475.5, "valid_targets_min": 1771 }, { "epoch": 3.064814814814815, "grad_norm": 0.537999050036353, "learning_rate": 2.764940263921451e-05, "loss": 0.2687, "loss_nan_ranks": 0, "loss_rank_avg": 0.3136928081512451, "step": 1655, "valid_targets_mean": 4361.2, "valid_targets_min": 724 }, { "epoch": 3.074074074074074, "grad_norm": 0.5332117915430361, "learning_rate": 2.7563997163432853e-05, "loss": 0.2621, "loss_nan_ranks": 0, "loss_rank_avg": 0.27018648386001587, "step": 1660, "valid_targets_mean": 4647.1, "valid_targets_min": 1878 }, { "epoch": 3.0833333333333335, "grad_norm": 0.5252502798205276, "learning_rate": 2.7478430429483336e-05, "loss": 0.2662, "loss_nan_ranks": 0, "loss_rank_avg": 0.2500057518482208, "step": 1665, "valid_targets_mean": 4244.9, "valid_targets_min": 2221 }, { "epoch": 3.0925925925925926, "grad_norm": 0.9080479015306887, "learning_rate": 2.7392704261578e-05, "loss": 0.2565, "loss_nan_ranks": 0, "loss_rank_avg": 0.24066869914531708, "step": 1670, "valid_targets_mean": 4786.5, "valid_targets_min": 1795 }, { "epoch": 3.1018518518518516, "grad_norm": 0.5710663215003279, "learning_rate": 2.7306820487327906e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.36352789402008057, "step": 1675, "valid_targets_mean": 4979.2, "valid_targets_min": 561 }, { "epoch": 3.111111111111111, "grad_norm": 0.5397905809288394, "learning_rate": 2.7220780937704118e-05, "loss": 0.2614, "loss_nan_ranks": 0, "loss_rank_avg": 0.3355807065963745, "step": 1680, "valid_targets_mean": 4687.1, "valid_targets_min": 582 }, { "epoch": 3.1203703703703702, "grad_norm": 0.5719381517621509, "learning_rate": 2.713458744699873e-05, "loss": 0.2488, "loss_nan_ranks": 0, "loss_rank_avg": 0.27028560638427734, "step": 1685, "valid_targets_mean": 3927.9, "valid_targets_min": 735 }, { "epoch": 3.1296296296296298, "grad_norm": 0.5488071017786058, "learning_rate": 2.704824185278573e-05, "loss": 0.2747, "loss_nan_ranks": 0, "loss_rank_avg": 0.2780161201953888, "step": 1690, "valid_targets_mean": 4392.5, "valid_targets_min": 275 }, { "epoch": 3.138888888888889, "grad_norm": 0.560933973633571, "learning_rate": 2.6961745995881813e-05, "loss": 0.2738, "loss_nan_ranks": 0, "loss_rank_avg": 0.2935350835323334, "step": 1695, "valid_targets_mean": 3781.2, "valid_targets_min": 812 }, { "epoch": 3.148148148148148, "grad_norm": 0.5153173604123659, "learning_rate": 2.6875101720307168e-05, "loss": 0.2458, "loss_nan_ranks": 0, "loss_rank_avg": 0.22335903346538544, "step": 1700, "valid_targets_mean": 3719.2, "valid_targets_min": 322 }, { "epoch": 3.1574074074074074, "grad_norm": 0.529200809615876, "learning_rate": 2.6788310873246133e-05, "loss": 0.2554, "loss_nan_ranks": 0, "loss_rank_avg": 0.321768194437027, "step": 1705, "valid_targets_mean": 4739.2, "valid_targets_min": 1880 }, { "epoch": 3.1666666666666665, "grad_norm": 0.5929542752441085, "learning_rate": 2.670137530500783e-05, "loss": 0.2336, "loss_nan_ranks": 0, "loss_rank_avg": 0.23372915387153625, "step": 1710, "valid_targets_mean": 3828.8, "valid_targets_min": 838 }, { "epoch": 3.175925925925926, "grad_norm": 0.5904824673475364, "learning_rate": 2.661429686898673e-05, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.25988543033599854, "step": 1715, "valid_targets_mean": 3370.4, "valid_targets_min": 906 }, { "epoch": 3.185185185185185, "grad_norm": 0.5654133383351743, "learning_rate": 2.6527077421623117e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.24876439571380615, "step": 1720, "valid_targets_mean": 3427.9, "valid_targets_min": 545 }, { "epoch": 3.1944444444444446, "grad_norm": 0.6120405362758927, "learning_rate": 2.6439718822363515e-05, "loss": 0.25, "loss_nan_ranks": 0, "loss_rank_avg": 0.23591837286949158, "step": 1725, "valid_targets_mean": 3958.1, "valid_targets_min": 1876 }, { "epoch": 3.2037037037037037, "grad_norm": 0.5650864325109225, "learning_rate": 2.6352222933621065e-05, "loss": 0.2556, "loss_nan_ranks": 0, "loss_rank_avg": 0.2792157828807831, "step": 1730, "valid_targets_mean": 4003.8, "valid_targets_min": 845 }, { "epoch": 3.212962962962963, "grad_norm": 0.5560665906528047, "learning_rate": 2.62645916207358e-05, "loss": 0.2376, "loss_nan_ranks": 0, "loss_rank_avg": 0.22258630394935608, "step": 1735, "valid_targets_mean": 3628.8, "valid_targets_min": 2004 }, { "epoch": 3.2222222222222223, "grad_norm": 0.47392152791291015, "learning_rate": 2.6176826751934882e-05, "loss": 0.2392, "loss_nan_ranks": 0, "loss_rank_avg": 0.23260678350925446, "step": 1740, "valid_targets_mean": 4254.6, "valid_targets_min": 2036 }, { "epoch": 3.2314814814814814, "grad_norm": 0.547370940890995, "learning_rate": 2.6088930198292773e-05, "loss": 0.2568, "loss_nan_ranks": 0, "loss_rank_avg": 0.3267350196838379, "step": 1745, "valid_targets_mean": 5440.8, "valid_targets_min": 587 }, { "epoch": 3.240740740740741, "grad_norm": 0.6207094582404148, "learning_rate": 2.600090383369135e-05, "loss": 0.2704, "loss_nan_ranks": 0, "loss_rank_avg": 0.2993205189704895, "step": 1750, "valid_targets_mean": 3938.6, "valid_targets_min": 471 }, { "epoch": 3.25, "grad_norm": 0.5392432660293104, "learning_rate": 2.5912749534779958e-05, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.261831134557724, "step": 1755, "valid_targets_mean": 4243.8, "valid_targets_min": 1809 }, { "epoch": 3.259259259259259, "grad_norm": 0.5976629323136308, "learning_rate": 2.5824469180935377e-05, "loss": 0.2595, "loss_nan_ranks": 0, "loss_rank_avg": 0.25948113203048706, "step": 1760, "valid_targets_mean": 3828.1, "valid_targets_min": 2051 }, { "epoch": 3.2685185185185186, "grad_norm": 0.5616357127195194, "learning_rate": 2.5736064654221808e-05, "loss": 0.2609, "loss_nan_ranks": 0, "loss_rank_avg": 0.2716560959815979, "step": 1765, "valid_targets_mean": 4082.7, "valid_targets_min": 661 }, { "epoch": 3.2777777777777777, "grad_norm": 0.5515421060133198, "learning_rate": 2.564753783935068e-05, "loss": 0.2647, "loss_nan_ranks": 0, "loss_rank_avg": 0.2995178699493408, "step": 1770, "valid_targets_mean": 4297.5, "valid_targets_min": 773 }, { "epoch": 3.287037037037037, "grad_norm": 0.5442897232307273, "learning_rate": 2.5558890623640513e-05, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.33121222257614136, "step": 1775, "valid_targets_mean": 4387.3, "valid_targets_min": 638 }, { "epoch": 3.2962962962962963, "grad_norm": 0.5440272723905116, "learning_rate": 2.5470124896976687e-05, "loss": 0.2722, "loss_nan_ranks": 0, "loss_rank_avg": 0.26206910610198975, "step": 1780, "valid_targets_mean": 3992.6, "valid_targets_min": 644 }, { "epoch": 3.3055555555555554, "grad_norm": 0.5245177761132279, "learning_rate": 2.538124255177113e-05, "loss": 0.261, "loss_nan_ranks": 0, "loss_rank_avg": 0.25537651777267456, "step": 1785, "valid_targets_mean": 4090.8, "valid_targets_min": 421 }, { "epoch": 3.314814814814815, "grad_norm": 0.4978922342919577, "learning_rate": 2.5292245482921982e-05, "loss": 0.2662, "loss_nan_ranks": 0, "loss_rank_avg": 0.24825258553028107, "step": 1790, "valid_targets_mean": 4364.4, "valid_targets_min": 1736 }, { "epoch": 3.324074074074074, "grad_norm": 0.5413131194299406, "learning_rate": 2.5203135587773196e-05, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.28795957565307617, "step": 1795, "valid_targets_mean": 4514.4, "valid_targets_min": 1916 }, { "epoch": 3.3333333333333335, "grad_norm": 0.48381333303689816, "learning_rate": 2.5113914766074075e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.29423463344573975, "step": 1800, "valid_targets_mean": 5073.3, "valid_targets_min": 887 }, { "epoch": 3.3425925925925926, "grad_norm": 0.5540751734364149, "learning_rate": 2.5024584919938805e-05, "loss": 0.2744, "loss_nan_ranks": 0, "loss_rank_avg": 0.30663758516311646, "step": 1805, "valid_targets_mean": 3852.0, "valid_targets_min": 783 }, { "epoch": 3.351851851851852, "grad_norm": 0.4875767498216755, "learning_rate": 2.493514795380587e-05, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.23132456839084625, "step": 1810, "valid_targets_mean": 4474.8, "valid_targets_min": 2422 }, { "epoch": 3.361111111111111, "grad_norm": 0.562685263202997, "learning_rate": 2.4845605774397482e-05, "loss": 0.2433, "loss_nan_ranks": 0, "loss_rank_avg": 0.2605576515197754, "step": 1815, "valid_targets_mean": 3649.8, "valid_targets_min": 1813 }, { "epoch": 3.3703703703703702, "grad_norm": 0.5971247612228009, "learning_rate": 2.4755960290678884e-05, "loss": 0.2683, "loss_nan_ranks": 0, "loss_rank_avg": 0.2765813171863556, "step": 1820, "valid_targets_mean": 3091.1, "valid_targets_min": 592 }, { "epoch": 3.3796296296296298, "grad_norm": 0.5196767315771833, "learning_rate": 2.4666213413817696e-05, "loss": 0.2425, "loss_nan_ranks": 0, "loss_rank_avg": 0.22254908084869385, "step": 1825, "valid_targets_mean": 3801.4, "valid_targets_min": 747 }, { "epoch": 3.388888888888889, "grad_norm": 0.5121874679965963, "learning_rate": 2.4576367057143167e-05, "loss": 0.2679, "loss_nan_ranks": 0, "loss_rank_avg": 0.2372555434703827, "step": 1830, "valid_targets_mean": 4133.1, "valid_targets_min": 1882 }, { "epoch": 3.398148148148148, "grad_norm": 0.5020669363013955, "learning_rate": 2.4486423136105356e-05, "loss": 0.2584, "loss_nan_ranks": 0, "loss_rank_avg": 0.25579991936683655, "step": 1835, "valid_targets_mean": 4919.1, "valid_targets_min": 1901 }, { "epoch": 3.4074074074074074, "grad_norm": 0.5441744337458053, "learning_rate": 2.4396383568234322e-05, "loss": 0.2472, "loss_nan_ranks": 0, "loss_rank_avg": 0.2616913914680481, "step": 1840, "valid_targets_mean": 4022.9, "valid_targets_min": 721 }, { "epoch": 3.4166666666666665, "grad_norm": 0.5285349190964163, "learning_rate": 2.4306250273099236e-05, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.27835655212402344, "step": 1845, "valid_targets_mean": 3834.8, "valid_targets_min": 1887 }, { "epoch": 3.425925925925926, "grad_norm": 0.46370125956202335, "learning_rate": 2.421602517226745e-05, "loss": 0.2557, "loss_nan_ranks": 0, "loss_rank_avg": 0.21514523029327393, "step": 1850, "valid_targets_mean": 4822.7, "valid_targets_min": 2356 }, { "epoch": 3.435185185185185, "grad_norm": 0.5045309429602557, "learning_rate": 2.4125710189263555e-05, "loss": 0.2597, "loss_nan_ranks": 0, "loss_rank_avg": 0.24583828449249268, "step": 1855, "valid_targets_mean": 4282.2, "valid_targets_min": 1668 }, { "epoch": 3.4444444444444446, "grad_norm": 0.5794459679981911, "learning_rate": 2.4035307249528326e-05, "loss": 0.2631, "loss_nan_ranks": 0, "loss_rank_avg": 0.2770538926124573, "step": 1860, "valid_targets_mean": 4194.9, "valid_targets_min": 1906 }, { "epoch": 3.4537037037037037, "grad_norm": 0.5757849492309214, "learning_rate": 2.3944818280377732e-05, "loss": 0.2756, "loss_nan_ranks": 0, "loss_rank_avg": 0.2549267113208771, "step": 1865, "valid_targets_mean": 3418.6, "valid_targets_min": 1267 }, { "epoch": 3.462962962962963, "grad_norm": 0.5787001079526624, "learning_rate": 2.3854245210961798e-05, "loss": 0.2648, "loss_nan_ranks": 0, "loss_rank_avg": 0.2735787332057953, "step": 1870, "valid_targets_mean": 3282.9, "valid_targets_min": 327 }, { "epoch": 3.4722222222222223, "grad_norm": 0.6603451083242975, "learning_rate": 2.376358997222351e-05, "loss": 0.2766, "loss_nan_ranks": 0, "loss_rank_avg": 0.30318760871887207, "step": 1875, "valid_targets_mean": 2959.7, "valid_targets_min": 580 }, { "epoch": 3.4814814814814814, "grad_norm": 0.5439007943417832, "learning_rate": 2.367285449685763e-05, "loss": 0.26, "loss_nan_ranks": 0, "loss_rank_avg": 0.319497674703598, "step": 1880, "valid_targets_mean": 4390.6, "valid_targets_min": 1345 }, { "epoch": 3.490740740740741, "grad_norm": 0.47502055786608866, "learning_rate": 2.3582040719269504e-05, "loss": 0.2546, "loss_nan_ranks": 0, "loss_rank_avg": 0.2526184916496277, "step": 1885, "valid_targets_mean": 5038.2, "valid_targets_min": 1510 }, { "epoch": 3.5, "grad_norm": 0.5452928944856862, "learning_rate": 2.3491150575533808e-05, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.23368355631828308, "step": 1890, "valid_targets_mean": 3530.3, "valid_targets_min": 605 }, { "epoch": 3.5092592592592595, "grad_norm": 0.6073760697025231, "learning_rate": 2.340018600335328e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.2643791437149048, "step": 1895, "valid_targets_mean": 4982.6, "valid_targets_min": 2220 }, { "epoch": 3.5185185185185186, "grad_norm": 0.5596007727722335, "learning_rate": 2.3309148942017424e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.276612251996994, "step": 1900, "valid_targets_mean": 3690.1, "valid_targets_min": 572 }, { "epoch": 3.5277777777777777, "grad_norm": 0.521852302471038, "learning_rate": 2.321804133236115e-05, "loss": 0.2387, "loss_nan_ranks": 0, "loss_rank_avg": 0.25015997886657715, "step": 1905, "valid_targets_mean": 3807.7, "valid_targets_min": 1461 }, { "epoch": 3.537037037037037, "grad_norm": 0.5202676505674712, "learning_rate": 2.312686511672338e-05, "loss": 0.2731, "loss_nan_ranks": 0, "loss_rank_avg": 0.2852502465248108, "step": 1910, "valid_targets_mean": 4486.8, "valid_targets_min": 1530 }, { "epoch": 3.5462962962962963, "grad_norm": 0.49067308052473124, "learning_rate": 2.3035622238905694e-05, "loss": 0.2382, "loss_nan_ranks": 0, "loss_rank_avg": 0.2287035584449768, "step": 1915, "valid_targets_mean": 4143.6, "valid_targets_min": 1655 }, { "epoch": 3.5555555555555554, "grad_norm": 0.5274253125213653, "learning_rate": 2.2944314644130814e-05, "loss": 0.2529, "loss_nan_ranks": 0, "loss_rank_avg": 0.27749884128570557, "step": 1920, "valid_targets_mean": 4553.0, "valid_targets_min": 781 }, { "epoch": 3.564814814814815, "grad_norm": 0.5688144249989912, "learning_rate": 2.2852944279001207e-05, "loss": 0.2674, "loss_nan_ranks": 0, "loss_rank_avg": 0.3269811272621155, "step": 1925, "valid_targets_mean": 4159.2, "valid_targets_min": 888 }, { "epoch": 3.574074074074074, "grad_norm": 0.5041147570894117, "learning_rate": 2.2761513091457537e-05, "loss": 0.2543, "loss_nan_ranks": 0, "loss_rank_avg": 0.2584291100502014, "step": 1930, "valid_targets_mean": 4408.8, "valid_targets_min": 1757 }, { "epoch": 3.5833333333333335, "grad_norm": 0.5046219032639738, "learning_rate": 2.2670023030737153e-05, "loss": 0.2767, "loss_nan_ranks": 0, "loss_rank_avg": 0.2301914095878601, "step": 1935, "valid_targets_mean": 4570.9, "valid_targets_min": 1585 }, { "epoch": 3.5925925925925926, "grad_norm": 0.5097422302633462, "learning_rate": 2.2578476047332535e-05, "loss": 0.2441, "loss_nan_ranks": 0, "loss_rank_avg": 0.23207487165927887, "step": 1940, "valid_targets_mean": 3510.6, "valid_targets_min": 1621 }, { "epoch": 3.601851851851852, "grad_norm": 0.5502757038033133, "learning_rate": 2.2486874092949708e-05, "loss": 0.2565, "loss_nan_ranks": 0, "loss_rank_avg": 0.24923279881477356, "step": 1945, "valid_targets_mean": 3629.6, "valid_targets_min": 1696 }, { "epoch": 3.611111111111111, "grad_norm": 0.5493878283106496, "learning_rate": 2.2395219120466622e-05, "loss": 0.2409, "loss_nan_ranks": 0, "loss_rank_avg": 0.24204868078231812, "step": 1950, "valid_targets_mean": 3706.9, "valid_targets_min": 698 }, { "epoch": 3.6203703703703702, "grad_norm": 0.5626462939570609, "learning_rate": 2.2303513083891542e-05, "loss": 0.2474, "loss_nan_ranks": 0, "loss_rank_avg": 0.2548343539237976, "step": 1955, "valid_targets_mean": 3551.1, "valid_targets_min": 594 }, { "epoch": 3.6296296296296298, "grad_norm": 0.47203755513693735, "learning_rate": 2.2211757938321373e-05, "loss": 0.2377, "loss_nan_ranks": 0, "loss_rank_avg": 0.20937591791152954, "step": 1960, "valid_targets_mean": 4418.7, "valid_targets_min": 828 }, { "epoch": 3.638888888888889, "grad_norm": 0.5005343443672163, "learning_rate": 2.2119955639899983e-05, "loss": 0.2933, "loss_nan_ranks": 0, "loss_rank_avg": 0.21190600097179413, "step": 1965, "valid_targets_mean": 4131.6, "valid_targets_min": 526 }, { "epoch": 3.648148148148148, "grad_norm": 0.5062414681733032, "learning_rate": 2.20281081457765e-05, "loss": 0.2591, "loss_nan_ranks": 0, "loss_rank_avg": 0.24952177703380585, "step": 1970, "valid_targets_mean": 4448.9, "valid_targets_min": 1588 }, { "epoch": 3.6574074074074074, "grad_norm": 0.5166078661849807, "learning_rate": 2.1936217414063584e-05, "loss": 0.2787, "loss_nan_ranks": 0, "loss_rank_avg": 0.29824504256248474, "step": 1975, "valid_targets_mean": 4616.9, "valid_targets_min": 1758 }, { "epoch": 3.6666666666666665, "grad_norm": 0.5036298018276866, "learning_rate": 2.184428540379569e-05, "loss": 0.2517, "loss_nan_ranks": 0, "loss_rank_avg": 0.21241354942321777, "step": 1980, "valid_targets_mean": 3836.9, "valid_targets_min": 567 }, { "epoch": 3.675925925925926, "grad_norm": 0.5263922576220329, "learning_rate": 2.1752314074887287e-05, "loss": 0.2602, "loss_nan_ranks": 0, "loss_rank_avg": 0.21682290732860565, "step": 1985, "valid_targets_mean": 3577.4, "valid_targets_min": 892 }, { "epoch": 3.685185185185185, "grad_norm": 0.5504297129810796, "learning_rate": 2.1660305388091106e-05, "loss": 0.2259, "loss_nan_ranks": 0, "loss_rank_avg": 0.22139625251293182, "step": 1990, "valid_targets_mean": 3110.8, "valid_targets_min": 479 }, { "epoch": 3.6944444444444446, "grad_norm": 0.49325575866624627, "learning_rate": 2.1568261304956298e-05, "loss": 0.2507, "loss_nan_ranks": 0, "loss_rank_avg": 0.23789441585540771, "step": 1995, "valid_targets_mean": 4232.6, "valid_targets_min": 680 }, { "epoch": 3.7037037037037037, "grad_norm": 0.5807219455547323, "learning_rate": 2.1476183787786638e-05, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.26028937101364136, "step": 2000, "valid_targets_mean": 3360.9, "valid_targets_min": 528 }, { "epoch": 3.712962962962963, "grad_norm": 0.5255254634433139, "learning_rate": 2.138407479959869e-05, "loss": 0.2841, "loss_nan_ranks": 0, "loss_rank_avg": 0.24893628060817719, "step": 2005, "valid_targets_mean": 3761.6, "valid_targets_min": 2356 }, { "epoch": 3.7222222222222223, "grad_norm": 0.4773408030946498, "learning_rate": 2.129193630407996e-05, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.22808077931404114, "step": 2010, "valid_targets_mean": 4793.6, "valid_targets_min": 2178 }, { "epoch": 3.7314814814814814, "grad_norm": 0.4500006448054135, "learning_rate": 2.119977026554701e-05, "loss": 0.2447, "loss_nan_ranks": 0, "loss_rank_avg": 0.2292492389678955, "step": 2015, "valid_targets_mean": 5103.6, "valid_targets_min": 647 }, { "epoch": 3.7407407407407405, "grad_norm": 0.6473593402015339, "learning_rate": 2.1107578648903614e-05, "loss": 0.2476, "loss_nan_ranks": 0, "loss_rank_avg": 0.2941989600658417, "step": 2020, "valid_targets_mean": 3061.2, "valid_targets_min": 722 }, { "epoch": 3.75, "grad_norm": 0.5203301574671931, "learning_rate": 2.1015363419598835e-05, "loss": 0.2355, "loss_nan_ranks": 0, "loss_rank_avg": 0.2489463984966278, "step": 2025, "valid_targets_mean": 4192.7, "valid_targets_min": 404 }, { "epoch": 3.7592592592592595, "grad_norm": 0.5362575203104003, "learning_rate": 2.0923126543585156e-05, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.270350843667984, "step": 2030, "valid_targets_mean": 3732.0, "valid_targets_min": 420 }, { "epoch": 3.7685185185185186, "grad_norm": 0.499224592037341, "learning_rate": 2.0830869987276537e-05, "loss": 0.2512, "loss_nan_ranks": 0, "loss_rank_avg": 0.3091796040534973, "step": 2035, "valid_targets_mean": 5031.6, "valid_targets_min": 1046 }, { "epoch": 3.7777777777777777, "grad_norm": 0.5250725966197842, "learning_rate": 2.0738595717506496e-05, "loss": 0.2561, "loss_nan_ranks": 0, "loss_rank_avg": 0.22418762743473053, "step": 2040, "valid_targets_mean": 3963.0, "valid_targets_min": 1275 }, { "epoch": 3.787037037037037, "grad_norm": 0.5742892201930184, "learning_rate": 2.0646305701486215e-05, "loss": 0.2477, "loss_nan_ranks": 0, "loss_rank_avg": 0.24486325681209564, "step": 2045, "valid_targets_mean": 2970.0, "valid_targets_min": 720 }, { "epoch": 3.7962962962962963, "grad_norm": 0.5131608808905314, "learning_rate": 2.055400190676255e-05, "loss": 0.2405, "loss_nan_ranks": 0, "loss_rank_avg": 0.24303856492042542, "step": 2050, "valid_targets_mean": 4145.8, "valid_targets_min": 2285 }, { "epoch": 3.8055555555555554, "grad_norm": 0.5052923454962346, "learning_rate": 2.046168630117612e-05, "loss": 0.2662, "loss_nan_ranks": 0, "loss_rank_avg": 0.3247288167476654, "step": 2055, "valid_targets_mean": 4734.6, "valid_targets_min": 2031 }, { "epoch": 3.814814814814815, "grad_norm": 0.610872868205616, "learning_rate": 2.0369360852819327e-05, "loss": 0.2513, "loss_nan_ranks": 0, "loss_rank_avg": 0.2507988214492798, "step": 2060, "valid_targets_mean": 3605.2, "valid_targets_min": 523 }, { "epoch": 3.824074074074074, "grad_norm": 0.5723992923066944, "learning_rate": 2.027702752999444e-05, "loss": 0.2675, "loss_nan_ranks": 0, "loss_rank_avg": 0.26201534271240234, "step": 2065, "valid_targets_mean": 4353.8, "valid_targets_min": 2454 }, { "epoch": 3.8333333333333335, "grad_norm": 0.5270133124002951, "learning_rate": 2.0184688301171567e-05, "loss": 0.2682, "loss_nan_ranks": 0, "loss_rank_avg": 0.2589871883392334, "step": 2070, "valid_targets_mean": 3676.9, "valid_targets_min": 573 }, { "epoch": 3.8425925925925926, "grad_norm": 0.5139405722726906, "learning_rate": 2.009234513494676e-05, "loss": 0.284, "loss_nan_ranks": 0, "loss_rank_avg": 0.24153248965740204, "step": 2075, "valid_targets_mean": 4124.9, "valid_targets_min": 663 }, { "epoch": 3.851851851851852, "grad_norm": 0.48652457260517906, "learning_rate": 2e-05, "loss": 0.2613, "loss_nan_ranks": 0, "loss_rank_avg": 0.24923667311668396, "step": 2080, "valid_targets_mean": 4446.2, "valid_targets_min": 2380 }, { "epoch": 3.861111111111111, "grad_norm": 0.5365239749592436, "learning_rate": 1.9907654865053248e-05, "loss": 0.2691, "loss_nan_ranks": 0, "loss_rank_avg": 0.3025137782096863, "step": 2085, "valid_targets_mean": 4364.4, "valid_targets_min": 2177 }, { "epoch": 3.8703703703703702, "grad_norm": 0.5487362203225881, "learning_rate": 1.981531169882844e-05, "loss": 0.2857, "loss_nan_ranks": 0, "loss_rank_avg": 0.2569238543510437, "step": 2090, "valid_targets_mean": 3598.1, "valid_targets_min": 709 }, { "epoch": 3.8796296296296298, "grad_norm": 0.4846676225451064, "learning_rate": 1.9722972470005573e-05, "loss": 0.2412, "loss_nan_ranks": 0, "loss_rank_avg": 0.21618637442588806, "step": 2095, "valid_targets_mean": 4546.6, "valid_targets_min": 2362 }, { "epoch": 3.888888888888889, "grad_norm": 0.5026122833039225, "learning_rate": 1.9630639147180673e-05, "loss": 0.2907, "loss_nan_ranks": 0, "loss_rank_avg": 0.31693074107170105, "step": 2100, "valid_targets_mean": 4688.2, "valid_targets_min": 758 }, { "epoch": 3.898148148148148, "grad_norm": 0.4155051368967143, "learning_rate": 1.9538313698823887e-05, "loss": 0.2372, "loss_nan_ranks": 0, "loss_rank_avg": 0.20872560143470764, "step": 2105, "valid_targets_mean": 5315.1, "valid_targets_min": 513 }, { "epoch": 3.9074074074074074, "grad_norm": 0.5053201320989432, "learning_rate": 1.944599809323745e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.23215201497077942, "step": 2110, "valid_targets_mean": 4019.3, "valid_targets_min": 1946 }, { "epoch": 3.9166666666666665, "grad_norm": 0.546899589555972, "learning_rate": 1.935369429851379e-05, "loss": 0.27, "loss_nan_ranks": 0, "loss_rank_avg": 0.3266640901565552, "step": 2115, "valid_targets_mean": 4348.4, "valid_targets_min": 2310 }, { "epoch": 3.925925925925926, "grad_norm": 0.5637484671729249, "learning_rate": 1.926140428249351e-05, "loss": 0.264, "loss_nan_ranks": 0, "loss_rank_avg": 0.31784260272979736, "step": 2120, "valid_targets_mean": 3883.6, "valid_targets_min": 466 }, { "epoch": 3.935185185185185, "grad_norm": 0.42601754727230023, "learning_rate": 1.916913001272347e-05, "loss": 0.2422, "loss_nan_ranks": 0, "loss_rank_avg": 0.25121498107910156, "step": 2125, "valid_targets_mean": 5457.9, "valid_targets_min": 2575 }, { "epoch": 3.9444444444444446, "grad_norm": 0.45355069808084864, "learning_rate": 1.907687345641485e-05, "loss": 0.2587, "loss_nan_ranks": 0, "loss_rank_avg": 0.22492194175720215, "step": 2130, "valid_targets_mean": 4995.0, "valid_targets_min": 2156 }, { "epoch": 3.9537037037037037, "grad_norm": 0.4966703717024659, "learning_rate": 1.8984636580401165e-05, "loss": 0.2529, "loss_nan_ranks": 0, "loss_rank_avg": 0.22575929760932922, "step": 2135, "valid_targets_mean": 3822.2, "valid_targets_min": 445 }, { "epoch": 3.962962962962963, "grad_norm": 0.4401478192705303, "learning_rate": 1.8892421351096393e-05, "loss": 0.2488, "loss_nan_ranks": 0, "loss_rank_avg": 0.25159192085266113, "step": 2140, "valid_targets_mean": 5782.5, "valid_targets_min": 1416 }, { "epoch": 3.9722222222222223, "grad_norm": 0.5363034129883026, "learning_rate": 1.8800229734452998e-05, "loss": 0.2428, "loss_nan_ranks": 0, "loss_rank_avg": 0.20497846603393555, "step": 2145, "valid_targets_mean": 3910.8, "valid_targets_min": 1582 }, { "epoch": 3.9814814814814814, "grad_norm": 0.4949167294147326, "learning_rate": 1.8708063695920047e-05, "loss": 0.2534, "loss_nan_ranks": 0, "loss_rank_avg": 0.2380611002445221, "step": 2150, "valid_targets_mean": 4586.9, "valid_targets_min": 1010 }, { "epoch": 3.9907407407407405, "grad_norm": 0.4867568096304702, "learning_rate": 1.8615925200401318e-05, "loss": 0.2722, "loss_nan_ranks": 0, "loss_rank_avg": 0.2899520993232727, "step": 2155, "valid_targets_mean": 5028.2, "valid_targets_min": 2141 }, { "epoch": 4.0, "grad_norm": 0.48512349657520354, "learning_rate": 1.852381621221337e-05, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.25779369473457336, "step": 2160, "valid_targets_mean": 4514.5, "valid_targets_min": 2124 }, { "epoch": 4.0092592592592595, "grad_norm": 0.5430547707519348, "learning_rate": 1.843173869504371e-05, "loss": 0.2365, "loss_nan_ranks": 0, "loss_rank_avg": 0.2733425498008728, "step": 2165, "valid_targets_mean": 3821.5, "valid_targets_min": 675 }, { "epoch": 4.018518518518518, "grad_norm": 0.4760491658593785, "learning_rate": 1.8339694611908897e-05, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.2561805248260498, "step": 2170, "valid_targets_mean": 5306.1, "valid_targets_min": 562 }, { "epoch": 4.027777777777778, "grad_norm": 0.48697889807764705, "learning_rate": 1.8247685925112716e-05, "loss": 0.2059, "loss_nan_ranks": 0, "loss_rank_avg": 0.1906963586807251, "step": 2175, "valid_targets_mean": 4063.8, "valid_targets_min": 512 }, { "epoch": 4.037037037037037, "grad_norm": 0.5970157683288126, "learning_rate": 1.8155714596204318e-05, "loss": 0.2474, "loss_nan_ranks": 0, "loss_rank_avg": 0.2847557067871094, "step": 2180, "valid_targets_mean": 3594.9, "valid_targets_min": 2094 }, { "epoch": 4.046296296296297, "grad_norm": 0.5557943898925222, "learning_rate": 1.806378258593642e-05, "loss": 0.23, "loss_nan_ranks": 0, "loss_rank_avg": 0.2644827961921692, "step": 2185, "valid_targets_mean": 3963.4, "valid_targets_min": 2251 }, { "epoch": 4.055555555555555, "grad_norm": 0.5180284994233548, "learning_rate": 1.797189185422351e-05, "loss": 0.236, "loss_nan_ranks": 0, "loss_rank_avg": 0.2229800969362259, "step": 2190, "valid_targets_mean": 3800.6, "valid_targets_min": 1971 }, { "epoch": 4.064814814814815, "grad_norm": 0.5275679350937065, "learning_rate": 1.788004436010002e-05, "loss": 0.2671, "loss_nan_ranks": 0, "loss_rank_avg": 0.2343769669532776, "step": 2195, "valid_targets_mean": 3961.6, "valid_targets_min": 1795 }, { "epoch": 4.074074074074074, "grad_norm": 0.5719653033328994, "learning_rate": 1.778824206167863e-05, "loss": 0.2655, "loss_nan_ranks": 0, "loss_rank_avg": 0.2309592068195343, "step": 2200, "valid_targets_mean": 3531.6, "valid_targets_min": 573 }, { "epoch": 4.083333333333333, "grad_norm": 0.5204783805453003, "learning_rate": 1.7696486916108468e-05, "loss": 0.2389, "loss_nan_ranks": 0, "loss_rank_avg": 0.2981133460998535, "step": 2205, "valid_targets_mean": 4711.4, "valid_targets_min": 1961 }, { "epoch": 4.092592592592593, "grad_norm": 0.5003216687184299, "learning_rate": 1.7604780879533384e-05, "loss": 0.2675, "loss_nan_ranks": 0, "loss_rank_avg": 0.22590869665145874, "step": 2210, "valid_targets_mean": 4413.9, "valid_targets_min": 561 }, { "epoch": 4.101851851851852, "grad_norm": 0.49855495979695824, "learning_rate": 1.7513125907050302e-05, "loss": 0.2315, "loss_nan_ranks": 0, "loss_rank_avg": 0.24315370619297028, "step": 2215, "valid_targets_mean": 4331.7, "valid_targets_min": 789 }, { "epoch": 4.111111111111111, "grad_norm": 0.5590091213064351, "learning_rate": 1.742152395266747e-05, "loss": 0.2377, "loss_nan_ranks": 0, "loss_rank_avg": 0.28527384996414185, "step": 2220, "valid_targets_mean": 4252.1, "valid_targets_min": 723 }, { "epoch": 4.12037037037037, "grad_norm": 0.4968800799735138, "learning_rate": 1.7329976969262854e-05, "loss": 0.238, "loss_nan_ranks": 0, "loss_rank_avg": 0.23040777444839478, "step": 2225, "valid_targets_mean": 4997.2, "valid_targets_min": 338 }, { "epoch": 4.12962962962963, "grad_norm": 0.5225715461374966, "learning_rate": 1.7238486908542463e-05, "loss": 0.2589, "loss_nan_ranks": 0, "loss_rank_avg": 0.24196788668632507, "step": 2230, "valid_targets_mean": 3969.0, "valid_targets_min": 317 }, { "epoch": 4.138888888888889, "grad_norm": 0.566072039301765, "learning_rate": 1.71470557209988e-05, "loss": 0.2505, "loss_nan_ranks": 0, "loss_rank_avg": 0.2790153920650482, "step": 2235, "valid_targets_mean": 3879.8, "valid_targets_min": 569 }, { "epoch": 4.148148148148148, "grad_norm": 0.45951042633304573, "learning_rate": 1.7055685355869196e-05, "loss": 0.2284, "loss_nan_ranks": 0, "loss_rank_avg": 0.2070295810699463, "step": 2240, "valid_targets_mean": 4564.6, "valid_targets_min": 783 }, { "epoch": 4.157407407407407, "grad_norm": 0.5605220935037593, "learning_rate": 1.6964377761094313e-05, "loss": 0.2502, "loss_nan_ranks": 0, "loss_rank_avg": 0.2974599301815033, "step": 2245, "valid_targets_mean": 4355.8, "valid_targets_min": 2288 }, { "epoch": 4.166666666666667, "grad_norm": 0.5399329299027271, "learning_rate": 1.6873134883276626e-05, "loss": 0.2586, "loss_nan_ranks": 0, "loss_rank_avg": 0.25155043601989746, "step": 2250, "valid_targets_mean": 3803.4, "valid_targets_min": 907 }, { "epoch": 4.175925925925926, "grad_norm": 0.5699303989204108, "learning_rate": 1.6781958667638855e-05, "loss": 0.2419, "loss_nan_ranks": 0, "loss_rank_avg": 0.24903835356235504, "step": 2255, "valid_targets_mean": 3695.6, "valid_targets_min": 1570 }, { "epoch": 4.185185185185185, "grad_norm": 0.6105524681890175, "learning_rate": 1.669085105798258e-05, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.2412843555212021, "step": 2260, "valid_targets_mean": 3404.7, "valid_targets_min": 1942 }, { "epoch": 4.194444444444445, "grad_norm": 0.513400651402716, "learning_rate": 1.6599813996646727e-05, "loss": 0.222, "loss_nan_ranks": 0, "loss_rank_avg": 0.22134144604206085, "step": 2265, "valid_targets_mean": 4079.2, "valid_targets_min": 826 }, { "epoch": 4.203703703703703, "grad_norm": 0.4868536453874342, "learning_rate": 1.65088494244662e-05, "loss": 0.2203, "loss_nan_ranks": 0, "loss_rank_avg": 0.21563026309013367, "step": 2270, "valid_targets_mean": 4334.6, "valid_targets_min": 1836 }, { "epoch": 4.212962962962963, "grad_norm": 0.5244709386650726, "learning_rate": 1.6417959280730506e-05, "loss": 0.2648, "loss_nan_ranks": 0, "loss_rank_avg": 0.290455162525177, "step": 2275, "valid_targets_mean": 4819.6, "valid_targets_min": 1708 }, { "epoch": 4.222222222222222, "grad_norm": 0.5328915315468012, "learning_rate": 1.632714550314237e-05, "loss": 0.2192, "loss_nan_ranks": 0, "loss_rank_avg": 0.250417560338974, "step": 2280, "valid_targets_mean": 4054.6, "valid_targets_min": 1107 }, { "epoch": 4.231481481481482, "grad_norm": 0.5972878155360123, "learning_rate": 1.6236410027776494e-05, "loss": 0.2507, "loss_nan_ranks": 0, "loss_rank_avg": 0.22642353177070618, "step": 2285, "valid_targets_mean": 3058.1, "valid_targets_min": 563 }, { "epoch": 4.2407407407407405, "grad_norm": 0.5907761427734216, "learning_rate": 1.6145754789038205e-05, "loss": 0.242, "loss_nan_ranks": 0, "loss_rank_avg": 0.18239718675613403, "step": 2290, "valid_targets_mean": 3142.9, "valid_targets_min": 859 }, { "epoch": 4.25, "grad_norm": 0.5433714738595986, "learning_rate": 1.6055181719622278e-05, "loss": 0.2273, "loss_nan_ranks": 0, "loss_rank_avg": 0.18836545944213867, "step": 2295, "valid_targets_mean": 3226.4, "valid_targets_min": 1585 }, { "epoch": 4.2592592592592595, "grad_norm": 0.5040273007610758, "learning_rate": 1.5964692750471684e-05, "loss": 0.2245, "loss_nan_ranks": 0, "loss_rank_avg": 0.23993724584579468, "step": 2300, "valid_targets_mean": 4635.3, "valid_targets_min": 480 }, { "epoch": 4.268518518518518, "grad_norm": 0.5272742282351708, "learning_rate": 1.5874289810736452e-05, "loss": 0.248, "loss_nan_ranks": 0, "loss_rank_avg": 0.2000361979007721, "step": 2305, "valid_targets_mean": 3835.5, "valid_targets_min": 525 }, { "epoch": 4.277777777777778, "grad_norm": 0.5421153003949007, "learning_rate": 1.5783974827732555e-05, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.20407015085220337, "step": 2310, "valid_targets_mean": 3983.3, "valid_targets_min": 1755 }, { "epoch": 4.287037037037037, "grad_norm": 0.508791566036482, "learning_rate": 1.5693749726900767e-05, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.24065956473350525, "step": 2315, "valid_targets_mean": 4733.2, "valid_targets_min": 2012 }, { "epoch": 4.296296296296296, "grad_norm": 0.6278410756307, "learning_rate": 1.560361643176568e-05, "loss": 0.2459, "loss_nan_ranks": 0, "loss_rank_avg": 0.2204664796590805, "step": 2320, "valid_targets_mean": 2955.4, "valid_targets_min": 767 }, { "epoch": 4.305555555555555, "grad_norm": 0.51340304756785, "learning_rate": 1.5513576863894654e-05, "loss": 0.2769, "loss_nan_ranks": 0, "loss_rank_avg": 0.24818578362464905, "step": 2325, "valid_targets_mean": 4591.8, "valid_targets_min": 891 }, { "epoch": 4.314814814814815, "grad_norm": 0.5004366205081487, "learning_rate": 1.5423632942856836e-05, "loss": 0.2378, "loss_nan_ranks": 0, "loss_rank_avg": 0.22766365110874176, "step": 2330, "valid_targets_mean": 4753.9, "valid_targets_min": 2548 }, { "epoch": 4.324074074074074, "grad_norm": 0.5514594228413564, "learning_rate": 1.5333786586182308e-05, "loss": 0.2269, "loss_nan_ranks": 0, "loss_rank_avg": 0.2691954970359802, "step": 2335, "valid_targets_mean": 4707.2, "valid_targets_min": 556 }, { "epoch": 4.333333333333333, "grad_norm": 0.47659547270242236, "learning_rate": 1.5244039709321123e-05, "loss": 0.2492, "loss_nan_ranks": 0, "loss_rank_avg": 0.24745163321495056, "step": 2340, "valid_targets_mean": 4947.1, "valid_targets_min": 1720 }, { "epoch": 4.342592592592593, "grad_norm": 0.5315469803851041, "learning_rate": 1.5154394225602525e-05, "loss": 0.2464, "loss_nan_ranks": 0, "loss_rank_avg": 0.24923008680343628, "step": 2345, "valid_targets_mean": 4185.3, "valid_targets_min": 641 }, { "epoch": 4.351851851851852, "grad_norm": 0.49151936211870245, "learning_rate": 1.5064852046194127e-05, "loss": 0.2356, "loss_nan_ranks": 0, "loss_rank_avg": 0.21445490419864655, "step": 2350, "valid_targets_mean": 4332.5, "valid_targets_min": 1109 }, { "epoch": 4.361111111111111, "grad_norm": 0.530468204810676, "learning_rate": 1.49754150800612e-05, "loss": 0.2564, "loss_nan_ranks": 0, "loss_rank_avg": 0.30778932571411133, "step": 2355, "valid_targets_mean": 5126.1, "valid_targets_min": 742 }, { "epoch": 4.37037037037037, "grad_norm": 0.5309780578096778, "learning_rate": 1.4886085233925931e-05, "loss": 0.2697, "loss_nan_ranks": 0, "loss_rank_avg": 0.2524486780166626, "step": 2360, "valid_targets_mean": 5057.9, "valid_targets_min": 943 }, { "epoch": 4.37962962962963, "grad_norm": 0.4267222102718032, "learning_rate": 1.4796864412226812e-05, "loss": 0.2313, "loss_nan_ranks": 0, "loss_rank_avg": 0.1837308555841446, "step": 2365, "valid_targets_mean": 5174.9, "valid_targets_min": 2396 }, { "epoch": 4.388888888888889, "grad_norm": 0.5625553097603778, "learning_rate": 1.4707754517078021e-05, "loss": 0.2273, "loss_nan_ranks": 0, "loss_rank_avg": 0.2614815831184387, "step": 2370, "valid_targets_mean": 3773.8, "valid_targets_min": 724 }, { "epoch": 4.398148148148148, "grad_norm": 0.5396666245271566, "learning_rate": 1.4618757448228869e-05, "loss": 0.2556, "loss_nan_ranks": 0, "loss_rank_avg": 0.23616960644721985, "step": 2375, "valid_targets_mean": 3829.4, "valid_targets_min": 2237 }, { "epoch": 4.407407407407407, "grad_norm": 0.6629662126529009, "learning_rate": 1.4529875103023316e-05, "loss": 0.2489, "loss_nan_ranks": 0, "loss_rank_avg": 0.2807127833366394, "step": 2380, "valid_targets_mean": 4226.0, "valid_targets_min": 2051 }, { "epoch": 4.416666666666667, "grad_norm": 0.5603617627291267, "learning_rate": 1.4441109376359498e-05, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.25401419401168823, "step": 2385, "valid_targets_mean": 3388.5, "valid_targets_min": 1929 }, { "epoch": 4.425925925925926, "grad_norm": 0.512775023115171, "learning_rate": 1.435246216064933e-05, "loss": 0.228, "loss_nan_ranks": 0, "loss_rank_avg": 0.2706540822982788, "step": 2390, "valid_targets_mean": 4530.2, "valid_targets_min": 1979 }, { "epoch": 4.435185185185185, "grad_norm": 0.5204570025829351, "learning_rate": 1.4263935345778202e-05, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.24824261665344238, "step": 2395, "valid_targets_mean": 4182.4, "valid_targets_min": 1899 }, { "epoch": 4.444444444444445, "grad_norm": 0.5254410292063911, "learning_rate": 1.417553081906462e-05, "loss": 0.2681, "loss_nan_ranks": 0, "loss_rank_avg": 0.26439177989959717, "step": 2400, "valid_targets_mean": 4461.6, "valid_targets_min": 1416 }, { "epoch": 4.453703703703704, "grad_norm": 0.5956700654564747, "learning_rate": 1.408725046522005e-05, "loss": 0.2271, "loss_nan_ranks": 0, "loss_rank_avg": 0.2445288896560669, "step": 2405, "valid_targets_mean": 3570.7, "valid_targets_min": 708 }, { "epoch": 4.462962962962963, "grad_norm": 0.5443093397325657, "learning_rate": 1.399909616630865e-05, "loss": 0.2533, "loss_nan_ranks": 0, "loss_rank_avg": 0.19300591945648193, "step": 2410, "valid_targets_mean": 4181.1, "valid_targets_min": 1700 }, { "epoch": 4.472222222222222, "grad_norm": 0.5782021783769784, "learning_rate": 1.3911069801707232e-05, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.2529100179672241, "step": 2415, "valid_targets_mean": 3981.5, "valid_targets_min": 2339 }, { "epoch": 4.481481481481482, "grad_norm": 0.4902132127041017, "learning_rate": 1.3823173248065125e-05, "loss": 0.2318, "loss_nan_ranks": 0, "loss_rank_avg": 0.21891093254089355, "step": 2420, "valid_targets_mean": 4572.9, "valid_targets_min": 917 }, { "epoch": 4.4907407407407405, "grad_norm": 0.5337074804399202, "learning_rate": 1.37354083792642e-05, "loss": 0.2383, "loss_nan_ranks": 0, "loss_rank_avg": 0.22410012781620026, "step": 2425, "valid_targets_mean": 3774.8, "valid_targets_min": 1751 }, { "epoch": 4.5, "grad_norm": 0.5148725556353548, "learning_rate": 1.3647777066378938e-05, "loss": 0.2401, "loss_nan_ranks": 0, "loss_rank_avg": 0.21354614198207855, "step": 2430, "valid_targets_mean": 4094.2, "valid_targets_min": 1633 }, { "epoch": 4.5092592592592595, "grad_norm": 0.5306690061606288, "learning_rate": 1.3560281177636484e-05, "loss": 0.2217, "loss_nan_ranks": 0, "loss_rank_avg": 0.22166001796722412, "step": 2435, "valid_targets_mean": 4369.6, "valid_targets_min": 1700 }, { "epoch": 4.518518518518518, "grad_norm": 0.54955470040774, "learning_rate": 1.347292257837689e-05, "loss": 0.2421, "loss_nan_ranks": 0, "loss_rank_avg": 0.19483765959739685, "step": 2440, "valid_targets_mean": 3584.6, "valid_targets_min": 1698 }, { "epoch": 4.527777777777778, "grad_norm": 0.5166945393629171, "learning_rate": 1.3385703131013279e-05, "loss": 0.2392, "loss_nan_ranks": 0, "loss_rank_avg": 0.26188984513282776, "step": 2445, "valid_targets_mean": 5255.4, "valid_targets_min": 2425 }, { "epoch": 4.537037037037037, "grad_norm": 0.5093872869070459, "learning_rate": 1.3298624694992175e-05, "loss": 0.2249, "loss_nan_ranks": 0, "loss_rank_avg": 0.23913279175758362, "step": 2450, "valid_targets_mean": 4489.4, "valid_targets_min": 1802 }, { "epoch": 4.546296296296296, "grad_norm": 0.47603655861521876, "learning_rate": 1.3211689126753879e-05, "loss": 0.2237, "loss_nan_ranks": 0, "loss_rank_avg": 0.17301449179649353, "step": 2455, "valid_targets_mean": 4347.6, "valid_targets_min": 1813 }, { "epoch": 4.555555555555555, "grad_norm": 0.6346661258154088, "learning_rate": 1.3124898279692837e-05, "loss": 0.2462, "loss_nan_ranks": 0, "loss_rank_avg": 0.2843843400478363, "step": 2460, "valid_targets_mean": 3273.6, "valid_targets_min": 849 }, { "epoch": 4.564814814814815, "grad_norm": 0.5325625066728102, "learning_rate": 1.3038254004118192e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.295737624168396, "step": 2465, "valid_targets_mean": 4695.4, "valid_targets_min": 781 }, { "epoch": 4.574074074074074, "grad_norm": 0.5501023241996152, "learning_rate": 1.2951758147214272e-05, "loss": 0.2408, "loss_nan_ranks": 0, "loss_rank_avg": 0.2129950225353241, "step": 2470, "valid_targets_mean": 3447.8, "valid_targets_min": 1908 }, { "epoch": 4.583333333333333, "grad_norm": 0.5306118753400628, "learning_rate": 1.2865412553001274e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.3140091896057129, "step": 2475, "valid_targets_mean": 5196.5, "valid_targets_min": 451 }, { "epoch": 4.592592592592593, "grad_norm": 0.582247483161391, "learning_rate": 1.2779219062295892e-05, "loss": 0.2194, "loss_nan_ranks": 0, "loss_rank_avg": 0.22162239253520966, "step": 2480, "valid_targets_mean": 3159.7, "valid_targets_min": 1906 }, { "epoch": 4.601851851851852, "grad_norm": 0.5672219680504043, "learning_rate": 1.26931795126721e-05, "loss": 0.2491, "loss_nan_ranks": 0, "loss_rank_avg": 0.25854402780532837, "step": 2485, "valid_targets_mean": 3898.6, "valid_targets_min": 570 }, { "epoch": 4.611111111111111, "grad_norm": 0.522554196457828, "learning_rate": 1.2607295738422e-05, "loss": 0.2534, "loss_nan_ranks": 0, "loss_rank_avg": 0.28402256965637207, "step": 2490, "valid_targets_mean": 4644.9, "valid_targets_min": 559 }, { "epoch": 4.62037037037037, "grad_norm": 0.5144881360654426, "learning_rate": 1.2521569570516666e-05, "loss": 0.269, "loss_nan_ranks": 0, "loss_rank_avg": 0.24675382673740387, "step": 2495, "valid_targets_mean": 4304.7, "valid_targets_min": 643 }, { "epoch": 4.62962962962963, "grad_norm": 0.5285312184685258, "learning_rate": 1.2436002836567154e-05, "loss": 0.2309, "loss_nan_ranks": 0, "loss_rank_avg": 0.19586646556854248, "step": 2500, "valid_targets_mean": 3602.7, "valid_targets_min": 322 }, { "epoch": 4.638888888888889, "grad_norm": 0.5549134026619124, "learning_rate": 1.2350597360785503e-05, "loss": 0.2455, "loss_nan_ranks": 0, "loss_rank_avg": 0.18762420117855072, "step": 2505, "valid_targets_mean": 3592.7, "valid_targets_min": 1658 }, { "epoch": 4.648148148148148, "grad_norm": 0.5467801855327935, "learning_rate": 1.2265354963945861e-05, "loss": 0.2231, "loss_nan_ranks": 0, "loss_rank_avg": 0.21311713755130768, "step": 2510, "valid_targets_mean": 4042.2, "valid_targets_min": 1311 }, { "epoch": 4.657407407407407, "grad_norm": 0.5982127460318674, "learning_rate": 1.2180277463345697e-05, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.2819077968597412, "step": 2515, "valid_targets_mean": 3927.7, "valid_targets_min": 667 }, { "epoch": 4.666666666666667, "grad_norm": 0.5694492693011193, "learning_rate": 1.209536667276699e-05, "loss": 0.2561, "loss_nan_ranks": 0, "loss_rank_avg": 0.22266341745853424, "step": 2520, "valid_targets_mean": 3360.6, "valid_targets_min": 635 }, { "epoch": 4.675925925925926, "grad_norm": 0.5526419588735482, "learning_rate": 1.2010624402437622e-05, "loss": 0.2343, "loss_nan_ranks": 0, "loss_rank_avg": 0.23695549368858337, "step": 2525, "valid_targets_mean": 3984.8, "valid_targets_min": 731 }, { "epoch": 4.685185185185185, "grad_norm": 0.5408354781576525, "learning_rate": 1.1926052458992756e-05, "loss": 0.2414, "loss_nan_ranks": 0, "loss_rank_avg": 0.24009719491004944, "step": 2530, "valid_targets_mean": 4062.1, "valid_targets_min": 770 }, { "epoch": 4.694444444444445, "grad_norm": 0.5900458646547324, "learning_rate": 1.184165264543633e-05, "loss": 0.2605, "loss_nan_ranks": 0, "loss_rank_avg": 0.3150600492954254, "step": 2535, "valid_targets_mean": 3895.1, "valid_targets_min": 247 }, { "epoch": 4.703703703703704, "grad_norm": 0.5696579830073532, "learning_rate": 1.1757426761102608e-05, "loss": 0.2704, "loss_nan_ranks": 0, "loss_rank_avg": 0.223690927028656, "step": 2540, "valid_targets_mean": 3577.1, "valid_targets_min": 1758 }, { "epoch": 4.712962962962963, "grad_norm": 0.6778694424859932, "learning_rate": 1.167337660161783e-05, "loss": 0.235, "loss_nan_ranks": 0, "loss_rank_avg": 0.23883025348186493, "step": 2545, "valid_targets_mean": 3305.8, "valid_targets_min": 321 }, { "epoch": 4.722222222222222, "grad_norm": 0.4941234856749398, "learning_rate": 1.1589503958861936e-05, "loss": 0.2269, "loss_nan_ranks": 0, "loss_rank_avg": 0.17126263678073883, "step": 2550, "valid_targets_mean": 4076.2, "valid_targets_min": 2068 }, { "epoch": 4.731481481481482, "grad_norm": 0.49689750365243684, "learning_rate": 1.1505810620930338e-05, "loss": 0.2495, "loss_nan_ranks": 0, "loss_rank_avg": 0.27281057834625244, "step": 2555, "valid_targets_mean": 4938.1, "valid_targets_min": 1676 }, { "epoch": 4.7407407407407405, "grad_norm": 0.49366801115519954, "learning_rate": 1.1422298372095841e-05, "loss": 0.2173, "loss_nan_ranks": 0, "loss_rank_avg": 0.2007950097322464, "step": 2560, "valid_targets_mean": 4215.3, "valid_targets_min": 1973 }, { "epoch": 4.75, "grad_norm": 0.5522379435269955, "learning_rate": 1.133896899277056e-05, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.245499849319458, "step": 2565, "valid_targets_mean": 4917.2, "valid_targets_min": 1888 }, { "epoch": 4.7592592592592595, "grad_norm": 0.43752196002323207, "learning_rate": 1.1255824259467985e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.26849859952926636, "step": 2570, "valid_targets_mean": 5791.7, "valid_targets_min": 2194 }, { "epoch": 4.768518518518518, "grad_norm": 0.4978365901424028, "learning_rate": 1.1172865944765122e-05, "loss": 0.2192, "loss_nan_ranks": 0, "loss_rank_avg": 0.19043470919132233, "step": 2575, "valid_targets_mean": 3895.8, "valid_targets_min": 1835 }, { "epoch": 4.777777777777778, "grad_norm": 0.5734466742419532, "learning_rate": 1.109009581726466e-05, "loss": 0.232, "loss_nan_ranks": 0, "loss_rank_avg": 0.25819826126098633, "step": 2580, "valid_targets_mean": 3905.1, "valid_targets_min": 818 }, { "epoch": 4.787037037037037, "grad_norm": 0.4975385383711413, "learning_rate": 1.1007515641557329e-05, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.23181453347206116, "step": 2585, "valid_targets_mean": 4291.7, "valid_targets_min": 804 }, { "epoch": 4.796296296296296, "grad_norm": 0.5198599453591137, "learning_rate": 1.092512717818421e-05, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.2564372718334198, "step": 2590, "valid_targets_mean": 4180.1, "valid_targets_min": 1857 }, { "epoch": 4.805555555555555, "grad_norm": 0.5359214335268695, "learning_rate": 1.0842932183599238e-05, "loss": 0.2345, "loss_nan_ranks": 0, "loss_rank_avg": 0.20336735248565674, "step": 2595, "valid_targets_mean": 3678.8, "valid_targets_min": 580 }, { "epoch": 4.814814814814815, "grad_norm": 0.5160372923320123, "learning_rate": 1.076093241013178e-05, "loss": 0.2407, "loss_nan_ranks": 0, "loss_rank_avg": 0.2072121798992157, "step": 2600, "valid_targets_mean": 4072.1, "valid_targets_min": 1736 }, { "epoch": 4.824074074074074, "grad_norm": 0.6009912786476855, "learning_rate": 1.067912960594923e-05, "loss": 0.2299, "loss_nan_ranks": 0, "loss_rank_avg": 0.27277135848999023, "step": 2605, "valid_targets_mean": 3798.9, "valid_targets_min": 496 }, { "epoch": 4.833333333333333, "grad_norm": 0.5346358334055163, "learning_rate": 1.0597525515019749e-05, "loss": 0.223, "loss_nan_ranks": 0, "loss_rank_avg": 0.20401796698570251, "step": 2610, "valid_targets_mean": 3752.1, "valid_targets_min": 812 }, { "epoch": 4.842592592592593, "grad_norm": 0.49231495307972206, "learning_rate": 1.0516121877075129e-05, "loss": 0.2623, "loss_nan_ranks": 0, "loss_rank_avg": 0.22834226489067078, "step": 2615, "valid_targets_mean": 4466.0, "valid_targets_min": 2355 }, { "epoch": 4.851851851851852, "grad_norm": 0.5848272712574982, "learning_rate": 1.0434920427573643e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.2603488862514496, "step": 2620, "valid_targets_mean": 3781.5, "valid_targets_min": 599 }, { "epoch": 4.861111111111111, "grad_norm": 0.5265795938004495, "learning_rate": 1.035392289766307e-05, "loss": 0.246, "loss_nan_ranks": 0, "loss_rank_avg": 0.23392300307750702, "step": 2625, "valid_targets_mean": 4054.4, "valid_targets_min": 787 }, { "epoch": 4.87037037037037, "grad_norm": 0.5187023064482922, "learning_rate": 1.0273131014143822e-05, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.2979033291339874, "step": 2630, "valid_targets_mean": 5284.2, "valid_targets_min": 960 }, { "epoch": 4.87962962962963, "grad_norm": 0.5723456672993618, "learning_rate": 1.0192546499432066e-05, "loss": 0.2304, "loss_nan_ranks": 0, "loss_rank_avg": 0.2871008515357971, "step": 2635, "valid_targets_mean": 4034.7, "valid_targets_min": 1757 }, { "epoch": 4.888888888888889, "grad_norm": 0.4518981511283737, "learning_rate": 1.0112171071523064e-05, "loss": 0.2389, "loss_nan_ranks": 0, "loss_rank_avg": 0.24816018342971802, "step": 2640, "valid_targets_mean": 5038.8, "valid_targets_min": 1905 }, { "epoch": 4.898148148148148, "grad_norm": 0.588262346243775, "learning_rate": 1.0032006443954506e-05, "loss": 0.2179, "loss_nan_ranks": 0, "loss_rank_avg": 0.263683021068573, "step": 2645, "valid_targets_mean": 3463.8, "valid_targets_min": 496 }, { "epoch": 4.907407407407407, "grad_norm": 0.5240492310910843, "learning_rate": 9.952054325769984e-06, "loss": 0.2485, "loss_nan_ranks": 0, "loss_rank_avg": 0.2150118052959442, "step": 2650, "valid_targets_mean": 3824.9, "valid_targets_min": 1872 }, { "epoch": 4.916666666666667, "grad_norm": 0.510346682757862, "learning_rate": 9.872316421482592e-06, "loss": 0.2318, "loss_nan_ranks": 0, "loss_rank_avg": 0.21479976177215576, "step": 2655, "valid_targets_mean": 4396.7, "valid_targets_min": 422 }, { "epoch": 4.925925925925926, "grad_norm": 0.5032967964842083, "learning_rate": 9.792794431038542e-06, "loss": 0.2248, "loss_nan_ranks": 0, "loss_rank_avg": 0.1802617311477661, "step": 2660, "valid_targets_mean": 3972.9, "valid_targets_min": 2057 }, { "epoch": 4.935185185185185, "grad_norm": 0.5308551440927738, "learning_rate": 9.713490049780931e-06, "loss": 0.2459, "loss_nan_ranks": 0, "loss_rank_avg": 0.27088022232055664, "step": 2665, "valid_targets_mean": 4305.6, "valid_targets_min": 671 }, { "epoch": 4.944444444444445, "grad_norm": 0.476874892456418, "learning_rate": 9.634404968413644e-06, "loss": 0.2475, "loss_nan_ranks": 0, "loss_rank_avg": 0.25819599628448486, "step": 2670, "valid_targets_mean": 5321.4, "valid_targets_min": 1851 }, { "epoch": 4.953703703703704, "grad_norm": 0.5120117073751925, "learning_rate": 9.555540872965235e-06, "loss": 0.2501, "loss_nan_ranks": 0, "loss_rank_avg": 0.23541975021362305, "step": 2675, "valid_targets_mean": 4679.6, "valid_targets_min": 2191 }, { "epoch": 4.962962962962963, "grad_norm": 0.5036966865354542, "learning_rate": 9.47689944475305e-06, "loss": 0.2373, "loss_nan_ranks": 0, "loss_rank_avg": 0.2063012421131134, "step": 2680, "valid_targets_mean": 3808.1, "valid_targets_min": 1218 }, { "epoch": 4.972222222222222, "grad_norm": 0.5539021581298206, "learning_rate": 9.398482360347327e-06, "loss": 0.2395, "loss_nan_ranks": 0, "loss_rank_avg": 0.21875013411045074, "step": 2685, "valid_targets_mean": 3876.2, "valid_targets_min": 594 }, { "epoch": 4.981481481481482, "grad_norm": 0.4696704645863026, "learning_rate": 9.32029129153551e-06, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.2458312213420868, "step": 2690, "valid_targets_mean": 5203.4, "valid_targets_min": 636 }, { "epoch": 4.9907407407407405, "grad_norm": 0.5363470268259611, "learning_rate": 9.242327905286552e-06, "loss": 0.248, "loss_nan_ranks": 0, "loss_rank_avg": 0.2823778986930847, "step": 2695, "valid_targets_mean": 4615.7, "valid_targets_min": 490 }, { "epoch": 5.0, "grad_norm": 0.5485569771302176, "learning_rate": 9.164593863715405e-06, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.26309841871261597, "step": 2700, "valid_targets_mean": 4314.6, "valid_targets_min": 1566 }, { "epoch": 5.0092592592592595, "grad_norm": 0.5750093349536356, "learning_rate": 9.087090824047604e-06, "loss": 0.2423, "loss_nan_ranks": 0, "loss_rank_avg": 0.20704308152198792, "step": 2705, "valid_targets_mean": 3146.9, "valid_targets_min": 804 }, { "epoch": 5.018518518518518, "grad_norm": 0.5711593696775072, "learning_rate": 9.009820438583881e-06, "loss": 0.2484, "loss_nan_ranks": 0, "loss_rank_avg": 0.2449912130832672, "step": 2710, "valid_targets_mean": 3544.4, "valid_targets_min": 512 }, { "epoch": 5.027777777777778, "grad_norm": 0.5056840688609416, "learning_rate": 8.932784354665002e-06, "loss": 0.2308, "loss_nan_ranks": 0, "loss_rank_avg": 0.21420851349830627, "step": 2715, "valid_targets_mean": 4169.8, "valid_targets_min": 715 }, { "epoch": 5.037037037037037, "grad_norm": 0.5259628580539836, "learning_rate": 8.855984214636606e-06, "loss": 0.218, "loss_nan_ranks": 0, "loss_rank_avg": 0.2302454710006714, "step": 2720, "valid_targets_mean": 4181.8, "valid_targets_min": 1921 }, { "epoch": 5.046296296296297, "grad_norm": 0.5183310765864052, "learning_rate": 8.779421655814189e-06, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.22257420420646667, "step": 2725, "valid_targets_mean": 4248.5, "valid_targets_min": 451 }, { "epoch": 5.055555555555555, "grad_norm": 0.5672433220359422, "learning_rate": 8.703098310448244e-06, "loss": 0.2154, "loss_nan_ranks": 0, "loss_rank_avg": 0.21661844849586487, "step": 2730, "valid_targets_mean": 3690.8, "valid_targets_min": 313 }, { "epoch": 5.064814814814815, "grad_norm": 0.6099943995315652, "learning_rate": 8.627015805689394e-06, "loss": 0.2157, "loss_nan_ranks": 0, "loss_rank_avg": 0.220462366938591, "step": 2735, "valid_targets_mean": 3285.6, "valid_targets_min": 1461 }, { "epoch": 5.074074074074074, "grad_norm": 0.5323546697406513, "learning_rate": 8.551175763553778e-06, "loss": 0.2105, "loss_nan_ranks": 0, "loss_rank_avg": 0.20129993557929993, "step": 2740, "valid_targets_mean": 3924.2, "valid_targets_min": 1717 }, { "epoch": 5.083333333333333, "grad_norm": 0.6107489166400913, "learning_rate": 8.475579800888395e-06, "loss": 0.2503, "loss_nan_ranks": 0, "loss_rank_avg": 0.22628153860569, "step": 2745, "valid_targets_mean": 3306.1, "valid_targets_min": 1109 }, { "epoch": 5.092592592592593, "grad_norm": 0.5714928193929322, "learning_rate": 8.400229529336704e-06, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.27930349111557007, "step": 2750, "valid_targets_mean": 3991.1, "valid_targets_min": 2391 }, { "epoch": 5.101851851851852, "grad_norm": 0.5659909185072247, "learning_rate": 8.325126555304208e-06, "loss": 0.2328, "loss_nan_ranks": 0, "loss_rank_avg": 0.2336483746767044, "step": 2755, "valid_targets_mean": 3597.2, "valid_targets_min": 602 }, { "epoch": 5.111111111111111, "grad_norm": 0.5991030111391655, "learning_rate": 8.250272479924234e-06, "loss": 0.2373, "loss_nan_ranks": 0, "loss_rank_avg": 0.207331120967865, "step": 2760, "valid_targets_mean": 3291.4, "valid_targets_min": 1701 }, { "epoch": 5.12037037037037, "grad_norm": 0.5934525134623533, "learning_rate": 8.17566889902382e-06, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.24269866943359375, "step": 2765, "valid_targets_mean": 3444.9, "valid_targets_min": 554 }, { "epoch": 5.12962962962963, "grad_norm": 0.5658329820173873, "learning_rate": 8.101317403089635e-06, "loss": 0.2325, "loss_nan_ranks": 0, "loss_rank_avg": 0.23202694952487946, "step": 2770, "valid_targets_mean": 3681.4, "valid_targets_min": 2306 }, { "epoch": 5.138888888888889, "grad_norm": 0.6489256706628873, "learning_rate": 8.027219577234133e-06, "loss": 0.2312, "loss_nan_ranks": 0, "loss_rank_avg": 0.2144010066986084, "step": 2775, "valid_targets_mean": 2766.6, "valid_targets_min": 594 }, { "epoch": 5.148148148148148, "grad_norm": 0.5211992244558095, "learning_rate": 7.953377001161714e-06, "loss": 0.2219, "loss_nan_ranks": 0, "loss_rank_avg": 0.21091634035110474, "step": 2780, "valid_targets_mean": 4418.5, "valid_targets_min": 932 }, { "epoch": 5.157407407407407, "grad_norm": 0.5386716433955135, "learning_rate": 7.879791249135059e-06, "loss": 0.2053, "loss_nan_ranks": 0, "loss_rank_avg": 0.2028743326663971, "step": 2785, "valid_targets_mean": 4136.1, "valid_targets_min": 1604 }, { "epoch": 5.166666666666667, "grad_norm": 0.5017972625347342, "learning_rate": 7.806463889941598e-06, "loss": 0.2354, "loss_nan_ranks": 0, "loss_rank_avg": 0.2554541230201721, "step": 2790, "valid_targets_mean": 4644.6, "valid_targets_min": 2649 }, { "epoch": 5.175925925925926, "grad_norm": 0.6494575106635824, "learning_rate": 7.73339648686001e-06, "loss": 0.2293, "loss_nan_ranks": 0, "loss_rank_avg": 0.24619343876838684, "step": 2795, "valid_targets_mean": 3087.1, "valid_targets_min": 894 }, { "epoch": 5.185185185185185, "grad_norm": 0.5370049338879487, "learning_rate": 7.66059059762695e-06, "loss": 0.2298, "loss_nan_ranks": 0, "loss_rank_avg": 0.23969629406929016, "step": 2800, "valid_targets_mean": 4655.8, "valid_targets_min": 514 }, { "epoch": 5.194444444444445, "grad_norm": 0.48493560753692005, "learning_rate": 7.588047774403795e-06, "loss": 0.2371, "loss_nan_ranks": 0, "loss_rank_avg": 0.212522953748703, "step": 2805, "valid_targets_mean": 4826.5, "valid_targets_min": 1850 }, { "epoch": 5.203703703703703, "grad_norm": 0.5702442363690217, "learning_rate": 7.5157695637435864e-06, "loss": 0.2382, "loss_nan_ranks": 0, "loss_rank_avg": 0.21703308820724487, "step": 2810, "valid_targets_mean": 3602.2, "valid_targets_min": 742 }, { "epoch": 5.212962962962963, "grad_norm": 0.4950198812734338, "learning_rate": 7.443757506558033e-06, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.2432202249765396, "step": 2815, "valid_targets_mean": 4723.4, "valid_targets_min": 523 }, { "epoch": 5.222222222222222, "grad_norm": 0.5719638372764714, "learning_rate": 7.3720131380846685e-06, "loss": 0.2417, "loss_nan_ranks": 0, "loss_rank_avg": 0.2235596477985382, "step": 2820, "valid_targets_mean": 3752.8, "valid_targets_min": 577 }, { "epoch": 5.231481481481482, "grad_norm": 0.5267450371667391, "learning_rate": 7.300537987854146e-06, "loss": 0.2467, "loss_nan_ranks": 0, "loss_rank_avg": 0.2897190749645233, "step": 2825, "valid_targets_mean": 4997.0, "valid_targets_min": 2002 }, { "epoch": 5.2407407407407405, "grad_norm": 0.6948286687288108, "learning_rate": 7.22933357965758e-06, "loss": 0.2573, "loss_nan_ranks": 0, "loss_rank_avg": 0.3058856725692749, "step": 2830, "valid_targets_mean": 3943.1, "valid_targets_min": 1473 }, { "epoch": 5.25, "grad_norm": 0.586857466777657, "learning_rate": 7.158401431514117e-06, "loss": 0.2399, "loss_nan_ranks": 0, "loss_rank_avg": 0.25646457076072693, "step": 2835, "valid_targets_mean": 3900.5, "valid_targets_min": 1653 }, { "epoch": 5.2592592592592595, "grad_norm": 0.5671217282214601, "learning_rate": 7.0877430556385205e-06, "loss": 0.2341, "loss_nan_ranks": 0, "loss_rank_avg": 0.25015023350715637, "step": 2840, "valid_targets_mean": 4106.6, "valid_targets_min": 663 }, { "epoch": 5.268518518518518, "grad_norm": 0.5512194807040625, "learning_rate": 7.0173599584089625e-06, "loss": 0.2151, "loss_nan_ranks": 0, "loss_rank_avg": 0.19779840111732483, "step": 2845, "valid_targets_mean": 3710.4, "valid_targets_min": 1984 }, { "epoch": 5.277777777777778, "grad_norm": 0.6190422799034527, "learning_rate": 6.947253640334914e-06, "loss": 0.2176, "loss_nan_ranks": 0, "loss_rank_avg": 0.2084774523973465, "step": 2850, "valid_targets_mean": 3782.9, "valid_targets_min": 535 }, { "epoch": 5.287037037037037, "grad_norm": 0.5143657356660781, "learning_rate": 6.87742559602512e-06, "loss": 0.2469, "loss_nan_ranks": 0, "loss_rank_avg": 0.20541247725486755, "step": 2855, "valid_targets_mean": 4369.5, "valid_targets_min": 632 }, { "epoch": 5.296296296296296, "grad_norm": 0.6337293582450882, "learning_rate": 6.807877314155788e-06, "loss": 0.2341, "loss_nan_ranks": 0, "loss_rank_avg": 0.23032309114933014, "step": 2860, "valid_targets_mean": 3650.8, "valid_targets_min": 2050 }, { "epoch": 5.305555555555555, "grad_norm": 0.5832848422160128, "learning_rate": 6.738610277438791e-06, "loss": 0.2299, "loss_nan_ranks": 0, "loss_rank_avg": 0.2612956762313843, "step": 2865, "valid_targets_mean": 3992.4, "valid_targets_min": 1389 }, { "epoch": 5.314814814814815, "grad_norm": 0.5761403577609914, "learning_rate": 6.669625962590114e-06, "loss": 0.2308, "loss_nan_ranks": 0, "loss_rank_avg": 0.23144936561584473, "step": 2870, "valid_targets_mean": 3683.9, "valid_targets_min": 1769 }, { "epoch": 5.324074074074074, "grad_norm": 0.5484533193789417, "learning_rate": 6.600925840298331e-06, "loss": 0.214, "loss_nan_ranks": 0, "loss_rank_avg": 0.1928141713142395, "step": 2875, "valid_targets_mean": 3428.2, "valid_targets_min": 1879 }, { "epoch": 5.333333333333333, "grad_norm": 0.5104494079712223, "learning_rate": 6.532511375193258e-06, "loss": 0.228, "loss_nan_ranks": 0, "loss_rank_avg": 0.2096475064754486, "step": 2880, "valid_targets_mean": 4629.7, "valid_targets_min": 2168 }, { "epoch": 5.342592592592593, "grad_norm": 0.5967975385442751, "learning_rate": 6.464384025814763e-06, "loss": 0.241, "loss_nan_ranks": 0, "loss_rank_avg": 0.2632576823234558, "step": 2885, "valid_targets_mean": 3904.9, "valid_targets_min": 774 }, { "epoch": 5.351851851851852, "grad_norm": 0.5546172949302648, "learning_rate": 6.396545244581609e-06, "loss": 0.2363, "loss_nan_ranks": 0, "loss_rank_avg": 0.23911094665527344, "step": 2890, "valid_targets_mean": 4346.3, "valid_targets_min": 2347 }, { "epoch": 5.361111111111111, "grad_norm": 0.4623970390000608, "learning_rate": 6.3289964777605624e-06, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.22868403792381287, "step": 2895, "valid_targets_mean": 5747.1, "valid_targets_min": 1679 }, { "epoch": 5.37037037037037, "grad_norm": 0.5472864241371322, "learning_rate": 6.261739165435492e-06, "loss": 0.2259, "loss_nan_ranks": 0, "loss_rank_avg": 0.24670270085334778, "step": 2900, "valid_targets_mean": 4055.2, "valid_targets_min": 1881 }, { "epoch": 5.37962962962963, "grad_norm": 0.5565568764252096, "learning_rate": 6.1947747414767035e-06, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.27123579382896423, "step": 2905, "valid_targets_mean": 4548.1, "valid_targets_min": 762 }, { "epoch": 5.388888888888889, "grad_norm": 0.6213207010595587, "learning_rate": 6.128104633510381e-06, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.2588229775428772, "step": 2910, "valid_targets_mean": 3481.0, "valid_targets_min": 537 }, { "epoch": 5.398148148148148, "grad_norm": 0.5039995681794307, "learning_rate": 6.0617302628881104e-06, "loss": 0.2219, "loss_nan_ranks": 0, "loss_rank_avg": 0.23631207644939423, "step": 2915, "valid_targets_mean": 5029.2, "valid_targets_min": 599 }, { "epoch": 5.407407407407407, "grad_norm": 0.5324459779762222, "learning_rate": 5.9956530446566305e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.27384650707244873, "step": 2920, "valid_targets_mean": 4611.2, "valid_targets_min": 971 }, { "epoch": 5.416666666666667, "grad_norm": 0.5508613368105185, "learning_rate": 5.929874387527605e-06, "loss": 0.2273, "loss_nan_ranks": 0, "loss_rank_avg": 0.2502458393573761, "step": 2925, "valid_targets_mean": 4067.2, "valid_targets_min": 1611 }, { "epoch": 5.425925925925926, "grad_norm": 0.6630459663999032, "learning_rate": 5.864395693847651e-06, "loss": 0.2336, "loss_nan_ranks": 0, "loss_rank_avg": 0.2312171310186386, "step": 2930, "valid_targets_mean": 3028.5, "valid_targets_min": 513 }, { "epoch": 5.435185185185185, "grad_norm": 0.6102195329637778, "learning_rate": 5.799218359568395e-06, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.23122429847717285, "step": 2935, "valid_targets_mean": 3275.2, "valid_targets_min": 949 }, { "epoch": 5.444444444444445, "grad_norm": 0.5924002103430813, "learning_rate": 5.734343774216726e-06, "loss": 0.2403, "loss_nan_ranks": 0, "loss_rank_avg": 0.2418282926082611, "step": 2940, "valid_targets_mean": 3858.4, "valid_targets_min": 2039 }, { "epoch": 5.453703703703704, "grad_norm": 0.5135171674815325, "learning_rate": 5.669773320865198e-06, "loss": 0.2276, "loss_nan_ranks": 0, "loss_rank_avg": 0.19563168287277222, "step": 2945, "valid_targets_mean": 4136.9, "valid_targets_min": 799 }, { "epoch": 5.462962962962963, "grad_norm": 0.5157474067382247, "learning_rate": 5.605508376102504e-06, "loss": 0.2347, "loss_nan_ranks": 0, "loss_rank_avg": 0.22772985696792603, "step": 2950, "valid_targets_mean": 4380.8, "valid_targets_min": 888 }, { "epoch": 5.472222222222222, "grad_norm": 0.58770575498854, "learning_rate": 5.541550310004142e-06, "loss": 0.2344, "loss_nan_ranks": 0, "loss_rank_avg": 0.2741531729698181, "step": 2955, "valid_targets_mean": 4150.1, "valid_targets_min": 1655 }, { "epoch": 5.481481481481482, "grad_norm": 0.5259117380247347, "learning_rate": 5.4779004861032355e-06, "loss": 0.234, "loss_nan_ranks": 0, "loss_rank_avg": 0.2124425172805786, "step": 2960, "valid_targets_mean": 4234.0, "valid_targets_min": 845 }, { "epoch": 5.4907407407407405, "grad_norm": 0.5572729756403186, "learning_rate": 5.414560261361415e-06, "loss": 0.2323, "loss_nan_ranks": 0, "loss_rank_avg": 0.22375890612602234, "step": 2965, "valid_targets_mean": 3794.6, "valid_targets_min": 1686 }, { "epoch": 5.5, "grad_norm": 0.5445675255604132, "learning_rate": 5.351530986139917e-06, "loss": 0.2388, "loss_nan_ranks": 0, "loss_rank_avg": 0.3014816641807556, "step": 2970, "valid_targets_mean": 5170.2, "valid_targets_min": 1872 }, { "epoch": 5.5092592592592595, "grad_norm": 0.6466881136928674, "learning_rate": 5.288814004170804e-06, "loss": 0.2361, "loss_nan_ranks": 0, "loss_rank_avg": 0.24697977304458618, "step": 2975, "valid_targets_mean": 5287.1, "valid_targets_min": 299 }, { "epoch": 5.518518518518518, "grad_norm": 0.5769556261883075, "learning_rate": 5.226410652528293e-06, "loss": 0.2238, "loss_nan_ranks": 0, "loss_rank_avg": 0.20952610671520233, "step": 2980, "valid_targets_mean": 3477.0, "valid_targets_min": 1758 }, { "epoch": 5.527777777777778, "grad_norm": 0.5220365367076272, "learning_rate": 5.164322261600257e-06, "loss": 0.2405, "loss_nan_ranks": 0, "loss_rank_avg": 0.26157093048095703, "step": 2985, "valid_targets_mean": 4698.8, "valid_targets_min": 672 }, { "epoch": 5.537037037037037, "grad_norm": 0.5834708312508325, "learning_rate": 5.102550155059887e-06, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.24419638514518738, "step": 2990, "valid_targets_mean": 3697.9, "valid_targets_min": 838 }, { "epoch": 5.546296296296296, "grad_norm": 0.6297100411829605, "learning_rate": 5.041095649837429e-06, "loss": 0.2464, "loss_nan_ranks": 0, "loss_rank_avg": 0.25412261486053467, "step": 2995, "valid_targets_mean": 3376.1, "valid_targets_min": 496 }, { "epoch": 5.555555555555555, "grad_norm": 0.49356981841182485, "learning_rate": 4.97996005609215e-06, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.30119216442108154, "step": 3000, "valid_targets_mean": 5648.1, "valid_targets_min": 821 }, { "epoch": 5.564814814814815, "grad_norm": 0.5039612262029771, "learning_rate": 4.919144677184377e-06, "loss": 0.2398, "loss_nan_ranks": 0, "loss_rank_avg": 0.21342125535011292, "step": 3005, "valid_targets_mean": 4504.8, "valid_targets_min": 1713 }, { "epoch": 5.574074074074074, "grad_norm": 0.7773320110901791, "learning_rate": 4.858650809647718e-06, "loss": 0.2318, "loss_nan_ranks": 0, "loss_rank_avg": 0.2356467843055725, "step": 3010, "valid_targets_mean": 4336.1, "valid_targets_min": 2210 }, { "epoch": 5.583333333333333, "grad_norm": 0.53775868558781, "learning_rate": 4.798479743161443e-06, "loss": 0.2447, "loss_nan_ranks": 0, "loss_rank_avg": 0.29584944248199463, "step": 3015, "valid_targets_mean": 5161.2, "valid_targets_min": 420 }, { "epoch": 5.592592592592593, "grad_norm": 0.5399699724215403, "learning_rate": 4.73863276052295e-06, "loss": 0.2401, "loss_nan_ranks": 0, "loss_rank_avg": 0.25342124700546265, "step": 3020, "valid_targets_mean": 4572.4, "valid_targets_min": 2055 }, { "epoch": 5.601851851851852, "grad_norm": 0.559109770350913, "learning_rate": 4.679111137620442e-06, "loss": 0.2181, "loss_nan_ranks": 0, "loss_rank_avg": 0.2064143568277359, "step": 3025, "valid_targets_mean": 4368.1, "valid_targets_min": 1771 }, { "epoch": 5.611111111111111, "grad_norm": 0.5797401297885008, "learning_rate": 4.619916143405734e-06, "loss": 0.2441, "loss_nan_ranks": 0, "loss_rank_avg": 0.27342820167541504, "step": 3030, "valid_targets_mean": 4007.9, "valid_targets_min": 641 }, { "epoch": 5.62037037037037, "grad_norm": 0.5973678981169844, "learning_rate": 4.561049039867167e-06, "loss": 0.2214, "loss_nan_ranks": 0, "loss_rank_avg": 0.2151966243982315, "step": 3035, "valid_targets_mean": 3154.9, "valid_targets_min": 1733 }, { "epoch": 5.62962962962963, "grad_norm": 0.6286394623427501, "learning_rate": 4.502511082002748e-06, "loss": 0.2228, "loss_nan_ranks": 0, "loss_rank_avg": 0.21091020107269287, "step": 3040, "valid_targets_mean": 3375.3, "valid_targets_min": 1700 }, { "epoch": 5.638888888888889, "grad_norm": 0.5797903289542194, "learning_rate": 4.44430351779334e-06, "loss": 0.2302, "loss_nan_ranks": 0, "loss_rank_avg": 0.2548823356628418, "step": 3045, "valid_targets_mean": 3822.7, "valid_targets_min": 827 }, { "epoch": 5.648148148148148, "grad_norm": 0.5395936599039631, "learning_rate": 4.386427588176121e-06, "loss": 0.2306, "loss_nan_ranks": 0, "loss_rank_avg": 0.24894863367080688, "step": 3050, "valid_targets_mean": 4198.8, "valid_targets_min": 1272 }, { "epoch": 5.657407407407407, "grad_norm": 0.5426930111027483, "learning_rate": 4.328884527018067e-06, "loss": 0.2312, "loss_nan_ranks": 0, "loss_rank_avg": 0.23337554931640625, "step": 3055, "valid_targets_mean": 4351.1, "valid_targets_min": 2103 }, { "epoch": 5.666666666666667, "grad_norm": 0.5604242942596068, "learning_rate": 4.271675561089676e-06, "loss": 0.224, "loss_nan_ranks": 0, "loss_rank_avg": 0.2611788511276245, "step": 3060, "valid_targets_mean": 3893.8, "valid_targets_min": 708 }, { "epoch": 5.675925925925926, "grad_norm": 0.5140701008443176, "learning_rate": 4.214801910038831e-06, "loss": 0.2221, "loss_nan_ranks": 0, "loss_rank_avg": 0.2084433138370514, "step": 3065, "valid_targets_mean": 4512.9, "valid_targets_min": 2627 }, { "epoch": 5.685185185185185, "grad_norm": 0.6183694282041834, "learning_rate": 4.1582647863647565e-06, "loss": 0.2526, "loss_nan_ranks": 0, "loss_rank_avg": 0.2690351605415344, "step": 3070, "valid_targets_mean": 3642.6, "valid_targets_min": 1908 }, { "epoch": 5.694444444444445, "grad_norm": 0.5868218733401469, "learning_rate": 4.102065395392208e-06, "loss": 0.2299, "loss_nan_ranks": 0, "loss_rank_avg": 0.24894368648529053, "step": 3075, "valid_targets_mean": 3544.2, "valid_targets_min": 647 }, { "epoch": 5.703703703703704, "grad_norm": 0.5411202176861103, "learning_rate": 4.04620493524575e-06, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.2515827715396881, "step": 3080, "valid_targets_mean": 4001.4, "valid_targets_min": 1591 }, { "epoch": 5.712962962962963, "grad_norm": 0.5722421499791819, "learning_rate": 3.990684596824219e-06, "loss": 0.2336, "loss_nan_ranks": 0, "loss_rank_avg": 0.2952159643173218, "step": 3085, "valid_targets_mean": 4322.0, "valid_targets_min": 764 }, { "epoch": 5.722222222222222, "grad_norm": 0.547817757859327, "learning_rate": 3.93550556377535e-06, "loss": 0.21, "loss_nan_ranks": 0, "loss_rank_avg": 0.19061464071273804, "step": 3090, "valid_targets_mean": 3755.5, "valid_targets_min": 472 }, { "epoch": 5.731481481481482, "grad_norm": 0.6502265294741346, "learning_rate": 3.880669012470515e-06, "loss": 0.2148, "loss_nan_ranks": 0, "loss_rank_avg": 0.23853082954883575, "step": 3095, "valid_targets_mean": 3397.8, "valid_targets_min": 880 }, { "epoch": 5.7407407407407405, "grad_norm": 0.5857577530105953, "learning_rate": 3.826176111979673e-06, "loss": 0.2155, "loss_nan_ranks": 0, "loss_rank_avg": 0.22189410030841827, "step": 3100, "valid_targets_mean": 4695.8, "valid_targets_min": 671 }, { "epoch": 5.75, "grad_norm": 0.5846313843567592, "learning_rate": 3.7720280240464145e-06, "loss": 0.2869, "loss_nan_ranks": 0, "loss_rank_avg": 0.23350340127944946, "step": 3105, "valid_targets_mean": 3510.4, "valid_targets_min": 480 }, { "epoch": 5.7592592592592595, "grad_norm": 0.5158563827646333, "learning_rate": 3.7182259030632305e-06, "loss": 0.2355, "loss_nan_ranks": 0, "loss_rank_avg": 0.2879447937011719, "step": 3110, "valid_targets_mean": 4997.9, "valid_targets_min": 637 }, { "epoch": 5.768518518518518, "grad_norm": 0.5925915812377698, "learning_rate": 3.6647708960468696e-06, "loss": 0.2175, "loss_nan_ranks": 0, "loss_rank_avg": 0.17236462235450745, "step": 3115, "valid_targets_mean": 3198.4, "valid_targets_min": 397 }, { "epoch": 5.777777777777778, "grad_norm": 0.5049665240080519, "learning_rate": 3.6116641426138933e-06, "loss": 0.2142, "loss_nan_ranks": 0, "loss_rank_avg": 0.20963430404663086, "step": 3120, "valid_targets_mean": 4305.9, "valid_targets_min": 545 }, { "epoch": 5.787037037037037, "grad_norm": 0.49581708730857366, "learning_rate": 3.5589067749564054e-06, "loss": 0.215, "loss_nan_ranks": 0, "loss_rank_avg": 0.17507000267505646, "step": 3125, "valid_targets_mean": 4279.4, "valid_targets_min": 1984 }, { "epoch": 5.796296296296296, "grad_norm": 0.533046990001763, "learning_rate": 3.5064999178178648e-06, "loss": 0.2437, "loss_nan_ranks": 0, "loss_rank_avg": 0.27857914566993713, "step": 3130, "valid_targets_mean": 4669.2, "valid_targets_min": 2074 }, { "epoch": 5.805555555555555, "grad_norm": 0.5616028974184716, "learning_rate": 3.454444688469165e-06, "loss": 0.2179, "loss_nan_ranks": 0, "loss_rank_avg": 0.2025943398475647, "step": 3135, "valid_targets_mean": 3585.2, "valid_targets_min": 1703 }, { "epoch": 5.814814814814815, "grad_norm": 0.5499230074351966, "learning_rate": 3.4027421966847675e-06, "loss": 0.2032, "loss_nan_ranks": 0, "loss_rank_avg": 0.20543722808361053, "step": 3140, "valid_targets_mean": 4170.7, "valid_targets_min": 1919 }, { "epoch": 5.824074074074074, "grad_norm": 0.5611834664864449, "learning_rate": 3.3513935447190595e-06, "loss": 0.2187, "loss_nan_ranks": 0, "loss_rank_avg": 0.2410752773284912, "step": 3145, "valid_targets_mean": 5189.0, "valid_targets_min": 542 }, { "epoch": 5.833333333333333, "grad_norm": 0.584145430075303, "learning_rate": 3.3003998272828676e-06, "loss": 0.2156, "loss_nan_ranks": 0, "loss_rank_avg": 0.21233102679252625, "step": 3150, "valid_targets_mean": 3442.8, "valid_targets_min": 635 }, { "epoch": 5.842592592592593, "grad_norm": 0.487543792074454, "learning_rate": 3.2497621315200958e-06, "loss": 0.225, "loss_nan_ranks": 0, "loss_rank_avg": 0.19998474419116974, "step": 3155, "valid_targets_mean": 4951.9, "valid_targets_min": 1450 }, { "epoch": 5.851851851851852, "grad_norm": 0.5178454713647473, "learning_rate": 3.199481536984572e-06, "loss": 0.2118, "loss_nan_ranks": 0, "loss_rank_avg": 0.23643583059310913, "step": 3160, "valid_targets_mean": 4172.1, "valid_targets_min": 1883 }, { "epoch": 5.861111111111111, "grad_norm": 0.5368224607954661, "learning_rate": 3.149559115617009e-06, "loss": 0.2243, "loss_nan_ranks": 0, "loss_rank_avg": 0.17453108727931976, "step": 3165, "valid_targets_mean": 3588.9, "valid_targets_min": 818 }, { "epoch": 5.87037037037037, "grad_norm": 0.5771162580976622, "learning_rate": 3.099995931722175e-06, "loss": 0.2149, "loss_nan_ranks": 0, "loss_rank_avg": 0.2502521574497223, "step": 3170, "valid_targets_mean": 4165.6, "valid_targets_min": 2273 }, { "epoch": 5.87962962962963, "grad_norm": 0.5851573775898512, "learning_rate": 3.050793041946183e-06, "loss": 0.2328, "loss_nan_ranks": 0, "loss_rank_avg": 0.30304640531539917, "step": 3175, "valid_targets_mean": 4239.8, "valid_targets_min": 724 }, { "epoch": 5.888888888888889, "grad_norm": 0.45474856609735226, "learning_rate": 3.001951495253972e-06, "loss": 0.2121, "loss_nan_ranks": 0, "loss_rank_avg": 0.19552862644195557, "step": 3180, "valid_targets_mean": 4980.6, "valid_targets_min": 572 }, { "epoch": 5.898148148148148, "grad_norm": 0.623558442164306, "learning_rate": 2.953472332906959e-06, "loss": 0.2413, "loss_nan_ranks": 0, "loss_rank_avg": 0.29056739807128906, "step": 3185, "valid_targets_mean": 3711.6, "valid_targets_min": 1514 }, { "epoch": 5.907407407407407, "grad_norm": 0.49621126539313987, "learning_rate": 2.905356588440811e-06, "loss": 0.2312, "loss_nan_ranks": 0, "loss_rank_avg": 0.2211723029613495, "step": 3190, "valid_targets_mean": 5076.4, "valid_targets_min": 1947 }, { "epoch": 5.916666666666667, "grad_norm": 0.51673830714577, "learning_rate": 2.857605287643437e-06, "loss": 0.2061, "loss_nan_ranks": 0, "loss_rank_avg": 0.18910914659500122, "step": 3195, "valid_targets_mean": 3844.3, "valid_targets_min": 733 }, { "epoch": 5.925925925925926, "grad_norm": 0.5638628522537501, "learning_rate": 2.8102194485331e-06, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.2086801677942276, "step": 3200, "valid_targets_mean": 4155.5, "valid_targets_min": 2057 }, { "epoch": 5.935185185185185, "grad_norm": 0.5790573290649278, "learning_rate": 2.763200081336721e-06, "loss": 0.2335, "loss_nan_ranks": 0, "loss_rank_avg": 0.21793599426746368, "step": 3205, "valid_targets_mean": 3484.1, "valid_targets_min": 368 }, { "epoch": 5.944444444444445, "grad_norm": 0.6122242849287904, "learning_rate": 2.7165481884683576e-06, "loss": 0.2207, "loss_nan_ranks": 0, "loss_rank_avg": 0.2835915982723236, "step": 3210, "valid_targets_mean": 3717.0, "valid_targets_min": 747 }, { "epoch": 5.953703703703704, "grad_norm": 0.5672715926891396, "learning_rate": 2.6702647645077973e-06, "loss": 0.2395, "loss_nan_ranks": 0, "loss_rank_avg": 0.21978811919689178, "step": 3215, "valid_targets_mean": 4550.6, "valid_targets_min": 773 }, { "epoch": 5.962962962962963, "grad_norm": 0.6351867703165379, "learning_rate": 2.6243507961793936e-06, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.2440010905265808, "step": 3220, "valid_targets_mean": 3087.8, "valid_targets_min": 981 }, { "epoch": 5.972222222222222, "grad_norm": 0.6513878183905164, "learning_rate": 2.5788072623309977e-06, "loss": 0.2241, "loss_nan_ranks": 0, "loss_rank_avg": 0.2559264600276947, "step": 3225, "valid_targets_mean": 2916.1, "valid_targets_min": 317 }, { "epoch": 5.981481481481482, "grad_norm": 0.519478406666749, "learning_rate": 2.5336351339131147e-06, "loss": 0.2064, "loss_nan_ranks": 0, "loss_rank_avg": 0.1981564164161682, "step": 3230, "valid_targets_mean": 4137.3, "valid_targets_min": 1696 }, { "epoch": 5.9907407407407405, "grad_norm": 0.6242847386183599, "learning_rate": 2.488835373958185e-06, "loss": 0.2424, "loss_nan_ranks": 0, "loss_rank_avg": 0.28682032227516174, "step": 3235, "valid_targets_mean": 3630.4, "valid_targets_min": 1887 }, { "epoch": 6.0, "grad_norm": 0.6825780673728339, "learning_rate": 2.444408937560059e-06, "loss": 0.2316, "loss_nan_ranks": 0, "loss_rank_avg": 0.25937968492507935, "step": 3240, "valid_targets_mean": 2739.1, "valid_targets_min": 466 }, { "epoch": 6.0092592592592595, "grad_norm": 0.539754241133728, "learning_rate": 2.400356771853651e-06, "loss": 0.2459, "loss_nan_ranks": 0, "loss_rank_avg": 0.1831408143043518, "step": 3245, "valid_targets_mean": 3674.9, "valid_targets_min": 1668 }, { "epoch": 6.018518518518518, "grad_norm": 0.46005254806761847, "learning_rate": 2.3566798159947157e-06, "loss": 0.2196, "loss_nan_ranks": 0, "loss_rank_avg": 0.20222413539886475, "step": 3250, "valid_targets_mean": 4828.1, "valid_targets_min": 1945 }, { "epoch": 6.027777777777778, "grad_norm": 0.4444020837622067, "learning_rate": 2.3133790011398618e-06, "loss": 0.1927, "loss_nan_ranks": 0, "loss_rank_avg": 0.1549903154373169, "step": 3255, "valid_targets_mean": 4708.4, "valid_targets_min": 1799 }, { "epoch": 6.037037037037037, "grad_norm": 0.5920689398499658, "learning_rate": 2.2704552504266664e-06, "loss": 0.2241, "loss_nan_ranks": 0, "loss_rank_avg": 0.25390511751174927, "step": 3260, "valid_targets_mean": 3571.1, "valid_targets_min": 552 }, { "epoch": 6.046296296296297, "grad_norm": 0.526798188540084, "learning_rate": 2.2279094789540244e-06, "loss": 0.2179, "loss_nan_ranks": 0, "loss_rank_avg": 0.23036353290081024, "step": 3265, "valid_targets_mean": 4382.3, "valid_targets_min": 771 }, { "epoch": 6.055555555555555, "grad_norm": 0.5759163726963563, "learning_rate": 2.185742593762614e-06, "loss": 0.2361, "loss_nan_ranks": 0, "loss_rank_avg": 0.25047504901885986, "step": 3270, "valid_targets_mean": 3867.2, "valid_targets_min": 721 }, { "epoch": 6.064814814814815, "grad_norm": 0.48826580428064326, "learning_rate": 2.143955493815577e-06, "loss": 0.2393, "loss_nan_ranks": 0, "loss_rank_avg": 0.2461843639612198, "step": 3275, "valid_targets_mean": 5183.8, "valid_targets_min": 1736 }, { "epoch": 6.074074074074074, "grad_norm": 0.5210334298756018, "learning_rate": 2.1025490699793516e-06, "loss": 0.2211, "loss_nan_ranks": 0, "loss_rank_avg": 0.20328880846500397, "step": 3280, "valid_targets_mean": 4089.4, "valid_targets_min": 986 }, { "epoch": 6.083333333333333, "grad_norm": 0.5220951505604092, "learning_rate": 2.0615242050046656e-06, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.19010621309280396, "step": 3285, "valid_targets_mean": 4184.5, "valid_targets_min": 894 }, { "epoch": 6.092592592592593, "grad_norm": 0.566804464053669, "learning_rate": 2.020881773507739e-06, "loss": 0.2381, "loss_nan_ranks": 0, "loss_rank_avg": 0.25445860624313354, "step": 3290, "valid_targets_mean": 4107.2, "valid_targets_min": 1939 }, { "epoch": 6.101851851851852, "grad_norm": 0.5330996196349423, "learning_rate": 1.9806226419516195e-06, "loss": 0.2296, "loss_nan_ranks": 0, "loss_rank_avg": 0.2357954978942871, "step": 3295, "valid_targets_mean": 4824.0, "valid_targets_min": 574 }, { "epoch": 6.111111111111111, "grad_norm": 0.5686340488301583, "learning_rate": 1.9407476686277095e-06, "loss": 0.2522, "loss_nan_ranks": 0, "loss_rank_avg": 0.22579503059387207, "step": 3300, "valid_targets_mean": 4233.5, "valid_targets_min": 739 }, { "epoch": 6.12037037037037, "grad_norm": 0.8954557853890478, "learning_rate": 1.9012577036374936e-06, "loss": 0.2174, "loss_nan_ranks": 0, "loss_rank_avg": 0.2578815519809723, "step": 3305, "valid_targets_mean": 3204.6, "valid_targets_min": 767 }, { "epoch": 6.12962962962963, "grad_norm": 0.5650667976761549, "learning_rate": 1.8621535888743825e-06, "loss": 0.2392, "loss_nan_ranks": 0, "loss_rank_avg": 0.2621210813522339, "step": 3310, "valid_targets_mean": 4207.1, "valid_targets_min": 2564 }, { "epoch": 6.138888888888889, "grad_norm": 0.6316650802316781, "learning_rate": 1.8234361580057802e-06, "loss": 0.2348, "loss_nan_ranks": 0, "loss_rank_avg": 0.2570689618587494, "step": 3315, "valid_targets_mean": 3737.0, "valid_targets_min": 766 }, { "epoch": 6.148148148148148, "grad_norm": 0.5704491276714678, "learning_rate": 1.7851062364553184e-06, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.22671329975128174, "step": 3320, "valid_targets_mean": 3896.4, "valid_targets_min": 1714 }, { "epoch": 6.157407407407407, "grad_norm": 0.5984289258264732, "learning_rate": 1.7471646413852439e-06, "loss": 0.2415, "loss_nan_ranks": 0, "loss_rank_avg": 0.23753738403320312, "step": 3325, "valid_targets_mean": 3671.3, "valid_targets_min": 1769 }, { "epoch": 6.166666666666667, "grad_norm": 0.5333521022284862, "learning_rate": 1.709612181678999e-06, "loss": 0.2124, "loss_nan_ranks": 0, "loss_rank_avg": 0.22245770692825317, "step": 3330, "valid_targets_mean": 4270.4, "valid_targets_min": 1961 }, { "epoch": 6.175925925925926, "grad_norm": 0.6230366161491935, "learning_rate": 1.6724496579239979e-06, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.22154787182807922, "step": 3335, "valid_targets_mean": 3099.2, "valid_targets_min": 629 }, { "epoch": 6.185185185185185, "grad_norm": 0.6069509329922341, "learning_rate": 1.6356778623945223e-06, "loss": 0.2495, "loss_nan_ranks": 0, "loss_rank_avg": 0.248618945479393, "step": 3340, "valid_targets_mean": 3467.0, "valid_targets_min": 661 }, { "epoch": 6.194444444444445, "grad_norm": 0.6234149579450843, "learning_rate": 1.5992975790348642e-06, "loss": 0.2239, "loss_nan_ranks": 0, "loss_rank_avg": 0.23241698741912842, "step": 3345, "valid_targets_mean": 3599.1, "valid_targets_min": 1668 }, { "epoch": 6.203703703703703, "grad_norm": 0.6001381504985039, "learning_rate": 1.5633095834425983e-06, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.19718721508979797, "step": 3350, "valid_targets_mean": 3149.3, "valid_targets_min": 542 }, { "epoch": 6.212962962962963, "grad_norm": 0.5362275830715411, "learning_rate": 1.527714642852045e-06, "loss": 0.2089, "loss_nan_ranks": 0, "loss_rank_avg": 0.19483308494091034, "step": 3355, "valid_targets_mean": 3562.2, "valid_targets_min": 511 }, { "epoch": 6.222222222222222, "grad_norm": 0.5695773907229609, "learning_rate": 1.492513516117915e-06, "loss": 0.2167, "loss_nan_ranks": 0, "loss_rank_avg": 0.20090246200561523, "step": 3360, "valid_targets_mean": 3909.8, "valid_targets_min": 1802 }, { "epoch": 6.231481481481482, "grad_norm": 0.6005583636288468, "learning_rate": 1.457706953699145e-06, "loss": 0.2142, "loss_nan_ranks": 0, "loss_rank_avg": 0.19939187169075012, "step": 3365, "valid_targets_mean": 3433.8, "valid_targets_min": 1569 }, { "epoch": 6.2407407407407405, "grad_norm": 0.5007154171778021, "learning_rate": 1.423295697642868e-06, "loss": 0.2313, "loss_nan_ranks": 0, "loss_rank_avg": 0.21219304203987122, "step": 3370, "valid_targets_mean": 4683.0, "valid_targets_min": 2362 }, { "epoch": 6.25, "grad_norm": 0.5968890483515067, "learning_rate": 1.3892804815686312e-06, "loss": 0.2214, "loss_nan_ranks": 0, "loss_rank_avg": 0.2734060287475586, "step": 3375, "valid_targets_mean": 4207.8, "valid_targets_min": 1660 }, { "epoch": 6.2592592592592595, "grad_norm": 0.6330168182645113, "learning_rate": 1.35566203065272e-06, "loss": 0.2432, "loss_nan_ranks": 0, "loss_rank_avg": 0.22437870502471924, "step": 3380, "valid_targets_mean": 3316.6, "valid_targets_min": 1267 }, { "epoch": 6.268518518518518, "grad_norm": 0.577655871447151, "learning_rate": 1.3224410616127292e-06, "loss": 0.2348, "loss_nan_ranks": 0, "loss_rank_avg": 0.24034109711647034, "step": 3385, "valid_targets_mean": 4207.6, "valid_targets_min": 342 }, { "epoch": 6.277777777777778, "grad_norm": 0.5856036889391484, "learning_rate": 1.2896182826922577e-06, "loss": 0.2339, "loss_nan_ranks": 0, "loss_rank_avg": 0.2569791078567505, "step": 3390, "valid_targets_mean": 3788.3, "valid_targets_min": 1142 }, { "epoch": 6.287037037037037, "grad_norm": 0.622367825324181, "learning_rate": 1.2571943936458197e-06, "loss": 0.2048, "loss_nan_ranks": 0, "loss_rank_avg": 0.18795576691627502, "step": 3395, "valid_targets_mean": 3135.8, "valid_targets_min": 595 }, { "epoch": 6.296296296296296, "grad_norm": 0.5507538621859848, "learning_rate": 1.2251700857239412e-06, "loss": 0.2004, "loss_nan_ranks": 0, "loss_rank_avg": 0.16971886157989502, "step": 3400, "valid_targets_mean": 3565.2, "valid_targets_min": 812 }, { "epoch": 6.305555555555555, "grad_norm": 0.5709347005687355, "learning_rate": 1.1935460416583889e-06, "loss": 0.2236, "loss_nan_ranks": 0, "loss_rank_avg": 0.19665175676345825, "step": 3405, "valid_targets_mean": 4646.4, "valid_targets_min": 2430 }, { "epoch": 6.314814814814815, "grad_norm": 0.5551338557244925, "learning_rate": 1.162322935647655e-06, "loss": 0.2099, "loss_nan_ranks": 0, "loss_rank_avg": 0.23359957337379456, "step": 3410, "valid_targets_mean": 3922.3, "valid_targets_min": 1700 }, { "epoch": 6.324074074074074, "grad_norm": 0.6081424722887298, "learning_rate": 1.1315014333425455e-06, "loss": 0.2288, "loss_nan_ranks": 0, "loss_rank_avg": 0.27564293146133423, "step": 3415, "valid_targets_mean": 3644.6, "valid_targets_min": 672 }, { "epoch": 6.333333333333333, "grad_norm": 0.6017513819380911, "learning_rate": 1.101082191832017e-06, "loss": 0.2185, "loss_nan_ranks": 0, "loss_rank_avg": 0.25283360481262207, "step": 3420, "valid_targets_mean": 3448.8, "valid_targets_min": 580 }, { "epoch": 6.342592592592593, "grad_norm": 0.6051873506386266, "learning_rate": 1.0710658596291612e-06, "loss": 0.2314, "loss_nan_ranks": 0, "loss_rank_avg": 0.2810269594192505, "step": 3425, "valid_targets_mean": 4163.6, "valid_targets_min": 729 }, { "epoch": 6.351851851851852, "grad_norm": 0.5949919214922811, "learning_rate": 1.0414530766573661e-06, "loss": 0.2163, "loss_nan_ranks": 0, "loss_rank_avg": 0.20459462702274323, "step": 3430, "valid_targets_mean": 3321.0, "valid_targets_min": 1666 }, { "epoch": 6.361111111111111, "grad_norm": 0.5564753367621429, "learning_rate": 1.0122444742366945e-06, "loss": 0.2202, "loss_nan_ranks": 0, "loss_rank_avg": 0.23238684237003326, "step": 3435, "valid_targets_mean": 4126.9, "valid_targets_min": 840 }, { "epoch": 6.37037037037037, "grad_norm": 0.4818681450985929, "learning_rate": 9.83440675070404e-07, "loss": 0.2286, "loss_nan_ranks": 0, "loss_rank_avg": 0.2342509627342224, "step": 3440, "valid_targets_mean": 5746.8, "valid_targets_min": 1421 }, { "epoch": 6.37962962962963, "grad_norm": 0.6306703109771072, "learning_rate": 9.550422932316938e-07, "loss": 0.2265, "loss_nan_ranks": 0, "loss_rank_avg": 0.2181071788072586, "step": 3445, "valid_targets_mean": 3027.6, "valid_targets_min": 313 }, { "epoch": 6.388888888888889, "grad_norm": 0.7109368059368608, "learning_rate": 9.270499341505901e-07, "loss": 0.2361, "loss_nan_ranks": 0, "loss_rank_avg": 0.21922603249549866, "step": 3450, "valid_targets_mean": 3950.0, "valid_targets_min": 425 }, { "epoch": 6.398148148148148, "grad_norm": 0.5748329728377656, "learning_rate": 8.994641946010474e-07, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.26003679633140564, "step": 3455, "valid_targets_mean": 4081.2, "valid_targets_min": 764 }, { "epoch": 6.407407407407407, "grad_norm": 0.6855173336098317, "learning_rate": 8.722856626882415e-07, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.2373206913471222, "step": 3460, "valid_targets_mean": 3279.6, "valid_targets_min": 764 }, { "epoch": 6.416666666666667, "grad_norm": 0.5616037615887454, "learning_rate": 8.455149178360012e-07, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.2553655803203583, "step": 3465, "valid_targets_mean": 4561.8, "valid_targets_min": 594 }, { "epoch": 6.425925925925926, "grad_norm": 0.5276114854080077, "learning_rate": 8.191525307744896e-07, "loss": 0.2229, "loss_nan_ranks": 0, "loss_rank_avg": 0.24148723483085632, "step": 3470, "valid_targets_mean": 4596.1, "valid_targets_min": 880 }, { "epoch": 6.435185185185185, "grad_norm": 0.5535681163787745, "learning_rate": 7.931990635280052e-07, "loss": 0.2168, "loss_nan_ranks": 0, "loss_rank_avg": 0.23151087760925293, "step": 3475, "valid_targets_mean": 4101.1, "valid_targets_min": 215 }, { "epoch": 6.444444444444445, "grad_norm": 0.5364895072322574, "learning_rate": 7.676550694030172e-07, "loss": 0.2134, "loss_nan_ranks": 0, "loss_rank_avg": 0.2174125611782074, "step": 3480, "valid_targets_mean": 4606.8, "valid_targets_min": 570 }, { "epoch": 6.453703703703704, "grad_norm": 0.5101763745085794, "learning_rate": 7.425210929763738e-07, "loss": 0.2273, "loss_nan_ranks": 0, "loss_rank_avg": 0.211885005235672, "step": 3485, "valid_targets_mean": 4786.0, "valid_targets_min": 829 }, { "epoch": 6.462962962962963, "grad_norm": 0.5954727436115828, "learning_rate": 7.17797670083673e-07, "loss": 0.2155, "loss_nan_ranks": 0, "loss_rank_avg": 0.21472561359405518, "step": 3490, "valid_targets_mean": 3507.8, "valid_targets_min": 541 }, { "epoch": 6.472222222222222, "grad_norm": 0.5173291565619594, "learning_rate": 6.934853278078635e-07, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.20479455590248108, "step": 3495, "valid_targets_mean": 3915.1, "valid_targets_min": 1907 }, { "epoch": 6.481481481481482, "grad_norm": 0.5598429190290807, "learning_rate": 6.695845844679816e-07, "loss": 0.2146, "loss_nan_ranks": 0, "loss_rank_avg": 0.265296995639801, "step": 3500, "valid_targets_mean": 4297.6, "valid_targets_min": 569 }, { "epoch": 6.4907407407407405, "grad_norm": 0.644127732439113, "learning_rate": 6.460959496081276e-07, "loss": 0.196, "loss_nan_ranks": 0, "loss_rank_avg": 0.19158616662025452, "step": 3505, "valid_targets_mean": 3284.1, "valid_targets_min": 1857 }, { "epoch": 6.5, "grad_norm": 0.46580841568793, "learning_rate": 6.230199239865808e-07, "loss": 0.2263, "loss_nan_ranks": 0, "loss_rank_avg": 0.16796517372131348, "step": 3510, "valid_targets_mean": 4302.2, "valid_targets_min": 2752 }, { "epoch": 6.5092592592592595, "grad_norm": 0.5292186252116317, "learning_rate": 6.003569995651304e-07, "loss": 0.2106, "loss_nan_ranks": 0, "loss_rank_avg": 0.19200381636619568, "step": 3515, "valid_targets_mean": 4388.9, "valid_targets_min": 512 }, { "epoch": 6.518518518518518, "grad_norm": 0.5737139503739112, "learning_rate": 5.781076594986035e-07, "loss": 0.2173, "loss_nan_ranks": 0, "loss_rank_avg": 0.2559890151023865, "step": 3520, "valid_targets_mean": 4729.4, "valid_targets_min": 743 }, { "epoch": 6.527777777777778, "grad_norm": 0.601281290007252, "learning_rate": 5.562723781245316e-07, "loss": 0.2374, "loss_nan_ranks": 0, "loss_rank_avg": 0.20773258805274963, "step": 3525, "valid_targets_mean": 3555.2, "valid_targets_min": 1591 }, { "epoch": 6.537037037037037, "grad_norm": 0.5889092918003285, "learning_rate": 5.348516209530741e-07, "loss": 0.2463, "loss_nan_ranks": 0, "loss_rank_avg": 0.2901744246482849, "step": 3530, "valid_targets_mean": 4212.6, "valid_targets_min": 616 }, { "epoch": 6.546296296296296, "grad_norm": 0.5058670706615777, "learning_rate": 5.13845844657066e-07, "loss": 0.216, "loss_nan_ranks": 0, "loss_rank_avg": 0.19678425788879395, "step": 3535, "valid_targets_mean": 4579.2, "valid_targets_min": 1761 }, { "epoch": 6.555555555555555, "grad_norm": 0.6257218263377696, "learning_rate": 4.93255497062295e-07, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.24043655395507812, "step": 3540, "valid_targets_mean": 3257.0, "valid_targets_min": 647 }, { "epoch": 6.564814814814815, "grad_norm": 0.5505549796788423, "learning_rate": 4.730810171379574e-07, "loss": 0.234, "loss_nan_ranks": 0, "loss_rank_avg": 0.2459501177072525, "step": 3545, "valid_targets_mean": 4357.0, "valid_targets_min": 733 }, { "epoch": 6.574074074074074, "grad_norm": 0.6028654489237595, "learning_rate": 4.533228349872887e-07, "loss": 0.2163, "loss_nan_ranks": 0, "loss_rank_avg": 0.2598051428794861, "step": 3550, "valid_targets_mean": 3666.0, "valid_targets_min": 649 }, { "epoch": 6.583333333333333, "grad_norm": 0.6185888512407045, "learning_rate": 4.339813718384056e-07, "loss": 0.2476, "loss_nan_ranks": 0, "loss_rank_avg": 0.21807453036308289, "step": 3555, "valid_targets_mean": 3160.6, "valid_targets_min": 513 }, { "epoch": 6.592592592592593, "grad_norm": 0.6008888131850063, "learning_rate": 4.1505704003531155e-07, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.2528533339500427, "step": 3560, "valid_targets_mean": 3871.8, "valid_targets_min": 582 }, { "epoch": 6.601851851851852, "grad_norm": 0.5898425554611046, "learning_rate": 3.965502430291235e-07, "loss": 0.2291, "loss_nan_ranks": 0, "loss_rank_avg": 0.2124367356300354, "step": 3565, "valid_targets_mean": 4025.0, "valid_targets_min": 813 }, { "epoch": 6.611111111111111, "grad_norm": 0.5495965363011839, "learning_rate": 3.784613753694566e-07, "loss": 0.2235, "loss_nan_ranks": 0, "loss_rank_avg": 0.2144167423248291, "step": 3570, "valid_targets_mean": 4046.5, "valid_targets_min": 991 }, { "epoch": 6.62037037037037, "grad_norm": 0.6165423333917514, "learning_rate": 3.607908226960155e-07, "loss": 0.2126, "loss_nan_ranks": 0, "loss_rank_avg": 0.21597029268741608, "step": 3575, "valid_targets_mean": 3308.0, "valid_targets_min": 566 }, { "epoch": 6.62962962962963, "grad_norm": 0.45236188898394386, "learning_rate": 3.4353896173038524e-07, "loss": 0.2282, "loss_nan_ranks": 0, "loss_rank_avg": 0.2370537519454956, "step": 3580, "valid_targets_mean": 5711.6, "valid_targets_min": 670 }, { "epoch": 6.638888888888889, "grad_norm": 0.56933040396151, "learning_rate": 3.2670616026797776e-07, "loss": 0.2117, "loss_nan_ranks": 0, "loss_rank_avg": 0.1997252255678177, "step": 3585, "valid_targets_mean": 3777.3, "valid_targets_min": 818 }, { "epoch": 6.648148148148148, "grad_norm": 0.5659167186719102, "learning_rate": 3.102927771702091e-07, "loss": 0.2384, "loss_nan_ranks": 0, "loss_rank_avg": 0.20919036865234375, "step": 3590, "valid_targets_mean": 3730.8, "valid_targets_min": 770 }, { "epoch": 6.657407407407407, "grad_norm": 0.5079303353870578, "learning_rate": 2.942991623568436e-07, "loss": 0.2238, "loss_nan_ranks": 0, "loss_rank_avg": 0.19151973724365234, "step": 3595, "valid_targets_mean": 4735.9, "valid_targets_min": 2372 }, { "epoch": 6.666666666666667, "grad_norm": 0.5137082597472087, "learning_rate": 2.7872565679852414e-07, "loss": 0.2424, "loss_nan_ranks": 0, "loss_rank_avg": 0.2293935865163803, "step": 3600, "valid_targets_mean": 4664.9, "valid_targets_min": 2201 }, { "epoch": 6.675925925925926, "grad_norm": 0.5409545059748535, "learning_rate": 2.635725925095245e-07, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.23130936920642853, "step": 3605, "valid_targets_mean": 4416.5, "valid_targets_min": 1904 }, { "epoch": 6.685185185185185, "grad_norm": 0.555324535146906, "learning_rate": 2.4884029254064636e-07, "loss": 0.2302, "loss_nan_ranks": 0, "loss_rank_avg": 0.21047340333461761, "step": 3610, "valid_targets_mean": 3865.1, "valid_targets_min": 2178 }, { "epoch": 6.694444444444445, "grad_norm": 0.4829255723553718, "learning_rate": 2.3452907097235355e-07, "loss": 0.2091, "loss_nan_ranks": 0, "loss_rank_avg": 0.1802108883857727, "step": 3615, "valid_targets_mean": 3995.1, "valid_targets_min": 764 }, { "epoch": 6.703703703703704, "grad_norm": 0.5450225699083614, "learning_rate": 2.2063923290805756e-07, "loss": 0.2062, "loss_nan_ranks": 0, "loss_rank_avg": 0.21498540043830872, "step": 3620, "valid_targets_mean": 3961.8, "valid_targets_min": 1708 }, { "epoch": 6.712962962962963, "grad_norm": 0.5695770354778458, "learning_rate": 2.0717107446762696e-07, "loss": 0.2023, "loss_nan_ranks": 0, "loss_rank_avg": 0.19042396545410156, "step": 3625, "valid_targets_mean": 3396.2, "valid_targets_min": 665 }, { "epoch": 6.722222222222222, "grad_norm": 0.5764051828461008, "learning_rate": 1.9412488278107044e-07, "loss": 0.1986, "loss_nan_ranks": 0, "loss_rank_avg": 0.19108936190605164, "step": 3630, "valid_targets_mean": 3522.6, "valid_targets_min": 1445 }, { "epoch": 6.731481481481482, "grad_norm": 0.5141025615330546, "learning_rate": 1.8150093598240825e-07, "loss": 0.221, "loss_nan_ranks": 0, "loss_rank_avg": 0.2816708981990814, "step": 3635, "valid_targets_mean": 5298.4, "valid_targets_min": 1615 }, { "epoch": 6.7407407407407405, "grad_norm": 0.5516459278281802, "learning_rate": 1.69299503203757e-07, "loss": 0.2296, "loss_nan_ranks": 0, "loss_rank_avg": 0.20504800975322723, "step": 3640, "valid_targets_mean": 3981.1, "valid_targets_min": 2114 }, { "epoch": 6.75, "grad_norm": 0.4418946934572028, "learning_rate": 1.5752084456957416e-07, "loss": 0.2515, "loss_nan_ranks": 0, "loss_rank_avg": 0.24999968707561493, "step": 3645, "valid_targets_mean": 5980.2, "valid_targets_min": 2761 }, { "epoch": 6.7592592592592595, "grad_norm": 0.5163142937241847, "learning_rate": 1.4616521119112937e-07, "loss": 0.2332, "loss_nan_ranks": 0, "loss_rank_avg": 0.2180902659893036, "step": 3650, "valid_targets_mean": 4460.1, "valid_targets_min": 1782 }, { "epoch": 6.768518518518518, "grad_norm": 0.5275315251769807, "learning_rate": 1.3523284516113955e-07, "loss": 0.2103, "loss_nan_ranks": 0, "loss_rank_avg": 0.2362240105867386, "step": 3655, "valid_targets_mean": 4463.1, "valid_targets_min": 856 }, { "epoch": 6.777777777777778, "grad_norm": 0.5932303316312726, "learning_rate": 1.2472397954861549e-07, "loss": 0.2442, "loss_nan_ranks": 0, "loss_rank_avg": 0.25914400815963745, "step": 3660, "valid_targets_mean": 4156.2, "valid_targets_min": 1887 }, { "epoch": 6.787037037037037, "grad_norm": 0.5704356783239624, "learning_rate": 1.1463883839388346e-07, "loss": 0.2125, "loss_nan_ranks": 0, "loss_rank_avg": 0.29468631744384766, "step": 3665, "valid_targets_mean": 4419.7, "valid_targets_min": 836 }, { "epoch": 6.796296296296296, "grad_norm": 0.6176484972834143, "learning_rate": 1.0497763670382022e-07, "loss": 0.2039, "loss_nan_ranks": 0, "loss_rank_avg": 0.24018700420856476, "step": 3670, "valid_targets_mean": 3530.4, "valid_targets_min": 368 }, { "epoch": 6.805555555555555, "grad_norm": 0.5827116087848868, "learning_rate": 9.574058044725665e-08, "loss": 0.2377, "loss_nan_ranks": 0, "loss_rank_avg": 0.2357904613018036, "step": 3675, "valid_targets_mean": 3568.0, "valid_targets_min": 2129 }, { "epoch": 6.814814814814815, "grad_norm": 0.5334174149912604, "learning_rate": 8.692786655060348e-08, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.27739739418029785, "step": 3680, "valid_targets_mean": 4644.2, "valid_targets_min": 597 }, { "epoch": 6.824074074074074, "grad_norm": 0.5908537050626661, "learning_rate": 7.853968289363245e-08, "loss": 0.1994, "loss_nan_ranks": 0, "loss_rank_avg": 0.1957089900970459, "step": 3685, "valid_targets_mean": 4683.8, "valid_targets_min": 2043 }, { "epoch": 6.833333333333333, "grad_norm": 0.5260600630535545, "learning_rate": 7.057620830548617e-08, "loss": 0.2024, "loss_nan_ranks": 0, "loss_rank_avg": 0.18605807423591614, "step": 3690, "valid_targets_mean": 4110.4, "valid_targets_min": 1942 }, { "epoch": 6.842592592592593, "grad_norm": 0.5916196044721515, "learning_rate": 6.30376125608656e-08, "loss": 0.2371, "loss_nan_ranks": 0, "loss_rank_avg": 0.2218480408191681, "step": 3695, "valid_targets_mean": 3644.4, "valid_targets_min": 570 }, { "epoch": 6.851851851851852, "grad_norm": 0.514705393546641, "learning_rate": 5.592405637639742e-08, "loss": 0.2319, "loss_nan_ranks": 0, "loss_rank_avg": 0.20492291450500488, "step": 3700, "valid_targets_mean": 4558.1, "valid_targets_min": 2073 }, { "epoch": 6.861111111111111, "grad_norm": 0.4524372951551533, "learning_rate": 4.923569140722118e-08, "loss": 0.2241, "loss_nan_ranks": 0, "loss_rank_avg": 0.28827571868896484, "step": 3705, "valid_targets_mean": 6742.3, "valid_targets_min": 1989 }, { "epoch": 6.87037037037037, "grad_norm": 0.5258479700135631, "learning_rate": 4.2972660243749686e-08, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.1960398554801941, "step": 3710, "valid_targets_mean": 4020.9, "valid_targets_min": 879 }, { "epoch": 6.87962962962963, "grad_norm": 0.5383620822998924, "learning_rate": 3.7135096408631443e-08, "loss": 0.2237, "loss_nan_ranks": 0, "loss_rank_avg": 0.17605650424957275, "step": 3715, "valid_targets_mean": 3673.5, "valid_targets_min": 892 }, { "epoch": 6.888888888888889, "grad_norm": 0.5022673424281345, "learning_rate": 3.172312435390401e-08, "loss": 0.2181, "loss_nan_ranks": 0, "loss_rank_avg": 0.21586468815803528, "step": 3720, "valid_targets_mean": 4826.8, "valid_targets_min": 1808 }, { "epoch": 6.898148148148148, "grad_norm": 0.5150139357949609, "learning_rate": 2.673685945833615e-08, "loss": 0.2383, "loss_nan_ranks": 0, "loss_rank_avg": 0.24277575314044952, "step": 3725, "valid_targets_mean": 5270.2, "valid_targets_min": 2004 }, { "epoch": 6.907407407407407, "grad_norm": 0.512483831039239, "learning_rate": 2.2176408024974228e-08, "loss": 0.2511, "loss_nan_ranks": 0, "loss_rank_avg": 0.23216493427753448, "step": 3730, "valid_targets_mean": 4509.7, "valid_targets_min": 1745 }, { "epoch": 6.916666666666667, "grad_norm": 0.5381090256017264, "learning_rate": 1.8041867278875137e-08, "loss": 0.2299, "loss_nan_ranks": 0, "loss_rank_avg": 0.19807547330856323, "step": 3735, "valid_targets_mean": 4138.4, "valid_targets_min": 779 }, { "epoch": 6.925925925925926, "grad_norm": 0.49323585595517727, "learning_rate": 1.4333325365030181e-08, "loss": 0.2223, "loss_nan_ranks": 0, "loss_rank_avg": 0.21127669513225555, "step": 3740, "valid_targets_mean": 5065.2, "valid_targets_min": 1890 }, { "epoch": 6.935185185185185, "grad_norm": 0.555488870581542, "learning_rate": 1.1050861346488806e-08, "loss": 0.2293, "loss_nan_ranks": 0, "loss_rank_avg": 0.20611155033111572, "step": 3745, "valid_targets_mean": 3845.6, "valid_targets_min": 723 }, { "epoch": 6.944444444444445, "grad_norm": 0.5336086789856043, "learning_rate": 8.194545202666604e-09, "loss": 0.2392, "loss_nan_ranks": 0, "loss_rank_avg": 0.1871817260980606, "step": 3750, "valid_targets_mean": 3598.5, "valid_targets_min": 849 }, { "epoch": 6.953703703703704, "grad_norm": 0.6088047301014547, "learning_rate": 5.76443782786873e-09, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.22356441617012024, "step": 3755, "valid_targets_mean": 3994.5, "valid_targets_min": 1869 }, { "epoch": 6.962962962962963, "grad_norm": 0.5623518574868789, "learning_rate": 3.760591029973171e-09, "loss": 0.225, "loss_nan_ranks": 0, "loss_rank_avg": 0.21949899196624756, "step": 3760, "valid_targets_mean": 3838.9, "valid_targets_min": 928 }, { "epoch": 6.972222222222222, "grad_norm": 0.6022551873866897, "learning_rate": 2.1830475293360686e-09, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.22909459471702576, "step": 3765, "valid_targets_mean": 3225.6, "valid_targets_min": 656 }, { "epoch": 6.981481481481482, "grad_norm": 0.5183031403584336, "learning_rate": 1.0318409578835564e-09, "loss": 0.2227, "loss_nan_ranks": 0, "loss_rank_avg": 0.2013653814792633, "step": 3770, "valid_targets_mean": 4304.2, "valid_targets_min": 446 }, { "epoch": 6.9907407407407405, "grad_norm": 0.5846907653612194, "learning_rate": 3.069958583856725e-10, "loss": 0.2175, "loss_nan_ranks": 0, "loss_rank_avg": 0.23626720905303955, "step": 3775, "valid_targets_mean": 3642.6, "valid_targets_min": 1138 }, { "epoch": 7.0, "grad_norm": 0.5911749795882507, "learning_rate": 8.527683943437837e-12, "loss": 0.2341, "loss_nan_ranks": 0, "loss_rank_avg": 0.23940664529800415, "step": 3780, "valid_targets_mean": 4033.5, "valid_targets_min": 2079 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.23940664529800415, "step": 3780, "total_flos": 1019495884914688.0, "train_loss": 0.14474489343544794, "train_runtime": 12189.5389, "train_samples_per_second": 4.959, "train_steps_per_second": 0.31, "valid_targets_mean": 4033.5, "valid_targets_min": 2079 } ], "logging_steps": 5, "max_steps": 3780, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1019495884914688.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }