diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8363 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 3780, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009259259259259259, + "grad_norm": 19.112235093128195, + "learning_rate": 4.232804232804233e-07, + "loss": 0.8275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.803094744682312, + "step": 5, + "valid_targets_mean": 3444.3, + "valid_targets_min": 828 + }, + { + "epoch": 0.018518518518518517, + "grad_norm": 17.940707174937973, + "learning_rate": 9.523809523809525e-07, + "loss": 0.7963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8306764960289001, + "step": 10, + "valid_targets_mean": 4536.1, + "valid_targets_min": 1670 + }, + { + "epoch": 0.027777777777777776, + "grad_norm": 15.664620071655396, + "learning_rate": 1.4814814814814815e-06, + "loss": 0.7903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8159181475639343, + "step": 15, + "valid_targets_mean": 3574.7, + "valid_targets_min": 435 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 10.770635827936216, + "learning_rate": 2.0105820105820108e-06, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7561392188072205, + "step": 20, + "valid_targets_mean": 4583.4, + "valid_targets_min": 577 + }, + { + "epoch": 0.046296296296296294, + "grad_norm": 7.426416040771228, + "learning_rate": 2.53968253968254e-06, + "loss": 0.7281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7589821815490723, + "step": 25, + "valid_targets_mean": 3544.4, + "valid_targets_min": 763 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 3.713460240688579, + "learning_rate": 3.068783068783069e-06, + "loss": 0.669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6719295978546143, + "step": 30, + "valid_targets_mean": 3903.3, + "valid_targets_min": 697 + }, + { + "epoch": 0.06481481481481481, + "grad_norm": 2.1703955297067186, + "learning_rate": 3.597883597883598e-06, + "loss": 0.5837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5826764106750488, + "step": 35, + "valid_targets_mean": 3806.2, + "valid_targets_min": 1937 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 1.8165232850431967, + "learning_rate": 4.126984126984127e-06, + "loss": 0.6208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6975480318069458, + "step": 40, + "valid_targets_mean": 3366.0, + "valid_targets_min": 878 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.3433617476391144, + "learning_rate": 4.656084656084656e-06, + "loss": 0.5655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5773887634277344, + "step": 45, + "valid_targets_mean": 4220.3, + "valid_targets_min": 724 + }, + { + "epoch": 0.09259259259259259, + "grad_norm": 1.1527914357135887, + "learning_rate": 5.185185185185185e-06, + "loss": 0.5912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6037584543228149, + "step": 50, + "valid_targets_mean": 4641.8, + "valid_targets_min": 2175 + }, + { + "epoch": 0.10185185185185185, + "grad_norm": 0.9246044784239899, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5939217805862427, + "step": 55, + "valid_targets_mean": 4679.5, + "valid_targets_min": 663 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 0.8146470542392132, + "learning_rate": 6.243386243386243e-06, + "loss": 0.5341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5055123567581177, + "step": 60, + "valid_targets_mean": 3902.7, + "valid_targets_min": 923 + }, + { + "epoch": 0.12037037037037036, + "grad_norm": 0.7976083063634939, + "learning_rate": 6.772486772486773e-06, + "loss": 0.532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4894016683101654, + "step": 65, + "valid_targets_mean": 3446.8, + "valid_targets_min": 556 + }, + { + "epoch": 0.12962962962962962, + "grad_norm": 0.7321082473641598, + "learning_rate": 7.301587301587301e-06, + "loss": 0.5207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46321815252304077, + "step": 70, + "valid_targets_mean": 4182.4, + "valid_targets_min": 2033 + }, + { + "epoch": 0.1388888888888889, + "grad_norm": 0.7291697059938048, + "learning_rate": 7.830687830687831e-06, + "loss": 0.512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5047429800033569, + "step": 75, + "valid_targets_mean": 3302.7, + "valid_targets_min": 627 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.7739549159047908, + "learning_rate": 8.35978835978836e-06, + "loss": 0.4734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47949308156967163, + "step": 80, + "valid_targets_mean": 3213.7, + "valid_targets_min": 1752 + }, + { + "epoch": 0.1574074074074074, + "grad_norm": 0.7111733388587708, + "learning_rate": 8.888888888888888e-06, + "loss": 0.4823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44027847051620483, + "step": 85, + "valid_targets_mean": 3505.9, + "valid_targets_min": 1994 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6831309764350063, + "learning_rate": 9.417989417989418e-06, + "loss": 0.4392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40418338775634766, + "step": 90, + "valid_targets_mean": 3209.4, + "valid_targets_min": 512 + }, + { + "epoch": 0.17592592592592593, + "grad_norm": 0.6213729941542423, + "learning_rate": 9.947089947089947e-06, + "loss": 0.4356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43431776762008667, + "step": 95, + "valid_targets_mean": 3941.1, + "valid_targets_min": 723 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 0.7201560667011954, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5052137970924377, + "step": 100, + "valid_targets_mean": 3591.6, + "valid_targets_min": 880 + }, + { + "epoch": 0.19444444444444445, + "grad_norm": 0.6092157523393694, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.4378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41137802600860596, + "step": 105, + "valid_targets_mean": 3784.0, + "valid_targets_min": 546 + }, + { + "epoch": 0.2037037037037037, + "grad_norm": 0.6962357662281705, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5407352447509766, + "step": 110, + "valid_targets_mean": 4280.3, + "valid_targets_min": 764 + }, + { + "epoch": 0.21296296296296297, + "grad_norm": 0.6281974355729566, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4453981816768646, + "step": 115, + "valid_targets_mean": 3953.9, + "valid_targets_min": 1669 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 0.618723801537215, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.4168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3996565639972687, + "step": 120, + "valid_targets_mean": 4443.4, + "valid_targets_min": 1748 + }, + { + "epoch": 0.23148148148148148, + "grad_norm": 0.6419954427627002, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.4195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4261256754398346, + "step": 125, + "valid_targets_mean": 4298.9, + "valid_targets_min": 2102 + }, + { + "epoch": 0.24074074074074073, + "grad_norm": 0.8161990345008757, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.4461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5341705083847046, + "step": 130, + "valid_targets_mean": 4825.4, + "valid_targets_min": 1620 + }, + { + "epoch": 0.25, + "grad_norm": 0.8509610334596335, + "learning_rate": 1.417989417989418e-05, + "loss": 0.436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44645223021507263, + "step": 135, + "valid_targets_mean": 4780.1, + "valid_targets_min": 789 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 0.5904975210645568, + "learning_rate": 1.470899470899471e-05, + "loss": 0.4288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40643012523651123, + "step": 140, + "valid_targets_mean": 4096.2, + "valid_targets_min": 541 + }, + { + "epoch": 0.26851851851851855, + "grad_norm": 0.6739039533538748, + "learning_rate": 1.523809523809524e-05, + "loss": 0.389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4152180552482605, + "step": 145, + "valid_targets_mean": 4372.8, + "valid_targets_min": 1808 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.6677022093982549, + "learning_rate": 1.576719576719577e-05, + "loss": 0.3913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36584043502807617, + "step": 150, + "valid_targets_mean": 3181.2, + "valid_targets_min": 892 + }, + { + "epoch": 0.28703703703703703, + "grad_norm": 0.6028158751437971, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777807652950287, + "step": 155, + "valid_targets_mean": 3913.8, + "valid_targets_min": 1488 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.5663817438154167, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42696279287338257, + "step": 160, + "valid_targets_mean": 4789.0, + "valid_targets_min": 1714 + }, + { + "epoch": 0.3055555555555556, + "grad_norm": 0.7015646909300565, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.4013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37903106212615967, + "step": 165, + "valid_targets_mean": 4173.1, + "valid_targets_min": 520 + }, + { + "epoch": 0.3148148148148148, + "grad_norm": 0.6234945858741571, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.3997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3800799548625946, + "step": 170, + "valid_targets_mean": 4546.9, + "valid_targets_min": 1768 + }, + { + "epoch": 0.32407407407407407, + "grad_norm": 0.598847110558081, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.3828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3486472964286804, + "step": 175, + "valid_targets_mean": 5024.6, + "valid_targets_min": 1684 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.850399133293019, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.3629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3398283123970032, + "step": 180, + "valid_targets_mean": 4075.1, + "valid_targets_min": 1613 + }, + { + "epoch": 0.3425925925925926, + "grad_norm": 0.6301928805898117, + "learning_rate": 1.947089947089947e-05, + "loss": 0.4053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28791162371635437, + "step": 185, + "valid_targets_mean": 3739.5, + "valid_targets_min": 790 + }, + { + "epoch": 0.35185185185185186, + "grad_norm": 0.6696380315329734, + "learning_rate": 2e-05, + "loss": 0.3974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4022134840488434, + "step": 190, + "valid_targets_mean": 4268.4, + "valid_targets_min": 1592 + }, + { + "epoch": 0.3611111111111111, + "grad_norm": 0.707972099980987, + "learning_rate": 2.0529100529100533e-05, + "loss": 0.4258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.407692015171051, + "step": 195, + "valid_targets_mean": 3323.2, + "valid_targets_min": 721 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 0.6176399796412881, + "learning_rate": 2.105820105820106e-05, + "loss": 0.3572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32153958082199097, + "step": 200, + "valid_targets_mean": 3555.6, + "valid_targets_min": 840 + }, + { + "epoch": 0.37962962962962965, + "grad_norm": 0.625992536194421, + "learning_rate": 2.158730158730159e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49598437547683716, + "step": 205, + "valid_targets_mean": 5771.6, + "valid_targets_min": 675 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 0.6818922510280604, + "learning_rate": 2.211640211640212e-05, + "loss": 0.3721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39517930150032043, + "step": 210, + "valid_targets_mean": 3803.9, + "valid_targets_min": 523 + }, + { + "epoch": 0.39814814814814814, + "grad_norm": 0.5873676150067556, + "learning_rate": 2.2645502645502648e-05, + "loss": 0.3945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3694095015525818, + "step": 215, + "valid_targets_mean": 4309.9, + "valid_targets_min": 1808 + }, + { + "epoch": 0.4074074074074074, + "grad_norm": 0.5984897505965682, + "learning_rate": 2.317460317460318e-05, + "loss": 0.3735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3190292418003082, + "step": 220, + "valid_targets_mean": 4658.9, + "valid_targets_min": 1714 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6652241547692664, + "learning_rate": 2.3703703703703703e-05, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3782808184623718, + "step": 225, + "valid_targets_mean": 4637.1, + "valid_targets_min": 1662 + }, + { + "epoch": 0.42592592592592593, + "grad_norm": 0.6773269861053436, + "learning_rate": 2.4232804232804234e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3852205276489258, + "step": 230, + "valid_targets_mean": 3920.9, + "valid_targets_min": 1620 + }, + { + "epoch": 0.4351851851851852, + "grad_norm": 0.6294988371468406, + "learning_rate": 2.4761904761904766e-05, + "loss": 0.3577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38655975461006165, + "step": 235, + "valid_targets_mean": 4567.7, + "valid_targets_min": 2256 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.7066702282132421, + "learning_rate": 2.5291005291005294e-05, + "loss": 0.3836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42079806327819824, + "step": 240, + "valid_targets_mean": 3376.9, + "valid_targets_min": 708 + }, + { + "epoch": 0.4537037037037037, + "grad_norm": 0.6424177932025107, + "learning_rate": 2.582010582010582e-05, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3230123519897461, + "step": 245, + "valid_targets_mean": 3249.8, + "valid_targets_min": 551 + }, + { + "epoch": 0.46296296296296297, + "grad_norm": 0.6455853042915125, + "learning_rate": 2.6349206349206353e-05, + "loss": 0.3953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4580186605453491, + "step": 250, + "valid_targets_mean": 4673.4, + "valid_targets_min": 1703 + }, + { + "epoch": 0.4722222222222222, + "grad_norm": 0.5916132534087772, + "learning_rate": 2.687830687830688e-05, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36162281036376953, + "step": 255, + "valid_targets_mean": 3973.9, + "valid_targets_min": 2418 + }, + { + "epoch": 0.48148148148148145, + "grad_norm": 0.5577346524553856, + "learning_rate": 2.740740740740741e-05, + "loss": 0.3443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2668542265892029, + "step": 260, + "valid_targets_mean": 4436.5, + "valid_targets_min": 591 + }, + { + "epoch": 0.49074074074074076, + "grad_norm": 0.6414361548008731, + "learning_rate": 2.7936507936507936e-05, + "loss": 0.3806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3715950846672058, + "step": 265, + "valid_targets_mean": 4098.6, + "valid_targets_min": 2238 + }, + { + "epoch": 0.5, + "grad_norm": 0.5856940202703296, + "learning_rate": 2.8465608465608467e-05, + "loss": 0.3926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4181647002696991, + "step": 270, + "valid_targets_mean": 5686.6, + "valid_targets_min": 932 + }, + { + "epoch": 0.5092592592592593, + "grad_norm": 0.632615688558668, + "learning_rate": 2.8994708994709e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33307045698165894, + "step": 275, + "valid_targets_mean": 4060.1, + "valid_targets_min": 595 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 0.6599712643815261, + "learning_rate": 2.9523809523809526e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40192732214927673, + "step": 280, + "valid_targets_mean": 3505.5, + "valid_targets_min": 721 + }, + { + "epoch": 0.5277777777777778, + "grad_norm": 0.7871824525334413, + "learning_rate": 3.0052910052910054e-05, + "loss": 0.3811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37350624799728394, + "step": 285, + "valid_targets_mean": 4021.2, + "valid_targets_min": 480 + }, + { + "epoch": 0.5370370370370371, + "grad_norm": 0.5941203905483521, + "learning_rate": 3.058201058201058e-05, + "loss": 0.3806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34662193059921265, + "step": 290, + "valid_targets_mean": 4111.9, + "valid_targets_min": 849 + }, + { + "epoch": 0.5462962962962963, + "grad_norm": 0.6832466030128974, + "learning_rate": 3.111111111111112e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35143667459487915, + "step": 295, + "valid_targets_mean": 3446.2, + "valid_targets_min": 567 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.5652043089590578, + "learning_rate": 3.1640211640211645e-05, + "loss": 0.4166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39690762758255005, + "step": 300, + "valid_targets_mean": 4713.9, + "valid_targets_min": 772 + }, + { + "epoch": 0.5648148148148148, + "grad_norm": 0.7212363216251159, + "learning_rate": 3.216931216931217e-05, + "loss": 0.3699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3936673402786255, + "step": 305, + "valid_targets_mean": 3249.8, + "valid_targets_min": 528 + }, + { + "epoch": 0.5740740740740741, + "grad_norm": 0.5825429131593575, + "learning_rate": 3.26984126984127e-05, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37310439348220825, + "step": 310, + "valid_targets_mean": 5273.2, + "valid_targets_min": 496 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5872360398885519, + "learning_rate": 3.322751322751323e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35499951243400574, + "step": 315, + "valid_targets_mean": 5564.2, + "valid_targets_min": 2044 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.7151624805661932, + "learning_rate": 3.375661375661376e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3740341365337372, + "step": 320, + "valid_targets_mean": 4010.5, + "valid_targets_min": 1007 + }, + { + "epoch": 0.6018518518518519, + "grad_norm": 0.7663314912022873, + "learning_rate": 3.4285714285714284e-05, + "loss": 0.3501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37295448780059814, + "step": 325, + "valid_targets_mean": 3384.2, + "valid_targets_min": 803 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 0.7089838515813951, + "learning_rate": 3.481481481481482e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33399441838264465, + "step": 330, + "valid_targets_mean": 5579.9, + "valid_targets_min": 1870 + }, + { + "epoch": 0.6203703703703703, + "grad_norm": 0.5776773939398085, + "learning_rate": 3.5343915343915346e-05, + "loss": 0.3353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3364108204841614, + "step": 335, + "valid_targets_mean": 4555.3, + "valid_targets_min": 1137 + }, + { + "epoch": 0.6296296296296297, + "grad_norm": 0.6642731154701934, + "learning_rate": 3.5873015873015874e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3789205551147461, + "step": 340, + "valid_targets_mean": 3312.5, + "valid_targets_min": 943 + }, + { + "epoch": 0.6388888888888888, + "grad_norm": 0.6054739369110539, + "learning_rate": 3.64021164021164e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3263542652130127, + "step": 345, + "valid_targets_mean": 4468.6, + "valid_targets_min": 2434 + }, + { + "epoch": 0.6481481481481481, + "grad_norm": 0.6874656720878914, + "learning_rate": 3.6931216931216936e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3052312731742859, + "step": 350, + "valid_targets_mean": 3007.6, + "valid_targets_min": 702 + }, + { + "epoch": 0.6574074074074074, + "grad_norm": 0.6507760256018081, + "learning_rate": 3.7460317460317464e-05, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3832956552505493, + "step": 355, + "valid_targets_mean": 3906.8, + "valid_targets_min": 726 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5435451628510831, + "learning_rate": 3.798941798941799e-05, + "loss": 0.3757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39122748374938965, + "step": 360, + "valid_targets_mean": 5430.8, + "valid_targets_min": 327 + }, + { + "epoch": 0.6759259259259259, + "grad_norm": 0.5644022877423189, + "learning_rate": 3.851851851851852e-05, + "loss": 0.3777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32652783393859863, + "step": 365, + "valid_targets_mean": 4897.6, + "valid_targets_min": 1720 + }, + { + "epoch": 0.6851851851851852, + "grad_norm": 0.6380722458933044, + "learning_rate": 3.904761904761905e-05, + "loss": 0.3161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.340414822101593, + "step": 370, + "valid_targets_mean": 3580.2, + "valid_targets_min": 757 + }, + { + "epoch": 0.6944444444444444, + "grad_norm": 0.5617778297985487, + "learning_rate": 3.957671957671958e-05, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2890230715274811, + "step": 375, + "valid_targets_mean": 3849.2, + "valid_targets_min": 1728 + }, + { + "epoch": 0.7037037037037037, + "grad_norm": 0.7391134176193144, + "learning_rate": 3.999999147231606e-05, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3627009093761444, + "step": 380, + "valid_targets_mean": 3590.0, + "valid_targets_min": 1700 + }, + { + "epoch": 0.7129629629629629, + "grad_norm": 0.585798655714323, + "learning_rate": 3.9999693004141615e-05, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33226558566093445, + "step": 385, + "valid_targets_mean": 3770.4, + "valid_targets_min": 490 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 0.517353669888523, + "learning_rate": 3.999896815904212e-05, + "loss": 0.3454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36117538809776306, + "step": 390, + "valid_targets_mean": 5560.3, + "valid_targets_min": 1959 + }, + { + "epoch": 0.7314814814814815, + "grad_norm": 0.6107363197274304, + "learning_rate": 3.999781695247067e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38827258348464966, + "step": 395, + "valid_targets_mean": 4259.4, + "valid_targets_min": 572 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.6359653698753834, + "learning_rate": 3.999623940897003e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32796719670295715, + "step": 400, + "valid_targets_mean": 3816.1, + "valid_targets_min": 698 + }, + { + "epoch": 0.75, + "grad_norm": 0.6435556727086547, + "learning_rate": 3.9994235562172135e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3033405542373657, + "step": 405, + "valid_targets_mean": 3801.8, + "valid_targets_min": 605 + }, + { + "epoch": 0.7592592592592593, + "grad_norm": 0.5397509110893886, + "learning_rate": 3.999180545479734e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3159610331058502, + "step": 410, + "valid_targets_mean": 4451.7, + "valid_targets_min": 558 + }, + { + "epoch": 0.7685185185185185, + "grad_norm": 0.6224527962247686, + "learning_rate": 3.998894913865352e-05, + "loss": 0.3595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543645441532135, + "step": 415, + "valid_targets_mean": 3635.9, + "valid_targets_min": 1754 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 0.6426499523396512, + "learning_rate": 3.9985666674634976e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.339733749628067, + "step": 420, + "valid_targets_mean": 3655.6, + "valid_targets_min": 783 + }, + { + "epoch": 0.7870370370370371, + "grad_norm": 0.5232489603174455, + "learning_rate": 3.998195813272113e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3885164260864258, + "step": 425, + "valid_targets_mean": 6157.9, + "valid_targets_min": 871 + }, + { + "epoch": 0.7962962962962963, + "grad_norm": 0.5645900335897053, + "learning_rate": 3.997782359197503e-05, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34212440252304077, + "step": 430, + "valid_targets_mean": 4778.2, + "valid_targets_min": 584 + }, + { + "epoch": 0.8055555555555556, + "grad_norm": 0.5962980381815696, + "learning_rate": 3.997326314054167e-05, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3020142614841461, + "step": 435, + "valid_targets_mean": 5573.9, + "valid_targets_min": 845 + }, + { + "epoch": 0.8148148148148148, + "grad_norm": 0.6887712552325551, + "learning_rate": 3.9968276875646095e-05, + "loss": 0.3244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33892735838890076, + "step": 440, + "valid_targets_mean": 4693.2, + "valid_targets_min": 886 + }, + { + "epoch": 0.8240740740740741, + "grad_norm": 0.7056735839532887, + "learning_rate": 3.9962864903591375e-05, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4029475450515747, + "step": 445, + "valid_targets_mean": 3450.6, + "valid_targets_min": 930 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5753803712883945, + "learning_rate": 3.995702733975625e-05, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27287542819976807, + "step": 450, + "valid_targets_mean": 3599.7, + "valid_targets_min": 1569 + }, + { + "epoch": 0.8425925925925926, + "grad_norm": 0.6413965772420983, + "learning_rate": 3.9950764308592783e-05, + "loss": 0.3417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3798047602176666, + "step": 455, + "valid_targets_mean": 3774.1, + "valid_targets_min": 317 + }, + { + "epoch": 0.8518518518518519, + "grad_norm": 0.645787380121332, + "learning_rate": 3.9944075943623605e-05, + "loss": 0.3172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34351015090942383, + "step": 460, + "valid_targets_mean": 3503.0, + "valid_targets_min": 526 + }, + { + "epoch": 0.8611111111111112, + "grad_norm": 0.5184110306882624, + "learning_rate": 3.9936962387439135e-05, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3216972351074219, + "step": 465, + "valid_targets_mean": 5171.2, + "valid_targets_min": 1926 + }, + { + "epoch": 0.8703703703703703, + "grad_norm": 0.6014751168851044, + "learning_rate": 3.992942379169452e-05, + "loss": 0.3339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32453712821006775, + "step": 470, + "valid_targets_mean": 4786.2, + "valid_targets_min": 629 + }, + { + "epoch": 0.8796296296296297, + "grad_norm": 0.6616582269145059, + "learning_rate": 3.992146031710637e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35846132040023804, + "step": 475, + "valid_targets_mean": 3561.0, + "valid_targets_min": 949 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.49969303426311373, + "learning_rate": 3.99130721334494e-05, + "loss": 0.3502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090265095233917, + "step": 480, + "valid_targets_mean": 4930.3, + "valid_targets_min": 491 + }, + { + "epoch": 0.8981481481481481, + "grad_norm": 0.6240367506941321, + "learning_rate": 3.9904259419552744e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35694044828414917, + "step": 485, + "valid_targets_mean": 3573.7, + "valid_targets_min": 2118 + }, + { + "epoch": 0.9074074074074074, + "grad_norm": 0.570990856646238, + "learning_rate": 3.989502236329618e-05, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32826316356658936, + "step": 490, + "valid_targets_mean": 4897.8, + "valid_targets_min": 970 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5453511061498877, + "learning_rate": 3.988536116160612e-05, + "loss": 0.3359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3527144193649292, + "step": 495, + "valid_targets_mean": 4487.1, + "valid_targets_min": 1709 + }, + { + "epoch": 0.9259259259259259, + "grad_norm": 0.6304542974915351, + "learning_rate": 3.987527602045139e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34708958864212036, + "step": 500, + "valid_targets_mean": 3580.7, + "valid_targets_min": 894 + }, + { + "epoch": 0.9351851851851852, + "grad_norm": 0.625911017517709, + "learning_rate": 3.9864767154838864e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3413323760032654, + "step": 505, + "valid_targets_mean": 3402.6, + "valid_targets_min": 698 + }, + { + "epoch": 0.9444444444444444, + "grad_norm": 0.5749197484139668, + "learning_rate": 3.985383478880887e-05, + "loss": 0.3338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3760027587413788, + "step": 510, + "valid_targets_mean": 4457.2, + "valid_targets_min": 194 + }, + { + "epoch": 0.9537037037037037, + "grad_norm": 0.6836107942264785, + "learning_rate": 3.984247915543043e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3548537492752075, + "step": 515, + "valid_targets_mean": 3044.6, + "valid_targets_min": 736 + }, + { + "epoch": 0.9629629629629629, + "grad_norm": 0.639879100311534, + "learning_rate": 3.9830700496796246e-05, + "loss": 0.3251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3742837905883789, + "step": 520, + "valid_targets_mean": 3597.6, + "valid_targets_min": 2209 + }, + { + "epoch": 0.9722222222222222, + "grad_norm": 0.789794647068884, + "learning_rate": 3.98184990640176e-05, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34361645579338074, + "step": 525, + "valid_targets_mean": 2886.4, + "valid_targets_min": 275 + }, + { + "epoch": 0.9814814814814815, + "grad_norm": 0.549167670180771, + "learning_rate": 3.9805875117218934e-05, + "loss": 0.3573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3770226836204529, + "step": 530, + "valid_targets_mean": 4474.1, + "valid_targets_min": 1026 + }, + { + "epoch": 0.9907407407407407, + "grad_norm": 0.499937106224937, + "learning_rate": 3.9792828925532376e-05, + "loss": 0.3325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31070464849472046, + "step": 535, + "valid_targets_mean": 4791.1, + "valid_targets_min": 1838 + }, + { + "epoch": 1.0, + "grad_norm": 0.5379069079693097, + "learning_rate": 3.977936076709195e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.313912570476532, + "step": 540, + "valid_targets_mean": 4855.9, + "valid_targets_min": 778 + }, + { + "epoch": 1.0092592592592593, + "grad_norm": 0.5872678514369906, + "learning_rate": 3.976547092902765e-05, + "loss": 0.3107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3828044831752777, + "step": 545, + "valid_targets_mean": 4668.6, + "valid_targets_min": 422 + }, + { + "epoch": 1.0185185185185186, + "grad_norm": 0.622573855977736, + "learning_rate": 3.9751159707459354e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30190443992614746, + "step": 550, + "valid_targets_mean": 3399.8, + "valid_targets_min": 480 + }, + { + "epoch": 1.0277777777777777, + "grad_norm": 0.6028687336159148, + "learning_rate": 3.973642740749048e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30583667755126953, + "step": 555, + "valid_targets_mean": 3726.7, + "valid_targets_min": 629 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 0.46880468225849076, + "learning_rate": 3.972127434320148e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561293840408325, + "step": 560, + "valid_targets_mean": 4991.6, + "valid_targets_min": 1598 + }, + { + "epoch": 1.0462962962962963, + "grad_norm": 0.5498139372111231, + "learning_rate": 3.970570083764316e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27668747305870056, + "step": 565, + "valid_targets_mean": 3853.1, + "valid_targets_min": 573 + }, + { + "epoch": 1.0555555555555556, + "grad_norm": 0.7107012818618696, + "learning_rate": 3.968970722282979e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35108423233032227, + "step": 570, + "valid_targets_mean": 3513.9, + "valid_targets_min": 1733 + }, + { + "epoch": 1.0648148148148149, + "grad_norm": 0.5944855386113417, + "learning_rate": 3.9673293839732024e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.239329993724823, + "step": 575, + "valid_targets_mean": 3587.9, + "valid_targets_min": 584 + }, + { + "epoch": 1.074074074074074, + "grad_norm": 0.5366706705366189, + "learning_rate": 3.965646103826962e-05, + "loss": 0.3161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27880698442459106, + "step": 580, + "valid_targets_mean": 3620.6, + "valid_targets_min": 496 + }, + { + "epoch": 1.0833333333333333, + "grad_norm": 0.6078514718903982, + "learning_rate": 3.963920917730399e-05, + "loss": 0.3293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3750278353691101, + "step": 585, + "valid_targets_mean": 4482.4, + "valid_targets_min": 1680 + }, + { + "epoch": 1.0925925925925926, + "grad_norm": 0.6207808313685298, + "learning_rate": 3.9621538624630546e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31572356820106506, + "step": 590, + "valid_targets_mean": 3113.2, + "valid_targets_min": 635 + }, + { + "epoch": 1.1018518518518519, + "grad_norm": 0.49606649278990134, + "learning_rate": 3.9603449756970877e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2611154019832611, + "step": 595, + "valid_targets_mean": 4295.1, + "valid_targets_min": 545 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.6073469095682115, + "learning_rate": 3.9584942959964695e-05, + "loss": 0.3069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28560468554496765, + "step": 600, + "valid_targets_mean": 3647.1, + "valid_targets_min": 1445 + }, + { + "epoch": 1.1203703703703705, + "grad_norm": 0.5950597164351814, + "learning_rate": 3.9566018628161595e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32758647203445435, + "step": 605, + "valid_targets_mean": 3512.6, + "valid_targets_min": 435 + }, + { + "epoch": 1.1296296296296295, + "grad_norm": 0.6075429397094143, + "learning_rate": 3.9546677165012714e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27764448523521423, + "step": 610, + "valid_targets_mean": 4952.8, + "valid_targets_min": 535 + }, + { + "epoch": 1.1388888888888888, + "grad_norm": 0.5550360366103939, + "learning_rate": 3.9526918982862045e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3022664785385132, + "step": 615, + "valid_targets_mean": 3905.1, + "valid_targets_min": 831 + }, + { + "epoch": 1.1481481481481481, + "grad_norm": 0.6050232035267888, + "learning_rate": 3.950674450293771e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32235607504844666, + "step": 620, + "valid_targets_mean": 3641.3, + "valid_targets_min": 563 + }, + { + "epoch": 1.1574074074074074, + "grad_norm": 0.5959296142535176, + "learning_rate": 3.948615415534294e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39365071058273315, + "step": 625, + "valid_targets_mean": 3973.9, + "valid_targets_min": 626 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 0.5533298682994967, + "learning_rate": 3.946514837904693e-05, + "loss": 0.2983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2971389889717102, + "step": 630, + "valid_targets_mean": 4080.9, + "valid_targets_min": 834 + }, + { + "epoch": 1.175925925925926, + "grad_norm": 0.6106998081873015, + "learning_rate": 3.944372762187547e-05, + "loss": 0.3329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3265891671180725, + "step": 635, + "valid_targets_mean": 3942.2, + "valid_targets_min": 446 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.5794193483906891, + "learning_rate": 3.9421892340501405e-05, + "loss": 0.3145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3371884822845459, + "step": 640, + "valid_targets_mean": 4098.7, + "valid_targets_min": 295 + }, + { + "epoch": 1.1944444444444444, + "grad_norm": 0.5448020606518212, + "learning_rate": 3.939964300043487e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27047258615493774, + "step": 645, + "valid_targets_mean": 4001.4, + "valid_targets_min": 592 + }, + { + "epoch": 1.2037037037037037, + "grad_norm": 0.5233238220020286, + "learning_rate": 3.9376980076013426e-05, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30770355463027954, + "step": 650, + "valid_targets_mean": 4120.2, + "valid_targets_min": 779 + }, + { + "epoch": 1.212962962962963, + "grad_norm": 0.5883380365500736, + "learning_rate": 3.9353904050391874e-05, + "loss": 0.3377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36008432507514954, + "step": 655, + "valid_targets_mean": 4483.6, + "valid_targets_min": 2162 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 0.8824261790418245, + "learning_rate": 3.933041541553202e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2799769937992096, + "step": 660, + "valid_targets_mean": 3285.0, + "valid_targets_min": 599 + }, + { + "epoch": 1.2314814814814814, + "grad_norm": 0.5442814396188268, + "learning_rate": 3.930651467219214e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2907693386077881, + "step": 665, + "valid_targets_mean": 4048.6, + "valid_targets_min": 669 + }, + { + "epoch": 1.2407407407407407, + "grad_norm": 0.5827496169253467, + "learning_rate": 3.928220232991633e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29981476068496704, + "step": 670, + "valid_targets_mean": 3801.6, + "valid_targets_min": 1767 + }, + { + "epoch": 1.25, + "grad_norm": 0.5462062382083914, + "learning_rate": 3.925747890702363e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2990304231643677, + "step": 675, + "valid_targets_mean": 4142.3, + "valid_targets_min": 321 + }, + { + "epoch": 1.2592592592592593, + "grad_norm": 0.6206083513162505, + "learning_rate": 3.9232344930596983e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2708045244216919, + "step": 680, + "valid_targets_mean": 3426.8, + "valid_targets_min": 2009 + }, + { + "epoch": 1.2685185185185186, + "grad_norm": 0.5745410057483962, + "learning_rate": 3.9206800936472e-05, + "loss": 0.2947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561905086040497, + "step": 685, + "valid_targets_mean": 3190.8, + "valid_targets_min": 871 + }, + { + "epoch": 1.2777777777777777, + "grad_norm": 0.5537138179650384, + "learning_rate": 3.9180847469225514e-05, + "loss": 0.3227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.359304279088974, + "step": 690, + "valid_targets_mean": 4509.7, + "valid_targets_min": 595 + }, + { + "epoch": 1.287037037037037, + "grad_norm": 0.5205868768170611, + "learning_rate": 3.9154485082164e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28854691982269287, + "step": 695, + "valid_targets_mean": 4476.4, + "valid_targets_min": 421 + }, + { + "epoch": 1.2962962962962963, + "grad_norm": 0.5809148768460918, + "learning_rate": 3.912771433731176e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33331120014190674, + "step": 700, + "valid_targets_mean": 4712.1, + "valid_targets_min": 2303 + }, + { + "epoch": 1.3055555555555556, + "grad_norm": 0.6151605514518709, + "learning_rate": 3.910053580539896e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3627372980117798, + "step": 705, + "valid_targets_mean": 4119.8, + "valid_targets_min": 698 + }, + { + "epoch": 1.3148148148148149, + "grad_norm": 0.5988192996988059, + "learning_rate": 3.907295006584941e-05, + "loss": 0.317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34967589378356934, + "step": 710, + "valid_targets_mean": 4211.2, + "valid_targets_min": 888 + }, + { + "epoch": 1.324074074074074, + "grad_norm": 0.5533738940567987, + "learning_rate": 3.904495770676831e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595583200454712, + "step": 715, + "valid_targets_mean": 3652.5, + "valid_targets_min": 772 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.677729485934007, + "learning_rate": 3.9016559324929594e-05, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28384801745414734, + "step": 720, + "valid_targets_mean": 3426.2, + "valid_targets_min": 2319 + }, + { + "epoch": 1.3425925925925926, + "grad_norm": 0.5229228302802816, + "learning_rate": 3.8987755525763315e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3147448003292084, + "step": 725, + "valid_targets_mean": 4087.8, + "valid_targets_min": 567 + }, + { + "epoch": 1.3518518518518519, + "grad_norm": 0.5839814638980791, + "learning_rate": 3.895854692334264e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25805115699768066, + "step": 730, + "valid_targets_mean": 4818.6, + "valid_targets_min": 1703 + }, + { + "epoch": 1.3611111111111112, + "grad_norm": 0.567166024329898, + "learning_rate": 3.892893414037084e-05, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2491036057472229, + "step": 735, + "valid_targets_mean": 3994.6, + "valid_targets_min": 2004 + }, + { + "epoch": 1.3703703703703702, + "grad_norm": 0.5547431269439685, + "learning_rate": 3.889891780816799e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31328123807907104, + "step": 740, + "valid_targets_mean": 3948.7, + "valid_targets_min": 628 + }, + { + "epoch": 1.3796296296296298, + "grad_norm": 0.5671900268720954, + "learning_rate": 3.886849856665746e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3374163508415222, + "step": 745, + "valid_targets_mean": 3923.8, + "valid_targets_min": 892 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 0.53885923431837, + "learning_rate": 3.8837677064352345e-05, + "loss": 0.2992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31161177158355713, + "step": 750, + "valid_targets_mean": 3956.8, + "valid_targets_min": 1684 + }, + { + "epoch": 1.3981481481481481, + "grad_norm": 0.5062852118800623, + "learning_rate": 3.8806453958341615e-05, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23768174648284912, + "step": 755, + "valid_targets_mean": 3670.1, + "valid_targets_min": 840 + }, + { + "epoch": 1.4074074074074074, + "grad_norm": 0.5980684056816908, + "learning_rate": 3.877482991427607e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3120206594467163, + "step": 760, + "valid_targets_mean": 3676.0, + "valid_targets_min": 1026 + }, + { + "epoch": 1.4166666666666667, + "grad_norm": 0.5263249865978269, + "learning_rate": 3.874280560635418e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24431112408638, + "step": 765, + "valid_targets_mean": 4316.4, + "valid_targets_min": 1753 + }, + { + "epoch": 1.425925925925926, + "grad_norm": 0.5371674118999251, + "learning_rate": 3.871038171730775e-05, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3550418019294739, + "step": 770, + "valid_targets_mean": 5343.2, + "valid_targets_min": 1034 + }, + { + "epoch": 1.4351851851851851, + "grad_norm": 0.5747486969945653, + "learning_rate": 3.8677558938387276e-05, + "loss": 0.2928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2658607065677643, + "step": 775, + "valid_targets_mean": 3501.0, + "valid_targets_min": 490 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.5593639327643428, + "learning_rate": 3.864433796934728e-05, + "loss": 0.317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2963068187236786, + "step": 780, + "valid_targets_mean": 3459.8, + "valid_targets_min": 1596 + }, + { + "epoch": 1.4537037037037037, + "grad_norm": 0.49539758416732715, + "learning_rate": 3.861071951843137e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3172762393951416, + "step": 785, + "valid_targets_mean": 4932.9, + "valid_targets_min": 2031 + }, + { + "epoch": 1.462962962962963, + "grad_norm": 0.7233453222225057, + "learning_rate": 3.8576704302357135e-05, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31079697608947754, + "step": 790, + "valid_targets_mean": 2923.9, + "valid_targets_min": 368 + }, + { + "epoch": 1.4722222222222223, + "grad_norm": 0.5304815079906486, + "learning_rate": 3.854229304630086e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519688010215759, + "step": 795, + "valid_targets_mean": 3894.1, + "valid_targets_min": 1015 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.5912953728210274, + "learning_rate": 3.8507486483882084e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31321465969085693, + "step": 800, + "valid_targets_mean": 3528.2, + "valid_targets_min": 641 + }, + { + "epoch": 1.4907407407407407, + "grad_norm": 0.676659645331013, + "learning_rate": 3.8472285357147966e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34123727679252625, + "step": 805, + "valid_targets_mean": 4592.4, + "valid_targets_min": 697 + }, + { + "epoch": 1.5, + "grad_norm": 0.4442677722852739, + "learning_rate": 3.843669041655741e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30493301153182983, + "step": 810, + "valid_targets_mean": 5845.2, + "valid_targets_min": 255 + }, + { + "epoch": 1.5092592592592593, + "grad_norm": 0.5682057669159459, + "learning_rate": 3.840070242096514e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3426523804664612, + "step": 815, + "valid_targets_mean": 4269.9, + "valid_targets_min": 317 + }, + { + "epoch": 1.5185185185185186, + "grad_norm": 0.5718791042050542, + "learning_rate": 3.8364322137605484e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2769266366958618, + "step": 820, + "valid_targets_mean": 3698.8, + "valid_targets_min": 665 + }, + { + "epoch": 1.5277777777777777, + "grad_norm": 0.5536227502501987, + "learning_rate": 3.832755034207601e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30558109283447266, + "step": 825, + "valid_targets_mean": 4033.0, + "valid_targets_min": 585 + }, + { + "epoch": 1.5370370370370372, + "grad_norm": 0.5252844534173842, + "learning_rate": 3.8290387818321e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3152919411659241, + "step": 830, + "valid_targets_mean": 4217.6, + "valid_targets_min": 1788 + }, + { + "epoch": 1.5462962962962963, + "grad_norm": 0.6143178973257938, + "learning_rate": 3.825283535861476e-05, + "loss": 0.3175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2816426157951355, + "step": 835, + "valid_targets_mean": 5267.9, + "valid_targets_min": 2440 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 0.5986189313818777, + "learning_rate": 3.8214893763544684e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27623170614242554, + "step": 840, + "valid_targets_mean": 3365.1, + "valid_targets_min": 474 + }, + { + "epoch": 1.5648148148148149, + "grad_norm": 0.44155638321379925, + "learning_rate": 3.817656384199422e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2690165340900421, + "step": 845, + "valid_targets_mean": 5673.1, + "valid_targets_min": 2925 + }, + { + "epoch": 1.574074074074074, + "grad_norm": 0.5395718733919299, + "learning_rate": 3.813784641112563e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29078513383865356, + "step": 850, + "valid_targets_mean": 5001.7, + "valid_targets_min": 2123 + }, + { + "epoch": 1.5833333333333335, + "grad_norm": 0.5091833646708693, + "learning_rate": 3.8098742296362506e-05, + "loss": 0.3271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3604724407196045, + "step": 855, + "valid_targets_mean": 5089.8, + "valid_targets_min": 663 + }, + { + "epoch": 1.5925925925925926, + "grad_norm": 0.6087696649809414, + "learning_rate": 3.805925233137229e-05, + "loss": 0.3232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3626618981361389, + "step": 860, + "valid_targets_mean": 3525.3, + "valid_targets_min": 950 + }, + { + "epoch": 1.6018518518518519, + "grad_norm": 0.5643870271264605, + "learning_rate": 3.801937735804838e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28040584921836853, + "step": 865, + "valid_targets_mean": 3399.7, + "valid_targets_min": 1903 + }, + { + "epoch": 1.6111111111111112, + "grad_norm": 0.5439012864299764, + "learning_rate": 3.7979118226492266e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29787707328796387, + "step": 870, + "valid_targets_mean": 3488.6, + "valid_targets_min": 637 + }, + { + "epoch": 1.6203703703703702, + "grad_norm": 0.49165947496754964, + "learning_rate": 3.793847579499534e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34809091687202454, + "step": 875, + "valid_targets_mean": 5268.2, + "valid_targets_min": 1908 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 0.5140250546410033, + "learning_rate": 3.789745093002065e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2461247444152832, + "step": 880, + "valid_targets_mean": 3590.8, + "valid_targets_min": 541 + }, + { + "epoch": 1.6388888888888888, + "grad_norm": 0.5054561634272887, + "learning_rate": 3.785604450618443e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21256114542484283, + "step": 885, + "valid_targets_mean": 3803.2, + "valid_targets_min": 397 + }, + { + "epoch": 1.6481481481481481, + "grad_norm": 0.5046967079101841, + "learning_rate": 3.781425740623739e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2555197477340698, + "step": 890, + "valid_targets_mean": 5367.1, + "valid_targets_min": 2149 + }, + { + "epoch": 1.6574074074074074, + "grad_norm": 0.5031111391748204, + "learning_rate": 3.777209052104598e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27394580841064453, + "step": 895, + "valid_targets_mean": 4603.8, + "valid_targets_min": 1982 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.5200593833614563, + "learning_rate": 3.7729544749573335e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30838844180107117, + "step": 900, + "valid_targets_mean": 4181.4, + "valid_targets_min": 739 + }, + { + "epoch": 1.675925925925926, + "grad_norm": 0.5108208774796558, + "learning_rate": 3.768662099886014e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35393011569976807, + "step": 905, + "valid_targets_mean": 5152.1, + "valid_targets_min": 910 + }, + { + "epoch": 1.6851851851851851, + "grad_norm": 0.563904222236046, + "learning_rate": 3.7643320184005284e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2836383879184723, + "step": 910, + "valid_targets_mean": 3775.8, + "valid_targets_min": 1926 + }, + { + "epoch": 1.6944444444444444, + "grad_norm": 0.6370969191435905, + "learning_rate": 3.7599643228146355e-05, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34479522705078125, + "step": 915, + "valid_targets_mean": 3473.2, + "valid_targets_min": 479 + }, + { + "epoch": 1.7037037037037037, + "grad_norm": 0.4489327078730733, + "learning_rate": 3.755559106243994e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346605360507965, + "step": 920, + "valid_targets_mean": 4684.8, + "valid_targets_min": 1007 + }, + { + "epoch": 1.7129629629629628, + "grad_norm": 0.5284546797799259, + "learning_rate": 3.7511164626041823e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3440583646297455, + "step": 925, + "valid_targets_mean": 4763.8, + "valid_targets_min": 2487 + }, + { + "epoch": 1.7222222222222223, + "grad_norm": 0.5116796474023507, + "learning_rate": 3.746636486608689e-05, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31446945667266846, + "step": 930, + "valid_targets_mean": 4609.9, + "valid_targets_min": 2063 + }, + { + "epoch": 1.7314814814814814, + "grad_norm": 0.5614968969786471, + "learning_rate": 3.7421192737669005e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32294750213623047, + "step": 935, + "valid_targets_mean": 3850.6, + "valid_targets_min": 610 + }, + { + "epoch": 1.7407407407407407, + "grad_norm": 0.5657690526577215, + "learning_rate": 3.737564920382061e-05, + "loss": 0.2904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31011059880256653, + "step": 940, + "valid_targets_mean": 3671.0, + "valid_targets_min": 2036 + }, + { + "epoch": 1.75, + "grad_norm": 0.5783601802984916, + "learning_rate": 3.732973523549221e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29496774077415466, + "step": 945, + "valid_targets_mean": 3423.9, + "valid_targets_min": 494 + }, + { + "epoch": 1.7592592592592593, + "grad_norm": 0.43290336789364203, + "learning_rate": 3.728345181153165e-05, + "loss": 0.2913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509721517562866, + "step": 950, + "valid_targets_mean": 4465.2, + "valid_targets_min": 1717 + }, + { + "epoch": 1.7685185185185186, + "grad_norm": 0.4912129854091696, + "learning_rate": 3.7236799918663284e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28017228841781616, + "step": 955, + "valid_targets_mean": 4293.8, + "valid_targets_min": 2343 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.6332365406111699, + "learning_rate": 3.7189780551466905e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26829952001571655, + "step": 960, + "valid_targets_mean": 2642.8, + "valid_targets_min": 1700 + }, + { + "epoch": 1.7870370370370372, + "grad_norm": 0.5262914650173831, + "learning_rate": 3.714239471235657e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3247262239456177, + "step": 965, + "valid_targets_mean": 5274.5, + "valid_targets_min": 802 + }, + { + "epoch": 1.7962962962962963, + "grad_norm": 0.55145501931712, + "learning_rate": 3.7094643411559194e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.314953088760376, + "step": 970, + "valid_targets_mean": 3900.6, + "valid_targets_min": 1945 + }, + { + "epoch": 1.8055555555555556, + "grad_norm": 0.5213565964214443, + "learning_rate": 3.704652766709305e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3098965287208557, + "step": 975, + "valid_targets_mean": 4299.4, + "valid_targets_min": 1835 + }, + { + "epoch": 1.8148148148148149, + "grad_norm": 0.5142744749508426, + "learning_rate": 3.699804850474603e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28077518939971924, + "step": 980, + "valid_targets_mean": 4123.2, + "valid_targets_min": 322 + }, + { + "epoch": 1.824074074074074, + "grad_norm": 0.6109944754762814, + "learning_rate": 3.6949206958053825e-05, + "loss": 0.3212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29146653413772583, + "step": 985, + "valid_targets_mean": 3814.9, + "valid_targets_min": 1464 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 0.6418924924446736, + "learning_rate": 3.690000406827783e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2858262360095978, + "step": 990, + "valid_targets_mean": 2490.6, + "valid_targets_min": 559 + }, + { + "epoch": 1.8425925925925926, + "grad_norm": 0.5742545688665202, + "learning_rate": 3.685044088438299e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2759723961353302, + "step": 995, + "valid_targets_mean": 4008.8, + "valid_targets_min": 1779 + }, + { + "epoch": 1.8518518518518519, + "grad_norm": 0.5254418262167269, + "learning_rate": 3.680051846301543e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2492840737104416, + "step": 1000, + "valid_targets_mean": 3808.9, + "valid_targets_min": 1535 + }, + { + "epoch": 1.8611111111111112, + "grad_norm": 0.44300703016797877, + "learning_rate": 3.675023786847991e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27016231417655945, + "step": 1005, + "valid_targets_mean": 4913.9, + "valid_targets_min": 2688 + }, + { + "epoch": 1.8703703703703702, + "grad_norm": 0.5313397881548159, + "learning_rate": 3.6699600172717137e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27664732933044434, + "step": 1010, + "valid_targets_mean": 3364.4, + "valid_targets_min": 654 + }, + { + "epoch": 1.8796296296296298, + "grad_norm": 0.7092703491214453, + "learning_rate": 3.6648606455280944e-05, + "loss": 0.2968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3045133352279663, + "step": 1015, + "valid_targets_mean": 3802.8, + "valid_targets_min": 313 + }, + { + "epoch": 1.8888888888888888, + "grad_norm": 0.5263165468712829, + "learning_rate": 3.659725780331524e-05, + "loss": 0.3094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3496822714805603, + "step": 1020, + "valid_targets_mean": 4270.3, + "valid_targets_min": 2039 + }, + { + "epoch": 1.8981481481481481, + "grad_norm": 0.5704220397986243, + "learning_rate": 3.654555531153084e-05, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25687843561172485, + "step": 1025, + "valid_targets_mean": 3186.1, + "valid_targets_min": 310 + }, + { + "epoch": 1.9074074074074074, + "grad_norm": 0.4827843955838589, + "learning_rate": 3.649350008218214e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3169630169868469, + "step": 1030, + "valid_targets_mean": 4576.8, + "valid_targets_min": 2306 + }, + { + "epoch": 1.9166666666666665, + "grad_norm": 0.49931307758205035, + "learning_rate": 3.64410932250436e-05, + "loss": 0.3212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36018311977386475, + "step": 1035, + "valid_targets_mean": 4679.8, + "valid_targets_min": 896 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.5372589737787232, + "learning_rate": 3.638833585738611e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810884714126587, + "step": 1040, + "valid_targets_mean": 3718.3, + "valid_targets_min": 1706 + }, + { + "epoch": 1.9351851851851851, + "grad_norm": 0.5448044758731123, + "learning_rate": 3.633522910395314e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674560546875, + "step": 1045, + "valid_targets_mean": 3725.8, + "valid_targets_min": 299 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 0.49024688080681106, + "learning_rate": 3.628177409693677e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31284260749816895, + "step": 1050, + "valid_targets_mean": 4673.3, + "valid_targets_min": 1880 + }, + { + "epoch": 1.9537037037037037, + "grad_norm": 0.4596588910916473, + "learning_rate": 3.622797197595359e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25262099504470825, + "step": 1055, + "valid_targets_mean": 4529.9, + "valid_targets_min": 943 + }, + { + "epoch": 1.9629629629629628, + "grad_norm": 0.556506113366241, + "learning_rate": 3.6173823888020335e-05, + "loss": 0.3142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3350547254085541, + "step": 1060, + "valid_targets_mean": 3848.2, + "valid_targets_min": 247 + }, + { + "epoch": 1.9722222222222223, + "grad_norm": 0.5988985101293695, + "learning_rate": 3.611933098752949e-05, + "loss": 0.2992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744201123714447, + "step": 1065, + "valid_targets_mean": 2717.9, + "valid_targets_min": 661 + }, + { + "epoch": 1.9814814814814814, + "grad_norm": 0.5027204774148215, + "learning_rate": 3.6064494436224655e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2839767336845398, + "step": 1070, + "valid_targets_mean": 4282.0, + "valid_targets_min": 1624 + }, + { + "epoch": 1.9907407407407407, + "grad_norm": 0.5609086397412129, + "learning_rate": 3.6009315403175786e-05, + "loss": 0.2909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3083677589893341, + "step": 1075, + "valid_targets_mean": 3896.8, + "valid_targets_min": 821 + }, + { + "epoch": 2.0, + "grad_norm": 0.5064761754287327, + "learning_rate": 3.595379506475426e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30033770203590393, + "step": 1080, + "valid_targets_mean": 3941.4, + "valid_targets_min": 446 + }, + { + "epoch": 2.009259259259259, + "grad_norm": 0.5512471121936553, + "learning_rate": 3.5897934604607795e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31047865748405457, + "step": 1085, + "valid_targets_mean": 4005.3, + "valid_targets_min": 667 + }, + { + "epoch": 2.0185185185185186, + "grad_norm": 0.5160950707981314, + "learning_rate": 3.584173521363525e-05, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23959210515022278, + "step": 1090, + "valid_targets_mean": 4089.5, + "valid_targets_min": 393 + }, + { + "epoch": 2.0277777777777777, + "grad_norm": 0.5035622460268815, + "learning_rate": 3.578519808996117e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2824031114578247, + "step": 1095, + "valid_targets_mean": 4900.2, + "valid_targets_min": 2631 + }, + { + "epoch": 2.037037037037037, + "grad_norm": 0.5944018512965265, + "learning_rate": 3.572832443891033e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2954099178314209, + "step": 1100, + "valid_targets_mean": 3287.0, + "valid_targets_min": 828 + }, + { + "epoch": 2.0462962962962963, + "grad_norm": 0.56938852512268, + "learning_rate": 3.567111547298194e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3027660846710205, + "step": 1105, + "valid_targets_mean": 4170.7, + "valid_targets_min": 1762 + }, + { + "epoch": 2.0555555555555554, + "grad_norm": 0.5871826431417995, + "learning_rate": 3.561357241182388e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108934164047241, + "step": 1110, + "valid_targets_mean": 4158.6, + "valid_targets_min": 814 + }, + { + "epoch": 2.064814814814815, + "grad_norm": 0.5385231227592268, + "learning_rate": 3.555569648220666e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2445867955684662, + "step": 1115, + "valid_targets_mean": 3935.2, + "valid_targets_min": 772 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 0.6485921558298788, + "learning_rate": 3.549748891799726e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25209206342697144, + "step": 1120, + "valid_targets_mean": 3367.2, + "valid_targets_min": 496 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 0.5022623495987496, + "learning_rate": 3.543895096013284e-05, + "loss": 0.2828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2579001188278198, + "step": 1125, + "valid_targets_mean": 4011.8, + "valid_targets_min": 771 + }, + { + "epoch": 2.0925925925925926, + "grad_norm": 0.47266461256045833, + "learning_rate": 3.538008385659427e-05, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30898475646972656, + "step": 1130, + "valid_targets_mean": 5338.7, + "valid_targets_min": 466 + }, + { + "epoch": 2.1018518518518516, + "grad_norm": 0.474270085961415, + "learning_rate": 3.532088886237956e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24021559953689575, + "step": 1135, + "valid_targets_mean": 5083.8, + "valid_targets_min": 1865 + }, + { + "epoch": 2.111111111111111, + "grad_norm": 0.4625964336857986, + "learning_rate": 3.5261367239477055e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22822529077529907, + "step": 1140, + "valid_targets_mean": 4619.4, + "valid_targets_min": 2010 + }, + { + "epoch": 2.1203703703703702, + "grad_norm": 0.48948627688717905, + "learning_rate": 3.520152025683856e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3005768656730652, + "step": 1145, + "valid_targets_mean": 4932.8, + "valid_targets_min": 2461 + }, + { + "epoch": 2.1296296296296298, + "grad_norm": 0.53742697060363, + "learning_rate": 3.514134919035229e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257113516330719, + "step": 1150, + "valid_targets_mean": 3710.9, + "valid_targets_min": 1530 + }, + { + "epoch": 2.138888888888889, + "grad_norm": 0.5503512629168121, + "learning_rate": 3.5080855322815635e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27239710092544556, + "step": 1155, + "valid_targets_mean": 4531.2, + "valid_targets_min": 845 + }, + { + "epoch": 2.148148148148148, + "grad_norm": 0.6889338655609255, + "learning_rate": 3.5020039943907855e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3204009532928467, + "step": 1160, + "valid_targets_mean": 3625.0, + "valid_targets_min": 404 + }, + { + "epoch": 2.1574074074074074, + "grad_norm": 0.5579386971063519, + "learning_rate": 3.495890435016258e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31176429986953735, + "step": 1165, + "valid_targets_mean": 3857.6, + "valid_targets_min": 1893 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 0.4892660944704105, + "learning_rate": 3.489744984494012e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2628619968891144, + "step": 1170, + "valid_targets_mean": 4434.8, + "valid_targets_min": 863 + }, + { + "epoch": 2.175925925925926, + "grad_norm": 0.5387503793875826, + "learning_rate": 3.4835677738399745e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2822778820991516, + "step": 1175, + "valid_targets_mean": 4123.1, + "valid_targets_min": 733 + }, + { + "epoch": 2.185185185185185, + "grad_norm": 0.45394010508846994, + "learning_rate": 3.477358934747172e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22972646355628967, + "step": 1180, + "valid_targets_mean": 4582.6, + "valid_targets_min": 549 + }, + { + "epoch": 2.1944444444444446, + "grad_norm": 0.5572233307223254, + "learning_rate": 3.47111859958292e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954271078109741, + "step": 1185, + "valid_targets_mean": 5509.8, + "valid_targets_min": 1607 + }, + { + "epoch": 2.2037037037037037, + "grad_norm": 0.5791570229327255, + "learning_rate": 3.464846901386008e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25588738918304443, + "step": 1190, + "valid_targets_mean": 3254.6, + "valid_targets_min": 1942 + }, + { + "epoch": 2.212962962962963, + "grad_norm": 0.5529120478362382, + "learning_rate": 3.458543973863859e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31822943687438965, + "step": 1195, + "valid_targets_mean": 4516.6, + "valid_targets_min": 1521 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5903216004748315, + "learning_rate": 3.452209951389677e-05, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27697065472602844, + "step": 1200, + "valid_targets_mean": 3290.7, + "valid_targets_min": 767 + }, + { + "epoch": 2.2314814814814814, + "grad_norm": 0.6691604327279813, + "learning_rate": 3.445844968999586e-05, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27134162187576294, + "step": 1205, + "valid_targets_mean": 3421.6, + "valid_targets_min": 554 + }, + { + "epoch": 2.240740740740741, + "grad_norm": 0.4881557721381399, + "learning_rate": 3.4394491623897506e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2903343737125397, + "step": 1210, + "valid_targets_mean": 4894.1, + "valid_targets_min": 2191 + }, + { + "epoch": 2.25, + "grad_norm": 0.47094056474807733, + "learning_rate": 3.4330226679134805e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23896852135658264, + "step": 1215, + "valid_targets_mean": 4323.1, + "valid_targets_min": 1904 + }, + { + "epoch": 2.259259259259259, + "grad_norm": 0.5276120851649831, + "learning_rate": 3.426565622578327e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2607136368751526, + "step": 1220, + "valid_targets_mean": 4749.9, + "valid_targets_min": 1003 + }, + { + "epoch": 2.2685185185185186, + "grad_norm": 0.5849502666887231, + "learning_rate": 3.420078164043161e-05, + "loss": 0.2911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3107423186302185, + "step": 1225, + "valid_targets_mean": 4094.4, + "valid_targets_min": 708 + }, + { + "epoch": 2.2777777777777777, + "grad_norm": 0.5217159399236517, + "learning_rate": 3.413560430615235e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26841235160827637, + "step": 1230, + "valid_targets_mean": 4422.4, + "valid_targets_min": 321 + }, + { + "epoch": 2.287037037037037, + "grad_norm": 0.6178611619364681, + "learning_rate": 3.407012561247239e-05, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25904977321624756, + "step": 1235, + "valid_targets_mean": 2980.8, + "valid_targets_min": 835 + }, + { + "epoch": 2.2962962962962963, + "grad_norm": 0.5079532786534702, + "learning_rate": 3.400434695534337e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537451386451721, + "step": 1240, + "valid_targets_mean": 4113.8, + "valid_targets_min": 735 + }, + { + "epoch": 2.3055555555555554, + "grad_norm": 0.532875433175585, + "learning_rate": 3.393826973711189e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2632405757904053, + "step": 1245, + "valid_targets_mean": 4773.4, + "valid_targets_min": 513 + }, + { + "epoch": 2.314814814814815, + "grad_norm": 0.5803442874340493, + "learning_rate": 3.3871895366489624e-05, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2636168301105499, + "step": 1250, + "valid_targets_mean": 3658.9, + "valid_targets_min": 480 + }, + { + "epoch": 2.324074074074074, + "grad_norm": 0.5173410175649708, + "learning_rate": 3.38052252585233e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27228376269340515, + "step": 1255, + "valid_targets_mean": 4343.1, + "valid_targets_min": 1467 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.514166475299944, + "learning_rate": 3.373826083456451e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27561166882514954, + "step": 1260, + "valid_targets_mean": 4768.9, + "valid_targets_min": 1369 + }, + { + "epoch": 2.3425925925925926, + "grad_norm": 0.5438892930868031, + "learning_rate": 3.367100352223944e-05, + "loss": 0.2903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2975728511810303, + "step": 1265, + "valid_targets_mean": 4511.4, + "valid_targets_min": 1311 + }, + { + "epoch": 2.351851851851852, + "grad_norm": 0.5714498435745807, + "learning_rate": 3.360345475541839e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28769245743751526, + "step": 1270, + "valid_targets_mean": 3497.2, + "valid_targets_min": 317 + }, + { + "epoch": 2.361111111111111, + "grad_norm": 0.5680794410701885, + "learning_rate": 3.353561597418524e-05, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3808847665786743, + "step": 1275, + "valid_targets_mean": 4961.8, + "valid_targets_min": 887 + }, + { + "epoch": 2.3703703703703702, + "grad_norm": 0.5542270863328363, + "learning_rate": 3.346748862480674e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26409125328063965, + "step": 1280, + "valid_targets_mean": 3505.4, + "valid_targets_min": 597 + }, + { + "epoch": 2.3796296296296298, + "grad_norm": 0.5979782495601111, + "learning_rate": 3.339907415970168e-05, + "loss": 0.2859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22898399829864502, + "step": 1285, + "valid_targets_mean": 2800.9, + "valid_targets_min": 541 + }, + { + "epoch": 2.388888888888889, + "grad_norm": 0.5281486790881847, + "learning_rate": 3.333037403740989e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23821385204792023, + "step": 1290, + "valid_targets_mean": 3827.2, + "valid_targets_min": 736 + }, + { + "epoch": 2.398148148148148, + "grad_norm": 0.5811780027704674, + "learning_rate": 3.326138972256121e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27474355697631836, + "step": 1295, + "valid_targets_mean": 3347.3, + "valid_targets_min": 647 + }, + { + "epoch": 2.4074074074074074, + "grad_norm": 0.5062441112163106, + "learning_rate": 3.3192122685844214e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2150622010231018, + "step": 1300, + "valid_targets_mean": 3436.5, + "valid_targets_min": 596 + }, + { + "epoch": 2.4166666666666665, + "grad_norm": 0.5298315558070248, + "learning_rate": 3.312257440397488e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27570849657058716, + "step": 1305, + "valid_targets_mean": 4040.8, + "valid_targets_min": 474 + }, + { + "epoch": 2.425925925925926, + "grad_norm": 0.5014670497202479, + "learning_rate": 3.305274635966509e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2778550386428833, + "step": 1310, + "valid_targets_mean": 4388.4, + "valid_targets_min": 1561 + }, + { + "epoch": 2.435185185185185, + "grad_norm": 0.4941899546980937, + "learning_rate": 3.298264004159104e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2897128760814667, + "step": 1315, + "valid_targets_mean": 4620.4, + "valid_targets_min": 764 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.5845810037932724, + "learning_rate": 3.2912256944361484e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2573606073856354, + "step": 1320, + "valid_targets_mean": 3208.6, + "valid_targets_min": 762 + }, + { + "epoch": 2.4537037037037037, + "grad_norm": 0.5433250444755526, + "learning_rate": 3.284159856848589e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28917592763900757, + "step": 1325, + "valid_targets_mean": 4059.9, + "valid_targets_min": 869 + }, + { + "epoch": 2.462962962962963, + "grad_norm": 0.5684951550508175, + "learning_rate": 3.2770666420342426e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33171623945236206, + "step": 1330, + "valid_targets_mean": 3700.9, + "valid_targets_min": 908 + }, + { + "epoch": 2.4722222222222223, + "grad_norm": 0.46312664746277976, + "learning_rate": 3.269946201214586e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275724321603775, + "step": 1335, + "valid_targets_mean": 4910.4, + "valid_targets_min": 566 + }, + { + "epoch": 2.4814814814814814, + "grad_norm": 0.4467189932583639, + "learning_rate": 3.262798686191533e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24208518862724304, + "step": 1340, + "valid_targets_mean": 4536.0, + "valid_targets_min": 2578 + }, + { + "epoch": 2.490740740740741, + "grad_norm": 0.4991364949244441, + "learning_rate": 3.255624249344198e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2625071406364441, + "step": 1345, + "valid_targets_mean": 3629.2, + "valid_targets_min": 628 + }, + { + "epoch": 2.5, + "grad_norm": 0.5133071061700047, + "learning_rate": 3.248423043625642e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21916022896766663, + "step": 1350, + "valid_targets_mean": 3302.8, + "valid_targets_min": 894 + }, + { + "epoch": 2.5092592592592595, + "grad_norm": 0.49266442205021715, + "learning_rate": 3.241195222559621e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27607983350753784, + "step": 1355, + "valid_targets_mean": 4676.6, + "valid_targets_min": 1984 + }, + { + "epoch": 2.5185185185185186, + "grad_norm": 0.6009290708819502, + "learning_rate": 3.2339409402373056e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.383029580116272, + "step": 1360, + "valid_targets_mean": 3892.7, + "valid_targets_min": 572 + }, + { + "epoch": 2.5277777777777777, + "grad_norm": 0.5505025180156696, + "learning_rate": 3.2266603513139995e-05, + "loss": 0.2757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2777920961380005, + "step": 1365, + "valid_targets_mean": 3982.9, + "valid_targets_min": 2418 + }, + { + "epoch": 2.537037037037037, + "grad_norm": 0.5111646149007892, + "learning_rate": 3.2193536110058414e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28832823038101196, + "step": 1370, + "valid_targets_mean": 4848.4, + "valid_targets_min": 2127 + }, + { + "epoch": 2.5462962962962963, + "grad_norm": 0.5040882202849286, + "learning_rate": 3.212020875086495e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31644129753112793, + "step": 1375, + "valid_targets_mean": 4976.4, + "valid_targets_min": 1046 + }, + { + "epoch": 2.5555555555555554, + "grad_norm": 0.5313850282519446, + "learning_rate": 3.20466229988383e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2689250111579895, + "step": 1380, + "valid_targets_mean": 3821.2, + "valid_targets_min": 435 + }, + { + "epoch": 2.564814814814815, + "grad_norm": 0.5402024194211922, + "learning_rate": 3.197278042276587e-05, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2658718228340149, + "step": 1385, + "valid_targets_mean": 3890.8, + "valid_targets_min": 542 + }, + { + "epoch": 2.574074074074074, + "grad_norm": 0.4747076430784914, + "learning_rate": 3.189868259691036e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23981155455112457, + "step": 1390, + "valid_targets_mean": 4119.1, + "valid_targets_min": 654 + }, + { + "epoch": 2.5833333333333335, + "grad_norm": 0.5999323348223499, + "learning_rate": 3.182433110097618e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2491762787103653, + "step": 1395, + "valid_targets_mean": 3585.1, + "valid_targets_min": 1770 + }, + { + "epoch": 2.5925925925925926, + "grad_norm": 0.46430831925350374, + "learning_rate": 3.174972752007577e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25592419505119324, + "step": 1400, + "valid_targets_mean": 4477.0, + "valid_targets_min": 1644 + }, + { + "epoch": 2.601851851851852, + "grad_norm": 0.4807452208290434, + "learning_rate": 3.1674873444695804e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18888458609580994, + "step": 1405, + "valid_targets_mean": 3690.0, + "valid_targets_min": 381 + }, + { + "epoch": 2.611111111111111, + "grad_norm": 0.5482835702850894, + "learning_rate": 3.15997704706633e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2206830382347107, + "step": 1410, + "valid_targets_mean": 3195.4, + "valid_targets_min": 1566 + }, + { + "epoch": 2.6203703703703702, + "grad_norm": 0.4939467467540788, + "learning_rate": 3.152442019911161e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2876463532447815, + "step": 1415, + "valid_targets_mean": 4438.0, + "valid_targets_min": 2557 + }, + { + "epoch": 2.6296296296296298, + "grad_norm": 0.563321666654457, + "learning_rate": 3.144882423644623e-05, + "loss": 0.2805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687886357307434, + "step": 1420, + "valid_targets_mean": 3560.2, + "valid_targets_min": 506 + }, + { + "epoch": 2.638888888888889, + "grad_norm": 0.4771594676809964, + "learning_rate": 3.1372984194310614e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2669658064842224, + "step": 1425, + "valid_targets_mean": 4496.8, + "valid_targets_min": 1034 + }, + { + "epoch": 2.648148148148148, + "grad_norm": 0.5701553331661079, + "learning_rate": 3.1296901689551766e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2383461445569992, + "step": 1430, + "valid_targets_mean": 3213.2, + "valid_targets_min": 479 + }, + { + "epoch": 2.6574074074074074, + "grad_norm": 0.4996052044828777, + "learning_rate": 3.122057834418582e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22803762555122375, + "step": 1435, + "valid_targets_mean": 4339.8, + "valid_targets_min": 1876 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.4762272371218332, + "learning_rate": 3.1144015785363405e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25022315979003906, + "step": 1440, + "valid_targets_mean": 4088.6, + "valid_targets_min": 1840 + }, + { + "epoch": 2.675925925925926, + "grad_norm": 0.5320018695023948, + "learning_rate": 3.1067215645335e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2924705445766449, + "step": 1445, + "valid_targets_mean": 3703.2, + "valid_targets_min": 1026 + }, + { + "epoch": 2.685185185185185, + "grad_norm": 0.48946462823360576, + "learning_rate": 3.0990179561416124e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24994626641273499, + "step": 1450, + "valid_targets_mean": 4758.8, + "valid_targets_min": 2745 + }, + { + "epoch": 2.6944444444444446, + "grad_norm": 0.5536914814420201, + "learning_rate": 3.0912909175952404e-05, + "loss": 0.2802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28184986114501953, + "step": 1455, + "valid_targets_mean": 4326.1, + "valid_targets_min": 2025 + }, + { + "epoch": 2.7037037037037037, + "grad_norm": 0.5678388519721984, + "learning_rate": 3.08354061362846e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2809412181377411, + "step": 1460, + "valid_targets_mean": 3676.1, + "valid_targets_min": 2257 + }, + { + "epoch": 2.712962962962963, + "grad_norm": 0.5566129052217905, + "learning_rate": 3.075767209471345e-05, + "loss": 0.2774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2666032314300537, + "step": 1465, + "valid_targets_mean": 3426.1, + "valid_targets_min": 1705 + }, + { + "epoch": 2.7222222222222223, + "grad_norm": 0.6101327463343764, + "learning_rate": 3.06797087084645e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28413182497024536, + "step": 1470, + "valid_targets_mean": 3345.7, + "valid_targets_min": 231 + }, + { + "epoch": 2.7314814814814814, + "grad_norm": 0.4725939229507694, + "learning_rate": 3.060151763965267e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21915774047374725, + "step": 1475, + "valid_targets_mean": 3618.2, + "valid_targets_min": 755 + }, + { + "epoch": 2.7407407407407405, + "grad_norm": 0.4395930184494261, + "learning_rate": 3.052310055524696e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2695322036743164, + "step": 1480, + "valid_targets_mean": 4727.2, + "valid_targets_min": 1598 + }, + { + "epoch": 2.75, + "grad_norm": 0.4706905685407168, + "learning_rate": 3.044445912703477e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32568082213401794, + "step": 1485, + "valid_targets_mean": 5001.6, + "valid_targets_min": 665 + }, + { + "epoch": 2.7592592592592595, + "grad_norm": 0.4969657932430498, + "learning_rate": 3.036559503158637e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23848196864128113, + "step": 1490, + "valid_targets_mean": 3659.8, + "valid_targets_min": 1529 + }, + { + "epoch": 2.7685185185185186, + "grad_norm": 0.6292698817704557, + "learning_rate": 3.0286509950219077e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26195579767227173, + "step": 1495, + "valid_targets_mean": 3502.5, + "valid_targets_min": 923 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5618365463665778, + "learning_rate": 3.020720556896147e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3372756242752075, + "step": 1500, + "valid_targets_mean": 4311.9, + "valid_targets_min": 812 + }, + { + "epoch": 2.787037037037037, + "grad_norm": 1.0844578159877276, + "learning_rate": 3.0127683578517418e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2966684401035309, + "step": 1505, + "valid_targets_mean": 6157.9, + "valid_targets_min": 871 + }, + { + "epoch": 2.7962962962962963, + "grad_norm": 0.5031399470373651, + "learning_rate": 3.004794567423002e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2614865303039551, + "step": 1510, + "valid_targets_mean": 4778.2, + "valid_targets_min": 584 + }, + { + "epoch": 2.8055555555555554, + "grad_norm": 0.5041806681514034, + "learning_rate": 2.9967993556045504e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21226972341537476, + "step": 1515, + "valid_targets_mean": 5573.9, + "valid_targets_min": 845 + }, + { + "epoch": 2.814814814814815, + "grad_norm": 0.4997735272169362, + "learning_rate": 2.988782892847694e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24511948227882385, + "step": 1520, + "valid_targets_mean": 4693.2, + "valid_targets_min": 886 + }, + { + "epoch": 2.824074074074074, + "grad_norm": 0.5995647736513411, + "learning_rate": 2.9807453500567937e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3068762421607971, + "step": 1525, + "valid_targets_mean": 3450.6, + "valid_targets_min": 930 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 0.6054440225826818, + "learning_rate": 2.9726868985856186e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21026404201984406, + "step": 1530, + "valid_targets_mean": 3599.7, + "valid_targets_min": 1569 + }, + { + "epoch": 2.8425925925925926, + "grad_norm": 0.5741889496658287, + "learning_rate": 2.9646077102336933e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2920532524585724, + "step": 1535, + "valid_targets_mean": 3774.1, + "valid_targets_min": 317 + }, + { + "epoch": 2.851851851851852, + "grad_norm": 0.6588428000456348, + "learning_rate": 2.956507957242637e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2671510875225067, + "step": 1540, + "valid_targets_mean": 3503.0, + "valid_targets_min": 526 + }, + { + "epoch": 2.861111111111111, + "grad_norm": 0.4491076760542966, + "learning_rate": 2.9483878122924874e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23452128469944, + "step": 1545, + "valid_targets_mean": 5171.2, + "valid_targets_min": 1926 + }, + { + "epoch": 2.8703703703703702, + "grad_norm": 0.5018537144302465, + "learning_rate": 2.940247448498025e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25421342253685, + "step": 1550, + "valid_targets_mean": 4786.2, + "valid_targets_min": 629 + }, + { + "epoch": 2.8796296296296298, + "grad_norm": 0.5716877920658499, + "learning_rate": 2.9320870394050783e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2664939761161804, + "step": 1555, + "valid_targets_mean": 3561.0, + "valid_targets_min": 949 + }, + { + "epoch": 2.888888888888889, + "grad_norm": 0.481235284252179, + "learning_rate": 2.9239067589868228e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2406879961490631, + "step": 1560, + "valid_targets_mean": 4930.3, + "valid_targets_min": 491 + }, + { + "epoch": 2.898148148148148, + "grad_norm": 0.5819300944807342, + "learning_rate": 2.9157067816400765e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27452024817466736, + "step": 1565, + "valid_targets_mean": 3573.7, + "valid_targets_min": 2118 + }, + { + "epoch": 2.9074074074074074, + "grad_norm": 0.5048566931560609, + "learning_rate": 2.90748728218158e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25580722093582153, + "step": 1570, + "valid_targets_mean": 4897.8, + "valid_targets_min": 970 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 0.49150215780244894, + "learning_rate": 2.8992484358442673e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2693978548049927, + "step": 1575, + "valid_targets_mean": 4487.1, + "valid_targets_min": 1709 + }, + { + "epoch": 2.925925925925926, + "grad_norm": 0.5708812131347851, + "learning_rate": 2.8909904182735337e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2671920955181122, + "step": 1580, + "valid_targets_mean": 3580.7, + "valid_targets_min": 894 + }, + { + "epoch": 2.935185185185185, + "grad_norm": 0.5601556537304803, + "learning_rate": 2.8827134055234883e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26261967420578003, + "step": 1585, + "valid_targets_mean": 3402.6, + "valid_targets_min": 698 + }, + { + "epoch": 2.9444444444444446, + "grad_norm": 0.5319230530321215, + "learning_rate": 2.874417574053202e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3018246591091156, + "step": 1590, + "valid_targets_mean": 4457.2, + "valid_targets_min": 194 + }, + { + "epoch": 2.9537037037037037, + "grad_norm": 0.6399002771658464, + "learning_rate": 2.8661031007229443e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27771127223968506, + "step": 1595, + "valid_targets_mean": 3044.6, + "valid_targets_min": 736 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.5987506953116827, + "learning_rate": 2.857770162790416e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2965382933616638, + "step": 1600, + "valid_targets_mean": 3597.6, + "valid_targets_min": 2209 + }, + { + "epoch": 2.9722222222222223, + "grad_norm": 0.6380121418721786, + "learning_rate": 2.8494189379069662e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2618336081504822, + "step": 1605, + "valid_targets_mean": 2886.4, + "valid_targets_min": 275 + }, + { + "epoch": 2.9814814814814814, + "grad_norm": 0.5586710883725614, + "learning_rate": 2.8410496041138067e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2990414500236511, + "step": 1610, + "valid_targets_mean": 4474.1, + "valid_targets_min": 1026 + }, + { + "epoch": 2.9907407407407405, + "grad_norm": 0.46644132738739846, + "learning_rate": 2.8326623398382174e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24458184838294983, + "step": 1615, + "valid_targets_mean": 4791.1, + "valid_targets_min": 1838 + }, + { + "epoch": 3.0, + "grad_norm": 0.4761359461180495, + "learning_rate": 2.8242573238897395e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24625471234321594, + "step": 1620, + "valid_targets_mean": 4855.9, + "valid_targets_min": 778 + }, + { + "epoch": 3.009259259259259, + "grad_norm": 0.5607956275569781, + "learning_rate": 2.815834735456367e-05, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2913566827774048, + "step": 1625, + "valid_targets_mean": 3792.2, + "valid_targets_min": 1421 + }, + { + "epoch": 3.0185185185185186, + "grad_norm": 0.5200885636493907, + "learning_rate": 2.8073947541007246e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2609402537345886, + "step": 1630, + "valid_targets_mean": 4228.2, + "valid_targets_min": 1388 + }, + { + "epoch": 3.0277777777777777, + "grad_norm": 0.5464229397916962, + "learning_rate": 2.7989375597562386e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.287320077419281, + "step": 1635, + "valid_targets_mean": 3905.6, + "valid_targets_min": 1626 + }, + { + "epoch": 3.037037037037037, + "grad_norm": 0.48953693297086637, + "learning_rate": 2.7904633327233016e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23558908700942993, + "step": 1640, + "valid_targets_mean": 4453.9, + "valid_targets_min": 1973 + }, + { + "epoch": 3.0462962962962963, + "grad_norm": 0.5636981651244154, + "learning_rate": 2.781972253665431e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22617077827453613, + "step": 1645, + "valid_targets_mean": 4146.0, + "valid_targets_min": 1473 + }, + { + "epoch": 3.0555555555555554, + "grad_norm": 0.5346016865896094, + "learning_rate": 2.773464503605414e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20911234617233276, + "step": 1650, + "valid_targets_mean": 3475.5, + "valid_targets_min": 1771 + }, + { + "epoch": 3.064814814814815, + "grad_norm": 0.537999050036353, + "learning_rate": 2.764940263921451e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3136928081512451, + "step": 1655, + "valid_targets_mean": 4361.2, + "valid_targets_min": 724 + }, + { + "epoch": 3.074074074074074, + "grad_norm": 0.5332117915430361, + "learning_rate": 2.7563997163432853e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27018648386001587, + "step": 1660, + "valid_targets_mean": 4647.1, + "valid_targets_min": 1878 + }, + { + "epoch": 3.0833333333333335, + "grad_norm": 0.5252502798205276, + "learning_rate": 2.7478430429483336e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500057518482208, + "step": 1665, + "valid_targets_mean": 4244.9, + "valid_targets_min": 2221 + }, + { + "epoch": 3.0925925925925926, + "grad_norm": 0.9080479015306887, + "learning_rate": 2.7392704261578e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24066869914531708, + "step": 1670, + "valid_targets_mean": 4786.5, + "valid_targets_min": 1795 + }, + { + "epoch": 3.1018518518518516, + "grad_norm": 0.5710663215003279, + "learning_rate": 2.7306820487327906e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36352789402008057, + "step": 1675, + "valid_targets_mean": 4979.2, + "valid_targets_min": 561 + }, + { + "epoch": 3.111111111111111, + "grad_norm": 0.5397905809288394, + "learning_rate": 2.7220780937704118e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3355807065963745, + "step": 1680, + "valid_targets_mean": 4687.1, + "valid_targets_min": 582 + }, + { + "epoch": 3.1203703703703702, + "grad_norm": 0.5719381517621509, + "learning_rate": 2.713458744699873e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27028560638427734, + "step": 1685, + "valid_targets_mean": 3927.9, + "valid_targets_min": 735 + }, + { + "epoch": 3.1296296296296298, + "grad_norm": 0.5488071017786058, + "learning_rate": 2.704824185278573e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780161201953888, + "step": 1690, + "valid_targets_mean": 4392.5, + "valid_targets_min": 275 + }, + { + "epoch": 3.138888888888889, + "grad_norm": 0.560933973633571, + "learning_rate": 2.6961745995881813e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2935350835323334, + "step": 1695, + "valid_targets_mean": 3781.2, + "valid_targets_min": 812 + }, + { + "epoch": 3.148148148148148, + "grad_norm": 0.5153173604123659, + "learning_rate": 2.6875101720307168e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22335903346538544, + "step": 1700, + "valid_targets_mean": 3719.2, + "valid_targets_min": 322 + }, + { + "epoch": 3.1574074074074074, + "grad_norm": 0.529200809615876, + "learning_rate": 2.6788310873246133e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.321768194437027, + "step": 1705, + "valid_targets_mean": 4739.2, + "valid_targets_min": 1880 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 0.5929542752441085, + "learning_rate": 2.670137530500783e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23372915387153625, + "step": 1710, + "valid_targets_mean": 3828.8, + "valid_targets_min": 838 + }, + { + "epoch": 3.175925925925926, + "grad_norm": 0.5904824673475364, + "learning_rate": 2.661429686898673e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25988543033599854, + "step": 1715, + "valid_targets_mean": 3370.4, + "valid_targets_min": 906 + }, + { + "epoch": 3.185185185185185, + "grad_norm": 0.5654133383351743, + "learning_rate": 2.6527077421623117e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24876439571380615, + "step": 1720, + "valid_targets_mean": 3427.9, + "valid_targets_min": 545 + }, + { + "epoch": 3.1944444444444446, + "grad_norm": 0.6120405362758927, + "learning_rate": 2.6439718822363515e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23591837286949158, + "step": 1725, + "valid_targets_mean": 3958.1, + "valid_targets_min": 1876 + }, + { + "epoch": 3.2037037037037037, + "grad_norm": 0.5650864325109225, + "learning_rate": 2.6352222933621065e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2792157828807831, + "step": 1730, + "valid_targets_mean": 4003.8, + "valid_targets_min": 845 + }, + { + "epoch": 3.212962962962963, + "grad_norm": 0.5560665906528047, + "learning_rate": 2.62645916207358e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22258630394935608, + "step": 1735, + "valid_targets_mean": 3628.8, + "valid_targets_min": 2004 + }, + { + "epoch": 3.2222222222222223, + "grad_norm": 0.47392152791291015, + "learning_rate": 2.6176826751934882e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23260678350925446, + "step": 1740, + "valid_targets_mean": 4254.6, + "valid_targets_min": 2036 + }, + { + "epoch": 3.2314814814814814, + "grad_norm": 0.547370940890995, + "learning_rate": 2.6088930198292773e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3267350196838379, + "step": 1745, + "valid_targets_mean": 5440.8, + "valid_targets_min": 587 + }, + { + "epoch": 3.240740740740741, + "grad_norm": 0.6207094582404148, + "learning_rate": 2.600090383369135e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2993205189704895, + "step": 1750, + "valid_targets_mean": 3938.6, + "valid_targets_min": 471 + }, + { + "epoch": 3.25, + "grad_norm": 0.5392432660293104, + "learning_rate": 2.5912749534779958e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261831134557724, + "step": 1755, + "valid_targets_mean": 4243.8, + "valid_targets_min": 1809 + }, + { + "epoch": 3.259259259259259, + "grad_norm": 0.5976629323136308, + "learning_rate": 2.5824469180935377e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25948113203048706, + "step": 1760, + "valid_targets_mean": 3828.1, + "valid_targets_min": 2051 + }, + { + "epoch": 3.2685185185185186, + "grad_norm": 0.5616357127195194, + "learning_rate": 2.5736064654221808e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716560959815979, + "step": 1765, + "valid_targets_mean": 4082.7, + "valid_targets_min": 661 + }, + { + "epoch": 3.2777777777777777, + "grad_norm": 0.5515421060133198, + "learning_rate": 2.564753783935068e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2995178699493408, + "step": 1770, + "valid_targets_mean": 4297.5, + "valid_targets_min": 773 + }, + { + "epoch": 3.287037037037037, + "grad_norm": 0.5442897232307273, + "learning_rate": 2.5558890623640513e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33121222257614136, + "step": 1775, + "valid_targets_mean": 4387.3, + "valid_targets_min": 638 + }, + { + "epoch": 3.2962962962962963, + "grad_norm": 0.5440272723905116, + "learning_rate": 2.5470124896976687e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26206910610198975, + "step": 1780, + "valid_targets_mean": 3992.6, + "valid_targets_min": 644 + }, + { + "epoch": 3.3055555555555554, + "grad_norm": 0.5245177761132279, + "learning_rate": 2.538124255177113e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25537651777267456, + "step": 1785, + "valid_targets_mean": 4090.8, + "valid_targets_min": 421 + }, + { + "epoch": 3.314814814814815, + "grad_norm": 0.4978922342919577, + "learning_rate": 2.5292245482921982e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24825258553028107, + "step": 1790, + "valid_targets_mean": 4364.4, + "valid_targets_min": 1736 + }, + { + "epoch": 3.324074074074074, + "grad_norm": 0.5413131194299406, + "learning_rate": 2.5203135587773196e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28795957565307617, + "step": 1795, + "valid_targets_mean": 4514.4, + "valid_targets_min": 1916 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.48381333303689816, + "learning_rate": 2.5113914766074075e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29423463344573975, + "step": 1800, + "valid_targets_mean": 5073.3, + "valid_targets_min": 887 + }, + { + "epoch": 3.3425925925925926, + "grad_norm": 0.5540751734364149, + "learning_rate": 2.5024584919938805e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30663758516311646, + "step": 1805, + "valid_targets_mean": 3852.0, + "valid_targets_min": 783 + }, + { + "epoch": 3.351851851851852, + "grad_norm": 0.4875767498216755, + "learning_rate": 2.493514795380587e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23132456839084625, + "step": 1810, + "valid_targets_mean": 4474.8, + "valid_targets_min": 2422 + }, + { + "epoch": 3.361111111111111, + "grad_norm": 0.562685263202997, + "learning_rate": 2.4845605774397482e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605576515197754, + "step": 1815, + "valid_targets_mean": 3649.8, + "valid_targets_min": 1813 + }, + { + "epoch": 3.3703703703703702, + "grad_norm": 0.5971247612228009, + "learning_rate": 2.4755960290678884e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2765813171863556, + "step": 1820, + "valid_targets_mean": 3091.1, + "valid_targets_min": 592 + }, + { + "epoch": 3.3796296296296298, + "grad_norm": 0.5196767315771833, + "learning_rate": 2.4666213413817696e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22254908084869385, + "step": 1825, + "valid_targets_mean": 3801.4, + "valid_targets_min": 747 + }, + { + "epoch": 3.388888888888889, + "grad_norm": 0.5121874679965963, + "learning_rate": 2.4576367057143167e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2372555434703827, + "step": 1830, + "valid_targets_mean": 4133.1, + "valid_targets_min": 1882 + }, + { + "epoch": 3.398148148148148, + "grad_norm": 0.5020669363013955, + "learning_rate": 2.4486423136105356e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25579991936683655, + "step": 1835, + "valid_targets_mean": 4919.1, + "valid_targets_min": 1901 + }, + { + "epoch": 3.4074074074074074, + "grad_norm": 0.5441744337458053, + "learning_rate": 2.4396383568234322e-05, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616913914680481, + "step": 1840, + "valid_targets_mean": 4022.9, + "valid_targets_min": 721 + }, + { + "epoch": 3.4166666666666665, + "grad_norm": 0.5285349190964163, + "learning_rate": 2.4306250273099236e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27835655212402344, + "step": 1845, + "valid_targets_mean": 3834.8, + "valid_targets_min": 1887 + }, + { + "epoch": 3.425925925925926, + "grad_norm": 0.46370125956202335, + "learning_rate": 2.421602517226745e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21514523029327393, + "step": 1850, + "valid_targets_mean": 4822.7, + "valid_targets_min": 2356 + }, + { + "epoch": 3.435185185185185, + "grad_norm": 0.5045309429602557, + "learning_rate": 2.4125710189263555e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24583828449249268, + "step": 1855, + "valid_targets_mean": 4282.2, + "valid_targets_min": 1668 + }, + { + "epoch": 3.4444444444444446, + "grad_norm": 0.5794459679981911, + "learning_rate": 2.4035307249528326e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2770538926124573, + "step": 1860, + "valid_targets_mean": 4194.9, + "valid_targets_min": 1906 + }, + { + "epoch": 3.4537037037037037, + "grad_norm": 0.5757849492309214, + "learning_rate": 2.3944818280377732e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2549267113208771, + "step": 1865, + "valid_targets_mean": 3418.6, + "valid_targets_min": 1267 + }, + { + "epoch": 3.462962962962963, + "grad_norm": 0.5787001079526624, + "learning_rate": 2.3854245210961798e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735787332057953, + "step": 1870, + "valid_targets_mean": 3282.9, + "valid_targets_min": 327 + }, + { + "epoch": 3.4722222222222223, + "grad_norm": 0.6603451083242975, + "learning_rate": 2.376358997222351e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30318760871887207, + "step": 1875, + "valid_targets_mean": 2959.7, + "valid_targets_min": 580 + }, + { + "epoch": 3.4814814814814814, + "grad_norm": 0.5439007943417832, + "learning_rate": 2.367285449685763e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.319497674703598, + "step": 1880, + "valid_targets_mean": 4390.6, + "valid_targets_min": 1345 + }, + { + "epoch": 3.490740740740741, + "grad_norm": 0.47502055786608866, + "learning_rate": 2.3582040719269504e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2526184916496277, + "step": 1885, + "valid_targets_mean": 5038.2, + "valid_targets_min": 1510 + }, + { + "epoch": 3.5, + "grad_norm": 0.5452928944856862, + "learning_rate": 2.3491150575533808e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23368355631828308, + "step": 1890, + "valid_targets_mean": 3530.3, + "valid_targets_min": 605 + }, + { + "epoch": 3.5092592592592595, + "grad_norm": 0.6073760697025231, + "learning_rate": 2.340018600335328e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2643791437149048, + "step": 1895, + "valid_targets_mean": 4982.6, + "valid_targets_min": 2220 + }, + { + "epoch": 3.5185185185185186, + "grad_norm": 0.5596007727722335, + "learning_rate": 2.3309148942017424e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.276612251996994, + "step": 1900, + "valid_targets_mean": 3690.1, + "valid_targets_min": 572 + }, + { + "epoch": 3.5277777777777777, + "grad_norm": 0.521852302471038, + "learning_rate": 2.321804133236115e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25015997886657715, + "step": 1905, + "valid_targets_mean": 3807.7, + "valid_targets_min": 1461 + }, + { + "epoch": 3.537037037037037, + "grad_norm": 0.5202676505674712, + "learning_rate": 2.312686511672338e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2852502465248108, + "step": 1910, + "valid_targets_mean": 4486.8, + "valid_targets_min": 1530 + }, + { + "epoch": 3.5462962962962963, + "grad_norm": 0.49067308052473124, + "learning_rate": 2.3035622238905694e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2287035584449768, + "step": 1915, + "valid_targets_mean": 4143.6, + "valid_targets_min": 1655 + }, + { + "epoch": 3.5555555555555554, + "grad_norm": 0.5274253125213653, + "learning_rate": 2.2944314644130814e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27749884128570557, + "step": 1920, + "valid_targets_mean": 4553.0, + "valid_targets_min": 781 + }, + { + "epoch": 3.564814814814815, + "grad_norm": 0.5688144249989912, + "learning_rate": 2.2852944279001207e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3269811272621155, + "step": 1925, + "valid_targets_mean": 4159.2, + "valid_targets_min": 888 + }, + { + "epoch": 3.574074074074074, + "grad_norm": 0.5041147570894117, + "learning_rate": 2.2761513091457537e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584291100502014, + "step": 1930, + "valid_targets_mean": 4408.8, + "valid_targets_min": 1757 + }, + { + "epoch": 3.5833333333333335, + "grad_norm": 0.5046219032639738, + "learning_rate": 2.2670023030737153e-05, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2301914095878601, + "step": 1935, + "valid_targets_mean": 4570.9, + "valid_targets_min": 1585 + }, + { + "epoch": 3.5925925925925926, + "grad_norm": 0.5097422302633462, + "learning_rate": 2.2578476047332535e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23207487165927887, + "step": 1940, + "valid_targets_mean": 3510.6, + "valid_targets_min": 1621 + }, + { + "epoch": 3.601851851851852, + "grad_norm": 0.5502757038033133, + "learning_rate": 2.2486874092949708e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24923279881477356, + "step": 1945, + "valid_targets_mean": 3629.6, + "valid_targets_min": 1696 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 0.5493878283106496, + "learning_rate": 2.2395219120466622e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24204868078231812, + "step": 1950, + "valid_targets_mean": 3706.9, + "valid_targets_min": 698 + }, + { + "epoch": 3.6203703703703702, + "grad_norm": 0.5626462939570609, + "learning_rate": 2.2303513083891542e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548343539237976, + "step": 1955, + "valid_targets_mean": 3551.1, + "valid_targets_min": 594 + }, + { + "epoch": 3.6296296296296298, + "grad_norm": 0.47203755513693735, + "learning_rate": 2.2211757938321373e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20937591791152954, + "step": 1960, + "valid_targets_mean": 4418.7, + "valid_targets_min": 828 + }, + { + "epoch": 3.638888888888889, + "grad_norm": 0.5005343443672163, + "learning_rate": 2.2119955639899983e-05, + "loss": 0.2933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21190600097179413, + "step": 1965, + "valid_targets_mean": 4131.6, + "valid_targets_min": 526 + }, + { + "epoch": 3.648148148148148, + "grad_norm": 0.5062414681733032, + "learning_rate": 2.20281081457765e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24952177703380585, + "step": 1970, + "valid_targets_mean": 4448.9, + "valid_targets_min": 1588 + }, + { + "epoch": 3.6574074074074074, + "grad_norm": 0.5166078661849807, + "learning_rate": 2.1936217414063584e-05, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29824504256248474, + "step": 1975, + "valid_targets_mean": 4616.9, + "valid_targets_min": 1758 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.5036298018276866, + "learning_rate": 2.184428540379569e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21241354942321777, + "step": 1980, + "valid_targets_mean": 3836.9, + "valid_targets_min": 567 + }, + { + "epoch": 3.675925925925926, + "grad_norm": 0.5263922576220329, + "learning_rate": 2.1752314074887287e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21682290732860565, + "step": 1985, + "valid_targets_mean": 3577.4, + "valid_targets_min": 892 + }, + { + "epoch": 3.685185185185185, + "grad_norm": 0.5504297129810796, + "learning_rate": 2.1660305388091106e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22139625251293182, + "step": 1990, + "valid_targets_mean": 3110.8, + "valid_targets_min": 479 + }, + { + "epoch": 3.6944444444444446, + "grad_norm": 0.49325575866624627, + "learning_rate": 2.1568261304956298e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23789441585540771, + "step": 1995, + "valid_targets_mean": 4232.6, + "valid_targets_min": 680 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.5807219455547323, + "learning_rate": 2.1476183787786638e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26028937101364136, + "step": 2000, + "valid_targets_mean": 3360.9, + "valid_targets_min": 528 + }, + { + "epoch": 3.712962962962963, + "grad_norm": 0.5255254634433139, + "learning_rate": 2.138407479959869e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24893628060817719, + "step": 2005, + "valid_targets_mean": 3761.6, + "valid_targets_min": 2356 + }, + { + "epoch": 3.7222222222222223, + "grad_norm": 0.4773408030946498, + "learning_rate": 2.129193630407996e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22808077931404114, + "step": 2010, + "valid_targets_mean": 4793.6, + "valid_targets_min": 2178 + }, + { + "epoch": 3.7314814814814814, + "grad_norm": 0.4500006448054135, + "learning_rate": 2.119977026554701e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292492389678955, + "step": 2015, + "valid_targets_mean": 5103.6, + "valid_targets_min": 647 + }, + { + "epoch": 3.7407407407407405, + "grad_norm": 0.6473593402015339, + "learning_rate": 2.1107578648903614e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2941989600658417, + "step": 2020, + "valid_targets_mean": 3061.2, + "valid_targets_min": 722 + }, + { + "epoch": 3.75, + "grad_norm": 0.5203301574671931, + "learning_rate": 2.1015363419598835e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2489463984966278, + "step": 2025, + "valid_targets_mean": 4192.7, + "valid_targets_min": 404 + }, + { + "epoch": 3.7592592592592595, + "grad_norm": 0.5362575203104003, + "learning_rate": 2.0923126543585156e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.270350843667984, + "step": 2030, + "valid_targets_mean": 3732.0, + "valid_targets_min": 420 + }, + { + "epoch": 3.7685185185185186, + "grad_norm": 0.499224592037341, + "learning_rate": 2.0830869987276537e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3091796040534973, + "step": 2035, + "valid_targets_mean": 5031.6, + "valid_targets_min": 1046 + }, + { + "epoch": 3.7777777777777777, + "grad_norm": 0.5250725966197842, + "learning_rate": 2.0738595717506496e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22418762743473053, + "step": 2040, + "valid_targets_mean": 3963.0, + "valid_targets_min": 1275 + }, + { + "epoch": 3.787037037037037, + "grad_norm": 0.5742892201930184, + "learning_rate": 2.0646305701486215e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24486325681209564, + "step": 2045, + "valid_targets_mean": 2970.0, + "valid_targets_min": 720 + }, + { + "epoch": 3.7962962962962963, + "grad_norm": 0.5131608808905314, + "learning_rate": 2.055400190676255e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24303856492042542, + "step": 2050, + "valid_targets_mean": 4145.8, + "valid_targets_min": 2285 + }, + { + "epoch": 3.8055555555555554, + "grad_norm": 0.5052923454962346, + "learning_rate": 2.046168630117612e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3247288167476654, + "step": 2055, + "valid_targets_mean": 4734.6, + "valid_targets_min": 2031 + }, + { + "epoch": 3.814814814814815, + "grad_norm": 0.610872868205616, + "learning_rate": 2.0369360852819327e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507988214492798, + "step": 2060, + "valid_targets_mean": 3605.2, + "valid_targets_min": 523 + }, + { + "epoch": 3.824074074074074, + "grad_norm": 0.5723992923066944, + "learning_rate": 2.027702752999444e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26201534271240234, + "step": 2065, + "valid_targets_mean": 4353.8, + "valid_targets_min": 2454 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 0.5270133124002951, + "learning_rate": 2.0184688301171567e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589871883392334, + "step": 2070, + "valid_targets_mean": 3676.9, + "valid_targets_min": 573 + }, + { + "epoch": 3.8425925925925926, + "grad_norm": 0.5139405722726906, + "learning_rate": 2.009234513494676e-05, + "loss": 0.284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24153248965740204, + "step": 2075, + "valid_targets_mean": 4124.9, + "valid_targets_min": 663 + }, + { + "epoch": 3.851851851851852, + "grad_norm": 0.48652457260517906, + "learning_rate": 2e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24923667311668396, + "step": 2080, + "valid_targets_mean": 4446.2, + "valid_targets_min": 2380 + }, + { + "epoch": 3.861111111111111, + "grad_norm": 0.5365239749592436, + "learning_rate": 1.9907654865053248e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3025137782096863, + "step": 2085, + "valid_targets_mean": 4364.4, + "valid_targets_min": 2177 + }, + { + "epoch": 3.8703703703703702, + "grad_norm": 0.5487362203225881, + "learning_rate": 1.981531169882844e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569238543510437, + "step": 2090, + "valid_targets_mean": 3598.1, + "valid_targets_min": 709 + }, + { + "epoch": 3.8796296296296298, + "grad_norm": 0.4846676225451064, + "learning_rate": 1.9722972470005573e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21618637442588806, + "step": 2095, + "valid_targets_mean": 4546.6, + "valid_targets_min": 2362 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.5026122833039225, + "learning_rate": 1.9630639147180673e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31693074107170105, + "step": 2100, + "valid_targets_mean": 4688.2, + "valid_targets_min": 758 + }, + { + "epoch": 3.898148148148148, + "grad_norm": 0.4155051368967143, + "learning_rate": 1.9538313698823887e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20872560143470764, + "step": 2105, + "valid_targets_mean": 5315.1, + "valid_targets_min": 513 + }, + { + "epoch": 3.9074074074074074, + "grad_norm": 0.5053201320989432, + "learning_rate": 1.944599809323745e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23215201497077942, + "step": 2110, + "valid_targets_mean": 4019.3, + "valid_targets_min": 1946 + }, + { + "epoch": 3.9166666666666665, + "grad_norm": 0.546899589555972, + "learning_rate": 1.935369429851379e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3266640901565552, + "step": 2115, + "valid_targets_mean": 4348.4, + "valid_targets_min": 2310 + }, + { + "epoch": 3.925925925925926, + "grad_norm": 0.5637484671729249, + "learning_rate": 1.926140428249351e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31784260272979736, + "step": 2120, + "valid_targets_mean": 3883.6, + "valid_targets_min": 466 + }, + { + "epoch": 3.935185185185185, + "grad_norm": 0.42601754727230023, + "learning_rate": 1.916913001272347e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25121498107910156, + "step": 2125, + "valid_targets_mean": 5457.9, + "valid_targets_min": 2575 + }, + { + "epoch": 3.9444444444444446, + "grad_norm": 0.45355069808084864, + "learning_rate": 1.907687345641485e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22492194175720215, + "step": 2130, + "valid_targets_mean": 4995.0, + "valid_targets_min": 2156 + }, + { + "epoch": 3.9537037037037037, + "grad_norm": 0.4966703717024659, + "learning_rate": 1.8984636580401165e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22575929760932922, + "step": 2135, + "valid_targets_mean": 3822.2, + "valid_targets_min": 445 + }, + { + "epoch": 3.962962962962963, + "grad_norm": 0.4401478192705303, + "learning_rate": 1.8892421351096393e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25159192085266113, + "step": 2140, + "valid_targets_mean": 5782.5, + "valid_targets_min": 1416 + }, + { + "epoch": 3.9722222222222223, + "grad_norm": 0.5363034129883026, + "learning_rate": 1.8800229734452998e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20497846603393555, + "step": 2145, + "valid_targets_mean": 3910.8, + "valid_targets_min": 1582 + }, + { + "epoch": 3.9814814814814814, + "grad_norm": 0.4949167294147326, + "learning_rate": 1.8708063695920047e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2380611002445221, + "step": 2150, + "valid_targets_mean": 4586.9, + "valid_targets_min": 1010 + }, + { + "epoch": 3.9907407407407405, + "grad_norm": 0.4867568096304702, + "learning_rate": 1.8615925200401318e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2899520993232727, + "step": 2155, + "valid_targets_mean": 5028.2, + "valid_targets_min": 2141 + }, + { + "epoch": 4.0, + "grad_norm": 0.48512349657520354, + "learning_rate": 1.852381621221337e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25779369473457336, + "step": 2160, + "valid_targets_mean": 4514.5, + "valid_targets_min": 2124 + }, + { + "epoch": 4.0092592592592595, + "grad_norm": 0.5430547707519348, + "learning_rate": 1.843173869504371e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2733425498008728, + "step": 2165, + "valid_targets_mean": 3821.5, + "valid_targets_min": 675 + }, + { + "epoch": 4.018518518518518, + "grad_norm": 0.4760491658593785, + "learning_rate": 1.8339694611908897e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561805248260498, + "step": 2170, + "valid_targets_mean": 5306.1, + "valid_targets_min": 562 + }, + { + "epoch": 4.027777777777778, + "grad_norm": 0.48697889807764705, + "learning_rate": 1.8247685925112716e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1906963586807251, + "step": 2175, + "valid_targets_mean": 4063.8, + "valid_targets_min": 512 + }, + { + "epoch": 4.037037037037037, + "grad_norm": 0.5970157683288126, + "learning_rate": 1.8155714596204318e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847557067871094, + "step": 2180, + "valid_targets_mean": 3594.9, + "valid_targets_min": 2094 + }, + { + "epoch": 4.046296296296297, + "grad_norm": 0.5557943898925222, + "learning_rate": 1.806378258593642e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644827961921692, + "step": 2185, + "valid_targets_mean": 3963.4, + "valid_targets_min": 2251 + }, + { + "epoch": 4.055555555555555, + "grad_norm": 0.5180284994233548, + "learning_rate": 1.797189185422351e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2229800969362259, + "step": 2190, + "valid_targets_mean": 3800.6, + "valid_targets_min": 1971 + }, + { + "epoch": 4.064814814814815, + "grad_norm": 0.5275679350937065, + "learning_rate": 1.788004436010002e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2343769669532776, + "step": 2195, + "valid_targets_mean": 3961.6, + "valid_targets_min": 1795 + }, + { + "epoch": 4.074074074074074, + "grad_norm": 0.5719653033328994, + "learning_rate": 1.778824206167863e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2309592068195343, + "step": 2200, + "valid_targets_mean": 3531.6, + "valid_targets_min": 573 + }, + { + "epoch": 4.083333333333333, + "grad_norm": 0.5204783805453003, + "learning_rate": 1.7696486916108468e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2981133460998535, + "step": 2205, + "valid_targets_mean": 4711.4, + "valid_targets_min": 1961 + }, + { + "epoch": 4.092592592592593, + "grad_norm": 0.5003216687184299, + "learning_rate": 1.7604780879533384e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22590869665145874, + "step": 2210, + "valid_targets_mean": 4413.9, + "valid_targets_min": 561 + }, + { + "epoch": 4.101851851851852, + "grad_norm": 0.49855495979695824, + "learning_rate": 1.7513125907050302e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24315370619297028, + "step": 2215, + "valid_targets_mean": 4331.7, + "valid_targets_min": 789 + }, + { + "epoch": 4.111111111111111, + "grad_norm": 0.5590091213064351, + "learning_rate": 1.742152395266747e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28527384996414185, + "step": 2220, + "valid_targets_mean": 4252.1, + "valid_targets_min": 723 + }, + { + "epoch": 4.12037037037037, + "grad_norm": 0.4968800799735138, + "learning_rate": 1.7329976969262854e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23040777444839478, + "step": 2225, + "valid_targets_mean": 4997.2, + "valid_targets_min": 338 + }, + { + "epoch": 4.12962962962963, + "grad_norm": 0.5225715461374966, + "learning_rate": 1.7238486908542463e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24196788668632507, + "step": 2230, + "valid_targets_mean": 3969.0, + "valid_targets_min": 317 + }, + { + "epoch": 4.138888888888889, + "grad_norm": 0.566072039301765, + "learning_rate": 1.71470557209988e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2790153920650482, + "step": 2235, + "valid_targets_mean": 3879.8, + "valid_targets_min": 569 + }, + { + "epoch": 4.148148148148148, + "grad_norm": 0.45951042633304573, + "learning_rate": 1.7055685355869196e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2070295810699463, + "step": 2240, + "valid_targets_mean": 4564.6, + "valid_targets_min": 783 + }, + { + "epoch": 4.157407407407407, + "grad_norm": 0.5605220935037593, + "learning_rate": 1.6964377761094313e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2974599301815033, + "step": 2245, + "valid_targets_mean": 4355.8, + "valid_targets_min": 2288 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 0.5399329299027271, + "learning_rate": 1.6873134883276626e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25155043601989746, + "step": 2250, + "valid_targets_mean": 3803.4, + "valid_targets_min": 907 + }, + { + "epoch": 4.175925925925926, + "grad_norm": 0.5699303989204108, + "learning_rate": 1.6781958667638855e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24903835356235504, + "step": 2255, + "valid_targets_mean": 3695.6, + "valid_targets_min": 1570 + }, + { + "epoch": 4.185185185185185, + "grad_norm": 0.6105524681890175, + "learning_rate": 1.669085105798258e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2412843555212021, + "step": 2260, + "valid_targets_mean": 3404.7, + "valid_targets_min": 1942 + }, + { + "epoch": 4.194444444444445, + "grad_norm": 0.513400651402716, + "learning_rate": 1.6599813996646727e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22134144604206085, + "step": 2265, + "valid_targets_mean": 4079.2, + "valid_targets_min": 826 + }, + { + "epoch": 4.203703703703703, + "grad_norm": 0.4868536453874342, + "learning_rate": 1.65088494244662e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21563026309013367, + "step": 2270, + "valid_targets_mean": 4334.6, + "valid_targets_min": 1836 + }, + { + "epoch": 4.212962962962963, + "grad_norm": 0.5244709386650726, + "learning_rate": 1.6417959280730506e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.290455162525177, + "step": 2275, + "valid_targets_mean": 4819.6, + "valid_targets_min": 1708 + }, + { + "epoch": 4.222222222222222, + "grad_norm": 0.5328915315468012, + "learning_rate": 1.632714550314237e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250417560338974, + "step": 2280, + "valid_targets_mean": 4054.6, + "valid_targets_min": 1107 + }, + { + "epoch": 4.231481481481482, + "grad_norm": 0.5972878155360123, + "learning_rate": 1.6236410027776494e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22642353177070618, + "step": 2285, + "valid_targets_mean": 3058.1, + "valid_targets_min": 563 + }, + { + "epoch": 4.2407407407407405, + "grad_norm": 0.5907761427734216, + "learning_rate": 1.6145754789038205e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18239718675613403, + "step": 2290, + "valid_targets_mean": 3142.9, + "valid_targets_min": 859 + }, + { + "epoch": 4.25, + "grad_norm": 0.5433714738595986, + "learning_rate": 1.6055181719622278e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18836545944213867, + "step": 2295, + "valid_targets_mean": 3226.4, + "valid_targets_min": 1585 + }, + { + "epoch": 4.2592592592592595, + "grad_norm": 0.5040273007610758, + "learning_rate": 1.5964692750471684e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23993724584579468, + "step": 2300, + "valid_targets_mean": 4635.3, + "valid_targets_min": 480 + }, + { + "epoch": 4.268518518518518, + "grad_norm": 0.5272742282351708, + "learning_rate": 1.5874289810736452e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2000361979007721, + "step": 2305, + "valid_targets_mean": 3835.5, + "valid_targets_min": 525 + }, + { + "epoch": 4.277777777777778, + "grad_norm": 0.5421153003949007, + "learning_rate": 1.5783974827732555e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20407015085220337, + "step": 2310, + "valid_targets_mean": 3983.3, + "valid_targets_min": 1755 + }, + { + "epoch": 4.287037037037037, + "grad_norm": 0.508791566036482, + "learning_rate": 1.5693749726900767e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24065956473350525, + "step": 2315, + "valid_targets_mean": 4733.2, + "valid_targets_min": 2012 + }, + { + "epoch": 4.296296296296296, + "grad_norm": 0.6278410756307, + "learning_rate": 1.560361643176568e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2204664796590805, + "step": 2320, + "valid_targets_mean": 2955.4, + "valid_targets_min": 767 + }, + { + "epoch": 4.305555555555555, + "grad_norm": 0.51340304756785, + "learning_rate": 1.5513576863894654e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24818578362464905, + "step": 2325, + "valid_targets_mean": 4591.8, + "valid_targets_min": 891 + }, + { + "epoch": 4.314814814814815, + "grad_norm": 0.5004366205081487, + "learning_rate": 1.5423632942856836e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22766365110874176, + "step": 2330, + "valid_targets_mean": 4753.9, + "valid_targets_min": 2548 + }, + { + "epoch": 4.324074074074074, + "grad_norm": 0.5514594228413564, + "learning_rate": 1.5333786586182308e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691954970359802, + "step": 2335, + "valid_targets_mean": 4707.2, + "valid_targets_min": 556 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 0.47659547270242236, + "learning_rate": 1.5244039709321123e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24745163321495056, + "step": 2340, + "valid_targets_mean": 4947.1, + "valid_targets_min": 1720 + }, + { + "epoch": 4.342592592592593, + "grad_norm": 0.5315469803851041, + "learning_rate": 1.5154394225602525e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24923008680343628, + "step": 2345, + "valid_targets_mean": 4185.3, + "valid_targets_min": 641 + }, + { + "epoch": 4.351851851851852, + "grad_norm": 0.49151936211870245, + "learning_rate": 1.5064852046194127e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21445490419864655, + "step": 2350, + "valid_targets_mean": 4332.5, + "valid_targets_min": 1109 + }, + { + "epoch": 4.361111111111111, + "grad_norm": 0.530468204810676, + "learning_rate": 1.49754150800612e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30778932571411133, + "step": 2355, + "valid_targets_mean": 5126.1, + "valid_targets_min": 742 + }, + { + "epoch": 4.37037037037037, + "grad_norm": 0.5309780578096778, + "learning_rate": 1.4886085233925931e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2524486780166626, + "step": 2360, + "valid_targets_mean": 5057.9, + "valid_targets_min": 943 + }, + { + "epoch": 4.37962962962963, + "grad_norm": 0.4267222102718032, + "learning_rate": 1.4796864412226812e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837308555841446, + "step": 2365, + "valid_targets_mean": 5174.9, + "valid_targets_min": 2396 + }, + { + "epoch": 4.388888888888889, + "grad_norm": 0.5625553097603778, + "learning_rate": 1.4707754517078021e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2614815831184387, + "step": 2370, + "valid_targets_mean": 3773.8, + "valid_targets_min": 724 + }, + { + "epoch": 4.398148148148148, + "grad_norm": 0.5396666245271566, + "learning_rate": 1.4618757448228869e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23616960644721985, + "step": 2375, + "valid_targets_mean": 3829.4, + "valid_targets_min": 2237 + }, + { + "epoch": 4.407407407407407, + "grad_norm": 0.6629662126529009, + "learning_rate": 1.4529875103023316e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2807127833366394, + "step": 2380, + "valid_targets_mean": 4226.0, + "valid_targets_min": 2051 + }, + { + "epoch": 4.416666666666667, + "grad_norm": 0.5603617627291267, + "learning_rate": 1.4441109376359498e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25401419401168823, + "step": 2385, + "valid_targets_mean": 3388.5, + "valid_targets_min": 1929 + }, + { + "epoch": 4.425925925925926, + "grad_norm": 0.512775023115171, + "learning_rate": 1.435246216064933e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2706540822982788, + "step": 2390, + "valid_targets_mean": 4530.2, + "valid_targets_min": 1979 + }, + { + "epoch": 4.435185185185185, + "grad_norm": 0.5204570025829351, + "learning_rate": 1.4263935345778202e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24824261665344238, + "step": 2395, + "valid_targets_mean": 4182.4, + "valid_targets_min": 1899 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.5254410292063911, + "learning_rate": 1.417553081906462e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26439177989959717, + "step": 2400, + "valid_targets_mean": 4461.6, + "valid_targets_min": 1416 + }, + { + "epoch": 4.453703703703704, + "grad_norm": 0.5956700654564747, + "learning_rate": 1.408725046522005e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2445288896560669, + "step": 2405, + "valid_targets_mean": 3570.7, + "valid_targets_min": 708 + }, + { + "epoch": 4.462962962962963, + "grad_norm": 0.5443093397325657, + "learning_rate": 1.399909616630865e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19300591945648193, + "step": 2410, + "valid_targets_mean": 4181.1, + "valid_targets_min": 1700 + }, + { + "epoch": 4.472222222222222, + "grad_norm": 0.5782021783769784, + "learning_rate": 1.3911069801707232e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529100179672241, + "step": 2415, + "valid_targets_mean": 3981.5, + "valid_targets_min": 2339 + }, + { + "epoch": 4.481481481481482, + "grad_norm": 0.4902132127041017, + "learning_rate": 1.3823173248065125e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21891093254089355, + "step": 2420, + "valid_targets_mean": 4572.9, + "valid_targets_min": 917 + }, + { + "epoch": 4.4907407407407405, + "grad_norm": 0.5337074804399202, + "learning_rate": 1.37354083792642e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22410012781620026, + "step": 2425, + "valid_targets_mean": 3774.8, + "valid_targets_min": 1751 + }, + { + "epoch": 4.5, + "grad_norm": 0.5148725556353548, + "learning_rate": 1.3647777066378938e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21354614198207855, + "step": 2430, + "valid_targets_mean": 4094.2, + "valid_targets_min": 1633 + }, + { + "epoch": 4.5092592592592595, + "grad_norm": 0.5306690061606288, + "learning_rate": 1.3560281177636484e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22166001796722412, + "step": 2435, + "valid_targets_mean": 4369.6, + "valid_targets_min": 1700 + }, + { + "epoch": 4.518518518518518, + "grad_norm": 0.54955470040774, + "learning_rate": 1.347292257837689e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19483765959739685, + "step": 2440, + "valid_targets_mean": 3584.6, + "valid_targets_min": 1698 + }, + { + "epoch": 4.527777777777778, + "grad_norm": 0.5166945393629171, + "learning_rate": 1.3385703131013279e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26188984513282776, + "step": 2445, + "valid_targets_mean": 5255.4, + "valid_targets_min": 2425 + }, + { + "epoch": 4.537037037037037, + "grad_norm": 0.5093872869070459, + "learning_rate": 1.3298624694992175e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23913279175758362, + "step": 2450, + "valid_targets_mean": 4489.4, + "valid_targets_min": 1802 + }, + { + "epoch": 4.546296296296296, + "grad_norm": 0.47603655861521876, + "learning_rate": 1.3211689126753879e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17301449179649353, + "step": 2455, + "valid_targets_mean": 4347.6, + "valid_targets_min": 1813 + }, + { + "epoch": 4.555555555555555, + "grad_norm": 0.6346661258154088, + "learning_rate": 1.3124898279692837e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843843400478363, + "step": 2460, + "valid_targets_mean": 3273.6, + "valid_targets_min": 849 + }, + { + "epoch": 4.564814814814815, + "grad_norm": 0.5325625066728102, + "learning_rate": 1.3038254004118192e-05, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.295737624168396, + "step": 2465, + "valid_targets_mean": 4695.4, + "valid_targets_min": 781 + }, + { + "epoch": 4.574074074074074, + "grad_norm": 0.5501023241996152, + "learning_rate": 1.2951758147214272e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2129950225353241, + "step": 2470, + "valid_targets_mean": 3447.8, + "valid_targets_min": 1908 + }, + { + "epoch": 4.583333333333333, + "grad_norm": 0.5306118753400628, + "learning_rate": 1.2865412553001274e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3140091896057129, + "step": 2475, + "valid_targets_mean": 5196.5, + "valid_targets_min": 451 + }, + { + "epoch": 4.592592592592593, + "grad_norm": 0.582247483161391, + "learning_rate": 1.2779219062295892e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22162239253520966, + "step": 2480, + "valid_targets_mean": 3159.7, + "valid_targets_min": 1906 + }, + { + "epoch": 4.601851851851852, + "grad_norm": 0.5672219680504043, + "learning_rate": 1.26931795126721e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25854402780532837, + "step": 2485, + "valid_targets_mean": 3898.6, + "valid_targets_min": 570 + }, + { + "epoch": 4.611111111111111, + "grad_norm": 0.522554196457828, + "learning_rate": 1.2607295738422e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28402256965637207, + "step": 2490, + "valid_targets_mean": 4644.9, + "valid_targets_min": 559 + }, + { + "epoch": 4.62037037037037, + "grad_norm": 0.5144881360654426, + "learning_rate": 1.2521569570516666e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24675382673740387, + "step": 2495, + "valid_targets_mean": 4304.7, + "valid_targets_min": 643 + }, + { + "epoch": 4.62962962962963, + "grad_norm": 0.5285312184685258, + "learning_rate": 1.2436002836567154e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19586646556854248, + "step": 2500, + "valid_targets_mean": 3602.7, + "valid_targets_min": 322 + }, + { + "epoch": 4.638888888888889, + "grad_norm": 0.5549134026619124, + "learning_rate": 1.2350597360785503e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18762420117855072, + "step": 2505, + "valid_targets_mean": 3592.7, + "valid_targets_min": 1658 + }, + { + "epoch": 4.648148148148148, + "grad_norm": 0.5467801855327935, + "learning_rate": 1.2265354963945861e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21311713755130768, + "step": 2510, + "valid_targets_mean": 4042.2, + "valid_targets_min": 1311 + }, + { + "epoch": 4.657407407407407, + "grad_norm": 0.5982127460318674, + "learning_rate": 1.2180277463345697e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2819077968597412, + "step": 2515, + "valid_targets_mean": 3927.7, + "valid_targets_min": 667 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.5694492693011193, + "learning_rate": 1.209536667276699e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266341745853424, + "step": 2520, + "valid_targets_mean": 3360.6, + "valid_targets_min": 635 + }, + { + "epoch": 4.675925925925926, + "grad_norm": 0.5526419588735482, + "learning_rate": 1.2010624402437622e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23695549368858337, + "step": 2525, + "valid_targets_mean": 3984.8, + "valid_targets_min": 731 + }, + { + "epoch": 4.685185185185185, + "grad_norm": 0.5408354781576525, + "learning_rate": 1.1926052458992756e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24009719491004944, + "step": 2530, + "valid_targets_mean": 4062.1, + "valid_targets_min": 770 + }, + { + "epoch": 4.694444444444445, + "grad_norm": 0.5900458646547324, + "learning_rate": 1.184165264543633e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3150600492954254, + "step": 2535, + "valid_targets_mean": 3895.1, + "valid_targets_min": 247 + }, + { + "epoch": 4.703703703703704, + "grad_norm": 0.5696579830073532, + "learning_rate": 1.1757426761102608e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.223690927028656, + "step": 2540, + "valid_targets_mean": 3577.1, + "valid_targets_min": 1758 + }, + { + "epoch": 4.712962962962963, + "grad_norm": 0.6778694424859932, + "learning_rate": 1.167337660161783e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23883025348186493, + "step": 2545, + "valid_targets_mean": 3305.8, + "valid_targets_min": 321 + }, + { + "epoch": 4.722222222222222, + "grad_norm": 0.4941234856749398, + "learning_rate": 1.1589503958861936e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17126263678073883, + "step": 2550, + "valid_targets_mean": 4076.2, + "valid_targets_min": 2068 + }, + { + "epoch": 4.731481481481482, + "grad_norm": 0.49689750365243684, + "learning_rate": 1.1505810620930338e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27281057834625244, + "step": 2555, + "valid_targets_mean": 4938.1, + "valid_targets_min": 1676 + }, + { + "epoch": 4.7407407407407405, + "grad_norm": 0.49366801115519954, + "learning_rate": 1.1422298372095841e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2007950097322464, + "step": 2560, + "valid_targets_mean": 4215.3, + "valid_targets_min": 1973 + }, + { + "epoch": 4.75, + "grad_norm": 0.5522379435269955, + "learning_rate": 1.133896899277056e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245499849319458, + "step": 2565, + "valid_targets_mean": 4917.2, + "valid_targets_min": 1888 + }, + { + "epoch": 4.7592592592592595, + "grad_norm": 0.43752196002323207, + "learning_rate": 1.1255824259467985e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26849859952926636, + "step": 2570, + "valid_targets_mean": 5791.7, + "valid_targets_min": 2194 + }, + { + "epoch": 4.768518518518518, + "grad_norm": 0.4978365901424028, + "learning_rate": 1.1172865944765122e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19043470919132233, + "step": 2575, + "valid_targets_mean": 3895.8, + "valid_targets_min": 1835 + }, + { + "epoch": 4.777777777777778, + "grad_norm": 0.5734466742419532, + "learning_rate": 1.109009581726466e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25819826126098633, + "step": 2580, + "valid_targets_mean": 3905.1, + "valid_targets_min": 818 + }, + { + "epoch": 4.787037037037037, + "grad_norm": 0.4975385383711413, + "learning_rate": 1.1007515641557329e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23181453347206116, + "step": 2585, + "valid_targets_mean": 4291.7, + "valid_targets_min": 804 + }, + { + "epoch": 4.796296296296296, + "grad_norm": 0.5198599453591137, + "learning_rate": 1.092512717818421e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564372718334198, + "step": 2590, + "valid_targets_mean": 4180.1, + "valid_targets_min": 1857 + }, + { + "epoch": 4.805555555555555, + "grad_norm": 0.5359214335268695, + "learning_rate": 1.0842932183599238e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20336735248565674, + "step": 2595, + "valid_targets_mean": 3678.8, + "valid_targets_min": 580 + }, + { + "epoch": 4.814814814814815, + "grad_norm": 0.5160372923320123, + "learning_rate": 1.076093241013178e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072121798992157, + "step": 2600, + "valid_targets_mean": 4072.1, + "valid_targets_min": 1736 + }, + { + "epoch": 4.824074074074074, + "grad_norm": 0.6009912786476855, + "learning_rate": 1.067912960594923e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27277135848999023, + "step": 2605, + "valid_targets_mean": 3798.9, + "valid_targets_min": 496 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 0.5346358334055163, + "learning_rate": 1.0597525515019749e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20401796698570251, + "step": 2610, + "valid_targets_mean": 3752.1, + "valid_targets_min": 812 + }, + { + "epoch": 4.842592592592593, + "grad_norm": 0.49231495307972206, + "learning_rate": 1.0516121877075129e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22834226489067078, + "step": 2615, + "valid_targets_mean": 4466.0, + "valid_targets_min": 2355 + }, + { + "epoch": 4.851851851851852, + "grad_norm": 0.5848272712574982, + "learning_rate": 1.0434920427573643e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2603488862514496, + "step": 2620, + "valid_targets_mean": 3781.5, + "valid_targets_min": 599 + }, + { + "epoch": 4.861111111111111, + "grad_norm": 0.5265795938004495, + "learning_rate": 1.035392289766307e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23392300307750702, + "step": 2625, + "valid_targets_mean": 4054.4, + "valid_targets_min": 787 + }, + { + "epoch": 4.87037037037037, + "grad_norm": 0.5187023064482922, + "learning_rate": 1.0273131014143822e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2979033291339874, + "step": 2630, + "valid_targets_mean": 5284.2, + "valid_targets_min": 960 + }, + { + "epoch": 4.87962962962963, + "grad_norm": 0.5723456672993618, + "learning_rate": 1.0192546499432066e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2871008515357971, + "step": 2635, + "valid_targets_mean": 4034.7, + "valid_targets_min": 1757 + }, + { + "epoch": 4.888888888888889, + "grad_norm": 0.4518981511283737, + "learning_rate": 1.0112171071523064e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24816018342971802, + "step": 2640, + "valid_targets_mean": 5038.8, + "valid_targets_min": 1905 + }, + { + "epoch": 4.898148148148148, + "grad_norm": 0.588262346243775, + "learning_rate": 1.0032006443954506e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263683021068573, + "step": 2645, + "valid_targets_mean": 3463.8, + "valid_targets_min": 496 + }, + { + "epoch": 4.907407407407407, + "grad_norm": 0.5240492310910843, + "learning_rate": 9.952054325769984e-06, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2150118052959442, + "step": 2650, + "valid_targets_mean": 3824.9, + "valid_targets_min": 1872 + }, + { + "epoch": 4.916666666666667, + "grad_norm": 0.510346682757862, + "learning_rate": 9.872316421482592e-06, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21479976177215576, + "step": 2655, + "valid_targets_mean": 4396.7, + "valid_targets_min": 422 + }, + { + "epoch": 4.925925925925926, + "grad_norm": 0.5032967964842083, + "learning_rate": 9.792794431038542e-06, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1802617311477661, + "step": 2660, + "valid_targets_mean": 3972.9, + "valid_targets_min": 2057 + }, + { + "epoch": 4.935185185185185, + "grad_norm": 0.5308551440927738, + "learning_rate": 9.713490049780931e-06, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27088022232055664, + "step": 2665, + "valid_targets_mean": 4305.6, + "valid_targets_min": 671 + }, + { + "epoch": 4.944444444444445, + "grad_norm": 0.476874892456418, + "learning_rate": 9.634404968413644e-06, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25819599628448486, + "step": 2670, + "valid_targets_mean": 5321.4, + "valid_targets_min": 1851 + }, + { + "epoch": 4.953703703703704, + "grad_norm": 0.5120117073751925, + "learning_rate": 9.555540872965235e-06, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23541975021362305, + "step": 2675, + "valid_targets_mean": 4679.6, + "valid_targets_min": 2191 + }, + { + "epoch": 4.962962962962963, + "grad_norm": 0.5036966865354542, + "learning_rate": 9.47689944475305e-06, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2063012421131134, + "step": 2680, + "valid_targets_mean": 3808.1, + "valid_targets_min": 1218 + }, + { + "epoch": 4.972222222222222, + "grad_norm": 0.5539021581298206, + "learning_rate": 9.398482360347327e-06, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21875013411045074, + "step": 2685, + "valid_targets_mean": 3876.2, + "valid_targets_min": 594 + }, + { + "epoch": 4.981481481481482, + "grad_norm": 0.4696704645863026, + "learning_rate": 9.32029129153551e-06, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2458312213420868, + "step": 2690, + "valid_targets_mean": 5203.4, + "valid_targets_min": 636 + }, + { + "epoch": 4.9907407407407405, + "grad_norm": 0.5363470268259611, + "learning_rate": 9.242327905286552e-06, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2823778986930847, + "step": 2695, + "valid_targets_mean": 4615.7, + "valid_targets_min": 490 + }, + { + "epoch": 5.0, + "grad_norm": 0.5485569771302176, + "learning_rate": 9.164593863715405e-06, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26309841871261597, + "step": 2700, + "valid_targets_mean": 4314.6, + "valid_targets_min": 1566 + }, + { + "epoch": 5.0092592592592595, + "grad_norm": 0.5750093349536356, + "learning_rate": 9.087090824047604e-06, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20704308152198792, + "step": 2705, + "valid_targets_mean": 3146.9, + "valid_targets_min": 804 + }, + { + "epoch": 5.018518518518518, + "grad_norm": 0.5711593696775072, + "learning_rate": 9.009820438583881e-06, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2449912130832672, + "step": 2710, + "valid_targets_mean": 3544.4, + "valid_targets_min": 512 + }, + { + "epoch": 5.027777777777778, + "grad_norm": 0.5056840688609416, + "learning_rate": 8.932784354665002e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21420851349830627, + "step": 2715, + "valid_targets_mean": 4169.8, + "valid_targets_min": 715 + }, + { + "epoch": 5.037037037037037, + "grad_norm": 0.5259628580539836, + "learning_rate": 8.855984214636606e-06, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2302454710006714, + "step": 2720, + "valid_targets_mean": 4181.8, + "valid_targets_min": 1921 + }, + { + "epoch": 5.046296296296297, + "grad_norm": 0.5183310765864052, + "learning_rate": 8.779421655814189e-06, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22257420420646667, + "step": 2725, + "valid_targets_mean": 4248.5, + "valid_targets_min": 451 + }, + { + "epoch": 5.055555555555555, + "grad_norm": 0.5672433220359422, + "learning_rate": 8.703098310448244e-06, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21661844849586487, + "step": 2730, + "valid_targets_mean": 3690.8, + "valid_targets_min": 313 + }, + { + "epoch": 5.064814814814815, + "grad_norm": 0.6099943995315652, + "learning_rate": 8.627015805689394e-06, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.220462366938591, + "step": 2735, + "valid_targets_mean": 3285.6, + "valid_targets_min": 1461 + }, + { + "epoch": 5.074074074074074, + "grad_norm": 0.5323546697406513, + "learning_rate": 8.551175763553778e-06, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20129993557929993, + "step": 2740, + "valid_targets_mean": 3924.2, + "valid_targets_min": 1717 + }, + { + "epoch": 5.083333333333333, + "grad_norm": 0.6107489166400913, + "learning_rate": 8.475579800888395e-06, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22628153860569, + "step": 2745, + "valid_targets_mean": 3306.1, + "valid_targets_min": 1109 + }, + { + "epoch": 5.092592592592593, + "grad_norm": 0.5714928193929322, + "learning_rate": 8.400229529336704e-06, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27930349111557007, + "step": 2750, + "valid_targets_mean": 3991.1, + "valid_targets_min": 2391 + }, + { + "epoch": 5.101851851851852, + "grad_norm": 0.5659909185072247, + "learning_rate": 8.325126555304208e-06, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2336483746767044, + "step": 2755, + "valid_targets_mean": 3597.2, + "valid_targets_min": 602 + }, + { + "epoch": 5.111111111111111, + "grad_norm": 0.5991030111391655, + "learning_rate": 8.250272479924234e-06, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207331120967865, + "step": 2760, + "valid_targets_mean": 3291.4, + "valid_targets_min": 1701 + }, + { + "epoch": 5.12037037037037, + "grad_norm": 0.5934525134623533, + "learning_rate": 8.17566889902382e-06, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24269866943359375, + "step": 2765, + "valid_targets_mean": 3444.9, + "valid_targets_min": 554 + }, + { + "epoch": 5.12962962962963, + "grad_norm": 0.5658329820173873, + "learning_rate": 8.101317403089635e-06, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23202694952487946, + "step": 2770, + "valid_targets_mean": 3681.4, + "valid_targets_min": 2306 + }, + { + "epoch": 5.138888888888889, + "grad_norm": 0.6489256706628873, + "learning_rate": 8.027219577234133e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2144010066986084, + "step": 2775, + "valid_targets_mean": 2766.6, + "valid_targets_min": 594 + }, + { + "epoch": 5.148148148148148, + "grad_norm": 0.5211992244558095, + "learning_rate": 7.953377001161714e-06, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21091634035110474, + "step": 2780, + "valid_targets_mean": 4418.5, + "valid_targets_min": 932 + }, + { + "epoch": 5.157407407407407, + "grad_norm": 0.5386716433955135, + "learning_rate": 7.879791249135059e-06, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028743326663971, + "step": 2785, + "valid_targets_mean": 4136.1, + "valid_targets_min": 1604 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 0.5017972625347342, + "learning_rate": 7.806463889941598e-06, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2554541230201721, + "step": 2790, + "valid_targets_mean": 4644.6, + "valid_targets_min": 2649 + }, + { + "epoch": 5.175925925925926, + "grad_norm": 0.6494575106635824, + "learning_rate": 7.73339648686001e-06, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24619343876838684, + "step": 2795, + "valid_targets_mean": 3087.1, + "valid_targets_min": 894 + }, + { + "epoch": 5.185185185185185, + "grad_norm": 0.5370049338879487, + "learning_rate": 7.66059059762695e-06, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23969629406929016, + "step": 2800, + "valid_targets_mean": 4655.8, + "valid_targets_min": 514 + }, + { + "epoch": 5.194444444444445, + "grad_norm": 0.48493560753692005, + "learning_rate": 7.588047774403795e-06, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.212522953748703, + "step": 2805, + "valid_targets_mean": 4826.5, + "valid_targets_min": 1850 + }, + { + "epoch": 5.203703703703703, + "grad_norm": 0.5702442363690217, + "learning_rate": 7.5157695637435864e-06, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21703308820724487, + "step": 2810, + "valid_targets_mean": 3602.2, + "valid_targets_min": 742 + }, + { + "epoch": 5.212962962962963, + "grad_norm": 0.4950198812734338, + "learning_rate": 7.443757506558033e-06, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2432202249765396, + "step": 2815, + "valid_targets_mean": 4723.4, + "valid_targets_min": 523 + }, + { + "epoch": 5.222222222222222, + "grad_norm": 0.5719638372764714, + "learning_rate": 7.3720131380846685e-06, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2235596477985382, + "step": 2820, + "valid_targets_mean": 3752.8, + "valid_targets_min": 577 + }, + { + "epoch": 5.231481481481482, + "grad_norm": 0.5267450371667391, + "learning_rate": 7.300537987854146e-06, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2897190749645233, + "step": 2825, + "valid_targets_mean": 4997.0, + "valid_targets_min": 2002 + }, + { + "epoch": 5.2407407407407405, + "grad_norm": 0.6948286687288108, + "learning_rate": 7.22933357965758e-06, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3058856725692749, + "step": 2830, + "valid_targets_mean": 3943.1, + "valid_targets_min": 1473 + }, + { + "epoch": 5.25, + "grad_norm": 0.586857466777657, + "learning_rate": 7.158401431514117e-06, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25646457076072693, + "step": 2835, + "valid_targets_mean": 3900.5, + "valid_targets_min": 1653 + }, + { + "epoch": 5.2592592592592595, + "grad_norm": 0.5671217282214601, + "learning_rate": 7.0877430556385205e-06, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25015023350715637, + "step": 2840, + "valid_targets_mean": 4106.6, + "valid_targets_min": 663 + }, + { + "epoch": 5.268518518518518, + "grad_norm": 0.5512194807040625, + "learning_rate": 7.0173599584089625e-06, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19779840111732483, + "step": 2845, + "valid_targets_mean": 3710.4, + "valid_targets_min": 1984 + }, + { + "epoch": 5.277777777777778, + "grad_norm": 0.6190422799034527, + "learning_rate": 6.947253640334914e-06, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084774523973465, + "step": 2850, + "valid_targets_mean": 3782.9, + "valid_targets_min": 535 + }, + { + "epoch": 5.287037037037037, + "grad_norm": 0.5143657356660781, + "learning_rate": 6.87742559602512e-06, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20541247725486755, + "step": 2855, + "valid_targets_mean": 4369.5, + "valid_targets_min": 632 + }, + { + "epoch": 5.296296296296296, + "grad_norm": 0.6337293582450882, + "learning_rate": 6.807877314155788e-06, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23032309114933014, + "step": 2860, + "valid_targets_mean": 3650.8, + "valid_targets_min": 2050 + }, + { + "epoch": 5.305555555555555, + "grad_norm": 0.5832848422160128, + "learning_rate": 6.738610277438791e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2612956762313843, + "step": 2865, + "valid_targets_mean": 3992.4, + "valid_targets_min": 1389 + }, + { + "epoch": 5.314814814814815, + "grad_norm": 0.5761403577609914, + "learning_rate": 6.669625962590114e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23144936561584473, + "step": 2870, + "valid_targets_mean": 3683.9, + "valid_targets_min": 1769 + }, + { + "epoch": 5.324074074074074, + "grad_norm": 0.5484533193789417, + "learning_rate": 6.600925840298331e-06, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1928141713142395, + "step": 2875, + "valid_targets_mean": 3428.2, + "valid_targets_min": 1879 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.5104494079712223, + "learning_rate": 6.532511375193258e-06, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096475064754486, + "step": 2880, + "valid_targets_mean": 4629.7, + "valid_targets_min": 2168 + }, + { + "epoch": 5.342592592592593, + "grad_norm": 0.5967975385442751, + "learning_rate": 6.464384025814763e-06, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2632576823234558, + "step": 2885, + "valid_targets_mean": 3904.9, + "valid_targets_min": 774 + }, + { + "epoch": 5.351851851851852, + "grad_norm": 0.5546172949302648, + "learning_rate": 6.396545244581609e-06, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23911094665527344, + "step": 2890, + "valid_targets_mean": 4346.3, + "valid_targets_min": 2347 + }, + { + "epoch": 5.361111111111111, + "grad_norm": 0.4623970390000608, + "learning_rate": 6.3289964777605624e-06, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22868403792381287, + "step": 2895, + "valid_targets_mean": 5747.1, + "valid_targets_min": 1679 + }, + { + "epoch": 5.37037037037037, + "grad_norm": 0.5472864241371322, + "learning_rate": 6.261739165435492e-06, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24670270085334778, + "step": 2900, + "valid_targets_mean": 4055.2, + "valid_targets_min": 1881 + }, + { + "epoch": 5.37962962962963, + "grad_norm": 0.5565568764252096, + "learning_rate": 6.1947747414767035e-06, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27123579382896423, + "step": 2905, + "valid_targets_mean": 4548.1, + "valid_targets_min": 762 + }, + { + "epoch": 5.388888888888889, + "grad_norm": 0.6213207010595587, + "learning_rate": 6.128104633510381e-06, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2588229775428772, + "step": 2910, + "valid_targets_mean": 3481.0, + "valid_targets_min": 537 + }, + { + "epoch": 5.398148148148148, + "grad_norm": 0.5039995681794307, + "learning_rate": 6.0617302628881104e-06, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23631207644939423, + "step": 2915, + "valid_targets_mean": 5029.2, + "valid_targets_min": 599 + }, + { + "epoch": 5.407407407407407, + "grad_norm": 0.5324459779762222, + "learning_rate": 5.9956530446566305e-06, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27384650707244873, + "step": 2920, + "valid_targets_mean": 4611.2, + "valid_targets_min": 971 + }, + { + "epoch": 5.416666666666667, + "grad_norm": 0.5508613368105185, + "learning_rate": 5.929874387527605e-06, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2502458393573761, + "step": 2925, + "valid_targets_mean": 4067.2, + "valid_targets_min": 1611 + }, + { + "epoch": 5.425925925925926, + "grad_norm": 0.6630459663999032, + "learning_rate": 5.864395693847651e-06, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2312171310186386, + "step": 2930, + "valid_targets_mean": 3028.5, + "valid_targets_min": 513 + }, + { + "epoch": 5.435185185185185, + "grad_norm": 0.6102195329637778, + "learning_rate": 5.799218359568395e-06, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23122429847717285, + "step": 2935, + "valid_targets_mean": 3275.2, + "valid_targets_min": 949 + }, + { + "epoch": 5.444444444444445, + "grad_norm": 0.5924002103430813, + "learning_rate": 5.734343774216726e-06, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418282926082611, + "step": 2940, + "valid_targets_mean": 3858.4, + "valid_targets_min": 2039 + }, + { + "epoch": 5.453703703703704, + "grad_norm": 0.5135171674815325, + "learning_rate": 5.669773320865198e-06, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19563168287277222, + "step": 2945, + "valid_targets_mean": 4136.9, + "valid_targets_min": 799 + }, + { + "epoch": 5.462962962962963, + "grad_norm": 0.5157474067382247, + "learning_rate": 5.605508376102504e-06, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22772985696792603, + "step": 2950, + "valid_targets_mean": 4380.8, + "valid_targets_min": 888 + }, + { + "epoch": 5.472222222222222, + "grad_norm": 0.58770575498854, + "learning_rate": 5.541550310004142e-06, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2741531729698181, + "step": 2955, + "valid_targets_mean": 4150.1, + "valid_targets_min": 1655 + }, + { + "epoch": 5.481481481481482, + "grad_norm": 0.5259117380247347, + "learning_rate": 5.4779004861032355e-06, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124425172805786, + "step": 2960, + "valid_targets_mean": 4234.0, + "valid_targets_min": 845 + }, + { + "epoch": 5.4907407407407405, + "grad_norm": 0.5572729756403186, + "learning_rate": 5.414560261361415e-06, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22375890612602234, + "step": 2965, + "valid_targets_mean": 3794.6, + "valid_targets_min": 1686 + }, + { + "epoch": 5.5, + "grad_norm": 0.5445675255604132, + "learning_rate": 5.351530986139917e-06, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3014816641807556, + "step": 2970, + "valid_targets_mean": 5170.2, + "valid_targets_min": 1872 + }, + { + "epoch": 5.5092592592592595, + "grad_norm": 0.6466881136928674, + "learning_rate": 5.288814004170804e-06, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24697977304458618, + "step": 2975, + "valid_targets_mean": 5287.1, + "valid_targets_min": 299 + }, + { + "epoch": 5.518518518518518, + "grad_norm": 0.5769556261883075, + "learning_rate": 5.226410652528293e-06, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20952610671520233, + "step": 2980, + "valid_targets_mean": 3477.0, + "valid_targets_min": 1758 + }, + { + "epoch": 5.527777777777778, + "grad_norm": 0.5220365367076272, + "learning_rate": 5.164322261600257e-06, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26157093048095703, + "step": 2985, + "valid_targets_mean": 4698.8, + "valid_targets_min": 672 + }, + { + "epoch": 5.537037037037037, + "grad_norm": 0.5834708312508325, + "learning_rate": 5.102550155059887e-06, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24419638514518738, + "step": 2990, + "valid_targets_mean": 3697.9, + "valid_targets_min": 838 + }, + { + "epoch": 5.546296296296296, + "grad_norm": 0.6297100411829605, + "learning_rate": 5.041095649837429e-06, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25412261486053467, + "step": 2995, + "valid_targets_mean": 3376.1, + "valid_targets_min": 496 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.49356981841182485, + "learning_rate": 4.97996005609215e-06, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30119216442108154, + "step": 3000, + "valid_targets_mean": 5648.1, + "valid_targets_min": 821 + }, + { + "epoch": 5.564814814814815, + "grad_norm": 0.5039612262029771, + "learning_rate": 4.919144677184377e-06, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21342125535011292, + "step": 3005, + "valid_targets_mean": 4504.8, + "valid_targets_min": 1713 + }, + { + "epoch": 5.574074074074074, + "grad_norm": 0.7773320110901791, + "learning_rate": 4.858650809647718e-06, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2356467843055725, + "step": 3010, + "valid_targets_mean": 4336.1, + "valid_targets_min": 2210 + }, + { + "epoch": 5.583333333333333, + "grad_norm": 0.53775868558781, + "learning_rate": 4.798479743161443e-06, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29584944248199463, + "step": 3015, + "valid_targets_mean": 5161.2, + "valid_targets_min": 420 + }, + { + "epoch": 5.592592592592593, + "grad_norm": 0.5399699724215403, + "learning_rate": 4.73863276052295e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25342124700546265, + "step": 3020, + "valid_targets_mean": 4572.4, + "valid_targets_min": 2055 + }, + { + "epoch": 5.601851851851852, + "grad_norm": 0.559109770350913, + "learning_rate": 4.679111137620442e-06, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2064143568277359, + "step": 3025, + "valid_targets_mean": 4368.1, + "valid_targets_min": 1771 + }, + { + "epoch": 5.611111111111111, + "grad_norm": 0.5797401297885008, + "learning_rate": 4.619916143405734e-06, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27342820167541504, + "step": 3030, + "valid_targets_mean": 4007.9, + "valid_targets_min": 641 + }, + { + "epoch": 5.62037037037037, + "grad_norm": 0.5973678981169844, + "learning_rate": 4.561049039867167e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2151966243982315, + "step": 3035, + "valid_targets_mean": 3154.9, + "valid_targets_min": 1733 + }, + { + "epoch": 5.62962962962963, + "grad_norm": 0.6286394623427501, + "learning_rate": 4.502511082002748e-06, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21091020107269287, + "step": 3040, + "valid_targets_mean": 3375.3, + "valid_targets_min": 1700 + }, + { + "epoch": 5.638888888888889, + "grad_norm": 0.5797903289542194, + "learning_rate": 4.44430351779334e-06, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548823356628418, + "step": 3045, + "valid_targets_mean": 3822.7, + "valid_targets_min": 827 + }, + { + "epoch": 5.648148148148148, + "grad_norm": 0.5395936599039631, + "learning_rate": 4.386427588176121e-06, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24894863367080688, + "step": 3050, + "valid_targets_mean": 4198.8, + "valid_targets_min": 1272 + }, + { + "epoch": 5.657407407407407, + "grad_norm": 0.5426930111027483, + "learning_rate": 4.328884527018067e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23337554931640625, + "step": 3055, + "valid_targets_mean": 4351.1, + "valid_targets_min": 2103 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 0.5604242942596068, + "learning_rate": 4.271675561089676e-06, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2611788511276245, + "step": 3060, + "valid_targets_mean": 3893.8, + "valid_targets_min": 708 + }, + { + "epoch": 5.675925925925926, + "grad_norm": 0.5140701008443176, + "learning_rate": 4.214801910038831e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084433138370514, + "step": 3065, + "valid_targets_mean": 4512.9, + "valid_targets_min": 2627 + }, + { + "epoch": 5.685185185185185, + "grad_norm": 0.6183694282041834, + "learning_rate": 4.1582647863647565e-06, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2690351605415344, + "step": 3070, + "valid_targets_mean": 3642.6, + "valid_targets_min": 1908 + }, + { + "epoch": 5.694444444444445, + "grad_norm": 0.5868218733401469, + "learning_rate": 4.102065395392208e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24894368648529053, + "step": 3075, + "valid_targets_mean": 3544.2, + "valid_targets_min": 647 + }, + { + "epoch": 5.703703703703704, + "grad_norm": 0.5411202176861103, + "learning_rate": 4.04620493524575e-06, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515827715396881, + "step": 3080, + "valid_targets_mean": 4001.4, + "valid_targets_min": 1591 + }, + { + "epoch": 5.712962962962963, + "grad_norm": 0.5722421499791819, + "learning_rate": 3.990684596824219e-06, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2952159643173218, + "step": 3085, + "valid_targets_mean": 4322.0, + "valid_targets_min": 764 + }, + { + "epoch": 5.722222222222222, + "grad_norm": 0.547817757859327, + "learning_rate": 3.93550556377535e-06, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19061464071273804, + "step": 3090, + "valid_targets_mean": 3755.5, + "valid_targets_min": 472 + }, + { + "epoch": 5.731481481481482, + "grad_norm": 0.6502265294741346, + "learning_rate": 3.880669012470515e-06, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23853082954883575, + "step": 3095, + "valid_targets_mean": 3397.8, + "valid_targets_min": 880 + }, + { + "epoch": 5.7407407407407405, + "grad_norm": 0.5857577530105953, + "learning_rate": 3.826176111979673e-06, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22189410030841827, + "step": 3100, + "valid_targets_mean": 4695.8, + "valid_targets_min": 671 + }, + { + "epoch": 5.75, + "grad_norm": 0.5846313843567592, + "learning_rate": 3.7720280240464145e-06, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23350340127944946, + "step": 3105, + "valid_targets_mean": 3510.4, + "valid_targets_min": 480 + }, + { + "epoch": 5.7592592592592595, + "grad_norm": 0.5158563827646333, + "learning_rate": 3.7182259030632305e-06, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2879447937011719, + "step": 3110, + "valid_targets_mean": 4997.9, + "valid_targets_min": 637 + }, + { + "epoch": 5.768518518518518, + "grad_norm": 0.5925915812377698, + "learning_rate": 3.6647708960468696e-06, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17236462235450745, + "step": 3115, + "valid_targets_mean": 3198.4, + "valid_targets_min": 397 + }, + { + "epoch": 5.777777777777778, + "grad_norm": 0.5049665240080519, + "learning_rate": 3.6116641426138933e-06, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20963430404663086, + "step": 3120, + "valid_targets_mean": 4305.9, + "valid_targets_min": 545 + }, + { + "epoch": 5.787037037037037, + "grad_norm": 0.49581708730857366, + "learning_rate": 3.5589067749564054e-06, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17507000267505646, + "step": 3125, + "valid_targets_mean": 4279.4, + "valid_targets_min": 1984 + }, + { + "epoch": 5.796296296296296, + "grad_norm": 0.533046990001763, + "learning_rate": 3.5064999178178648e-06, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27857914566993713, + "step": 3130, + "valid_targets_mean": 4669.2, + "valid_targets_min": 2074 + }, + { + "epoch": 5.805555555555555, + "grad_norm": 0.5616028974184716, + "learning_rate": 3.454444688469165e-06, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2025943398475647, + "step": 3135, + "valid_targets_mean": 3585.2, + "valid_targets_min": 1703 + }, + { + "epoch": 5.814814814814815, + "grad_norm": 0.5499230074351966, + "learning_rate": 3.4027421966847675e-06, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20543722808361053, + "step": 3140, + "valid_targets_mean": 4170.7, + "valid_targets_min": 1919 + }, + { + "epoch": 5.824074074074074, + "grad_norm": 0.5611834664864449, + "learning_rate": 3.3513935447190595e-06, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2410752773284912, + "step": 3145, + "valid_targets_mean": 5189.0, + "valid_targets_min": 542 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 0.584145430075303, + "learning_rate": 3.3003998272828676e-06, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21233102679252625, + "step": 3150, + "valid_targets_mean": 3442.8, + "valid_targets_min": 635 + }, + { + "epoch": 5.842592592592593, + "grad_norm": 0.487543792074454, + "learning_rate": 3.2497621315200958e-06, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19998474419116974, + "step": 3155, + "valid_targets_mean": 4951.9, + "valid_targets_min": 1450 + }, + { + "epoch": 5.851851851851852, + "grad_norm": 0.5178454713647473, + "learning_rate": 3.199481536984572e-06, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23643583059310913, + "step": 3160, + "valid_targets_mean": 4172.1, + "valid_targets_min": 1883 + }, + { + "epoch": 5.861111111111111, + "grad_norm": 0.5368224607954661, + "learning_rate": 3.149559115617009e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17453108727931976, + "step": 3165, + "valid_targets_mean": 3588.9, + "valid_targets_min": 818 + }, + { + "epoch": 5.87037037037037, + "grad_norm": 0.5771162580976622, + "learning_rate": 3.099995931722175e-06, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2502521574497223, + "step": 3170, + "valid_targets_mean": 4165.6, + "valid_targets_min": 2273 + }, + { + "epoch": 5.87962962962963, + "grad_norm": 0.5851573775898512, + "learning_rate": 3.050793041946183e-06, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30304640531539917, + "step": 3175, + "valid_targets_mean": 4239.8, + "valid_targets_min": 724 + }, + { + "epoch": 5.888888888888889, + "grad_norm": 0.45474856609735226, + "learning_rate": 3.001951495253972e-06, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19552862644195557, + "step": 3180, + "valid_targets_mean": 4980.6, + "valid_targets_min": 572 + }, + { + "epoch": 5.898148148148148, + "grad_norm": 0.623558442164306, + "learning_rate": 2.953472332906959e-06, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29056739807128906, + "step": 3185, + "valid_targets_mean": 3711.6, + "valid_targets_min": 1514 + }, + { + "epoch": 5.907407407407407, + "grad_norm": 0.49621126539313987, + "learning_rate": 2.905356588440811e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2211723029613495, + "step": 3190, + "valid_targets_mean": 5076.4, + "valid_targets_min": 1947 + }, + { + "epoch": 5.916666666666667, + "grad_norm": 0.51673830714577, + "learning_rate": 2.857605287643437e-06, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18910914659500122, + "step": 3195, + "valid_targets_mean": 3844.3, + "valid_targets_min": 733 + }, + { + "epoch": 5.925925925925926, + "grad_norm": 0.5638628522537501, + "learning_rate": 2.8102194485331e-06, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2086801677942276, + "step": 3200, + "valid_targets_mean": 4155.5, + "valid_targets_min": 2057 + }, + { + "epoch": 5.935185185185185, + "grad_norm": 0.5790573290649278, + "learning_rate": 2.763200081336721e-06, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21793599426746368, + "step": 3205, + "valid_targets_mean": 3484.1, + "valid_targets_min": 368 + }, + { + "epoch": 5.944444444444445, + "grad_norm": 0.6122242849287904, + "learning_rate": 2.7165481884683576e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2835915982723236, + "step": 3210, + "valid_targets_mean": 3717.0, + "valid_targets_min": 747 + }, + { + "epoch": 5.953703703703704, + "grad_norm": 0.5672715926891396, + "learning_rate": 2.6702647645077973e-06, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21978811919689178, + "step": 3215, + "valid_targets_mean": 4550.6, + "valid_targets_min": 773 + }, + { + "epoch": 5.962962962962963, + "grad_norm": 0.6351867703165379, + "learning_rate": 2.6243507961793936e-06, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2440010905265808, + "step": 3220, + "valid_targets_mean": 3087.8, + "valid_targets_min": 981 + }, + { + "epoch": 5.972222222222222, + "grad_norm": 0.6513878183905164, + "learning_rate": 2.5788072623309977e-06, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559264600276947, + "step": 3225, + "valid_targets_mean": 2916.1, + "valid_targets_min": 317 + }, + { + "epoch": 5.981481481481482, + "grad_norm": 0.519478406666749, + "learning_rate": 2.5336351339131147e-06, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1981564164161682, + "step": 3230, + "valid_targets_mean": 4137.3, + "valid_targets_min": 1696 + }, + { + "epoch": 5.9907407407407405, + "grad_norm": 0.6242847386183599, + "learning_rate": 2.488835373958185e-06, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28682032227516174, + "step": 3235, + "valid_targets_mean": 3630.4, + "valid_targets_min": 1887 + }, + { + "epoch": 6.0, + "grad_norm": 0.6825780673728339, + "learning_rate": 2.444408937560059e-06, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25937968492507935, + "step": 3240, + "valid_targets_mean": 2739.1, + "valid_targets_min": 466 + }, + { + "epoch": 6.0092592592592595, + "grad_norm": 0.539754241133728, + "learning_rate": 2.400356771853651e-06, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1831408143043518, + "step": 3245, + "valid_targets_mean": 3674.9, + "valid_targets_min": 1668 + }, + { + "epoch": 6.018518518518518, + "grad_norm": 0.46005254806761847, + "learning_rate": 2.3566798159947157e-06, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20222413539886475, + "step": 3250, + "valid_targets_mean": 4828.1, + "valid_targets_min": 1945 + }, + { + "epoch": 6.027777777777778, + "grad_norm": 0.4444020837622067, + "learning_rate": 2.3133790011398618e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1549903154373169, + "step": 3255, + "valid_targets_mean": 4708.4, + "valid_targets_min": 1799 + }, + { + "epoch": 6.037037037037037, + "grad_norm": 0.5920689398499658, + "learning_rate": 2.2704552504266664e-06, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25390511751174927, + "step": 3260, + "valid_targets_mean": 3571.1, + "valid_targets_min": 552 + }, + { + "epoch": 6.046296296296297, + "grad_norm": 0.526798188540084, + "learning_rate": 2.2279094789540244e-06, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23036353290081024, + "step": 3265, + "valid_targets_mean": 4382.3, + "valid_targets_min": 771 + }, + { + "epoch": 6.055555555555555, + "grad_norm": 0.5759163726963563, + "learning_rate": 2.185742593762614e-06, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25047504901885986, + "step": 3270, + "valid_targets_mean": 3867.2, + "valid_targets_min": 721 + }, + { + "epoch": 6.064814814814815, + "grad_norm": 0.48826580428064326, + "learning_rate": 2.143955493815577e-06, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2461843639612198, + "step": 3275, + "valid_targets_mean": 5183.8, + "valid_targets_min": 1736 + }, + { + "epoch": 6.074074074074074, + "grad_norm": 0.5210334298756018, + "learning_rate": 2.1025490699793516e-06, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20328880846500397, + "step": 3280, + "valid_targets_mean": 4089.4, + "valid_targets_min": 986 + }, + { + "epoch": 6.083333333333333, + "grad_norm": 0.5220951505604092, + "learning_rate": 2.0615242050046656e-06, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19010621309280396, + "step": 3285, + "valid_targets_mean": 4184.5, + "valid_targets_min": 894 + }, + { + "epoch": 6.092592592592593, + "grad_norm": 0.566804464053669, + "learning_rate": 2.020881773507739e-06, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25445860624313354, + "step": 3290, + "valid_targets_mean": 4107.2, + "valid_targets_min": 1939 + }, + { + "epoch": 6.101851851851852, + "grad_norm": 0.5330996196349423, + "learning_rate": 1.9806226419516195e-06, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357954978942871, + "step": 3295, + "valid_targets_mean": 4824.0, + "valid_targets_min": 574 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 0.5686340488301583, + "learning_rate": 1.9407476686277095e-06, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22579503059387207, + "step": 3300, + "valid_targets_mean": 4233.5, + "valid_targets_min": 739 + }, + { + "epoch": 6.12037037037037, + "grad_norm": 0.8954557853890478, + "learning_rate": 1.9012577036374936e-06, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578815519809723, + "step": 3305, + "valid_targets_mean": 3204.6, + "valid_targets_min": 767 + }, + { + "epoch": 6.12962962962963, + "grad_norm": 0.5650667976761549, + "learning_rate": 1.8621535888743825e-06, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621210813522339, + "step": 3310, + "valid_targets_mean": 4207.1, + "valid_targets_min": 2564 + }, + { + "epoch": 6.138888888888889, + "grad_norm": 0.6316650802316781, + "learning_rate": 1.8234361580057802e-06, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570689618587494, + "step": 3315, + "valid_targets_mean": 3737.0, + "valid_targets_min": 766 + }, + { + "epoch": 6.148148148148148, + "grad_norm": 0.5704491276714678, + "learning_rate": 1.7851062364553184e-06, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22671329975128174, + "step": 3320, + "valid_targets_mean": 3896.4, + "valid_targets_min": 1714 + }, + { + "epoch": 6.157407407407407, + "grad_norm": 0.5984289258264732, + "learning_rate": 1.7471646413852439e-06, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23753738403320312, + "step": 3325, + "valid_targets_mean": 3671.3, + "valid_targets_min": 1769 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 0.5333521022284862, + "learning_rate": 1.709612181678999e-06, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22245770692825317, + "step": 3330, + "valid_targets_mean": 4270.4, + "valid_targets_min": 1961 + }, + { + "epoch": 6.175925925925926, + "grad_norm": 0.6230366161491935, + "learning_rate": 1.6724496579239979e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22154787182807922, + "step": 3335, + "valid_targets_mean": 3099.2, + "valid_targets_min": 629 + }, + { + "epoch": 6.185185185185185, + "grad_norm": 0.6069509329922341, + "learning_rate": 1.6356778623945223e-06, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.248618945479393, + "step": 3340, + "valid_targets_mean": 3467.0, + "valid_targets_min": 661 + }, + { + "epoch": 6.194444444444445, + "grad_norm": 0.6234149579450843, + "learning_rate": 1.5992975790348642e-06, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23241698741912842, + "step": 3345, + "valid_targets_mean": 3599.1, + "valid_targets_min": 1668 + }, + { + "epoch": 6.203703703703703, + "grad_norm": 0.6001381504985039, + "learning_rate": 1.5633095834425983e-06, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19718721508979797, + "step": 3350, + "valid_targets_mean": 3149.3, + "valid_targets_min": 542 + }, + { + "epoch": 6.212962962962963, + "grad_norm": 0.5362275830715411, + "learning_rate": 1.527714642852045e-06, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19483308494091034, + "step": 3355, + "valid_targets_mean": 3562.2, + "valid_targets_min": 511 + }, + { + "epoch": 6.222222222222222, + "grad_norm": 0.5695773907229609, + "learning_rate": 1.492513516117915e-06, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20090246200561523, + "step": 3360, + "valid_targets_mean": 3909.8, + "valid_targets_min": 1802 + }, + { + "epoch": 6.231481481481482, + "grad_norm": 0.6005583636288468, + "learning_rate": 1.457706953699145e-06, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19939187169075012, + "step": 3365, + "valid_targets_mean": 3433.8, + "valid_targets_min": 1569 + }, + { + "epoch": 6.2407407407407405, + "grad_norm": 0.5007154171778021, + "learning_rate": 1.423295697642868e-06, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21219304203987122, + "step": 3370, + "valid_targets_mean": 4683.0, + "valid_targets_min": 2362 + }, + { + "epoch": 6.25, + "grad_norm": 0.5968890483515067, + "learning_rate": 1.3892804815686312e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734060287475586, + "step": 3375, + "valid_targets_mean": 4207.8, + "valid_targets_min": 1660 + }, + { + "epoch": 6.2592592592592595, + "grad_norm": 0.6330168182645113, + "learning_rate": 1.35566203065272e-06, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22437870502471924, + "step": 3380, + "valid_targets_mean": 3316.6, + "valid_targets_min": 1267 + }, + { + "epoch": 6.268518518518518, + "grad_norm": 0.577655871447151, + "learning_rate": 1.3224410616127292e-06, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24034109711647034, + "step": 3385, + "valid_targets_mean": 4207.6, + "valid_targets_min": 342 + }, + { + "epoch": 6.277777777777778, + "grad_norm": 0.5856036889391484, + "learning_rate": 1.2896182826922577e-06, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569791078567505, + "step": 3390, + "valid_targets_mean": 3788.3, + "valid_targets_min": 1142 + }, + { + "epoch": 6.287037037037037, + "grad_norm": 0.622367825324181, + "learning_rate": 1.2571943936458197e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18795576691627502, + "step": 3395, + "valid_targets_mean": 3135.8, + "valid_targets_min": 595 + }, + { + "epoch": 6.296296296296296, + "grad_norm": 0.5507538621859848, + "learning_rate": 1.2251700857239412e-06, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16971886157989502, + "step": 3400, + "valid_targets_mean": 3565.2, + "valid_targets_min": 812 + }, + { + "epoch": 6.305555555555555, + "grad_norm": 0.5709347005687355, + "learning_rate": 1.1935460416583889e-06, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19665175676345825, + "step": 3405, + "valid_targets_mean": 4646.4, + "valid_targets_min": 2430 + }, + { + "epoch": 6.314814814814815, + "grad_norm": 0.5551338557244925, + "learning_rate": 1.162322935647655e-06, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23359957337379456, + "step": 3410, + "valid_targets_mean": 3922.3, + "valid_targets_min": 1700 + }, + { + "epoch": 6.324074074074074, + "grad_norm": 0.6081424722887298, + "learning_rate": 1.1315014333425455e-06, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27564293146133423, + "step": 3415, + "valid_targets_mean": 3644.6, + "valid_targets_min": 672 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 0.6017513819380911, + "learning_rate": 1.101082191832017e-06, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25283360481262207, + "step": 3420, + "valid_targets_mean": 3448.8, + "valid_targets_min": 580 + }, + { + "epoch": 6.342592592592593, + "grad_norm": 0.6051873506386266, + "learning_rate": 1.0710658596291612e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810269594192505, + "step": 3425, + "valid_targets_mean": 4163.6, + "valid_targets_min": 729 + }, + { + "epoch": 6.351851851851852, + "grad_norm": 0.5949919214922811, + "learning_rate": 1.0414530766573661e-06, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20459462702274323, + "step": 3430, + "valid_targets_mean": 3321.0, + "valid_targets_min": 1666 + }, + { + "epoch": 6.361111111111111, + "grad_norm": 0.5564753367621429, + "learning_rate": 1.0122444742366945e-06, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23238684237003326, + "step": 3435, + "valid_targets_mean": 4126.9, + "valid_targets_min": 840 + }, + { + "epoch": 6.37037037037037, + "grad_norm": 0.4818681450985929, + "learning_rate": 9.83440675070404e-07, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2342509627342224, + "step": 3440, + "valid_targets_mean": 5746.8, + "valid_targets_min": 1421 + }, + { + "epoch": 6.37962962962963, + "grad_norm": 0.6306703109771072, + "learning_rate": 9.550422932316938e-07, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181071788072586, + "step": 3445, + "valid_targets_mean": 3027.6, + "valid_targets_min": 313 + }, + { + "epoch": 6.388888888888889, + "grad_norm": 0.7109368059368608, + "learning_rate": 9.270499341505901e-07, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21922603249549866, + "step": 3450, + "valid_targets_mean": 3950.0, + "valid_targets_min": 425 + }, + { + "epoch": 6.398148148148148, + "grad_norm": 0.5748329728377656, + "learning_rate": 8.994641946010474e-07, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26003679633140564, + "step": 3455, + "valid_targets_mean": 4081.2, + "valid_targets_min": 764 + }, + { + "epoch": 6.407407407407407, + "grad_norm": 0.6855173336098317, + "learning_rate": 8.722856626882415e-07, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373206913471222, + "step": 3460, + "valid_targets_mean": 3279.6, + "valid_targets_min": 764 + }, + { + "epoch": 6.416666666666667, + "grad_norm": 0.5616037615887454, + "learning_rate": 8.455149178360012e-07, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2553655803203583, + "step": 3465, + "valid_targets_mean": 4561.8, + "valid_targets_min": 594 + }, + { + "epoch": 6.425925925925926, + "grad_norm": 0.5276114854080077, + "learning_rate": 8.191525307744896e-07, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24148723483085632, + "step": 3470, + "valid_targets_mean": 4596.1, + "valid_targets_min": 880 + }, + { + "epoch": 6.435185185185185, + "grad_norm": 0.5535681163787745, + "learning_rate": 7.931990635280052e-07, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23151087760925293, + "step": 3475, + "valid_targets_mean": 4101.1, + "valid_targets_min": 215 + }, + { + "epoch": 6.444444444444445, + "grad_norm": 0.5364895072322574, + "learning_rate": 7.676550694030172e-07, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2174125611782074, + "step": 3480, + "valid_targets_mean": 4606.8, + "valid_targets_min": 570 + }, + { + "epoch": 6.453703703703704, + "grad_norm": 0.5101763745085794, + "learning_rate": 7.425210929763738e-07, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.211885005235672, + "step": 3485, + "valid_targets_mean": 4786.0, + "valid_targets_min": 829 + }, + { + "epoch": 6.462962962962963, + "grad_norm": 0.5954727436115828, + "learning_rate": 7.17797670083673e-07, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21472561359405518, + "step": 3490, + "valid_targets_mean": 3507.8, + "valid_targets_min": 541 + }, + { + "epoch": 6.472222222222222, + "grad_norm": 0.5173291565619594, + "learning_rate": 6.934853278078635e-07, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20479455590248108, + "step": 3495, + "valid_targets_mean": 3915.1, + "valid_targets_min": 1907 + }, + { + "epoch": 6.481481481481482, + "grad_norm": 0.5598429190290807, + "learning_rate": 6.695845844679816e-07, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.265296995639801, + "step": 3500, + "valid_targets_mean": 4297.6, + "valid_targets_min": 569 + }, + { + "epoch": 6.4907407407407405, + "grad_norm": 0.644127732439113, + "learning_rate": 6.460959496081276e-07, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19158616662025452, + "step": 3505, + "valid_targets_mean": 3284.1, + "valid_targets_min": 1857 + }, + { + "epoch": 6.5, + "grad_norm": 0.46580841568793, + "learning_rate": 6.230199239865808e-07, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16796517372131348, + "step": 3510, + "valid_targets_mean": 4302.2, + "valid_targets_min": 2752 + }, + { + "epoch": 6.5092592592592595, + "grad_norm": 0.5292186252116317, + "learning_rate": 6.003569995651304e-07, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19200381636619568, + "step": 3515, + "valid_targets_mean": 4388.9, + "valid_targets_min": 512 + }, + { + "epoch": 6.518518518518518, + "grad_norm": 0.5737139503739112, + "learning_rate": 5.781076594986035e-07, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559890151023865, + "step": 3520, + "valid_targets_mean": 4729.4, + "valid_targets_min": 743 + }, + { + "epoch": 6.527777777777778, + "grad_norm": 0.601281290007252, + "learning_rate": 5.562723781245316e-07, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20773258805274963, + "step": 3525, + "valid_targets_mean": 3555.2, + "valid_targets_min": 1591 + }, + { + "epoch": 6.537037037037037, + "grad_norm": 0.5889092918003285, + "learning_rate": 5.348516209530741e-07, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2901744246482849, + "step": 3530, + "valid_targets_mean": 4212.6, + "valid_targets_min": 616 + }, + { + "epoch": 6.546296296296296, + "grad_norm": 0.5058670706615777, + "learning_rate": 5.13845844657066e-07, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19678425788879395, + "step": 3535, + "valid_targets_mean": 4579.2, + "valid_targets_min": 1761 + }, + { + "epoch": 6.555555555555555, + "grad_norm": 0.6257218263377696, + "learning_rate": 4.93255497062295e-07, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24043655395507812, + "step": 3540, + "valid_targets_mean": 3257.0, + "valid_targets_min": 647 + }, + { + "epoch": 6.564814814814815, + "grad_norm": 0.5505549796788423, + "learning_rate": 4.730810171379574e-07, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2459501177072525, + "step": 3545, + "valid_targets_mean": 4357.0, + "valid_targets_min": 733 + }, + { + "epoch": 6.574074074074074, + "grad_norm": 0.6028654489237595, + "learning_rate": 4.533228349872887e-07, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2598051428794861, + "step": 3550, + "valid_targets_mean": 3666.0, + "valid_targets_min": 649 + }, + { + "epoch": 6.583333333333333, + "grad_norm": 0.6185888512407045, + "learning_rate": 4.339813718384056e-07, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21807453036308289, + "step": 3555, + "valid_targets_mean": 3160.6, + "valid_targets_min": 513 + }, + { + "epoch": 6.592592592592593, + "grad_norm": 0.6008888131850063, + "learning_rate": 4.1505704003531155e-07, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2528533339500427, + "step": 3560, + "valid_targets_mean": 3871.8, + "valid_targets_min": 582 + }, + { + "epoch": 6.601851851851852, + "grad_norm": 0.5898425554611046, + "learning_rate": 3.965502430291235e-07, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124367356300354, + "step": 3565, + "valid_targets_mean": 4025.0, + "valid_targets_min": 813 + }, + { + "epoch": 6.611111111111111, + "grad_norm": 0.5495965363011839, + "learning_rate": 3.784613753694566e-07, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2144167423248291, + "step": 3570, + "valid_targets_mean": 4046.5, + "valid_targets_min": 991 + }, + { + "epoch": 6.62037037037037, + "grad_norm": 0.6165423333917514, + "learning_rate": 3.607908226960155e-07, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21597029268741608, + "step": 3575, + "valid_targets_mean": 3308.0, + "valid_targets_min": 566 + }, + { + "epoch": 6.62962962962963, + "grad_norm": 0.45236188898394386, + "learning_rate": 3.4353896173038524e-07, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2370537519454956, + "step": 3580, + "valid_targets_mean": 5711.6, + "valid_targets_min": 670 + }, + { + "epoch": 6.638888888888889, + "grad_norm": 0.56933040396151, + "learning_rate": 3.2670616026797776e-07, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1997252255678177, + "step": 3585, + "valid_targets_mean": 3777.3, + "valid_targets_min": 818 + }, + { + "epoch": 6.648148148148148, + "grad_norm": 0.5659167186719102, + "learning_rate": 3.102927771702091e-07, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20919036865234375, + "step": 3590, + "valid_targets_mean": 3730.8, + "valid_targets_min": 770 + }, + { + "epoch": 6.657407407407407, + "grad_norm": 0.5079303353870578, + "learning_rate": 2.942991623568436e-07, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19151973724365234, + "step": 3595, + "valid_targets_mean": 4735.9, + "valid_targets_min": 2372 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.5137082597472087, + "learning_rate": 2.7872565679852414e-07, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293935865163803, + "step": 3600, + "valid_targets_mean": 4664.9, + "valid_targets_min": 2201 + }, + { + "epoch": 6.675925925925926, + "grad_norm": 0.5409545059748535, + "learning_rate": 2.635725925095245e-07, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23130936920642853, + "step": 3605, + "valid_targets_mean": 4416.5, + "valid_targets_min": 1904 + }, + { + "epoch": 6.685185185185185, + "grad_norm": 0.555324535146906, + "learning_rate": 2.4884029254064636e-07, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21047340333461761, + "step": 3610, + "valid_targets_mean": 3865.1, + "valid_targets_min": 2178 + }, + { + "epoch": 6.694444444444445, + "grad_norm": 0.4829255723553718, + "learning_rate": 2.3452907097235355e-07, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1802108883857727, + "step": 3615, + "valid_targets_mean": 3995.1, + "valid_targets_min": 764 + }, + { + "epoch": 6.703703703703704, + "grad_norm": 0.5450225699083614, + "learning_rate": 2.2063923290805756e-07, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21498540043830872, + "step": 3620, + "valid_targets_mean": 3961.8, + "valid_targets_min": 1708 + }, + { + "epoch": 6.712962962962963, + "grad_norm": 0.5695770354778458, + "learning_rate": 2.0717107446762696e-07, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19042396545410156, + "step": 3625, + "valid_targets_mean": 3396.2, + "valid_targets_min": 665 + }, + { + "epoch": 6.722222222222222, + "grad_norm": 0.5764051828461008, + "learning_rate": 1.9412488278107044e-07, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19108936190605164, + "step": 3630, + "valid_targets_mean": 3522.6, + "valid_targets_min": 1445 + }, + { + "epoch": 6.731481481481482, + "grad_norm": 0.5141025615330546, + "learning_rate": 1.8150093598240825e-07, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2816708981990814, + "step": 3635, + "valid_targets_mean": 5298.4, + "valid_targets_min": 1615 + }, + { + "epoch": 6.7407407407407405, + "grad_norm": 0.5516459278281802, + "learning_rate": 1.69299503203757e-07, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20504800975322723, + "step": 3640, + "valid_targets_mean": 3981.1, + "valid_targets_min": 2114 + }, + { + "epoch": 6.75, + "grad_norm": 0.4418946934572028, + "learning_rate": 1.5752084456957416e-07, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24999968707561493, + "step": 3645, + "valid_targets_mean": 5980.2, + "valid_targets_min": 2761 + }, + { + "epoch": 6.7592592592592595, + "grad_norm": 0.5163142937241847, + "learning_rate": 1.4616521119112937e-07, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2180902659893036, + "step": 3650, + "valid_targets_mean": 4460.1, + "valid_targets_min": 1782 + }, + { + "epoch": 6.768518518518518, + "grad_norm": 0.5275315251769807, + "learning_rate": 1.3523284516113955e-07, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2362240105867386, + "step": 3655, + "valid_targets_mean": 4463.1, + "valid_targets_min": 856 + }, + { + "epoch": 6.777777777777778, + "grad_norm": 0.5932303316312726, + "learning_rate": 1.2472397954861549e-07, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25914400815963745, + "step": 3660, + "valid_targets_mean": 4156.2, + "valid_targets_min": 1887 + }, + { + "epoch": 6.787037037037037, + "grad_norm": 0.5704356783239624, + "learning_rate": 1.1463883839388346e-07, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29468631744384766, + "step": 3665, + "valid_targets_mean": 4419.7, + "valid_targets_min": 836 + }, + { + "epoch": 6.796296296296296, + "grad_norm": 0.6176484972834143, + "learning_rate": 1.0497763670382022e-07, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24018700420856476, + "step": 3670, + "valid_targets_mean": 3530.4, + "valid_targets_min": 368 + }, + { + "epoch": 6.805555555555555, + "grad_norm": 0.5827116087848868, + "learning_rate": 9.574058044725665e-08, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357904613018036, + "step": 3675, + "valid_targets_mean": 3568.0, + "valid_targets_min": 2129 + }, + { + "epoch": 6.814814814814815, + "grad_norm": 0.5334174149912604, + "learning_rate": 8.692786655060348e-08, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27739739418029785, + "step": 3680, + "valid_targets_mean": 4644.2, + "valid_targets_min": 597 + }, + { + "epoch": 6.824074074074074, + "grad_norm": 0.5908537050626661, + "learning_rate": 7.853968289363245e-08, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1957089900970459, + "step": 3685, + "valid_targets_mean": 4683.8, + "valid_targets_min": 2043 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 0.5260600630535545, + "learning_rate": 7.057620830548617e-08, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18605807423591614, + "step": 3690, + "valid_targets_mean": 4110.4, + "valid_targets_min": 1942 + }, + { + "epoch": 6.842592592592593, + "grad_norm": 0.5916196044721515, + "learning_rate": 6.30376125608656e-08, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218480408191681, + "step": 3695, + "valid_targets_mean": 3644.4, + "valid_targets_min": 570 + }, + { + "epoch": 6.851851851851852, + "grad_norm": 0.514705393546641, + "learning_rate": 5.592405637639742e-08, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20492291450500488, + "step": 3700, + "valid_targets_mean": 4558.1, + "valid_targets_min": 2073 + }, + { + "epoch": 6.861111111111111, + "grad_norm": 0.4524372951551533, + "learning_rate": 4.923569140722118e-08, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28827571868896484, + "step": 3705, + "valid_targets_mean": 6742.3, + "valid_targets_min": 1989 + }, + { + "epoch": 6.87037037037037, + "grad_norm": 0.5258479700135631, + "learning_rate": 4.2972660243749686e-08, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1960398554801941, + "step": 3710, + "valid_targets_mean": 4020.9, + "valid_targets_min": 879 + }, + { + "epoch": 6.87962962962963, + "grad_norm": 0.5383620822998924, + "learning_rate": 3.7135096408631443e-08, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17605650424957275, + "step": 3715, + "valid_targets_mean": 3673.5, + "valid_targets_min": 892 + }, + { + "epoch": 6.888888888888889, + "grad_norm": 0.5022673424281345, + "learning_rate": 3.172312435390401e-08, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21586468815803528, + "step": 3720, + "valid_targets_mean": 4826.8, + "valid_targets_min": 1808 + }, + { + "epoch": 6.898148148148148, + "grad_norm": 0.5150139357949609, + "learning_rate": 2.673685945833615e-08, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24277575314044952, + "step": 3725, + "valid_targets_mean": 5270.2, + "valid_targets_min": 2004 + }, + { + "epoch": 6.907407407407407, + "grad_norm": 0.512483831039239, + "learning_rate": 2.2176408024974228e-08, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23216493427753448, + "step": 3730, + "valid_targets_mean": 4509.7, + "valid_targets_min": 1745 + }, + { + "epoch": 6.916666666666667, + "grad_norm": 0.5381090256017264, + "learning_rate": 1.8041867278875137e-08, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19807547330856323, + "step": 3735, + "valid_targets_mean": 4138.4, + "valid_targets_min": 779 + }, + { + "epoch": 6.925925925925926, + "grad_norm": 0.49323585595517727, + "learning_rate": 1.4333325365030181e-08, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21127669513225555, + "step": 3740, + "valid_targets_mean": 5065.2, + "valid_targets_min": 1890 + }, + { + "epoch": 6.935185185185185, + "grad_norm": 0.555488870581542, + "learning_rate": 1.1050861346488806e-08, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20611155033111572, + "step": 3745, + "valid_targets_mean": 3845.6, + "valid_targets_min": 723 + }, + { + "epoch": 6.944444444444445, + "grad_norm": 0.5336086789856043, + "learning_rate": 8.194545202666604e-09, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1871817260980606, + "step": 3750, + "valid_targets_mean": 3598.5, + "valid_targets_min": 849 + }, + { + "epoch": 6.953703703703704, + "grad_norm": 0.6088047301014547, + "learning_rate": 5.76443782786873e-09, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22356441617012024, + "step": 3755, + "valid_targets_mean": 3994.5, + "valid_targets_min": 1869 + }, + { + "epoch": 6.962962962962963, + "grad_norm": 0.5623518574868789, + "learning_rate": 3.760591029973171e-09, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21949899196624756, + "step": 3760, + "valid_targets_mean": 3838.9, + "valid_targets_min": 928 + }, + { + "epoch": 6.972222222222222, + "grad_norm": 0.6022551873866897, + "learning_rate": 2.1830475293360686e-09, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22909459471702576, + "step": 3765, + "valid_targets_mean": 3225.6, + "valid_targets_min": 656 + }, + { + "epoch": 6.981481481481482, + "grad_norm": 0.5183031403584336, + "learning_rate": 1.0318409578835564e-09, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2013653814792633, + "step": 3770, + "valid_targets_mean": 4304.2, + "valid_targets_min": 446 + }, + { + "epoch": 6.9907407407407405, + "grad_norm": 0.5846907653612194, + "learning_rate": 3.069958583856725e-10, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23626720905303955, + "step": 3775, + "valid_targets_mean": 3642.6, + "valid_targets_min": 1138 + }, + { + "epoch": 7.0, + "grad_norm": 0.5911749795882507, + "learning_rate": 8.527683943437837e-12, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23940664529800415, + "step": 3780, + "valid_targets_mean": 4033.5, + "valid_targets_min": 2079 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23940664529800415, + "step": 3780, + "total_flos": 1019495884914688.0, + "train_loss": 0.14474489343544794, + "train_runtime": 12189.5389, + "train_samples_per_second": 4.959, + "train_steps_per_second": 0.31, + "valid_targets_mean": 4033.5, + "valid_targets_min": 2079 + } + ], + "logging_steps": 5, + "max_steps": 3780, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1019495884914688.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}