a1-crosscodeeval_python / trainer_state.json
EtashGuha's picture
Upload folder using huggingface_hub
8ef8615 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 3780,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009259259259259259,
"grad_norm": 19.112235093128195,
"learning_rate": 4.232804232804233e-07,
"loss": 0.8275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.803094744682312,
"step": 5,
"valid_targets_mean": 3444.3,
"valid_targets_min": 828
},
{
"epoch": 0.018518518518518517,
"grad_norm": 17.940707174937973,
"learning_rate": 9.523809523809525e-07,
"loss": 0.7963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8306764960289001,
"step": 10,
"valid_targets_mean": 4536.1,
"valid_targets_min": 1670
},
{
"epoch": 0.027777777777777776,
"grad_norm": 15.664620071655396,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.7903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8159181475639343,
"step": 15,
"valid_targets_mean": 3574.7,
"valid_targets_min": 435
},
{
"epoch": 0.037037037037037035,
"grad_norm": 10.770635827936216,
"learning_rate": 2.0105820105820108e-06,
"loss": 0.7797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7561392188072205,
"step": 20,
"valid_targets_mean": 4583.4,
"valid_targets_min": 577
},
{
"epoch": 0.046296296296296294,
"grad_norm": 7.426416040771228,
"learning_rate": 2.53968253968254e-06,
"loss": 0.7281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7589821815490723,
"step": 25,
"valid_targets_mean": 3544.4,
"valid_targets_min": 763
},
{
"epoch": 0.05555555555555555,
"grad_norm": 3.713460240688579,
"learning_rate": 3.068783068783069e-06,
"loss": 0.669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6719295978546143,
"step": 30,
"valid_targets_mean": 3903.3,
"valid_targets_min": 697
},
{
"epoch": 0.06481481481481481,
"grad_norm": 2.1703955297067186,
"learning_rate": 3.597883597883598e-06,
"loss": 0.5837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5826764106750488,
"step": 35,
"valid_targets_mean": 3806.2,
"valid_targets_min": 1937
},
{
"epoch": 0.07407407407407407,
"grad_norm": 1.8165232850431967,
"learning_rate": 4.126984126984127e-06,
"loss": 0.6208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6975480318069458,
"step": 40,
"valid_targets_mean": 3366.0,
"valid_targets_min": 878
},
{
"epoch": 0.08333333333333333,
"grad_norm": 1.3433617476391144,
"learning_rate": 4.656084656084656e-06,
"loss": 0.5655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5773887634277344,
"step": 45,
"valid_targets_mean": 4220.3,
"valid_targets_min": 724
},
{
"epoch": 0.09259259259259259,
"grad_norm": 1.1527914357135887,
"learning_rate": 5.185185185185185e-06,
"loss": 0.5912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6037584543228149,
"step": 50,
"valid_targets_mean": 4641.8,
"valid_targets_min": 2175
},
{
"epoch": 0.10185185185185185,
"grad_norm": 0.9246044784239899,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5939217805862427,
"step": 55,
"valid_targets_mean": 4679.5,
"valid_targets_min": 663
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.8146470542392132,
"learning_rate": 6.243386243386243e-06,
"loss": 0.5341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5055123567581177,
"step": 60,
"valid_targets_mean": 3902.7,
"valid_targets_min": 923
},
{
"epoch": 0.12037037037037036,
"grad_norm": 0.7976083063634939,
"learning_rate": 6.772486772486773e-06,
"loss": 0.532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4894016683101654,
"step": 65,
"valid_targets_mean": 3446.8,
"valid_targets_min": 556
},
{
"epoch": 0.12962962962962962,
"grad_norm": 0.7321082473641598,
"learning_rate": 7.301587301587301e-06,
"loss": 0.5207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.46321815252304077,
"step": 70,
"valid_targets_mean": 4182.4,
"valid_targets_min": 2033
},
{
"epoch": 0.1388888888888889,
"grad_norm": 0.7291697059938048,
"learning_rate": 7.830687830687831e-06,
"loss": 0.512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5047429800033569,
"step": 75,
"valid_targets_mean": 3302.7,
"valid_targets_min": 627
},
{
"epoch": 0.14814814814814814,
"grad_norm": 0.7739549159047908,
"learning_rate": 8.35978835978836e-06,
"loss": 0.4734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47949308156967163,
"step": 80,
"valid_targets_mean": 3213.7,
"valid_targets_min": 1752
},
{
"epoch": 0.1574074074074074,
"grad_norm": 0.7111733388587708,
"learning_rate": 8.888888888888888e-06,
"loss": 0.4823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.44027847051620483,
"step": 85,
"valid_targets_mean": 3505.9,
"valid_targets_min": 1994
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.6831309764350063,
"learning_rate": 9.417989417989418e-06,
"loss": 0.4392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40418338775634766,
"step": 90,
"valid_targets_mean": 3209.4,
"valid_targets_min": 512
},
{
"epoch": 0.17592592592592593,
"grad_norm": 0.6213729941542423,
"learning_rate": 9.947089947089947e-06,
"loss": 0.4356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43431776762008667,
"step": 95,
"valid_targets_mean": 3941.1,
"valid_targets_min": 723
},
{
"epoch": 0.18518518518518517,
"grad_norm": 0.7201560667011954,
"learning_rate": 1.0476190476190477e-05,
"loss": 0.4769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5052137970924377,
"step": 100,
"valid_targets_mean": 3591.6,
"valid_targets_min": 880
},
{
"epoch": 0.19444444444444445,
"grad_norm": 0.6092157523393694,
"learning_rate": 1.1005291005291006e-05,
"loss": 0.4378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41137802600860596,
"step": 105,
"valid_targets_mean": 3784.0,
"valid_targets_min": 546
},
{
"epoch": 0.2037037037037037,
"grad_norm": 0.6962357662281705,
"learning_rate": 1.1534391534391536e-05,
"loss": 0.4424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5407352447509766,
"step": 110,
"valid_targets_mean": 4280.3,
"valid_targets_min": 764
},
{
"epoch": 0.21296296296296297,
"grad_norm": 0.6281974355729566,
"learning_rate": 1.2063492063492064e-05,
"loss": 0.4527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4453981816768646,
"step": 115,
"valid_targets_mean": 3953.9,
"valid_targets_min": 1669
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.618723801537215,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.4168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3996565639972687,
"step": 120,
"valid_targets_mean": 4443.4,
"valid_targets_min": 1748
},
{
"epoch": 0.23148148148148148,
"grad_norm": 0.6419954427627002,
"learning_rate": 1.3121693121693123e-05,
"loss": 0.4195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4261256754398346,
"step": 125,
"valid_targets_mean": 4298.9,
"valid_targets_min": 2102
},
{
"epoch": 0.24074074074074073,
"grad_norm": 0.8161990345008757,
"learning_rate": 1.3650793650793652e-05,
"loss": 0.4461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5341705083847046,
"step": 130,
"valid_targets_mean": 4825.4,
"valid_targets_min": 1620
},
{
"epoch": 0.25,
"grad_norm": 0.8509610334596335,
"learning_rate": 1.417989417989418e-05,
"loss": 0.436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.44645223021507263,
"step": 135,
"valid_targets_mean": 4780.1,
"valid_targets_min": 789
},
{
"epoch": 0.25925925925925924,
"grad_norm": 0.5904975210645568,
"learning_rate": 1.470899470899471e-05,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40643012523651123,
"step": 140,
"valid_targets_mean": 4096.2,
"valid_targets_min": 541
},
{
"epoch": 0.26851851851851855,
"grad_norm": 0.6739039533538748,
"learning_rate": 1.523809523809524e-05,
"loss": 0.389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4152180552482605,
"step": 145,
"valid_targets_mean": 4372.8,
"valid_targets_min": 1808
},
{
"epoch": 0.2777777777777778,
"grad_norm": 0.6677022093982549,
"learning_rate": 1.576719576719577e-05,
"loss": 0.3913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36584043502807617,
"step": 150,
"valid_targets_mean": 3181.2,
"valid_targets_min": 892
},
{
"epoch": 0.28703703703703703,
"grad_norm": 0.6028158751437971,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.4025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3777807652950287,
"step": 155,
"valid_targets_mean": 3913.8,
"valid_targets_min": 1488
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.5663817438154167,
"learning_rate": 1.6825396825396828e-05,
"loss": 0.433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42696279287338257,
"step": 160,
"valid_targets_mean": 4789.0,
"valid_targets_min": 1714
},
{
"epoch": 0.3055555555555556,
"grad_norm": 0.7015646909300565,
"learning_rate": 1.7354497354497356e-05,
"loss": 0.4013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37903106212615967,
"step": 165,
"valid_targets_mean": 4173.1,
"valid_targets_min": 520
},
{
"epoch": 0.3148148148148148,
"grad_norm": 0.6234945858741571,
"learning_rate": 1.7883597883597884e-05,
"loss": 0.3997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3800799548625946,
"step": 170,
"valid_targets_mean": 4546.9,
"valid_targets_min": 1768
},
{
"epoch": 0.32407407407407407,
"grad_norm": 0.598847110558081,
"learning_rate": 1.8412698412698415e-05,
"loss": 0.3828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3486472964286804,
"step": 175,
"valid_targets_mean": 5024.6,
"valid_targets_min": 1684
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.850399133293019,
"learning_rate": 1.8941798941798943e-05,
"loss": 0.3629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3398283123970032,
"step": 180,
"valid_targets_mean": 4075.1,
"valid_targets_min": 1613
},
{
"epoch": 0.3425925925925926,
"grad_norm": 0.6301928805898117,
"learning_rate": 1.947089947089947e-05,
"loss": 0.4053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28791162371635437,
"step": 185,
"valid_targets_mean": 3739.5,
"valid_targets_min": 790
},
{
"epoch": 0.35185185185185186,
"grad_norm": 0.6696380315329734,
"learning_rate": 2e-05,
"loss": 0.3974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4022134840488434,
"step": 190,
"valid_targets_mean": 4268.4,
"valid_targets_min": 1592
},
{
"epoch": 0.3611111111111111,
"grad_norm": 0.707972099980987,
"learning_rate": 2.0529100529100533e-05,
"loss": 0.4258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.407692015171051,
"step": 195,
"valid_targets_mean": 3323.2,
"valid_targets_min": 721
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.6176399796412881,
"learning_rate": 2.105820105820106e-05,
"loss": 0.3572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32153958082199097,
"step": 200,
"valid_targets_mean": 3555.6,
"valid_targets_min": 840
},
{
"epoch": 0.37962962962962965,
"grad_norm": 0.625992536194421,
"learning_rate": 2.158730158730159e-05,
"loss": 0.3921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49598437547683716,
"step": 205,
"valid_targets_mean": 5771.6,
"valid_targets_min": 675
},
{
"epoch": 0.3888888888888889,
"grad_norm": 0.6818922510280604,
"learning_rate": 2.211640211640212e-05,
"loss": 0.3721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39517930150032043,
"step": 210,
"valid_targets_mean": 3803.9,
"valid_targets_min": 523
},
{
"epoch": 0.39814814814814814,
"grad_norm": 0.5873676150067556,
"learning_rate": 2.2645502645502648e-05,
"loss": 0.3945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3694095015525818,
"step": 215,
"valid_targets_mean": 4309.9,
"valid_targets_min": 1808
},
{
"epoch": 0.4074074074074074,
"grad_norm": 0.5984897505965682,
"learning_rate": 2.317460317460318e-05,
"loss": 0.3735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3190292418003082,
"step": 220,
"valid_targets_mean": 4658.9,
"valid_targets_min": 1714
},
{
"epoch": 0.4166666666666667,
"grad_norm": 0.6652241547692664,
"learning_rate": 2.3703703703703703e-05,
"loss": 0.3656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3782808184623718,
"step": 225,
"valid_targets_mean": 4637.1,
"valid_targets_min": 1662
},
{
"epoch": 0.42592592592592593,
"grad_norm": 0.6773269861053436,
"learning_rate": 2.4232804232804234e-05,
"loss": 0.3476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3852205276489258,
"step": 230,
"valid_targets_mean": 3920.9,
"valid_targets_min": 1620
},
{
"epoch": 0.4351851851851852,
"grad_norm": 0.6294988371468406,
"learning_rate": 2.4761904761904766e-05,
"loss": 0.3577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38655975461006165,
"step": 235,
"valid_targets_mean": 4567.7,
"valid_targets_min": 2256
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.7066702282132421,
"learning_rate": 2.5291005291005294e-05,
"loss": 0.3836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42079806327819824,
"step": 240,
"valid_targets_mean": 3376.9,
"valid_targets_min": 708
},
{
"epoch": 0.4537037037037037,
"grad_norm": 0.6424177932025107,
"learning_rate": 2.582010582010582e-05,
"loss": 0.3315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3230123519897461,
"step": 245,
"valid_targets_mean": 3249.8,
"valid_targets_min": 551
},
{
"epoch": 0.46296296296296297,
"grad_norm": 0.6455853042915125,
"learning_rate": 2.6349206349206353e-05,
"loss": 0.3953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4580186605453491,
"step": 250,
"valid_targets_mean": 4673.4,
"valid_targets_min": 1703
},
{
"epoch": 0.4722222222222222,
"grad_norm": 0.5916132534087772,
"learning_rate": 2.687830687830688e-05,
"loss": 0.3799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36162281036376953,
"step": 255,
"valid_targets_mean": 3973.9,
"valid_targets_min": 2418
},
{
"epoch": 0.48148148148148145,
"grad_norm": 0.5577346524553856,
"learning_rate": 2.740740740740741e-05,
"loss": 0.3443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2668542265892029,
"step": 260,
"valid_targets_mean": 4436.5,
"valid_targets_min": 591
},
{
"epoch": 0.49074074074074076,
"grad_norm": 0.6414361548008731,
"learning_rate": 2.7936507936507936e-05,
"loss": 0.3806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3715950846672058,
"step": 265,
"valid_targets_mean": 4098.6,
"valid_targets_min": 2238
},
{
"epoch": 0.5,
"grad_norm": 0.5856940202703296,
"learning_rate": 2.8465608465608467e-05,
"loss": 0.3926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4181647002696991,
"step": 270,
"valid_targets_mean": 5686.6,
"valid_targets_min": 932
},
{
"epoch": 0.5092592592592593,
"grad_norm": 0.632615688558668,
"learning_rate": 2.8994708994709e-05,
"loss": 0.3687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33307045698165894,
"step": 275,
"valid_targets_mean": 4060.1,
"valid_targets_min": 595
},
{
"epoch": 0.5185185185185185,
"grad_norm": 0.6599712643815261,
"learning_rate": 2.9523809523809526e-05,
"loss": 0.3474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40192732214927673,
"step": 280,
"valid_targets_mean": 3505.5,
"valid_targets_min": 721
},
{
"epoch": 0.5277777777777778,
"grad_norm": 0.7871824525334413,
"learning_rate": 3.0052910052910054e-05,
"loss": 0.3811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37350624799728394,
"step": 285,
"valid_targets_mean": 4021.2,
"valid_targets_min": 480
},
{
"epoch": 0.5370370370370371,
"grad_norm": 0.5941203905483521,
"learning_rate": 3.058201058201058e-05,
"loss": 0.3806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34662193059921265,
"step": 290,
"valid_targets_mean": 4111.9,
"valid_targets_min": 849
},
{
"epoch": 0.5462962962962963,
"grad_norm": 0.6832466030128974,
"learning_rate": 3.111111111111112e-05,
"loss": 0.3809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35143667459487915,
"step": 295,
"valid_targets_mean": 3446.2,
"valid_targets_min": 567
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.5652043089590578,
"learning_rate": 3.1640211640211645e-05,
"loss": 0.4166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39690762758255005,
"step": 300,
"valid_targets_mean": 4713.9,
"valid_targets_min": 772
},
{
"epoch": 0.5648148148148148,
"grad_norm": 0.7212363216251159,
"learning_rate": 3.216931216931217e-05,
"loss": 0.3699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3936673402786255,
"step": 305,
"valid_targets_mean": 3249.8,
"valid_targets_min": 528
},
{
"epoch": 0.5740740740740741,
"grad_norm": 0.5825429131593575,
"learning_rate": 3.26984126984127e-05,
"loss": 0.3738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37310439348220825,
"step": 310,
"valid_targets_mean": 5273.2,
"valid_targets_min": 496
},
{
"epoch": 0.5833333333333334,
"grad_norm": 0.5872360398885519,
"learning_rate": 3.322751322751323e-05,
"loss": 0.3407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35499951243400574,
"step": 315,
"valid_targets_mean": 5564.2,
"valid_targets_min": 2044
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.7151624805661932,
"learning_rate": 3.375661375661376e-05,
"loss": 0.3616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3740341365337372,
"step": 320,
"valid_targets_mean": 4010.5,
"valid_targets_min": 1007
},
{
"epoch": 0.6018518518518519,
"grad_norm": 0.7663314912022873,
"learning_rate": 3.4285714285714284e-05,
"loss": 0.3501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37295448780059814,
"step": 325,
"valid_targets_mean": 3384.2,
"valid_targets_min": 803
},
{
"epoch": 0.6111111111111112,
"grad_norm": 0.7089838515813951,
"learning_rate": 3.481481481481482e-05,
"loss": 0.3472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33399441838264465,
"step": 330,
"valid_targets_mean": 5579.9,
"valid_targets_min": 1870
},
{
"epoch": 0.6203703703703703,
"grad_norm": 0.5776773939398085,
"learning_rate": 3.5343915343915346e-05,
"loss": 0.3353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3364108204841614,
"step": 335,
"valid_targets_mean": 4555.3,
"valid_targets_min": 1137
},
{
"epoch": 0.6296296296296297,
"grad_norm": 0.6642731154701934,
"learning_rate": 3.5873015873015874e-05,
"loss": 0.3497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3789205551147461,
"step": 340,
"valid_targets_mean": 3312.5,
"valid_targets_min": 943
},
{
"epoch": 0.6388888888888888,
"grad_norm": 0.6054739369110539,
"learning_rate": 3.64021164021164e-05,
"loss": 0.3283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3263542652130127,
"step": 345,
"valid_targets_mean": 4468.6,
"valid_targets_min": 2434
},
{
"epoch": 0.6481481481481481,
"grad_norm": 0.6874656720878914,
"learning_rate": 3.6931216931216936e-05,
"loss": 0.3563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3052312731742859,
"step": 350,
"valid_targets_mean": 3007.6,
"valid_targets_min": 702
},
{
"epoch": 0.6574074074074074,
"grad_norm": 0.6507760256018081,
"learning_rate": 3.7460317460317464e-05,
"loss": 0.3617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3832956552505493,
"step": 355,
"valid_targets_mean": 3906.8,
"valid_targets_min": 726
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.5435451628510831,
"learning_rate": 3.798941798941799e-05,
"loss": 0.3757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39122748374938965,
"step": 360,
"valid_targets_mean": 5430.8,
"valid_targets_min": 327
},
{
"epoch": 0.6759259259259259,
"grad_norm": 0.5644022877423189,
"learning_rate": 3.851851851851852e-05,
"loss": 0.3777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32652783393859863,
"step": 365,
"valid_targets_mean": 4897.6,
"valid_targets_min": 1720
},
{
"epoch": 0.6851851851851852,
"grad_norm": 0.6380722458933044,
"learning_rate": 3.904761904761905e-05,
"loss": 0.3161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.340414822101593,
"step": 370,
"valid_targets_mean": 3580.2,
"valid_targets_min": 757
},
{
"epoch": 0.6944444444444444,
"grad_norm": 0.5617778297985487,
"learning_rate": 3.957671957671958e-05,
"loss": 0.3299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2890230715274811,
"step": 375,
"valid_targets_mean": 3849.2,
"valid_targets_min": 1728
},
{
"epoch": 0.7037037037037037,
"grad_norm": 0.7391134176193144,
"learning_rate": 3.999999147231606e-05,
"loss": 0.3425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3627009093761444,
"step": 380,
"valid_targets_mean": 3590.0,
"valid_targets_min": 1700
},
{
"epoch": 0.7129629629629629,
"grad_norm": 0.585798655714323,
"learning_rate": 3.9999693004141615e-05,
"loss": 0.3378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33226558566093445,
"step": 385,
"valid_targets_mean": 3770.4,
"valid_targets_min": 490
},
{
"epoch": 0.7222222222222222,
"grad_norm": 0.517353669888523,
"learning_rate": 3.999896815904212e-05,
"loss": 0.3454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36117538809776306,
"step": 390,
"valid_targets_mean": 5560.3,
"valid_targets_min": 1959
},
{
"epoch": 0.7314814814814815,
"grad_norm": 0.6107363197274304,
"learning_rate": 3.999781695247067e-05,
"loss": 0.358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38827258348464966,
"step": 395,
"valid_targets_mean": 4259.4,
"valid_targets_min": 572
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.6359653698753834,
"learning_rate": 3.999623940897003e-05,
"loss": 0.3114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32796719670295715,
"step": 400,
"valid_targets_mean": 3816.1,
"valid_targets_min": 698
},
{
"epoch": 0.75,
"grad_norm": 0.6435556727086547,
"learning_rate": 3.9994235562172135e-05,
"loss": 0.2972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3033405542373657,
"step": 405,
"valid_targets_mean": 3801.8,
"valid_targets_min": 605
},
{
"epoch": 0.7592592592592593,
"grad_norm": 0.5397509110893886,
"learning_rate": 3.999180545479734e-05,
"loss": 0.3476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3159610331058502,
"step": 410,
"valid_targets_mean": 4451.7,
"valid_targets_min": 558
},
{
"epoch": 0.7685185185185185,
"grad_norm": 0.6224527962247686,
"learning_rate": 3.998894913865352e-05,
"loss": 0.3595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3543645441532135,
"step": 415,
"valid_targets_mean": 3635.9,
"valid_targets_min": 1754
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.6426499523396512,
"learning_rate": 3.9985666674634976e-05,
"loss": 0.3405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.339733749628067,
"step": 420,
"valid_targets_mean": 3655.6,
"valid_targets_min": 783
},
{
"epoch": 0.7870370370370371,
"grad_norm": 0.5232489603174455,
"learning_rate": 3.998195813272113e-05,
"loss": 0.3453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3885164260864258,
"step": 425,
"valid_targets_mean": 6157.9,
"valid_targets_min": 871
},
{
"epoch": 0.7962962962962963,
"grad_norm": 0.5645900335897053,
"learning_rate": 3.997782359197503e-05,
"loss": 0.322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34212440252304077,
"step": 430,
"valid_targets_mean": 4778.2,
"valid_targets_min": 584
},
{
"epoch": 0.8055555555555556,
"grad_norm": 0.5962980381815696,
"learning_rate": 3.997326314054167e-05,
"loss": 0.3163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3020142614841461,
"step": 435,
"valid_targets_mean": 5573.9,
"valid_targets_min": 845
},
{
"epoch": 0.8148148148148148,
"grad_norm": 0.6887712552325551,
"learning_rate": 3.9968276875646095e-05,
"loss": 0.3244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33892735838890076,
"step": 440,
"valid_targets_mean": 4693.2,
"valid_targets_min": 886
},
{
"epoch": 0.8240740740740741,
"grad_norm": 0.7056735839532887,
"learning_rate": 3.9962864903591375e-05,
"loss": 0.3434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4029475450515747,
"step": 445,
"valid_targets_mean": 3450.6,
"valid_targets_min": 930
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.5753803712883945,
"learning_rate": 3.995702733975625e-05,
"loss": 0.2982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27287542819976807,
"step": 450,
"valid_targets_mean": 3599.7,
"valid_targets_min": 1569
},
{
"epoch": 0.8425925925925926,
"grad_norm": 0.6413965772420983,
"learning_rate": 3.9950764308592783e-05,
"loss": 0.3417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3798047602176666,
"step": 455,
"valid_targets_mean": 3774.1,
"valid_targets_min": 317
},
{
"epoch": 0.8518518518518519,
"grad_norm": 0.645787380121332,
"learning_rate": 3.9944075943623605e-05,
"loss": 0.3172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34351015090942383,
"step": 460,
"valid_targets_mean": 3503.0,
"valid_targets_min": 526
},
{
"epoch": 0.8611111111111112,
"grad_norm": 0.5184110306882624,
"learning_rate": 3.9936962387439135e-05,
"loss": 0.3171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3216972351074219,
"step": 465,
"valid_targets_mean": 5171.2,
"valid_targets_min": 1926
},
{
"epoch": 0.8703703703703703,
"grad_norm": 0.6014751168851044,
"learning_rate": 3.992942379169452e-05,
"loss": 0.3339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32453712821006775,
"step": 470,
"valid_targets_mean": 4786.2,
"valid_targets_min": 629
},
{
"epoch": 0.8796296296296297,
"grad_norm": 0.6616582269145059,
"learning_rate": 3.992146031710637e-05,
"loss": 0.345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35846132040023804,
"step": 475,
"valid_targets_mean": 3561.0,
"valid_targets_min": 949
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.49969303426311373,
"learning_rate": 3.99130721334494e-05,
"loss": 0.3502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3090265095233917,
"step": 480,
"valid_targets_mean": 4930.3,
"valid_targets_min": 491
},
{
"epoch": 0.8981481481481481,
"grad_norm": 0.6240367506941321,
"learning_rate": 3.9904259419552744e-05,
"loss": 0.3365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35694044828414917,
"step": 485,
"valid_targets_mean": 3573.7,
"valid_targets_min": 2118
},
{
"epoch": 0.9074074074074074,
"grad_norm": 0.570990856646238,
"learning_rate": 3.989502236329618e-05,
"loss": 0.3062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32826316356658936,
"step": 490,
"valid_targets_mean": 4897.8,
"valid_targets_min": 970
},
{
"epoch": 0.9166666666666666,
"grad_norm": 0.5453511061498877,
"learning_rate": 3.988536116160612e-05,
"loss": 0.3359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3527144193649292,
"step": 495,
"valid_targets_mean": 4487.1,
"valid_targets_min": 1709
},
{
"epoch": 0.9259259259259259,
"grad_norm": 0.6304542974915351,
"learning_rate": 3.987527602045139e-05,
"loss": 0.3406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34708958864212036,
"step": 500,
"valid_targets_mean": 3580.7,
"valid_targets_min": 894
},
{
"epoch": 0.9351851851851852,
"grad_norm": 0.625911017517709,
"learning_rate": 3.9864767154838864e-05,
"loss": 0.3331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3413323760032654,
"step": 505,
"valid_targets_mean": 3402.6,
"valid_targets_min": 698
},
{
"epoch": 0.9444444444444444,
"grad_norm": 0.5749197484139668,
"learning_rate": 3.985383478880887e-05,
"loss": 0.3338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3760027587413788,
"step": 510,
"valid_targets_mean": 4457.2,
"valid_targets_min": 194
},
{
"epoch": 0.9537037037037037,
"grad_norm": 0.6836107942264785,
"learning_rate": 3.984247915543043e-05,
"loss": 0.3132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3548537492752075,
"step": 515,
"valid_targets_mean": 3044.6,
"valid_targets_min": 736
},
{
"epoch": 0.9629629629629629,
"grad_norm": 0.639879100311534,
"learning_rate": 3.9830700496796246e-05,
"loss": 0.3251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3742837905883789,
"step": 520,
"valid_targets_mean": 3597.6,
"valid_targets_min": 2209
},
{
"epoch": 0.9722222222222222,
"grad_norm": 0.789794647068884,
"learning_rate": 3.98184990640176e-05,
"loss": 0.3275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34361645579338074,
"step": 525,
"valid_targets_mean": 2886.4,
"valid_targets_min": 275
},
{
"epoch": 0.9814814814814815,
"grad_norm": 0.549167670180771,
"learning_rate": 3.9805875117218934e-05,
"loss": 0.3573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3770226836204529,
"step": 530,
"valid_targets_mean": 4474.1,
"valid_targets_min": 1026
},
{
"epoch": 0.9907407407407407,
"grad_norm": 0.499937106224937,
"learning_rate": 3.9792828925532376e-05,
"loss": 0.3325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31070464849472046,
"step": 535,
"valid_targets_mean": 4791.1,
"valid_targets_min": 1838
},
{
"epoch": 1.0,
"grad_norm": 0.5379069079693097,
"learning_rate": 3.977936076709195e-05,
"loss": 0.3367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.313912570476532,
"step": 540,
"valid_targets_mean": 4855.9,
"valid_targets_min": 778
},
{
"epoch": 1.0092592592592593,
"grad_norm": 0.5872678514369906,
"learning_rate": 3.976547092902765e-05,
"loss": 0.3107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3828044831752777,
"step": 545,
"valid_targets_mean": 4668.6,
"valid_targets_min": 422
},
{
"epoch": 1.0185185185185186,
"grad_norm": 0.622573855977736,
"learning_rate": 3.9751159707459354e-05,
"loss": 0.3296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30190443992614746,
"step": 550,
"valid_targets_mean": 3399.8,
"valid_targets_min": 480
},
{
"epoch": 1.0277777777777777,
"grad_norm": 0.6028687336159148,
"learning_rate": 3.973642740749048e-05,
"loss": 0.2989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30583667755126953,
"step": 555,
"valid_targets_mean": 3726.7,
"valid_targets_min": 629
},
{
"epoch": 1.037037037037037,
"grad_norm": 0.46880468225849076,
"learning_rate": 3.972127434320148e-05,
"loss": 0.3047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2561293840408325,
"step": 560,
"valid_targets_mean": 4991.6,
"valid_targets_min": 1598
},
{
"epoch": 1.0462962962962963,
"grad_norm": 0.5498139372111231,
"learning_rate": 3.970570083764316e-05,
"loss": 0.3393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27668747305870056,
"step": 565,
"valid_targets_mean": 3853.1,
"valid_targets_min": 573
},
{
"epoch": 1.0555555555555556,
"grad_norm": 0.7107012818618696,
"learning_rate": 3.968970722282979e-05,
"loss": 0.3129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35108423233032227,
"step": 570,
"valid_targets_mean": 3513.9,
"valid_targets_min": 1733
},
{
"epoch": 1.0648148148148149,
"grad_norm": 0.5944855386113417,
"learning_rate": 3.9673293839732024e-05,
"loss": 0.2998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.239329993724823,
"step": 575,
"valid_targets_mean": 3587.9,
"valid_targets_min": 584
},
{
"epoch": 1.074074074074074,
"grad_norm": 0.5366706705366189,
"learning_rate": 3.965646103826962e-05,
"loss": 0.3161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27880698442459106,
"step": 580,
"valid_targets_mean": 3620.6,
"valid_targets_min": 496
},
{
"epoch": 1.0833333333333333,
"grad_norm": 0.6078514718903982,
"learning_rate": 3.963920917730399e-05,
"loss": 0.3293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3750278353691101,
"step": 585,
"valid_targets_mean": 4482.4,
"valid_targets_min": 1680
},
{
"epoch": 1.0925925925925926,
"grad_norm": 0.6207808313685298,
"learning_rate": 3.9621538624630546e-05,
"loss": 0.3127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31572356820106506,
"step": 590,
"valid_targets_mean": 3113.2,
"valid_targets_min": 635
},
{
"epoch": 1.1018518518518519,
"grad_norm": 0.49606649278990134,
"learning_rate": 3.9603449756970877e-05,
"loss": 0.2978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2611154019832611,
"step": 595,
"valid_targets_mean": 4295.1,
"valid_targets_min": 545
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.6073469095682115,
"learning_rate": 3.9584942959964695e-05,
"loss": 0.3069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28560468554496765,
"step": 600,
"valid_targets_mean": 3647.1,
"valid_targets_min": 1445
},
{
"epoch": 1.1203703703703705,
"grad_norm": 0.5950597164351814,
"learning_rate": 3.9566018628161595e-05,
"loss": 0.3327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32758647203445435,
"step": 605,
"valid_targets_mean": 3512.6,
"valid_targets_min": 435
},
{
"epoch": 1.1296296296296295,
"grad_norm": 0.6075429397094143,
"learning_rate": 3.9546677165012714e-05,
"loss": 0.2825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27764448523521423,
"step": 610,
"valid_targets_mean": 4952.8,
"valid_targets_min": 535
},
{
"epoch": 1.1388888888888888,
"grad_norm": 0.5550360366103939,
"learning_rate": 3.9526918982862045e-05,
"loss": 0.2956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3022664785385132,
"step": 615,
"valid_targets_mean": 3905.1,
"valid_targets_min": 831
},
{
"epoch": 1.1481481481481481,
"grad_norm": 0.6050232035267888,
"learning_rate": 3.950674450293771e-05,
"loss": 0.3073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32235607504844666,
"step": 620,
"valid_targets_mean": 3641.3,
"valid_targets_min": 563
},
{
"epoch": 1.1574074074074074,
"grad_norm": 0.5959296142535176,
"learning_rate": 3.948615415534294e-05,
"loss": 0.3268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39365071058273315,
"step": 625,
"valid_targets_mean": 3973.9,
"valid_targets_min": 626
},
{
"epoch": 1.1666666666666667,
"grad_norm": 0.5533298682994967,
"learning_rate": 3.946514837904693e-05,
"loss": 0.2983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2971389889717102,
"step": 630,
"valid_targets_mean": 4080.9,
"valid_targets_min": 834
},
{
"epoch": 1.175925925925926,
"grad_norm": 0.6106998081873015,
"learning_rate": 3.944372762187547e-05,
"loss": 0.3329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3265891671180725,
"step": 635,
"valid_targets_mean": 3942.2,
"valid_targets_min": 446
},
{
"epoch": 1.1851851851851851,
"grad_norm": 0.5794193483906891,
"learning_rate": 3.9421892340501405e-05,
"loss": 0.3145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3371884822845459,
"step": 640,
"valid_targets_mean": 4098.7,
"valid_targets_min": 295
},
{
"epoch": 1.1944444444444444,
"grad_norm": 0.5448020606518212,
"learning_rate": 3.939964300043487e-05,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27047258615493774,
"step": 645,
"valid_targets_mean": 4001.4,
"valid_targets_min": 592
},
{
"epoch": 1.2037037037037037,
"grad_norm": 0.5233238220020286,
"learning_rate": 3.9376980076013426e-05,
"loss": 0.3144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30770355463027954,
"step": 650,
"valid_targets_mean": 4120.2,
"valid_targets_min": 779
},
{
"epoch": 1.212962962962963,
"grad_norm": 0.5883380365500736,
"learning_rate": 3.9353904050391874e-05,
"loss": 0.3377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36008432507514954,
"step": 655,
"valid_targets_mean": 4483.6,
"valid_targets_min": 2162
},
{
"epoch": 1.2222222222222223,
"grad_norm": 0.8824261790418245,
"learning_rate": 3.933041541553202e-05,
"loss": 0.2975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2799769937992096,
"step": 660,
"valid_targets_mean": 3285.0,
"valid_targets_min": 599
},
{
"epoch": 1.2314814814814814,
"grad_norm": 0.5442814396188268,
"learning_rate": 3.930651467219214e-05,
"loss": 0.3,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2907693386077881,
"step": 665,
"valid_targets_mean": 4048.6,
"valid_targets_min": 669
},
{
"epoch": 1.2407407407407407,
"grad_norm": 0.5827496169253467,
"learning_rate": 3.928220232991633e-05,
"loss": 0.3124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29981476068496704,
"step": 670,
"valid_targets_mean": 3801.6,
"valid_targets_min": 1767
},
{
"epoch": 1.25,
"grad_norm": 0.5462062382083914,
"learning_rate": 3.925747890702363e-05,
"loss": 0.3041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2990304231643677,
"step": 675,
"valid_targets_mean": 4142.3,
"valid_targets_min": 321
},
{
"epoch": 1.2592592592592593,
"grad_norm": 0.6206083513162505,
"learning_rate": 3.9232344930596983e-05,
"loss": 0.2986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2708045244216919,
"step": 680,
"valid_targets_mean": 3426.8,
"valid_targets_min": 2009
},
{
"epoch": 1.2685185185185186,
"grad_norm": 0.5745410057483962,
"learning_rate": 3.9206800936472e-05,
"loss": 0.2947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2561905086040497,
"step": 685,
"valid_targets_mean": 3190.8,
"valid_targets_min": 871
},
{
"epoch": 1.2777777777777777,
"grad_norm": 0.5537138179650384,
"learning_rate": 3.9180847469225514e-05,
"loss": 0.3227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.359304279088974,
"step": 690,
"valid_targets_mean": 4509.7,
"valid_targets_min": 595
},
{
"epoch": 1.287037037037037,
"grad_norm": 0.5205868768170611,
"learning_rate": 3.9154485082164e-05,
"loss": 0.2835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28854691982269287,
"step": 695,
"valid_targets_mean": 4476.4,
"valid_targets_min": 421
},
{
"epoch": 1.2962962962962963,
"grad_norm": 0.5809148768460918,
"learning_rate": 3.912771433731176e-05,
"loss": 0.32,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33331120014190674,
"step": 700,
"valid_targets_mean": 4712.1,
"valid_targets_min": 2303
},
{
"epoch": 1.3055555555555556,
"grad_norm": 0.6151605514518709,
"learning_rate": 3.910053580539896e-05,
"loss": 0.3347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3627372980117798,
"step": 705,
"valid_targets_mean": 4119.8,
"valid_targets_min": 698
},
{
"epoch": 1.3148148148148149,
"grad_norm": 0.5988192996988059,
"learning_rate": 3.907295006584941e-05,
"loss": 0.317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34967589378356934,
"step": 710,
"valid_targets_mean": 4211.2,
"valid_targets_min": 888
},
{
"epoch": 1.324074074074074,
"grad_norm": 0.5533738940567987,
"learning_rate": 3.904495770676831e-05,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2595583200454712,
"step": 715,
"valid_targets_mean": 3652.5,
"valid_targets_min": 772
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.677729485934007,
"learning_rate": 3.9016559324929594e-05,
"loss": 0.3195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28384801745414734,
"step": 720,
"valid_targets_mean": 3426.2,
"valid_targets_min": 2319
},
{
"epoch": 1.3425925925925926,
"grad_norm": 0.5229228302802816,
"learning_rate": 3.8987755525763315e-05,
"loss": 0.316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3147448003292084,
"step": 725,
"valid_targets_mean": 4087.8,
"valid_targets_min": 567
},
{
"epoch": 1.3518518518518519,
"grad_norm": 0.5839814638980791,
"learning_rate": 3.895854692334264e-05,
"loss": 0.3029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25805115699768066,
"step": 730,
"valid_targets_mean": 4818.6,
"valid_targets_min": 1703
},
{
"epoch": 1.3611111111111112,
"grad_norm": 0.567166024329898,
"learning_rate": 3.892893414037084e-05,
"loss": 0.3099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2491036057472229,
"step": 735,
"valid_targets_mean": 3994.6,
"valid_targets_min": 2004
},
{
"epoch": 1.3703703703703702,
"grad_norm": 0.5547431269439685,
"learning_rate": 3.889891780816799e-05,
"loss": 0.3056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31328123807907104,
"step": 740,
"valid_targets_mean": 3948.7,
"valid_targets_min": 628
},
{
"epoch": 1.3796296296296298,
"grad_norm": 0.5671900268720954,
"learning_rate": 3.886849856665746e-05,
"loss": 0.3047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3374163508415222,
"step": 745,
"valid_targets_mean": 3923.8,
"valid_targets_min": 892
},
{
"epoch": 1.3888888888888888,
"grad_norm": 0.53885923431837,
"learning_rate": 3.8837677064352345e-05,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31161177158355713,
"step": 750,
"valid_targets_mean": 3956.8,
"valid_targets_min": 1684
},
{
"epoch": 1.3981481481481481,
"grad_norm": 0.5062852118800623,
"learning_rate": 3.8806453958341615e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23768174648284912,
"step": 755,
"valid_targets_mean": 3670.1,
"valid_targets_min": 840
},
{
"epoch": 1.4074074074074074,
"grad_norm": 0.5980684056816908,
"learning_rate": 3.877482991427607e-05,
"loss": 0.3279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3120206594467163,
"step": 760,
"valid_targets_mean": 3676.0,
"valid_targets_min": 1026
},
{
"epoch": 1.4166666666666667,
"grad_norm": 0.5263249865978269,
"learning_rate": 3.874280560635418e-05,
"loss": 0.2727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24431112408638,
"step": 765,
"valid_targets_mean": 4316.4,
"valid_targets_min": 1753
},
{
"epoch": 1.425925925925926,
"grad_norm": 0.5371674118999251,
"learning_rate": 3.871038171730775e-05,
"loss": 0.322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3550418019294739,
"step": 770,
"valid_targets_mean": 5343.2,
"valid_targets_min": 1034
},
{
"epoch": 1.4351851851851851,
"grad_norm": 0.5747486969945653,
"learning_rate": 3.8677558938387276e-05,
"loss": 0.2928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2658607065677643,
"step": 775,
"valid_targets_mean": 3501.0,
"valid_targets_min": 490
},
{
"epoch": 1.4444444444444444,
"grad_norm": 0.5593639327643428,
"learning_rate": 3.864433796934728e-05,
"loss": 0.317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2963068187236786,
"step": 780,
"valid_targets_mean": 3459.8,
"valid_targets_min": 1596
},
{
"epoch": 1.4537037037037037,
"grad_norm": 0.49539758416732715,
"learning_rate": 3.861071951843137e-05,
"loss": 0.2967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3172762393951416,
"step": 785,
"valid_targets_mean": 4932.9,
"valid_targets_min": 2031
},
{
"epoch": 1.462962962962963,
"grad_norm": 0.7233453222225057,
"learning_rate": 3.8576704302357135e-05,
"loss": 0.2987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31079697608947754,
"step": 790,
"valid_targets_mean": 2923.9,
"valid_targets_min": 368
},
{
"epoch": 1.4722222222222223,
"grad_norm": 0.5304815079906486,
"learning_rate": 3.854229304630086e-05,
"loss": 0.3058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2519688010215759,
"step": 795,
"valid_targets_mean": 3894.1,
"valid_targets_min": 1015
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.5912953728210274,
"learning_rate": 3.8507486483882084e-05,
"loss": 0.3187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31321465969085693,
"step": 800,
"valid_targets_mean": 3528.2,
"valid_targets_min": 641
},
{
"epoch": 1.4907407407407407,
"grad_norm": 0.676659645331013,
"learning_rate": 3.8472285357147966e-05,
"loss": 0.2899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34123727679252625,
"step": 805,
"valid_targets_mean": 4592.4,
"valid_targets_min": 697
},
{
"epoch": 1.5,
"grad_norm": 0.4442677722852739,
"learning_rate": 3.843669041655741e-05,
"loss": 0.3091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30493301153182983,
"step": 810,
"valid_targets_mean": 5845.2,
"valid_targets_min": 255
},
{
"epoch": 1.5092592592592593,
"grad_norm": 0.5682057669159459,
"learning_rate": 3.840070242096514e-05,
"loss": 0.3152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3426523804664612,
"step": 815,
"valid_targets_mean": 4269.9,
"valid_targets_min": 317
},
{
"epoch": 1.5185185185185186,
"grad_norm": 0.5718791042050542,
"learning_rate": 3.8364322137605484e-05,
"loss": 0.2951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2769266366958618,
"step": 820,
"valid_targets_mean": 3698.8,
"valid_targets_min": 665
},
{
"epoch": 1.5277777777777777,
"grad_norm": 0.5536227502501987,
"learning_rate": 3.832755034207601e-05,
"loss": 0.2956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30558109283447266,
"step": 825,
"valid_targets_mean": 4033.0,
"valid_targets_min": 585
},
{
"epoch": 1.5370370370370372,
"grad_norm": 0.5252844534173842,
"learning_rate": 3.8290387818321e-05,
"loss": 0.2996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3152919411659241,
"step": 830,
"valid_targets_mean": 4217.6,
"valid_targets_min": 1788
},
{
"epoch": 1.5462962962962963,
"grad_norm": 0.6143178973257938,
"learning_rate": 3.825283535861476e-05,
"loss": 0.3175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2816426157951355,
"step": 835,
"valid_targets_mean": 5267.9,
"valid_targets_min": 2440
},
{
"epoch": 1.5555555555555556,
"grad_norm": 0.5986189313818777,
"learning_rate": 3.8214893763544684e-05,
"loss": 0.3122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27623170614242554,
"step": 840,
"valid_targets_mean": 3365.1,
"valid_targets_min": 474
},
{
"epoch": 1.5648148148148149,
"grad_norm": 0.44155638321379925,
"learning_rate": 3.817656384199422e-05,
"loss": 0.2748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2690165340900421,
"step": 845,
"valid_targets_mean": 5673.1,
"valid_targets_min": 2925
},
{
"epoch": 1.574074074074074,
"grad_norm": 0.5395718733919299,
"learning_rate": 3.813784641112563e-05,
"loss": 0.3036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29078513383865356,
"step": 850,
"valid_targets_mean": 5001.7,
"valid_targets_min": 2123
},
{
"epoch": 1.5833333333333335,
"grad_norm": 0.5091833646708693,
"learning_rate": 3.8098742296362506e-05,
"loss": 0.3271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3604724407196045,
"step": 855,
"valid_targets_mean": 5089.8,
"valid_targets_min": 663
},
{
"epoch": 1.5925925925925926,
"grad_norm": 0.6087696649809414,
"learning_rate": 3.805925233137229e-05,
"loss": 0.3232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3626618981361389,
"step": 860,
"valid_targets_mean": 3525.3,
"valid_targets_min": 950
},
{
"epoch": 1.6018518518518519,
"grad_norm": 0.5643870271264605,
"learning_rate": 3.801937735804838e-05,
"loss": 0.3133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28040584921836853,
"step": 865,
"valid_targets_mean": 3399.7,
"valid_targets_min": 1903
},
{
"epoch": 1.6111111111111112,
"grad_norm": 0.5439012864299764,
"learning_rate": 3.7979118226492266e-05,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29787707328796387,
"step": 870,
"valid_targets_mean": 3488.6,
"valid_targets_min": 637
},
{
"epoch": 1.6203703703703702,
"grad_norm": 0.49165947496754964,
"learning_rate": 3.793847579499534e-05,
"loss": 0.2931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34809091687202454,
"step": 875,
"valid_targets_mean": 5268.2,
"valid_targets_min": 1908
},
{
"epoch": 1.6296296296296298,
"grad_norm": 0.5140250546410033,
"learning_rate": 3.789745093002065e-05,
"loss": 0.3238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2461247444152832,
"step": 880,
"valid_targets_mean": 3590.8,
"valid_targets_min": 541
},
{
"epoch": 1.6388888888888888,
"grad_norm": 0.5054561634272887,
"learning_rate": 3.785604450618443e-05,
"loss": 0.2708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21256114542484283,
"step": 885,
"valid_targets_mean": 3803.2,
"valid_targets_min": 397
},
{
"epoch": 1.6481481481481481,
"grad_norm": 0.5046967079101841,
"learning_rate": 3.781425740623739e-05,
"loss": 0.3127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2555197477340698,
"step": 890,
"valid_targets_mean": 5367.1,
"valid_targets_min": 2149
},
{
"epoch": 1.6574074074074074,
"grad_norm": 0.5031111391748204,
"learning_rate": 3.777209052104598e-05,
"loss": 0.309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27394580841064453,
"step": 895,
"valid_targets_mean": 4603.8,
"valid_targets_min": 1982
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.5200593833614563,
"learning_rate": 3.7729544749573335e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30838844180107117,
"step": 900,
"valid_targets_mean": 4181.4,
"valid_targets_min": 739
},
{
"epoch": 1.675925925925926,
"grad_norm": 0.5108208774796558,
"learning_rate": 3.768662099886014e-05,
"loss": 0.3489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35393011569976807,
"step": 905,
"valid_targets_mean": 5152.1,
"valid_targets_min": 910
},
{
"epoch": 1.6851851851851851,
"grad_norm": 0.563904222236046,
"learning_rate": 3.7643320184005284e-05,
"loss": 0.2958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2836383879184723,
"step": 910,
"valid_targets_mean": 3775.8,
"valid_targets_min": 1926
},
{
"epoch": 1.6944444444444444,
"grad_norm": 0.6370969191435905,
"learning_rate": 3.7599643228146355e-05,
"loss": 0.319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34479522705078125,
"step": 915,
"valid_targets_mean": 3473.2,
"valid_targets_min": 479
},
{
"epoch": 1.7037037037037037,
"grad_norm": 0.4489327078730733,
"learning_rate": 3.755559106243994e-05,
"loss": 0.2783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2346605360507965,
"step": 920,
"valid_targets_mean": 4684.8,
"valid_targets_min": 1007
},
{
"epoch": 1.7129629629629628,
"grad_norm": 0.5284546797799259,
"learning_rate": 3.7511164626041823e-05,
"loss": 0.3133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3440583646297455,
"step": 925,
"valid_targets_mean": 4763.8,
"valid_targets_min": 2487
},
{
"epoch": 1.7222222222222223,
"grad_norm": 0.5116796474023507,
"learning_rate": 3.746636486608689e-05,
"loss": 0.2709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31446945667266846,
"step": 930,
"valid_targets_mean": 4609.9,
"valid_targets_min": 2063
},
{
"epoch": 1.7314814814814814,
"grad_norm": 0.5614968969786471,
"learning_rate": 3.7421192737669005e-05,
"loss": 0.3301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32294750213623047,
"step": 935,
"valid_targets_mean": 3850.6,
"valid_targets_min": 610
},
{
"epoch": 1.7407407407407407,
"grad_norm": 0.5657690526577215,
"learning_rate": 3.737564920382061e-05,
"loss": 0.2904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31011059880256653,
"step": 940,
"valid_targets_mean": 3671.0,
"valid_targets_min": 2036
},
{
"epoch": 1.75,
"grad_norm": 0.5783601802984916,
"learning_rate": 3.732973523549221e-05,
"loss": 0.3106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29496774077415466,
"step": 945,
"valid_targets_mean": 3423.9,
"valid_targets_min": 494
},
{
"epoch": 1.7592592592592593,
"grad_norm": 0.43290336789364203,
"learning_rate": 3.728345181153165e-05,
"loss": 0.2913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2509721517562866,
"step": 950,
"valid_targets_mean": 4465.2,
"valid_targets_min": 1717
},
{
"epoch": 1.7685185185185186,
"grad_norm": 0.4912129854091696,
"learning_rate": 3.7236799918663284e-05,
"loss": 0.3068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28017228841781616,
"step": 955,
"valid_targets_mean": 4293.8,
"valid_targets_min": 2343
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.6332365406111699,
"learning_rate": 3.7189780551466905e-05,
"loss": 0.2727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26829952001571655,
"step": 960,
"valid_targets_mean": 2642.8,
"valid_targets_min": 1700
},
{
"epoch": 1.7870370370370372,
"grad_norm": 0.5262914650173831,
"learning_rate": 3.714239471235657e-05,
"loss": 0.2959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3247262239456177,
"step": 965,
"valid_targets_mean": 5274.5,
"valid_targets_min": 802
},
{
"epoch": 1.7962962962962963,
"grad_norm": 0.55145501931712,
"learning_rate": 3.7094643411559194e-05,
"loss": 0.2746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.314953088760376,
"step": 970,
"valid_targets_mean": 3900.6,
"valid_targets_min": 1945
},
{
"epoch": 1.8055555555555556,
"grad_norm": 0.5213565964214443,
"learning_rate": 3.704652766709305e-05,
"loss": 0.3047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3098965287208557,
"step": 975,
"valid_targets_mean": 4299.4,
"valid_targets_min": 1835
},
{
"epoch": 1.8148148148148149,
"grad_norm": 0.5142744749508426,
"learning_rate": 3.699804850474603e-05,
"loss": 0.2846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28077518939971924,
"step": 980,
"valid_targets_mean": 4123.2,
"valid_targets_min": 322
},
{
"epoch": 1.824074074074074,
"grad_norm": 0.6109944754762814,
"learning_rate": 3.6949206958053825e-05,
"loss": 0.3212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29146653413772583,
"step": 985,
"valid_targets_mean": 3814.9,
"valid_targets_min": 1464
},
{
"epoch": 1.8333333333333335,
"grad_norm": 0.6418924924446736,
"learning_rate": 3.690000406827783e-05,
"loss": 0.3001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2858262360095978,
"step": 990,
"valid_targets_mean": 2490.6,
"valid_targets_min": 559
},
{
"epoch": 1.8425925925925926,
"grad_norm": 0.5742545688665202,
"learning_rate": 3.685044088438299e-05,
"loss": 0.3013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2759723961353302,
"step": 995,
"valid_targets_mean": 4008.8,
"valid_targets_min": 1779
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.5254418262167269,
"learning_rate": 3.680051846301543e-05,
"loss": 0.2562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2492840737104416,
"step": 1000,
"valid_targets_mean": 3808.9,
"valid_targets_min": 1535
},
{
"epoch": 1.8611111111111112,
"grad_norm": 0.44300703016797877,
"learning_rate": 3.675023786847991e-05,
"loss": 0.2984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27016231417655945,
"step": 1005,
"valid_targets_mean": 4913.9,
"valid_targets_min": 2688
},
{
"epoch": 1.8703703703703702,
"grad_norm": 0.5313397881548159,
"learning_rate": 3.6699600172717137e-05,
"loss": 0.2956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27664732933044434,
"step": 1010,
"valid_targets_mean": 3364.4,
"valid_targets_min": 654
},
{
"epoch": 1.8796296296296298,
"grad_norm": 0.7092703491214453,
"learning_rate": 3.6648606455280944e-05,
"loss": 0.2968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3045133352279663,
"step": 1015,
"valid_targets_mean": 3802.8,
"valid_targets_min": 313
},
{
"epoch": 1.8888888888888888,
"grad_norm": 0.5263165468712829,
"learning_rate": 3.659725780331524e-05,
"loss": 0.3094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3496822714805603,
"step": 1020,
"valid_targets_mean": 4270.3,
"valid_targets_min": 2039
},
{
"epoch": 1.8981481481481481,
"grad_norm": 0.5704220397986243,
"learning_rate": 3.654555531153084e-05,
"loss": 0.3112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25687843561172485,
"step": 1025,
"valid_targets_mean": 3186.1,
"valid_targets_min": 310
},
{
"epoch": 1.9074074074074074,
"grad_norm": 0.4827843955838589,
"learning_rate": 3.649350008218214e-05,
"loss": 0.2874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3169630169868469,
"step": 1030,
"valid_targets_mean": 4576.8,
"valid_targets_min": 2306
},
{
"epoch": 1.9166666666666665,
"grad_norm": 0.49931307758205035,
"learning_rate": 3.64410932250436e-05,
"loss": 0.3212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36018311977386475,
"step": 1035,
"valid_targets_mean": 4679.8,
"valid_targets_min": 896
},
{
"epoch": 1.925925925925926,
"grad_norm": 0.5372589737787232,
"learning_rate": 3.638833585738611e-05,
"loss": 0.2966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2810884714126587,
"step": 1040,
"valid_targets_mean": 3718.3,
"valid_targets_min": 1706
},
{
"epoch": 1.9351851851851851,
"grad_norm": 0.5448044758731123,
"learning_rate": 3.633522910395314e-05,
"loss": 0.2798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2674560546875,
"step": 1045,
"valid_targets_mean": 3725.8,
"valid_targets_min": 299
},
{
"epoch": 1.9444444444444444,
"grad_norm": 0.49024688080681106,
"learning_rate": 3.628177409693677e-05,
"loss": 0.2917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31284260749816895,
"step": 1050,
"valid_targets_mean": 4673.3,
"valid_targets_min": 1880
},
{
"epoch": 1.9537037037037037,
"grad_norm": 0.4596588910916473,
"learning_rate": 3.622797197595359e-05,
"loss": 0.299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25262099504470825,
"step": 1055,
"valid_targets_mean": 4529.9,
"valid_targets_min": 943
},
{
"epoch": 1.9629629629629628,
"grad_norm": 0.556506113366241,
"learning_rate": 3.6173823888020335e-05,
"loss": 0.3142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3350547254085541,
"step": 1060,
"valid_targets_mean": 3848.2,
"valid_targets_min": 247
},
{
"epoch": 1.9722222222222223,
"grad_norm": 0.5988985101293695,
"learning_rate": 3.611933098752949e-05,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2744201123714447,
"step": 1065,
"valid_targets_mean": 2717.9,
"valid_targets_min": 661
},
{
"epoch": 1.9814814814814814,
"grad_norm": 0.5027204774148215,
"learning_rate": 3.6064494436224655e-05,
"loss": 0.299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2839767336845398,
"step": 1070,
"valid_targets_mean": 4282.0,
"valid_targets_min": 1624
},
{
"epoch": 1.9907407407407407,
"grad_norm": 0.5609086397412129,
"learning_rate": 3.6009315403175786e-05,
"loss": 0.2909,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3083677589893341,
"step": 1075,
"valid_targets_mean": 3896.8,
"valid_targets_min": 821
},
{
"epoch": 2.0,
"grad_norm": 0.5064761754287327,
"learning_rate": 3.595379506475426e-05,
"loss": 0.3159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30033770203590393,
"step": 1080,
"valid_targets_mean": 3941.4,
"valid_targets_min": 446
},
{
"epoch": 2.009259259259259,
"grad_norm": 0.5512471121936553,
"learning_rate": 3.5897934604607795e-05,
"loss": 0.2752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31047865748405457,
"step": 1085,
"valid_targets_mean": 4005.3,
"valid_targets_min": 667
},
{
"epoch": 2.0185185185185186,
"grad_norm": 0.5160950707981314,
"learning_rate": 3.584173521363525e-05,
"loss": 0.2628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23959210515022278,
"step": 1090,
"valid_targets_mean": 4089.5,
"valid_targets_min": 393
},
{
"epoch": 2.0277777777777777,
"grad_norm": 0.5035622460268815,
"learning_rate": 3.578519808996117e-05,
"loss": 0.2622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2824031114578247,
"step": 1095,
"valid_targets_mean": 4900.2,
"valid_targets_min": 2631
},
{
"epoch": 2.037037037037037,
"grad_norm": 0.5944018512965265,
"learning_rate": 3.572832443891033e-05,
"loss": 0.2674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2954099178314209,
"step": 1100,
"valid_targets_mean": 3287.0,
"valid_targets_min": 828
},
{
"epoch": 2.0462962962962963,
"grad_norm": 0.56938852512268,
"learning_rate": 3.567111547298194e-05,
"loss": 0.2806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3027660846710205,
"step": 1105,
"valid_targets_mean": 4170.7,
"valid_targets_min": 1762
},
{
"epoch": 2.0555555555555554,
"grad_norm": 0.5871826431417995,
"learning_rate": 3.561357241182388e-05,
"loss": 0.2935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3108934164047241,
"step": 1110,
"valid_targets_mean": 4158.6,
"valid_targets_min": 814
},
{
"epoch": 2.064814814814815,
"grad_norm": 0.5385231227592268,
"learning_rate": 3.555569648220666e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2445867955684662,
"step": 1115,
"valid_targets_mean": 3935.2,
"valid_targets_min": 772
},
{
"epoch": 2.074074074074074,
"grad_norm": 0.6485921558298788,
"learning_rate": 3.549748891799726e-05,
"loss": 0.2934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25209206342697144,
"step": 1120,
"valid_targets_mean": 3367.2,
"valid_targets_min": 496
},
{
"epoch": 2.0833333333333335,
"grad_norm": 0.5022623495987496,
"learning_rate": 3.543895096013284e-05,
"loss": 0.2828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2579001188278198,
"step": 1125,
"valid_targets_mean": 4011.8,
"valid_targets_min": 771
},
{
"epoch": 2.0925925925925926,
"grad_norm": 0.47266461256045833,
"learning_rate": 3.538008385659427e-05,
"loss": 0.3019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30898475646972656,
"step": 1130,
"valid_targets_mean": 5338.7,
"valid_targets_min": 466
},
{
"epoch": 2.1018518518518516,
"grad_norm": 0.474270085961415,
"learning_rate": 3.532088886237956e-05,
"loss": 0.2427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24021559953689575,
"step": 1135,
"valid_targets_mean": 5083.8,
"valid_targets_min": 1865
},
{
"epoch": 2.111111111111111,
"grad_norm": 0.4625964336857986,
"learning_rate": 3.5261367239477055e-05,
"loss": 0.2749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22822529077529907,
"step": 1140,
"valid_targets_mean": 4619.4,
"valid_targets_min": 2010
},
{
"epoch": 2.1203703703703702,
"grad_norm": 0.48948627688717905,
"learning_rate": 3.520152025683856e-05,
"loss": 0.2924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3005768656730652,
"step": 1145,
"valid_targets_mean": 4932.8,
"valid_targets_min": 2461
},
{
"epoch": 2.1296296296296298,
"grad_norm": 0.53742697060363,
"learning_rate": 3.514134919035229e-05,
"loss": 0.2515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.257113516330719,
"step": 1150,
"valid_targets_mean": 3710.9,
"valid_targets_min": 1530
},
{
"epoch": 2.138888888888889,
"grad_norm": 0.5503512629168121,
"learning_rate": 3.5080855322815635e-05,
"loss": 0.2788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27239710092544556,
"step": 1155,
"valid_targets_mean": 4531.2,
"valid_targets_min": 845
},
{
"epoch": 2.148148148148148,
"grad_norm": 0.6889338655609255,
"learning_rate": 3.5020039943907855e-05,
"loss": 0.2938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3204009532928467,
"step": 1160,
"valid_targets_mean": 3625.0,
"valid_targets_min": 404
},
{
"epoch": 2.1574074074074074,
"grad_norm": 0.5579386971063519,
"learning_rate": 3.495890435016258e-05,
"loss": 0.2861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31176429986953735,
"step": 1165,
"valid_targets_mean": 3857.6,
"valid_targets_min": 1893
},
{
"epoch": 2.1666666666666665,
"grad_norm": 0.4892660944704105,
"learning_rate": 3.489744984494012e-05,
"loss": 0.3073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2628619968891144,
"step": 1170,
"valid_targets_mean": 4434.8,
"valid_targets_min": 863
},
{
"epoch": 2.175925925925926,
"grad_norm": 0.5387503793875826,
"learning_rate": 3.4835677738399745e-05,
"loss": 0.2899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2822778820991516,
"step": 1175,
"valid_targets_mean": 4123.1,
"valid_targets_min": 733
},
{
"epoch": 2.185185185185185,
"grad_norm": 0.45394010508846994,
"learning_rate": 3.477358934747172e-05,
"loss": 0.2703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22972646355628967,
"step": 1180,
"valid_targets_mean": 4582.6,
"valid_targets_min": 549
},
{
"epoch": 2.1944444444444446,
"grad_norm": 0.5572233307223254,
"learning_rate": 3.47111859958292e-05,
"loss": 0.2931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3954271078109741,
"step": 1185,
"valid_targets_mean": 5509.8,
"valid_targets_min": 1607
},
{
"epoch": 2.2037037037037037,
"grad_norm": 0.5791570229327255,
"learning_rate": 3.464846901386008e-05,
"loss": 0.2783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25588738918304443,
"step": 1190,
"valid_targets_mean": 3254.6,
"valid_targets_min": 1942
},
{
"epoch": 2.212962962962963,
"grad_norm": 0.5529120478362382,
"learning_rate": 3.458543973863859e-05,
"loss": 0.299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31822943687438965,
"step": 1195,
"valid_targets_mean": 4516.6,
"valid_targets_min": 1521
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.5903216004748315,
"learning_rate": 3.452209951389677e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27697065472602844,
"step": 1200,
"valid_targets_mean": 3290.7,
"valid_targets_min": 767
},
{
"epoch": 2.2314814814814814,
"grad_norm": 0.6691604327279813,
"learning_rate": 3.445844968999586e-05,
"loss": 0.263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27134162187576294,
"step": 1205,
"valid_targets_mean": 3421.6,
"valid_targets_min": 554
},
{
"epoch": 2.240740740740741,
"grad_norm": 0.4881557721381399,
"learning_rate": 3.4394491623897506e-05,
"loss": 0.2725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2903343737125397,
"step": 1210,
"valid_targets_mean": 4894.1,
"valid_targets_min": 2191
},
{
"epoch": 2.25,
"grad_norm": 0.47094056474807733,
"learning_rate": 3.4330226679134805e-05,
"loss": 0.2653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23896852135658264,
"step": 1215,
"valid_targets_mean": 4323.1,
"valid_targets_min": 1904
},
{
"epoch": 2.259259259259259,
"grad_norm": 0.5276120851649831,
"learning_rate": 3.426565622578327e-05,
"loss": 0.2917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2607136368751526,
"step": 1220,
"valid_targets_mean": 4749.9,
"valid_targets_min": 1003
},
{
"epoch": 2.2685185185185186,
"grad_norm": 0.5849502666887231,
"learning_rate": 3.420078164043161e-05,
"loss": 0.2911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3107423186302185,
"step": 1225,
"valid_targets_mean": 4094.4,
"valid_targets_min": 708
},
{
"epoch": 2.2777777777777777,
"grad_norm": 0.5217159399236517,
"learning_rate": 3.413560430615235e-05,
"loss": 0.3058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26841235160827637,
"step": 1230,
"valid_targets_mean": 4422.4,
"valid_targets_min": 321
},
{
"epoch": 2.287037037037037,
"grad_norm": 0.6178611619364681,
"learning_rate": 3.407012561247239e-05,
"loss": 0.2897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25904977321624756,
"step": 1235,
"valid_targets_mean": 2980.8,
"valid_targets_min": 835
},
{
"epoch": 2.2962962962962963,
"grad_norm": 0.5079532786534702,
"learning_rate": 3.400434695534337e-05,
"loss": 0.2699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2537451386451721,
"step": 1240,
"valid_targets_mean": 4113.8,
"valid_targets_min": 735
},
{
"epoch": 2.3055555555555554,
"grad_norm": 0.532875433175585,
"learning_rate": 3.393826973711189e-05,
"loss": 0.3025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632405757904053,
"step": 1245,
"valid_targets_mean": 4773.4,
"valid_targets_min": 513
},
{
"epoch": 2.314814814814815,
"grad_norm": 0.5803442874340493,
"learning_rate": 3.3871895366489624e-05,
"loss": 0.2906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2636168301105499,
"step": 1250,
"valid_targets_mean": 3658.9,
"valid_targets_min": 480
},
{
"epoch": 2.324074074074074,
"grad_norm": 0.5173410175649708,
"learning_rate": 3.38052252585233e-05,
"loss": 0.2525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27228376269340515,
"step": 1255,
"valid_targets_mean": 4343.1,
"valid_targets_min": 1467
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.514166475299944,
"learning_rate": 3.373826083456451e-05,
"loss": 0.299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27561166882514954,
"step": 1260,
"valid_targets_mean": 4768.9,
"valid_targets_min": 1369
},
{
"epoch": 2.3425925925925926,
"grad_norm": 0.5438892930868031,
"learning_rate": 3.367100352223944e-05,
"loss": 0.2903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2975728511810303,
"step": 1265,
"valid_targets_mean": 4511.4,
"valid_targets_min": 1311
},
{
"epoch": 2.351851851851852,
"grad_norm": 0.5714498435745807,
"learning_rate": 3.360345475541839e-05,
"loss": 0.2622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28769245743751526,
"step": 1270,
"valid_targets_mean": 3497.2,
"valid_targets_min": 317
},
{
"epoch": 2.361111111111111,
"grad_norm": 0.5680794410701885,
"learning_rate": 3.353561597418524e-05,
"loss": 0.3389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3808847665786743,
"step": 1275,
"valid_targets_mean": 4961.8,
"valid_targets_min": 887
},
{
"epoch": 2.3703703703703702,
"grad_norm": 0.5542270863328363,
"learning_rate": 3.346748862480674e-05,
"loss": 0.2691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26409125328063965,
"step": 1280,
"valid_targets_mean": 3505.4,
"valid_targets_min": 597
},
{
"epoch": 2.3796296296296298,
"grad_norm": 0.5979782495601111,
"learning_rate": 3.339907415970168e-05,
"loss": 0.2859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22898399829864502,
"step": 1285,
"valid_targets_mean": 2800.9,
"valid_targets_min": 541
},
{
"epoch": 2.388888888888889,
"grad_norm": 0.5281486790881847,
"learning_rate": 3.333037403740989e-05,
"loss": 0.2677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23821385204792023,
"step": 1290,
"valid_targets_mean": 3827.2,
"valid_targets_min": 736
},
{
"epoch": 2.398148148148148,
"grad_norm": 0.5811780027704674,
"learning_rate": 3.326138972256121e-05,
"loss": 0.2568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27474355697631836,
"step": 1295,
"valid_targets_mean": 3347.3,
"valid_targets_min": 647
},
{
"epoch": 2.4074074074074074,
"grad_norm": 0.5062441112163106,
"learning_rate": 3.3192122685844214e-05,
"loss": 0.2594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2150622010231018,
"step": 1300,
"valid_targets_mean": 3436.5,
"valid_targets_min": 596
},
{
"epoch": 2.4166666666666665,
"grad_norm": 0.5298315558070248,
"learning_rate": 3.312257440397488e-05,
"loss": 0.2515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27570849657058716,
"step": 1305,
"valid_targets_mean": 4040.8,
"valid_targets_min": 474
},
{
"epoch": 2.425925925925926,
"grad_norm": 0.5014670497202479,
"learning_rate": 3.305274635966509e-05,
"loss": 0.2686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2778550386428833,
"step": 1310,
"valid_targets_mean": 4388.4,
"valid_targets_min": 1561
},
{
"epoch": 2.435185185185185,
"grad_norm": 0.4941899546980937,
"learning_rate": 3.298264004159104e-05,
"loss": 0.2669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2897128760814667,
"step": 1315,
"valid_targets_mean": 4620.4,
"valid_targets_min": 764
},
{
"epoch": 2.4444444444444446,
"grad_norm": 0.5845810037932724,
"learning_rate": 3.2912256944361484e-05,
"loss": 0.2502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2573606073856354,
"step": 1320,
"valid_targets_mean": 3208.6,
"valid_targets_min": 762
},
{
"epoch": 2.4537037037037037,
"grad_norm": 0.5433250444755526,
"learning_rate": 3.284159856848589e-05,
"loss": 0.253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28917592763900757,
"step": 1325,
"valid_targets_mean": 4059.9,
"valid_targets_min": 869
},
{
"epoch": 2.462962962962963,
"grad_norm": 0.5684951550508175,
"learning_rate": 3.2770666420342426e-05,
"loss": 0.2975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33171623945236206,
"step": 1330,
"valid_targets_mean": 3700.9,
"valid_targets_min": 908
},
{
"epoch": 2.4722222222222223,
"grad_norm": 0.46312664746277976,
"learning_rate": 3.269946201214586e-05,
"loss": 0.2816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.275724321603775,
"step": 1335,
"valid_targets_mean": 4910.4,
"valid_targets_min": 566
},
{
"epoch": 2.4814814814814814,
"grad_norm": 0.4467189932583639,
"learning_rate": 3.262798686191533e-05,
"loss": 0.291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24208518862724304,
"step": 1340,
"valid_targets_mean": 4536.0,
"valid_targets_min": 2578
},
{
"epoch": 2.490740740740741,
"grad_norm": 0.4991364949244441,
"learning_rate": 3.255624249344198e-05,
"loss": 0.2742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2625071406364441,
"step": 1345,
"valid_targets_mean": 3629.2,
"valid_targets_min": 628
},
{
"epoch": 2.5,
"grad_norm": 0.5133071061700047,
"learning_rate": 3.248423043625642e-05,
"loss": 0.2688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21916022896766663,
"step": 1350,
"valid_targets_mean": 3302.8,
"valid_targets_min": 894
},
{
"epoch": 2.5092592592592595,
"grad_norm": 0.49266442205021715,
"learning_rate": 3.241195222559621e-05,
"loss": 0.2877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27607983350753784,
"step": 1355,
"valid_targets_mean": 4676.6,
"valid_targets_min": 1984
},
{
"epoch": 2.5185185185185186,
"grad_norm": 0.6009290708819502,
"learning_rate": 3.2339409402373056e-05,
"loss": 0.3102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.383029580116272,
"step": 1360,
"valid_targets_mean": 3892.7,
"valid_targets_min": 572
},
{
"epoch": 2.5277777777777777,
"grad_norm": 0.5505025180156696,
"learning_rate": 3.2266603513139995e-05,
"loss": 0.2757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2777920961380005,
"step": 1365,
"valid_targets_mean": 3982.9,
"valid_targets_min": 2418
},
{
"epoch": 2.537037037037037,
"grad_norm": 0.5111646149007892,
"learning_rate": 3.2193536110058414e-05,
"loss": 0.2661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28832823038101196,
"step": 1370,
"valid_targets_mean": 4848.4,
"valid_targets_min": 2127
},
{
"epoch": 2.5462962962962963,
"grad_norm": 0.5040882202849286,
"learning_rate": 3.212020875086495e-05,
"loss": 0.288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31644129753112793,
"step": 1375,
"valid_targets_mean": 4976.4,
"valid_targets_min": 1046
},
{
"epoch": 2.5555555555555554,
"grad_norm": 0.5313850282519446,
"learning_rate": 3.20466229988383e-05,
"loss": 0.2742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2689250111579895,
"step": 1380,
"valid_targets_mean": 3821.2,
"valid_targets_min": 435
},
{
"epoch": 2.564814814814815,
"grad_norm": 0.5402024194211922,
"learning_rate": 3.197278042276587e-05,
"loss": 0.2743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2658718228340149,
"step": 1385,
"valid_targets_mean": 3890.8,
"valid_targets_min": 542
},
{
"epoch": 2.574074074074074,
"grad_norm": 0.4747076430784914,
"learning_rate": 3.189868259691036e-05,
"loss": 0.258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23981155455112457,
"step": 1390,
"valid_targets_mean": 4119.1,
"valid_targets_min": 654
},
{
"epoch": 2.5833333333333335,
"grad_norm": 0.5999323348223499,
"learning_rate": 3.182433110097618e-05,
"loss": 0.2896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2491762787103653,
"step": 1395,
"valid_targets_mean": 3585.1,
"valid_targets_min": 1770
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.46430831925350374,
"learning_rate": 3.174972752007577e-05,
"loss": 0.2702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25592419505119324,
"step": 1400,
"valid_targets_mean": 4477.0,
"valid_targets_min": 1644
},
{
"epoch": 2.601851851851852,
"grad_norm": 0.4807452208290434,
"learning_rate": 3.1674873444695804e-05,
"loss": 0.25,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18888458609580994,
"step": 1405,
"valid_targets_mean": 3690.0,
"valid_targets_min": 381
},
{
"epoch": 2.611111111111111,
"grad_norm": 0.5482835702850894,
"learning_rate": 3.15997704706633e-05,
"loss": 0.2789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2206830382347107,
"step": 1410,
"valid_targets_mean": 3195.4,
"valid_targets_min": 1566
},
{
"epoch": 2.6203703703703702,
"grad_norm": 0.4939467467540788,
"learning_rate": 3.152442019911161e-05,
"loss": 0.2773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2876463532447815,
"step": 1415,
"valid_targets_mean": 4438.0,
"valid_targets_min": 2557
},
{
"epoch": 2.6296296296296298,
"grad_norm": 0.563321666654457,
"learning_rate": 3.144882423644623e-05,
"loss": 0.2805,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2687886357307434,
"step": 1420,
"valid_targets_mean": 3560.2,
"valid_targets_min": 506
},
{
"epoch": 2.638888888888889,
"grad_norm": 0.4771594676809964,
"learning_rate": 3.1372984194310614e-05,
"loss": 0.283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2669658064842224,
"step": 1425,
"valid_targets_mean": 4496.8,
"valid_targets_min": 1034
},
{
"epoch": 2.648148148148148,
"grad_norm": 0.5701553331661079,
"learning_rate": 3.1296901689551766e-05,
"loss": 0.2669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2383461445569992,
"step": 1430,
"valid_targets_mean": 3213.2,
"valid_targets_min": 479
},
{
"epoch": 2.6574074074074074,
"grad_norm": 0.4996052044828777,
"learning_rate": 3.122057834418582e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22803762555122375,
"step": 1435,
"valid_targets_mean": 4339.8,
"valid_targets_min": 1876
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.4762272371218332,
"learning_rate": 3.1144015785363405e-05,
"loss": 0.2534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25022315979003906,
"step": 1440,
"valid_targets_mean": 4088.6,
"valid_targets_min": 1840
},
{
"epoch": 2.675925925925926,
"grad_norm": 0.5320018695023948,
"learning_rate": 3.1067215645335e-05,
"loss": 0.2788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2924705445766449,
"step": 1445,
"valid_targets_mean": 3703.2,
"valid_targets_min": 1026
},
{
"epoch": 2.685185185185185,
"grad_norm": 0.48946462823360576,
"learning_rate": 3.0990179561416124e-05,
"loss": 0.2475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24994626641273499,
"step": 1450,
"valid_targets_mean": 4758.8,
"valid_targets_min": 2745
},
{
"epoch": 2.6944444444444446,
"grad_norm": 0.5536914814420201,
"learning_rate": 3.0912909175952404e-05,
"loss": 0.2802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28184986114501953,
"step": 1455,
"valid_targets_mean": 4326.1,
"valid_targets_min": 2025
},
{
"epoch": 2.7037037037037037,
"grad_norm": 0.5678388519721984,
"learning_rate": 3.08354061362846e-05,
"loss": 0.2707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2809412181377411,
"step": 1460,
"valid_targets_mean": 3676.1,
"valid_targets_min": 2257
},
{
"epoch": 2.712962962962963,
"grad_norm": 0.5566129052217905,
"learning_rate": 3.075767209471345e-05,
"loss": 0.2774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2666032314300537,
"step": 1465,
"valid_targets_mean": 3426.1,
"valid_targets_min": 1705
},
{
"epoch": 2.7222222222222223,
"grad_norm": 0.6101327463343764,
"learning_rate": 3.06797087084645e-05,
"loss": 0.3039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28413182497024536,
"step": 1470,
"valid_targets_mean": 3345.7,
"valid_targets_min": 231
},
{
"epoch": 2.7314814814814814,
"grad_norm": 0.4725939229507694,
"learning_rate": 3.060151763965267e-05,
"loss": 0.2732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21915774047374725,
"step": 1475,
"valid_targets_mean": 3618.2,
"valid_targets_min": 755
},
{
"epoch": 2.7407407407407405,
"grad_norm": 0.4395930184494261,
"learning_rate": 3.052310055524696e-05,
"loss": 0.2566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2695322036743164,
"step": 1480,
"valid_targets_mean": 4727.2,
"valid_targets_min": 1598
},
{
"epoch": 2.75,
"grad_norm": 0.4706905685407168,
"learning_rate": 3.044445912703477e-05,
"loss": 0.2838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32568082213401794,
"step": 1485,
"valid_targets_mean": 5001.6,
"valid_targets_min": 665
},
{
"epoch": 2.7592592592592595,
"grad_norm": 0.4969657932430498,
"learning_rate": 3.036559503158637e-05,
"loss": 0.2783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23848196864128113,
"step": 1490,
"valid_targets_mean": 3659.8,
"valid_targets_min": 1529
},
{
"epoch": 2.7685185185185186,
"grad_norm": 0.6292698817704557,
"learning_rate": 3.0286509950219077e-05,
"loss": 0.2523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26195579767227173,
"step": 1495,
"valid_targets_mean": 3502.5,
"valid_targets_min": 923
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.5618365463665778,
"learning_rate": 3.020720556896147e-05,
"loss": 0.293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3372756242752075,
"step": 1500,
"valid_targets_mean": 4311.9,
"valid_targets_min": 812
},
{
"epoch": 2.787037037037037,
"grad_norm": 1.0844578159877276,
"learning_rate": 3.0127683578517418e-05,
"loss": 0.2599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2966684401035309,
"step": 1505,
"valid_targets_mean": 6157.9,
"valid_targets_min": 871
},
{
"epoch": 2.7962962962962963,
"grad_norm": 0.5031399470373651,
"learning_rate": 3.004794567423002e-05,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2614865303039551,
"step": 1510,
"valid_targets_mean": 4778.2,
"valid_targets_min": 584
},
{
"epoch": 2.8055555555555554,
"grad_norm": 0.5041806681514034,
"learning_rate": 2.9967993556045504e-05,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21226972341537476,
"step": 1515,
"valid_targets_mean": 5573.9,
"valid_targets_min": 845
},
{
"epoch": 2.814814814814815,
"grad_norm": 0.4997735272169362,
"learning_rate": 2.988782892847694e-05,
"loss": 0.2442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24511948227882385,
"step": 1520,
"valid_targets_mean": 4693.2,
"valid_targets_min": 886
},
{
"epoch": 2.824074074074074,
"grad_norm": 0.5995647736513411,
"learning_rate": 2.9807453500567937e-05,
"loss": 0.2597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3068762421607971,
"step": 1525,
"valid_targets_mean": 3450.6,
"valid_targets_min": 930
},
{
"epoch": 2.8333333333333335,
"grad_norm": 0.6054440225826818,
"learning_rate": 2.9726868985856186e-05,
"loss": 0.2232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21026404201984406,
"step": 1530,
"valid_targets_mean": 3599.7,
"valid_targets_min": 1569
},
{
"epoch": 2.8425925925925926,
"grad_norm": 0.5741889496658287,
"learning_rate": 2.9646077102336933e-05,
"loss": 0.2634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2920532524585724,
"step": 1535,
"valid_targets_mean": 3774.1,
"valid_targets_min": 317
},
{
"epoch": 2.851851851851852,
"grad_norm": 0.6588428000456348,
"learning_rate": 2.956507957242637e-05,
"loss": 0.2457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2671510875225067,
"step": 1540,
"valid_targets_mean": 3503.0,
"valid_targets_min": 526
},
{
"epoch": 2.861111111111111,
"grad_norm": 0.4491076760542966,
"learning_rate": 2.9483878122924874e-05,
"loss": 0.2372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23452128469944,
"step": 1545,
"valid_targets_mean": 5171.2,
"valid_targets_min": 1926
},
{
"epoch": 2.8703703703703702,
"grad_norm": 0.5018537144302465,
"learning_rate": 2.940247448498025e-05,
"loss": 0.2579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25421342253685,
"step": 1550,
"valid_targets_mean": 4786.2,
"valid_targets_min": 629
},
{
"epoch": 2.8796296296296298,
"grad_norm": 0.5716877920658499,
"learning_rate": 2.9320870394050783e-05,
"loss": 0.2639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2664939761161804,
"step": 1555,
"valid_targets_mean": 3561.0,
"valid_targets_min": 949
},
{
"epoch": 2.888888888888889,
"grad_norm": 0.481235284252179,
"learning_rate": 2.9239067589868228e-05,
"loss": 0.2721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2406879961490631,
"step": 1560,
"valid_targets_mean": 4930.3,
"valid_targets_min": 491
},
{
"epoch": 2.898148148148148,
"grad_norm": 0.5819300944807342,
"learning_rate": 2.9157067816400765e-05,
"loss": 0.2586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27452024817466736,
"step": 1565,
"valid_targets_mean": 3573.7,
"valid_targets_min": 2118
},
{
"epoch": 2.9074074074074074,
"grad_norm": 0.5048566931560609,
"learning_rate": 2.90748728218158e-05,
"loss": 0.2327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25580722093582153,
"step": 1570,
"valid_targets_mean": 4897.8,
"valid_targets_min": 970
},
{
"epoch": 2.9166666666666665,
"grad_norm": 0.49150215780244894,
"learning_rate": 2.8992484358442673e-05,
"loss": 0.2562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2693978548049927,
"step": 1575,
"valid_targets_mean": 4487.1,
"valid_targets_min": 1709
},
{
"epoch": 2.925925925925926,
"grad_norm": 0.5708812131347851,
"learning_rate": 2.8909904182735337e-05,
"loss": 0.2655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2671920955181122,
"step": 1580,
"valid_targets_mean": 3580.7,
"valid_targets_min": 894
},
{
"epoch": 2.935185185185185,
"grad_norm": 0.5601556537304803,
"learning_rate": 2.8827134055234883e-05,
"loss": 0.2579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26261967420578003,
"step": 1585,
"valid_targets_mean": 3402.6,
"valid_targets_min": 698
},
{
"epoch": 2.9444444444444446,
"grad_norm": 0.5319230530321215,
"learning_rate": 2.874417574053202e-05,
"loss": 0.2536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3018246591091156,
"step": 1590,
"valid_targets_mean": 4457.2,
"valid_targets_min": 194
},
{
"epoch": 2.9537037037037037,
"grad_norm": 0.6399002771658464,
"learning_rate": 2.8661031007229443e-05,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27771127223968506,
"step": 1595,
"valid_targets_mean": 3044.6,
"valid_targets_min": 736
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.5987506953116827,
"learning_rate": 2.857770162790416e-05,
"loss": 0.2548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2965382933616638,
"step": 1600,
"valid_targets_mean": 3597.6,
"valid_targets_min": 2209
},
{
"epoch": 2.9722222222222223,
"grad_norm": 0.6380121418721786,
"learning_rate": 2.8494189379069662e-05,
"loss": 0.2507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2618336081504822,
"step": 1605,
"valid_targets_mean": 2886.4,
"valid_targets_min": 275
},
{
"epoch": 2.9814814814814814,
"grad_norm": 0.5586710883725614,
"learning_rate": 2.8410496041138067e-05,
"loss": 0.2811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2990414500236511,
"step": 1610,
"valid_targets_mean": 4474.1,
"valid_targets_min": 1026
},
{
"epoch": 2.9907407407407405,
"grad_norm": 0.46644132738739846,
"learning_rate": 2.8326623398382174e-05,
"loss": 0.2564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24458184838294983,
"step": 1615,
"valid_targets_mean": 4791.1,
"valid_targets_min": 1838
},
{
"epoch": 3.0,
"grad_norm": 0.4761359461180495,
"learning_rate": 2.8242573238897395e-05,
"loss": 0.2636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24625471234321594,
"step": 1620,
"valid_targets_mean": 4855.9,
"valid_targets_min": 778
},
{
"epoch": 3.009259259259259,
"grad_norm": 0.5607956275569781,
"learning_rate": 2.815834735456367e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2913566827774048,
"step": 1625,
"valid_targets_mean": 3792.2,
"valid_targets_min": 1421
},
{
"epoch": 3.0185185185185186,
"grad_norm": 0.5200885636493907,
"learning_rate": 2.8073947541007246e-05,
"loss": 0.2864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2609402537345886,
"step": 1630,
"valid_targets_mean": 4228.2,
"valid_targets_min": 1388
},
{
"epoch": 3.0277777777777777,
"grad_norm": 0.5464229397916962,
"learning_rate": 2.7989375597562386e-05,
"loss": 0.253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.287320077419281,
"step": 1635,
"valid_targets_mean": 3905.6,
"valid_targets_min": 1626
},
{
"epoch": 3.037037037037037,
"grad_norm": 0.48953693297086637,
"learning_rate": 2.7904633327233016e-05,
"loss": 0.2464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23558908700942993,
"step": 1640,
"valid_targets_mean": 4453.9,
"valid_targets_min": 1973
},
{
"epoch": 3.0462962962962963,
"grad_norm": 0.5636981651244154,
"learning_rate": 2.781972253665431e-05,
"loss": 0.2445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22617077827453613,
"step": 1645,
"valid_targets_mean": 4146.0,
"valid_targets_min": 1473
},
{
"epoch": 3.0555555555555554,
"grad_norm": 0.5346016865896094,
"learning_rate": 2.773464503605414e-05,
"loss": 0.2623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20911234617233276,
"step": 1650,
"valid_targets_mean": 3475.5,
"valid_targets_min": 1771
},
{
"epoch": 3.064814814814815,
"grad_norm": 0.537999050036353,
"learning_rate": 2.764940263921451e-05,
"loss": 0.2687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3136928081512451,
"step": 1655,
"valid_targets_mean": 4361.2,
"valid_targets_min": 724
},
{
"epoch": 3.074074074074074,
"grad_norm": 0.5332117915430361,
"learning_rate": 2.7563997163432853e-05,
"loss": 0.2621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27018648386001587,
"step": 1660,
"valid_targets_mean": 4647.1,
"valid_targets_min": 1878
},
{
"epoch": 3.0833333333333335,
"grad_norm": 0.5252502798205276,
"learning_rate": 2.7478430429483336e-05,
"loss": 0.2662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2500057518482208,
"step": 1665,
"valid_targets_mean": 4244.9,
"valid_targets_min": 2221
},
{
"epoch": 3.0925925925925926,
"grad_norm": 0.9080479015306887,
"learning_rate": 2.7392704261578e-05,
"loss": 0.2565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24066869914531708,
"step": 1670,
"valid_targets_mean": 4786.5,
"valid_targets_min": 1795
},
{
"epoch": 3.1018518518518516,
"grad_norm": 0.5710663215003279,
"learning_rate": 2.7306820487327906e-05,
"loss": 0.2847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36352789402008057,
"step": 1675,
"valid_targets_mean": 4979.2,
"valid_targets_min": 561
},
{
"epoch": 3.111111111111111,
"grad_norm": 0.5397905809288394,
"learning_rate": 2.7220780937704118e-05,
"loss": 0.2614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3355807065963745,
"step": 1680,
"valid_targets_mean": 4687.1,
"valid_targets_min": 582
},
{
"epoch": 3.1203703703703702,
"grad_norm": 0.5719381517621509,
"learning_rate": 2.713458744699873e-05,
"loss": 0.2488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27028560638427734,
"step": 1685,
"valid_targets_mean": 3927.9,
"valid_targets_min": 735
},
{
"epoch": 3.1296296296296298,
"grad_norm": 0.5488071017786058,
"learning_rate": 2.704824185278573e-05,
"loss": 0.2747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2780161201953888,
"step": 1690,
"valid_targets_mean": 4392.5,
"valid_targets_min": 275
},
{
"epoch": 3.138888888888889,
"grad_norm": 0.560933973633571,
"learning_rate": 2.6961745995881813e-05,
"loss": 0.2738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2935350835323334,
"step": 1695,
"valid_targets_mean": 3781.2,
"valid_targets_min": 812
},
{
"epoch": 3.148148148148148,
"grad_norm": 0.5153173604123659,
"learning_rate": 2.6875101720307168e-05,
"loss": 0.2458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22335903346538544,
"step": 1700,
"valid_targets_mean": 3719.2,
"valid_targets_min": 322
},
{
"epoch": 3.1574074074074074,
"grad_norm": 0.529200809615876,
"learning_rate": 2.6788310873246133e-05,
"loss": 0.2554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.321768194437027,
"step": 1705,
"valid_targets_mean": 4739.2,
"valid_targets_min": 1880
},
{
"epoch": 3.1666666666666665,
"grad_norm": 0.5929542752441085,
"learning_rate": 2.670137530500783e-05,
"loss": 0.2336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23372915387153625,
"step": 1710,
"valid_targets_mean": 3828.8,
"valid_targets_min": 838
},
{
"epoch": 3.175925925925926,
"grad_norm": 0.5904824673475364,
"learning_rate": 2.661429686898673e-05,
"loss": 0.2494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25988543033599854,
"step": 1715,
"valid_targets_mean": 3370.4,
"valid_targets_min": 906
},
{
"epoch": 3.185185185185185,
"grad_norm": 0.5654133383351743,
"learning_rate": 2.6527077421623117e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24876439571380615,
"step": 1720,
"valid_targets_mean": 3427.9,
"valid_targets_min": 545
},
{
"epoch": 3.1944444444444446,
"grad_norm": 0.6120405362758927,
"learning_rate": 2.6439718822363515e-05,
"loss": 0.25,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23591837286949158,
"step": 1725,
"valid_targets_mean": 3958.1,
"valid_targets_min": 1876
},
{
"epoch": 3.2037037037037037,
"grad_norm": 0.5650864325109225,
"learning_rate": 2.6352222933621065e-05,
"loss": 0.2556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2792157828807831,
"step": 1730,
"valid_targets_mean": 4003.8,
"valid_targets_min": 845
},
{
"epoch": 3.212962962962963,
"grad_norm": 0.5560665906528047,
"learning_rate": 2.62645916207358e-05,
"loss": 0.2376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22258630394935608,
"step": 1735,
"valid_targets_mean": 3628.8,
"valid_targets_min": 2004
},
{
"epoch": 3.2222222222222223,
"grad_norm": 0.47392152791291015,
"learning_rate": 2.6176826751934882e-05,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23260678350925446,
"step": 1740,
"valid_targets_mean": 4254.6,
"valid_targets_min": 2036
},
{
"epoch": 3.2314814814814814,
"grad_norm": 0.547370940890995,
"learning_rate": 2.6088930198292773e-05,
"loss": 0.2568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3267350196838379,
"step": 1745,
"valid_targets_mean": 5440.8,
"valid_targets_min": 587
},
{
"epoch": 3.240740740740741,
"grad_norm": 0.6207094582404148,
"learning_rate": 2.600090383369135e-05,
"loss": 0.2704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2993205189704895,
"step": 1750,
"valid_targets_mean": 3938.6,
"valid_targets_min": 471
},
{
"epoch": 3.25,
"grad_norm": 0.5392432660293104,
"learning_rate": 2.5912749534779958e-05,
"loss": 0.2636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.261831134557724,
"step": 1755,
"valid_targets_mean": 4243.8,
"valid_targets_min": 1809
},
{
"epoch": 3.259259259259259,
"grad_norm": 0.5976629323136308,
"learning_rate": 2.5824469180935377e-05,
"loss": 0.2595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25948113203048706,
"step": 1760,
"valid_targets_mean": 3828.1,
"valid_targets_min": 2051
},
{
"epoch": 3.2685185185185186,
"grad_norm": 0.5616357127195194,
"learning_rate": 2.5736064654221808e-05,
"loss": 0.2609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2716560959815979,
"step": 1765,
"valid_targets_mean": 4082.7,
"valid_targets_min": 661
},
{
"epoch": 3.2777777777777777,
"grad_norm": 0.5515421060133198,
"learning_rate": 2.564753783935068e-05,
"loss": 0.2647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2995178699493408,
"step": 1770,
"valid_targets_mean": 4297.5,
"valid_targets_min": 773
},
{
"epoch": 3.287037037037037,
"grad_norm": 0.5442897232307273,
"learning_rate": 2.5558890623640513e-05,
"loss": 0.2822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33121222257614136,
"step": 1775,
"valid_targets_mean": 4387.3,
"valid_targets_min": 638
},
{
"epoch": 3.2962962962962963,
"grad_norm": 0.5440272723905116,
"learning_rate": 2.5470124896976687e-05,
"loss": 0.2722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26206910610198975,
"step": 1780,
"valid_targets_mean": 3992.6,
"valid_targets_min": 644
},
{
"epoch": 3.3055555555555554,
"grad_norm": 0.5245177761132279,
"learning_rate": 2.538124255177113e-05,
"loss": 0.261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25537651777267456,
"step": 1785,
"valid_targets_mean": 4090.8,
"valid_targets_min": 421
},
{
"epoch": 3.314814814814815,
"grad_norm": 0.4978922342919577,
"learning_rate": 2.5292245482921982e-05,
"loss": 0.2662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24825258553028107,
"step": 1790,
"valid_targets_mean": 4364.4,
"valid_targets_min": 1736
},
{
"epoch": 3.324074074074074,
"grad_norm": 0.5413131194299406,
"learning_rate": 2.5203135587773196e-05,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28795957565307617,
"step": 1795,
"valid_targets_mean": 4514.4,
"valid_targets_min": 1916
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.48381333303689816,
"learning_rate": 2.5113914766074075e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29423463344573975,
"step": 1800,
"valid_targets_mean": 5073.3,
"valid_targets_min": 887
},
{
"epoch": 3.3425925925925926,
"grad_norm": 0.5540751734364149,
"learning_rate": 2.5024584919938805e-05,
"loss": 0.2744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30663758516311646,
"step": 1805,
"valid_targets_mean": 3852.0,
"valid_targets_min": 783
},
{
"epoch": 3.351851851851852,
"grad_norm": 0.4875767498216755,
"learning_rate": 2.493514795380587e-05,
"loss": 0.2352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23132456839084625,
"step": 1810,
"valid_targets_mean": 4474.8,
"valid_targets_min": 2422
},
{
"epoch": 3.361111111111111,
"grad_norm": 0.562685263202997,
"learning_rate": 2.4845605774397482e-05,
"loss": 0.2433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2605576515197754,
"step": 1815,
"valid_targets_mean": 3649.8,
"valid_targets_min": 1813
},
{
"epoch": 3.3703703703703702,
"grad_norm": 0.5971247612228009,
"learning_rate": 2.4755960290678884e-05,
"loss": 0.2683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2765813171863556,
"step": 1820,
"valid_targets_mean": 3091.1,
"valid_targets_min": 592
},
{
"epoch": 3.3796296296296298,
"grad_norm": 0.5196767315771833,
"learning_rate": 2.4666213413817696e-05,
"loss": 0.2425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22254908084869385,
"step": 1825,
"valid_targets_mean": 3801.4,
"valid_targets_min": 747
},
{
"epoch": 3.388888888888889,
"grad_norm": 0.5121874679965963,
"learning_rate": 2.4576367057143167e-05,
"loss": 0.2679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2372555434703827,
"step": 1830,
"valid_targets_mean": 4133.1,
"valid_targets_min": 1882
},
{
"epoch": 3.398148148148148,
"grad_norm": 0.5020669363013955,
"learning_rate": 2.4486423136105356e-05,
"loss": 0.2584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25579991936683655,
"step": 1835,
"valid_targets_mean": 4919.1,
"valid_targets_min": 1901
},
{
"epoch": 3.4074074074074074,
"grad_norm": 0.5441744337458053,
"learning_rate": 2.4396383568234322e-05,
"loss": 0.2472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2616913914680481,
"step": 1840,
"valid_targets_mean": 4022.9,
"valid_targets_min": 721
},
{
"epoch": 3.4166666666666665,
"grad_norm": 0.5285349190964163,
"learning_rate": 2.4306250273099236e-05,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27835655212402344,
"step": 1845,
"valid_targets_mean": 3834.8,
"valid_targets_min": 1887
},
{
"epoch": 3.425925925925926,
"grad_norm": 0.46370125956202335,
"learning_rate": 2.421602517226745e-05,
"loss": 0.2557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21514523029327393,
"step": 1850,
"valid_targets_mean": 4822.7,
"valid_targets_min": 2356
},
{
"epoch": 3.435185185185185,
"grad_norm": 0.5045309429602557,
"learning_rate": 2.4125710189263555e-05,
"loss": 0.2597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24583828449249268,
"step": 1855,
"valid_targets_mean": 4282.2,
"valid_targets_min": 1668
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.5794459679981911,
"learning_rate": 2.4035307249528326e-05,
"loss": 0.2631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2770538926124573,
"step": 1860,
"valid_targets_mean": 4194.9,
"valid_targets_min": 1906
},
{
"epoch": 3.4537037037037037,
"grad_norm": 0.5757849492309214,
"learning_rate": 2.3944818280377732e-05,
"loss": 0.2756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2549267113208771,
"step": 1865,
"valid_targets_mean": 3418.6,
"valid_targets_min": 1267
},
{
"epoch": 3.462962962962963,
"grad_norm": 0.5787001079526624,
"learning_rate": 2.3854245210961798e-05,
"loss": 0.2648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735787332057953,
"step": 1870,
"valid_targets_mean": 3282.9,
"valid_targets_min": 327
},
{
"epoch": 3.4722222222222223,
"grad_norm": 0.6603451083242975,
"learning_rate": 2.376358997222351e-05,
"loss": 0.2766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30318760871887207,
"step": 1875,
"valid_targets_mean": 2959.7,
"valid_targets_min": 580
},
{
"epoch": 3.4814814814814814,
"grad_norm": 0.5439007943417832,
"learning_rate": 2.367285449685763e-05,
"loss": 0.26,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.319497674703598,
"step": 1880,
"valid_targets_mean": 4390.6,
"valid_targets_min": 1345
},
{
"epoch": 3.490740740740741,
"grad_norm": 0.47502055786608866,
"learning_rate": 2.3582040719269504e-05,
"loss": 0.2546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2526184916496277,
"step": 1885,
"valid_targets_mean": 5038.2,
"valid_targets_min": 1510
},
{
"epoch": 3.5,
"grad_norm": 0.5452928944856862,
"learning_rate": 2.3491150575533808e-05,
"loss": 0.252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23368355631828308,
"step": 1890,
"valid_targets_mean": 3530.3,
"valid_targets_min": 605
},
{
"epoch": 3.5092592592592595,
"grad_norm": 0.6073760697025231,
"learning_rate": 2.340018600335328e-05,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2643791437149048,
"step": 1895,
"valid_targets_mean": 4982.6,
"valid_targets_min": 2220
},
{
"epoch": 3.5185185185185186,
"grad_norm": 0.5596007727722335,
"learning_rate": 2.3309148942017424e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.276612251996994,
"step": 1900,
"valid_targets_mean": 3690.1,
"valid_targets_min": 572
},
{
"epoch": 3.5277777777777777,
"grad_norm": 0.521852302471038,
"learning_rate": 2.321804133236115e-05,
"loss": 0.2387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25015997886657715,
"step": 1905,
"valid_targets_mean": 3807.7,
"valid_targets_min": 1461
},
{
"epoch": 3.537037037037037,
"grad_norm": 0.5202676505674712,
"learning_rate": 2.312686511672338e-05,
"loss": 0.2731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2852502465248108,
"step": 1910,
"valid_targets_mean": 4486.8,
"valid_targets_min": 1530
},
{
"epoch": 3.5462962962962963,
"grad_norm": 0.49067308052473124,
"learning_rate": 2.3035622238905694e-05,
"loss": 0.2382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2287035584449768,
"step": 1915,
"valid_targets_mean": 4143.6,
"valid_targets_min": 1655
},
{
"epoch": 3.5555555555555554,
"grad_norm": 0.5274253125213653,
"learning_rate": 2.2944314644130814e-05,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27749884128570557,
"step": 1920,
"valid_targets_mean": 4553.0,
"valid_targets_min": 781
},
{
"epoch": 3.564814814814815,
"grad_norm": 0.5688144249989912,
"learning_rate": 2.2852944279001207e-05,
"loss": 0.2674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3269811272621155,
"step": 1925,
"valid_targets_mean": 4159.2,
"valid_targets_min": 888
},
{
"epoch": 3.574074074074074,
"grad_norm": 0.5041147570894117,
"learning_rate": 2.2761513091457537e-05,
"loss": 0.2543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2584291100502014,
"step": 1930,
"valid_targets_mean": 4408.8,
"valid_targets_min": 1757
},
{
"epoch": 3.5833333333333335,
"grad_norm": 0.5046219032639738,
"learning_rate": 2.2670023030737153e-05,
"loss": 0.2767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2301914095878601,
"step": 1935,
"valid_targets_mean": 4570.9,
"valid_targets_min": 1585
},
{
"epoch": 3.5925925925925926,
"grad_norm": 0.5097422302633462,
"learning_rate": 2.2578476047332535e-05,
"loss": 0.2441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23207487165927887,
"step": 1940,
"valid_targets_mean": 3510.6,
"valid_targets_min": 1621
},
{
"epoch": 3.601851851851852,
"grad_norm": 0.5502757038033133,
"learning_rate": 2.2486874092949708e-05,
"loss": 0.2565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24923279881477356,
"step": 1945,
"valid_targets_mean": 3629.6,
"valid_targets_min": 1696
},
{
"epoch": 3.611111111111111,
"grad_norm": 0.5493878283106496,
"learning_rate": 2.2395219120466622e-05,
"loss": 0.2409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24204868078231812,
"step": 1950,
"valid_targets_mean": 3706.9,
"valid_targets_min": 698
},
{
"epoch": 3.6203703703703702,
"grad_norm": 0.5626462939570609,
"learning_rate": 2.2303513083891542e-05,
"loss": 0.2474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2548343539237976,
"step": 1955,
"valid_targets_mean": 3551.1,
"valid_targets_min": 594
},
{
"epoch": 3.6296296296296298,
"grad_norm": 0.47203755513693735,
"learning_rate": 2.2211757938321373e-05,
"loss": 0.2377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20937591791152954,
"step": 1960,
"valid_targets_mean": 4418.7,
"valid_targets_min": 828
},
{
"epoch": 3.638888888888889,
"grad_norm": 0.5005343443672163,
"learning_rate": 2.2119955639899983e-05,
"loss": 0.2933,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21190600097179413,
"step": 1965,
"valid_targets_mean": 4131.6,
"valid_targets_min": 526
},
{
"epoch": 3.648148148148148,
"grad_norm": 0.5062414681733032,
"learning_rate": 2.20281081457765e-05,
"loss": 0.2591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24952177703380585,
"step": 1970,
"valid_targets_mean": 4448.9,
"valid_targets_min": 1588
},
{
"epoch": 3.6574074074074074,
"grad_norm": 0.5166078661849807,
"learning_rate": 2.1936217414063584e-05,
"loss": 0.2787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29824504256248474,
"step": 1975,
"valid_targets_mean": 4616.9,
"valid_targets_min": 1758
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.5036298018276866,
"learning_rate": 2.184428540379569e-05,
"loss": 0.2517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21241354942321777,
"step": 1980,
"valid_targets_mean": 3836.9,
"valid_targets_min": 567
},
{
"epoch": 3.675925925925926,
"grad_norm": 0.5263922576220329,
"learning_rate": 2.1752314074887287e-05,
"loss": 0.2602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21682290732860565,
"step": 1985,
"valid_targets_mean": 3577.4,
"valid_targets_min": 892
},
{
"epoch": 3.685185185185185,
"grad_norm": 0.5504297129810796,
"learning_rate": 2.1660305388091106e-05,
"loss": 0.2259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22139625251293182,
"step": 1990,
"valid_targets_mean": 3110.8,
"valid_targets_min": 479
},
{
"epoch": 3.6944444444444446,
"grad_norm": 0.49325575866624627,
"learning_rate": 2.1568261304956298e-05,
"loss": 0.2507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23789441585540771,
"step": 1995,
"valid_targets_mean": 4232.6,
"valid_targets_min": 680
},
{
"epoch": 3.7037037037037037,
"grad_norm": 0.5807219455547323,
"learning_rate": 2.1476183787786638e-05,
"loss": 0.2639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26028937101364136,
"step": 2000,
"valid_targets_mean": 3360.9,
"valid_targets_min": 528
},
{
"epoch": 3.712962962962963,
"grad_norm": 0.5255254634433139,
"learning_rate": 2.138407479959869e-05,
"loss": 0.2841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24893628060817719,
"step": 2005,
"valid_targets_mean": 3761.6,
"valid_targets_min": 2356
},
{
"epoch": 3.7222222222222223,
"grad_norm": 0.4773408030946498,
"learning_rate": 2.129193630407996e-05,
"loss": 0.2352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22808077931404114,
"step": 2010,
"valid_targets_mean": 4793.6,
"valid_targets_min": 2178
},
{
"epoch": 3.7314814814814814,
"grad_norm": 0.4500006448054135,
"learning_rate": 2.119977026554701e-05,
"loss": 0.2447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2292492389678955,
"step": 2015,
"valid_targets_mean": 5103.6,
"valid_targets_min": 647
},
{
"epoch": 3.7407407407407405,
"grad_norm": 0.6473593402015339,
"learning_rate": 2.1107578648903614e-05,
"loss": 0.2476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2941989600658417,
"step": 2020,
"valid_targets_mean": 3061.2,
"valid_targets_min": 722
},
{
"epoch": 3.75,
"grad_norm": 0.5203301574671931,
"learning_rate": 2.1015363419598835e-05,
"loss": 0.2355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2489463984966278,
"step": 2025,
"valid_targets_mean": 4192.7,
"valid_targets_min": 404
},
{
"epoch": 3.7592592592592595,
"grad_norm": 0.5362575203104003,
"learning_rate": 2.0923126543585156e-05,
"loss": 0.2558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270350843667984,
"step": 2030,
"valid_targets_mean": 3732.0,
"valid_targets_min": 420
},
{
"epoch": 3.7685185185185186,
"grad_norm": 0.499224592037341,
"learning_rate": 2.0830869987276537e-05,
"loss": 0.2512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3091796040534973,
"step": 2035,
"valid_targets_mean": 5031.6,
"valid_targets_min": 1046
},
{
"epoch": 3.7777777777777777,
"grad_norm": 0.5250725966197842,
"learning_rate": 2.0738595717506496e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22418762743473053,
"step": 2040,
"valid_targets_mean": 3963.0,
"valid_targets_min": 1275
},
{
"epoch": 3.787037037037037,
"grad_norm": 0.5742892201930184,
"learning_rate": 2.0646305701486215e-05,
"loss": 0.2477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24486325681209564,
"step": 2045,
"valid_targets_mean": 2970.0,
"valid_targets_min": 720
},
{
"epoch": 3.7962962962962963,
"grad_norm": 0.5131608808905314,
"learning_rate": 2.055400190676255e-05,
"loss": 0.2405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24303856492042542,
"step": 2050,
"valid_targets_mean": 4145.8,
"valid_targets_min": 2285
},
{
"epoch": 3.8055555555555554,
"grad_norm": 0.5052923454962346,
"learning_rate": 2.046168630117612e-05,
"loss": 0.2662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3247288167476654,
"step": 2055,
"valid_targets_mean": 4734.6,
"valid_targets_min": 2031
},
{
"epoch": 3.814814814814815,
"grad_norm": 0.610872868205616,
"learning_rate": 2.0369360852819327e-05,
"loss": 0.2513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2507988214492798,
"step": 2060,
"valid_targets_mean": 3605.2,
"valid_targets_min": 523
},
{
"epoch": 3.824074074074074,
"grad_norm": 0.5723992923066944,
"learning_rate": 2.027702752999444e-05,
"loss": 0.2675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26201534271240234,
"step": 2065,
"valid_targets_mean": 4353.8,
"valid_targets_min": 2454
},
{
"epoch": 3.8333333333333335,
"grad_norm": 0.5270133124002951,
"learning_rate": 2.0184688301171567e-05,
"loss": 0.2682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2589871883392334,
"step": 2070,
"valid_targets_mean": 3676.9,
"valid_targets_min": 573
},
{
"epoch": 3.8425925925925926,
"grad_norm": 0.5139405722726906,
"learning_rate": 2.009234513494676e-05,
"loss": 0.284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24153248965740204,
"step": 2075,
"valid_targets_mean": 4124.9,
"valid_targets_min": 663
},
{
"epoch": 3.851851851851852,
"grad_norm": 0.48652457260517906,
"learning_rate": 2e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24923667311668396,
"step": 2080,
"valid_targets_mean": 4446.2,
"valid_targets_min": 2380
},
{
"epoch": 3.861111111111111,
"grad_norm": 0.5365239749592436,
"learning_rate": 1.9907654865053248e-05,
"loss": 0.2691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3025137782096863,
"step": 2085,
"valid_targets_mean": 4364.4,
"valid_targets_min": 2177
},
{
"epoch": 3.8703703703703702,
"grad_norm": 0.5487362203225881,
"learning_rate": 1.981531169882844e-05,
"loss": 0.2857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2569238543510437,
"step": 2090,
"valid_targets_mean": 3598.1,
"valid_targets_min": 709
},
{
"epoch": 3.8796296296296298,
"grad_norm": 0.4846676225451064,
"learning_rate": 1.9722972470005573e-05,
"loss": 0.2412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21618637442588806,
"step": 2095,
"valid_targets_mean": 4546.6,
"valid_targets_min": 2362
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.5026122833039225,
"learning_rate": 1.9630639147180673e-05,
"loss": 0.2907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31693074107170105,
"step": 2100,
"valid_targets_mean": 4688.2,
"valid_targets_min": 758
},
{
"epoch": 3.898148148148148,
"grad_norm": 0.4155051368967143,
"learning_rate": 1.9538313698823887e-05,
"loss": 0.2372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20872560143470764,
"step": 2105,
"valid_targets_mean": 5315.1,
"valid_targets_min": 513
},
{
"epoch": 3.9074074074074074,
"grad_norm": 0.5053201320989432,
"learning_rate": 1.944599809323745e-05,
"loss": 0.2684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23215201497077942,
"step": 2110,
"valid_targets_mean": 4019.3,
"valid_targets_min": 1946
},
{
"epoch": 3.9166666666666665,
"grad_norm": 0.546899589555972,
"learning_rate": 1.935369429851379e-05,
"loss": 0.27,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3266640901565552,
"step": 2115,
"valid_targets_mean": 4348.4,
"valid_targets_min": 2310
},
{
"epoch": 3.925925925925926,
"grad_norm": 0.5637484671729249,
"learning_rate": 1.926140428249351e-05,
"loss": 0.264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31784260272979736,
"step": 2120,
"valid_targets_mean": 3883.6,
"valid_targets_min": 466
},
{
"epoch": 3.935185185185185,
"grad_norm": 0.42601754727230023,
"learning_rate": 1.916913001272347e-05,
"loss": 0.2422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25121498107910156,
"step": 2125,
"valid_targets_mean": 5457.9,
"valid_targets_min": 2575
},
{
"epoch": 3.9444444444444446,
"grad_norm": 0.45355069808084864,
"learning_rate": 1.907687345641485e-05,
"loss": 0.2587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22492194175720215,
"step": 2130,
"valid_targets_mean": 4995.0,
"valid_targets_min": 2156
},
{
"epoch": 3.9537037037037037,
"grad_norm": 0.4966703717024659,
"learning_rate": 1.8984636580401165e-05,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22575929760932922,
"step": 2135,
"valid_targets_mean": 3822.2,
"valid_targets_min": 445
},
{
"epoch": 3.962962962962963,
"grad_norm": 0.4401478192705303,
"learning_rate": 1.8892421351096393e-05,
"loss": 0.2488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25159192085266113,
"step": 2140,
"valid_targets_mean": 5782.5,
"valid_targets_min": 1416
},
{
"epoch": 3.9722222222222223,
"grad_norm": 0.5363034129883026,
"learning_rate": 1.8800229734452998e-05,
"loss": 0.2428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20497846603393555,
"step": 2145,
"valid_targets_mean": 3910.8,
"valid_targets_min": 1582
},
{
"epoch": 3.9814814814814814,
"grad_norm": 0.4949167294147326,
"learning_rate": 1.8708063695920047e-05,
"loss": 0.2534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2380611002445221,
"step": 2150,
"valid_targets_mean": 4586.9,
"valid_targets_min": 1010
},
{
"epoch": 3.9907407407407405,
"grad_norm": 0.4867568096304702,
"learning_rate": 1.8615925200401318e-05,
"loss": 0.2722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2899520993232727,
"step": 2155,
"valid_targets_mean": 5028.2,
"valid_targets_min": 2141
},
{
"epoch": 4.0,
"grad_norm": 0.48512349657520354,
"learning_rate": 1.852381621221337e-05,
"loss": 0.2603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25779369473457336,
"step": 2160,
"valid_targets_mean": 4514.5,
"valid_targets_min": 2124
},
{
"epoch": 4.0092592592592595,
"grad_norm": 0.5430547707519348,
"learning_rate": 1.843173869504371e-05,
"loss": 0.2365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2733425498008728,
"step": 2165,
"valid_targets_mean": 3821.5,
"valid_targets_min": 675
},
{
"epoch": 4.018518518518518,
"grad_norm": 0.4760491658593785,
"learning_rate": 1.8339694611908897e-05,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2561805248260498,
"step": 2170,
"valid_targets_mean": 5306.1,
"valid_targets_min": 562
},
{
"epoch": 4.027777777777778,
"grad_norm": 0.48697889807764705,
"learning_rate": 1.8247685925112716e-05,
"loss": 0.2059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1906963586807251,
"step": 2175,
"valid_targets_mean": 4063.8,
"valid_targets_min": 512
},
{
"epoch": 4.037037037037037,
"grad_norm": 0.5970157683288126,
"learning_rate": 1.8155714596204318e-05,
"loss": 0.2474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2847557067871094,
"step": 2180,
"valid_targets_mean": 3594.9,
"valid_targets_min": 2094
},
{
"epoch": 4.046296296296297,
"grad_norm": 0.5557943898925222,
"learning_rate": 1.806378258593642e-05,
"loss": 0.23,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2644827961921692,
"step": 2185,
"valid_targets_mean": 3963.4,
"valid_targets_min": 2251
},
{
"epoch": 4.055555555555555,
"grad_norm": 0.5180284994233548,
"learning_rate": 1.797189185422351e-05,
"loss": 0.236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2229800969362259,
"step": 2190,
"valid_targets_mean": 3800.6,
"valid_targets_min": 1971
},
{
"epoch": 4.064814814814815,
"grad_norm": 0.5275679350937065,
"learning_rate": 1.788004436010002e-05,
"loss": 0.2671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2343769669532776,
"step": 2195,
"valid_targets_mean": 3961.6,
"valid_targets_min": 1795
},
{
"epoch": 4.074074074074074,
"grad_norm": 0.5719653033328994,
"learning_rate": 1.778824206167863e-05,
"loss": 0.2655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2309592068195343,
"step": 2200,
"valid_targets_mean": 3531.6,
"valid_targets_min": 573
},
{
"epoch": 4.083333333333333,
"grad_norm": 0.5204783805453003,
"learning_rate": 1.7696486916108468e-05,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2981133460998535,
"step": 2205,
"valid_targets_mean": 4711.4,
"valid_targets_min": 1961
},
{
"epoch": 4.092592592592593,
"grad_norm": 0.5003216687184299,
"learning_rate": 1.7604780879533384e-05,
"loss": 0.2675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22590869665145874,
"step": 2210,
"valid_targets_mean": 4413.9,
"valid_targets_min": 561
},
{
"epoch": 4.101851851851852,
"grad_norm": 0.49855495979695824,
"learning_rate": 1.7513125907050302e-05,
"loss": 0.2315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24315370619297028,
"step": 2215,
"valid_targets_mean": 4331.7,
"valid_targets_min": 789
},
{
"epoch": 4.111111111111111,
"grad_norm": 0.5590091213064351,
"learning_rate": 1.742152395266747e-05,
"loss": 0.2377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28527384996414185,
"step": 2220,
"valid_targets_mean": 4252.1,
"valid_targets_min": 723
},
{
"epoch": 4.12037037037037,
"grad_norm": 0.4968800799735138,
"learning_rate": 1.7329976969262854e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23040777444839478,
"step": 2225,
"valid_targets_mean": 4997.2,
"valid_targets_min": 338
},
{
"epoch": 4.12962962962963,
"grad_norm": 0.5225715461374966,
"learning_rate": 1.7238486908542463e-05,
"loss": 0.2589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24196788668632507,
"step": 2230,
"valid_targets_mean": 3969.0,
"valid_targets_min": 317
},
{
"epoch": 4.138888888888889,
"grad_norm": 0.566072039301765,
"learning_rate": 1.71470557209988e-05,
"loss": 0.2505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2790153920650482,
"step": 2235,
"valid_targets_mean": 3879.8,
"valid_targets_min": 569
},
{
"epoch": 4.148148148148148,
"grad_norm": 0.45951042633304573,
"learning_rate": 1.7055685355869196e-05,
"loss": 0.2284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2070295810699463,
"step": 2240,
"valid_targets_mean": 4564.6,
"valid_targets_min": 783
},
{
"epoch": 4.157407407407407,
"grad_norm": 0.5605220935037593,
"learning_rate": 1.6964377761094313e-05,
"loss": 0.2502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2974599301815033,
"step": 2245,
"valid_targets_mean": 4355.8,
"valid_targets_min": 2288
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.5399329299027271,
"learning_rate": 1.6873134883276626e-05,
"loss": 0.2586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25155043601989746,
"step": 2250,
"valid_targets_mean": 3803.4,
"valid_targets_min": 907
},
{
"epoch": 4.175925925925926,
"grad_norm": 0.5699303989204108,
"learning_rate": 1.6781958667638855e-05,
"loss": 0.2419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24903835356235504,
"step": 2255,
"valid_targets_mean": 3695.6,
"valid_targets_min": 1570
},
{
"epoch": 4.185185185185185,
"grad_norm": 0.6105524681890175,
"learning_rate": 1.669085105798258e-05,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2412843555212021,
"step": 2260,
"valid_targets_mean": 3404.7,
"valid_targets_min": 1942
},
{
"epoch": 4.194444444444445,
"grad_norm": 0.513400651402716,
"learning_rate": 1.6599813996646727e-05,
"loss": 0.222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22134144604206085,
"step": 2265,
"valid_targets_mean": 4079.2,
"valid_targets_min": 826
},
{
"epoch": 4.203703703703703,
"grad_norm": 0.4868536453874342,
"learning_rate": 1.65088494244662e-05,
"loss": 0.2203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21563026309013367,
"step": 2270,
"valid_targets_mean": 4334.6,
"valid_targets_min": 1836
},
{
"epoch": 4.212962962962963,
"grad_norm": 0.5244709386650726,
"learning_rate": 1.6417959280730506e-05,
"loss": 0.2648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.290455162525177,
"step": 2275,
"valid_targets_mean": 4819.6,
"valid_targets_min": 1708
},
{
"epoch": 4.222222222222222,
"grad_norm": 0.5328915315468012,
"learning_rate": 1.632714550314237e-05,
"loss": 0.2192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.250417560338974,
"step": 2280,
"valid_targets_mean": 4054.6,
"valid_targets_min": 1107
},
{
"epoch": 4.231481481481482,
"grad_norm": 0.5972878155360123,
"learning_rate": 1.6236410027776494e-05,
"loss": 0.2507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22642353177070618,
"step": 2285,
"valid_targets_mean": 3058.1,
"valid_targets_min": 563
},
{
"epoch": 4.2407407407407405,
"grad_norm": 0.5907761427734216,
"learning_rate": 1.6145754789038205e-05,
"loss": 0.242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18239718675613403,
"step": 2290,
"valid_targets_mean": 3142.9,
"valid_targets_min": 859
},
{
"epoch": 4.25,
"grad_norm": 0.5433714738595986,
"learning_rate": 1.6055181719622278e-05,
"loss": 0.2273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18836545944213867,
"step": 2295,
"valid_targets_mean": 3226.4,
"valid_targets_min": 1585
},
{
"epoch": 4.2592592592592595,
"grad_norm": 0.5040273007610758,
"learning_rate": 1.5964692750471684e-05,
"loss": 0.2245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23993724584579468,
"step": 2300,
"valid_targets_mean": 4635.3,
"valid_targets_min": 480
},
{
"epoch": 4.268518518518518,
"grad_norm": 0.5272742282351708,
"learning_rate": 1.5874289810736452e-05,
"loss": 0.248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2000361979007721,
"step": 2305,
"valid_targets_mean": 3835.5,
"valid_targets_min": 525
},
{
"epoch": 4.277777777777778,
"grad_norm": 0.5421153003949007,
"learning_rate": 1.5783974827732555e-05,
"loss": 0.244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20407015085220337,
"step": 2310,
"valid_targets_mean": 3983.3,
"valid_targets_min": 1755
},
{
"epoch": 4.287037037037037,
"grad_norm": 0.508791566036482,
"learning_rate": 1.5693749726900767e-05,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24065956473350525,
"step": 2315,
"valid_targets_mean": 4733.2,
"valid_targets_min": 2012
},
{
"epoch": 4.296296296296296,
"grad_norm": 0.6278410756307,
"learning_rate": 1.560361643176568e-05,
"loss": 0.2459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2204664796590805,
"step": 2320,
"valid_targets_mean": 2955.4,
"valid_targets_min": 767
},
{
"epoch": 4.305555555555555,
"grad_norm": 0.51340304756785,
"learning_rate": 1.5513576863894654e-05,
"loss": 0.2769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24818578362464905,
"step": 2325,
"valid_targets_mean": 4591.8,
"valid_targets_min": 891
},
{
"epoch": 4.314814814814815,
"grad_norm": 0.5004366205081487,
"learning_rate": 1.5423632942856836e-05,
"loss": 0.2378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22766365110874176,
"step": 2330,
"valid_targets_mean": 4753.9,
"valid_targets_min": 2548
},
{
"epoch": 4.324074074074074,
"grad_norm": 0.5514594228413564,
"learning_rate": 1.5333786586182308e-05,
"loss": 0.2269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2691954970359802,
"step": 2335,
"valid_targets_mean": 4707.2,
"valid_targets_min": 556
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.47659547270242236,
"learning_rate": 1.5244039709321123e-05,
"loss": 0.2492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24745163321495056,
"step": 2340,
"valid_targets_mean": 4947.1,
"valid_targets_min": 1720
},
{
"epoch": 4.342592592592593,
"grad_norm": 0.5315469803851041,
"learning_rate": 1.5154394225602525e-05,
"loss": 0.2464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24923008680343628,
"step": 2345,
"valid_targets_mean": 4185.3,
"valid_targets_min": 641
},
{
"epoch": 4.351851851851852,
"grad_norm": 0.49151936211870245,
"learning_rate": 1.5064852046194127e-05,
"loss": 0.2356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21445490419864655,
"step": 2350,
"valid_targets_mean": 4332.5,
"valid_targets_min": 1109
},
{
"epoch": 4.361111111111111,
"grad_norm": 0.530468204810676,
"learning_rate": 1.49754150800612e-05,
"loss": 0.2564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30778932571411133,
"step": 2355,
"valid_targets_mean": 5126.1,
"valid_targets_min": 742
},
{
"epoch": 4.37037037037037,
"grad_norm": 0.5309780578096778,
"learning_rate": 1.4886085233925931e-05,
"loss": 0.2697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2524486780166626,
"step": 2360,
"valid_targets_mean": 5057.9,
"valid_targets_min": 943
},
{
"epoch": 4.37962962962963,
"grad_norm": 0.4267222102718032,
"learning_rate": 1.4796864412226812e-05,
"loss": 0.2313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1837308555841446,
"step": 2365,
"valid_targets_mean": 5174.9,
"valid_targets_min": 2396
},
{
"epoch": 4.388888888888889,
"grad_norm": 0.5625553097603778,
"learning_rate": 1.4707754517078021e-05,
"loss": 0.2273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2614815831184387,
"step": 2370,
"valid_targets_mean": 3773.8,
"valid_targets_min": 724
},
{
"epoch": 4.398148148148148,
"grad_norm": 0.5396666245271566,
"learning_rate": 1.4618757448228869e-05,
"loss": 0.2556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23616960644721985,
"step": 2375,
"valid_targets_mean": 3829.4,
"valid_targets_min": 2237
},
{
"epoch": 4.407407407407407,
"grad_norm": 0.6629662126529009,
"learning_rate": 1.4529875103023316e-05,
"loss": 0.2489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2807127833366394,
"step": 2380,
"valid_targets_mean": 4226.0,
"valid_targets_min": 2051
},
{
"epoch": 4.416666666666667,
"grad_norm": 0.5603617627291267,
"learning_rate": 1.4441109376359498e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25401419401168823,
"step": 2385,
"valid_targets_mean": 3388.5,
"valid_targets_min": 1929
},
{
"epoch": 4.425925925925926,
"grad_norm": 0.512775023115171,
"learning_rate": 1.435246216064933e-05,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2706540822982788,
"step": 2390,
"valid_targets_mean": 4530.2,
"valid_targets_min": 1979
},
{
"epoch": 4.435185185185185,
"grad_norm": 0.5204570025829351,
"learning_rate": 1.4263935345778202e-05,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24824261665344238,
"step": 2395,
"valid_targets_mean": 4182.4,
"valid_targets_min": 1899
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.5254410292063911,
"learning_rate": 1.417553081906462e-05,
"loss": 0.2681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26439177989959717,
"step": 2400,
"valid_targets_mean": 4461.6,
"valid_targets_min": 1416
},
{
"epoch": 4.453703703703704,
"grad_norm": 0.5956700654564747,
"learning_rate": 1.408725046522005e-05,
"loss": 0.2271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2445288896560669,
"step": 2405,
"valid_targets_mean": 3570.7,
"valid_targets_min": 708
},
{
"epoch": 4.462962962962963,
"grad_norm": 0.5443093397325657,
"learning_rate": 1.399909616630865e-05,
"loss": 0.2533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19300591945648193,
"step": 2410,
"valid_targets_mean": 4181.1,
"valid_targets_min": 1700
},
{
"epoch": 4.472222222222222,
"grad_norm": 0.5782021783769784,
"learning_rate": 1.3911069801707232e-05,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2529100179672241,
"step": 2415,
"valid_targets_mean": 3981.5,
"valid_targets_min": 2339
},
{
"epoch": 4.481481481481482,
"grad_norm": 0.4902132127041017,
"learning_rate": 1.3823173248065125e-05,
"loss": 0.2318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21891093254089355,
"step": 2420,
"valid_targets_mean": 4572.9,
"valid_targets_min": 917
},
{
"epoch": 4.4907407407407405,
"grad_norm": 0.5337074804399202,
"learning_rate": 1.37354083792642e-05,
"loss": 0.2383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22410012781620026,
"step": 2425,
"valid_targets_mean": 3774.8,
"valid_targets_min": 1751
},
{
"epoch": 4.5,
"grad_norm": 0.5148725556353548,
"learning_rate": 1.3647777066378938e-05,
"loss": 0.2401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21354614198207855,
"step": 2430,
"valid_targets_mean": 4094.2,
"valid_targets_min": 1633
},
{
"epoch": 4.5092592592592595,
"grad_norm": 0.5306690061606288,
"learning_rate": 1.3560281177636484e-05,
"loss": 0.2217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22166001796722412,
"step": 2435,
"valid_targets_mean": 4369.6,
"valid_targets_min": 1700
},
{
"epoch": 4.518518518518518,
"grad_norm": 0.54955470040774,
"learning_rate": 1.347292257837689e-05,
"loss": 0.2421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19483765959739685,
"step": 2440,
"valid_targets_mean": 3584.6,
"valid_targets_min": 1698
},
{
"epoch": 4.527777777777778,
"grad_norm": 0.5166945393629171,
"learning_rate": 1.3385703131013279e-05,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26188984513282776,
"step": 2445,
"valid_targets_mean": 5255.4,
"valid_targets_min": 2425
},
{
"epoch": 4.537037037037037,
"grad_norm": 0.5093872869070459,
"learning_rate": 1.3298624694992175e-05,
"loss": 0.2249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23913279175758362,
"step": 2450,
"valid_targets_mean": 4489.4,
"valid_targets_min": 1802
},
{
"epoch": 4.546296296296296,
"grad_norm": 0.47603655861521876,
"learning_rate": 1.3211689126753879e-05,
"loss": 0.2237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17301449179649353,
"step": 2455,
"valid_targets_mean": 4347.6,
"valid_targets_min": 1813
},
{
"epoch": 4.555555555555555,
"grad_norm": 0.6346661258154088,
"learning_rate": 1.3124898279692837e-05,
"loss": 0.2462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2843843400478363,
"step": 2460,
"valid_targets_mean": 3273.6,
"valid_targets_min": 849
},
{
"epoch": 4.564814814814815,
"grad_norm": 0.5325625066728102,
"learning_rate": 1.3038254004118192e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.295737624168396,
"step": 2465,
"valid_targets_mean": 4695.4,
"valid_targets_min": 781
},
{
"epoch": 4.574074074074074,
"grad_norm": 0.5501023241996152,
"learning_rate": 1.2951758147214272e-05,
"loss": 0.2408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2129950225353241,
"step": 2470,
"valid_targets_mean": 3447.8,
"valid_targets_min": 1908
},
{
"epoch": 4.583333333333333,
"grad_norm": 0.5306118753400628,
"learning_rate": 1.2865412553001274e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3140091896057129,
"step": 2475,
"valid_targets_mean": 5196.5,
"valid_targets_min": 451
},
{
"epoch": 4.592592592592593,
"grad_norm": 0.582247483161391,
"learning_rate": 1.2779219062295892e-05,
"loss": 0.2194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22162239253520966,
"step": 2480,
"valid_targets_mean": 3159.7,
"valid_targets_min": 1906
},
{
"epoch": 4.601851851851852,
"grad_norm": 0.5672219680504043,
"learning_rate": 1.26931795126721e-05,
"loss": 0.2491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25854402780532837,
"step": 2485,
"valid_targets_mean": 3898.6,
"valid_targets_min": 570
},
{
"epoch": 4.611111111111111,
"grad_norm": 0.522554196457828,
"learning_rate": 1.2607295738422e-05,
"loss": 0.2534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28402256965637207,
"step": 2490,
"valid_targets_mean": 4644.9,
"valid_targets_min": 559
},
{
"epoch": 4.62037037037037,
"grad_norm": 0.5144881360654426,
"learning_rate": 1.2521569570516666e-05,
"loss": 0.269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24675382673740387,
"step": 2495,
"valid_targets_mean": 4304.7,
"valid_targets_min": 643
},
{
"epoch": 4.62962962962963,
"grad_norm": 0.5285312184685258,
"learning_rate": 1.2436002836567154e-05,
"loss": 0.2309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19586646556854248,
"step": 2500,
"valid_targets_mean": 3602.7,
"valid_targets_min": 322
},
{
"epoch": 4.638888888888889,
"grad_norm": 0.5549134026619124,
"learning_rate": 1.2350597360785503e-05,
"loss": 0.2455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18762420117855072,
"step": 2505,
"valid_targets_mean": 3592.7,
"valid_targets_min": 1658
},
{
"epoch": 4.648148148148148,
"grad_norm": 0.5467801855327935,
"learning_rate": 1.2265354963945861e-05,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21311713755130768,
"step": 2510,
"valid_targets_mean": 4042.2,
"valid_targets_min": 1311
},
{
"epoch": 4.657407407407407,
"grad_norm": 0.5982127460318674,
"learning_rate": 1.2180277463345697e-05,
"loss": 0.2601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2819077968597412,
"step": 2515,
"valid_targets_mean": 3927.7,
"valid_targets_min": 667
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.5694492693011193,
"learning_rate": 1.209536667276699e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22266341745853424,
"step": 2520,
"valid_targets_mean": 3360.6,
"valid_targets_min": 635
},
{
"epoch": 4.675925925925926,
"grad_norm": 0.5526419588735482,
"learning_rate": 1.2010624402437622e-05,
"loss": 0.2343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23695549368858337,
"step": 2525,
"valid_targets_mean": 3984.8,
"valid_targets_min": 731
},
{
"epoch": 4.685185185185185,
"grad_norm": 0.5408354781576525,
"learning_rate": 1.1926052458992756e-05,
"loss": 0.2414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24009719491004944,
"step": 2530,
"valid_targets_mean": 4062.1,
"valid_targets_min": 770
},
{
"epoch": 4.694444444444445,
"grad_norm": 0.5900458646547324,
"learning_rate": 1.184165264543633e-05,
"loss": 0.2605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3150600492954254,
"step": 2535,
"valid_targets_mean": 3895.1,
"valid_targets_min": 247
},
{
"epoch": 4.703703703703704,
"grad_norm": 0.5696579830073532,
"learning_rate": 1.1757426761102608e-05,
"loss": 0.2704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.223690927028656,
"step": 2540,
"valid_targets_mean": 3577.1,
"valid_targets_min": 1758
},
{
"epoch": 4.712962962962963,
"grad_norm": 0.6778694424859932,
"learning_rate": 1.167337660161783e-05,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23883025348186493,
"step": 2545,
"valid_targets_mean": 3305.8,
"valid_targets_min": 321
},
{
"epoch": 4.722222222222222,
"grad_norm": 0.4941234856749398,
"learning_rate": 1.1589503958861936e-05,
"loss": 0.2269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17126263678073883,
"step": 2550,
"valid_targets_mean": 4076.2,
"valid_targets_min": 2068
},
{
"epoch": 4.731481481481482,
"grad_norm": 0.49689750365243684,
"learning_rate": 1.1505810620930338e-05,
"loss": 0.2495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27281057834625244,
"step": 2555,
"valid_targets_mean": 4938.1,
"valid_targets_min": 1676
},
{
"epoch": 4.7407407407407405,
"grad_norm": 0.49366801115519954,
"learning_rate": 1.1422298372095841e-05,
"loss": 0.2173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2007950097322464,
"step": 2560,
"valid_targets_mean": 4215.3,
"valid_targets_min": 1973
},
{
"epoch": 4.75,
"grad_norm": 0.5522379435269955,
"learning_rate": 1.133896899277056e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.245499849319458,
"step": 2565,
"valid_targets_mean": 4917.2,
"valid_targets_min": 1888
},
{
"epoch": 4.7592592592592595,
"grad_norm": 0.43752196002323207,
"learning_rate": 1.1255824259467985e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26849859952926636,
"step": 2570,
"valid_targets_mean": 5791.7,
"valid_targets_min": 2194
},
{
"epoch": 4.768518518518518,
"grad_norm": 0.4978365901424028,
"learning_rate": 1.1172865944765122e-05,
"loss": 0.2192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19043470919132233,
"step": 2575,
"valid_targets_mean": 3895.8,
"valid_targets_min": 1835
},
{
"epoch": 4.777777777777778,
"grad_norm": 0.5734466742419532,
"learning_rate": 1.109009581726466e-05,
"loss": 0.232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25819826126098633,
"step": 2580,
"valid_targets_mean": 3905.1,
"valid_targets_min": 818
},
{
"epoch": 4.787037037037037,
"grad_norm": 0.4975385383711413,
"learning_rate": 1.1007515641557329e-05,
"loss": 0.2641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23181453347206116,
"step": 2585,
"valid_targets_mean": 4291.7,
"valid_targets_min": 804
},
{
"epoch": 4.796296296296296,
"grad_norm": 0.5198599453591137,
"learning_rate": 1.092512717818421e-05,
"loss": 0.2481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564372718334198,
"step": 2590,
"valid_targets_mean": 4180.1,
"valid_targets_min": 1857
},
{
"epoch": 4.805555555555555,
"grad_norm": 0.5359214335268695,
"learning_rate": 1.0842932183599238e-05,
"loss": 0.2345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20336735248565674,
"step": 2595,
"valid_targets_mean": 3678.8,
"valid_targets_min": 580
},
{
"epoch": 4.814814814814815,
"grad_norm": 0.5160372923320123,
"learning_rate": 1.076093241013178e-05,
"loss": 0.2407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2072121798992157,
"step": 2600,
"valid_targets_mean": 4072.1,
"valid_targets_min": 1736
},
{
"epoch": 4.824074074074074,
"grad_norm": 0.6009912786476855,
"learning_rate": 1.067912960594923e-05,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27277135848999023,
"step": 2605,
"valid_targets_mean": 3798.9,
"valid_targets_min": 496
},
{
"epoch": 4.833333333333333,
"grad_norm": 0.5346358334055163,
"learning_rate": 1.0597525515019749e-05,
"loss": 0.223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20401796698570251,
"step": 2610,
"valid_targets_mean": 3752.1,
"valid_targets_min": 812
},
{
"epoch": 4.842592592592593,
"grad_norm": 0.49231495307972206,
"learning_rate": 1.0516121877075129e-05,
"loss": 0.2623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22834226489067078,
"step": 2615,
"valid_targets_mean": 4466.0,
"valid_targets_min": 2355
},
{
"epoch": 4.851851851851852,
"grad_norm": 0.5848272712574982,
"learning_rate": 1.0434920427573643e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2603488862514496,
"step": 2620,
"valid_targets_mean": 3781.5,
"valid_targets_min": 599
},
{
"epoch": 4.861111111111111,
"grad_norm": 0.5265795938004495,
"learning_rate": 1.035392289766307e-05,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23392300307750702,
"step": 2625,
"valid_targets_mean": 4054.4,
"valid_targets_min": 787
},
{
"epoch": 4.87037037037037,
"grad_norm": 0.5187023064482922,
"learning_rate": 1.0273131014143822e-05,
"loss": 0.2658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2979033291339874,
"step": 2630,
"valid_targets_mean": 5284.2,
"valid_targets_min": 960
},
{
"epoch": 4.87962962962963,
"grad_norm": 0.5723456672993618,
"learning_rate": 1.0192546499432066e-05,
"loss": 0.2304,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2871008515357971,
"step": 2635,
"valid_targets_mean": 4034.7,
"valid_targets_min": 1757
},
{
"epoch": 4.888888888888889,
"grad_norm": 0.4518981511283737,
"learning_rate": 1.0112171071523064e-05,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24816018342971802,
"step": 2640,
"valid_targets_mean": 5038.8,
"valid_targets_min": 1905
},
{
"epoch": 4.898148148148148,
"grad_norm": 0.588262346243775,
"learning_rate": 1.0032006443954506e-05,
"loss": 0.2179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263683021068573,
"step": 2645,
"valid_targets_mean": 3463.8,
"valid_targets_min": 496
},
{
"epoch": 4.907407407407407,
"grad_norm": 0.5240492310910843,
"learning_rate": 9.952054325769984e-06,
"loss": 0.2485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2150118052959442,
"step": 2650,
"valid_targets_mean": 3824.9,
"valid_targets_min": 1872
},
{
"epoch": 4.916666666666667,
"grad_norm": 0.510346682757862,
"learning_rate": 9.872316421482592e-06,
"loss": 0.2318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21479976177215576,
"step": 2655,
"valid_targets_mean": 4396.7,
"valid_targets_min": 422
},
{
"epoch": 4.925925925925926,
"grad_norm": 0.5032967964842083,
"learning_rate": 9.792794431038542e-06,
"loss": 0.2248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1802617311477661,
"step": 2660,
"valid_targets_mean": 3972.9,
"valid_targets_min": 2057
},
{
"epoch": 4.935185185185185,
"grad_norm": 0.5308551440927738,
"learning_rate": 9.713490049780931e-06,
"loss": 0.2459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27088022232055664,
"step": 2665,
"valid_targets_mean": 4305.6,
"valid_targets_min": 671
},
{
"epoch": 4.944444444444445,
"grad_norm": 0.476874892456418,
"learning_rate": 9.634404968413644e-06,
"loss": 0.2475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25819599628448486,
"step": 2670,
"valid_targets_mean": 5321.4,
"valid_targets_min": 1851
},
{
"epoch": 4.953703703703704,
"grad_norm": 0.5120117073751925,
"learning_rate": 9.555540872965235e-06,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23541975021362305,
"step": 2675,
"valid_targets_mean": 4679.6,
"valid_targets_min": 2191
},
{
"epoch": 4.962962962962963,
"grad_norm": 0.5036966865354542,
"learning_rate": 9.47689944475305e-06,
"loss": 0.2373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2063012421131134,
"step": 2680,
"valid_targets_mean": 3808.1,
"valid_targets_min": 1218
},
{
"epoch": 4.972222222222222,
"grad_norm": 0.5539021581298206,
"learning_rate": 9.398482360347327e-06,
"loss": 0.2395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21875013411045074,
"step": 2685,
"valid_targets_mean": 3876.2,
"valid_targets_min": 594
},
{
"epoch": 4.981481481481482,
"grad_norm": 0.4696704645863026,
"learning_rate": 9.32029129153551e-06,
"loss": 0.2253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2458312213420868,
"step": 2690,
"valid_targets_mean": 5203.4,
"valid_targets_min": 636
},
{
"epoch": 4.9907407407407405,
"grad_norm": 0.5363470268259611,
"learning_rate": 9.242327905286552e-06,
"loss": 0.248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2823778986930847,
"step": 2695,
"valid_targets_mean": 4615.7,
"valid_targets_min": 490
},
{
"epoch": 5.0,
"grad_norm": 0.5485569771302176,
"learning_rate": 9.164593863715405e-06,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26309841871261597,
"step": 2700,
"valid_targets_mean": 4314.6,
"valid_targets_min": 1566
},
{
"epoch": 5.0092592592592595,
"grad_norm": 0.5750093349536356,
"learning_rate": 9.087090824047604e-06,
"loss": 0.2423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20704308152198792,
"step": 2705,
"valid_targets_mean": 3146.9,
"valid_targets_min": 804
},
{
"epoch": 5.018518518518518,
"grad_norm": 0.5711593696775072,
"learning_rate": 9.009820438583881e-06,
"loss": 0.2484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2449912130832672,
"step": 2710,
"valid_targets_mean": 3544.4,
"valid_targets_min": 512
},
{
"epoch": 5.027777777777778,
"grad_norm": 0.5056840688609416,
"learning_rate": 8.932784354665002e-06,
"loss": 0.2308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21420851349830627,
"step": 2715,
"valid_targets_mean": 4169.8,
"valid_targets_min": 715
},
{
"epoch": 5.037037037037037,
"grad_norm": 0.5259628580539836,
"learning_rate": 8.855984214636606e-06,
"loss": 0.218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2302454710006714,
"step": 2720,
"valid_targets_mean": 4181.8,
"valid_targets_min": 1921
},
{
"epoch": 5.046296296296297,
"grad_norm": 0.5183310765864052,
"learning_rate": 8.779421655814189e-06,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22257420420646667,
"step": 2725,
"valid_targets_mean": 4248.5,
"valid_targets_min": 451
},
{
"epoch": 5.055555555555555,
"grad_norm": 0.5672433220359422,
"learning_rate": 8.703098310448244e-06,
"loss": 0.2154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21661844849586487,
"step": 2730,
"valid_targets_mean": 3690.8,
"valid_targets_min": 313
},
{
"epoch": 5.064814814814815,
"grad_norm": 0.6099943995315652,
"learning_rate": 8.627015805689394e-06,
"loss": 0.2157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.220462366938591,
"step": 2735,
"valid_targets_mean": 3285.6,
"valid_targets_min": 1461
},
{
"epoch": 5.074074074074074,
"grad_norm": 0.5323546697406513,
"learning_rate": 8.551175763553778e-06,
"loss": 0.2105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20129993557929993,
"step": 2740,
"valid_targets_mean": 3924.2,
"valid_targets_min": 1717
},
{
"epoch": 5.083333333333333,
"grad_norm": 0.6107489166400913,
"learning_rate": 8.475579800888395e-06,
"loss": 0.2503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22628153860569,
"step": 2745,
"valid_targets_mean": 3306.1,
"valid_targets_min": 1109
},
{
"epoch": 5.092592592592593,
"grad_norm": 0.5714928193929322,
"learning_rate": 8.400229529336704e-06,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27930349111557007,
"step": 2750,
"valid_targets_mean": 3991.1,
"valid_targets_min": 2391
},
{
"epoch": 5.101851851851852,
"grad_norm": 0.5659909185072247,
"learning_rate": 8.325126555304208e-06,
"loss": 0.2328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2336483746767044,
"step": 2755,
"valid_targets_mean": 3597.2,
"valid_targets_min": 602
},
{
"epoch": 5.111111111111111,
"grad_norm": 0.5991030111391655,
"learning_rate": 8.250272479924234e-06,
"loss": 0.2373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.207331120967865,
"step": 2760,
"valid_targets_mean": 3291.4,
"valid_targets_min": 1701
},
{
"epoch": 5.12037037037037,
"grad_norm": 0.5934525134623533,
"learning_rate": 8.17566889902382e-06,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24269866943359375,
"step": 2765,
"valid_targets_mean": 3444.9,
"valid_targets_min": 554
},
{
"epoch": 5.12962962962963,
"grad_norm": 0.5658329820173873,
"learning_rate": 8.101317403089635e-06,
"loss": 0.2325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23202694952487946,
"step": 2770,
"valid_targets_mean": 3681.4,
"valid_targets_min": 2306
},
{
"epoch": 5.138888888888889,
"grad_norm": 0.6489256706628873,
"learning_rate": 8.027219577234133e-06,
"loss": 0.2312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2144010066986084,
"step": 2775,
"valid_targets_mean": 2766.6,
"valid_targets_min": 594
},
{
"epoch": 5.148148148148148,
"grad_norm": 0.5211992244558095,
"learning_rate": 7.953377001161714e-06,
"loss": 0.2219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21091634035110474,
"step": 2780,
"valid_targets_mean": 4418.5,
"valid_targets_min": 932
},
{
"epoch": 5.157407407407407,
"grad_norm": 0.5386716433955135,
"learning_rate": 7.879791249135059e-06,
"loss": 0.2053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2028743326663971,
"step": 2785,
"valid_targets_mean": 4136.1,
"valid_targets_min": 1604
},
{
"epoch": 5.166666666666667,
"grad_norm": 0.5017972625347342,
"learning_rate": 7.806463889941598e-06,
"loss": 0.2354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2554541230201721,
"step": 2790,
"valid_targets_mean": 4644.6,
"valid_targets_min": 2649
},
{
"epoch": 5.175925925925926,
"grad_norm": 0.6494575106635824,
"learning_rate": 7.73339648686001e-06,
"loss": 0.2293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24619343876838684,
"step": 2795,
"valid_targets_mean": 3087.1,
"valid_targets_min": 894
},
{
"epoch": 5.185185185185185,
"grad_norm": 0.5370049338879487,
"learning_rate": 7.66059059762695e-06,
"loss": 0.2298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23969629406929016,
"step": 2800,
"valid_targets_mean": 4655.8,
"valid_targets_min": 514
},
{
"epoch": 5.194444444444445,
"grad_norm": 0.48493560753692005,
"learning_rate": 7.588047774403795e-06,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.212522953748703,
"step": 2805,
"valid_targets_mean": 4826.5,
"valid_targets_min": 1850
},
{
"epoch": 5.203703703703703,
"grad_norm": 0.5702442363690217,
"learning_rate": 7.5157695637435864e-06,
"loss": 0.2382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21703308820724487,
"step": 2810,
"valid_targets_mean": 3602.2,
"valid_targets_min": 742
},
{
"epoch": 5.212962962962963,
"grad_norm": 0.4950198812734338,
"learning_rate": 7.443757506558033e-06,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2432202249765396,
"step": 2815,
"valid_targets_mean": 4723.4,
"valid_targets_min": 523
},
{
"epoch": 5.222222222222222,
"grad_norm": 0.5719638372764714,
"learning_rate": 7.3720131380846685e-06,
"loss": 0.2417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2235596477985382,
"step": 2820,
"valid_targets_mean": 3752.8,
"valid_targets_min": 577
},
{
"epoch": 5.231481481481482,
"grad_norm": 0.5267450371667391,
"learning_rate": 7.300537987854146e-06,
"loss": 0.2467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2897190749645233,
"step": 2825,
"valid_targets_mean": 4997.0,
"valid_targets_min": 2002
},
{
"epoch": 5.2407407407407405,
"grad_norm": 0.6948286687288108,
"learning_rate": 7.22933357965758e-06,
"loss": 0.2573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3058856725692749,
"step": 2830,
"valid_targets_mean": 3943.1,
"valid_targets_min": 1473
},
{
"epoch": 5.25,
"grad_norm": 0.586857466777657,
"learning_rate": 7.158401431514117e-06,
"loss": 0.2399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25646457076072693,
"step": 2835,
"valid_targets_mean": 3900.5,
"valid_targets_min": 1653
},
{
"epoch": 5.2592592592592595,
"grad_norm": 0.5671217282214601,
"learning_rate": 7.0877430556385205e-06,
"loss": 0.2341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25015023350715637,
"step": 2840,
"valid_targets_mean": 4106.6,
"valid_targets_min": 663
},
{
"epoch": 5.268518518518518,
"grad_norm": 0.5512194807040625,
"learning_rate": 7.0173599584089625e-06,
"loss": 0.2151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19779840111732483,
"step": 2845,
"valid_targets_mean": 3710.4,
"valid_targets_min": 1984
},
{
"epoch": 5.277777777777778,
"grad_norm": 0.6190422799034527,
"learning_rate": 6.947253640334914e-06,
"loss": 0.2176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2084774523973465,
"step": 2850,
"valid_targets_mean": 3782.9,
"valid_targets_min": 535
},
{
"epoch": 5.287037037037037,
"grad_norm": 0.5143657356660781,
"learning_rate": 6.87742559602512e-06,
"loss": 0.2469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20541247725486755,
"step": 2855,
"valid_targets_mean": 4369.5,
"valid_targets_min": 632
},
{
"epoch": 5.296296296296296,
"grad_norm": 0.6337293582450882,
"learning_rate": 6.807877314155788e-06,
"loss": 0.2341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23032309114933014,
"step": 2860,
"valid_targets_mean": 3650.8,
"valid_targets_min": 2050
},
{
"epoch": 5.305555555555555,
"grad_norm": 0.5832848422160128,
"learning_rate": 6.738610277438791e-06,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2612956762313843,
"step": 2865,
"valid_targets_mean": 3992.4,
"valid_targets_min": 1389
},
{
"epoch": 5.314814814814815,
"grad_norm": 0.5761403577609914,
"learning_rate": 6.669625962590114e-06,
"loss": 0.2308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23144936561584473,
"step": 2870,
"valid_targets_mean": 3683.9,
"valid_targets_min": 1769
},
{
"epoch": 5.324074074074074,
"grad_norm": 0.5484533193789417,
"learning_rate": 6.600925840298331e-06,
"loss": 0.214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1928141713142395,
"step": 2875,
"valid_targets_mean": 3428.2,
"valid_targets_min": 1879
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.5104494079712223,
"learning_rate": 6.532511375193258e-06,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2096475064754486,
"step": 2880,
"valid_targets_mean": 4629.7,
"valid_targets_min": 2168
},
{
"epoch": 5.342592592592593,
"grad_norm": 0.5967975385442751,
"learning_rate": 6.464384025814763e-06,
"loss": 0.241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632576823234558,
"step": 2885,
"valid_targets_mean": 3904.9,
"valid_targets_min": 774
},
{
"epoch": 5.351851851851852,
"grad_norm": 0.5546172949302648,
"learning_rate": 6.396545244581609e-06,
"loss": 0.2363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23911094665527344,
"step": 2890,
"valid_targets_mean": 4346.3,
"valid_targets_min": 2347
},
{
"epoch": 5.361111111111111,
"grad_norm": 0.4623970390000608,
"learning_rate": 6.3289964777605624e-06,
"loss": 0.2334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22868403792381287,
"step": 2895,
"valid_targets_mean": 5747.1,
"valid_targets_min": 1679
},
{
"epoch": 5.37037037037037,
"grad_norm": 0.5472864241371322,
"learning_rate": 6.261739165435492e-06,
"loss": 0.2259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24670270085334778,
"step": 2900,
"valid_targets_mean": 4055.2,
"valid_targets_min": 1881
},
{
"epoch": 5.37962962962963,
"grad_norm": 0.5565568764252096,
"learning_rate": 6.1947747414767035e-06,
"loss": 0.2494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27123579382896423,
"step": 2905,
"valid_targets_mean": 4548.1,
"valid_targets_min": 762
},
{
"epoch": 5.388888888888889,
"grad_norm": 0.6213207010595587,
"learning_rate": 6.128104633510381e-06,
"loss": 0.2197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2588229775428772,
"step": 2910,
"valid_targets_mean": 3481.0,
"valid_targets_min": 537
},
{
"epoch": 5.398148148148148,
"grad_norm": 0.5039995681794307,
"learning_rate": 6.0617302628881104e-06,
"loss": 0.2219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23631207644939423,
"step": 2915,
"valid_targets_mean": 5029.2,
"valid_targets_min": 599
},
{
"epoch": 5.407407407407407,
"grad_norm": 0.5324459779762222,
"learning_rate": 5.9956530446566305e-06,
"loss": 0.2733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27384650707244873,
"step": 2920,
"valid_targets_mean": 4611.2,
"valid_targets_min": 971
},
{
"epoch": 5.416666666666667,
"grad_norm": 0.5508613368105185,
"learning_rate": 5.929874387527605e-06,
"loss": 0.2273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2502458393573761,
"step": 2925,
"valid_targets_mean": 4067.2,
"valid_targets_min": 1611
},
{
"epoch": 5.425925925925926,
"grad_norm": 0.6630459663999032,
"learning_rate": 5.864395693847651e-06,
"loss": 0.2336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2312171310186386,
"step": 2930,
"valid_targets_mean": 3028.5,
"valid_targets_min": 513
},
{
"epoch": 5.435185185185185,
"grad_norm": 0.6102195329637778,
"learning_rate": 5.799218359568395e-06,
"loss": 0.2404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23122429847717285,
"step": 2935,
"valid_targets_mean": 3275.2,
"valid_targets_min": 949
},
{
"epoch": 5.444444444444445,
"grad_norm": 0.5924002103430813,
"learning_rate": 5.734343774216726e-06,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2418282926082611,
"step": 2940,
"valid_targets_mean": 3858.4,
"valid_targets_min": 2039
},
{
"epoch": 5.453703703703704,
"grad_norm": 0.5135171674815325,
"learning_rate": 5.669773320865198e-06,
"loss": 0.2276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19563168287277222,
"step": 2945,
"valid_targets_mean": 4136.9,
"valid_targets_min": 799
},
{
"epoch": 5.462962962962963,
"grad_norm": 0.5157474067382247,
"learning_rate": 5.605508376102504e-06,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22772985696792603,
"step": 2950,
"valid_targets_mean": 4380.8,
"valid_targets_min": 888
},
{
"epoch": 5.472222222222222,
"grad_norm": 0.58770575498854,
"learning_rate": 5.541550310004142e-06,
"loss": 0.2344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2741531729698181,
"step": 2955,
"valid_targets_mean": 4150.1,
"valid_targets_min": 1655
},
{
"epoch": 5.481481481481482,
"grad_norm": 0.5259117380247347,
"learning_rate": 5.4779004861032355e-06,
"loss": 0.234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2124425172805786,
"step": 2960,
"valid_targets_mean": 4234.0,
"valid_targets_min": 845
},
{
"epoch": 5.4907407407407405,
"grad_norm": 0.5572729756403186,
"learning_rate": 5.414560261361415e-06,
"loss": 0.2323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22375890612602234,
"step": 2965,
"valid_targets_mean": 3794.6,
"valid_targets_min": 1686
},
{
"epoch": 5.5,
"grad_norm": 0.5445675255604132,
"learning_rate": 5.351530986139917e-06,
"loss": 0.2388,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3014816641807556,
"step": 2970,
"valid_targets_mean": 5170.2,
"valid_targets_min": 1872
},
{
"epoch": 5.5092592592592595,
"grad_norm": 0.6466881136928674,
"learning_rate": 5.288814004170804e-06,
"loss": 0.2361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24697977304458618,
"step": 2975,
"valid_targets_mean": 5287.1,
"valid_targets_min": 299
},
{
"epoch": 5.518518518518518,
"grad_norm": 0.5769556261883075,
"learning_rate": 5.226410652528293e-06,
"loss": 0.2238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20952610671520233,
"step": 2980,
"valid_targets_mean": 3477.0,
"valid_targets_min": 1758
},
{
"epoch": 5.527777777777778,
"grad_norm": 0.5220365367076272,
"learning_rate": 5.164322261600257e-06,
"loss": 0.2405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26157093048095703,
"step": 2985,
"valid_targets_mean": 4698.8,
"valid_targets_min": 672
},
{
"epoch": 5.537037037037037,
"grad_norm": 0.5834708312508325,
"learning_rate": 5.102550155059887e-06,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24419638514518738,
"step": 2990,
"valid_targets_mean": 3697.9,
"valid_targets_min": 838
},
{
"epoch": 5.546296296296296,
"grad_norm": 0.6297100411829605,
"learning_rate": 5.041095649837429e-06,
"loss": 0.2464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25412261486053467,
"step": 2995,
"valid_targets_mean": 3376.1,
"valid_targets_min": 496
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.49356981841182485,
"learning_rate": 4.97996005609215e-06,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30119216442108154,
"step": 3000,
"valid_targets_mean": 5648.1,
"valid_targets_min": 821
},
{
"epoch": 5.564814814814815,
"grad_norm": 0.5039612262029771,
"learning_rate": 4.919144677184377e-06,
"loss": 0.2398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21342125535011292,
"step": 3005,
"valid_targets_mean": 4504.8,
"valid_targets_min": 1713
},
{
"epoch": 5.574074074074074,
"grad_norm": 0.7773320110901791,
"learning_rate": 4.858650809647718e-06,
"loss": 0.2318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2356467843055725,
"step": 3010,
"valid_targets_mean": 4336.1,
"valid_targets_min": 2210
},
{
"epoch": 5.583333333333333,
"grad_norm": 0.53775868558781,
"learning_rate": 4.798479743161443e-06,
"loss": 0.2447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29584944248199463,
"step": 3015,
"valid_targets_mean": 5161.2,
"valid_targets_min": 420
},
{
"epoch": 5.592592592592593,
"grad_norm": 0.5399699724215403,
"learning_rate": 4.73863276052295e-06,
"loss": 0.2401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25342124700546265,
"step": 3020,
"valid_targets_mean": 4572.4,
"valid_targets_min": 2055
},
{
"epoch": 5.601851851851852,
"grad_norm": 0.559109770350913,
"learning_rate": 4.679111137620442e-06,
"loss": 0.2181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2064143568277359,
"step": 3025,
"valid_targets_mean": 4368.1,
"valid_targets_min": 1771
},
{
"epoch": 5.611111111111111,
"grad_norm": 0.5797401297885008,
"learning_rate": 4.619916143405734e-06,
"loss": 0.2441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27342820167541504,
"step": 3030,
"valid_targets_mean": 4007.9,
"valid_targets_min": 641
},
{
"epoch": 5.62037037037037,
"grad_norm": 0.5973678981169844,
"learning_rate": 4.561049039867167e-06,
"loss": 0.2214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2151966243982315,
"step": 3035,
"valid_targets_mean": 3154.9,
"valid_targets_min": 1733
},
{
"epoch": 5.62962962962963,
"grad_norm": 0.6286394623427501,
"learning_rate": 4.502511082002748e-06,
"loss": 0.2228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21091020107269287,
"step": 3040,
"valid_targets_mean": 3375.3,
"valid_targets_min": 1700
},
{
"epoch": 5.638888888888889,
"grad_norm": 0.5797903289542194,
"learning_rate": 4.44430351779334e-06,
"loss": 0.2302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2548823356628418,
"step": 3045,
"valid_targets_mean": 3822.7,
"valid_targets_min": 827
},
{
"epoch": 5.648148148148148,
"grad_norm": 0.5395936599039631,
"learning_rate": 4.386427588176121e-06,
"loss": 0.2306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24894863367080688,
"step": 3050,
"valid_targets_mean": 4198.8,
"valid_targets_min": 1272
},
{
"epoch": 5.657407407407407,
"grad_norm": 0.5426930111027483,
"learning_rate": 4.328884527018067e-06,
"loss": 0.2312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23337554931640625,
"step": 3055,
"valid_targets_mean": 4351.1,
"valid_targets_min": 2103
},
{
"epoch": 5.666666666666667,
"grad_norm": 0.5604242942596068,
"learning_rate": 4.271675561089676e-06,
"loss": 0.224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2611788511276245,
"step": 3060,
"valid_targets_mean": 3893.8,
"valid_targets_min": 708
},
{
"epoch": 5.675925925925926,
"grad_norm": 0.5140701008443176,
"learning_rate": 4.214801910038831e-06,
"loss": 0.2221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2084433138370514,
"step": 3065,
"valid_targets_mean": 4512.9,
"valid_targets_min": 2627
},
{
"epoch": 5.685185185185185,
"grad_norm": 0.6183694282041834,
"learning_rate": 4.1582647863647565e-06,
"loss": 0.2526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2690351605415344,
"step": 3070,
"valid_targets_mean": 3642.6,
"valid_targets_min": 1908
},
{
"epoch": 5.694444444444445,
"grad_norm": 0.5868218733401469,
"learning_rate": 4.102065395392208e-06,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24894368648529053,
"step": 3075,
"valid_targets_mean": 3544.2,
"valid_targets_min": 647
},
{
"epoch": 5.703703703703704,
"grad_norm": 0.5411202176861103,
"learning_rate": 4.04620493524575e-06,
"loss": 0.2404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2515827715396881,
"step": 3080,
"valid_targets_mean": 4001.4,
"valid_targets_min": 1591
},
{
"epoch": 5.712962962962963,
"grad_norm": 0.5722421499791819,
"learning_rate": 3.990684596824219e-06,
"loss": 0.2336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2952159643173218,
"step": 3085,
"valid_targets_mean": 4322.0,
"valid_targets_min": 764
},
{
"epoch": 5.722222222222222,
"grad_norm": 0.547817757859327,
"learning_rate": 3.93550556377535e-06,
"loss": 0.21,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19061464071273804,
"step": 3090,
"valid_targets_mean": 3755.5,
"valid_targets_min": 472
},
{
"epoch": 5.731481481481482,
"grad_norm": 0.6502265294741346,
"learning_rate": 3.880669012470515e-06,
"loss": 0.2148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23853082954883575,
"step": 3095,
"valid_targets_mean": 3397.8,
"valid_targets_min": 880
},
{
"epoch": 5.7407407407407405,
"grad_norm": 0.5857577530105953,
"learning_rate": 3.826176111979673e-06,
"loss": 0.2155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22189410030841827,
"step": 3100,
"valid_targets_mean": 4695.8,
"valid_targets_min": 671
},
{
"epoch": 5.75,
"grad_norm": 0.5846313843567592,
"learning_rate": 3.7720280240464145e-06,
"loss": 0.2869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23350340127944946,
"step": 3105,
"valid_targets_mean": 3510.4,
"valid_targets_min": 480
},
{
"epoch": 5.7592592592592595,
"grad_norm": 0.5158563827646333,
"learning_rate": 3.7182259030632305e-06,
"loss": 0.2355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2879447937011719,
"step": 3110,
"valid_targets_mean": 4997.9,
"valid_targets_min": 637
},
{
"epoch": 5.768518518518518,
"grad_norm": 0.5925915812377698,
"learning_rate": 3.6647708960468696e-06,
"loss": 0.2175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17236462235450745,
"step": 3115,
"valid_targets_mean": 3198.4,
"valid_targets_min": 397
},
{
"epoch": 5.777777777777778,
"grad_norm": 0.5049665240080519,
"learning_rate": 3.6116641426138933e-06,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20963430404663086,
"step": 3120,
"valid_targets_mean": 4305.9,
"valid_targets_min": 545
},
{
"epoch": 5.787037037037037,
"grad_norm": 0.49581708730857366,
"learning_rate": 3.5589067749564054e-06,
"loss": 0.215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17507000267505646,
"step": 3125,
"valid_targets_mean": 4279.4,
"valid_targets_min": 1984
},
{
"epoch": 5.796296296296296,
"grad_norm": 0.533046990001763,
"learning_rate": 3.5064999178178648e-06,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27857914566993713,
"step": 3130,
"valid_targets_mean": 4669.2,
"valid_targets_min": 2074
},
{
"epoch": 5.805555555555555,
"grad_norm": 0.5616028974184716,
"learning_rate": 3.454444688469165e-06,
"loss": 0.2179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2025943398475647,
"step": 3135,
"valid_targets_mean": 3585.2,
"valid_targets_min": 1703
},
{
"epoch": 5.814814814814815,
"grad_norm": 0.5499230074351966,
"learning_rate": 3.4027421966847675e-06,
"loss": 0.2032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20543722808361053,
"step": 3140,
"valid_targets_mean": 4170.7,
"valid_targets_min": 1919
},
{
"epoch": 5.824074074074074,
"grad_norm": 0.5611834664864449,
"learning_rate": 3.3513935447190595e-06,
"loss": 0.2187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2410752773284912,
"step": 3145,
"valid_targets_mean": 5189.0,
"valid_targets_min": 542
},
{
"epoch": 5.833333333333333,
"grad_norm": 0.584145430075303,
"learning_rate": 3.3003998272828676e-06,
"loss": 0.2156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21233102679252625,
"step": 3150,
"valid_targets_mean": 3442.8,
"valid_targets_min": 635
},
{
"epoch": 5.842592592592593,
"grad_norm": 0.487543792074454,
"learning_rate": 3.2497621315200958e-06,
"loss": 0.225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19998474419116974,
"step": 3155,
"valid_targets_mean": 4951.9,
"valid_targets_min": 1450
},
{
"epoch": 5.851851851851852,
"grad_norm": 0.5178454713647473,
"learning_rate": 3.199481536984572e-06,
"loss": 0.2118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23643583059310913,
"step": 3160,
"valid_targets_mean": 4172.1,
"valid_targets_min": 1883
},
{
"epoch": 5.861111111111111,
"grad_norm": 0.5368224607954661,
"learning_rate": 3.149559115617009e-06,
"loss": 0.2243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17453108727931976,
"step": 3165,
"valid_targets_mean": 3588.9,
"valid_targets_min": 818
},
{
"epoch": 5.87037037037037,
"grad_norm": 0.5771162580976622,
"learning_rate": 3.099995931722175e-06,
"loss": 0.2149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2502521574497223,
"step": 3170,
"valid_targets_mean": 4165.6,
"valid_targets_min": 2273
},
{
"epoch": 5.87962962962963,
"grad_norm": 0.5851573775898512,
"learning_rate": 3.050793041946183e-06,
"loss": 0.2328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30304640531539917,
"step": 3175,
"valid_targets_mean": 4239.8,
"valid_targets_min": 724
},
{
"epoch": 5.888888888888889,
"grad_norm": 0.45474856609735226,
"learning_rate": 3.001951495253972e-06,
"loss": 0.2121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19552862644195557,
"step": 3180,
"valid_targets_mean": 4980.6,
"valid_targets_min": 572
},
{
"epoch": 5.898148148148148,
"grad_norm": 0.623558442164306,
"learning_rate": 2.953472332906959e-06,
"loss": 0.2413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29056739807128906,
"step": 3185,
"valid_targets_mean": 3711.6,
"valid_targets_min": 1514
},
{
"epoch": 5.907407407407407,
"grad_norm": 0.49621126539313987,
"learning_rate": 2.905356588440811e-06,
"loss": 0.2312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2211723029613495,
"step": 3190,
"valid_targets_mean": 5076.4,
"valid_targets_min": 1947
},
{
"epoch": 5.916666666666667,
"grad_norm": 0.51673830714577,
"learning_rate": 2.857605287643437e-06,
"loss": 0.2061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18910914659500122,
"step": 3195,
"valid_targets_mean": 3844.3,
"valid_targets_min": 733
},
{
"epoch": 5.925925925925926,
"grad_norm": 0.5638628522537501,
"learning_rate": 2.8102194485331e-06,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2086801677942276,
"step": 3200,
"valid_targets_mean": 4155.5,
"valid_targets_min": 2057
},
{
"epoch": 5.935185185185185,
"grad_norm": 0.5790573290649278,
"learning_rate": 2.763200081336721e-06,
"loss": 0.2335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21793599426746368,
"step": 3205,
"valid_targets_mean": 3484.1,
"valid_targets_min": 368
},
{
"epoch": 5.944444444444445,
"grad_norm": 0.6122242849287904,
"learning_rate": 2.7165481884683576e-06,
"loss": 0.2207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2835915982723236,
"step": 3210,
"valid_targets_mean": 3717.0,
"valid_targets_min": 747
},
{
"epoch": 5.953703703703704,
"grad_norm": 0.5672715926891396,
"learning_rate": 2.6702647645077973e-06,
"loss": 0.2395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21978811919689178,
"step": 3215,
"valid_targets_mean": 4550.6,
"valid_targets_min": 773
},
{
"epoch": 5.962962962962963,
"grad_norm": 0.6351867703165379,
"learning_rate": 2.6243507961793936e-06,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2440010905265808,
"step": 3220,
"valid_targets_mean": 3087.8,
"valid_targets_min": 981
},
{
"epoch": 5.972222222222222,
"grad_norm": 0.6513878183905164,
"learning_rate": 2.5788072623309977e-06,
"loss": 0.2241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2559264600276947,
"step": 3225,
"valid_targets_mean": 2916.1,
"valid_targets_min": 317
},
{
"epoch": 5.981481481481482,
"grad_norm": 0.519478406666749,
"learning_rate": 2.5336351339131147e-06,
"loss": 0.2064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1981564164161682,
"step": 3230,
"valid_targets_mean": 4137.3,
"valid_targets_min": 1696
},
{
"epoch": 5.9907407407407405,
"grad_norm": 0.6242847386183599,
"learning_rate": 2.488835373958185e-06,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28682032227516174,
"step": 3235,
"valid_targets_mean": 3630.4,
"valid_targets_min": 1887
},
{
"epoch": 6.0,
"grad_norm": 0.6825780673728339,
"learning_rate": 2.444408937560059e-06,
"loss": 0.2316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25937968492507935,
"step": 3240,
"valid_targets_mean": 2739.1,
"valid_targets_min": 466
},
{
"epoch": 6.0092592592592595,
"grad_norm": 0.539754241133728,
"learning_rate": 2.400356771853651e-06,
"loss": 0.2459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1831408143043518,
"step": 3245,
"valid_targets_mean": 3674.9,
"valid_targets_min": 1668
},
{
"epoch": 6.018518518518518,
"grad_norm": 0.46005254806761847,
"learning_rate": 2.3566798159947157e-06,
"loss": 0.2196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20222413539886475,
"step": 3250,
"valid_targets_mean": 4828.1,
"valid_targets_min": 1945
},
{
"epoch": 6.027777777777778,
"grad_norm": 0.4444020837622067,
"learning_rate": 2.3133790011398618e-06,
"loss": 0.1927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1549903154373169,
"step": 3255,
"valid_targets_mean": 4708.4,
"valid_targets_min": 1799
},
{
"epoch": 6.037037037037037,
"grad_norm": 0.5920689398499658,
"learning_rate": 2.2704552504266664e-06,
"loss": 0.2241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25390511751174927,
"step": 3260,
"valid_targets_mean": 3571.1,
"valid_targets_min": 552
},
{
"epoch": 6.046296296296297,
"grad_norm": 0.526798188540084,
"learning_rate": 2.2279094789540244e-06,
"loss": 0.2179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23036353290081024,
"step": 3265,
"valid_targets_mean": 4382.3,
"valid_targets_min": 771
},
{
"epoch": 6.055555555555555,
"grad_norm": 0.5759163726963563,
"learning_rate": 2.185742593762614e-06,
"loss": 0.2361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25047504901885986,
"step": 3270,
"valid_targets_mean": 3867.2,
"valid_targets_min": 721
},
{
"epoch": 6.064814814814815,
"grad_norm": 0.48826580428064326,
"learning_rate": 2.143955493815577e-06,
"loss": 0.2393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2461843639612198,
"step": 3275,
"valid_targets_mean": 5183.8,
"valid_targets_min": 1736
},
{
"epoch": 6.074074074074074,
"grad_norm": 0.5210334298756018,
"learning_rate": 2.1025490699793516e-06,
"loss": 0.2211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20328880846500397,
"step": 3280,
"valid_targets_mean": 4089.4,
"valid_targets_min": 986
},
{
"epoch": 6.083333333333333,
"grad_norm": 0.5220951505604092,
"learning_rate": 2.0615242050046656e-06,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19010621309280396,
"step": 3285,
"valid_targets_mean": 4184.5,
"valid_targets_min": 894
},
{
"epoch": 6.092592592592593,
"grad_norm": 0.566804464053669,
"learning_rate": 2.020881773507739e-06,
"loss": 0.2381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25445860624313354,
"step": 3290,
"valid_targets_mean": 4107.2,
"valid_targets_min": 1939
},
{
"epoch": 6.101851851851852,
"grad_norm": 0.5330996196349423,
"learning_rate": 1.9806226419516195e-06,
"loss": 0.2296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2357954978942871,
"step": 3295,
"valid_targets_mean": 4824.0,
"valid_targets_min": 574
},
{
"epoch": 6.111111111111111,
"grad_norm": 0.5686340488301583,
"learning_rate": 1.9407476686277095e-06,
"loss": 0.2522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22579503059387207,
"step": 3300,
"valid_targets_mean": 4233.5,
"valid_targets_min": 739
},
{
"epoch": 6.12037037037037,
"grad_norm": 0.8954557853890478,
"learning_rate": 1.9012577036374936e-06,
"loss": 0.2174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578815519809723,
"step": 3305,
"valid_targets_mean": 3204.6,
"valid_targets_min": 767
},
{
"epoch": 6.12962962962963,
"grad_norm": 0.5650667976761549,
"learning_rate": 1.8621535888743825e-06,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2621210813522339,
"step": 3310,
"valid_targets_mean": 4207.1,
"valid_targets_min": 2564
},
{
"epoch": 6.138888888888889,
"grad_norm": 0.6316650802316781,
"learning_rate": 1.8234361580057802e-06,
"loss": 0.2348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570689618587494,
"step": 3315,
"valid_targets_mean": 3737.0,
"valid_targets_min": 766
},
{
"epoch": 6.148148148148148,
"grad_norm": 0.5704491276714678,
"learning_rate": 1.7851062364553184e-06,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22671329975128174,
"step": 3320,
"valid_targets_mean": 3896.4,
"valid_targets_min": 1714
},
{
"epoch": 6.157407407407407,
"grad_norm": 0.5984289258264732,
"learning_rate": 1.7471646413852439e-06,
"loss": 0.2415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23753738403320312,
"step": 3325,
"valid_targets_mean": 3671.3,
"valid_targets_min": 1769
},
{
"epoch": 6.166666666666667,
"grad_norm": 0.5333521022284862,
"learning_rate": 1.709612181678999e-06,
"loss": 0.2124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22245770692825317,
"step": 3330,
"valid_targets_mean": 4270.4,
"valid_targets_min": 1961
},
{
"epoch": 6.175925925925926,
"grad_norm": 0.6230366161491935,
"learning_rate": 1.6724496579239979e-06,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22154787182807922,
"step": 3335,
"valid_targets_mean": 3099.2,
"valid_targets_min": 629
},
{
"epoch": 6.185185185185185,
"grad_norm": 0.6069509329922341,
"learning_rate": 1.6356778623945223e-06,
"loss": 0.2495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.248618945479393,
"step": 3340,
"valid_targets_mean": 3467.0,
"valid_targets_min": 661
},
{
"epoch": 6.194444444444445,
"grad_norm": 0.6234149579450843,
"learning_rate": 1.5992975790348642e-06,
"loss": 0.2239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23241698741912842,
"step": 3345,
"valid_targets_mean": 3599.1,
"valid_targets_min": 1668
},
{
"epoch": 6.203703703703703,
"grad_norm": 0.6001381504985039,
"learning_rate": 1.5633095834425983e-06,
"loss": 0.2197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19718721508979797,
"step": 3350,
"valid_targets_mean": 3149.3,
"valid_targets_min": 542
},
{
"epoch": 6.212962962962963,
"grad_norm": 0.5362275830715411,
"learning_rate": 1.527714642852045e-06,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19483308494091034,
"step": 3355,
"valid_targets_mean": 3562.2,
"valid_targets_min": 511
},
{
"epoch": 6.222222222222222,
"grad_norm": 0.5695773907229609,
"learning_rate": 1.492513516117915e-06,
"loss": 0.2167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20090246200561523,
"step": 3360,
"valid_targets_mean": 3909.8,
"valid_targets_min": 1802
},
{
"epoch": 6.231481481481482,
"grad_norm": 0.6005583636288468,
"learning_rate": 1.457706953699145e-06,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19939187169075012,
"step": 3365,
"valid_targets_mean": 3433.8,
"valid_targets_min": 1569
},
{
"epoch": 6.2407407407407405,
"grad_norm": 0.5007154171778021,
"learning_rate": 1.423295697642868e-06,
"loss": 0.2313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21219304203987122,
"step": 3370,
"valid_targets_mean": 4683.0,
"valid_targets_min": 2362
},
{
"epoch": 6.25,
"grad_norm": 0.5968890483515067,
"learning_rate": 1.3892804815686312e-06,
"loss": 0.2214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2734060287475586,
"step": 3375,
"valid_targets_mean": 4207.8,
"valid_targets_min": 1660
},
{
"epoch": 6.2592592592592595,
"grad_norm": 0.6330168182645113,
"learning_rate": 1.35566203065272e-06,
"loss": 0.2432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22437870502471924,
"step": 3380,
"valid_targets_mean": 3316.6,
"valid_targets_min": 1267
},
{
"epoch": 6.268518518518518,
"grad_norm": 0.577655871447151,
"learning_rate": 1.3224410616127292e-06,
"loss": 0.2348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24034109711647034,
"step": 3385,
"valid_targets_mean": 4207.6,
"valid_targets_min": 342
},
{
"epoch": 6.277777777777778,
"grad_norm": 0.5856036889391484,
"learning_rate": 1.2896182826922577e-06,
"loss": 0.2339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2569791078567505,
"step": 3390,
"valid_targets_mean": 3788.3,
"valid_targets_min": 1142
},
{
"epoch": 6.287037037037037,
"grad_norm": 0.622367825324181,
"learning_rate": 1.2571943936458197e-06,
"loss": 0.2048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18795576691627502,
"step": 3395,
"valid_targets_mean": 3135.8,
"valid_targets_min": 595
},
{
"epoch": 6.296296296296296,
"grad_norm": 0.5507538621859848,
"learning_rate": 1.2251700857239412e-06,
"loss": 0.2004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16971886157989502,
"step": 3400,
"valid_targets_mean": 3565.2,
"valid_targets_min": 812
},
{
"epoch": 6.305555555555555,
"grad_norm": 0.5709347005687355,
"learning_rate": 1.1935460416583889e-06,
"loss": 0.2236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19665175676345825,
"step": 3405,
"valid_targets_mean": 4646.4,
"valid_targets_min": 2430
},
{
"epoch": 6.314814814814815,
"grad_norm": 0.5551338557244925,
"learning_rate": 1.162322935647655e-06,
"loss": 0.2099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23359957337379456,
"step": 3410,
"valid_targets_mean": 3922.3,
"valid_targets_min": 1700
},
{
"epoch": 6.324074074074074,
"grad_norm": 0.6081424722887298,
"learning_rate": 1.1315014333425455e-06,
"loss": 0.2288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27564293146133423,
"step": 3415,
"valid_targets_mean": 3644.6,
"valid_targets_min": 672
},
{
"epoch": 6.333333333333333,
"grad_norm": 0.6017513819380911,
"learning_rate": 1.101082191832017e-06,
"loss": 0.2185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25283360481262207,
"step": 3420,
"valid_targets_mean": 3448.8,
"valid_targets_min": 580
},
{
"epoch": 6.342592592592593,
"grad_norm": 0.6051873506386266,
"learning_rate": 1.0710658596291612e-06,
"loss": 0.2314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2810269594192505,
"step": 3425,
"valid_targets_mean": 4163.6,
"valid_targets_min": 729
},
{
"epoch": 6.351851851851852,
"grad_norm": 0.5949919214922811,
"learning_rate": 1.0414530766573661e-06,
"loss": 0.2163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20459462702274323,
"step": 3430,
"valid_targets_mean": 3321.0,
"valid_targets_min": 1666
},
{
"epoch": 6.361111111111111,
"grad_norm": 0.5564753367621429,
"learning_rate": 1.0122444742366945e-06,
"loss": 0.2202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23238684237003326,
"step": 3435,
"valid_targets_mean": 4126.9,
"valid_targets_min": 840
},
{
"epoch": 6.37037037037037,
"grad_norm": 0.4818681450985929,
"learning_rate": 9.83440675070404e-07,
"loss": 0.2286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2342509627342224,
"step": 3440,
"valid_targets_mean": 5746.8,
"valid_targets_min": 1421
},
{
"epoch": 6.37962962962963,
"grad_norm": 0.6306703109771072,
"learning_rate": 9.550422932316938e-07,
"loss": 0.2265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2181071788072586,
"step": 3445,
"valid_targets_mean": 3027.6,
"valid_targets_min": 313
},
{
"epoch": 6.388888888888889,
"grad_norm": 0.7109368059368608,
"learning_rate": 9.270499341505901e-07,
"loss": 0.2361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21922603249549866,
"step": 3450,
"valid_targets_mean": 3950.0,
"valid_targets_min": 425
},
{
"epoch": 6.398148148148148,
"grad_norm": 0.5748329728377656,
"learning_rate": 8.994641946010474e-07,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26003679633140564,
"step": 3455,
"valid_targets_mean": 4081.2,
"valid_targets_min": 764
},
{
"epoch": 6.407407407407407,
"grad_norm": 0.6855173336098317,
"learning_rate": 8.722856626882415e-07,
"loss": 0.244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2373206913471222,
"step": 3460,
"valid_targets_mean": 3279.6,
"valid_targets_min": 764
},
{
"epoch": 6.416666666666667,
"grad_norm": 0.5616037615887454,
"learning_rate": 8.455149178360012e-07,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2553655803203583,
"step": 3465,
"valid_targets_mean": 4561.8,
"valid_targets_min": 594
},
{
"epoch": 6.425925925925926,
"grad_norm": 0.5276114854080077,
"learning_rate": 8.191525307744896e-07,
"loss": 0.2229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24148723483085632,
"step": 3470,
"valid_targets_mean": 4596.1,
"valid_targets_min": 880
},
{
"epoch": 6.435185185185185,
"grad_norm": 0.5535681163787745,
"learning_rate": 7.931990635280052e-07,
"loss": 0.2168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23151087760925293,
"step": 3475,
"valid_targets_mean": 4101.1,
"valid_targets_min": 215
},
{
"epoch": 6.444444444444445,
"grad_norm": 0.5364895072322574,
"learning_rate": 7.676550694030172e-07,
"loss": 0.2134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2174125611782074,
"step": 3480,
"valid_targets_mean": 4606.8,
"valid_targets_min": 570
},
{
"epoch": 6.453703703703704,
"grad_norm": 0.5101763745085794,
"learning_rate": 7.425210929763738e-07,
"loss": 0.2273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.211885005235672,
"step": 3485,
"valid_targets_mean": 4786.0,
"valid_targets_min": 829
},
{
"epoch": 6.462962962962963,
"grad_norm": 0.5954727436115828,
"learning_rate": 7.17797670083673e-07,
"loss": 0.2155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21472561359405518,
"step": 3490,
"valid_targets_mean": 3507.8,
"valid_targets_min": 541
},
{
"epoch": 6.472222222222222,
"grad_norm": 0.5173291565619594,
"learning_rate": 6.934853278078635e-07,
"loss": 0.2205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20479455590248108,
"step": 3495,
"valid_targets_mean": 3915.1,
"valid_targets_min": 1907
},
{
"epoch": 6.481481481481482,
"grad_norm": 0.5598429190290807,
"learning_rate": 6.695845844679816e-07,
"loss": 0.2146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.265296995639801,
"step": 3500,
"valid_targets_mean": 4297.6,
"valid_targets_min": 569
},
{
"epoch": 6.4907407407407405,
"grad_norm": 0.644127732439113,
"learning_rate": 6.460959496081276e-07,
"loss": 0.196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19158616662025452,
"step": 3505,
"valid_targets_mean": 3284.1,
"valid_targets_min": 1857
},
{
"epoch": 6.5,
"grad_norm": 0.46580841568793,
"learning_rate": 6.230199239865808e-07,
"loss": 0.2263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16796517372131348,
"step": 3510,
"valid_targets_mean": 4302.2,
"valid_targets_min": 2752
},
{
"epoch": 6.5092592592592595,
"grad_norm": 0.5292186252116317,
"learning_rate": 6.003569995651304e-07,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19200381636619568,
"step": 3515,
"valid_targets_mean": 4388.9,
"valid_targets_min": 512
},
{
"epoch": 6.518518518518518,
"grad_norm": 0.5737139503739112,
"learning_rate": 5.781076594986035e-07,
"loss": 0.2173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2559890151023865,
"step": 3520,
"valid_targets_mean": 4729.4,
"valid_targets_min": 743
},
{
"epoch": 6.527777777777778,
"grad_norm": 0.601281290007252,
"learning_rate": 5.562723781245316e-07,
"loss": 0.2374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20773258805274963,
"step": 3525,
"valid_targets_mean": 3555.2,
"valid_targets_min": 1591
},
{
"epoch": 6.537037037037037,
"grad_norm": 0.5889092918003285,
"learning_rate": 5.348516209530741e-07,
"loss": 0.2463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2901744246482849,
"step": 3530,
"valid_targets_mean": 4212.6,
"valid_targets_min": 616
},
{
"epoch": 6.546296296296296,
"grad_norm": 0.5058670706615777,
"learning_rate": 5.13845844657066e-07,
"loss": 0.216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19678425788879395,
"step": 3535,
"valid_targets_mean": 4579.2,
"valid_targets_min": 1761
},
{
"epoch": 6.555555555555555,
"grad_norm": 0.6257218263377696,
"learning_rate": 4.93255497062295e-07,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24043655395507812,
"step": 3540,
"valid_targets_mean": 3257.0,
"valid_targets_min": 647
},
{
"epoch": 6.564814814814815,
"grad_norm": 0.5505549796788423,
"learning_rate": 4.730810171379574e-07,
"loss": 0.234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2459501177072525,
"step": 3545,
"valid_targets_mean": 4357.0,
"valid_targets_min": 733
},
{
"epoch": 6.574074074074074,
"grad_norm": 0.6028654489237595,
"learning_rate": 4.533228349872887e-07,
"loss": 0.2163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598051428794861,
"step": 3550,
"valid_targets_mean": 3666.0,
"valid_targets_min": 649
},
{
"epoch": 6.583333333333333,
"grad_norm": 0.6185888512407045,
"learning_rate": 4.339813718384056e-07,
"loss": 0.2476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21807453036308289,
"step": 3555,
"valid_targets_mean": 3160.6,
"valid_targets_min": 513
},
{
"epoch": 6.592592592592593,
"grad_norm": 0.6008888131850063,
"learning_rate": 4.1505704003531155e-07,
"loss": 0.2289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2528533339500427,
"step": 3560,
"valid_targets_mean": 3871.8,
"valid_targets_min": 582
},
{
"epoch": 6.601851851851852,
"grad_norm": 0.5898425554611046,
"learning_rate": 3.965502430291235e-07,
"loss": 0.2291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2124367356300354,
"step": 3565,
"valid_targets_mean": 4025.0,
"valid_targets_min": 813
},
{
"epoch": 6.611111111111111,
"grad_norm": 0.5495965363011839,
"learning_rate": 3.784613753694566e-07,
"loss": 0.2235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2144167423248291,
"step": 3570,
"valid_targets_mean": 4046.5,
"valid_targets_min": 991
},
{
"epoch": 6.62037037037037,
"grad_norm": 0.6165423333917514,
"learning_rate": 3.607908226960155e-07,
"loss": 0.2126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21597029268741608,
"step": 3575,
"valid_targets_mean": 3308.0,
"valid_targets_min": 566
},
{
"epoch": 6.62962962962963,
"grad_norm": 0.45236188898394386,
"learning_rate": 3.4353896173038524e-07,
"loss": 0.2282,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2370537519454956,
"step": 3580,
"valid_targets_mean": 5711.6,
"valid_targets_min": 670
},
{
"epoch": 6.638888888888889,
"grad_norm": 0.56933040396151,
"learning_rate": 3.2670616026797776e-07,
"loss": 0.2117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1997252255678177,
"step": 3585,
"valid_targets_mean": 3777.3,
"valid_targets_min": 818
},
{
"epoch": 6.648148148148148,
"grad_norm": 0.5659167186719102,
"learning_rate": 3.102927771702091e-07,
"loss": 0.2384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20919036865234375,
"step": 3590,
"valid_targets_mean": 3730.8,
"valid_targets_min": 770
},
{
"epoch": 6.657407407407407,
"grad_norm": 0.5079303353870578,
"learning_rate": 2.942991623568436e-07,
"loss": 0.2238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19151973724365234,
"step": 3595,
"valid_targets_mean": 4735.9,
"valid_targets_min": 2372
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.5137082597472087,
"learning_rate": 2.7872565679852414e-07,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2293935865163803,
"step": 3600,
"valid_targets_mean": 4664.9,
"valid_targets_min": 2201
},
{
"epoch": 6.675925925925926,
"grad_norm": 0.5409545059748535,
"learning_rate": 2.635725925095245e-07,
"loss": 0.2289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23130936920642853,
"step": 3605,
"valid_targets_mean": 4416.5,
"valid_targets_min": 1904
},
{
"epoch": 6.685185185185185,
"grad_norm": 0.555324535146906,
"learning_rate": 2.4884029254064636e-07,
"loss": 0.2302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21047340333461761,
"step": 3610,
"valid_targets_mean": 3865.1,
"valid_targets_min": 2178
},
{
"epoch": 6.694444444444445,
"grad_norm": 0.4829255723553718,
"learning_rate": 2.3452907097235355e-07,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1802108883857727,
"step": 3615,
"valid_targets_mean": 3995.1,
"valid_targets_min": 764
},
{
"epoch": 6.703703703703704,
"grad_norm": 0.5450225699083614,
"learning_rate": 2.2063923290805756e-07,
"loss": 0.2062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21498540043830872,
"step": 3620,
"valid_targets_mean": 3961.8,
"valid_targets_min": 1708
},
{
"epoch": 6.712962962962963,
"grad_norm": 0.5695770354778458,
"learning_rate": 2.0717107446762696e-07,
"loss": 0.2023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19042396545410156,
"step": 3625,
"valid_targets_mean": 3396.2,
"valid_targets_min": 665
},
{
"epoch": 6.722222222222222,
"grad_norm": 0.5764051828461008,
"learning_rate": 1.9412488278107044e-07,
"loss": 0.1986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19108936190605164,
"step": 3630,
"valid_targets_mean": 3522.6,
"valid_targets_min": 1445
},
{
"epoch": 6.731481481481482,
"grad_norm": 0.5141025615330546,
"learning_rate": 1.8150093598240825e-07,
"loss": 0.221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2816708981990814,
"step": 3635,
"valid_targets_mean": 5298.4,
"valid_targets_min": 1615
},
{
"epoch": 6.7407407407407405,
"grad_norm": 0.5516459278281802,
"learning_rate": 1.69299503203757e-07,
"loss": 0.2296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20504800975322723,
"step": 3640,
"valid_targets_mean": 3981.1,
"valid_targets_min": 2114
},
{
"epoch": 6.75,
"grad_norm": 0.4418946934572028,
"learning_rate": 1.5752084456957416e-07,
"loss": 0.2515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24999968707561493,
"step": 3645,
"valid_targets_mean": 5980.2,
"valid_targets_min": 2761
},
{
"epoch": 6.7592592592592595,
"grad_norm": 0.5163142937241847,
"learning_rate": 1.4616521119112937e-07,
"loss": 0.2332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2180902659893036,
"step": 3650,
"valid_targets_mean": 4460.1,
"valid_targets_min": 1782
},
{
"epoch": 6.768518518518518,
"grad_norm": 0.5275315251769807,
"learning_rate": 1.3523284516113955e-07,
"loss": 0.2103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2362240105867386,
"step": 3655,
"valid_targets_mean": 4463.1,
"valid_targets_min": 856
},
{
"epoch": 6.777777777777778,
"grad_norm": 0.5932303316312726,
"learning_rate": 1.2472397954861549e-07,
"loss": 0.2442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25914400815963745,
"step": 3660,
"valid_targets_mean": 4156.2,
"valid_targets_min": 1887
},
{
"epoch": 6.787037037037037,
"grad_norm": 0.5704356783239624,
"learning_rate": 1.1463883839388346e-07,
"loss": 0.2125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29468631744384766,
"step": 3665,
"valid_targets_mean": 4419.7,
"valid_targets_min": 836
},
{
"epoch": 6.796296296296296,
"grad_norm": 0.6176484972834143,
"learning_rate": 1.0497763670382022e-07,
"loss": 0.2039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24018700420856476,
"step": 3670,
"valid_targets_mean": 3530.4,
"valid_targets_min": 368
},
{
"epoch": 6.805555555555555,
"grad_norm": 0.5827116087848868,
"learning_rate": 9.574058044725665e-08,
"loss": 0.2377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2357904613018036,
"step": 3675,
"valid_targets_mean": 3568.0,
"valid_targets_min": 2129
},
{
"epoch": 6.814814814814815,
"grad_norm": 0.5334174149912604,
"learning_rate": 8.692786655060348e-08,
"loss": 0.2446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27739739418029785,
"step": 3680,
"valid_targets_mean": 4644.2,
"valid_targets_min": 597
},
{
"epoch": 6.824074074074074,
"grad_norm": 0.5908537050626661,
"learning_rate": 7.853968289363245e-08,
"loss": 0.1994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1957089900970459,
"step": 3685,
"valid_targets_mean": 4683.8,
"valid_targets_min": 2043
},
{
"epoch": 6.833333333333333,
"grad_norm": 0.5260600630535545,
"learning_rate": 7.057620830548617e-08,
"loss": 0.2024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18605807423591614,
"step": 3690,
"valid_targets_mean": 4110.4,
"valid_targets_min": 1942
},
{
"epoch": 6.842592592592593,
"grad_norm": 0.5916196044721515,
"learning_rate": 6.30376125608656e-08,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2218480408191681,
"step": 3695,
"valid_targets_mean": 3644.4,
"valid_targets_min": 570
},
{
"epoch": 6.851851851851852,
"grad_norm": 0.514705393546641,
"learning_rate": 5.592405637639742e-08,
"loss": 0.2319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20492291450500488,
"step": 3700,
"valid_targets_mean": 4558.1,
"valid_targets_min": 2073
},
{
"epoch": 6.861111111111111,
"grad_norm": 0.4524372951551533,
"learning_rate": 4.923569140722118e-08,
"loss": 0.2241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28827571868896484,
"step": 3705,
"valid_targets_mean": 6742.3,
"valid_targets_min": 1989
},
{
"epoch": 6.87037037037037,
"grad_norm": 0.5258479700135631,
"learning_rate": 4.2972660243749686e-08,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1960398554801941,
"step": 3710,
"valid_targets_mean": 4020.9,
"valid_targets_min": 879
},
{
"epoch": 6.87962962962963,
"grad_norm": 0.5383620822998924,
"learning_rate": 3.7135096408631443e-08,
"loss": 0.2237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17605650424957275,
"step": 3715,
"valid_targets_mean": 3673.5,
"valid_targets_min": 892
},
{
"epoch": 6.888888888888889,
"grad_norm": 0.5022673424281345,
"learning_rate": 3.172312435390401e-08,
"loss": 0.2181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21586468815803528,
"step": 3720,
"valid_targets_mean": 4826.8,
"valid_targets_min": 1808
},
{
"epoch": 6.898148148148148,
"grad_norm": 0.5150139357949609,
"learning_rate": 2.673685945833615e-08,
"loss": 0.2383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24277575314044952,
"step": 3725,
"valid_targets_mean": 5270.2,
"valid_targets_min": 2004
},
{
"epoch": 6.907407407407407,
"grad_norm": 0.512483831039239,
"learning_rate": 2.2176408024974228e-08,
"loss": 0.2511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23216493427753448,
"step": 3730,
"valid_targets_mean": 4509.7,
"valid_targets_min": 1745
},
{
"epoch": 6.916666666666667,
"grad_norm": 0.5381090256017264,
"learning_rate": 1.8041867278875137e-08,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19807547330856323,
"step": 3735,
"valid_targets_mean": 4138.4,
"valid_targets_min": 779
},
{
"epoch": 6.925925925925926,
"grad_norm": 0.49323585595517727,
"learning_rate": 1.4333325365030181e-08,
"loss": 0.2223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21127669513225555,
"step": 3740,
"valid_targets_mean": 5065.2,
"valid_targets_min": 1890
},
{
"epoch": 6.935185185185185,
"grad_norm": 0.555488870581542,
"learning_rate": 1.1050861346488806e-08,
"loss": 0.2293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20611155033111572,
"step": 3745,
"valid_targets_mean": 3845.6,
"valid_targets_min": 723
},
{
"epoch": 6.944444444444445,
"grad_norm": 0.5336086789856043,
"learning_rate": 8.194545202666604e-09,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1871817260980606,
"step": 3750,
"valid_targets_mean": 3598.5,
"valid_targets_min": 849
},
{
"epoch": 6.953703703703704,
"grad_norm": 0.6088047301014547,
"learning_rate": 5.76443782786873e-09,
"loss": 0.2481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22356441617012024,
"step": 3755,
"valid_targets_mean": 3994.5,
"valid_targets_min": 1869
},
{
"epoch": 6.962962962962963,
"grad_norm": 0.5623518574868789,
"learning_rate": 3.760591029973171e-09,
"loss": 0.225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21949899196624756,
"step": 3760,
"valid_targets_mean": 3838.9,
"valid_targets_min": 928
},
{
"epoch": 6.972222222222222,
"grad_norm": 0.6022551873866897,
"learning_rate": 2.1830475293360686e-09,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22909459471702576,
"step": 3765,
"valid_targets_mean": 3225.6,
"valid_targets_min": 656
},
{
"epoch": 6.981481481481482,
"grad_norm": 0.5183031403584336,
"learning_rate": 1.0318409578835564e-09,
"loss": 0.2227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2013653814792633,
"step": 3770,
"valid_targets_mean": 4304.2,
"valid_targets_min": 446
},
{
"epoch": 6.9907407407407405,
"grad_norm": 0.5846907653612194,
"learning_rate": 3.069958583856725e-10,
"loss": 0.2175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23626720905303955,
"step": 3775,
"valid_targets_mean": 3642.6,
"valid_targets_min": 1138
},
{
"epoch": 7.0,
"grad_norm": 0.5911749795882507,
"learning_rate": 8.527683943437837e-12,
"loss": 0.2341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23940664529800415,
"step": 3780,
"valid_targets_mean": 4033.5,
"valid_targets_min": 2079
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23940664529800415,
"step": 3780,
"total_flos": 1019495884914688.0,
"train_loss": 0.14474489343544794,
"train_runtime": 12189.5389,
"train_samples_per_second": 4.959,
"train_steps_per_second": 0.31,
"valid_targets_mean": 4033.5,
"valid_targets_min": 2079
}
],
"logging_steps": 5,
"max_steps": 3780,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1019495884914688.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}