{ "best_metric": 0.9238303497851478, "best_model_checkpoint": "./saved_models/llama_prompt_sbdh_gpt4_v2_0/checkpoint-120", "epoch": 5.0, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.6714589595794678, "learning_rate": 3.3333333333333335e-05, "loss": 2.5644, "step": 24 }, { "epoch": 1.0, "eval_acc_macro": 0.7160291923788676, "eval_acc_micro": 0.7509433962263584, "eval_auc_macro": 0.9741292675539216, "eval_auc_micro": 0.9750886034759222, "eval_f1_at_5": 0.4217622259434855, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.7989797861861626, "eval_f1_micro": 0.8577586206895812, "eval_loss": 0.11446325480937958, "eval_prec_at_5": 0.27009132420091325, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8115680364358134, "eval_prec_micro": 0.8931777378814278, "eval_rec_at_5": 0.9619482496194824, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.7915340851938971, "eval_rec_micro": 0.8250414593697492, "eval_runtime": 30.8714, "eval_samples_per_second": 28.376, "eval_steps_per_second": 3.563, "eval_threshold": -0.625, "step": 24 }, { "epoch": 2.0, "grad_norm": 0.35452723503112793, "learning_rate": 6.666666666666667e-05, "loss": 0.0736, "step": 48 }, { "epoch": 2.0, "eval_acc_macro": 0.8391717260025603, "eval_acc_micro": 0.8443432042520239, "eval_auc_macro": 0.9939078443751225, "eval_auc_micro": 0.99449507245633, "eval_f1_at_5": 0.42918048633589306, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9106835825010626, "eval_f1_micro": 0.9156031288595375, "eval_loss": 0.06005650386214256, "eval_prec_at_5": 0.27488584474885847, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9048916396910188, "eval_prec_micro": 0.9092395748159517, "eval_rec_at_5": 0.978310502283105, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.919126994992955, "eval_rec_micro": 0.9220563847428754, "eval_runtime": 31.0816, "eval_samples_per_second": 28.184, "eval_steps_per_second": 3.539, "eval_threshold": -1.375, "step": 48 }, { "epoch": 3.0, "grad_norm": 0.5112707614898682, "learning_rate": 0.0001, "loss": 0.0498, "step": 72 }, { "epoch": 3.0, "eval_acc_macro": 0.8538277777274056, "eval_acc_micro": 0.8602316602315938, "eval_auc_macro": 0.9946434924695814, "eval_auc_micro": 0.9959011091116522, "eval_f1_at_5": 0.4288472848093916, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9190983946030712, "eval_f1_micro": 0.924865089248574, "eval_loss": 0.047555435448884964, "eval_prec_at_5": 0.27465753424657535, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.919149383334144, "eval_prec_micro": 0.9260182876142206, "eval_rec_at_5": 0.9777397260273972, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9214974849786018, "eval_rec_micro": 0.9237147595355785, "eval_runtime": 31.1362, "eval_samples_per_second": 28.134, "eval_steps_per_second": 3.533, "eval_threshold": 0.0, "step": 72 }, { "epoch": 4.0, "grad_norm": 0.3539921045303345, "learning_rate": 9.411764705882353e-05, "loss": 0.0409, "step": 96 }, { "epoch": 4.0, "eval_acc_macro": 0.8544346372841939, "eval_acc_micro": 0.8602484472049021, "eval_auc_macro": 0.9958044036634556, "eval_auc_micro": 0.9960722303661861, "eval_f1_at_5": 0.4288472848093916, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.919964042577448, "eval_f1_micro": 0.9248747913187875, "eval_loss": 0.05106280744075775, "eval_prec_at_5": 0.27465753424657535, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9270437825158037, "eval_prec_micro": 0.9310924369747117, "eval_rec_at_5": 0.9777397260273972, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.914455594415295, "eval_rec_micro": 0.9187396351574694, "eval_runtime": 31.1411, "eval_samples_per_second": 28.13, "eval_steps_per_second": 3.532, "eval_threshold": -0.125, "step": 96 }, { "epoch": 5.0, "grad_norm": 0.4515529274940491, "learning_rate": 8.823529411764706e-05, "loss": 0.0306, "step": 120 }, { "epoch": 5.0, "eval_acc_macro": 0.8608096832911526, "eval_acc_micro": 0.8670212765956787, "eval_auc_macro": 0.9959155249262238, "eval_auc_micro": 0.9964546135288169, "eval_f1_at_5": 0.4288472848093916, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9238303497851478, "eval_f1_micro": 0.9287749287748531, "eval_loss": 0.05085020139813423, "eval_prec_at_5": 0.27465753424657535, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9063834789244836, "eval_prec_micro": 0.912070343724947, "eval_rec_at_5": 0.9777397260273972, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9425487929333234, "eval_rec_micro": 0.9461028192370691, "eval_runtime": 31.1075, "eval_samples_per_second": 28.16, "eval_steps_per_second": 3.536, "eval_threshold": -0.375, "step": 120 } ], "logging_steps": 500, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.619710176329728e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }