# Serialized collections.OrderedDict of hyperparameters.
# Layout: the tag is applied to an argument list with a single element, which
# is itself a list of [key, value] pairs (order is preserved).
#
# NOTE(review): the python/object/apply tag constructs a Python object on load.
# yaml.safe_load rejects it; a loader that permits Python object construction
# (e.g. yaml.UnsafeLoader) is required. Only load this file from a trusted
# source.
# NOTE(review): the keys look like stable-baselines3 DQN-style settings
# (the vec_env_wrapper value points into stable_baselines3) -- confirm against
# the consumer of this file.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 128
  - - buffer_size
    - 125000
  - - exploration_final_eps
    - 0.01
  - - exploration_fraction
    - 0.25
  - - gamma
    - 0.99
  - - gradient_steps
    - 1
  - - learning_rate
    - 0.00063
  - - learning_starts
    - 0
  - - n_envs
    - 24
  - - n_timesteps
    - 50000
  - - policy
    - MlpPolicy
  # Stored as a plain string holding a Python expression; presumably evaluated
  # by the consumer with `nn` in scope -- TODO confirm.
  - - policy_kwargs
    - dict(net_arch=[512, 256, 128, 128], activation_fn=nn.ReLU)
  - - target_update_interval
    - 250
  - - train_freq
    - 4
  # Dotted import path given as a string.
  - - vec_env_wrapper
    - stable_baselines3.common.vec_env.VecMonitor