{ "ac_kwargs": { "activation": "ReLU", "hidden_sizes": [ 400 ] }, "act_noise": 0.1, "actor_critic": "MLPActorCritic", "batch_size": 100, "env_fn": "functools.partial(functools.partial(functools.partial(functools.partial(functools.partial(functools.partial(functools.partial(functools.partial(functools.partial(, env_name='gyroscopeenv-v0', reward_type='Quadratic', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.1, 'pu2': 0.1}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Absolute', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.5, 'pu2': 0.5}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Normalized', reward_args={'k': 0.1, 'pu1': 0.25, 'pu2': 0.25}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with ending penalty', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.1, 'pu2': 0.1, 'sx1': 100, 'sx3': 100, 'end_horizon': 1}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with penalty', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.25, 'pu2': 0.25, 'bound': 0.2, 'penalty': 40}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with penalty', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.25, 'pu2': 0.25, 'bound': 0.2, 'penalty': 40}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with exponential', reward_args={'qx1': 1, 'qx2': 0, 'qx3': 1, 'qx4': 0, 'pu1': 0, 'pu2': 0, 'eax1': 10, 'ebx1': 10, 'eax3': 10, 'ebx3': 10}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with penalty', reward_args={'qx1': 9, 'qx2': 0.05, 'qx3': 9, 'qx4': 0.05, 'pu1': 0.25, 'pu2': 0.25, 'bound': 0.2, 'penalty': 40}, ep_len=110), env_name='gyroscopeenv-v0', reward_type='Quadratic with bonus', reward_args={'qx1': 1, 'qx2': 0, 'qx3': 1, 'qx4': 0, 'pu1': 0, 'pu2': 0, 'bound': 0.05, 'bonus': 2}, ep_len=110)", "epochs": 100, "exp_name": "ddpg_q_b", "gamma": 0.995, "logger": { "": { "epoch_dict": {}, "exp_name": "ddpg_q_b", "first_row": true, "log_current_row": {}, "log_headers": [], "output_dir": "ddpg_q_b", "output_file": { "<_io.TextIOWrapper name='ddpg_q_b/progress.txt' mode='w' encoding='UTF-8'>": { "mode": "w" } } } }, "logger_kwargs": { "exp_name": "ddpg_q_b", "output_dir": "ddpg_q_b" }, "max_ep_len": 110, "num_test_episodes": 10, "pi_lr": 0.0025, "polyak": 0.995, "q_lr": 0.0025, "replay_size": 1000000, "save_freq": 1, "seed": 0, "start_steps": 10000, "steps_per_epoch": 1650, "update_after": 1000, "update_every": 50 }