Add regression testing and copy over necessary changes #141

Open · wants to merge 12 commits into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ rlkit/launchers/conf_private.py
 MANIFEST
 *.egg-info
 \.idea/
+.pytest_cache
+.coverage
7 changes: 7 additions & 0 deletions README.md
@@ -214,6 +214,13 @@ This requires some knowledge of AWS and/or GCP, which is beyond the scope of
 this README.
 To learn more about `doodad`, [go to the repository](https://github.com/vitchyr/doodad/), which is based on [this original repository](https://github.com/justinjfu/doodad/).
 
+### Testing
+A set of basic regression tests can be run with:
+```
+nose2 -v -B -s tests/regression/basic
+```
+Other directories in `tests/regression` have project-specific tests.
+
 # Requests for pull-requests
 - Implement policy-gradient algorithms.
 - Implement model-based algorithms.
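For orientation, a regression test that fits the `nose2 -v -B -s tests/regression/basic` invocation above might import one of the refactored example scripts and shrink its variant so it finishes in seconds. Everything below (the module path, class and method names, and the shrunken values) is a hypothetical sketch, not code from this PR; it relies on the empty `examples/__init__.py` files added below, which make the example scripts importable as a package.

```python
# Hypothetical tests/regression/basic/test_examples.py.
import unittest


class TestExampleScripts(unittest.TestCase):
    def test_ddpg_smoke(self):
        from examples import ddpg  # module refactored in this PR

        # Shrink the run so a single epoch completes quickly.
        for key in (
            "num_eval_steps_per_epoch",
            "num_trains_per_train_loop",
            "num_expl_steps_per_train_loop",
            "min_num_steps_before_training",
            "max_path_length",
        ):
            ddpg.variant["algorithm_kwargs"][key] = 10
        ddpg.variant["algorithm_kwargs"]["num_epochs"] = 1
        ddpg.variant["seed"] = 0  # pin the otherwise random seed
        ddpg.experiment(ddpg.variant)  # passes if it runs without raising


if __name__ == "__main__":
    unittest.main()
```

nose2 discovers `unittest`-style cases, so the same file also runs under `python -m unittest`.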
Empty file added examples/__init__.py
Empty file added examples/awac/__init__.py
Empty file added examples/awac/hand/__init__.py
53 changes: 16 additions & 37 deletions examples/awac/hand/awac1.py
@@ -3,19 +3,22 @@
 
 import rlkit.util.hyperparameter as hyp
 from rlkit.launchers.launcher_util import run_experiment
+from rlkit.launchers.arglauncher import run_variants
 
 from rlkit.torch.sac.policies import GaussianPolicy
 from rlkit.torch.networks import Clamp
 
-if __name__ == "__main__":
+def main():
     variant = dict(
-        num_epochs=501,
-        num_eval_steps_per_epoch=1000,
-        num_trains_per_train_loop=1000,
-        num_expl_steps_per_train_loop=1000,
-        min_num_steps_before_training=1000,
+        algo_kwargs=dict(
+            num_epochs=501,
+            num_eval_steps_per_epoch=1000,
+            num_trains_per_train_loop=1000,
+            num_expl_steps_per_train_loop=1000,
+            min_num_steps_before_training=1000,
+            batch_size=1024,
+        ),
         max_path_length=1000,
-        batch_size=1024,
         algorithm="AWAC",
         replay_buffer_size=int(1E6),

@@ -99,35 +102,11 @@
         search_space, default_parameters=variant,
     )
 
-    # n_seeds = 1
-    # mode = 'local'
-    # exp_prefix = 'dev-{}'.format(
-    #     __file__.replace('/', '-').replace('_', '-').split('.')[0]
-    # )
-
-    n_seeds = 3
-    mode = 'ec2'
-    exp_prefix = 'hand-awac1'
+    variants = []
+    for variant in sweeper.iterate_hyperparameters():
+        variants.append(variant)
 
-    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
-        for _ in range(n_seeds):
-            run_experiment(
-                experiment,
-                exp_prefix=exp_prefix,
-                mode=mode,
-                variant=variant,
-                use_gpu=True,
-                snapshot_gap=200,
-                snapshot_mode='gap_and_last',
-                num_exps_per_instance=3,
-                gcp_kwargs=dict(
-                    zone='us-west1-b',
-                ),
-            )
+    run_variants(experiment, variants, process_args)
 
-    # variants = []
-    # for variant in sweeper.iterate_hyperparameters():
-    #     variants.append(variant)
-
-    # run_variants(experiment, variants, process_args)
+
+if __name__ == "__main__":
+    main()
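Both AWAC launchers now delegate to `run_variants` from `rlkit.launchers.arglauncher`; neither its body nor the `process_args` hook (defined in an unexpanded part of this file) appears in this diff. As a rough mental model only, a minimal stand-in consistent with the call site could be:

```python
# Illustrative stand-in for rlkit.launchers.arglauncher.run_variants,
# whose source is not part of this diff; the real function presumably
# also parses command-line arguments (hence "arglauncher"). Treat this
# as a sketch of the calling convention only.
def run_variants(experiment, variants, process_args):
    for exp_id, variant in enumerate(variants):
        process_args(variant)       # script-specific tweaks/validation
        variant["exp_id"] = exp_id  # hypothetical bookkeeping field
        experiment(variant)         # run each variant in turn
```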
Empty file added examples/awac/mujoco/__init__.py
53 changes: 16 additions & 37 deletions examples/awac/mujoco/awac1.py
@@ -3,19 +3,22 @@
 
 import rlkit.util.hyperparameter as hyp
 from rlkit.launchers.launcher_util import run_experiment
+from rlkit.launchers.arglauncher import run_variants
 
 from rlkit.torch.sac.policies import GaussianPolicy
 from rlkit.torch.networks import Clamp
 
-if __name__ == "__main__":
+def main():
     variant = dict(
-        num_epochs=501,
-        num_eval_steps_per_epoch=1000,
-        num_trains_per_train_loop=1000,
-        num_expl_steps_per_train_loop=1000,
-        min_num_steps_before_training=1000,
+        algo_kwargs=dict(
+            num_epochs=501,
+            num_eval_steps_per_epoch=1000,
+            num_trains_per_train_loop=1000,
+            num_expl_steps_per_train_loop=1000,
+            min_num_steps_before_training=1000,
+            batch_size=1024,
+        ),
         max_path_length=1000,
-        batch_size=1024,
         replay_buffer_size=int(1E6),
         layer_size=256,
         num_layers=2,
@@ -87,35 +90,11 @@
         search_space, default_parameters=variant,
     )
 
-    n_seeds = 1
-    mode = 'local'
-    exp_prefix = 'dev-{}'.format(
-        __file__.replace('/', '-').replace('_', '-').split('.')[0]
-    )
-
-    # n_seeds = 3
-    # mode = 'gcp'
-    # exp_prefix = 'skew-fit-pickup-reference-post-refactor'
+    variants = []
+    for variant in sweeper.iterate_hyperparameters():
+        variants.append(variant)
 
-    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
-        for _ in range(n_seeds):
-            run_experiment(
-                experiment,
-                exp_prefix=exp_prefix,
-                mode=mode,
-                variant=variant,
-                use_gpu=use_gpu,
-                snapshot_gap=200,
-                snapshot_mode='gap_and_last',
-                num_exps_per_instance=3,
-                gcp_kwargs=dict(
-                    zone='us-west1-b',
-                ),
-            )
+    run_variants(experiment, variants, process_args)
 
-    # variants = []
-    # for variant in sweeper.iterate_hyperparameters():
-    #     variants.append(variant)
-
-    # run_variants(experiment, variants, process_args)
+
+if __name__ == "__main__":
+    main()
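The sweeper used in both launchers is constructed from `rlkit.util.hyperparameter` (the construction itself is cropped out of this diff); inferring its contract from the call sites, `iterate_hyperparameters()` enumerates the cartesian product of `search_space` overlaid on the defaults. A stand-in illustrating that assumed behavior:

```python
import itertools


# Illustrative stand-in for the sweeper's iterate_hyperparameters():
# one variant per point in the cartesian product of search_space,
# each overlaid on a copy of the default parameters.
def iterate_hyperparameters(search_space, default_parameters):
    keys = list(search_space)
    for values in itertools.product(*(search_space[k] for k in keys)):
        variant = dict(default_parameters)
        variant.update(zip(keys, values))
        yield variant


# A 2 x 2 search space yields 4 variants.
variants = list(iterate_hyperparameters(
    {"seed": [0, 1], "layer_size": [256, 512]},
    {"algorithm": "AWAC"},
))
assert len(variants) == 4
```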
69 changes: 41 additions & 28 deletions examples/ddpg.py
@@ -18,13 +18,24 @@
 import rlkit.torch.pytorch_util as ptu
 from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
 
+import random
+import torch
+import numpy as np
+
 
 def experiment(variant):
     eval_env = NormalizedBoxEnv(HalfCheetahEnv())
     expl_env = NormalizedBoxEnv(HalfCheetahEnv())
     # Or for a specific version:
     # import gym
     # env = NormalizedBoxEnv(gym.make('HalfCheetah-v1'))
 
+    seed = variant["seed"]
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    eval_env.seed(seed)
+    expl_env.seed(seed)
+
     obs_dim = eval_env.observation_space.low.size
     action_dim = eval_env.action_space.low.size
     qf = ConcatMlp(
@@ -65,34 +76,36 @@ def experiment(variant):
     algorithm.to(ptu.device)
     algorithm.train()
 
+
+variant = dict(
+    algorithm_kwargs=dict(
+        num_epochs=1000,
+        num_eval_steps_per_epoch=1000,
+        num_trains_per_train_loop=1000,
+        num_expl_steps_per_train_loop=1000,
+        min_num_steps_before_training=10000,
+        max_path_length=1000,
+        batch_size=128,
+    ),
+    trainer_kwargs=dict(
+        use_soft_update=True,
+        tau=1e-2,
+        discount=0.99,
+        qf_learning_rate=1e-3,
+        policy_learning_rate=1e-4,
+    ),
+    qf_kwargs=dict(
+        hidden_sizes=[400, 300],
+    ),
+    policy_kwargs=dict(
+        hidden_sizes=[400, 300],
+    ),
+    replay_buffer_size=int(1E6),
+    seed=random.randint(0, 100000),
+)
+
-if __name__ == "__main__":
-    # noinspection PyTypeChecker
-    variant = dict(
-        algorithm_kwargs=dict(
-            num_epochs=1000,
-            num_eval_steps_per_epoch=1000,
-            num_trains_per_train_loop=1000,
-            num_expl_steps_per_train_loop=1000,
-            min_num_steps_before_training=10000,
-            max_path_length=1000,
-            batch_size=128,
-        ),
-        trainer_kwargs=dict(
-            use_soft_update=True,
-            tau=1e-2,
-            discount=0.99,
-            qf_learning_rate=1e-3,
-            policy_learning_rate=1e-4,
-        ),
-        qf_kwargs=dict(
-            hidden_sizes=[400, 300],
-        ),
-        policy_kwargs=dict(
-            hidden_sizes=[400, 300],
-        ),
-        replay_buffer_size=int(1E6),
-    )
-    # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
+def main():
     setup_logger('name-of-experiment', variant=variant)
     experiment(variant)
+
+
+if __name__ == "__main__":
+    main()
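The five seeding calls added to `experiment` here are repeated verbatim in the DQN example below; a shared helper (hypothetical, not part of this PR) would keep them in sync:

```python
import random

import numpy as np
import torch


def seed_everything(seed, *envs):
    """Seed Python's RNG, NumPy, torch, and any gym-style envs passed in."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    for env in envs:
        env.seed(seed)  # pre-Gym-0.26 seeding API, matching this PR's usage
```

With it, the block above would reduce to `seed_everything(variant["seed"], eval_env, expl_env)`. Note that bit-for-bit determinism on GPU would additionally require torch's deterministic backend settings.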
55 changes: 34 additions & 21 deletions examples/dqn_and_double_dqn.py
@@ -17,13 +17,23 @@
 from rlkit.samplers.data_collector import MdpPathCollector
 from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
 
+import random
+import torch
+import numpy as np
 
 
 def experiment(variant):
     expl_env = gym.make('CartPole-v0').env
     eval_env = gym.make('CartPole-v0').env
     obs_dim = expl_env.observation_space.low.size
     action_dim = eval_env.action_space.n
 
+    seed = variant["seed"]
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    eval_env.seed(seed)
+    expl_env.seed(seed)
+
     qf = Mlp(
         hidden_sizes=[32, 32],
         input_size=obs_dim,
@@ -70,28 +80,31 @@ def experiment(variant):
     algorithm.to(ptu.device)
     algorithm.train()
 
+
+variant = dict(
+    algorithm="DQN",
+    version="normal",
+    layer_size=256,
+    replay_buffer_size=int(1E6),
+    algorithm_kwargs=dict(
+        num_epochs=3000,
+        num_eval_steps_per_epoch=5000,
+        num_trains_per_train_loop=1000,
+        num_expl_steps_per_train_loop=1000,
+        min_num_steps_before_training=1000,
+        max_path_length=1000,
+        batch_size=256,
+    ),
+    trainer_kwargs=dict(
+        discount=0.99,
+        learning_rate=3E-4,
+    ),
+    seed=random.randint(0, 100000),
+)
+
-if __name__ == "__main__":
-    # noinspection PyTypeChecker
-    variant = dict(
-        algorithm="DQN",
-        version="normal",
-        layer_size=256,
-        replay_buffer_size=int(1E6),
-        algorithm_kwargs=dict(
-            num_epochs=3000,
-            num_eval_steps_per_epoch=5000,
-            num_trains_per_train_loop=1000,
-            num_expl_steps_per_train_loop=1000,
-            min_num_steps_before_training=1000,
-            max_path_length=1000,
-            batch_size=256,
-        ),
-        trainer_kwargs=dict(
-            discount=0.99,
-            learning_rate=3E-4,
-        ),
-    )
+def main():
     setup_logger('dqn-CartPole', variant=variant)
     # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
     experiment(variant)
+
+
+if __name__ == "__main__":
+    main()
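Because `seed=random.randint(0, 100000)` draws a fresh seed per invocation but stores it in `variant`, which `setup_logger` records alongside the run, a logged run can be replayed by pinning that seed. A usage sketch; the seed value below is made up:

```python
# Replay a logged DQN run deterministically (up to backend nondeterminism).
from examples import dqn_and_double_dqn as dqn

dqn.variant["seed"] = 41823  # hypothetical seed read back from the run's logged variant
dqn.main()                   # re-runs setup_logger + experiment with that seed
```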
Empty file added examples/her/__init__.py