
Commit

add test
Signed-off-by: Matteo Bettini <[email protected]>
matteobettini committed Sep 21, 2023
1 parent 3b4aa6a commit 1678fdc
Showing 14 changed files with 171 additions and 35 deletions.
2 changes: 2 additions & 0 deletions .github/unittest/install_pettingzoo.sh
@@ -0,0 +1,2 @@

pip install "pettingzoo[all]==1.24.1"
39 changes: 39 additions & 0 deletions .github/workflows/pettingzoo_tests.yml
@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see:
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions


name: pettingzoo_tests

on:
  push:
    branches: [ $default-branch , "main" ]
  pull_request:
    branches: [ $default-branch , "main" ]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          bash .github/unittest/install_dependencies.sh
      - name: Install pettingzoo
        run: |
          bash .github/unittest/install_pettingzoo.sh
      - name: Test with pytest
        run: |
          pytest test/test_pettingzoo.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
2 changes: 1 addition & 1 deletion benchmarl/algorithms/iddpg.py
@@ -142,7 +142,7 @@ def _get_policy_for_loss(

        policy = ProbabilisticActor(
            module=actor_module,
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            in_keys=[(group, "param")],
            out_keys=[(group, "action")],
            distribution_class=TanhDelta,
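Note on the recurring spec change in this and the following algorithm files: the actors and losses are now handed the per-group leaf of the composite action spec instead of the whole composite. A minimal sketch of what that indexing does, assuming torchrl-style composite specs; the "agents" group name, bounds, and shapes below are invented for illustration and are not part of this commit:

# Minimal sketch, assuming torchrl's composite specs; the "agents" group,
# bounds and shapes are invented, not taken from this commit.
from torchrl.data import BoundedTensorSpec, CompositeSpec

action_spec = CompositeSpec(
    agents=CompositeSpec(
        action=BoundedTensorSpec(-1.0, 1.0, shape=(2, 3)),
        shape=(2,),
    )
)

# Indexing with the nested (group, "action") key returns only that group's
# action spec, which is the narrower object the actors above now receive.
leaf = action_spec["agents", "action"]
print(leaf.shape)  # torch.Size([2, 3])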
6 changes: 3 additions & 3 deletions benchmarl/algorithms/ippo.py
@@ -157,7 +157,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec[(group, "action")],
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -173,7 +173,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -183,7 +183,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
4 changes: 2 additions & 2 deletions benchmarl/algorithms/iql.py
@@ -62,7 +62,7 @@ def _get_loss(
            policy_for_loss,
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward=(group, "reward"),
@@ -142,7 +142,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

10 changes: 5 additions & 5 deletions benchmarl/algorithms/isac.py
@@ -91,7 +91,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_spec=self.action_spec,
+                action_spec=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -113,7 +113,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_space=self.action_spec,
+                action_space=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -202,7 +202,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -218,7 +218,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -228,7 +228,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
4 changes: 2 additions & 2 deletions benchmarl/algorithms/maddpg.py
@@ -142,7 +142,7 @@ def _get_policy_for_loss(

        policy = ProbabilisticActor(
            module=actor_module,
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            in_keys=[(group, "param")],
            out_keys=[(group, "action")],
            distribution_class=TanhDelta,
@@ -220,7 +220,7 @@ def get_value_module(self, group: str) -> TensorDictModule:
            modules.append(
                TensorDictModule(
                    lambda state, action: torch.cat(
-                        [state, action.view(action.shape[:-2], -1)], dim=-1
+                        [state, action.view(*action.shape[:-2], -1)], dim=-1
                    ),
                    in_keys=["state", (group, "action")],
                    out_keys=["state_action"],
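The one-character change inside the critic's flattening lambda (also applied in masac below) matters because Tensor.view takes either a single sequence of sizes or individually unpacked sizes, not a mix of the two. A standalone illustration with invented shapes, not code from this commit:

# Standalone illustration of the view(*shape, -1) fix; shapes are invented.
import torch

action = torch.randn(8, 4, 2)  # (batch, n_agents, action_dim)

# Unpacking the leading dims keeps them and flattens the per-agent actions
# into one joint-action vector for the centralised critic.
flat = action.view(*action.shape[:-2], -1)
print(flat.shape)  # torch.Size([8, 8])

# The old call mixed a torch.Size with an int and fails:
# action.view(action.shape[:-2], -1)  # TypeError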
6 changes: 3 additions & 3 deletions benchmarl/algorithms/mappo.py
@@ -156,7 +156,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -172,7 +172,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -182,7 +182,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
12 changes: 6 additions & 6 deletions benchmarl/algorithms/masac.py
@@ -91,7 +91,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_spec=self.action_spec,
+                action_spec=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -113,7 +113,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_space=self.action_spec,
+                action_space=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -202,7 +202,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -218,7 +218,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -228,7 +228,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
@@ -364,7 +364,7 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
            modules.append(
                TensorDictModule(
                    lambda state, action: torch.cat(
-                        [state, action.view(action.shape[:-2], -1)], dim=-1
+                        [state, action.view(*action.shape[:-2], -1)], dim=-1
                    ),
                    in_keys=["state", (group, "action")],
                    out_keys=["state_action"],
6 changes: 3 additions & 3 deletions benchmarl/algorithms/qmix.py
@@ -66,7 +66,7 @@ def _get_loss(
            self.get_mixer(group),
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward="reward",
@@ -147,7 +147,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

@@ -180,7 +180,7 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
        if done_key not in keys:
            batch.set(
                done_key,
-                batch.get(("next", group, "done")).mean(-2),
+                batch.get(("next", group, "done")).any(-2),
            )

        if reward_key not in keys:
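The process_batch change (mirrored in vdn below) collapses per-agent done flags into a group-level flag with any() over the agent dimension; mean() would not produce a boolean signal. A short sketch with an invented tensor, not code from this commit:

# Sketch of the done-flag aggregation; the tensor is invented for illustration.
import torch

# Per-agent done flags with shape (batch, n_agents, 1).
done = torch.tensor([[[False], [True]],
                     [[False], [False]]])

# True as soon as any agent in the group is done; stays boolean, shape (batch, 1).
group_done = done.any(-2)
print(group_done.squeeze(-1))  # tensor([ True, False])

# done.mean(-2) is not defined for boolean tensors and, after casting to float,
# would give fractional values rather than a valid done flag.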
6 changes: 3 additions & 3 deletions benchmarl/algorithms/vdn.py
@@ -63,7 +63,7 @@ def _get_loss(
            self.get_mixer(group),
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward="reward",
@@ -144,7 +144,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

@@ -177,7 +177,7 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
        if done_key not in keys:
            batch.set(
                done_key,
-                batch.get(("next", group, "done")).mean(-2),
+                batch.get(("next", group, "done")).any(-2),
            )

        if reward_key not in keys:
10 changes: 4 additions & 6 deletions benchmarl/environments/pettingzoo/common.py
@@ -43,7 +43,7 @@ def has_state(self) -> bool:
            return True
        return False

-    def has_render(self) -> bool:
+    def has_render(self, env: EnvBase) -> bool:
        return True

    def max_steps(self, env: EnvBase) -> bool:
@@ -66,8 +66,11 @@ def action_mask_spec(self, env: EnvBase) -> Optional[CompositeSpec]:
                    del group_obs_spec[key]
            if group_obs_spec.is_empty():
                del observation_spec[group]
+        if "state" in observation_spec.keys():
+            del observation_spec["state"]
        if observation_spec.is_empty():
            return None

        return observation_spec

    def observation_spec(self, env: EnvBase) -> CompositeSpec:
@@ -95,8 +98,3 @@ def action_spec(self, env: EnvBase) -> CompositeSpec:
    @staticmethod
    def env_name() -> str:
        return "pettingzoo"
-
-
-if __name__ == "__main__":
-    print(PettingZooTask.MULTIWALKER.get_from_yaml())
-    print(PettingZooTask.MULTIWALKER.get_from_yaml().get_env_fun(1, True, seed=0)())
5 changes: 4 additions & 1 deletion setup.py
@@ -8,6 +8,9 @@
    author="Matteo Bettini",
    author_email="[email protected]",
    packages=find_packages(),
-    install_requires=["torchrl", "tqdm"],
+    install_requires=["torchrl", "tqdm", "hydra-core"],
+    extras_require={
+        "tasks": ["vmas>=1.2.10", "pettingzoo[all]>=1.24.1"],
+    },
    include_package_data=True,
)