
Commit

add test
Signed-off-by: Matteo Bettini <[email protected]>
matteobettini committed Sep 21, 2023
1 parent 3b4aa6a commit 1678fdc
Showing 14 changed files with 171 additions and 35 deletions.
2 changes: 2 additions & 0 deletions .github/unittest/install_pettingzoo.sh
@@ -0,0 +1,2 @@

pip install "pettingzoo[all]==1.24.1"
39 changes: 39 additions & 0 deletions .github/workflows/pettingzoo_tests.yml
@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see:
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions


name: pettingzoo_tests

on:
  push:
    branches: [ $default-branch , "main" ]
  pull_request:
    branches: [ $default-branch , "main" ]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          bash .github/unittest/install_dependencies.sh
      - name: Install pettingzoo
        run: |
          bash .github/unittest/install_pettingzoo.sh
      - name: Test with pytest
        run: |
          pytest test/test_pettingzoo.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
2 changes: 1 addition & 1 deletion benchmarl/algorithms/iddpg.py
@@ -142,7 +142,7 @@ def _get_policy_for_loss(

        policy = ProbabilisticActor(
            module=actor_module,
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            in_keys=[(group, "param")],
            out_keys=[(group, "action")],
            distribution_class=TanhDelta,
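Note on the recurring spec change in this and the following algorithm files: the actors and losses are now handed the per-group leaf of the composite action spec instead of the whole composite. A minimal sketch of what that indexing does, assuming torchrl-style composite specs; the "agents" group name, bounds, and shapes below are invented for illustration and are not part of this commit:

# Minimal sketch, assuming torchrl's composite specs; the "agents" group,
# bounds and shapes are invented, not taken from this commit.
from torchrl.data import BoundedTensorSpec, CompositeSpec

action_spec = CompositeSpec(
    agents=CompositeSpec(
        action=BoundedTensorSpec(-1.0, 1.0, shape=(2, 3)),
        shape=(2,),
    )
)

# Indexing with the nested (group, "action") key returns only that group's
# action spec, which is the narrower object the actors above now receive.
leaf = action_spec["agents", "action"]
print(leaf.shape)  # torch.Size([2, 3])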
6 changes: 3 additions & 3 deletions benchmarl/algorithms/ippo.py
@@ -157,7 +157,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec[(group, "action")],
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -173,7 +173,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -183,7 +183,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
4 changes: 2 additions & 2 deletions benchmarl/algorithms/iql.py
@@ -62,7 +62,7 @@ def _get_loss(
            policy_for_loss,
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward=(group, "reward"),
@@ -142,7 +142,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

10 changes: 5 additions & 5 deletions benchmarl/algorithms/isac.py
@@ -91,7 +91,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_spec=self.action_spec,
+                action_spec=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -113,7 +113,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_space=self.action_spec,
+                action_space=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -202,7 +202,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -218,7 +218,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -228,7 +228,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
4 changes: 2 additions & 2 deletions benchmarl/algorithms/maddpg.py
@@ -142,7 +142,7 @@ def _get_policy_for_loss(

        policy = ProbabilisticActor(
            module=actor_module,
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            in_keys=[(group, "param")],
            out_keys=[(group, "action")],
            distribution_class=TanhDelta,
@@ -220,7 +220,7 @@ def get_value_module(self, group: str) -> TensorDictModule:
            modules.append(
                TensorDictModule(
                    lambda state, action: torch.cat(
-                        [state, action.view(action.shape[:-2], -1)], dim=-1
+                        [state, action.view(*action.shape[:-2], -1)], dim=-1
                    ),
                    in_keys=["state", (group, "action")],
                    out_keys=["state_action"],
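The one-character change inside the critic's flattening lambda (also applied in masac below) matters because Tensor.view takes either a single sequence of sizes or individually unpacked sizes, not a mix of the two. A standalone illustration with invented shapes, not code from this commit:

# Standalone illustration of the view(*shape, -1) fix; shapes are invented.
import torch

action = torch.randn(8, 4, 2)  # (batch, n_agents, action_dim)

# Unpacking the leading dims keeps them and flattens the per-agent actions
# into one joint-action vector for the centralised critic.
flat = action.view(*action.shape[:-2], -1)
print(flat.shape)  # torch.Size([8, 8])

# The old call mixed a torch.Size with an int and fails:
# action.view(action.shape[:-2], -1)  # TypeError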
6 changes: 3 additions & 3 deletions benchmarl/algorithms/mappo.py
@@ -156,7 +156,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -172,7 +172,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -182,7 +182,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
12 changes: 6 additions & 6 deletions benchmarl/algorithms/masac.py
@@ -91,7 +91,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_spec=self.action_spec,
+                action_spec=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -113,7 +113,7 @@ def _get_loss(
                alpha_init=self.alpha_init,
                min_alpha=self.min_alpha,
                max_alpha=self.max_alpha,
-                action_space=self.action_spec,
+                action_space=self.action_spec[group, "action"],
                fixed_alpha=self.fixed_alpha,
                target_entropy=self.target_entropy,
                delay_qvalue=self.delay_qvalue,
@@ -202,7 +202,7 @@ def _get_policy_for_loss(
            )
            policy = ProbabilisticActor(
                module=TensorDictSequential(actor_module, extractor_module),
-                spec=self.action_spec,
+                spec=self.action_spec[group, "action"],
                in_keys=[(group, "loc"), (group, "scale")],
                out_keys=[(group, "action")],
                distribution_class=TanhNormal,
@@ -218,7 +218,7 @@ def _get_policy_for_loss(
            if self.action_mask_spec is None:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys=[(group, "logits")],
                    out_keys=[(group, "action")],
                    distribution_class=Categorical,
@@ -228,7 +228,7 @@ def _get_policy_for_loss(
            else:
                policy = ProbabilisticActor(
                    module=actor_module,
-                    spec=self.action_spec,
+                    spec=self.action_spec[group, "action"],
                    in_keys={
                        "logits": (group, "logits"),
                        "mask": (group, "action_mask"),
@@ -364,7 +364,7 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
            modules.append(
                TensorDictModule(
                    lambda state, action: torch.cat(
-                        [state, action.view(action.shape[:-2], -1)], dim=-1
+                        [state, action.view(*action.shape[:-2], -1)], dim=-1
                    ),
                    in_keys=["state", (group, "action")],
                    out_keys=["state_action"],
6 changes: 3 additions & 3 deletions benchmarl/algorithms/qmix.py
@@ -66,7 +66,7 @@ def _get_loss(
            self.get_mixer(group),
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward="reward",
@@ -147,7 +147,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

@@ -180,7 +180,7 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
        if done_key not in keys:
            batch.set(
                done_key,
-                batch.get(("next", group, "done")).mean(-2),
+                batch.get(("next", group, "done")).any(-2),
            )

        if reward_key not in keys:
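The process_batch change (mirrored in vdn below) collapses per-agent done flags into a group-level flag with any() over the agent dimension; mean() would not produce a boolean signal. A short sketch with an invented tensor, not code from this commit:

# Sketch of the done-flag aggregation; the tensor is invented for illustration.
import torch

# Per-agent done flags with shape (batch, n_agents, 1).
done = torch.tensor([[[False], [True]],
                     [[False], [False]]])

# True as soon as any agent in the group is done; stays boolean, shape (batch, 1).
group_done = done.any(-2)
print(group_done.squeeze(-1))  # tensor([ True, False])

# done.mean(-2) is not defined for boolean tensors and, after casting to float,
# would give fractional values rather than a valid done flag.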
6 changes: 3 additions & 3 deletions benchmarl/algorithms/vdn.py
@@ -63,7 +63,7 @@ def _get_loss(
            self.get_mixer(group),
            delay_value=self.delay_value,
            loss_function=self.loss_function,
-            action_space=self.action_spec,
+            action_space=self.action_spec[group, "action"],
        )
        loss_module.set_keys(
            reward="reward",
@@ -144,7 +144,7 @@ def _get_policy_for_loss(
                (group, "action_value"),
                (group, "chosen_action_value"),
            ],
-            spec=self.action_spec,
+            spec=self.action_spec[group, "action"],
            action_space=None,
        )

@@ -177,7 +177,7 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
        if done_key not in keys:
            batch.set(
                done_key,
-                batch.get(("next", group, "done")).mean(-2),
+                batch.get(("next", group, "done")).any(-2),
            )

        if reward_key not in keys:
10 changes: 4 additions & 6 deletions benchmarl/environments/pettingzoo/common.py
@@ -43,7 +43,7 @@ def has_state(self) -> bool:
            return True
        return False

-    def has_render(self) -> bool:
+    def has_render(self, env: EnvBase) -> bool:
        return True

    def max_steps(self, env: EnvBase) -> bool:
@@ -66,8 +66,11 @@ def action_mask_spec(self, env: EnvBase) -> Optional[CompositeSpec]:
                    del group_obs_spec[key]
            if group_obs_spec.is_empty():
                del observation_spec[group]
+        if "state" in observation_spec.keys():
+            del observation_spec["state"]
        if observation_spec.is_empty():
            return None

        return observation_spec

    def observation_spec(self, env: EnvBase) -> CompositeSpec:
@@ -95,8 +98,3 @@ def action_spec(self, env: EnvBase) -> CompositeSpec:
    @staticmethod
    def env_name() -> str:
        return "pettingzoo"
-
-
-if __name__ == "__main__":
-    print(PettingZooTask.MULTIWALKER.get_from_yaml())
-    print(PettingZooTask.MULTIWALKER.get_from_yaml().get_env_fun(1, True, seed=0)())
5 changes: 4 additions & 1 deletion setup.py
@@ -8,6 +8,9 @@
    author="Matteo Bettini",
    author_email="[email protected]",
    packages=find_packages(),
-    install_requires=["torchrl", "tqdm"],
+    install_requires=["torchrl", "tqdm", "hydra-core"],
+    extras_require={
+        "tasks": ["vmas>=1.2.10", "pettingzoo[all]>=1.24.1"],
+    },
    include_package_data=True,
)