Skip to content

Commit

Permalink
[Environment] Meltingpot (#75)
Browse files Browse the repository at this point in the history
  • Loading branch information
matteobettini authored Apr 18, 2024
1 parent 6296a1f commit 3439abe
Show file tree
Hide file tree
Showing 79 changed files with 486 additions and 58 deletions.
2 changes: 2 additions & 0 deletions .github/unittest/install_meltingpot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

pip install dm-meltingpot
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
Expand Down
43 changes: 43 additions & 0 deletions .github/workflows/meltingpot_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see:
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions


name: meltingpot_tests

on:
push:
branches: [ $default-branch , "main" ]
pull_request:
branches: [ $default-branch , "main" ]

permissions:
contents: read

jobs:
tests:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
bash .github/unittest/install_dependencies_nightly.sh
- name: Install meltingpot
run: |
bash .github/unittest/install_meltingpot.sh
- name: Test with pytest
run: |
pytest test/test_meltingpot.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: false
6 changes: 2 additions & 4 deletions .github/workflows/pettingzoo_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -37,9 +37,7 @@ jobs:
- name: Test with pytest
run: |
xvfb-run -s "-screen 0 1024x768x24" pytest test/test_pettingzoo.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
- if: matrix.python-version == '3.10'
name: Upload coverage to Codecov
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: false
5 changes: 2 additions & 3 deletions .github/workflows/smacv2_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -44,8 +44,7 @@ jobs:
pytest test/test_smacv2.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
- if: matrix.python-version == '3.10'
name: Upload coverage to Codecov
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: false
4 changes: 2 additions & 2 deletions .github/workflows/torchrl_stable_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
python-version: ["3.11"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -38,4 +38,4 @@ jobs:
bash .github/unittest/install_pettingzoo.sh
- name: Tests
run: |
xvfb-run -s "-screen 0 1024x768x24" pytest test/test_algorithm.py test/test_models.py test/test_task.py test/test_vmas.py test/test_pettingzoo.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
xvfb-run -s "-screen 0 1024x768x24" pytest test/test_algorithm.py test/test_models.py test/test_task.py test/test_vmas.py test/test_pettingzoo.py test/test_meltingpot.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10","3.11"]

steps:
- uses: actions/checkout@v3
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/vmas_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -38,8 +38,7 @@ jobs:
run: |
xvfb-run -s "-screen 0 1024x768x24" pytest test/test_vmas.py --doctest-modules --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
- if: matrix.python-version == '3.10'
name: Upload coverage to Codecov
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: false
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![tests](https://github.com/facebookresearch/BenchMARL/actions/workflows/unit_tests.yml/badge.svg)](test)
[![codecov](https://codecov.io/github/facebookresearch/BenchMARL/coverage.svg?branch=main)](https://codecov.io/gh/facebookresearch/BenchMARL)
[![Documentation Status](https://readthedocs.org/projects/benchmarl/badge/?version=latest)](https://benchmarl.readthedocs.io/en/latest/?badge=latest)
[![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://www.python.org/downloads/)
[![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://www.python.org/downloads/)
<a href="https://pypi.org/project/benchmarl"><img src="https://img.shields.io/pypi/v/benchmarl" alt="pypi version"></a>
[![Downloads](https://static.pepy.tech/personalized-badge/benchmarl?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads)](https://pepy.tech/project/benchmarl)
[![Discord Shield](https://dcbadge.vercel.app/api/server/jEEWCn6T3p?style=flat)](https://discord.gg/jEEWCn6T3p)
Expand Down Expand Up @@ -113,6 +113,10 @@ pip install vmas
pip install "pettingzoo[all]"
```

##### MeltingPot
```bash
pip install dm-meltingpot
```
##### SMACv2

Follow the instructions on the environment [repository](https://github.com/oxwhirl/smacv2).
Expand Down Expand Up @@ -236,12 +240,14 @@ determine the training strategy. Here is a table with the currently implemented
challenge to solve.
They differ in many aspects; here is a table with the current environments in BenchMARL.

| Environment | Tasks | Cooperation | Global state | Reward function | Action space | Vectorized |
|--------------------------------------------------------------------|-------------------------------------|---------------------------|--------------|-------------------------------|-----------------------|:----------------:|
| [VMAS](https://github.com/proroklab/VectorizedMultiAgentSimulator) | [18](benchmarl/conf/task/vmas) | Cooperative + Competitive | No | Shared + Independent + Global | Continuous + Discrete | Yes |
| [SMACv2](https://github.com/oxwhirl/smacv2) | [15](benchmarl/conf/task/smacv2) | Cooperative | Yes | Global | Discrete | No |
| [MPE](https://github.com/openai/multiagent-particle-envs) | [8](benchmarl/conf/task/pettingzoo) | Cooperative + Competitive | Yes | Shared + Independent | Continuous + Discrete | No |
| [SISL](https://github.com/sisl/MADRL) | [2](benchmarl/conf/task/pettingzoo) | Cooperative | No | Shared | Continuous | No |
| Environment | Tasks | Cooperation | Global state | Reward function | Action space | Vectorized |
|--------------------------------------------------------------------|--------------------------------------|---------------------------|--------------|-------------------------------|-----------------------|:----------------:|
| [VMAS](https://github.com/proroklab/VectorizedMultiAgentSimulator) | [18](benchmarl/conf/task/vmas) | Cooperative + Competitive | No | Shared + Independent + Global | Continuous + Discrete | Yes |
| [SMACv2](https://github.com/oxwhirl/smacv2) | [15](benchmarl/conf/task/smacv2) | Cooperative | Yes | Global | Discrete | No |
| [MPE](https://github.com/openai/multiagent-particle-envs) | [8](benchmarl/conf/task/pettingzoo) | Cooperative + Competitive | Yes | Shared + Independent | Continuous + Discrete | No |
| [SISL](https://github.com/sisl/MADRL) | [2](benchmarl/conf/task/pettingzoo) | Cooperative | No | Shared | Continuous | No |
| [MeltingPot](https://github.com/google-deepmind/meltingpot) | [49](benchmarl/conf/task/meltingpot) | Cooperative + Competitive | Yes | Independent | Discrete | No |


> [!NOTE]
> BenchMARL uses the [TorchRL MARL API](https://github.com/pytorch/rl/issues/1463) for grouping agents.
Expand Down
8 changes: 5 additions & 3 deletions benchmarl/algorithms/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pathlib
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Optional, Tuple, Type
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type

from tensordict import TensorDictBase
from tensordict.nn import TensorDictModule, TensorDictSequential
Expand All @@ -19,6 +19,7 @@
TensorDictReplayBuffer,
)
from torchrl.data.replay_buffers import RandomSampler, SamplerWithoutReplacement
from torchrl.envs import Compose, Transform
from torchrl.objectives import LossModule
from torchrl.objectives.utils import HardUpdate, SoftUpdate, TargetNetUpdater

Expand Down Expand Up @@ -132,15 +133,15 @@ def get_loss_and_updater(self, group: str) -> Tuple[LossModule, TargetNetUpdater
return self._losses_and_updaters[group]

def get_replay_buffer(
self,
group: str,
self, group: str, transforms: List[Transform] = None
) -> ReplayBuffer:
"""
Get the ReplayBuffer for a specific group.
This function will check ``self.on_policy`` and create the buffer accordingly
Args:
group (str): agent group of the replay buffer
transforms (optional, list of Transform): Transforms to apply to the replay buffer ``.sample()`` call
Returns: ReplayBuffer for the group
"""
Expand All @@ -154,6 +155,7 @@ def get_replay_buffer(
sampler=sampler,
batch_size=sampling_size,
priority_key=(group, "td_error"),
transform=Compose(*transforms) if transforms is not None else None,
)

def get_policy_for_loss(self, group: str) -> TensorDictModule:
Expand Down
2 changes: 1 addition & 1 deletion benchmarl/conf/experiment/base_experiment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ evaluation_episodes: 10
evaluation_deterministic_actions: True

# List of loggers to use, options are: wandb, csv, tensorboard, mlflow
loggers: [wandb]
loggers: []
# Create a json folder as part of the output in the format of marl-eval
create_json: True

Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
5 changes: 3 additions & 2 deletions benchmarl/environments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
#

from .common import Task
from .meltingpot.common import MeltingPotTask
from .pettingzoo.common import PettingZooTask
from .smacv2.common import Smacv2Task
from .vmas.common import VmasTask

# This is a registry mapping "envname/task_name" to the EnvNameTask.TASK_NAME enum
# It is used to automatically load task enums from yaml files
task_config_registry = {}
for env in [VmasTask, Smacv2Task, PettingZooTask]:
for env in [VmasTask, Smacv2Task, PettingZooTask, MeltingPotTask]:
env_config_registry = {
f"{env.env_name()}/{task.name.lower()}": task for task in env
}
Expand All @@ -31,14 +32,14 @@
from .pettingzoo.simple_tag import TaskConfig as SimpleTagConfig
from .pettingzoo.simple_world_comm import TaskConfig as SimpleWorldComm
from .pettingzoo.waterworld import TaskConfig as WaterworldConfig

from .vmas.balance import TaskConfig as BalanceConfig
from .vmas.dispersion import TaskConfig as DispersionConfig
from .vmas.dropout import TaskConfig as DropoutConfig
from .vmas.give_way import TaskConfig as GiveWayConfig
from .vmas.navigation import TaskConfig as NavigationConfig
from .vmas.reverse_transport import TaskConfig as ReverseTransportConfig
from .vmas.sampling import TaskConfig as SamplingConfig

from .vmas.simple_adverasary import TaskConfig as VmasSimpleAdversaryConfig
from .vmas.simple_crypto import TaskConfig as VmasSimpleCryptoConfig
from .vmas.simple_push import TaskConfig as VmasSimplePushConfig
Expand Down
26 changes: 24 additions & 2 deletions benchmarl/environments/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,13 @@ def get_env_fun(
num_envs (int): The number of envs that should be in the batch_size of the returned env.
In vectorized envs, this can be used to set the number of batched environments.
If your environment is not vectorized, you can just ignore this, and it will be
wrapped in a torchrl.envs.SerialEnv with num_envs automatically.
wrapped in a :class:`torchrl.envs.SerialEnv` with num_envs automatically.
continuous_actions (bool): Whether your environment should have continuous or discrete actions.
If your environment does not support both, ignore this and refer to the supports_x_actions methods.
seed (optional, int): The seed of your env
device (str): the device of your env, you can pass this to any torchrl env constructor
Returns: a function that takes no arguments and returns a torchrl.envs.EnvBase object
Returns: a function that takes no arguments and returns a :class:`torchrl.envs.EnvBase` object
"""
raise NotImplementedError
Expand Down Expand Up @@ -242,6 +242,28 @@ def get_reward_sum_transform(self, env: EnvBase) -> Transform:
reset_keys = env.reset_keys
return RewardSum(reset_keys=reset_keys)

def get_env_transforms(self, env: EnvBase) -> List[Transform]:
"""
Returns a list of :class:`torchrl.envs.Transform` to be applied to the env.
Args:
env (EnvBase): An environment created via self.get_env_fun
"""
return []

def get_replay_buffer_transforms(self, env: EnvBase) -> List[Transform]:
"""
Returns a list of :class:`torchrl.envs.Transform` to be applied to the :class:`torchrl.data.ReplayBuffer`.
Args:
env (EnvBase): An environment created via self.get_env_fun
"""
return []

@staticmethod
def render_callback(experiment, env: EnvBase, data: TensorDictBase):
try:
Expand Down
Empty file.
Loading

0 comments on commit 3439abe

Please sign in to comment.