Commit: amend

Signed-off-by: Matteo Bettini <[email protected]>
matteobettini committed Oct 9, 2023
1 parent a7dfc96 commit 664c21f
Showing 9 changed files with 48 additions and 11 deletions.
6 changes: 4 additions & 2 deletions benchmarl/algorithms/isac.py
@@ -109,11 +109,13 @@ def _get_loss(
         return loss_module, True
 
     def _get_parameters(self, group: str, loss: ClipPPOLoss) -> Dict[str, Iterable]:
-        return {
+        items = {
             "loss_actor": list(loss.actor_network_params.flatten_keys().values()),
             "loss_qvalue": list(loss.qvalue_network_params.flatten_keys().values()),
-            "loss_alpha": [loss.log_alpha],
         }
+        if not self.fixed_alpha:
+            items.update({"loss_alpha": [loss.log_alpha]})
+        return items
 
     def _get_policy_for_loss(
         self, group: str, model_config: ModelConfig, continuous: bool
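For context, a hedged sketch of how these per-loss parameter groups are presumably consumed: BenchMARL builds one optimizer per returned entry, so dropping the `loss_alpha` entry when `fixed_alpha` is set keeps the frozen entropy coefficient out of any optimizer. `make_optimizers` and the toy parameters below are illustrative, not BenchMARL API:

```python
import torch
from torch import nn

# Illustrative only: one optimizer per parameter-group entry, mirroring how the
# dict returned by _get_parameters is presumably used downstream.
def make_optimizers(named_params: dict, lr: float = 3e-4) -> dict:
    return {name: torch.optim.Adam(params, lr=lr)
            for name, params in named_params.items()}

fixed_alpha = True
log_alpha = torch.zeros(1, requires_grad=True)

items = {"loss_actor": list(nn.Linear(4, 2).parameters())}
if not fixed_alpha:
    # only give alpha an optimizer when it is meant to be learned
    items["loss_alpha"] = [log_alpha]

print(sorted(make_optimizers(items)))  # ['loss_actor'] when fixed_alpha=True
```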
6 changes: 4 additions & 2 deletions benchmarl/algorithms/masac.py
@@ -104,11 +104,13 @@ def _get_loss(
         return loss_module, True
 
     def _get_parameters(self, group: str, loss: LossModule) -> Dict[str, Iterable]:
-        return {
+        items = {
             "loss_actor": list(loss.actor_network_params.flatten_keys().values()),
             "loss_qvalue": list(loss.qvalue_network_params.flatten_keys().values()),
-            "loss_alpha": [loss.log_alpha],
         }
+        if not self.fixed_alpha:
+            items.update({"loss_alpha": [loss.log_alpha]})
+        return items
 
     def _get_policy_for_loss(
         self, group: str, model_config: ModelConfig, continuous: bool
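The same guard appears in MASAC. The reason `loss_alpha` must be dropped rather than left in place: with a fixed entropy coefficient, `log_alpha` is typically stored as a non-trainable buffer (this is the pattern TorchRL's SAC loss follows), so an optimizer over it would never receive gradients. A toy sketch of that pattern, not TorchRL's actual code:

```python
import math
import torch
from torch import nn

# Toy sketch of the usual fixed-alpha pattern in SAC losses; TorchRL's
# internals may differ in detail.
class ToySACLoss(nn.Module):
    def __init__(self, alpha_init: float = 1e-6, fixed_alpha: bool = True):
        super().__init__()
        init = torch.tensor(math.log(alpha_init))
        if fixed_alpha:
            self.register_buffer("log_alpha", init)  # constant, no gradients
        else:
            self.log_alpha = nn.Parameter(init)      # learned jointly

print(ToySACLoss(fixed_alpha=True).log_alpha.requires_grad)   # False
print(ToySACLoss(fixed_alpha=False).log_alpha.requires_grad)  # True
```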
2 changes: 0 additions & 2 deletions benchmarl/experiment/experiment.py
@@ -524,8 +524,6 @@ def _optimizer_loop(self, group: str) -> TensorDictBase:
 
                 optimizer.step()
                 optimizer.zero_grad()
-            elif loss_name.startswith("loss"):
-                raise AssertionError
         self.replay_buffers[group].update_tensordict_priority(subdata)
         if self.target_updaters[group] is not None:
             self.target_updaters[group].step()
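The deleted `elif` asserted that every `loss_*` key in the loss output had a matching optimizer. With `fixed_alpha=True` the SAC loss can still report a `loss_alpha` value even though no optimizer exists for it, so the loop now has to skip unmatched keys silently. A hedged sketch of the loop shape (not BenchMARL's exact code; the strings stand in for real losses and torch optimizers):

```python
loss_vals = {"loss_actor": 0.12, "loss_qvalue": 0.34, "loss_alpha": 0.0}
optimizers = {"loss_actor": "adam_actor", "loss_qvalue": "adam_qvalue"}

for loss_name, loss_value in loss_vals.items():
    optimizer = optimizers.get(loss_name)
    if optimizer is None:
        # e.g. "loss_alpha" when fixed_alpha=True; before this commit,
        # reaching here raised AssertionError
        continue
    ...  # backward(), optimizer.step(), optimizer.zero_grad() in the real loop
```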
17 changes: 17 additions & 0 deletions premade_scripts/smacv2/conf/algorithm/isac.yaml
@@ -0,0 +1,17 @@
+defaults:
+  - isac_config
+  - _self_
+
+
+share_param_critic: True
+
+num_qvalue_nets: 2
+loss_function: "l2"
+delay_qvalue: True
+target_entropy: "auto"
+discrete_target_entropy_weight: 0.2
+
+alpha_init: 0.000001
+min_alpha: null
+max_alpha: null
+fixed_alpha: True
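With `fixed_alpha: True` and `alpha_init: 0.000001`, the entropy coefficient stays frozen at a value small enough to make the entropy bonus negligible, effectively disabling entropy regularization in this isac.yaml (and in the masac.yaml below). A toy calculation, illustrative rather than TorchRL's loss code:

```python
import torch

alpha = torch.tensor(1e-6)     # frozen at alpha_init
log_prob = torch.tensor(-1.3)  # log pi(a|s) of a sampled action
q_value = torch.tensor(0.7)    # critic estimate

# SAC's actor maximizes Q - alpha * log_prob; at alpha=1e-6 the entropy
# term contributes only ~1.3e-6, so the update is driven almost purely by Q.
print(q_value - alpha * log_prob)  # tensor(0.7000)
```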
18 changes: 18 additions & 0 deletions premade_scripts/smacv2/conf/algorithm/masac.yaml
@@ -0,0 +1,18 @@
+defaults:
+  - masac_config
+  - _self_
+
+
+
+share_param_critic: True
+
+num_qvalue_nets: 2
+loss_function: "l2"
+delay_qvalue: True
+target_entropy: "auto"
+discrete_target_entropy_weight: 0.2
+
+alpha_init: 0.000001
+min_alpha: null
+max_alpha: null
+fixed_alpha: True
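Both override files sit in the `algorithm` config group, so they can be selected by name. A quick compositional check using Hydra's compose API (a sketch: it assumes you run from the repository root and that the main config declares an `algorithm` default group; add further overrides, e.g. a task, if the main config requires them):

```python
from hydra import compose, initialize

# Path follows the repository layout shown above; adjust if running elsewhere.
with initialize(version_base=None, config_path="premade_scripts/smacv2/conf"):
    cfg = compose(config_name="config", overrides=["algorithm=masac"])
print(cfg.algorithm.fixed_alpha)  # True, per the file above
```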
6 changes: 3 additions & 3 deletions premade_scripts/smacv2/conf/config.yaml
@@ -14,7 +14,7 @@ hydra:
 seed: 0
 
 experiment:
-  sampling_device: "cuda"
+  sampling_device: "cpu"
   train_device: "cuda"
 
   share_policy_params: True
@@ -31,10 +31,10 @@ experiment:
 
   exploration_eps_init: 0.8
   exploration_eps_end: 0.01
-  exploration_anneal_frames: null
+  exploration_anneal_frames: 1_000_000
 
   max_n_iters: null
-  max_n_frames: 30_000_000
+  max_n_frames: 20_000_000
 
   on_policy_collected_frames_per_batch: 6000
   on_policy_n_envs_per_worker: 10
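Two values are retuned here: exploration epsilon now anneals over an explicit 1,000,000 frames instead of a `null` (default-derived) horizon, and the total frame budget drops from 30M to 20M. Assuming a linear schedule, a common choice (the exact BenchMARL schedule may differ):

```python
def eps_at(frame: int, init: float = 0.8, end: float = 0.01,
           anneal_frames: int = 1_000_000) -> float:
    # Linearly interpolate from init to end, then hold at end.
    frac = min(frame / anneal_frames, 1.0)
    return init + frac * (end - init)

print(eps_at(0))          # 0.8
print(eps_at(500_000))    # 0.405
print(eps_at(2_000_000))  # 0.01 for the rest of the 20M-frame run
```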
2 changes: 1 addition & 1 deletion premade_scripts/smacv2/smacv2_run.py
@@ -6,7 +6,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 
-@hydra.main(version_base=None, config_path=".", config_name="config")
+@hydra.main(version_base=None, config_path="conf", config_name="config")
 def hydra_experiment(cfg: DictConfig) -> None:
     hydra_choices = HydraConfig.get().runtime.choices
     task_name = hydra_choices.task
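In Hydra, `config_path` is resolved relative to the directory of the file defining the decorated function, so `"conf"` matches the YAML files moving into a `conf/` subdirectory; the identical fix is applied to vmas_run.py below. A minimal standalone example of the same decorator usage:

```python
import hydra
from omegaconf import DictConfig

# Expects <this_script_dir>/conf/config.yaml to exist.
@hydra.main(version_base=None, config_path="conf", config_name="config")
def main(cfg: DictConfig) -> None:
    print(cfg)

if __name__ == "__main__":
    main()
```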
File renamed without changes.
2 changes: 1 addition & 1 deletion premade_scripts/vmas/vmas_run.py
@@ -6,7 +6,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 
-@hydra.main(version_base=None, config_path=".", config_name="config")
+@hydra.main(version_base=None, config_path="conf", config_name="config")
 def hydra_experiment(cfg: DictConfig) -> None:
     hydra_choices = HydraConfig.get().runtime.choices
     task_name = hydra_choices.task
