Add seed parameter to all simulations, propagating it into sample_parameters

PiperOrigin-RevId: 681777282
Change-Id: Ic3b04f860840e8f3653a977647e97f2aaa0f45ff
google-deepmind · Oct 3, 2024 · c3ef91b · c3ef91b
1 parent 987e2d4
commit c3ef91b
Show file tree

Hide file tree

Showing 21 changed files with 223 additions and 120 deletions.
diff --git a/concordia/environment/game_master.py b/concordia/environment/game_master.py
@@ -31,9 +31,9 @@
 from concordia.typing import component
 from concordia.utils import concurrency
 from concordia.utils import helper_functions
+import numpy as np
 import termcolor
 
-
 DEFAULT_THOUGHTS = (
     thought_chains.attempt_to_result,
     thought_chains.result_to_who_what_where,
@@ -97,6 +97,7 @@ def __init__(
       concurrent_externalities: bool = True,
       use_default_instructions: bool = True,
       log_color: str = 'red',
+      seed: int | None = None,
   ):
     """Game master constructor.
 
@@ -123,6 +124,7 @@ def __init__(
         instructions used for the game master, e.g. do this if you plan to pass
         custom instructions as a constant component instead.
       log_color: color in which to print logs
+      seed: random seed for the game master
     """
     self._name = name
     self._model = model
@@ -132,6 +134,9 @@ def __init__(
     self._randomise_initiative = randomise_initiative
     self._player_observes_event = player_observes_event
     self._players_act_simultaneously = players_act_simultaneously
+    self._seed = seed or random.getrandbits(63)
+    self._rng = random.Random(seed)
+
     if isinstance(action_spec, agent_lib.ActionSpec):
       self._action_spec = {player.name: action_spec for player in players}
     else:
@@ -201,7 +206,9 @@ def _handle_action(self, player_name: str, action_attempt: str) -> None:
     })
 
     # Produce the event that has happened as the result of the action attempt
-    prompt = interactive_document.InteractiveDocument(self._model)
+    prompt = interactive_document.InteractiveDocument(
+        self._model, rng=np.random.default_rng(self._seed)
+    )
     for comp in self._components.values():
       state_of_component = comp.state()
       if state_of_component:
@@ -324,7 +331,7 @@ def step(
     else:
       players = list(self._players_by_name.values())
     if self._randomise_initiative:
-      random.shuffle(players)
+      self._rng.shuffle(players)
 
     if action_spec_override is None:
       action_spec = self._action_spec

diff --git a/concordia/environment/scenes/conversation.py b/concordia/environment/scenes/conversation.py
@@ -20,6 +20,7 @@
 """
 
 from collections.abc import Sequence
+import random
 
 from concordia.agents import deprecated_agent
 from concordia.associative_memory import blank_memories
@@ -31,6 +32,7 @@
 from concordia.typing import agent as simulacrum_agent
 from concordia.typing import component
 from concordia.typing import entity
+import numpy as np
 import termcolor
 
 
@@ -47,6 +49,7 @@ def __init__(
       max_steps: int | None = None,
       verbose: bool = False,
       log_colour: str = 'red',
+      seed: int | None = None,
   ):
     """This component accumulates history of a conversation scene in its state.
 
@@ -62,6 +65,7 @@ def __init__(
       max_steps: Maximum number of conversation steps. If none, no limit
       verbose: whether or not to print intermediate reasoning steps
       log_colour: colour for logging
+      seed: random seed for the chain of thought document
     """
     self._model = model
     self._state = premise
@@ -71,7 +75,7 @@ def __init__(
     self._key_question = key_question
     self._max_steps = max_steps
     self._current_steps = 0
-
+    self._seed = seed or random.getrandbits(63)
     self._verbose = verbose
 
   def name(self) -> str:
@@ -87,7 +91,9 @@ def terminate_episode(self) -> bool:
 
     if not self._check_for_termination:
       return False
-    chain_of_thought = interactive_document.InteractiveDocument(self._model)
+    chain_of_thought = interactive_document.InteractiveDocument(
+        self._model, rng=np.random.default_rng(self._seed)
+    )
     chain_of_thought.statement('\n')
     chain_of_thought.statement(f'Key question: {self._key_question}')
     chain_of_thought.statement(f'Conversation:\n{self._state}\n')
@@ -154,6 +160,7 @@ def make_conversation_game_master(
     key_question: str | None = None,
     max_steps: int | None = 3,
     verbose: bool = False,
+    seed: int | None = None,
 ):
   """Creates a game master that runs a conversation between players.
 
@@ -176,6 +183,7 @@ def make_conversation_game_master(
       answer to this question.
     max_steps: Maximum number of conversation steps. If none, no limit
     verbose: whether or not to print
+    seed: random seed for the game master
 
   Returns:
     a game master
@@ -211,6 +219,7 @@ def make_conversation_game_master(
       check_for_termination=check_for_termination,
       key_question=key_question,
       max_steps=max_steps,
+      seed=seed,
   )
 
   for player in players:
@@ -230,5 +239,6 @@ def make_conversation_game_master(
       player_observes_event=False,
       concurrent_externalities=False,
       verbose=True,
+      seed=seed,
   )
   return game_master
diff --git a/concordia/factory/environment/basic_game_master.py b/concordia/factory/environment/basic_game_master.py
@@ -59,6 +59,7 @@ def build_game_master(
     npc_context: str = '',
     max_conversation_length: int = 10,
     verbose: bool = False,
+    seed: int | None = None,
 ) -> tuple[game_master.GameMaster, associative_memory.AssociativeMemory]:
   """Build a game master (i.e., an environment).
 
@@ -84,6 +85,7 @@ def build_game_master(
     npc_context: extra context provided only to non-player characters
     max_conversation_length: The maximum number of turns in a conversation.
     verbose: whether or not to print verbose debug information
+    seed: random seed for the chain of thought document
 
   Returns:
     A tuple consisting of a game master and its memory.
@@ -178,6 +180,7 @@ def build_game_master(
       ],
       randomise_initiative=True,
       player_observes_event=False,
+      seed=seed,
       verbose=verbose,
   )
 
@@ -192,6 +195,7 @@ def build_decision_scene_game_master(
     decision_action_spec: agent_lib.ActionSpec,
     payoffs: gm_components.schelling_diagram_payoffs.SchellingPayoffs,
     verbose: bool = False,
+    seed: int | None = None,
 ) -> game_master.GameMaster:
   """Build a decision game master for decision scenes."""
   decision_env = game_master.GameMaster(
@@ -207,6 +211,7 @@ def build_decision_scene_game_master(
       player_observes_event=False,
       concurrent_externalities=False,
       verbose=verbose,
+      seed=seed,
   )
   return decision_env
 

diff --git a/examples/modular/environment/haggling.py b/examples/modular/environment/haggling.py
@@ -552,6 +552,7 @@ def __init__(
       only_match_with_support: bool = False,
       num_games: int = 2,
       num_main_players: int = 3,
+      seed: int | None = None,
   ):
     """Initialize the simulation object.
 
@@ -580,6 +581,7 @@ def __init__(
         supporting players.
       num_games: the number of games to play.
       num_main_players: the number of main players.
+      seed: the random seed to use.
     """
     # Support for these parameters will be added in a future addition coming
     # very imminently.
@@ -589,6 +591,7 @@ def __init__(
         helper_functions.load_time_and_place_module(
             time_and_place_module=time_and_place_module,
             default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
+            seed=seed,
         )
     )
     sampled_settings.num_supporting_players = num_supporting_player

diff --git a/examples/modular/environment/haggling_multi_item.py b/examples/modular/environment/haggling_multi_item.py
@@ -582,6 +582,7 @@ def __init__(
       only_match_with_support: bool = False,
       num_games: int = 2,
       num_main_players: int = 3,
+      seed: int | None = None,
   ):
     """Initialize the simulation object.
 
@@ -610,6 +611,7 @@ def __init__(
         supporting players.
       num_games: the number of games to play.
       num_main_players: the number of main players.
+      seed: the random seed to use.
     """
     # Support for these parameters will be added in a future addition coming
     # very imminently.
@@ -619,6 +621,7 @@ def __init__(
         helper_functions.load_time_and_place_module(
             time_and_place_module=time_and_place_module,
             default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
+            seed=seed,
         )
     )
     sampled_settings.num_supporting_players = num_supporting_player

diff --git a/examples/modular/environment/labor_collective_action.py b/examples/modular/environment/labor_collective_action.py
@@ -123,6 +123,7 @@ def configure_players(
     model: language_model.LanguageModel,
     sampled_settings: Any,
     time_and_place_params: types.ModuleType,
+    rng: random.Random,
 ) -> tuple[
     list[formative_memories.AgentConfig],
     list[formative_memories.AgentConfig],
@@ -136,6 +137,7 @@ def configure_players(
     sampled_settings: the environment configuration containing the time and
       place details.
     time_and_place_params: the module containing the time and place parameters
+    rng: the random number generator to use.
 
   Returns:
     main_player_configs: configs for the main characters
@@ -161,9 +163,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
       subject_pronoun = 'they'
       object_pronoun = 'their'
 
-    birth_year = environment_cfg.year - (30 + random.randint(-8, 8))
-    birth_month = random.randint(1, 12)
-    birth_day = random.randint(1, 28)
+    birth_year = environment_cfg.year - (30 + rng.randint(-8, 8))
+    birth_month = rng.randint(1, 12)
+    birth_day = rng.randint(1, 28)
     goal_str = (
         f'{player_name} hopes to be able to provide for their '
         'family and live a full life.'
@@ -172,7 +174,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
         f"{player_name}'s personality is like "
         + player_traits_and_styles.get_trait(flowery=True)
     )
-    prompt = interactive_document.InteractiveDocument(model)
+    prompt = interactive_document.InteractiveDocument(
+        model, rng=np.random.default_rng(sampled_settings.seed)
+    )
     prompt.statement(
         'The following exercise is preparatory work for a role playing '
         'session. The purpose of the exercise is to fill in the backstory '
@@ -271,9 +275,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
           'gender', None
       ),
       date_of_birth=datetime.datetime(
-          year=sampled_settings.year - (30 + random.randint(10, 30)),
-          month=random.randint(1, 12),
-          day=random.randint(1, 28),
+          year=sampled_settings.year - (30 + rng.randint(10, 30)),
+          month=rng.randint(1, 12),
+          day=rng.randint(1, 28),
       ),
       goal=(
           f'{sampled_settings.antagonist} wants to make as much money '
@@ -307,9 +311,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
           'gender', None
       ),
       date_of_birth=datetime.datetime(
-          year=sampled_settings.year - (30 + random.randint(2, 10)),
-          month=random.randint(1, 12),
-          day=random.randint(1, 28),
+          year=sampled_settings.year - (30 + rng.randint(2, 10)),
+          month=rng.randint(1, 12),
+          day=rng.randint(1, 28),
       ),
       goal=(
           f'{sampled_settings.organizer} wants to prevent the '
@@ -424,6 +428,7 @@ def configure_scenes(
       player_observes_event=False,
       concurrent_externalities=False,
       verbose=verbose,
+      seed=sampled_settings.seed,
   )
 
   def _get_discussion_scene_type(
@@ -644,6 +649,7 @@ def __init__(
           bots_lib.SupportingAgentFactory | types.ModuleType
       ) = rational_agent_supporting,
       time_and_place_module: str | None = None,
+      seed: int | None = None,
   ):
     """Initialize the simulation object.
 
@@ -665,6 +671,7 @@ def __init__(
       time_and_place_module: optionally, specify a module containing settings
         that create a sense of setting in a specific time and place. If not
         specified, a random module will be chosen from the default options.
+      seed: the random seed to use.
     """
     if resident_visitor_modules is None:
       self._resident_visitor_mode = False
@@ -674,7 +681,6 @@ def __init__(
       self._resident_agent_module, self._visitor_agent_module = (
           resident_visitor_modules
       )
-
     self._agent_model = model
 
     if override_agent_model:
@@ -690,9 +696,12 @@ def __init__(
     time_and_place_params, sampled_settings = (
         helper_functions.load_time_and_place_module(
             time_and_place_module=time_and_place_module,
-            default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES)
+            default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
+            seed=seed,
+        )
     )
 
+    self._rng = random.Random(sampled_settings.seed)
     start_time = datetime.datetime(
         year=time_and_place_params.YEAR,
         month=time_and_place_params.MONTH,
@@ -735,11 +744,14 @@ def __init__(
     )
 
     main_player_configs, supporting_player_configs, antagonist_config, _ = (
-        configure_players(model=model,
-                          sampled_settings=sampled_settings,
-                          time_and_place_params=time_and_place_params)
+        configure_players(
+            model=model,
+            sampled_settings=sampled_settings,
+            time_and_place_params=time_and_place_params,
+            rng=self._rng,
+        )
     )
-    random.shuffle(main_player_configs)
+    self._rng.shuffle(main_player_configs)
 
     tasks = {
         config.name: functools.partial(
@@ -989,6 +1001,7 @@ def set_wage_function(args: _TriggeredFunctionPreEventFnArgsT) -> str:
                 sampled_settings.supporting_player_locations
             ),
             additional_components=additional_gm_components,
+            seed=seed,
         )
     )
     self._scenes, decision_env, industrial_action = configure_scenes(