Skip to content

Commit

Permalink
Add seed parameter to all simulations, propagating it into sample_parameters
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 681777282
Change-Id: Ic3b04f860840e8f3653a977647e97f2aaa0f45ff
  • Loading branch information
vezhnick authored and copybara-github committed Oct 3, 2024
1 parent 987e2d4 commit c3ef91b
Show file tree
Hide file tree
Showing 21 changed files with 223 additions and 120 deletions.
13 changes: 10 additions & 3 deletions concordia/environment/game_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
from concordia.typing import component
from concordia.utils import concurrency
from concordia.utils import helper_functions
import numpy as np
import termcolor


DEFAULT_THOUGHTS = (
thought_chains.attempt_to_result,
thought_chains.result_to_who_what_where,
Expand Down Expand Up @@ -97,6 +97,7 @@ def __init__(
concurrent_externalities: bool = True,
use_default_instructions: bool = True,
log_color: str = 'red',
seed: int | None = None,
):
"""Game master constructor.
Expand All @@ -123,6 +124,7 @@ def __init__(
instructions used for the game master, e.g. do this if you plan to pass
custom instructions as a constant component instead.
log_color: color in which to print logs
seed: random seed for the game master
"""
self._name = name
self._model = model
Expand All @@ -132,6 +134,9 @@ def __init__(
self._randomise_initiative = randomise_initiative
self._player_observes_event = player_observes_event
self._players_act_simultaneously = players_act_simultaneously
self._seed = seed or random.getrandbits(63)
self._rng = random.Random(seed)

if isinstance(action_spec, agent_lib.ActionSpec):
self._action_spec = {player.name: action_spec for player in players}
else:
Expand Down Expand Up @@ -201,7 +206,9 @@ def _handle_action(self, player_name: str, action_attempt: str) -> None:
})

# Produce the event that has happened as the result of the action attempt
prompt = interactive_document.InteractiveDocument(self._model)
prompt = interactive_document.InteractiveDocument(
self._model, rng=np.random.default_rng(self._seed)
)
for comp in self._components.values():
state_of_component = comp.state()
if state_of_component:
Expand Down Expand Up @@ -324,7 +331,7 @@ def step(
else:
players = list(self._players_by_name.values())
if self._randomise_initiative:
random.shuffle(players)
self._rng.shuffle(players)

if action_spec_override is None:
action_spec = self._action_spec
Expand Down
14 changes: 12 additions & 2 deletions concordia/environment/scenes/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""

from collections.abc import Sequence
import random

from concordia.agents import deprecated_agent
from concordia.associative_memory import blank_memories
Expand All @@ -31,6 +32,7 @@
from concordia.typing import agent as simulacrum_agent
from concordia.typing import component
from concordia.typing import entity
import numpy as np
import termcolor


Expand All @@ -47,6 +49,7 @@ def __init__(
max_steps: int | None = None,
verbose: bool = False,
log_colour: str = 'red',
seed: int | None = None,
):
"""This component accumulates history of a conversation scene in its state.
Expand All @@ -62,6 +65,7 @@ def __init__(
max_steps: Maximum number of conversation steps. If none, no limit
verbose: whether or not to print intermediate reasoning steps
log_colour: colour for logging
seed: random seed for the chain of thought document
"""
self._model = model
self._state = premise
Expand All @@ -71,7 +75,7 @@ def __init__(
self._key_question = key_question
self._max_steps = max_steps
self._current_steps = 0

self._seed = seed or random.getrandbits(63)
self._verbose = verbose

def name(self) -> str:
Expand All @@ -87,7 +91,9 @@ def terminate_episode(self) -> bool:

if not self._check_for_termination:
return False
chain_of_thought = interactive_document.InteractiveDocument(self._model)
chain_of_thought = interactive_document.InteractiveDocument(
self._model, rng=np.random.default_rng(self._seed)
)
chain_of_thought.statement('\n')
chain_of_thought.statement(f'Key question: {self._key_question}')
chain_of_thought.statement(f'Conversation:\n{self._state}\n')
Expand Down Expand Up @@ -154,6 +160,7 @@ def make_conversation_game_master(
key_question: str | None = None,
max_steps: int | None = 3,
verbose: bool = False,
seed: int | None = None,
):
"""Creates a game master that runs a conversation between players.
Expand All @@ -176,6 +183,7 @@ def make_conversation_game_master(
answer to this question.
max_steps: Maximum number of conversation steps. If none, no limit
verbose: whether or not to print
seed: random seed for the game master
Returns:
a game master
Expand Down Expand Up @@ -211,6 +219,7 @@ def make_conversation_game_master(
check_for_termination=check_for_termination,
key_question=key_question,
max_steps=max_steps,
seed=seed,
)

for player in players:
Expand All @@ -230,5 +239,6 @@ def make_conversation_game_master(
player_observes_event=False,
concurrent_externalities=False,
verbose=True,
seed=seed,
)
return game_master
5 changes: 5 additions & 0 deletions concordia/factory/environment/basic_game_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def build_game_master(
npc_context: str = '',
max_conversation_length: int = 10,
verbose: bool = False,
seed: int | None = None,
) -> tuple[game_master.GameMaster, associative_memory.AssociativeMemory]:
"""Build a game master (i.e., an environment).
Expand All @@ -84,6 +85,7 @@ def build_game_master(
npc_context: extra context provided only to non-player characters
max_conversation_length: The maximum number of turns in a conversation.
verbose: whether or not to print verbose debug information
seed: random seed for the game master
Returns:
A tuple consisting of a game master and its memory.
Expand Down Expand Up @@ -178,6 +180,7 @@ def build_game_master(
],
randomise_initiative=True,
player_observes_event=False,
seed=seed,
verbose=verbose,
)

Expand All @@ -192,6 +195,7 @@ def build_decision_scene_game_master(
decision_action_spec: agent_lib.ActionSpec,
payoffs: gm_components.schelling_diagram_payoffs.SchellingPayoffs,
verbose: bool = False,
seed: int | None = None,
) -> game_master.GameMaster:
"""Build a decision game master for decision scenes."""
decision_env = game_master.GameMaster(
Expand All @@ -207,6 +211,7 @@ def build_decision_scene_game_master(
player_observes_event=False,
concurrent_externalities=False,
verbose=verbose,
seed=seed,
)
return decision_env

Expand Down
3 changes: 3 additions & 0 deletions examples/modular/environment/haggling.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ def __init__(
only_match_with_support: bool = False,
num_games: int = 2,
num_main_players: int = 3,
seed: int | None = None,
):
"""Initialize the simulation object.
Expand Down Expand Up @@ -580,6 +581,7 @@ def __init__(
supporting players.
num_games: the number of games to play.
num_main_players: the number of main players.
seed: the random seed to use.
"""
# Support for these parameters will be added in an upcoming change.
Expand All @@ -589,6 +591,7 @@ def __init__(
helper_functions.load_time_and_place_module(
time_and_place_module=time_and_place_module,
default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
seed=seed,
)
)
sampled_settings.num_supporting_players = num_supporting_player
Expand Down
3 changes: 3 additions & 0 deletions examples/modular/environment/haggling_multi_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ def __init__(
only_match_with_support: bool = False,
num_games: int = 2,
num_main_players: int = 3,
seed: int | None = None,
):
"""Initialize the simulation object.
Expand Down Expand Up @@ -610,6 +611,7 @@ def __init__(
supporting players.
num_games: the number of games to play.
num_main_players: the number of main players.
seed: the random seed to use.
"""
# Support for these parameters will be added in an upcoming change.
Expand All @@ -619,6 +621,7 @@ def __init__(
helper_functions.load_time_and_place_module(
time_and_place_module=time_and_place_module,
default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
seed=seed,
)
)
sampled_settings.num_supporting_players = num_supporting_player
Expand Down
45 changes: 29 additions & 16 deletions examples/modular/environment/labor_collective_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def configure_players(
model: language_model.LanguageModel,
sampled_settings: Any,
time_and_place_params: types.ModuleType,
rng: random.Random,
) -> tuple[
list[formative_memories.AgentConfig],
list[formative_memories.AgentConfig],
Expand All @@ -136,6 +137,7 @@ def configure_players(
sampled_settings: the environment configuration containing the time and
place details.
time_and_place_params: the module containing the time and place parameters
rng: the random number generator to use.
Returns:
main_player_configs: configs for the main characters
Expand All @@ -161,9 +163,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
subject_pronoun = 'they'
object_pronoun = 'their'

birth_year = environment_cfg.year - (30 + random.randint(-8, 8))
birth_month = random.randint(1, 12)
birth_day = random.randint(1, 28)
birth_year = environment_cfg.year - (30 + rng.randint(-8, 8))
birth_month = rng.randint(1, 12)
birth_day = rng.randint(1, 28)
goal_str = (
f'{player_name} hopes to be able to provide for their '
'family and live a full life.'
Expand All @@ -172,7 +174,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
f"{player_name}'s personality is like "
+ player_traits_and_styles.get_trait(flowery=True)
)
prompt = interactive_document.InteractiveDocument(model)
prompt = interactive_document.InteractiveDocument(
model, rng=np.random.default_rng(sampled_settings.seed)
)
prompt.statement(
'The following exercise is preparatory work for a role playing '
'session. The purpose of the exercise is to fill in the backstory '
Expand Down Expand Up @@ -271,9 +275,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
'gender', None
),
date_of_birth=datetime.datetime(
year=sampled_settings.year - (30 + random.randint(10, 30)),
month=random.randint(1, 12),
day=random.randint(1, 28),
year=sampled_settings.year - (30 + rng.randint(10, 30)),
month=rng.randint(1, 12),
day=rng.randint(1, 28),
),
goal=(
f'{sampled_settings.antagonist} wants to make as much money '
Expand Down Expand Up @@ -307,9 +311,9 @@ def get_agent_config(player_name: str, environment_cfg: Any):
'gender', None
),
date_of_birth=datetime.datetime(
year=sampled_settings.year - (30 + random.randint(2, 10)),
month=random.randint(1, 12),
day=random.randint(1, 28),
year=sampled_settings.year - (30 + rng.randint(2, 10)),
month=rng.randint(1, 12),
day=rng.randint(1, 28),
),
goal=(
f'{sampled_settings.organizer} wants to prevent the '
Expand Down Expand Up @@ -424,6 +428,7 @@ def configure_scenes(
player_observes_event=False,
concurrent_externalities=False,
verbose=verbose,
seed=sampled_settings.seed,
)

def _get_discussion_scene_type(
Expand Down Expand Up @@ -644,6 +649,7 @@ def __init__(
bots_lib.SupportingAgentFactory | types.ModuleType
) = rational_agent_supporting,
time_and_place_module: str | None = None,
seed: int | None = None,
):
"""Initialize the simulation object.
Expand All @@ -665,6 +671,7 @@ def __init__(
time_and_place_module: optionally, specify a module containing settings
that create a sense of setting in a specific time and place. If not
specified, a random module will be chosen from the default options.
seed: the random seed to use.
"""
if resident_visitor_modules is None:
self._resident_visitor_mode = False
Expand All @@ -674,7 +681,6 @@ def __init__(
self._resident_agent_module, self._visitor_agent_module = (
resident_visitor_modules
)

self._agent_model = model

if override_agent_model:
Expand All @@ -690,9 +696,12 @@ def __init__(
time_and_place_params, sampled_settings = (
helper_functions.load_time_and_place_module(
time_and_place_module=time_and_place_module,
default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES)
default_time_and_place_modules=DEFAULT_TIME_AND_PLACE_MODULES,
seed=seed,
)
)

self._rng = random.Random(sampled_settings.seed)
start_time = datetime.datetime(
year=time_and_place_params.YEAR,
month=time_and_place_params.MONTH,
Expand Down Expand Up @@ -735,11 +744,14 @@ def __init__(
)

main_player_configs, supporting_player_configs, antagonist_config, _ = (
configure_players(model=model,
sampled_settings=sampled_settings,
time_and_place_params=time_and_place_params)
configure_players(
model=model,
sampled_settings=sampled_settings,
time_and_place_params=time_and_place_params,
rng=self._rng,
)
)
random.shuffle(main_player_configs)
self._rng.shuffle(main_player_configs)

tasks = {
config.name: functools.partial(
Expand Down Expand Up @@ -989,6 +1001,7 @@ def set_wage_function(args: _TriggeredFunctionPreEventFnArgsT) -> str:
sampled_settings.supporting_player_locations
),
additional_components=additional_gm_components,
seed=seed,
)
)
self._scenes, decision_env, industrial_action = configure_scenes(
Expand Down
Loading

0 comments on commit c3ef91b

Please sign in to comment.