From a97468c6d75d53b5ae3b56dc6ec093ca9e3bc994 Mon Sep 17 00:00:00 2001 From: Sasha Vezhnevets Date: Tue, 1 Oct 2024 11:26:11 -0700 Subject: [PATCH] update to pub coordination that properly reports a closed pub and adjusts the payoffs so that there is no payoff for going on to the pub alone PiperOrigin-RevId: 681098500 Change-Id: I17b1a0a55987d7fb806ecde565206336febf7993 --- .../components/game_master/coordination_payoffs.py | 5 +++++ examples/modular/environment/pub_coordination.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/concordia/components/game_master/coordination_payoffs.py b/concordia/components/game_master/coordination_payoffs.py index fc1ff4c5..f0258cfc 100644 --- a/concordia/components/game_master/coordination_payoffs.py +++ b/concordia/components/game_master/coordination_payoffs.py @@ -38,6 +38,7 @@ def __call__( rewards: Mapping[str, float], relational_matrix: Mapping[str, Mapping[str, float]], player_multipliers: Mapping[str, Mapping[str, float]], + option_multipliers: Mapping[str, float], ) -> Mapping[str, str]: """Function of joint actions, rewards, relational matrix and player multipliers which returns an outcome description message for each player. @@ -51,6 +52,7 @@ def __call__( of 1, including self relationships (diagonal). player_multipliers: A mapping from player name to a mapping from action to their multiplier. + option_multipliers: A mapping from option to their multiplier. Returns: A mapping from player name to their outcome description message. @@ -127,6 +129,8 @@ def __init__( name: {name_b: 1.0 for name_b in self._acting_player_names} for name in self._acting_player_names } + for name in self._acting_player_names: + self._relational_matrix[name][name] = 0.0 else: if len(relational_matrix) != len(self._acting_player_names): raise ValueError( @@ -240,6 +244,7 @@ def _set_outcome_messages( rewards=rewards, relational_matrix=self._relational_matrix, player_multipliers=self._player_multipliers, + option_multipliers=self._option_multipliers, ) common_view_of_player_obs = '\n'.join([ f'{name} observed: {observation}' diff --git a/examples/modular/environment/pub_coordination.py b/examples/modular/environment/pub_coordination.py index 28aa8faf..9c588518 100644 --- a/examples/modular/environment/pub_coordination.py +++ b/examples/modular/environment/pub_coordination.py @@ -218,7 +218,7 @@ def configure_players(sampled_settings: Any) -> tuple[ for i in range(sampled_settings.num_supporting_players): name = names[sampled_settings.num_main_players + i] gender = sampled_settings.person_data[name]['gender'] - favorite_pub = random.choice(sampled_settings.venues) + favorite_pub = sampled_settings.venues[1] config = configure_player( name, gender, @@ -543,6 +543,7 @@ def outcome_summary_fn( rewards: Mapping[str, float], relational_matrix: Mapping[str, Mapping[str, float]], player_multipliers: Mapping[str, Mapping[str, float]], + option_multipliers: Mapping[str, float], ) -> Mapping[str, str]: """Function of joint actions, rewards, relational matrix and player multipliers which returns an outcome description message for each player. @@ -556,6 +557,7 @@ def outcome_summary_fn( 1, including self relationships (diagonal). player_multipliers: A mapping from player name to a mapping from action to their multiplier. + option_multipliers: A mapping from option (pub) to their multiplier. Returns: A mapping from player name to their outcome description message. @@ -598,6 +600,9 @@ def outcome_summary_fn( player_action = joint_action[player] same_choice_by_relation = 0 score = rewards[player] + was_pub_closed = player_action in option_multipliers and ( + option_multipliers[player_action] == 0.0 + ) if score > 0.9: enjoyment = f'Overall, {player} had a great time watching the game!' @@ -634,7 +639,9 @@ def outcome_summary_fn( f"None of {player}'s friends showed up, it couldn't have been worse!" ) - if player_multipliers[player][choice_by_player[player]] > 0.99: + if was_pub_closed: + choice_of_pub = f'{player} went to a closed pub.' + elif player_multipliers[player][choice_by_player[player]] > 0.99: choice_of_pub = f'{player} watched the game at their favourite pub.' else: choice_of_pub = (