Skip to content

Commit

Permalink
Clean up and improve pub_coordination.
Browse files Browse the repository at this point in the history
 - make a more nuanced outcome message, which clearly specifies how the outcome could have been better
 - make a supporting player pick a favourite pub at random
 - add goal to memories
 - fix starting time to 10am rather than midnight
 - clean up the code

PiperOrigin-RevId: 679509855
Change-Id: Ia957796eb048c64332242283c5ab8193bea96423
  • Loading branch information
vezhnick authored and copybara-github committed Sep 27, 2024
1 parent 4342429 commit dc06d19
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 27 deletions.
46 changes: 40 additions & 6 deletions concordia/components/game_master/coordination_payoffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from collections.abc import Callable, Mapping, Sequence
import copy
import datetime
from typing import Protocol

from concordia.agents import deprecated_agent
from concordia.agents import entity_agent
Expand All @@ -28,6 +29,35 @@
import termcolor


class OutcomeSummarizationFn(Protocol):
  """Protocol for outcome summarization function.

  Describes the callable that turns the result of a coordination game into a
  per-player text message. The component in this file invokes it with keyword
  arguments (`joint_action=`, `rewards=`, `relational_matrix=`,
  `player_multipliers=`), so implementations must use exactly these parameter
  names.
  """

  def __call__(
      self,
      joint_action: Mapping[str, str],
      rewards: Mapping[str, float],
      relational_matrix: Mapping[str, Mapping[str, float]],
      player_multipliers: Mapping[str, Mapping[str, float]],
  ) -> Mapping[str, str]:
    """Returns an outcome description message for each player.

    The message is a function of the joint actions, rewards, relational matrix
    and player multipliers.

    Args:
      joint_action: A mapping from player name to their chosen action.
      rewards: A mapping from player name to their reward.
      relational_matrix: A matrix of relationships between players. The entry
        [i][j] specifies the value for player i of making the same choice as
        player j. Matrix is not assumed to be symmetric or having a particular
        value on the diagonal. If `None`, all players are assumed to have value
        of 1, including self relationships (diagonal).
        NOTE(review): the annotation here does not admit `None`; presumably
        the caller substitutes the all-ones default before invoking this
        function -- confirm against the caller.
      player_multipliers: A mapping from player name to a mapping from action to
        their multiplier.

    Returns:
      A mapping from player name to their outcome description message.
    """
    # Protocol stub: there is no implementation, only the signature matters.
    ...


class CoordinationPayoffs(component.Component):
"""Define payoffs for coordination games.
Expand All @@ -45,9 +75,7 @@ def __init__(
resolution_scene: str,
players: Sequence[deprecated_agent.BasicAgent | entity_agent.EntityAgent],
acting_player_names: Sequence[str],
outcome_summarization_fn: Callable[
[Mapping[str, str], Mapping[str, float]], Mapping[str, str]
],
outcome_summarization_fn: OutcomeSummarizationFn,
clock_now: Callable[[], datetime.datetime],
relational_matrix: Mapping[str, Mapping[str, float]] | None = None,
name: str = 'scoring function',
Expand All @@ -64,8 +92,9 @@ def __init__(
after the event, i.e. when to check the joint action and compute results
players: sequence of agents (a superset of the active players)
acting_player_names: sequence of names of players who act each stage
outcome_summarization_fn: function of joint actions and rewards which
returns an outcome description message for each player
outcome_summarization_fn: Function of joint actions, rewards, relational
matrix and player multipliers which returns an outcome description
message for each player
clock_now: Function to call to get current time.
relational_matrix: a matrix of relationships between players. The entry
[i][j] specifies the value for player i of making the same choice as
Expand Down Expand Up @@ -206,7 +235,12 @@ def _set_outcome_messages(
])
# Players see a text-based summarization of the events, which may or may not
# include the actual reward values.
partial_states = self._outcome_summarization_fn(joint_action, rewards)
partial_states = self._outcome_summarization_fn(
joint_action=joint_action,
rewards=rewards,
relational_matrix=self._relational_matrix,
player_multipliers=self._player_multipliers,
)
common_view_of_player_obs = '\n'.join([
f'{name} observed: {observation}'
for name, observation in partial_states.items()
Expand Down
112 changes: 91 additions & 21 deletions examples/modular/environment/pub_coordination.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from concordia.components import game_master as gm_components
from concordia.environment import game_master
from concordia.environment.scenes import conversation
from examples.modular.environment.modules import player_names
from examples.modular.environment.modules import player_traits_and_styles
from examples.modular.environment.modules import pub_coordination_relationships
from examples.modular.environment.supporting_agent_factory import basic_puppet_agent
Expand All @@ -53,6 +52,7 @@
import numpy as np

ItemTypeConfig = gm_components.inventory.ItemTypeConfig
CoordinationPayoffs = gm_components.coordination_payoffs.CoordinationPayoffs

DEFAULT_TIME_AND_PLACE_MODULES = ('pub_coordination_london',)

Expand All @@ -66,8 +66,6 @@

USE_CONVERSATION_GM = True

FIRST_NAMES = player_names.FIRST_NAMES


@dataclasses.dataclass
class WorldConfig:
Expand Down Expand Up @@ -170,6 +168,11 @@ def configure_player(
+ player_traits_and_styles.get_trait(flowery=True)
),
extras=extras,
specific_memories=(
f'[goal] {name} goals is to watch the game in the same pub as'
f' {all_player_names_str}. {name} would prefer everyone went to'
f' {favorite_pub}'
),
)
return config

Expand Down Expand Up @@ -214,7 +217,7 @@ def configure_players(sampled_settings: Any) -> tuple[
for i in range(sampled_settings.num_supporting_players):
name = names[sampled_settings.num_main_players + i]
gender = sampled_settings.person_data[name]['gender']
favorite_pub = sampled_settings.venues[0]
favorite_pub = random.choice(sampled_settings.venues)
config = configure_player(
name,
gender,
Expand All @@ -236,9 +239,6 @@ def configure_players(sampled_settings: Any) -> tuple[
return main_player_configs, supporting_player_configs


CoordinationPayoffs = gm_components.coordination_payoffs.CoordinationPayoffs


def sample_symmetric_relationship_matrix(names: Sequence[str]):
"""Samples a symmetric matrix of relationships in a group.
Expand Down Expand Up @@ -540,14 +540,47 @@ def outcome_summary_fn(
# `binary_joint_action` should be type Mapping[str, bool] (ie bool not int).
joint_action: Mapping[str, str],
rewards: Mapping[str, float],
relational_matrix: Mapping[str, Mapping[str, float]],
player_multipliers: Mapping[str, Mapping[str, float]],
) -> Mapping[str, str]:
"""Summarize the outcome of a decision scene."""
"""Function of joint actions, rewards, relational matrix and player multipliers which returns an outcome description message for each player.
Args:
joint_action: A mapping from player name to their chosen action.
rewards: A mapping from player name to their reward.
relational_matrix: A matrix of relationships between players. The entry
[i][j] specifies the value for player i of making the same choice as
player j. Matrix is not assumed to be symmetric or having a particular
value on the diagonal. If `None`, all players are assumed to have value of
1, including self relationships (diagonal).
player_multipliers: A mapping from player name to a mapping from action to
their multiplier.
Returns:
A mapping from player name to their outcome description message.
"""

player_names = list(joint_action.keys())

if len(relational_matrix) != len(player_names):
raise ValueError(
'Relationship matrix must have the same length as the number of'
' acting players.'
)
for _, row in relational_matrix.items():
if len(row) != len(player_names):
raise ValueError(
'Relationship matrix rows must have the same length as the number'
' of acting players.'
)

players_by_choice = {}
choice_by_player = {}
for name, choice in joint_action.items():
if choice not in players_by_choice:
players_by_choice[choice] = []
players_by_choice[choice].append(name)
choice_by_player[name] = choice

summary_of_attendance = ''

Expand All @@ -559,25 +592,59 @@ def outcome_summary_fn(
)

results = {}
for name, score in rewards.items():

for player in player_names:
player_action = joint_action[player]
same_choice_by_relation = 0
score = rewards[player]

if score > 0.9:
outcome_str = 'had a great time watching the game!'
enjoyment = f'Overall, {player} had a great time watching the game!'
elif score > 0.5:
outcome_str = (
'had an ok time watching the game, but it could have been better if'
' more friends showed up'
enjoyment = f'Overall, {player} had an ok time watching the game.'
elif score < 1e-8:
enjoyment = (
f'Overall, {player} had the worst time ever, since the pub was'
' closed.'
)
else:
enjoyment = f'Overall, {player} had a bad time watching the game.'

for other_player in player_names:
if player_action == joint_action[other_player] and player != other_player:
same_choice_by_relation += relational_matrix[player][other_player]
max_reward_possible = (
sum(max(0, r) for r in relational_matrix[player].values())
- relational_matrix[player][player]
)
if same_choice_by_relation == max_reward_possible:
friends_attendance = (
f"All of {player}'s friends went to the same"
" pub! It couldn't have been better."
)
elif same_choice_by_relation > 0.5 * max_reward_possible:
friends_attendance = (
'It could have been better if more friends showed up.'
)
elif same_choice_by_relation > 0.0:
friends_attendance = (
f'{player} would have'
' been a lot happier if more of their friends had shown up.'
)
elif score == 0.0:
outcome_str = (
'turned up at a pub, which was closed. Had to go home with'
' disappointment.'
else:
friends_attendance = (
f"None of {player}'s friends showed up, it couldn't have been worse!"
)

if player_multipliers[player][choice_by_player[player]] > 0.99:
choice_of_pub = f'{player} watched the game at their favourite pub.'
else:
outcome_str = (
'had a bad time watching the game, since barely any of their friends'
' showed up'
choice_of_pub = (
f'{player} watched the game at the pub that is not their favourite.'
)
results[name] = f'{summary_of_attendance}. {name} {outcome_str}'
results[player] = (
f'{summary_of_attendance} {choice_of_pub} {friends_attendance} {enjoyment}'
)

print(summary_of_attendance)
return results
Expand Down Expand Up @@ -671,6 +738,9 @@ def __init__(
year=time_and_place_params.YEAR,
month=time_and_place_params.MONTH,
day=time_and_place_params.DAY,
hour=10,
minute=0,
second=0,
)

setup_clock_time = start_time - datetime.timedelta(days=1)
Expand Down

0 comments on commit dc06d19

Please sign in to comment.