diff --git a/json/players/gameSpecific/TicTacToe.json b/json/players/gameSpecific/TicTacToe.json index a62ed7813..f534f498e 100644 --- a/json/players/gameSpecific/TicTacToe.json +++ b/json/players/gameSpecific/TicTacToe.json @@ -2,11 +2,11 @@ "budgetType": "BUDGET_TIME", "rolloutLength": 30, "opponentTreePolicy": "OneTree", - "MASTGamma": 0, + "MASTGamma": 0.0, "heuristic": { "class": "players.heuristics.WinOnlyHeuristic" }, - "K": 1, + "K": 1.0, "exploreEpsilon": 0.1, "treePolicy": "UCB", "MAST": "Both", diff --git a/src/main/java/core/Game.java b/src/main/java/core/Game.java index 3bfc588f6..60bf2f362 100644 --- a/src/main/java/core/Game.java +++ b/src/main/java/core/Game.java @@ -177,156 +177,6 @@ public static Game runOne(GameType gameToPlay, String parameterConfigFile, List< return game; } - /** - * Runs several games with a given random seed. - * - * @param gamesToPlay - list of games to play. - * @param players - list of players for the game. - * @param nRepetitions - number of repetitions of each game. - * @param seed - random seed for all games. If null, a new random seed is used for each game. - * @param randomizeParameters - if true, game parameters are randomized for each run of each game (if possible). - * @param detailedStatistics - if true, detailed statistics are printed, otherwise just average of wins - */ - public static void runMany(List gamesToPlay, List players, Long seed, - int nRepetitions, boolean randomizeParameters, - boolean detailedStatistics, List listeners, int turnPause) { - int nPlayers = players.size(); - - // Save win rate statistics over all games - TAGNumericStatSummary[] overall = new TAGNumericStatSummary[nPlayers]; - String[] agentNames = new String[nPlayers]; - for (int i = 0; i < nPlayers; i++) { - String[] split = players.get(i).getClass().toString().split("\\."); - String agentName = split[split.length - 1] + "-" + i; - overall[i] = new TAGNumericStatSummary("Overall " + agentName); - agentNames[i] = agentName; - } - - // For each game... - for (GameType gt : gamesToPlay) { - - // Save win rate statistics over all repetitions of this game - TAGNumericStatSummary[] statSummaries = new TAGNumericStatSummary[nPlayers]; - for (int i = 0; i < nPlayers; i++) { - statSummaries[i] = new TAGNumericStatSummary("{Game: " + gt.name() + "; Player: " + agentNames[i] + "}"); - } - - // Play n repetitions of this game and record player results - Game game = null; - int offset = 0; - for (int i = 0; i < nRepetitions; i++) { - Long s = seed; - if (s == null) s = System.currentTimeMillis(); - s += offset; - game = runOne(gt, null, players, s, randomizeParameters, listeners, null, turnPause); - if (game != null) { - recordPlayerResults(statSummaries, game); - offset = game.getGameState().getRoundCounter() * game.getGameState().getNPlayers(); - } else { - break; - } -// System.out.println("Game " + i + "/" + nRepetitions); - } - - if (game != null) { - System.out.println("---------------------"); - for (int i = 0; i < nPlayers; i++) { - // Print statistics for this game - if (detailedStatistics) { - System.out.println(statSummaries[i].toString()); - } else { - System.out.println(statSummaries[i].name + ": " + statSummaries[i].mean() + " (n=" + statSummaries[i].n() + ")"); - } - - // Record in overall statistics - overall[i].add(statSummaries[i]); - } - } - } - - // Print final statistics - System.out.println("\n=====================\n"); - for (int i = 0; i < nPlayers; i++) { - // Print statistics for this game - if (detailedStatistics) { - System.out.println(overall[i].toString()); - } else { - System.out.println(overall[i].name + ": " + overall[i].mean()); - } - } - } - - /** - * Runs several games with a set of random seeds, one for each repetition of a game. - * - * @param gamesToPlay - list of games to play. - * @param players - list of players for the game. - * @param nRepetitions - number of repetitions of each game. - * @param seeds - random seeds array, one for each repetition of a game. - * @param ac - action controller for GUI interactions, null if playing without visuals. - * @param randomizeParameters - if true, game parameters are randomized for each run of each game (if possible). - */ - public static void runMany(List gamesToPlay, List players, int nRepetitions, - long[] seeds, ActionController ac, boolean randomizeParameters, List listeners, int turnPause) { - int nPlayers = players.size(); - - // Save win rate statistics over all games - TAGNumericStatSummary[] overall = new TAGNumericStatSummary[nPlayers]; - for (int i = 0; i < nPlayers; i++) { - overall[i] = new TAGNumericStatSummary("Overall Player " + i); - } - - // For each game... - for (GameType gt : gamesToPlay) { - - // Save win rate statistics over all repetitions of this game - TAGNumericStatSummary[] statSummaries = new TAGNumericStatSummary[nPlayers]; - for (int i = 0; i < nPlayers; i++) { - statSummaries[i] = new TAGNumericStatSummary("Game: " + gt.name() + "; Player: " + i); - } - - // Play n repetitions of this game and record player results - for (int i = 0; i < nRepetitions; i++) { - Game game = runOne(gt, null, players, seeds[i], randomizeParameters, listeners, null, turnPause); - if (game != null) { - recordPlayerResults(statSummaries, game); - } - } - - for (int i = 0; i < nPlayers; i++) { - // Print statistics for this game - System.out.println(statSummaries[i].toString()); - - // Record in overall statistics - overall[i].add(statSummaries[i]); - } - } - - // Print final statistics - System.out.println("\n---------------------\n"); - for (int i = 0; i < nPlayers; i++) { - // Print statistics for this game - System.out.println(overall[i].toString()); - } - } - - /** - * Records statistics of given game into the given StatSummary objects. Only WIN, LOSE or DRAW are valid results - * recorded. - * - * @param statSummaries - object recording statistics - * @param game - finished game - */ - public static void recordPlayerResults(TAGNumericStatSummary[] statSummaries, Game game) { - int nPlayers = statSummaries.length; - CoreConstants.GameResult[] results = game.getGameState().getPlayerResults(); - for (int p = 0; p < nPlayers; p++) { - if (results[p] == CoreConstants.GameResult.WIN_GAME || results[p] == CoreConstants.GameResult.LOSE_GAME || results[p] == CoreConstants.GameResult.DRAW_GAME) { - statSummaries[p].add(results[p].value); - } - } - } - public void setTurnPause(int turnPause) { this.turnPause = turnPause; } @@ -359,6 +209,19 @@ public final void reset(List players) { * @param newRandomSeed - random seed is updated in the game parameters object and used throughout the game. */ public final void reset(List players, long newRandomSeed) { + reset(gameState, forwardModel, players, newRandomSeed); + } + + /** + * Resets the game. Sets up the game state to the initial state as described by game rules, assigns players + * and their IDs, and initialises all players. + * + * @param gameState - game state to apply the reset to + * @param forwardModel - the forward model to use for resetting the game state + * @param players - new players for the game + * @param newRandomSeed - random seed is updated in the game parameters object and used throughout the game. + */ + public final void reset(AbstractGameState gameState, AbstractForwardModel forwardModel, List players, long newRandomSeed) { gameState.reset(newRandomSeed); forwardModel.abstractSetup(gameState); if (players.size() == gameState.getNPlayers()) { @@ -412,18 +275,15 @@ public void resetStats() { } /** - * Runs the game, + * Runs the game, with synchronisation facilities for GUI and terminal players; not to be used for + * (possibly multithreaded) ParameterSearch and RunGames instances */ public final void run() { - listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ABOUT_TO_START, gameState))); boolean firstEnd = true; - while (gameState.isNotTerminal() && !stop) { - synchronized (this) { - // Now synchronized with possible intervention from the GUI // This is only relevant if the game has been paused...so should not affect // performance in non-GUI situations @@ -443,7 +303,6 @@ public final void run() { // as the JVM hoists pause and isHumanToMove() ouside the while loop on the basis that // they cannot be changed in this thread.... - /* * The Game is responsible for tracking the players and the current game state * It is important that the Game never passes the main AbstractGameState to the individual players, @@ -455,8 +314,9 @@ public final void run() { */ // Get player to ask for actions next (This horrendous line is for backwards compatibility). - boolean reacting = (gameState instanceof AbstractGameStateWithTurnOrder && ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder() instanceof ReactiveTurnOrder - && ((ReactiveTurnOrder) ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder()).getReactivePlayers().size() > 0); + boolean reacting = gameState instanceof AbstractGameStateWithTurnOrder && + ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder() instanceof ReactiveTurnOrder && + !((ReactiveTurnOrder) ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder()).getReactivePlayers().isEmpty(); // Check if this is the same player as last, count number of actions per turn if (!reacting) { @@ -470,10 +330,8 @@ public final void run() { } if (gameState.isNotTerminal()) { - if (debug) System.out.printf("Invoking oneAction from Game for player %d%n", activePlayer); oneAction(); - } else { if (firstEnd) { if (gameState.coreGameParameters.verbose) { @@ -483,7 +341,6 @@ public final void run() { firstEnd = false; } } - if (debug) System.out.println("Exiting synchronized block in Game"); } } @@ -495,13 +352,93 @@ public final void run() { } } + /** + * Runs an instance of the game, with the possibility to run fully parallel from any other game instances. + * Not to be used when there is a human player, only useful when parallelization needs to be possible. + * @return The final gameState object after finishing the game run(s) + */ + public AbstractGameState runInstance(LinkedList players, int seed, boolean randomGameParameters) { + AbstractGameState gameState = this.gameState.copy(); // our own copy of the gameState, to play games with + AbstractForwardModel forwardModel = this.forwardModel.copy(); // our own copy of the forwardModel, to avoid concurrency issues + reset(gameState, forwardModel, players, seed); // reset gameState before playing + + synchronized (this) { + listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ABOUT_TO_START, gameState))); + } + if (randomGameParameters) { + gameState.getGameParameters().randomize(); + System.out.println("Game parameters: " + gameState.getGameParameters()); + } + + int lastPlayer = -1; // initialise with no last player, since we are starting a new game + int nActionsPerTurn = 1; // keep track within this scope to avoid parallel processes to modify this game's stats + + boolean firstEnd = true; +// System.out.println("Running game: "+matchUpPlayers); + while (gameState.isNotTerminal()) { + int activePlayer = gameState.getCurrentPlayer(); + + AbstractPlayer currentPlayer = players.get(activePlayer); + /* + * The Game is responsible for tracking the players and the current game state + * It is important that the Game never passes the main AbstractGameState to the individual players, + * but instead always uses copy(playerId) to both: + * i) shuffle any hidden data they cannot see + * ii) ensure that any changes the player makes to the game state do not affect the genuine game state + * + * Players should never have access to the Game, or the main AbstractGameState, or to each other! + */ + + // Get player to ask for actions next (This horrendous line is for backwards compatibility). + boolean reacting = gameState instanceof AbstractGameStateWithTurnOrder && + ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder() instanceof ReactiveTurnOrder && + !((ReactiveTurnOrder) ((AbstractGameStateWithTurnOrder) gameState).getTurnOrder()).getReactivePlayers().isEmpty(); + + // Check if this is the same player as last, count number of actions per turn + if (!reacting) { + if (currentPlayer != null && activePlayer == lastPlayer) { + nActionsPerTurn++; + } else { + nActionsPerTurnSum += nActionsPerTurn; // atomic + nActionsPerTurn = 1; + nActionsPerTurnCount++; // atomic + } + } + + if (gameState.isNotTerminal()) { + if (debug) System.out.printf("Invoking oneAction from Game for player %d%n", activePlayer); + // keep track of last player within this scope, since parallel processes may modify this.lastPlayer + lastPlayer = gameState.getCurrentPlayer(); + oneAction(gameState, forwardModel, players); + } else { + if (firstEnd) { + if (gameState.coreGameParameters.verbose) { + System.out.println("Ended"); + } + terminate(gameState, forwardModel); + firstEnd = false; + } + } + if (debug) System.out.println("Exiting synchronized block in Game"); + } + if (firstEnd) { + if (gameState.coreGameParameters.verbose) { + System.out.println("Ended"); + } + terminate(gameState, forwardModel); + } + return gameState; + } + public final boolean isHumanToMove() { int activePlayer = gameState.getCurrentPlayer(); return this.getPlayers().get(activePlayer) instanceof HumanGUIPlayer; } public final AbstractAction oneAction() { - + return oneAction(gameState, forwardModel, players); + } + public final AbstractAction oneAction(AbstractGameState gameState, AbstractForwardModel forwardModel, List players) { // we pause before each action is taken if running with a delay (e.g. for video recording with random players) if (turnPause > 0) synchronized (this) { @@ -589,8 +526,9 @@ public final AbstractAction oneAction() { } // We publish an ACTION_CHOSEN message before we implement the action, so that observers can record the state that led to the decision AbstractAction finalAction = action; - listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ACTION_CHOSEN, gameState, finalAction, activePlayer))); - + synchronized (this) { + listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ACTION_CHOSEN, gameState, finalAction, activePlayer))); + } } else { currentPlayer.registerUpdatedObservation(observation); } @@ -621,7 +559,9 @@ public final AbstractAction oneAction() { // We publish an ACTION_TAKEN message once the action is taken so that observers can record the result of the action // (such as the next player) AbstractAction finalAction1 = action; - listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ACTION_TAKEN, gameState, finalAction1.copy(), activePlayer))); + synchronized (this) { + listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.ACTION_TAKEN, gameState, finalAction1.copy(), activePlayer))); + } if (debug) System.out.printf("Finishing oneAction for player %s%n", activePlayer); return action; @@ -631,6 +571,15 @@ public final AbstractAction oneAction() { * Called at the end of game loop execution, when the game is over. */ private void terminate() { + terminate(gameState, forwardModel); + } + + /** + * Called at the end of game loop execution, when the game is over, given some gameState + * @param gameState The game state to handle termination for. + * @param forwardModel The forward model to handle termination with. + */ + private void terminate(AbstractGameState gameState, AbstractForwardModel forwardModel) { // Print last state if (gameState instanceof IPrintable && gameState.coreGameParameters.verbose) { ((IPrintable) gameState).printToConsole(); @@ -638,7 +587,9 @@ private void terminate() { // Perform any end of game computations as required by the game forwardModel.endGame(gameState); - listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.GAME_OVER, gameState))); + synchronized (this) { + listeners.forEach(l -> l.onEvent(Event.createEvent(Event.GameEvent.GAME_OVER, gameState))); + } if (gameState.coreGameParameters.recordEventHistory) { gameState.recordHistory(Event.GameEvent.GAME_OVER.name()); for (int i = 0; i < gameState.getNPlayers(); i++) { @@ -836,8 +787,7 @@ public static void main(String[] args) { /* Set up players for the game */ ArrayList players = new ArrayList<>(); - players.add(new RandomPlayer()); - players.add(new RandomPlayer()); + players.add(new BasicMCTSPlayer()); players.add(new BasicMCTSPlayer()); // RMHCParams params = new RMHCParams(); diff --git a/src/main/java/evaluation/RunArg.java b/src/main/java/evaluation/RunArg.java index 8011e167d..3343ee390 100644 --- a/src/main/java/evaluation/RunArg.java +++ b/src/main/java/evaluation/RunArg.java @@ -121,6 +121,11 @@ public enum RunArg { nPlayers("The number of players in each game. Overrides playerRange.", -1, new Usage[]{Usage.ParameterSearch, Usage.RunGames}), + nThreads("The number of threads that can be spawned in order to evaluate games.\n" + + "\t For tournaments (including tournaments performed after ParameterSearch), the individual matachup evaluations are parallelized;" + + "\t For ParameterSearch itself, the repeats are parallelized; for this part, fewer threads than specified may be allocated.", + 1, + new Usage[]{Usage.ParameterSearch, Usage.RunGames}), neighbourhood("The size of neighbourhood to look at in NTBEA. Default is min(50, |searchSpace|/100) ", 50, new Usage[]{Usage.ParameterSearch}), diff --git a/src/main/java/evaluation/listeners/ActionFeatureListener.java b/src/main/java/evaluation/listeners/ActionFeatureListener.java index 14ca3a348..26eb32348 100644 --- a/src/main/java/evaluation/listeners/ActionFeatureListener.java +++ b/src/main/java/evaluation/listeners/ActionFeatureListener.java @@ -51,7 +51,7 @@ public String[] names() { @Override - public double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer) { + protected double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer) { // We put phi in first, and then psi double[] retValue = new double[psiFn.names().length + phiFn.names().length]; double[] phi = cachedPhi == null ? @@ -63,13 +63,13 @@ public double[] extractFeatureVector(AbstractAction action, AbstractGameState st return retValue; } - protected void processStateWithTargets(AbstractGameState state, AbstractAction action, Map> targets) { + protected synchronized void processStateWithTargets(AbstractGameState state, AbstractAction action, Map> targets) { actionValues = targets; processState(state, action); } @Override - public void processState(AbstractGameState state, AbstractAction action) { + public synchronized void processState(AbstractGameState state, AbstractAction action) { // we override this from FeatureListener, because we want to record the feature vector for each action if (action == null) return; // we do not record data for the GAME_OVER event cachedPhi = null; @@ -109,7 +109,7 @@ private Map getActionScores(AbstractAction action) { @Override - public String injectAgentAttributes(String raw) { + public synchronized String injectAgentAttributes(String raw) { return raw.replaceAll(Pattern.quote("*PSI*"), psiFn.getClass().getCanonicalName()) .replaceAll(Pattern.quote("*PHI*"), phiFn != null ? phiFn.getClass().getCanonicalName() : "NONE"); } diff --git a/src/main/java/evaluation/listeners/FeatureListener.java b/src/main/java/evaluation/listeners/FeatureListener.java index 1d42e29f5..9a03ebcc0 100644 --- a/src/main/java/evaluation/listeners/FeatureListener.java +++ b/src/main/java/evaluation/listeners/FeatureListener.java @@ -38,7 +38,7 @@ public void setLogger(IStatisticLogger logger) { } @Override - public void onEvent(Event event) { + public synchronized void onEvent(Event event) { if (event.type == frequency && frequency != Event.GameEvent.GAME_OVER) { // if GAME_OVER, then we cover this a few lines down @@ -55,7 +55,7 @@ public void onEvent(Event event) { } @Override - public boolean setOutputDirectory(String... nestedDirectories) { + public synchronized boolean setOutputDirectory(String... nestedDirectories) { if (logger instanceof FileStatsLogger fileLogger) { fileLogger.setOutPutDirectory(nestedDirectories); @@ -112,18 +112,18 @@ public void report() { } @Override - public void setGame(Game game) { + public synchronized void setGame(Game game) { this.game = game; } @Override - public Game getGame() { + public synchronized Game getGame() { return game; } public abstract String[] names(); - public abstract double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer); + protected abstract double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer); /** diff --git a/src/main/java/evaluation/listeners/MetricsGameListener.java b/src/main/java/evaluation/listeners/MetricsGameListener.java index 5e1313d5c..7c67b7677 100644 --- a/src/main/java/evaluation/listeners/MetricsGameListener.java +++ b/src/main/java/evaluation/listeners/MetricsGameListener.java @@ -74,7 +74,7 @@ public MetricsGameListener(IDataLogger.ReportDestination logTo, IDataLogger.Repo * @param event Event has information about its type and data fields for game, state, action and player. * It's not guaranteed that the data fields are different to null, so a check is necessary. */ - public void onEvent(Event event) { + public synchronized void onEvent(Event event) { if (!eventsOfInterest.contains(event.type)) return; @@ -93,7 +93,7 @@ public void onEvent(Event event) { } @Override - public boolean setOutputDirectory(String... nestedDirectories) { + public synchronized boolean setOutputDirectory(String... nestedDirectories) { boolean success = true; @@ -166,22 +166,22 @@ private String eventToIndexingColumn(IGameEvent e) { } /* Getters, setters */ - public final void setGame(Game game) { + public synchronized final void setGame(Game game) { this.game = game; } - public final Game getGame() { + public synchronized final Game getGame() { return game; } - public void reset() { + public synchronized void reset() { for (AbstractMetric metric : metrics.values()) { metric.reset(); } } @Override - public void init(Game game, int nPlayersPerGame, Set playerNames) { + public synchronized void init(Game game, int nPlayersPerGame, Set playerNames) { this.game = game; for (AbstractMetric metric : metrics.values()) { diff --git a/src/main/java/evaluation/listeners/StateFeatureListener.java b/src/main/java/evaluation/listeners/StateFeatureListener.java index 0e2f01e48..b70c5c028 100644 --- a/src/main/java/evaluation/listeners/StateFeatureListener.java +++ b/src/main/java/evaluation/listeners/StateFeatureListener.java @@ -30,12 +30,12 @@ public String[] names() { } @Override - public double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer) { + protected double[] extractFeatureVector(AbstractAction action, AbstractGameState state, int perspectivePlayer) { return phiFn.featureVector(state, perspectivePlayer); } @Override - public String injectAgentAttributes(String raw) { + public synchronized String injectAgentAttributes(String raw) { return raw.replaceAll(Pattern.quote("*PHI*"), phiFn.getClass().getCanonicalName()); } } diff --git a/src/main/java/evaluation/optimisation/GameEvaluator.java b/src/main/java/evaluation/optimisation/GameEvaluator.java index 655aac36a..96473237a 100644 --- a/src/main/java/evaluation/optimisation/GameEvaluator.java +++ b/src/main/java/evaluation/optimisation/GameEvaluator.java @@ -5,7 +5,6 @@ import core.Game; import core.interfaces.IGameHeuristic; import core.interfaces.IStateHeuristic; -import core.interfaces.IStatisticLogger; import evaluation.listeners.IGameListener; import evodef.SearchSpace; import evodef.SolutionEvaluator; @@ -95,9 +94,17 @@ public double evaluate(double[] doubles) { */ @Override public double evaluate(int[] settings) { - if (debug) - System.out.printf("Starting evaluation %d of %s at %tT%n", nEvals, - Arrays.toString(settings), System.currentTimeMillis()); + if (debug) { + HashMap chosenConfigs = new HashMap<>(); + for (int i = 0; i < searchSpace.nDims(); i++) { + int finalI = i; + chosenConfigs.put(searchSpace.name(i), IntStream.range(0, searchSpace.nValues(i)) + .mapToObj(j -> searchSpace.value(finalI, j)) + .toList().get(settings[i])); + } + System.out.printf("%d Starting evaluation %d of %s at %tT%n", this.hashCode(), nEvals, + chosenConfigs, System.currentTimeMillis()); + } Object configuredThing = searchSpace.getAgent(settings); boolean tuningPlayer = configuredThing instanceof AbstractPlayer; boolean tuningGame = configuredThing instanceof Game; diff --git a/src/main/java/evaluation/optimisation/MultiNTBEA.java b/src/main/java/evaluation/optimisation/MultiNTBEA.java index d15a8d9ee..8620d2c33 100644 --- a/src/main/java/evaluation/optimisation/MultiNTBEA.java +++ b/src/main/java/evaluation/optimisation/MultiNTBEA.java @@ -132,4 +132,8 @@ private static int manhattan(int[] x, int[] y) { return retValue; } + @Override + public NTBEA copy() { + return new MultiNTBEA(params, game, nPlayers); + } } diff --git a/src/main/java/evaluation/optimisation/NTBEA.java b/src/main/java/evaluation/optimisation/NTBEA.java index 5bbc39af5..351e96923 100644 --- a/src/main/java/evaluation/optimisation/NTBEA.java +++ b/src/main/java/evaluation/optimisation/NTBEA.java @@ -1,20 +1,16 @@ package evaluation.optimisation; import core.AbstractGameState; -import core.AbstractParameters; import core.AbstractPlayer; import core.interfaces.IGameHeuristic; import core.interfaces.IStateHeuristic; import evaluation.RunArg; import evaluation.listeners.IGameListener; -import evaluation.tournaments.AbstractTournament; import evaluation.tournaments.RoundRobinTournament; -import org.apache.commons.math3.util.CombinatoricsUtils; import games.GameType; import ntbea.NTupleBanditEA; import ntbea.NTupleSystem; import org.json.simple.JSONObject; -import players.IAnyTimePlayer; import players.PlayerFactory; import players.heuristics.OrdinalPosition; import players.heuristics.PureScoreHeuristic; @@ -28,6 +24,9 @@ import java.io.FileWriter; import java.io.IOException; import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.function.IntToDoubleFunction; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -136,92 +135,55 @@ public void writeAgentJSON(int[] settings, String fileName) { * @return */ public Pair run() { + ExecutorService executor = params.nThreads > 1 ? Executors.newFixedThreadPool(Math.min(params.nThreads, params.repeats)) : null; + // if we're multithreading, we don't want to have different threads interfering with eachother + List clones = new ArrayList<>(); + if (executor == null) { + // no multithreading, so list of clones consists only of the current object + clones.add(this); + } + // Only this loop is parallelized, since the rest is just analysis, or initializing a tournament (which is + // on its own already parallelized) for (currentIteration = 0; currentIteration < params.repeats; currentIteration++) { - runIteration(); - writeAgentJSON(winnerSettings.get(winnerSettings.size() - 1), - params.destDir + File.separator + "Recommended_" + currentIteration + ".json"); + NTBEA clone = executor == null ? this : this.copy(); + if (executor != null) { + clone.currentIteration = currentIteration; // for correct reporting + // run in parallel if allowed + executor.submit(clone::runIteration); + clones.add(clone); + } else { + clone.runIteration(); + } } - // After all runs are complete, if tournamentGames are specified, then we allow all the - // winners from each iteration to play in a tournament and pick the winner of this tournament - if (params.tournamentGames > 0 && winnersPerRun.get(0) instanceof AbstractPlayer) { - if (!elites.isEmpty()) { - // first of all we add the elites into winnerSettings, and winnersPerRun - // i.e. we effectively add an extra 'run' for each elite - for (int[] elite : elites) { - winnerSettings.add(elite); - winnersPerRun.add(params.searchSpace.getAgent(elite)); + if (executor != null) { + executor.shutdown(); + try { + // Wait for all tasks to complete; no timeout (infty hours) because this normally also has no timeout + if (!executor.awaitTermination(Long.MAX_VALUE, TimeUnit.HOURS)) { + executor.shutdownNow(); // Force shutdown if tasks are hanging } + } catch (InterruptedException e) { + executor.shutdownNow(); // Restore interrupted status and shutdown + Thread.currentThread().interrupt(); } + } - List players = winnersPerRun.stream().map(p -> (AbstractPlayer) p).collect(Collectors.toList()); - for (int i = 0; i < players.size(); i++) { - players.get(i).setName("Winner " + i + " : " + Arrays.toString(winnerSettings.get(i))); - } - // Given we have N players in each game, and a total of M agents (the number of NTBEA iterations), we - // can reduce the variance in the results (and hence the accuracy of picking the best agent) by using the exhaustive mode - // this does rely on not having, say 20 NTBEA iterations on a 6-player game (38k combinations); but assuming - // the advice of 10 or fewer iterations holds, then even on a 5-player game we have 252 combinations, which is fine. - //double combinationsOfPlayers = CombinatoricsUtils.binomialCoefficientDouble(players.size(), nPlayers); - int nTeams = params.byTeam ? game.createGameInstance(nPlayers, params.gameParams).getGameState().getNTeams() : nPlayers; - if (players.size() < nTeams) { - System.out.println("Not enough players to run a tournament with " + nTeams + " players. Skipping the final tournament - " + - "check the repeats options is at least equal to the number of players."); - } else { - Map config = new HashMap<>(); - config.put(matchups, params.tournamentGames); - if (players.size() < nPlayers) { - // if we don't have enough players to fill the game, then we will need to use self-play - config.put(RunArg.mode, "exhaustiveSP"); - } else { - config.put(RunArg.mode, "exhaustive"); - } - config.put(byTeam, true); - config.put(RunArg.distinctRandomSeeds, 0); - config.put(RunArg.budget, params.budget); - config.put(RunArg.verbose, false); - config.put(RunArg.destDir, params.destDir); - RoundRobinTournament tournament = new RoundRobinTournament(players, game, nPlayers, params.gameParams, config); - createListeners().forEach(tournament::addListener); - tournament.run(); - // create a new list of results in descending order of score - IntToDoubleFunction cmp = params.evalMethod.equals("Ordinal") ? i -> -tournament.getOrdinalRank(i) : tournament::getWinRate; - List agentsInOrder = IntStream.range(0, players.size()) - .boxed() - .sorted(Comparator.comparingDouble(cmp::applyAsDouble)) - .collect(Collectors.toList()); - Collections.reverse(agentsInOrder); - params.logFile = "RRT_" + params.logFile; - for (int index : agentsInOrder) { - if (params.verbose) - System.out.printf("Player %d %s\tWin Rate: %.3f +/- %.3f\tMean Ordinal: %.2f +/- %.2f%n", index, Arrays.toString(winnerSettings.get(index)), - tournament.getWinRate(index), tournament.getWinStdErr(index), - tournament.getOrdinalRank(index), tournament.getOrdinalStdErr(index)); - Pair resultToReport = new Pair<>(tournament.getWinRate(index), tournament.getWinStdErr(index)); - if (params.evalMethod.equals("Ordinal")) - resultToReport = new Pair<>(tournament.getOrdinalRank(index), tournament.getOrdinalStdErr(index)); - - logSummary(new Pair<>(resultToReport, winnerSettings.get(index)), params); - } - params.logFile = params.logFile.substring(4); - bestResult = params.evalMethod.equals("Ordinal") ? - new Pair<>(new Pair<>(tournament.getOrdinalRank(agentsInOrder.get(0)), tournament.getOrdinalStdErr(agentsInOrder.get(0))), winnerSettings.get(agentsInOrder.get(0))) : - new Pair<>(new Pair<>(tournament.getWinRate(agentsInOrder.get(0)), tournament.getWinStdErr(agentsInOrder.get(0))), winnerSettings.get(agentsInOrder.get(0))); - - // We then want to check the win rate against the elite agent (if one was provided) - // we only regard an agent as better, if it beats the elite agent by at least 2 sd (so, c. 95%) confidence - if (elites.size() == 1 && agentsInOrder.get(0) != winnersPerRun.size() - 1) { - // The elite agent is always the last one (and if the elite won fair and square, then we skip this - double eliteWinRate = tournament.getWinRate(winnersPerRun.size() - 1); - double eliteStdErr = tournament.getWinStdErr(winnersPerRun.size() - 1); - if (eliteWinRate + 2 * eliteStdErr > bestResult.a.a) { - if (params.verbose) - System.out.printf("Elite agent won with %.3f +/- %.3f versus challenger at %.3f, so we are sticking with it%n", eliteWinRate, eliteStdErr, bestResult.a.a); - bestResult = new Pair<>(new Pair<>(eliteWinRate, eliteStdErr), elites.get(0)); - } - } - } + // After all runs are complete, do some cleaning up and logging + for (NTBEA clone : clones) { + writeAgentJSON(clone.winnerSettings.get(clone.winnerSettings.size() - 1), + params.destDir + File.separator + "Recommended_" + clone.currentIteration + ".json"); + } + if (clones.size() > 1 || clones.get(0) != this) { + // aggregate all data from all clones + collectCloneResults(clones); + } + + // If tournamentGames are specified, then we allow all the winners from each iteration + // to play in a tournament and pick the winner of this tournament + if (params.tournamentGames > 0 && winnersPerRun.get(0) instanceof AbstractPlayer) { + activateTournament(); } if (params.verbose) { System.out.println("\nFinal Recommendation: "); @@ -233,6 +195,102 @@ public Pair run() { return new Pair<>(params.searchSpace.getAgent(bestResult.b), bestResult.b); } + /** + * Gathers all data from all parallel threads, which have their data stored in a clone of this object + * This ensures all metrics from these clones are incorporated into this object's data + * @param clones the list of clones that have run in separate threads + */ + protected void collectCloneResults(List clones) { + for (NTBEA clone : clones) { + this.elites.addAll(clone.elites); + this.winnersPerRun.addAll(clone.winnersPerRun); + this.winnerSettings.addAll(clone.winnerSettings); + if (clone.bestResult.a.a > this.bestResult.a.a) { + bestResult = clone.bestResult; + } + } + } + + protected void activateTournament() { + if (!elites.isEmpty()) { + // first of all we add the elites into winnerSettings, and winnersPerRun + // i.e. we effectively add an extra 'run' for each elite + for (int[] elite : elites) { + winnerSettings.add(elite); + winnersPerRun.add(params.searchSpace.getAgent(elite)); + } + } + + List players = winnersPerRun.stream().map(p -> (AbstractPlayer) p).collect(Collectors.toList()); + for (int i = 0; i < players.size(); i++) { + players.get(i).setName("Winner " + i + " : " + Arrays.toString(winnerSettings.get(i))); + } + // Given we have N players in each game, and a total of M agents (the number of NTBEA iterations), we + // can reduce the variance in the results (and hence the accuracy of picking the best agent) by using the exhaustive mode + // this does rely on not having, say 20 NTBEA iterations on a 6-player game (38k combinations); but assuming + // the advice of 10 or fewer iterations holds, then even on a 5-player game we have 252 combinations, which is fine. + //double combinationsOfPlayers = CombinatoricsUtils.binomialCoefficientDouble(players.size(), nPlayers); + int nTeams = params.byTeam ? game.createGameInstance(nPlayers, params.gameParams).getGameState().getNTeams() : nPlayers; + if (players.size() < nTeams) { + System.out.println("Not enough players to run a tournament with " + nTeams + " players. Skipping the final tournament - " + + "check the repeats options is at least equal to the number of players."); + } else { + Map config = new HashMap<>(); + config.put(matchups, params.tournamentGames); + if (players.size() < nPlayers) { + // if we don't have enough players to fill the game, then we will need to use self-play + config.put(RunArg.mode, "exhaustiveSP"); + } else { + config.put(RunArg.mode, "exhaustive"); + } + config.put(byTeam, true); + config.put(RunArg.distinctRandomSeeds, 0); + config.put(RunArg.budget, params.budget); + config.put(RunArg.verbose, params.verbose); + config.put(RunArg.destDir, params.destDir); + config.put(RunArg.nThreads, params.nThreads); + RoundRobinTournament tournament = new RoundRobinTournament(players, game, nPlayers, params.gameParams, config); + createListeners().forEach(tournament::addListener); + tournament.run(); + // create a new list of results in descending order of score + IntToDoubleFunction cmp = params.evalMethod.equals("Ordinal") ? i -> -tournament.getOrdinalRank(i) : tournament::getWinRate; + List agentsInOrder = IntStream.range(0, players.size()) + .boxed() + .sorted(Comparator.comparingDouble(cmp::applyAsDouble)) + .collect(Collectors.toList()); + Collections.reverse(agentsInOrder); + params.logFile = "RRT_" + params.logFile; + for (int index : agentsInOrder) { + if (params.verbose) + System.out.printf("Player %d %s\tWin Rate: %.3f +/- %.3f\tMean Ordinal: %.2f +/- %.2f%n", index, Arrays.toString(winnerSettings.get(index)), + tournament.getWinRate(index), tournament.getWinStdErr(index), + tournament.getOrdinalRank(index), tournament.getOrdinalStdErr(index)); + Pair resultToReport = new Pair<>(tournament.getWinRate(index), tournament.getWinStdErr(index)); + if (params.evalMethod.equals("Ordinal")) + resultToReport = new Pair<>(tournament.getOrdinalRank(index), tournament.getOrdinalStdErr(index)); + + logSummary(new Pair<>(resultToReport, winnerSettings.get(index)), params); + } + params.logFile = params.logFile.substring(4); + bestResult = params.evalMethod.equals("Ordinal") ? + new Pair<>(new Pair<>(tournament.getOrdinalRank(agentsInOrder.get(0)), tournament.getOrdinalStdErr(agentsInOrder.get(0))), winnerSettings.get(agentsInOrder.get(0))) : + new Pair<>(new Pair<>(tournament.getWinRate(agentsInOrder.get(0)), tournament.getWinStdErr(agentsInOrder.get(0))), winnerSettings.get(agentsInOrder.get(0))); + + // We then want to check the win rate against the elite agent (if one was provided) + // we only regard an agent as better, if it beats the elite agent by at least 2 sd (so, c. 95%) confidence + if (elites.size() == 1 && agentsInOrder.get(0) != winnersPerRun.size() - 1) { + // The elite agent is always the last one (and if the elite won fair and square, then we skip this + double eliteWinRate = tournament.getWinRate(winnersPerRun.size() - 1); + double eliteStdErr = tournament.getWinStdErr(winnersPerRun.size() - 1); + if (eliteWinRate + 2 * eliteStdErr > bestResult.a.a) { + if (params.verbose) + System.out.printf("Elite agent won with %.3f +/- %.3f versus challenger at %.3f, so we are sticking with it%n", eliteWinRate, eliteStdErr, bestResult.a.a); + bestResult = new Pair<>(new Pair<>(eliteWinRate, eliteStdErr), elites.get(0)); + } + } + } + } + protected void runTrials() { evaluator.reset(); searchFramework.runTrial(evaluator, params.iterationsPerRun); @@ -380,6 +438,10 @@ private static void logSummary(Pair, int[]> data, NTBEAPara } } + public NTBEA copy() { + return new NTBEA(params, game, nPlayers); + } + private static String valueToString(int paramIndex, int valueIndex, ITPSearchSpace ss) { Object value = ss.value(paramIndex, valueIndex); String valueString = value.toString(); @@ -390,5 +452,4 @@ private static String valueToString(int paramIndex, int valueIndex, ITPSearchSpa } return valueString; } - } diff --git a/src/main/java/evaluation/optimisation/NTBEAParameters.java b/src/main/java/evaluation/optimisation/NTBEAParameters.java index 5bfec6c3d..45d219d90 100644 --- a/src/main/java/evaluation/optimisation/NTBEAParameters.java +++ b/src/main/java/evaluation/optimisation/NTBEAParameters.java @@ -43,6 +43,7 @@ public enum Mode { public ITPSearchSpace searchSpace; public AbstractParameters gameParams; public boolean byTeam; + public int nThreads; public NTBEAParameters(Map args) { this(args, Function.identity()); @@ -67,6 +68,7 @@ public NTBEAParameters(Map args, Function prepro GameType game = GameType.valueOf(args.get(RunArg.game).toString()); gameParams = args.get(RunArg.gameParams).equals("") ? null : AbstractParameters.createFromFile(game, (String) args.get(RunArg.gameParams)); + nThreads = (int) args.get(RunArg.nThreads); mode = Mode.valueOf((String) args.get(RunArg.NTBEAMode)); logFile = "NTBEA.log"; diff --git a/src/main/java/evaluation/tournaments/RoundRobinTournament.java b/src/main/java/evaluation/tournaments/RoundRobinTournament.java index 71d70de35..b8e4b9312 100644 --- a/src/main/java/evaluation/tournaments/RoundRobinTournament.java +++ b/src/main/java/evaluation/tournaments/RoundRobinTournament.java @@ -1,5 +1,6 @@ package evaluation.tournaments; +import core.AbstractGameState; import core.AbstractParameters; import core.AbstractPlayer; import evaluation.RunArg; @@ -15,6 +16,9 @@ import java.io.File; import java.io.FileWriter; import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -48,6 +52,7 @@ public class RoundRobinTournament extends AbstractTournament { protected boolean randomGameParams; public String name; public boolean byTeam; + public int nThreads = 1; protected long randomSeed; List gameSeeds = new ArrayList<>(); @@ -66,6 +71,7 @@ public RoundRobinTournament(List agents, GameType game AbstractParameters gameParams, Map config) { super(agents, gameToPlay, playersPerGame, gameParams); int nTeams = game.getGameState().getNTeams(); + this.nThreads = (int) config.getOrDefault(RunArg.nThreads, 1); this.verbose = (boolean) config.getOrDefault(RunArg.verbose, false); this.tournamentMode = switch (config.get(RunArg.mode).toString().toUpperCase()) { case "EXHAUSTIVE" -> EXHAUSTIVE; @@ -168,7 +174,6 @@ public void run() { game.addListener(gameTracker); } - LinkedList matchUp = new LinkedList<>(); // add outer loop if we have tournamentSeeds enabled; if not this will just run once List allSeeds = new ArrayList<>(gameSeeds); for (int iter = 0; iter < Math.max(1, tournamentSeeds); iter++) { @@ -181,7 +186,7 @@ public void run() { // use a seed per matchup gameSeeds = IntStream.range(0, gamesPerMatchup).mapToObj(i -> seedRnd.nextInt()).collect(toList()); } - createAndRunMatchUp(matchUp); + createAndRunMatchUps(); } reportResults(); @@ -220,12 +225,10 @@ public AbstractPlayer getWinner() { } /** - * Recursively creates one combination of players and evaluates it. - * - * @param matchUp - current combination of players, updated recursively. + * Create and run all match ups given the selected tournament mode. */ - public void createAndRunMatchUp(List matchUp) { - + public void createAndRunMatchUps() { + ExecutorService executor = nThreads > 1 ? Executors.newFixedThreadPool(nThreads) : null; int nTeams = byTeam ? game.getGameState().getNTeams() : nPlayers; switch (tournamentMode) { case RANDOM: @@ -235,7 +238,7 @@ public void createAndRunMatchUp(List matchUp) { List matchup = new ArrayList<>(nTeams); for (int j = 0; j < nTeams; j++) matchup.add(idStream.getAsInt()); - evaluateMatchUp(matchup, 1, Collections.singletonList(seedRnd.nextInt())); + evaluateMatchUp(matchup, 1, Collections.singletonList(seedRnd.nextInt()), executor); } break; case ONE_VS_ALL: @@ -256,7 +259,7 @@ public void createAndRunMatchUp(List matchUp) { } // We split the total budget equally across the possible positions the focus player can be in // We will therefore use the first chunk of gameSeeds only (but use the same gameSeeds for each position) - evaluateMatchUp(matchup, totalGameBudget / nTeams, gameSeeds); + evaluateMatchUp(matchup, totalGameBudget / nTeams, gameSeeds, executor); } else { for (int m = 0; m < this.totalGameBudget / nTeams; m++) { Collections.shuffle(agentOrder, seedRnd); @@ -268,96 +271,131 @@ public void createAndRunMatchUp(List matchUp) { matchup.add(agentOrder.get(j % agentOrder.size())); } } - evaluateMatchUp(matchup, 1, Collections.singletonList(gameSeeds.get(m))); + evaluateMatchUp(matchup, 1, Collections.singletonList(gameSeeds.get(m)), executor); } } } break; case EXHAUSTIVE: case EXHAUSTIVE_SELF_PLAY: - // in this case we are in exhaustive mode, so we recursively construct all possible combinations of players - if (matchUp.size() == nTeams) { - evaluateMatchUp(matchUp, gamesPerMatchup, gameSeeds); - } else { - for (Integer agentID : this.allAgentIds) { - if (tournamentMode == EXHAUSTIVE_SELF_PLAY || !matchUp.contains(agentID)) { - matchUp.add(agentID); - createAndRunMatchUp(matchUp); - matchUp.remove(agentID); - } - } + ArrayList> matchups = new ArrayList<>(); + + generateMatchUps(matchups, new ArrayList<>(), nTeams); + + for (List matchup : matchups) { + evaluateMatchUp(matchup, gamesPerMatchup, gameSeeds, executor); } } + if (executor != null) { + executor.shutdown(); + try { + // Wait for all tasks to complete; no timeout (infty hours) because this normally also has no timeout + if (!executor.awaitTermination(Long.MAX_VALUE, TimeUnit.HOURS)) { + executor.shutdownNow(); // Force shutdown if tasks are hanging + } + } catch (InterruptedException e) { + executor.shutdownNow(); // Restore interrupted status and shutdown + Thread.currentThread().interrupt(); + } + } } /** + * Recursively generate an exhaustive list of possible size-`nTeams` match ups between agents, + * and then add these to a list of results + * @param result the list to store the match ups in + * @param current the current state of the list to be added to the results + * @param nTeams the size of each match up to be generated + */ + private void generateMatchUps(ArrayList> result, ArrayList current, int nTeams) { + if (current.size() == nTeams) { + result.add(new ArrayList<>(current)); + } else { + for (Integer agentID : this.allAgentIds) { + if (tournamentMode == EXHAUSTIVE_SELF_PLAY || !current.contains(agentID)) { + // only add duplicates when self play is enabled + current.add(agentID); + generateMatchUps(result, current, nTeams); + current.remove(current.size() - 1); + } + } + } + } + + /** + * Wrapper for evaluateMatchUp, which checks if the function should be called in parallel or not. * Evaluates one combination of players. + * @param agentIDsInThisGame - IDs of agents participating in this run. + * @param executor - The ExecutorService which holds the pool of threads allocated, or `null` if not ran on parallel threads. + */ + protected void evaluateMatchUp(List agentIDsInThisGame, int nGames, List seeds, ExecutorService executor) { + if (executor != null) { + // run in parallel if allowed + executor.submit(() -> { + evaluateMatchUp(agentIDsInThisGame, nGames, seeds); + }); + } else { + evaluateMatchUp(agentIDsInThisGame, nGames, seeds); + } + } + /** + * Evaluates one combination of players. May be run in parallel. * * @param agentIDsInThisGame - IDs of agents participating in this run. */ protected void evaluateMatchUp(List agentIDsInThisGame, int nGames, List seeds) { + // Note: This function is able to run in multiple parallel threads at the same time. In order to avoid + // concurrency problems, operations on `this` need to be either atomic, or synchronized(this). if (seeds.size() < nGames) throw new AssertionError("Not enough seeds for the number of games requested"); if (debug) System.out.printf("Evaluate %s at %tT%n", agentIDsInThisGame.toString(), System.currentTimeMillis()); LinkedList matchUpPlayers = new LinkedList<>(); - // If we are in self-play mode, we need to create a copy of the player to avoid them sharing the same state + // If we are in self-play mode or multithreading, we need to create a copy of the player to avoid them sharing the same state // If not in self-play mode then this is unnecessary, as the same agent will never be in the same game twice for (int agentID : agentIDsInThisGame) - matchUpPlayers.add(tournamentMode == EXHAUSTIVE_SELF_PLAY ? this.agents.get(agentID).copy() : this.agents.get(agentID)); - - if (verbose) { - StringBuffer sb = new StringBuffer(); - sb.append("["); - for (int agentID : agentIDsInThisGame) - sb.append(this.agents.get(agentID).toString()).append(","); - sb.setCharAt(sb.length() - 1, ']'); - System.out.println(sb); - } + matchUpPlayers.add(tournamentMode == EXHAUSTIVE_SELF_PLAY || nThreads > 1 ? this.agents.get(agentID).copy() : this.agents.get(agentID)); // TODO : Not sure this is the ideal place for this...ask Raluca Set agentNames = agents.stream().map(AbstractPlayer::toString).collect(Collectors.toSet()); - for (IGameListener listener : listeners) { - if (listener instanceof TournamentMetricsGameListener) { - ((TournamentMetricsGameListener) listener).tournamentInit(game, nPlayers, agentNames, new HashSet<>(matchUpPlayers)); + synchronized (this) { + for (IGameListener listener : listeners) { + if (listener instanceof TournamentMetricsGameListener) { + ((TournamentMetricsGameListener) listener).tournamentInit(game, nPlayers, agentNames, new HashSet<>(matchUpPlayers)); + } } } // Run the game N = gamesPerMatchUp times with these players for (int i = 0; i < nGames; i++) { - // if tournamentSeeds > 0, then we are running this many tournaments, each with a different random seed fixed for the whole tournament - // so we override the standard random seeds - game.reset(matchUpPlayers, seeds.get(i)); - - // Randomize parameters - if (randomGameParams) { - game.getGameState().getGameParameters().randomize(); - System.out.println("Game parameters: " + game.getGameState().getGameParameters()); - } + AbstractGameState gs; + GameResult[] results; - game.run(); // Always running tournaments without visuals - GameResult[] results = game.getGameState().getPlayerResults(); + // run an instance of the game, allowing parallelizing, and save the final game state. + // This function will also reset the state and apply random parameters if necessary. + gs = game.runInstance(matchUpPlayers, seeds.get(i), randomGameParams); + results = gs.getPlayerResults(); int numDraws = 0; for (int j = 0; j < matchUpPlayers.size(); j++) { - nGamesPlayed[agentIDsInThisGame.get(j)] += 1; + nGamesPlayed[agentIDsInThisGame.get(j)]++; for (int k = 0; k < matchUpPlayers.size(); k++) { if (k != j) { - nGamesPlayedPerOpponent[agentIDsInThisGame.get(j)][agentIDsInThisGame.get(k)] += 1; + nGamesPlayedPerOpponent[agentIDsInThisGame.get(j)][agentIDsInThisGame.get(k)]++; } } // now we need to be careful if we have a team game, as the agents are indexed by Team, not player if (byTeam) { - for (int player = 0; player < game.getGameState().getNPlayers(); player++) { - if (game.getGameState().getTeam(player) == j) { - numDraws += updatePoints(results, agentIDsInThisGame, agentIDsInThisGame.get(j), player); + for (int player = 0; player < gs.getNPlayers(); player++) { + if (gs.getTeam(player) == j) { + numDraws += updatePoints(gs, results, agentIDsInThisGame, agentIDsInThisGame.get(j), player); break; // we stop after one player on the team to avoid double counting } } } else { - numDraws += updatePoints(results, agentIDsInThisGame, agentIDsInThisGame.get(j), j); + numDraws += updatePoints(gs, results, agentIDsInThisGame, agentIDsInThisGame.get(j), j); } } @@ -373,9 +411,13 @@ protected void evaluateMatchUp(List agentIDsInThisGame, int nGames, Lis if (verbose) { StringBuffer sb = new StringBuffer(); sb.append("["); + for (int agentID : agentIDsInThisGame) + sb.append(this.agents.get(agentID).toString()).append(","); + sb.setCharAt(sb.length() - 1, ']'); + sb.append(": ["); for (int j = 0; j < matchUpPlayers.size(); j++) { - for (int player = 0; player < game.getGameState().getNPlayers(); player++) { - if (game.getGameState().getTeam(player) == j) { + for (int player = 0; player < gs.getNPlayers(); player++) { + if (gs.getTeam(player) == j) { sb.append(results[player]).append(","); break; // we stop after one player on the team to avoid double counting } @@ -386,18 +428,19 @@ protected void evaluateMatchUp(List agentIDsInThisGame, int nGames, Lis } } - totalGamesRun += nGames; + totalGamesRun += nGames; // atomic } - private int updatePoints(GameResult[] results, List matchUpPlayers, int j, int player) { + private int updatePoints(AbstractGameState gs, GameResult[] results, List matchUpPlayers, int j, int player) { + // Note: This function may be called in parallelized games; ensure either atomic operations on this, or synchronized(this) // j is the index of the agent in the matchup; player is the corresponding player number in the game - int ordinalPos = game.getGameState().getOrdinalPosition(player); + int ordinalPos = gs.getOrdinalPosition(player); rankPerPlayer[j] += ordinalPos; rankPerPlayerSquared[j] += ordinalPos * ordinalPos; - for (int playerPos = 0; playerPos < game.getGameState().getNPlayers(); playerPos++) { + for (int playerPos = 0; playerPos < gs.getNPlayers(); playerPos++) { if (playerPos != player) { - int ordinalOther = game.getGameState().getOrdinalPosition(playerPos); + int ordinalOther = gs.getOrdinalPosition(playerPos); ordinalDeltaPerOpponent[j][matchUpPlayers.get(playerPos)] += ordinalOther - ordinalPos; } } @@ -424,10 +467,10 @@ protected void calculateFinalResults() { for (int i = 0; i < this.agents.size(); i++) { // We calculate the standard deviation, and hence the standard error on the mean value // (using a normal approximation, which is valid for large N) - double stdDev = Math.sqrt(pointsPerPlayerSquared[i] / nGamesPlayed[i] - (pointsPerPlayer[i] / nGamesPlayed[i]) + double stdDev = sqrt(pointsPerPlayerSquared[i] / nGamesPlayed[i] - (pointsPerPlayer[i] / nGamesPlayed[i]) * (pointsPerPlayer[i] / nGamesPlayed[i])); finalWinRanking.put(i, new Pair<>(pointsPerPlayer[i] / nGamesPlayed[i], stdDev / sqrt(nGamesPlayed[i]))); - stdDev = Math.sqrt(rankPerPlayerSquared[i] / nGamesPlayed[i] - (rankPerPlayer[i] / nGamesPlayed[i]) * (rankPerPlayer[i] / nGamesPlayed[i])); + stdDev = sqrt(rankPerPlayerSquared[i] / nGamesPlayed[i] - (rankPerPlayer[i] / nGamesPlayed[i]) * (rankPerPlayer[i] / nGamesPlayed[i])); finalOrdinalRanking.put(i, new Pair<>(rankPerPlayer[i] / nGamesPlayed[i], stdDev / sqrt(nGamesPlayed[i]))); } // Sort by points. diff --git a/src/main/java/players/mcts/MCTSTreeActionStatisticsListener.java b/src/main/java/players/mcts/MCTSTreeActionStatisticsListener.java index 9810ae88b..6b5cb93f2 100644 --- a/src/main/java/players/mcts/MCTSTreeActionStatisticsListener.java +++ b/src/main/java/players/mcts/MCTSTreeActionStatisticsListener.java @@ -31,7 +31,7 @@ public MCTSTreeActionStatisticsListener(IActionFeatureVector actionFeatures, ISt @Override - public void onEvent(Event event) { + public synchronized void onEvent(Event event) { if (event.type == Event.GameEvent.ACTION_CHOSEN) { // We extract the root node from the current player's tree AbstractPlayer player = this.getGame().getPlayers().get(event.state.getCurrentPlayer()); @@ -44,7 +44,7 @@ public void onEvent(Event event) { // else we do nothing } - public void recordData(SingleTreeNode root, AbstractForwardModel forwardModel) { + private void recordData(SingleTreeNode root, AbstractForwardModel forwardModel) { if (root instanceof MultiTreeNode) { // access the root for the acting player instead