
Lucasborboleta #14 (Open)

wants to merge 12 commits into master
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
+*.pyc
+__pycache__
Owner:
I prefer not to include generic .gitignores like this, in favour of each developer having their own global config, though I appreciate this view isn't shared by everyone. As such, I'd rather not include a .gitignore unless it's for generated files specific to this repository.

Author:

I understand your point.

35 changes: 24 additions & 11 deletions README.md
@@ -1,37 +1,50 @@
# MCTS

This package provides a simple way of using Monte Carlo Tree Search in any perfect information domain.

## Installation

With pip: `pip install mcts`

Without pip: Download the zip/tar.gz file of the [latest release](https://github.com/pbsinclair42/MCTS/releases), extract it, and run `python setup.py install`

## Quick Usage

-In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement four methods:
+In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement the following methods:

-- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimiser player
+- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimizer player
- `getPossibleActions()`: Returns an iterable of all actions which can be taken from this state
- `takeAction(action)`: Returns the state which results from taking action `action`
- `isTerminal()`: Returns whether this state is a terminal state
-- `getReward()`: Returns the reward for this state. Only needed for terminal states.
+- `getReward()`: Returns the reward for this state: 0 for a draw, positive for a win by the maximizer player or negative for a win by the minimizer player. Only needed for terminal states.

You must also choose a hashable representation for an action as used in `getPossibleActions` and `takeAction`. Typically this would be a class with a custom `__hash__` method, but it could also simply be a tuple or a string.
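For example, a minimal hashable action might look like the following sketch (the class and attribute names here are illustrative, not part of the package):

```python
class PlaceMark():
    """A hypothetical action: the given player marks cell (x, y)."""

    def __init__(self, player, x, y):
        self.player = player
        self.x = x
        self.y = y

    def __eq__(self, other):
        return (self.player, self.x, self.y) == (other.player, other.x, other.y)

    def __hash__(self):
        return hash((self.player, self.x, self.y))
```

Defining `__eq__` alongside `__hash__` keeps equal actions interchangeable as dictionary keys, which matters because the searcher stores child nodes keyed by action.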

Once these have been implemented, running MCTS is as simple as initializing your starting state, then running:

```python
from mcts import mcts

-mcts = mcts(timeLimit=1000)
-bestAction = mcts.search(initialState=initialState)
+currentState = MyState()
+...
+searcher = mcts(timeLimit=1000)
+bestAction = searcher.search(initialState=currentState)
+currentState = currentState.takeAction(bestAction)
+...
```

Owner:

This change has now been made in #13
See [naughtsandcrosses.py](https://github.com/pbsinclair42/MCTS/blob/master/naughtsandcrosses.py) for a simple example.

+## Detailed usage
+A few customizations are possible through the `mcts` constructor:

+- The number of MCTS search rounds can be limited by either a given time limit or a given iteration number.
+- The exploration constant $c$, which appears in the UCT score $w_i/n_i + c\sqrt{\ln N_i / n_i}$ with theoretical default setting $c = \sqrt{2}$, can be adapted to your game (see the sketch below).
+- The default uniform random rollout/playout policy can be changed.
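As a standalone illustration of that score (a sketch of the formula above, not the package's internal code; the variable names are chosen here for readability):

```python
import math

def uctScore(childTotalReward, childNumVisits, parentNumVisits, c=math.sqrt(2)):
    # w_i / n_i: average reward observed through this child (exploitation)
    exploitation = childTotalReward / childNumVisits
    # c * sqrt(ln N_i / n_i): bonus for rarely visited children (exploration)
    exploration = c * math.sqrt(math.log(parentNumVisits) / childNumVisits)
    return exploitation + exploration
```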

+A few statistics can be retrieved after each MCTS search call (see the `naughtsandcrosses.py` example).

## Slow Usage
-//TODO
+More on MCTS theory can be found at https://en.wikipedia.org/wiki/Monte_Carlo_tree_search and the references cited there.
Owner:
While this is useful, I'd still want to include more here before saying that the Detailed Usage / Slow Usage section is complete. Indeed, I feel like these notes would be useful in the quick usage section too, so maybe pop them there instead and put the slow usage todo note back?

Owner:
Also, could we simplify this to the following:

When initialising the MCTS searcher, there are a few optional parameters that can be used to optimise the search:

- timeLimit: the maximum duration of the search in milliseconds. Exactly one of timeLimit and iterationLimit must be set.
- iterationLimit: the maximum number of search iterations to be carried out. Exactly one of timeLimit and iterationLimit must be set.
- explorationConstant: a weight used when searching to help the algorithm balance exploring unknown areas vs exploring more deeply the areas it currently believes to be valuable. The higher this constant, the more the algorithm will prioritise exploring unknown areas. Default value is √2.
- rolloutPolicy: the policy to be used in the rollout phase when simulating one full playout. Default is a random uniform policy.
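For illustration, a sketch of these options in use; `myRolloutPolicy` is a hypothetical stand-in with the same contract as the default random policy (play the state out to a terminal state and return its reward):

```python
import math
import random

from mcts import mcts

# exactly one of timeLimit / iterationLimit must be set
searcher = mcts(iterationLimit=2000, explorationConstant=math.sqrt(2))

# a hypothetical custom rollout policy, same contract as the default one:
# take a state, play it out to a terminal state, return that state's reward
def myRolloutPolicy(state):
    while not state.isTerminal():
        state = state.takeAction(random.choice(state.getPossibleActions()))
    return state.getReward()

searcher = mcts(timeLimit=1000, rolloutPolicy=myRolloutPolicy)
```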

Author:
Very good synthesis for the quick usage 👍
Regarding the section "Slow usage", what do you mean by "slow"? Do you intend "detailed information"? Or do you have in mind another mode for MCTS that indirectly slows down the search (like a different rolloutPolicy)? Even if your material for this "slow usage" is not yet ready, could you either refine the title or put some hints in its TODO, if possible?

Owner:
Aye, that was going to be for more detailed information on the whole system, so sure changing that header to "Detailed Information" would probably be clearer, go for it.


## Collaborating

16 changes: 13 additions & 3 deletions mcts.py
@@ -27,7 +27,7 @@ def __init__(self, state, parent):


class mcts():
-    def __init__(self, timeLimit=None, iterationLimit=None, explorationConstant=1 / math.sqrt(2),
+    def __init__(self, timeLimit=None, iterationLimit=None, explorationConstant=math.sqrt(2),
                 rolloutPolicy=randomPolicy):
        if timeLimit != None:
            if iterationLimit != None:
@@ -75,6 +75,7 @@ def selectNode(self, node):

    def expand(self, node):
        actions = node.state.getPossibleActions()
+        random.shuffle(actions)
Owner:
What's the purpose of this shuffle?

Author:
When I experimented with the full game of NaughtsAndCrosses extended to grids of large size, up to 10, I observed the following: allocating 1 second was not sufficient for the MCTS agent to win against a random agent; the game usually terminates as a draw. Printing the state of the game at each turn, I observed that the actions selected by MCTS on its successive turns are all located in the first rows. So MCTS behaviour is somewhat predictable when its exploration of the tree is not sufficient for finding at least a few winning paths. That is the reason for the added shuffle in the expand method. It could be interesting to quantify the gain over many (1000?) games: does the added shuffle increase the number of games won by MCTS when the tree is quite large?
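A sketch of such an experiment (a hypothetical harness, assuming this PR's NaughtsAndCrossesState with its gridSize parameter; at 1 second per MCTS move a 1000-game run is long, so shrink the limits for a quick check):

```python
import random

from mcts import mcts
from naughtsandcrosses import NaughtsAndCrossesState

def playOneGame(gridSize=10, mctsPlayer=1, timeLimit=1000):
    state = NaughtsAndCrossesState(gridSize)
    while not state.isTerminal():
        if state.getCurrentPlayer() == mctsPlayer:
            action = mcts(timeLimit=timeLimit).search(initialState=state)
        else:
            action = random.choice(state.getPossibleActions())
        state = state.takeAction(action)
    # getReward() is +1/-1 for a win by the maximizer/minimizer, 0 for a draw
    return state.getReward() * mctsPlayer

results = [playOneGame() for _ in range(1000)]
print("wins:", results.count(1), "draws:", results.count(0), "losses:", results.count(-1))
```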

Owner:
Gotcha. I would argue that such deterministic behaviour on large search spaces is a symptom suggesting the search time needs to be significantly increased for the search to be effective, but I don't see any harm in putting this in here to avoid the ordering bias, aye.

Author:
On my game project, named JERSI-CERTU, to address the above-mentioned issue while still using an official release of your MCTS package, I have just shuffled the possible actions on the client side. What is your opinion on the two solutions? If you want your code to stay simple, the shuffle by the client is better.
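For reference, the client-side variant is a sketch like this, with `MyState` standing in for any concrete State implementation:

```python
import random

class ShuffledState(MyState):  # MyState: your concrete State class
    """Hypothetical wrapper: return the possible actions in random order,
    so the searcher's expansion order carries no positional bias."""

    def getPossibleActions(self):
        actions = list(super().getPossibleActions())
        random.shuffle(actions)
        return actions
```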

        for action in actions:
            if action not in node.children:
                newNode = treeNode(node.state.takeAction(action), node)
@@ -95,8 +96,8 @@ def getBestChild(self, node, explorationValue):
        bestValue = float("-inf")
        bestNodes = []
        for child in node.children.values():
-            nodeValue = node.state.getCurrentPlayer() * child.totalReward / child.numVisits + explorationValue * math.sqrt(
-                2 * math.log(node.numVisits) / child.numVisits)
+            nodeValue = (node.state.getCurrentPlayer() * child.totalReward / child.numVisits +
+                         explorationValue * math.sqrt(math.log(node.numVisits) / child.numVisits))
Comment on lines +99 to +100
Owner:
Good change 👍

Author:
Thanks!

            if nodeValue > bestValue:
                bestValue = nodeValue
                bestNodes = [child]
@@ -108,3 +109,12 @@ def getAction(self, root, bestChild):
        for action, node in root.children.items():
            if node is bestChild:
                return action

+    def getStatistics(self, action=None):
+        statistics = {}
+        statistics['rootNumVisits'] = self.root.numVisits
+        statistics['rootTotalReward'] = self.root.totalReward
+        if action is not None:
+            statistics['actionNumVisits'] = self.root.children[action].numVisits
+            statistics['actionTotalReward'] = self.root.children[action].totalReward
+        return statistics
Owner:
I'm not sure of the purpose of this? While I can see the ability to extract extra statistics from the search tree could be useful, this is a relatively limited set of statistics that is being generated, so it seems very specific to one use case, and therefore maybe doesn't belong here, but instead in the calling code? I agree that there needs to be some documentation on the appropriate way of extracting extra statistics from the search tree though.

Author:
If you can guarantee the stability of the tree structure, either by direct access to the attributes or by an API, then, yes, I agree that the kind of statistics I used can live on the "client" side.
The purpose of my extracted statistics was monitoring: for a large tree, given the allocated CPU time or round number, I was checking whether the selected action came from a few winning paths or whether no winning paths had been found at all.

Owner:
Makes sense. I'd rather hold off on adding this here then, instead offering a full API for extracting these details appropriately.
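Meanwhile, a client-side sketch of extracting the same numbers, relying on the searcher's `root` attribute (an internal detail rather than a stable API, so this may break between releases):

```python
from mcts import mcts

searcher = mcts(timeLimit=1000)
action = searcher.search(initialState=currentState)  # currentState: any State instance

# peek into the search tree through internal attributes
root = searcher.root
print(f"root: {root.totalReward} total reward over {root.numVisits} visits")
child = root.children[action]
print(f"chosen action: {child.totalReward} total reward over {child.numVisits} visits")
```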

103 changes: 82 additions & 21 deletions naughtsandcrosses.py
@@ -4,54 +4,72 @@
from mcts import mcts
from functools import reduce
import operator
+import random


class NaughtsAndCrossesState():
Owner:
This is deliberately designed to be a super simple example that users can easily eyeball and see what's going on, so I'd rather not complicate it like this. If you'd like to create an alternative search space and include both a more complex state and a more complex search (doing the full game instead of just one round) then I'd be happy to include that example too, but separately to this naughts and crosses one.

Author (LucasBorboleta, Jan 10, 2021):
I understand your point: providing a first super simple example. However, beside this first example, a second example that exercises the complete API, like playing the full game, could also help attract new users. Also, varying the tree depth and permuting the roles of the MCTS and random agents is instructive. Practically, I could propose: 1) the naughtsandcrosses.py file kept unchanged; 2) in another file, a generalization of the Connect Four game with a grid size parameter, with code for the full game, in the way I did with NaughtsAndCrosses. Would you agree?

Owner:
Aye, that sounds like a great idea, thanks!

-    def __init__(self):
-        self.board = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
+    playerNames = {1: 'O', -1: 'X'}
+
+    def __init__(self, gridSize=3):
+        self.gridSize = gridSize
+        self.board = [[0 for _ in range(self.gridSize)] for _ in range(self.gridSize)]
        self.currentPlayer = 1
+        self.possibleActions = None

+    def show(self):
+        for row in self.board:
+            row_text = ""
+            for cell in row:
+                if cell in self.playerNames:
+                    row_text += f" {self.playerNames[cell]} "
+                else:
+                    row_text += " . "
+            print(row_text)

    def getCurrentPlayer(self):
        return self.currentPlayer

    def getPossibleActions(self):
-        possibleActions = []
-        for i in range(len(self.board)):
-            for j in range(len(self.board[i])):
-                if self.board[i][j] == 0:
-                    possibleActions.append(Action(player=self.currentPlayer, x=i, y=j))
-        return possibleActions
+        if self.possibleActions is None:
+            self.possibleActions = []
+            for i in range(len(self.board)):
+                for j in range(len(self.board[i])):
+                    if self.board[i][j] == 0:
+                        self.possibleActions.append(Action(player=self.currentPlayer, x=i, y=j))
+        return self.possibleActions

    def takeAction(self, action):
        newState = deepcopy(self)
        newState.board[action.x][action.y] = action.player
        newState.currentPlayer = self.currentPlayer * -1
+        newState.possibleActions = None
        return newState

    def isTerminal(self):
        for row in self.board:
-            if abs(sum(row)) == 3:
+            if abs(sum(row)) == self.gridSize:
                return True
        for column in list(map(list, zip(*self.board))):
-            if abs(sum(column)) == 3:
+            if abs(sum(column)) == self.gridSize:
                return True
        for diagonal in [[self.board[i][i] for i in range(len(self.board))],
                         [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]:
-            if abs(sum(diagonal)) == 3:
+            if abs(sum(diagonal)) == self.gridSize:
                return True
        return reduce(operator.mul, sum(self.board, []), 1)

    def getReward(self):
        for row in self.board:
-            if abs(sum(row)) == 3:
-                return sum(row) / 3
+            if abs(sum(row)) == self.gridSize:
+                return sum(row) / self.gridSize
        for column in list(map(list, zip(*self.board))):
-            if abs(sum(column)) == 3:
-                return sum(column) / 3
+            if abs(sum(column)) == self.gridSize:
+                return sum(column) / self.gridSize
        for diagonal in [[self.board[i][i] for i in range(len(self.board))],
                         [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]:
-            if abs(sum(diagonal)) == 3:
-                return sum(diagonal) / 3
+            if abs(sum(diagonal)) == self.gridSize:
+                return sum(diagonal) / self.gridSize
        return False


@@ -74,8 +92,51 @@ def __hash__(self):
        return hash((self.x, self.y, self.player))


-initialState = NaughtsAndCrossesState()
-mcts = mcts(timeLimit=1000)
-action = mcts.search(initialState=initialState)
+def main():
+    """Example of a full NaughtsAndCrosses game played between MCTS and random searchers.
+    The standard 3x3 grid is randomly extended up to 10x10 in order to exercise the MCTS time resource.
+    One of the two players is randomly assigned to the MCTS searcher for the purpose of correctness checking.
+    Basic statistics are printed at each MCTS turn."""
+
+    playerNames = NaughtsAndCrossesState.playerNames
+    mctsPlayer = random.choice(sorted(playerNames.keys()))
+    gridSize = random.choice(list(range(3, 11)))
+
+    currentState = NaughtsAndCrossesState(gridSize)
+    turn = 0
+    currentState.show()
+    while not currentState.isTerminal():
+        turn += 1
+        player = currentState.getCurrentPlayer()
+        action_count = len(currentState.getPossibleActions())
+
+        if player == mctsPlayer:
+            searcher = mcts(timeLimit=1_000)
+            searcherName = "mcts-1-second"
+            action = searcher.search(initialState=currentState)
+            statistics = searcher.getStatistics(action)
+        else:
+            searcherName = "random"
+            action = random.choice(currentState.getPossibleActions())
+            statistics = None
+
+        currentState = currentState.takeAction(action)
+        print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName}) takes action {action} amongst {action_count} possibilities")
+
+        if statistics is not None:
+            print(f"mcts statistics for the chosen action: {statistics['actionTotalReward']} total reward over {statistics['actionNumVisits']} visits")
+            print(f"mcts statistics for all explored actions: {statistics['rootTotalReward']} total reward over {statistics['rootNumVisits']} visits")
+
+        print('-' * 90)
+        currentState.show()
+
+    print('-' * 90)
+    if currentState.getReward() == 0:
+        print(f"game {gridSize}x{gridSize} terminates; nobody wins")
+    else:
+        print(f"game {gridSize}x{gridSize} terminates; player {playerNames[player]}={player} ({searcherName}) wins")
+
+
+if __name__ == "__main__":
+    main()
-
-print(action)