polish(xcy): polish comments in tree search files (#185)

* polish(xcy): polish comment in tree_search * polish(xcy): polish the comments for ptree * polish(xcy): add comment to ptree files * polish(xcy): add comments for ctree files
opendilab · Feb 8, 2024 · 6ffcc4d · 6ffcc4d
1 parent 3d338ae
commit 6ffcc4d
Show file tree

Hide file tree

Showing 16 changed files with 1,074 additions and 607 deletions.
diff --git a/lzero/mcts/ctree/ctree_efficientzero/lib/cnode.cpp b/lzero/mcts/ctree/ctree_efficientzero/lib/cnode.cpp
@@ -248,7 +248,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from the current node.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from this node.
         */
         std::vector<int> traj;
@@ -270,7 +270,7 @@ namespace tree
         /*
         Overview:
             Get the distribution of child nodes in the format of visit_count.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<int> distribution;
@@ -378,7 +378,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from each root.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from each root.
         */
         std::vector<std::vector<int> > trajs;
@@ -396,7 +396,7 @@ namespace tree
         /*
         Overview:
             Get the children distribution of each root.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<std::vector<int> > distributions;
@@ -618,7 +618,7 @@ namespace tree
             - discount_factor: the discount factor of reward.
             - mean_q: the mean q value of the parent node.
             - players: the number of players.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         float max_score = FLOAT_MIN;
@@ -667,7 +667,7 @@ namespace tree
             - pb_c_init: constants c1 in muzero.
             - discount_factor: the discount factor of reward.
             - players: the number of players.
-        Outputs:
+        Returns:
             - ucb_value: the ucb score of the child.
         */
         float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;

diff --git a/lzero/mcts/ctree/ctree_gumbel_muzero/lib/cnode.cpp b/lzero/mcts/ctree/ctree_gumbel_muzero/lib/cnode.cpp
@@ -265,7 +265,7 @@ namespace tree{
         /*
         Overview:
             Find the current best trajectory starts from the current node.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from this node.
         */
         std::vector<int> traj;
@@ -287,7 +287,7 @@ namespace tree{
         /*
         Overview:
             Get the distribution of child nodes in the format of visit_count.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<int> distribution;
@@ -311,7 +311,7 @@ namespace tree{
         /*
         Overview:
             Get the completed value of child nodes.
-        Outputs:
+        Arguments:
             - discount_factor: the discount_factor of reward.
             - action_space_size: the size of action space.
         */
@@ -468,7 +468,7 @@ namespace tree{
         /*
         Overview:
             Find the current best trajectory starts from each root.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from each root.
         */
         std::vector<std::vector<int> > trajs;
@@ -486,7 +486,7 @@ namespace tree{
         /*
         Overview:
             Get the children distribution of each root.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<std::vector<int> > distributions;
@@ -664,7 +664,7 @@ namespace tree{
             - discount_factor: the discount factor of reward.
             - mean_q: the mean q value of the parent node.
             - players: the number of players.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         float max_score = FLOAT_MIN;
@@ -708,7 +708,7 @@ namespace tree{
             - discount_factor: the discount factor of reward.
             - num_simulations: the upper limit number of simulations.
             - max_num_considered_actions: the maximum number of considered actions.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         std::vector<int> child_visit_count;
@@ -752,7 +752,7 @@ namespace tree{
         Arguments:
             - root: the roots to select the child node.
             - discount_factor: the discount factor of reward.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         std::vector<int> child_visit_count;
@@ -803,7 +803,7 @@ namespace tree{
             - pb_c_init: constants c1 in muzero.
             - discount_factor: the discount factor of reward.
             - players: the number of players.
-        Outputs:
+        Returns:
             - ucb_value: the ucb score of the child.
         */
         float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
@@ -942,7 +942,7 @@ namespace tree{
             - q_value: the q value of the current node.
             - child_visit: the visit counts of the child nodes.
             - child_prior: the prior of the child nodes.
-        Outputs:
+        Returns:
             - mixed Q value.
         */
         float visit_count_sum = 0.0;
@@ -1002,7 +1002,7 @@ namespace tree{
             - value_scale: the scale of value.
             - rescale_values: whether to rescale the values.
             - epsilon: the lower limit of gap in max-min normalization
-        Outputs:
+        Returns:
             - completed Q value.
         */
         assert (child_visit.size() == child_prior.size());
@@ -1047,7 +1047,7 @@ namespace tree{
         Arguments:
             - max_num_considered_actions: the maximum number of considered actions.
             - num_simulations: the upper limit number of simulations.
-        Outputs:
+        Returns:
             - the considered visit sequence.
         */
         std::vector<int> visit_seq;
@@ -1084,7 +1084,7 @@ namespace tree{
         Arguments:
             - max_num_considered_actions: the maximum number of considered actions.
             - num_simulations: the upper limit number of simulations.
-        Outputs:
+        Returns:
             - the table of considered visits.
         */
         std::vector<std::vector<int> > table;
@@ -1105,7 +1105,7 @@ namespace tree{
             - logits: the logits vector of child nodes.
             - normalized_qvalues: the normalized Q values of child nodes.
             - visit_counts: the visit counts of child nodes.
-        Outputs:
+        Returns:
             - the score of nodes to be considered.
         */
         float low_logit = -1e9;
@@ -1139,7 +1139,7 @@ namespace tree{
             - gumbel_scale: the scale of gumbel.
             - gumbel_rng: the seed to generate gumbel.
             - shape: the shape of gumbel vectors to be generated
-        Outputs:
+        Returns:
             - gumbel vectors.
         */
         std::mt19937 gen(static_cast<unsigned int>(gumbel_rng));

diff --git a/lzero/mcts/ctree/ctree_muzero/lib/cnode.cpp b/lzero/mcts/ctree/ctree_muzero/lib/cnode.cpp
@@ -239,7 +239,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from the current node.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from this node.
         */
         std::vector<int> traj;
@@ -261,7 +261,7 @@ namespace tree
         /*
         Overview:
             Get the distribution of child nodes in the format of visit_count.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<int> distribution;
@@ -371,7 +371,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from each root.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from each root.
         */
         std::vector<std::vector<int> > trajs;
@@ -389,7 +389,7 @@ namespace tree
         /*
         Overview:
             Get the children distribution of each root.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<std::vector<int> > distributions;
@@ -561,7 +561,7 @@ namespace tree
             - discount_factor: the discount factor of reward.
             - mean_q: the mean q value of the parent node.
             - players: the number of players.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         float max_score = FLOAT_MIN;
@@ -609,7 +609,7 @@ namespace tree
             - pb_c_init: constants c1 in muzero.
             - discount_factor: the discount factor of reward.
             - players: the number of players.
-        Outputs:
+        Returns:
             - ucb_value: the ucb score of the child.
         */
         float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;

diff --git a/lzero/mcts/ctree/ctree_sampled_efficientzero/lib/cnode.cpp b/lzero/mcts/ctree/ctree_sampled_efficientzero/lib/cnode.cpp
@@ -94,7 +94,7 @@ namespace tree
     {
         /*
         Overview:
-            get the final combined hash value from the hash values of each dimension of the multi-dimensional action.
+            Get the final combined hash value from the hash values of each dimension of the multi-dimensional action.
         */
         std::vector<size_t> hash = this->get_hash();
         size_t combined_hash = hash[0];
@@ -558,7 +558,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from the current node.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from this node.
         */
         std::vector<CAction> traj;
@@ -585,7 +585,7 @@ namespace tree
         /*
         Overview:
             Get the distribution of child nodes in the format of visit_count.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<int> distribution;
@@ -724,7 +724,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from each root.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from each root.
         */
         std::vector<std::vector<std::vector<float> > > trajs;
@@ -742,7 +742,7 @@ namespace tree
         /*
         Overview:
             Get the children distribution of each root.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<std::vector<int> > distributions;
@@ -761,7 +761,7 @@ namespace tree
         /*
         Overview:
             Get the sampled_actions of each root.
-        Outputs:
+        Returns:
             - python_sampled_actions: a vector of sampled_actions for each root, e.g. the size of original action space is 6, the K=3, 
             python_sampled_actions = [[1,3,0], [2,4,0], [5,4,1]].
         */
@@ -984,7 +984,7 @@ namespace tree
             - mean_q: the mean q value of the parent node.
             - players: the number of players.
             - continuous_action_space: whether the action space is continuous in current env.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         // sampled related core code
@@ -1040,7 +1040,7 @@ namespace tree
             - discount_factor: the discount factor of reward.
             - players: the number of players.
             - continuous_action_space: whether the action space is continuous in current env.
-        Outputs:
+        Returns:
             - ucb_value: the ucb score of the child.
         */
         float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;

diff --git a/lzero/mcts/ctree/ctree_stochastic_muzero/lib/cnode.cpp b/lzero/mcts/ctree/ctree_stochastic_muzero/lib/cnode.cpp
@@ -76,6 +76,7 @@ namespace tree
         Arguments:
             - prior: the prior value of this node.
             - legal_actions: a vector of legal actions of this node.
+            - is_chance: Whether the node is a chance node.
         */
         this->prior = prior;
         this->legal_actions = legal_actions;
@@ -264,7 +265,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from the current node.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from this node.
         */
         std::vector<int> traj;
@@ -286,7 +287,7 @@ namespace tree
         /*
         Overview:
             Get the distribution of child nodes in the format of visit_count.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<int> distribution;
@@ -398,7 +399,7 @@ namespace tree
         /*
         Overview:
             Find the current best trajectory starts from each root.
-        Outputs:
+        Returns:
             - traj: a vector of node index, which is the current best trajectory from each root.
         */
         std::vector<std::vector<int> > trajs;
@@ -416,7 +417,7 @@ namespace tree
         /*
         Overview:
             Get the children distribution of each root.
-        Outputs:
+        Returns:
             - distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
         */
         std::vector<std::vector<int> > distributions;
@@ -603,7 +604,7 @@ namespace tree
             - discount_factor: the discount factor of reward.
             - mean_q: the mean q value of the parent node.
             - players: the number of players.
-        Outputs:
+        Returns:
             - action: the action to select.
         */
         if (root->is_chance) {
@@ -675,7 +676,7 @@ namespace tree
             - pb_c_init: constants c1 in muzero.
             - discount_factor: the discount factor of reward.
             - players: the number of players.
-        Outputs:
+        Returns:
             - ucb_value: the ucb score of the child.
         */
         float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;