Skip to content

Commit

Permalink
polish(xcy): polish comments in tree search files (#185)
Browse files Browse the repository at this point in the history
* polish(xcy): polish comment in tree_search

* polish(xcy):polish the comments for ptree

* polish(xcy): add comment to ptree files

* polish(xcy):add comments for ctree files
  • Loading branch information
HarryXuancy authored Feb 8, 2024
1 parent 3d338ae commit 6ffcc4d
Show file tree
Hide file tree
Showing 16 changed files with 1,074 additions and 607 deletions.
12 changes: 6 additions & 6 deletions lzero/mcts/ctree/ctree_efficientzero/lib/cnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from the current node.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from this node.
*/
std::vector<int> traj;
Expand All @@ -270,7 +270,7 @@ namespace tree
/*
Overview:
Get the distribution of child nodes in the format of visit_count.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<int> distribution;
Expand Down Expand Up @@ -378,7 +378,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from each root.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from each root.
*/
std::vector<std::vector<int> > trajs;
Expand All @@ -396,7 +396,7 @@ namespace tree
/*
Overview:
Get the children distribution of each root.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<std::vector<int> > distributions;
Expand Down Expand Up @@ -618,7 +618,7 @@ namespace tree
- discount_factor: the discount factor of reward.
- mean_q: the mean q value of the parent node.
- players: the number of players.
Outputs:
Returns:
- action: the action to select.
*/
float max_score = FLOAT_MIN;
Expand Down Expand Up @@ -667,7 +667,7 @@ namespace tree
- pb_c_init: constants c1 in muzero.
- discount_factor: the discount factor of reward.
- players: the number of players.
Outputs:
Returns:
- ucb_value: the ucb score of the child.
*/
float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
Expand Down
30 changes: 15 additions & 15 deletions lzero/mcts/ctree/ctree_gumbel_muzero/lib/cnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ namespace tree{
/*
Overview:
Find the current best trajectory starting from the current node.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from this node.
*/
std::vector<int> traj;
Expand All @@ -287,7 +287,7 @@ namespace tree{
/*
Overview:
Get the distribution of child nodes in the format of visit_count.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<int> distribution;
Expand All @@ -311,7 +311,7 @@ namespace tree{
/*
Overview:
Get the completed value of child nodes.
Outputs:
Arguments:
- discount_factor: the discount_factor of reward.
- action_space_size: the size of action space.
*/
Expand Down Expand Up @@ -468,7 +468,7 @@ namespace tree{
/*
Overview:
Find the current best trajectory starting from each root.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from each root.
*/
std::vector<std::vector<int> > trajs;
Expand All @@ -486,7 +486,7 @@ namespace tree{
/*
Overview:
Get the children distribution of each root.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<std::vector<int> > distributions;
Expand Down Expand Up @@ -664,7 +664,7 @@ namespace tree{
- discount_factor: the discount factor of reward.
- mean_q: the mean q value of the parent node.
- players: the number of players.
Outputs:
Returns:
- action: the action to select.
*/
float max_score = FLOAT_MIN;
Expand Down Expand Up @@ -708,7 +708,7 @@ namespace tree{
- discount_factor: the discount factor of reward.
- num_simulations: the upper limit number of simulations.
- max_num_considered_actions: the maximum number of considered actions.
Outputs:
Returns:
- action: the action to select.
*/
std::vector<int> child_visit_count;
Expand Down Expand Up @@ -752,7 +752,7 @@ namespace tree{
Arguments:
- root: the roots to select the child node.
- discount_factor: the discount factor of reward.
Outputs:
Returns:
- action: the action to select.
*/
std::vector<int> child_visit_count;
Expand Down Expand Up @@ -803,7 +803,7 @@ namespace tree{
- pb_c_init: constants c1 in muzero.
- discount_factor: the discount factor of reward.
- players: the number of players.
Outputs:
Returns:
- ucb_value: the ucb score of the child.
*/
float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
Expand Down Expand Up @@ -942,7 +942,7 @@ namespace tree{
- q_value: the q value of the current node.
- child_visit: the visit counts of the child nodes.
- child_prior: the prior of the child nodes.
Outputs:
Returns:
- mixed Q value.
*/
float visit_count_sum = 0.0;
Expand Down Expand Up @@ -1002,7 +1002,7 @@ namespace tree{
- value_scale: the scale of value.
- rescale_values: whether to rescale the values.
- epsilon: the lower limit of gap in max-min normalization
Outputs:
Returns:
- completed Q value.
*/
assert (child_visit.size() == child_prior.size());
Expand Down Expand Up @@ -1047,7 +1047,7 @@ namespace tree{
Arguments:
- max_num_considered_actions: the maximum number of considered actions.
- num_simulations: the upper limit number of simulations.
Outputs:
Returns:
- the considered visit sequence.
*/
std::vector<int> visit_seq;
Expand Down Expand Up @@ -1084,7 +1084,7 @@ namespace tree{
Arguments:
- max_num_considered_actions: the maximum number of considered actions.
- num_simulations: the upper limit number of simulations.
Outputs:
Returns:
- the table of considered visits.
*/
std::vector<std::vector<int> > table;
Expand All @@ -1105,7 +1105,7 @@ namespace tree{
- logits: the logits vector of child nodes.
- normalized_qvalues: the normalized Q values of child nodes.
- visit_counts: the visit counts of child nodes.
Outputs:
Returns:
- the score of nodes to be considered.
*/
float low_logit = -1e9;
Expand Down Expand Up @@ -1139,7 +1139,7 @@ namespace tree{
- gumbel_scale: the scale of gumbel.
- gumbel_rng: the seed to generate gumbel.
- shape: the shape of gumbel vectors to be generated
Outputs:
Returns:
- gumbel vectors.
*/
std::mt19937 gen(static_cast<unsigned int>(gumbel_rng));
Expand Down
12 changes: 6 additions & 6 deletions lzero/mcts/ctree/ctree_muzero/lib/cnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from the current node.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from this node.
*/
std::vector<int> traj;
Expand All @@ -261,7 +261,7 @@ namespace tree
/*
Overview:
Get the distribution of child nodes in the format of visit_count.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<int> distribution;
Expand Down Expand Up @@ -371,7 +371,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from each root.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from each root.
*/
std::vector<std::vector<int> > trajs;
Expand All @@ -389,7 +389,7 @@ namespace tree
/*
Overview:
Get the children distribution of each root.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<std::vector<int> > distributions;
Expand Down Expand Up @@ -561,7 +561,7 @@ namespace tree
- discount_factor: the discount factor of reward.
- mean_q: the mean q value of the parent node.
- players: the number of players.
Outputs:
Returns:
- action: the action to select.
*/
float max_score = FLOAT_MIN;
Expand Down Expand Up @@ -609,7 +609,7 @@ namespace tree
- pb_c_init: constants c1 in muzero.
- discount_factor: the discount factor of reward.
- players: the number of players.
Outputs:
Returns:
- ucb_value: the ucb score of the child.
*/
float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
Expand Down
16 changes: 8 additions & 8 deletions lzero/mcts/ctree/ctree_sampled_efficientzero/lib/cnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ namespace tree
{
/*
Overview:
get the final combined hash value from the hash values of each dimension of the multi-dimensional action.
Get the final combined hash value from the hash values of each dimension of the multi-dimensional action.
*/
std::vector<size_t> hash = this->get_hash();
size_t combined_hash = hash[0];
Expand Down Expand Up @@ -558,7 +558,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from the current node.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from this node.
*/
std::vector<CAction> traj;
Expand All @@ -585,7 +585,7 @@ namespace tree
/*
Overview:
Get the distribution of child nodes in the format of visit_count.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<int> distribution;
Expand Down Expand Up @@ -724,7 +724,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from each root.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from each root.
*/
std::vector<std::vector<std::vector<float> > > trajs;
Expand All @@ -742,7 +742,7 @@ namespace tree
/*
Overview:
Get the children distribution of each root.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<std::vector<int> > distributions;
Expand All @@ -761,7 +761,7 @@ namespace tree
/*
Overview:
Get the sampled_actions of each root.
Outputs:
Returns:
- python_sampled_actions: a vector of sampled_actions for each root, e.g. the size of original action space is 6, the K=3,
python_sampled_actions = [[1,3,0], [2,4,0], [5,4,1]].
*/
Expand Down Expand Up @@ -984,7 +984,7 @@ namespace tree
- mean_q: the mean q value of the parent node.
- players: the number of players.
- continuous_action_space: whether the action space is continuous in the current env.
Outputs:
Returns:
- action: the action to select.
*/
// sampled related core code
Expand Down Expand Up @@ -1040,7 +1040,7 @@ namespace tree
- discount_factor: the discount factor of reward.
- players: the number of players.
- continuous_action_space: whether the action space is continuous in the current env.
Outputs:
Returns:
- ucb_value: the ucb score of the child.
*/
float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
Expand Down
13 changes: 7 additions & 6 deletions lzero/mcts/ctree/ctree_stochastic_muzero/lib/cnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ namespace tree
Arguments:
- prior: the prior value of this node.
- legal_actions: a vector of legal actions of this node.
- is_chance: Whether the node is a chance node.
*/
this->prior = prior;
this->legal_actions = legal_actions;
Expand Down Expand Up @@ -264,7 +265,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from the current node.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from this node.
*/
std::vector<int> traj;
Expand All @@ -286,7 +287,7 @@ namespace tree
/*
Overview:
Get the distribution of child nodes in the format of visit_count.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<int> distribution;
Expand Down Expand Up @@ -398,7 +399,7 @@ namespace tree
/*
Overview:
Find the current best trajectory starting from each root.
Outputs:
Returns:
- traj: a vector of node index, which is the current best trajectory from each root.
*/
std::vector<std::vector<int> > trajs;
Expand All @@ -416,7 +417,7 @@ namespace tree
/*
Overview:
Get the children distribution of each root.
Outputs:
Returns:
- distribution: a vector of distribution of child nodes in the format of visit count (i.e. [1,3,0,2,5]).
*/
std::vector<std::vector<int> > distributions;
Expand Down Expand Up @@ -603,7 +604,7 @@ namespace tree
- discount_factor: the discount factor of reward.
- mean_q: the mean q value of the parent node.
- players: the number of players.
Outputs:
Returns:
- action: the action to select.
*/
if (root->is_chance) {
Expand Down Expand Up @@ -675,7 +676,7 @@ namespace tree
- pb_c_init: constants c1 in muzero.
- discount_factor: the discount factor of reward.
- players: the number of players.
Outputs:
Returns:
- ucb_value: the ucb score of the child.
*/
float pb_c = 0.0, prior_score = 0.0, value_score = 0.0;
Expand Down
Loading

0 comments on commit 6ffcc4d

Please sign in to comment.