diff --git a/lzero/mcts/tests/cprofile_mcts_ptree.py b/lzero/mcts/tests/cprofile_mcts_ptree.py
index 4edf376ec..956ec39fa 100644
--- a/lzero/mcts/tests/cprofile_mcts_ptree.py
+++ b/lzero/mcts/tests/cprofile_mcts_ptree.py
@@ -27,7 +27,7 @@ def initial_inference(self, observation):
         reward_hidden_state_state = (torch.zeros(size=(1, batch_size, 16)), torch.zeros(size=(1, batch_size, 16)))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
@@ -45,7 +45,7 @@ def recurrent_inference(self, hidden_states, reward_hidden_states, actions):
         policy_logits = torch.zeros(size=(batch_size, self.action_num))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
diff --git a/lzero/mcts/tests/eval_tree_speed.py b/lzero/mcts/tests/eval_tree_speed.py
index b80957dbf..c7134f3b3 100644
--- a/lzero/mcts/tests/eval_tree_speed.py
+++ b/lzero/mcts/tests/eval_tree_speed.py
@@ -32,7 +32,7 @@ def initial_inference(self, observation):
         reward_hidden_state_state = (torch.zeros(size=(1, batch_size, 16)), torch.zeros(size=(1, batch_size, 16)))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
@@ -50,7 +50,7 @@ def recurrent_inference(self, hidden_states, reward_hidden_states, actions):
         policy_logits = torch.zeros(size=(batch_size, self.action_num))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
diff --git a/lzero/mcts/tests/test_mcts_ctree.py b/lzero/mcts/tests/test_mcts_ctree.py
index f15fe6780..0e569d329 100644
--- a/lzero/mcts/tests/test_mcts_ctree.py
+++ b/lzero/mcts/tests/test_mcts_ctree.py
@@ -37,7 +37,7 @@ def initial_inference(self, observation):
         reward_hidden_state_roots = (torch.zeros(size=(1, batch_size, 16)), torch.zeros(size=(1, batch_size, 16)))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
@@ -60,7 +60,7 @@ def recurrent_inference(self, latent_states, reward_hidden_states, actions=None)
         policy_logits = torch.zeros(size=(batch_size, self.action_num))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
diff --git a/lzero/mcts/tests/test_mcts_ptree.py b/lzero/mcts/tests/test_mcts_ptree.py
index 613c0a205..75ec347ca 100644
--- a/lzero/mcts/tests/test_mcts_ptree.py
+++ b/lzero/mcts/tests/test_mcts_ptree.py
@@ -29,7 +29,7 @@ def initial_inference(self, observation):
         reward_hidden_state_state = (torch.zeros(size=(1, batch_size, 16)), torch.zeros(size=(1, batch_size, 16)))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
@@ -47,7 +47,7 @@ def recurrent_inference(self, hidden_states, reward_hidden_states, actions):
         policy_logits = torch.zeros(size=(batch_size, self.action_num))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
diff --git a/lzero/mcts/tests/test_mcts_sampled_ctree.py b/lzero/mcts/tests/test_mcts_sampled_ctree.py
index 8b0a74b21..2e4c15277 100644
--- a/lzero/mcts/tests/test_mcts_sampled_ctree.py
+++ b/lzero/mcts/tests/test_mcts_sampled_ctree.py
@@ -29,7 +29,7 @@ def initial_inference(self, observation):
         reward_hidden_state_state = (torch.zeros(size=(1, batch_size, 16)), torch.zeros(size=(1, batch_size, 16)))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
@@ -48,7 +48,7 @@ def recurrent_inference(self, hidden_states, reward_hidden_states, actions):
         # policy_logits = torch.zeros(size=(batch_size, self.action_num))
 
         output = {
-            'value': value,
+            'searched_value': value,
             'value_prefix': value_prefix,
             'policy_logits': policy_logits,
             'latent_state': latent_state,
diff --git a/lzero/policy/efficientzero.py b/lzero/policy/efficientzero.py
index cd69bba86..685e473d7 100644
--- a/lzero/policy/efficientzero.py
+++ b/lzero/policy/efficientzero.py
@@ -155,7 +155,7 @@ class EfficientZeroPolicy(MuZeroPolicy):
         # (float) The fixed temperature value for MCTS action selection, which is used to control the exploration.
         # The larger the value, the more exploration. This value is only used when manual_temperature_decay=False.
         fixed_temperature_value=0.25,
-        # (bool) Whether to use the true chance in MCTS in 2048 env.
+        # (bool) Whether to use the true chance in MCTS in some environments with stochastic dynamics, such as 2048.
         use_ture_chance_label_in_chance_encoder=False,
 
         # ****** Priority ******
@@ -626,11 +626,11 @@ def _forward_collect(
                 action = np.where(action_mask[i] == 1.0)[0][action_index_in_legal_action_set]
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -646,7 +646,7 @@ def _init_eval(self) -> None:
         else:
             self._mcts_eval = MCTSPtree(self._cfg)
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id=None):
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id: np.array = None,):
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.
@@ -718,11 +718,11 @@ def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, read
                 action = np.where(action_mask[i] == 1.0)[0][action_index_in_legal_action_set]
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
diff --git a/lzero/policy/gumbel_muzero.py b/lzero/policy/gumbel_muzero.py
index b4f88a387..218ab99b6 100644
--- a/lzero/policy/gumbel_muzero.py
+++ b/lzero/policy/gumbel_muzero.py
@@ -486,7 +486,7 @@ def _forward_collect(
             action_mask: list = None,
             temperature: float = 1,
             to_play: List = [-1],
-            ready_env_id=None
+            ready_env_id: np.array = None,
     ) -> Dict:
         """
         Overview:
@@ -572,13 +572,13 @@ def _forward_collect(
                 action = np.argmax([v for v in valid_value])
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
+                    'searched_value': value,
                     'roots_completed_value': roots_completed_value,
                     'improved_policy_probs': improved_policy_probs,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -594,7 +594,7 @@ def _init_eval(self) -> None:
         else:
             self._mcts_eval = MCTSPtree(self._cfg)
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id=None) -> Dict:
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id: np.array = None,) -> Dict:
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.
@@ -674,11 +674,11 @@ def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1
 
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
diff --git a/lzero/policy/muzero.py b/lzero/policy/muzero.py
index 136e7d250..80bbc53ba 100644
--- a/lzero/policy/muzero.py
+++ b/lzero/policy/muzero.py
@@ -152,7 +152,7 @@ class MuZeroPolicy(Policy):
         # (float) The fixed temperature value for MCTS action selection, which is used to control the exploration.
         # The larger the value, the more exploration. This value is only used when manual_temperature_decay=False.
         fixed_temperature_value=0.25,
-        # (bool) Whether to use the true chance in MCTS in 2048 env.
+        # (bool) Whether to use the true chance in MCTS in some environments with stochastic dynamics, such as 2048.
         use_ture_chance_label_in_chance_encoder=False,
 
         # ****** Priority ******
@@ -477,7 +477,7 @@ def _forward_collect(
             temperature: float = 1,
             to_play: List = [-1],
             epsilon: float = 0.25,
-            ready_env_id=None
+            ready_env_id: np.array = None,
     ) -> Dict:
         """
         Overview:
@@ -562,11 +562,11 @@ def _forward_collect(
                 action = np.where(action_mask[i] == 1.0)[0][action_index_in_legal_action_set]
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -606,7 +606,7 @@ def _get_target_obs_index_in_step_k(self, step):
             end_index = self._cfg.model.observation_shape * (step + self._cfg.model.frame_stack_num)
         return beg_index, end_index
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id=None) -> Dict:
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id: np.array = None,) -> Dict:
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.
@@ -676,11 +676,11 @@ def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1
 
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
diff --git a/lzero/policy/random_policy.py b/lzero/policy/random_policy.py
index 3abd568ce..735a4122d 100644
--- a/lzero/policy/random_policy.py
+++ b/lzero/policy/random_policy.py
@@ -20,7 +20,7 @@ def __init__(
             cfg: dict,
             model: Optional[Union[type, torch.nn.Module]] = None,
             enable_field: Optional[List[str]] = None,
-            action_space = None,
+            action_space: Any = None,
     ):
         if cfg.type == 'muzero':
             from lzero.mcts import MuZeroMCTSCtree as MCTSCtree
@@ -65,15 +65,15 @@ def default_model(self) -> Tuple[str, List[str]]:
         elif self._cfg.type == 'muzero':
             return 'MuZeroModelMLP', ['lzero.model.muzero_model_mlp']
         elif self._cfg.type == 'sampled_efficientzero':
-            return 'SampledEfficientZeroModelMLP', ['lzero.model.sampled_efficientzero_modelMLP']
+            return 'SampledEfficientZeroModelMLP', ['lzero.model.sampled_efficientzero_model_mlp']
         else:
             raise NotImplementedError("need to implement pipeline: {}".format(self._cfg.type))
 
     def _init_collect(self) -> None:
         """
-         Overview:
-             Collect mode init method. Called by ``self.__init__``. Initialize the collect model and MCTS utils.
-         """
+        Overview:
+            Collect mode init method. Called by ``self.__init__``. Initialize the collect model and MCTS utils.
+        """
         self._collect_model = self._model
         if self._cfg.mcts_ctree:
             self._mcts_collect = self.MCTSCtree(self._cfg)
@@ -92,8 +92,8 @@ def _forward_collect(
             temperature: float = 1,
             to_play: List = [-1],
             epsilon: float = 0.25,
-            ready_env_id=None,
-    ):
+            ready_env_id: np.array = None,
+    ) -> Dict:
         """
         Overview:
             The forward function for collecting data in collect mode. Use model to execute MCTS search.
@@ -141,7 +141,7 @@ def _forward_collect(
         )
         policy_logits = policy_logits.detach().cpu().numpy().tolist()
 
-        if self._cfg.model.continuous_action_space is True:
+        if self._cfg.model.continuous_action_space:
             # when the action space of the environment is continuous, action_mask[:] is None.
             # NOTE: in continuous action space env: we set all legal_actions as -1
             legal_actions = [
@@ -208,11 +208,12 @@ def _forward_collect(
             distributions, value = roots_visit_count_distributions[i], roots_values[i]
 
             if self._cfg.type in ['sampled_efficientzero']:
-                try:
-                    root_sampled_actions = np.array([action.value for action in roots_sampled_actions[i]])
-                except Exception:
-                    # logging.warning('ctree_sampled_efficientzero roots.get_sampled_actions() return list')
+                if self._cfg.mcts_ctree:
+                    # In ctree, the method roots.get_sampled_actions() returns a list object.
                     root_sampled_actions = np.array([action for action in roots_sampled_actions[i]])
+                else:
+                    # In ptree, the same method roots.get_sampled_actions() returns an Action object.
+                    root_sampled_actions = np.array([action.value for action in roots_sampled_actions[i]])
 
             # NOTE: Only legal actions possess visit counts, so the ``action_index_in_legal_action_set`` represents
             # the index within the legal action set, rather than the index in the entire action set.
@@ -220,32 +221,32 @@ def _forward_collect(
                 distributions, temperature=self._collect_mcts_temperature, deterministic=False
             )
 
-            # ****** sample a random action from the legal action set ********
-            if self._cfg.type in ['sampled_efficientzero']:
-                random_action = self.action_space.sample()
-            else:
-                # all items except action are formally obtained from MCTS
-                random_action = int(np.random.choice(legal_actions[env_id], 1))
             # ****************************************************************
-            # NOTE: The action is randomly selected from the legal action set, the distribution is the real visit count distribution from the MCTS seraech.
+            # NOTE: The action is randomly selected from the legal action set,
+            # the distribution is the real visit count distribution from the MCTS search.
             if self._cfg.type in ['sampled_efficientzero']:
+                # ****** sample a random action from the legal action set ********
+                random_action = self.action_space.sample()
                 output[env_id] = {
                     'action': random_action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'root_sampled_actions': root_sampled_actions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
             else:
+                # ****** sample a random action from the legal action set ********
+                random_action = int(np.random.choice(legal_actions[env_id], 1))
+                # all items except action are formally obtained from MCTS
                 output[env_id] = {
                     'action': random_action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -268,7 +269,7 @@ def _init_learn(self) -> None:
     def _forward_learn(self, data: torch.Tensor) -> Dict[str, Union[float, int]]:
         pass
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id=None):
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id: np.array = None,):
         pass
 
     def _monitor_vars_learn(self) -> List[str]:
diff --git a/lzero/policy/sampled_efficientzero.py b/lzero/policy/sampled_efficientzero.py
index 5f6509d68..184ab8c42 100644
--- a/lzero/policy/sampled_efficientzero.py
+++ b/lzero/policy/sampled_efficientzero.py
@@ -170,7 +170,7 @@ class SampledEfficientZeroPolicy(MuZeroPolicy):
         # (float) The fixed temperature value for MCTS action selection, which is used to control the exploration.
         # The larger the value, the more exploration. This value is only used when manual_temperature_decay=False.
         fixed_temperature_value=0.25,
-        # (bool) Whether to use the true chance in MCTS in 2048 env.
+        # (bool) Whether to use the true chance in MCTS in some environments with stochastic dynamics, such as 2048.
         use_ture_chance_label_in_chance_encoder=False,
 
         # ****** Priority ******
@@ -788,7 +788,7 @@ def _init_collect(self) -> None:
 
     def _forward_collect(
             self, data: torch.Tensor, action_mask: list = None, temperature: np.ndarray = 1, to_play=-1,
-            epsilon: float = 0.25, ready_env_id=None
+            epsilon: float = 0.25, ready_env_id: np.array = None,
     ):
         """
         Overview:
@@ -878,23 +878,25 @@ def _forward_collect(
             for i, env_id in enumerate(ready_env_id):
                 distributions, value = roots_visit_count_distributions[i], roots_values[i]
 
-                try:
-                    root_sampled_actions = np.array([action.value for action in roots_sampled_actions[i]])
-                except Exception:
-                    # logging.warning('ctree_sampled_efficientzero roots.get_sampled_actions() return list')
+                if self._cfg.mcts_ctree:
+                    # In ctree, the method roots.get_sampled_actions() returns a list object.
                     root_sampled_actions = np.array([action for action in roots_sampled_actions[i]])
+                else:
+                    # In ptree, the same method roots.get_sampled_actions() returns an Action object.
+                    root_sampled_actions = np.array([action.value for action in roots_sampled_actions[i]])
 
                 # NOTE: Only legal actions possess visit counts, so the ``action_index_in_legal_action_set`` represents
                 # the index within the legal action set, rather than the index in the entire action set.
                 action, visit_count_distribution_entropy = select_action(
                     distributions, temperature=self._collect_mcts_temperature, deterministic=False
                 )
-                try:
-                    action = roots_sampled_actions[i][action].value
-                    # logging.warning('ptree_sampled_efficientzero roots.get_sampled_actions() return array')
-                except Exception:
-                    # logging.warning('ctree_sampled_efficientzero roots.get_sampled_actions() return list')
+
+                if self._cfg.mcts_ctree:
+                    # In ctree, the method roots.get_sampled_actions() returns a list object.
                     action = np.array(roots_sampled_actions[i][action])
+                else:
+                    # In ptree, the same method roots.get_sampled_actions() returns an Action object.
+                    action = roots_sampled_actions[i][action].value
 
                 if not self._cfg.model.continuous_action_space:
                     if len(action.shape) == 0:
@@ -904,12 +906,12 @@ def _forward_collect(
 
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'root_sampled_actions': root_sampled_actions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -925,7 +927,7 @@ def _init_eval(self) -> None:
         else:
             self._mcts_eval = MCTSPtree(self._cfg)
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id=None):
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, ready_env_id: np.array = None,):
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.
@@ -1040,12 +1042,12 @@ def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: -1, read
 
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'root_sampled_actions': root_sampled_actions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
diff --git a/lzero/policy/stochastic_muzero.py b/lzero/policy/stochastic_muzero.py
index 78f66213f..96a9f7ff4 100644
--- a/lzero/policy/stochastic_muzero.py
+++ b/lzero/policy/stochastic_muzero.py
@@ -575,7 +575,7 @@ def _forward_collect(
             temperature: float = 1,
             to_play: List = [-1],
             epsilon: float = 0.25,
-            ready_env_id=None
+            ready_env_id: np.array = None,
     ) -> Dict:
         """
         Overview:
@@ -652,11 +652,11 @@ def _forward_collect(
                 action = np.where(action_mask[i] == 1.0)[0][action_index_in_legal_action_set]
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
@@ -672,7 +672,7 @@ def _init_eval(self) -> None:
         else:
             self._mcts_eval = MCTSPtree(self._cfg)
 
-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id=None) -> Dict:
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1, ready_env_id: np.array = None,) -> Dict:
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.
@@ -742,11 +742,11 @@ def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: int = -1
 
                 output[env_id] = {
                     'action': action,
-                    'distributions': distributions,
+                    'visit_count_distributions': distributions,
                     'visit_count_distribution_entropy': visit_count_distribution_entropy,
-                    'value': value,
-                    'pred_value': pred_values[i],
-                    'policy_logits': policy_logits[i],
+                    'searched_value': value,
+                    'predicted_value': pred_values[i],
+                    'predicted_policy_logits': policy_logits[i],
                 }
 
         return output
diff --git a/lzero/worker/muzero_collector.py b/lzero/worker/muzero_collector.py
index 331c72d17..aca581c47 100644
--- a/lzero/worker/muzero_collector.py
+++ b/lzero/worker/muzero_collector.py
@@ -411,14 +411,14 @@ def collect(self,
                 policy_output = self._policy.forward(stack_obs, action_mask, temperature, to_play, epsilon)
 
                 actions_no_env_id = {k: v['action'] for k, v in policy_output.items()}
-                distributions_dict_no_env_id = {k: v['distributions'] for k, v in policy_output.items()}
+                distributions_dict_no_env_id = {k: v['visit_count_distributions'] for k, v in policy_output.items()}
                 if self.policy_config.sampled_algo:
                     root_sampled_actions_dict_no_env_id = {
                         k: v['root_sampled_actions']
                         for k, v in policy_output.items()
                     }
-                value_dict_no_env_id = {k: v['value'] for k, v in policy_output.items()}
-                pred_value_dict_no_env_id = {k: v['pred_value'] for k, v in policy_output.items()}
+                value_dict_no_env_id = {k: v['searched_value'] for k, v in policy_output.items()}
+                pred_value_dict_no_env_id = {k: v['predicted_value'] for k, v in policy_output.items()}
                 visit_entropy_dict_no_env_id = {
                     k: v['visit_count_distribution_entropy']
                     for k, v in policy_output.items()
diff --git a/zoo/box2d/lunarlander/config/lunarlander_cont_sampled_efficientzero_config.py b/zoo/box2d/lunarlander/config/lunarlander_cont_sampled_efficientzero_config.py
index 4de24a8bf..5d1ede4b2 100644
--- a/zoo/box2d/lunarlander/config/lunarlander_cont_sampled_efficientzero_config.py
+++ b/zoo/box2d/lunarlander/config/lunarlander_cont_sampled_efficientzero_config.py
@@ -30,6 +30,7 @@
         manager=dict(shared_memory=False, ),
     ),
     policy=dict(
+        mcts_ctree=True,
        model=dict(
            observation_shape=8,
            action_space_size=2,
@@ -53,7 +54,7 @@
        grad_clip_value=0.5,
        num_simulations=num_simulations,
        reanalyze_ratio=reanalyze_ratio,
-        random_collect_episode_num=8,
+        random_collect_episode_num=0,
        # NOTE: for continuous gaussian policy, we use the policy_entropy_loss as in the original Sampled MuZero paper.
        policy_entropy_loss_weight=5e-3,
        n_episode=n_episode,
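
Note on the key renames: this diff changes the public keys of the policy_output dict returned by _forward_collect / _forward_eval, so every consumer must switch to the new names in the same change, as the muzero_collector.py hunk above does. The following is a minimal, self-contained sketch of the consumer side; the env id and all numeric values are placeholders, and only the key names are taken from this diff.

import numpy as np

# Hypothetical policy output for a single ready env (env id 0). The key names
# mirror the renamed keys introduced in this diff; the values are dummies.
policy_output = {
    0: {
        'action': 1,
        'visit_count_distributions': [10, 25, 5, 10],
        'visit_count_distribution_entropy': 1.21,
        'searched_value': 0.37,
        'predicted_value': 0.31,
        'predicted_policy_logits': np.array([0.1, 0.5, -0.2, 0.0]),
    },
}

# Downstream code indexes the renamed keys, as muzero_collector.py now does.
actions_no_env_id = {k: v['action'] for k, v in policy_output.items()}
distributions_dict_no_env_id = {k: v['visit_count_distributions'] for k, v in policy_output.items()}
value_dict_no_env_id = {k: v['searched_value'] for k, v in policy_output.items()}
pred_value_dict_no_env_id = {k: v['predicted_value'] for k, v in policy_output.items()}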