diff --git a/zoo/atari/config/atari_unizero_sgement_config.py b/zoo/atari/config/atari_unizero_sgement_config.py
index ff61ef539..ba37271fd 100644
--- a/zoo/atari/config/atari_unizero_sgement_config.py
+++ b/zoo/atari/config/atari_unizero_sgement_config.py
@@ -42,16 +42,18 @@
 num_unroll_steps = 10
 infer_context_length = 4
 
+num_layers = 2
+
 # ====== only for debug =====
-# collector_env_num = 8
-# num_segments = 8
-# evaluator_env_num = 2
-# num_simulations = 5
-# max_env_step = int(2e5)
-# reanalyze_ratio = 0.1
-# batch_size = 64
-# num_unroll_steps = 10
-# replay_ratio = 0.01
+collector_env_num = 8
+num_segments = 8
+evaluator_env_num = 2
+num_simulations = 5
+max_env_step = int(2e5)
+reanalyze_ratio = 0.
+batch_size = 64
+num_unroll_steps = 10
+replay_ratio = 0.05
 
 # ==============================================================
 # end of the most frequently changed config specified by the user
@@ -69,18 +71,18 @@
         n_evaluator_episode=evaluator_env_num,
         manager=dict(shared_memory=False, ),
         # TODO: only for debug
-        # collect_max_episode_steps=int(20),
-        # eval_max_episode_steps=int(20),
+        collect_max_episode_steps=int(20),
+        eval_max_episode_steps=int(20),
     ),
     policy=dict(
-        learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=20000,),),),  # default is 10000
+        learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=100000,),),),  # default is 10000
         model=dict(
             # observation_shape=(3, 64, 64),
             observation_shape=(3, 96, 96),
             action_space_size=action_space_size,
             world_model_cfg=dict(
-                # policy_entropy_weight=0,  # NOTE
-                policy_entropy_weight=1e-4,
+                policy_entropy_weight=0,  # NOTE
+                # policy_entropy_weight=1e-4,
                 continuous_action_space=False,
                 max_blocks=num_unroll_steps,
                 max_tokens=2 * num_unroll_steps,  # NOTE: each timestep has 2 tokens: obs and action
@@ -88,7 +90,7 @@
                 device='cuda',
                 # device='cpu',
                 action_space_size=action_space_size,
-                num_layers=2,
+                num_layers=num_layers,
                 num_heads=8,
                 embed_dim=768,
                 obs_type='image',
@@ -101,7 +103,8 @@
 
         # use_augmentation=True,
         use_augmentation=False,
-        manual_temperature_decay=True,  # TODO
+        # manual_temperature_decay=True,  # TODO
+        manual_temperature_decay=False,  # TODO
         threshold_training_steps_for_final_temperature=int(2.5e4),
         # manual_temperature_decay=False,  # TODO
 
@@ -113,8 +116,8 @@
         replay_ratio=replay_ratio,
         batch_size=batch_size,
         optim_type='AdamW',
-        learning_rate=0.0001,
-        # learning_rate=0.001,  # TODO
+        # learning_rate=0.0001,
+        learning_rate=0.1,  # TODO
         num_simulations=num_simulations,
         reanalyze_ratio=reanalyze_ratio,
         num_segments=num_segments,
@@ -156,10 +159,10 @@
 
 seeds = [0]  # You can add more seed values here
 for seed in seeds:
     # Update exp_name to include the current seed
-    main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0918/{env_id[:-14]}/{env_id[:-14]}_uz_numsegments-{num_segments}_gsl{game_segment_length}_pew1e-4_decaytemp25k_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}'
+    # main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0920/{env_id[:-14]}/{env_id[:-14]}_uz_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_temp025_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}-infer{infer_context_length}_bs{batch_size}_seed{seed}'
     # main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0917/numsegments-{num_segments}_gsl{game_segment_length}_origin-target-value-policy_pew0_fixsample_temp025_useprio/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'
-    # main_config.exp_name = f'data_efficiency0829_plus_tune-uz_debug/numsegments-{num_segments}_gsl{game_segment_length}_fix/obshape96_use-augmentation-obsw10/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'
+    main_config.exp_name = f'data_efficiency0829_plus_tune-uz_debug/numsegments-{num_segments}_gsl{game_segment_length}_fix/obshape96_use-augmentation-obsw10/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'
 
     from lzero.entry import train_unizero
     train_unizero([main_config, create_config], seed=seed, model_path=main_config.policy.model_path, max_env_step=max_env_step)
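Side note on the file above: the debug overrides are enabled by uncommenting nine lines in place. As a minimal sketch of an equivalent single toggle, assuming a hypothetical `debug` flag that does not exist in the original config (the values mirror the block enabled by this patch):

# Editor's sketch, not part of the patch: gate the debug overrides behind
# one flag instead of commenting the block in and out. `debug` is assumed.
debug = True

if debug:
    collector_env_num = 8
    num_segments = 8
    evaluator_env_num = 2
    num_simulations = 5
    max_env_step = int(2e5)
    reanalyze_ratio = 0.
    batch_size = 64
    num_unroll_steps = 10
    replay_ratio = 0.05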
diff --git a/zoo/atari/config/atari_unizero_sgement_config_batch.py b/zoo/atari/config/atari_unizero_sgement_config_batch.py
index 85c979dc1..a6fe992dd 100644
--- a/zoo/atari/config/atari_unizero_sgement_config_batch.py
+++ b/zoo/atari/config/atari_unizero_sgement_config_batch.py
@@ -10,16 +10,19 @@ def main(env_id, seed):
     # ==============================================================
     update_per_collect = None
     replay_ratio = 0.25
+    # collector_env_num = 8  # TODO
+    # num_segments = 8
+    # collector_env_num = 4  # TODO
+    # num_segments = 4
+    # game_segment_length=10
 
-    collector_env_num = 8
-    num_segments = 8
-
+    collector_env_num = 1  # TODO
+    num_segments = 1
     game_segment_length=20
-    # game_segment_length=400
-    evaluator_env_num = 3
+    evaluator_env_num = 8  # TODO
 
     num_simulations = 50
-    max_env_step = int(2e5)
+    max_env_step = int(5e5)  # TODO
 
     reanalyze_ratio = 0.
 
@@ -27,6 +30,11 @@ def main(env_id, seed):
     num_unroll_steps = 10
     infer_context_length = 4
 
+    # num_unroll_steps = 5
+    # infer_context_length = 4
+
+    num_layers = 2
+
     # ====== only for debug =====
     # collector_env_num = 8
     # num_segments = 8
@@ -58,14 +66,14 @@ def main(env_id, seed):
             # eval_max_episode_steps=int(20),
         ),
         policy=dict(
-            learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=200000,),),),  # default is 10000
+            learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=100000,),),),  # default is 10000
             model=dict(
                 # observation_shape=(3, 64, 64),
                 observation_shape=(3, 96, 96),
                 action_space_size=action_space_size,
                 world_model_cfg=dict(
-                    # policy_entropy_weight=0,  # NOTE
-                    policy_entropy_weight=1e-4,
+                    policy_entropy_weight=0,  # NOTE
+                    # policy_entropy_weight=1e-4,
                     continuous_action_space=False,
                     max_blocks=num_unroll_steps,
                     max_tokens=2 * num_unroll_steps,  # NOTE: each timestep has 2 tokens: obs and action
@@ -73,7 +81,7 @@ def main(env_id, seed):
                     device='cuda',
                     # device='cpu',
                     action_space_size=action_space_size,
-                    num_layers=2,
+                    num_layers=num_layers,
                     num_heads=8,
                     embed_dim=768,
                     obs_type='image',
@@ -86,7 +94,8 @@ def main(env_id, seed):
 
             # use_augmentation=True,
             use_augmentation=False,
-            manual_temperature_decay=True,  # TODO
+            # manual_temperature_decay=True,  # TODO
+            manual_temperature_decay=False,  # TODO
             threshold_training_steps_for_final_temperature=int(2.5e4),
             # manual_temperature_decay=False,  # TODO
 
@@ -99,7 +108,6 @@ def main(env_id, seed):
             batch_size=batch_size,
             optim_type='AdamW',
             learning_rate=0.0001,
-            # learning_rate=0.001,  # TODO
             num_simulations=num_simulations,
             reanalyze_ratio=reanalyze_ratio,
             num_segments=num_segments,
@@ -136,9 +144,7 @@ def main(env_id, seed):
 
     atari_unizero_create_config = EasyDict(atari_unizero_create_config)
     create_config = atari_unizero_create_config
-
-    # Update exp_name to include the current seed
-    main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0918/{env_id[:-14]}/{env_id[:-14]}_uz_numsegments-{num_segments}_gsl{game_segment_length}_pew1e-4_decaytemp25k_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}'
+    main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0920/{env_id[:-14]}/{env_id[:-14]}_uz_nlayer{num_layers}_eval8_collect{collector_env_num}-numsegments-{num_segments}_gsl{game_segment_length}_temp025_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}-infer{infer_context_length}_bs{batch_size}_seed{seed}'
 
     from lzero.entry import train_unizero
     train_unizero([main_config, create_config], seed=seed, model_path=main_config.policy.model_path, max_env_step=max_env_step)
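The batch config is driven entirely through `main(env_id, seed)`. Its `__main__` block sits outside this diff, so the following argparse sketch of the entry point implied by the `--env %q --seed %d` flags in the shell script below is an assumption, not the file's actual code:

# Editor's sketch of the CLI entry point implied by sco_acp_mbq_uz_batch.sh;
# the file's real __main__ block is not shown in this diff, so the exact
# wiring here is assumed.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()
    main(args.env, args.seed)  # main(env_id, seed) as defined above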
diff --git a/zoo/atari/config/sco_acp_mbq_uz_batch.sh b/zoo/atari/config/sco_acp_mbq_uz_batch.sh
index a574ec24d..11b02c037 100644
--- a/zoo/atari/config/sco_acp_mbq_uz_batch.sh
+++ b/zoo/atari/config/sco_acp_mbq_uz_batch.sh
@@ -27,22 +27,35 @@
 #     'BreakoutNoFrameskip-v4'
 # )
 
+# envs=(
+#     'AlienNoFrameskip-v4'
+#     'AmidarNoFrameskip-v4'
+#     'AssaultNoFrameskip-v4'
+#     'AsterixNoFrameskip-v4'
+#     'ChopperCommandNoFrameskip-v4'
+#     'DemonAttackNoFrameskip-v4'
+#     'KangarooNoFrameskip-v4'
+#     'KrullNoFrameskip-v4'
+#     'KungFuMasterNoFrameskip-v4'
+#     'RoadRunnerNoFrameskip-v4'
+#     'UpNDownNoFrameskip-v4'
+# )
+
+# one env
+# env='AsterixNoFrameskip-v4'
+# seed=0
+# script='source activate base && export HTTPS_PROXY=http://172.16.1.135:3128/ && cd /mnt/afs/niuyazhe/code/LightZero && pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ && pip3 install ale-py autorom && AutoROM --accept-license && python3 -u /mnt/afs/niuyazhe/code/LightZero/zoo/atari/config/atari_unizero_sgement_config_batch.py --env %q --seed %d'
+# script=${script/\%q/$env}
+# script=${script/\%d/$seed}
+# echo "The final script is: " $script
+
+# batch env
 envs=(
-    'AlienNoFrameskip-v4'
-    'AmidarNoFrameskip-v4'
-    'AssaultNoFrameskip-v4'
+    'PongNoFrameskip-v4'
+    'QbertNoFrameskip-v4'
     'AsterixNoFrameskip-v4'
-    'ChopperCommandNoFrameskip-v4'
-    'DemonAttackNoFrameskip-v4'
-    'KangarooNoFrameskip-v4'
-    'KrullNoFrameskip-v4'
-    'KungFuMasterNoFrameskip-v4'
-    'RoadRunnerNoFrameskip-v4'
-    'UpNDownNoFrameskip-v4'
 )
-
 seed=0
-
 for env in "${envs[@]}"; do
     script='source activate base && export HTTPS_PROXY=http://172.16.1.135:3128/ && cd /mnt/afs/niuyazhe/code/LightZero && pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ && pip3 install ale-py autorom && AutoROM --accept-license && python3 -u /mnt/afs/niuyazhe/code/LightZero/zoo/atari/config/atari_unizero_sgement_config_batch.py --env %q --seed %d'
     script=${script/\%q/$env}
@@ -51,7 +64,7 @@ for env in "${envs[@]}"; do
 
 sco acp jobs create --workspace-name=fb1861da-1c6c-42c7-87ed-e08d8b314a99 \
     --aec2-name=eb37789e-90bb-418d-ad4a-19ce4b81ab0c\
-    --job-name="uz-$env-s$seed" \
+    --job-name="uz-nlayer2-H10-seg1-gsl20-$env-s$seed" \
    --container-image-url='registry.cn-sh-01.sensecore.cn/basemodel-ccr/aicl-b27637a9-660e-4927:20231222-17h24m12s' \
     --training-framework=pytorch \
     --enable-mpi \
@@ -60,3 +73,6 @@ sco acp jobs create --workspace-name=fb1861da-1c6c-42c7-87ed-e08d8b314a99 \
     --storage-mount 6f8b7bf6-c313-11ed-adcf-92dd2c58bebc:/mnt/afs \
     --command="$script"
 done
+
+
+# --job-name="uz-nlayer2-H5-$env-s$seed" \
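For reference, a standalone illustration of the `%q`/`%d` placeholder substitution the script relies on, with the long command template shortened for clarity (the real template above is unchanged). The backslash in `\%q` keeps `%` from being parsed as the end-anchor operator in bash's `${var/pattern/replacement}`:

# Editor's sketch, not part of the patch: how the placeholder substitution
# above behaves, using a shortened command template.
script='python3 -u atari_unizero_sgement_config_batch.py --env %q --seed %d'
env='PongNoFrameskip-v4'
seed=0
script=${script/\%q/$env}    # replace the first literal "%q" with $env
script=${script/\%d/$seed}   # replace the first literal "%d" with $seed
echo "$script"
# prints: python3 -u atari_unizero_sgement_config_batch.py --env PongNoFrameskip-v4 --seed 0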