polish(pu): polish uz configs
dyyoungg committed Sep 20, 2024
1 parent 91d48c1 commit 1c8b92b
Showing 3 changed files with 73 additions and 48 deletions.
43 changes: 23 additions & 20 deletions zoo/atari/config/atari_unizero_sgement_config.py
@@ -42,16 +42,18 @@
num_unroll_steps = 10
infer_context_length = 4

num_layers = 2

# ====== only for debug =====
# collector_env_num = 8
# num_segments = 8
# evaluator_env_num = 2
# num_simulations = 5
# max_env_step = int(2e5)
# reanalyze_ratio = 0.1
# batch_size = 64
# num_unroll_steps = 10
# replay_ratio = 0.01
collector_env_num = 8
num_segments = 8
evaluator_env_num = 2
num_simulations = 5
max_env_step = int(2e5)
reanalyze_ratio = 0.
batch_size = 64
num_unroll_steps = 10
replay_ratio = 0.05

# ==============================================================
# end of the most frequently changed config specified by the user
@@ -69,26 +71,26 @@
n_evaluator_episode=evaluator_env_num,
manager=dict(shared_memory=False, ),
# TODO: only for debug
# collect_max_episode_steps=int(20),
# eval_max_episode_steps=int(20),
collect_max_episode_steps=int(20),
eval_max_episode_steps=int(20),
),
policy=dict(
learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=20000,),),), # default is 10000
learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=100000,),),), # default is 10000
model=dict(
# observation_shape=(3, 64, 64),
observation_shape=(3, 96, 96),
action_space_size=action_space_size,
world_model_cfg=dict(
# policy_entropy_weight=0, # NOTE
policy_entropy_weight=1e-4,
policy_entropy_weight=0, # NOTE
# policy_entropy_weight=1e-4,
continuous_action_space=False,
max_blocks=num_unroll_steps,
max_tokens=2 * num_unroll_steps, # NOTE: each timestep has 2 tokens: obs and action
context_length=2 * infer_context_length,
device='cuda',
# device='cpu',
action_space_size=action_space_size,
num_layers=2,
num_layers=num_layers,
num_heads=8,
embed_dim=768,
obs_type='image',
@@ -101,7 +103,8 @@
# use_augmentation=True,
use_augmentation=False,

manual_temperature_decay=True, # TODO
# manual_temperature_decay=True, # TODO
manual_temperature_decay=False, # TODO
threshold_training_steps_for_final_temperature=int(2.5e4),
# manual_temperature_decay=False, # TODO

@@ -113,8 +116,8 @@
replay_ratio=replay_ratio,
batch_size=batch_size,
optim_type='AdamW',
learning_rate=0.0001,
# learning_rate=0.001, # TODO
# learning_rate=0.0001,
learning_rate=0.1, # TODO
num_simulations=num_simulations,
reanalyze_ratio=reanalyze_ratio,
num_segments=num_segments,
@@ -156,10 +159,10 @@
seeds = [0] # You can add more seed values here
for seed in seeds:
# Update exp_name to include the current seed
main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0918/{env_id[:-14]}/{env_id[:-14]}_uz_numsegments-{num_segments}_gsl{game_segment_length}_pew1e-4_decaytemp25k_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}'
# main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0920/{env_id[:-14]}/{env_id[:-14]}_uz_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_temp025_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}-infer{infer_context_length}_bs{batch_size}_seed{seed}'
# main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0917/numsegments-{num_segments}_gsl{game_segment_length}_origin-target-value-policy_pew0_fixsample_temp025_useprio/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'

# main_config.exp_name = f'data_efficiency0829_plus_tune-uz_debug/numsegments-{num_segments}_gsl{game_segment_length}_fix/obshape96_use-augmentation-obsw10/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'
main_config.exp_name = f'data_efficiency0829_plus_tune-uz_debug/numsegments-{num_segments}_gsl{game_segment_length}_fix/obshape96_use-augmentation-obsw10/{env_id[:-14]}_stack1_unizero_upc{update_per_collect}-rr{replay_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}_nlayer2'

from lzero.entry import train_unizero
train_unizero([main_config, create_config], seed=seed, model_path=main_config.policy.model_path, max_env_step=max_env_step)
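A note on the experiment-name pattern used in every exp_name variant above: env_id[:-14] drops the trailing 'NoFrameskip-v4' suffix (14 characters), leaving only the game name. A minimal standalone sketch, not part of the commit — the values below are illustrative stand-ins for the config variables defined earlier in this file:

    env_id = 'PongNoFrameskip-v4'
    assert len('NoFrameskip-v4') == 14
    game = env_id[:-14]  # -> 'Pong'
    # assumed values mirroring the config above (game_segment_length is
    # defined outside the hunks shown)
    num_segments, game_segment_length = 8, 20
    replay_ratio, num_unroll_steps, batch_size, seed = 0.05, 10, 64, 0
    exp_name = (f'{game}/{game}_uz_numsegments-{num_segments}_gsl{game_segment_length}'
                f'-rr{replay_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}')
    print(exp_name)  # Pong/Pong_uz_numsegments-8_gsl20-rr0.05_H10_bs64_seed0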
36 changes: 21 additions & 15 deletions zoo/atari/config/atari_unizero_sgement_config_batch.py
@@ -10,23 +10,31 @@ def main(env_id, seed):
# ==============================================================
update_per_collect = None
replay_ratio = 0.25
# collector_env_num = 8 # TODO
# num_segments = 8
# collector_env_num = 4 # TODO
# num_segments = 4
# game_segment_length=10

collector_env_num = 8
num_segments = 8

collector_env_num = 1 # TODO
num_segments = 1
game_segment_length=20
# game_segment_length=400

evaluator_env_num = 3
evaluator_env_num = 8 # TODO
num_simulations = 50
max_env_step = int(2e5)
max_env_step = int(5e5) # TODO

reanalyze_ratio = 0.

batch_size = 64
num_unroll_steps = 10
infer_context_length = 4

# num_unroll_steps = 5
# infer_context_length = 4

num_layers = 2

# ====== only for debug =====
# collector_env_num = 8
# num_segments = 8
@@ -58,22 +66,22 @@ def main(env_id, seed):
# eval_max_episode_steps=int(20),
),
policy=dict(
learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=200000,),),), # default is 10000
learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=100000,),),), # default is 10000
model=dict(
# observation_shape=(3, 64, 64),
observation_shape=(3, 96, 96),
action_space_size=action_space_size,
world_model_cfg=dict(
# policy_entropy_weight=0, # NOTE
policy_entropy_weight=1e-4,
policy_entropy_weight=0, # NOTE
# policy_entropy_weight=1e-4,
continuous_action_space=False,
max_blocks=num_unroll_steps,
max_tokens=2 * num_unroll_steps, # NOTE: each timestep has 2 tokens: obs and action
context_length=2 * infer_context_length,
device='cuda',
# device='cpu',
action_space_size=action_space_size,
num_layers=2,
num_layers=num_layers,
num_heads=8,
embed_dim=768,
obs_type='image',
@@ -86,7 +94,8 @@
# use_augmentation=True,
use_augmentation=False,

manual_temperature_decay=True, # TODO
# manual_temperature_decay=True, # TODO
manual_temperature_decay=False, # TODO
threshold_training_steps_for_final_temperature=int(2.5e4),
# manual_temperature_decay=False, # TODO

@@ -99,7 +108,6 @@
batch_size=batch_size,
optim_type='AdamW',
learning_rate=0.0001,
# learning_rate=0.001, # TODO
num_simulations=num_simulations,
reanalyze_ratio=reanalyze_ratio,
num_segments=num_segments,
@@ -136,9 +144,7 @@ def main(env_id, seed):
atari_unizero_create_config = EasyDict(atari_unizero_create_config)
create_config = atari_unizero_create_config


# Update exp_name to include the current seed
main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0918/{env_id[:-14]}/{env_id[:-14]}_uz_numsegments-{num_segments}_gsl{game_segment_length}_pew1e-4_decaytemp25k_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}_bs{batch_size}_seed{seed}'
main_config.exp_name = f'data_efficiency0829_plus_tune-uz_0920/{env_id[:-14]}/{env_id[:-14]}_uz_nlayer{num_layers}_eval8_collect{collector_env_num}-numsegments-{num_segments}_gsl{game_segment_length}_temp025_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_H{num_unroll_steps}-infer{infer_context_length}_bs{batch_size}_seed{seed}'

from lzero.entry import train_unizero
train_unizero([main_config, create_config], seed=seed, model_path=main_config.policy.model_path, max_env_step=max_env_step)
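The shell script in the next file invokes this module as python3 -u ... atari_unizero_sgement_config_batch.py --env %q --seed %d, so the file presumably parses those flags and forwards them to the main(env_id, seed) function shown at the top of this diff. The argument handling itself falls outside the hunks shown; a hypothetical sketch of what such an entry point could look like (the argparse flag names are assumptions taken from the shell script's call, not the repository's actual code):

    import argparse

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--env', type=str, default='PongNoFrameskip-v4')
        parser.add_argument('--seed', type=int, default=0)
        args = parser.parse_args()
        main(args.env, args.seed)  # main(env_id, seed) defined at the top of this file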
42 changes: 29 additions & 13 deletions zoo/atari/config/sco_acp_mbq_uz_batch.sh
@@ -27,22 +27,35 @@
# 'BreakoutNoFrameskip-v4'
# )

# envs=(
# 'AlienNoFrameskip-v4'
# 'AmidarNoFrameskip-v4'
# 'AssaultNoFrameskip-v4'
# 'AsterixNoFrameskip-v4'
# 'ChopperCommandNoFrameskip-v4'
# 'DemonAttackNoFrameskip-v4'
# 'KangarooNoFrameskip-v4'
# 'KrullNoFrameskip-v4'
# 'KungFuMasterNoFrameskip-v4'
# 'RoadRunnerNoFrameskip-v4'
# 'UpNDownNoFrameskip-v4'
# )

# one env
# env='AsterixNoFrameskip-v4'
# seed=0
# script='source activate base && export HTTPS_PROXY=http://172.16.1.135:3128/ && cd /mnt/afs/niuyazhe/code/LightZero && pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ && pip3 install ale-py autorom && AutoROM --accept-license && python3 -u /mnt/afs/niuyazhe/code/LightZero/zoo/atari/config/atari_unizero_sgement_config_batch.py --env %q --seed %d'
# script=${script/\%q/$env}
# script=${script/\%d/$seed}
# echo "The final script is: " $script

# batch env
envs=(
'AlienNoFrameskip-v4'
'AmidarNoFrameskip-v4'
'AssaultNoFrameskip-v4'
'PongNoFrameskip-v4'
'QbertNoFrameskip-v4'
'AsterixNoFrameskip-v4'
'ChopperCommandNoFrameskip-v4'
'DemonAttackNoFrameskip-v4'
'KangarooNoFrameskip-v4'
'KrullNoFrameskip-v4'
'KungFuMasterNoFrameskip-v4'
'RoadRunnerNoFrameskip-v4'
'UpNDownNoFrameskip-v4'
)

seed=0

for env in "${envs[@]}"; do
script='source activate base && export HTTPS_PROXY=http://172.16.1.135:3128/ && cd /mnt/afs/niuyazhe/code/LightZero && pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ && pip3 install ale-py autorom && AutoROM --accept-license && python3 -u /mnt/afs/niuyazhe/code/LightZero/zoo/atari/config/atari_unizero_sgement_config_batch.py --env %q --seed %d'
script=${script/\%q/$env}
@@ -51,7 +64,7 @@ for env in "${envs[@]}"; do

sco acp jobs create --workspace-name=fb1861da-1c6c-42c7-87ed-e08d8b314a99 \
--aec2-name=eb37789e-90bb-418d-ad4a-19ce4b81ab0c\
--job-name="uz-$env-s$seed" \
--job-name="uz-nlayer2-H10-seg1-gsl20-$env-s$seed" \
--container-image-url='registry.cn-sh-01.sensecore.cn/basemodel-ccr/aicl-b27637a9-660e-4927:20231222-17h24m12s' \
--training-framework=pytorch \
--enable-mpi \
@@ -60,3 +73,6 @@
--storage-mount 6f8b7bf6-c313-11ed-adcf-92dd2c58bebc:/mnt/afs \
--command="$script"
done


# --job-name="uz-nlayer2-H5-$env-s$seed" \
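For readers unfamiliar with the substitution idiom used in the loop above: ${script/\%q/$env} is bash parameter expansion that replaces the first occurrence of the pattern %q in $script with the value of $env. Here %q and %d are plain placeholder tokens in the command template, not printf format specifiers. A standalone illustration with a hypothetical template (the commit's real command string is much longer):

    script='python3 -u train.py --env %q --seed %d'  # hypothetical template
    env='PongNoFrameskip-v4'
    seed=0
    script=${script/\%q/$env}   # first %q -> PongNoFrameskip-v4
    script=${script/\%d/$seed}  # first %d -> 0
    echo "$script"              # python3 -u train.py --env PongNoFrameskip-v4 --seed 0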
