-
Notifications
You must be signed in to change notification settings - Fork 5
/
train_enzyme_substrate_33layer.sh
47 lines (42 loc) · 1.32 KB
/
train_enzyme_substrate_33layer.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#
# Launch fairseq training for the enzyme/substrate model using the
# "esm2_t33_650M_UR50D" pretrained ESM checkpoint name.
#
# Usage: run from the directory that contains fairseq_cli/ and data/, since
# every path below is relative to the current working directory.
#
# Checkpoints are written to model_path/enzygen_substrate.

# Abort on the first failing command or on use of an unset variable.
set -euo pipefail

# Expose eight GPUs (IDs 0-7) to the job; this matches the
# --distributed-world-size 8 flag passed below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

data_path=data/enzyme_substrate_data_lucky_best.json
local_root=model_path
pretrained_model="esm2_t33_650M_UR50D"
output_path="${local_root}/enzygen_substrate"

# Arguments are collected in an array so that each one reaches the trainer as
# exactly one word and so that related flags can be grouped and commented.
train_args=(
  # --- runtime / distributed setup ---
  # NOTE(review): --profile is enabled for this run; confirm it is intended
  # for full-length training rather than a debugging session.
  --profile
  --num-workers 0
  --distributed-world-size 8
  --ddp-backend legacy_ddp
  --save-dir "${output_path}"

  # --- task and data format ---
  --task geometric_protein_design
  --dataset-impl-source "raw"
  --dataset-impl-target "coor"

  # --- loss and its weighting factors ---
  --criterion geometric_protein_substrate_loss
  --encoder-factor 1.0
  --decoder-factor 1.0
  --binding-factor 0.5

  # --- model architecture ---
  --arch geometric_protein_substrate_model_esm
  --encoder-embed-dim 1280
  --egnn-mode "rm-node"
  --decoder-layers 3
  --pretrained-esm-model "${pretrained_model}"
  --knn 30
  --dropout 0.3

  # --- optimizer and learning-rate schedule ---
  --optimizer adam
  --adam-betas '(0.9,0.98)'
  --lr 3e-4
  --lr-scheduler inverse_sqrt
  --stop-min-lr '1e-10'
  --warmup-updates 4000
  --warmup-init-lr '5e-5'
  # NOTE(review): 0.0001 is an unusually small clipping threshold; confirm it
  # is deliberate.
  --clip-norm 0.0001

  # --- logging ---
  --log-format 'simple'
  --log-interval 10

  # --- batching and training length ---
  --max-tokens 1024
  --update-freq 1
  --max-update 1000000
  --max-epoch 100

  # --- validation and checkpointing cadence ---
  --validate-after-updates 3000
  --validate-interval-updates 3000
  --save-interval-updates 3000
  --valid-subset valid
  --max-sentences-valid 8
  --validate-interval 1
  --save-interval 1
  --keep-interval-updates 10
  --skip-invalid-size-inputs-valid-test
)

# NOTE(review): torch.distributed.launch is invoked without --nproc_per_node;
# confirm that the launcher starts the intended number of worker processes.
python3 -m torch.distributed.launch fairseq_cli/train.py \
  "${data_path}" \
  "${train_args[@]}"