config_experts.ini
[LoraConfig]
lora_alpha=16
lora_dropout=0.1
r=64
bias=none
task_type=CAUSAL_LM
[TrainingArguments]
num_train_epochs=3
per_device_train_batch_size=4
gradient_accumulation_steps=2
gradient_checkpointing=True
optim=paged_adamw_32bit
logging_steps=10
save_strategy=epoch
learning_rate=2e-4
bf16=True
tf32=True
max_grad_norm=0.3
warmup_ratio=0.03
lr_scheduler_type=constant
output_dir=${Paths:output_dir}
report_to=wandb
[Models]
model=meta-llama/Llama-2-7b-hf
dataset=jondurbin/airoboros-2.1
embeddings_model=thenlper/gte-small
embeddings_max_length=512
; Max tokens for our embedding model. This code is really designed for the gte-*
; series, e.g. https://huggingface.co/thenlper/gte-small,
; but could in theory be generalized to work with other models.
[Paths]
base_dir=/work3/s212722/herd
dataset_dir=${base_dir}/datasets
cache_dir=${base_dir}/cache
output_dir=${base_dir}/${Models:model}
experts_dir=experts/
experts_file=experts.json
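
For illustration only, here is a minimal sketch of how a config like this could be consumed with Python's standard configparser; the ${Paths:output_dir}-style references require ExtendedInterpolation. This is an assumption about how the repo might load the file, not its actual loader, and it presumes peft and transformers are installed.

# Hypothetical loader sketch (not the repo's actual code).
import configparser

from peft import LoraConfig
from transformers import TrainingArguments

# ExtendedInterpolation resolves cross-section references such as ${Paths:output_dir}.
parser = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
parser.read("config_experts.ini")

# Map the [LoraConfig] section onto peft.LoraConfig.
lora_config = LoraConfig(
    lora_alpha=parser.getint("LoraConfig", "lora_alpha"),
    lora_dropout=parser.getfloat("LoraConfig", "lora_dropout"),
    r=parser.getint("LoraConfig", "r"),
    bias=parser.get("LoraConfig", "bias"),
    task_type=parser.get("LoraConfig", "task_type"),
)

# Map the [TrainingArguments] section onto transformers.TrainingArguments.
training_args = TrainingArguments(
    output_dir=parser.get("TrainingArguments", "output_dir"),
    num_train_epochs=parser.getint("TrainingArguments", "num_train_epochs"),
    per_device_train_batch_size=parser.getint("TrainingArguments", "per_device_train_batch_size"),
    gradient_accumulation_steps=parser.getint("TrainingArguments", "gradient_accumulation_steps"),
    gradient_checkpointing=parser.getboolean("TrainingArguments", "gradient_checkpointing"),
    optim=parser.get("TrainingArguments", "optim"),
    logging_steps=parser.getint("TrainingArguments", "logging_steps"),
    save_strategy=parser.get("TrainingArguments", "save_strategy"),
    learning_rate=parser.getfloat("TrainingArguments", "learning_rate"),
    bf16=parser.getboolean("TrainingArguments", "bf16"),
    tf32=parser.getboolean("TrainingArguments", "tf32"),
    max_grad_norm=parser.getfloat("TrainingArguments", "max_grad_norm"),
    warmup_ratio=parser.getfloat("TrainingArguments", "warmup_ratio"),
    lr_scheduler_type=parser.get("TrainingArguments", "lr_scheduler_type"),
    report_to=parser.get("TrainingArguments", "report_to"),
)

With these two objects built, the model named in [Models] would typically be loaded with transformers and wrapped for PEFT training; that wiring is left out here since the config itself does not specify it.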