-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
129 lines (99 loc) · 2.58 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import texar as tx
# Hyperparameter dictionary exposed as ``dcoder_config``; presumably consumed
# by a Texar transformer decoder elsewhere in the project — confirm against
# the caller. Every width in here is 768, the same value that ``hidden_dim``
# is given further down in this file.
dcoder_config = {
    "dim": 768,
    "num_blocks": 12,
    "multihead_attention": {
        "num_heads": 8,
        "output_dim": 768,
        # See documentation for more optional hyperparameters
    },
    "position_embedder_hparams": {
        "dim": 768,
    },
    "initializer": {
        "type": "variance_scaling_initializer",
        "kwargs": {
            "scale": 1.0,
            "mode": "fan_avg",
            "distribution": "uniform",
        },
    },
    # Start from Texar's default position-wise network hparams, asking for a
    # 768-wide output.
    "poswise_feedforward": tx.modules.default_transformer_poswise_net_hparams(
        output_dim=768
    ),
}
# Scalar hyperparameters. Their meaning is inferred from the names only
# (presumably label-smoothing confidence, RNG seed, beam-search width and
# length-penalty alpha) — confirm against the training script.
loss_label_confidence = 0.9
random_seed = 1234
beam_width = 5
alpha = 0.6

# Width shared by the model configuration; also feeds the learning-rate
# constant computed further down in this file.
hidden_dim = 768

# Optimizer specification: optimizer type name plus its keyword arguments.
opt = {
    "optimizer": {
        "type": "AdamOptimizer",
        "kwargs": {
            "beta1": 0.9,
            "beta2": 0.999,
            "epsilon": 1e-8,
        },
    },
}
# opt = {
# 'optimizer': {
# 'type': 'AdamOptimizer',
# 'kwargs': {
# 'beta1': 0.9,
# 'beta2': 0.997,
# 'epsilon': 1e-9
# }
# }
# }
# Warmup steps must be 0.1% of the number of iterations.
# NOTE(review): warmup_steps is 10000 here while max_train_steps is set to
# 100000 later in this file, i.e. 10% rather than 0.1% — confirm which of the
# two is intended.
lr = {
    "learning_rate_schedule": "constant.linear_warmup.rsqrt_decay.rsqrt_depth",
    # Twice the inverse square root of hidden_dim.
    "lr_constant": 2 * (hidden_dim ** -0.5),
    "static_lr": 1e-3,
    "warmup_steps": 10000,
}
# Begin-/end-of-sequence token ids. Presumably the [CLS] and [SEP] ids of the
# BERT uncased vocabulary — confirm against the vocab file in use.
bos_token_id = 101
eos_token_id = 102

# Model directory and run mode string.
model_dir = "./models"
run_mode = "train_and_evaluate"

# Batch sizes for training, evaluation, and prediction.
batch_size = 32
eval_batch_size = 32
test_batch_size = 32
# batch_size = 16
# eval_batch_size = 16
# test_batch_size = 16
# Step counts: total training steps and the display / checkpoint / eval /
# test step settings (presumably intervals — confirm against the training loop).
max_train_steps = 100000
display_steps = 10
checkpoint_steps = 2000
eval_steps = 50000
test_steps = 5000

# Length limits for decoding and for source / target sequences.
max_decoding_length = 400
max_seq_length_src = 150
max_seq_length_tgt = 30

epochs = 10
is_distributed = False
# Root of the dataset directory; every path below is derived from it.
data_dir = "data/"

# TFRecord files for the train / eval / test splits.
train_out_file = data_dir + "train.tf_record"
eval_out_file = data_dir + "eval.tf_record"
test_out_file = data_dir + "test.tf_record"

# NOTE: this value is superseded by the reassignment of bert_pretrain_dir at
# the very end of this file.
bert_pretrain_dir = "./bert_uncased_model"

# Story / summary text files for each split.
train_story = data_dir + "train_story.txt"
train_summ = data_dir + "train_summ.txt"
eval_story = data_dir + "eval_story.txt"
eval_summ = data_dir + "eval_summ.txt"
test_story = data_dir + "test_story.txt"
test_summ = data_dir + "test_summ.txt"
# data_dir = "data1/"
# train_out_file = "data1/train.tf_record"
# eval_out_file = "data1/eval.tf_record"
# test_out_file = "data1/test.tf_record"
# bert_pretrain_dir="./bert_uncased_model"
# train_story = "data1/train_story.txt"
# train_summ = "data1/train_summ.txt"
# eval_story = "data1/eval_story.txt"
# eval_summ = "data1/eval_summ.txt"
# test_story = "data1/test_story.txt"
# test_summ = "data1/test_summ.txt"
# Final value of bert_pretrain_dir: this assignment overrides the
# "./bert_uncased_model" value set earlier in this file. Presumably points at
# the unpacked pretrained BERT model directory — confirm it exists locally.
bert_pretrain_dir = "./uncased_L-12_H-768_A-12"