'''
Accelerating Symbolic Regression with Deep Learning
By Tyler Hughes, Siddharth Buddhiraju and Rituraj
For CS 221, Fall 2017-2018

This file reads examples generated by generate_examples.py and trains a
decoder (LSTM) to minimize the L2 loss between the generated equation and
the correct equation. It then predicts on the test dataset and reports
statistics.
'''
import numpy as np
import tensorflow as tf
import sys
from data_loader import load_data
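# NOTE: this script uses the TensorFlow 1.x graph API (tf.contrib.rnn),
# which was removed in TensorFlow 2.x.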
#============Read examples from file ========================
max_depth = 2
fname_phi = './data/encoded_states_d'+str(max_depth)+'.txt'
fname_eq = './data/desired_equation_components_d'+str(max_depth)+'.txt'
feature_vector_arr, equation_strings_arr, one_hot_list, eq_dict,\
reverse_dict = load_data(fname_phi,fname_eq)
#========Separating training and testing data========
feature_vector_full = feature_vector_arr
equation_strings_full = equation_strings_arr
train_ratio = 0.95
N_total = len(feature_vector_arr)
feature_vector_test = feature_vector_arr[int(N_total*train_ratio):N_total]
equation_strings_test = equation_strings_arr[int(N_total*train_ratio):N_total]
feature_vector_train = feature_vector_arr[0:int(N_total*train_ratio)]
equation_strings_train = equation_strings_arr[0:int(N_total*train_ratio)]
#=================Setting up LSTM parameters==========
N_feature = len(feature_vector_train[0])
N_vocab = len(eq_dict)
N_train = len(equation_strings_train)
N_steps = max([len(e) for e in equation_strings_train])
LSTM_size = 20
N_epoch = 300
print('working on %s examples' % N_train)
print(' number of equation elements : %s' % N_vocab)
print(' maximum equation length : %s' % N_steps)
print(' length of feature vector : %s' % N_feature)
print(' size of LSTM states : %s' % LSTM_size)
#===========Functions================================
# turn the equation into a one-hot representation
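# Example with a hypothetical vocabulary eq_dict = {'x': 0, '+': 1, '<eoe>': 2}
# and N_steps = 4: get_one_hot(['x', '+', 'x']) returns the four (N_vocab, 1)
# column vectors e_0, e_1, e_0, e_2, i.e. the equation padded to N_steps with '<eoe>'.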
def get_one_hot(eq_string):
    one_hot_list = []
    for i in range(N_steps):
        one_hot = np.zeros((N_vocab, 1))
        if i < len(eq_string):
            s = eq_string[i]
        else:
            s = '<eoe>'
        one_hot[eq_dict[s], 0] = 1
        one_hot_list.append(one_hot)
    return one_hot_list
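# Training-time decoder: the feature vector seeds the first LSTM step, and every
# subsequent step is fed the ground-truth previous token from `target` (teacher
# forcing), so early prediction errors do not compound during training.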
def predictTraintime(feature, target, lstm_cell):
    feature = tf.add(tf.matmul(feature, Wf), bf)
    out, state = tf.contrib.rnn.static_rnn(lstm_cell, [feature], dtype=tf.float32)
    # cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.8)  # tried, but did not improve results
    # out, state = tf.contrib.rnn.static_rnn(cell, [feature], dtype=tf.float32)
    out = tf.reshape(out, [LSTM_size, -1])
    out = tf.add(tf.matmul(Wo, out), bo)
    out = tf.nn.softmax(out, dim=0)
    out_list = [out]  # to use the cross-entropy loss below, collect pre-softmax logits here instead
    for i in range(N_steps - 1):
        in_prev = tf.reshape(target[0, i, :], [N_vocab, 1])
        input_element = tf.add(tf.matmul(Wi, in_prev), bi)
        input_element = tf.reshape(input_element, [1, LSTM_size])
        # out, state = tf.contrib.rnn.static_rnn(cell, [input_element], initial_state=state, dtype=tf.float32)
        out, state = tf.contrib.rnn.static_rnn(lstm_cell, [input_element], initial_state=state, dtype=tf.float32)
        out = tf.reshape(out, [LSTM_size, -1])
        out = tf.add(tf.matmul(Wo, out), bo)
        out = tf.nn.softmax(out, dim=0)
        out_list.append(out)
    return out_list
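# Test-time decoder: same unrolling as predictTraintime, except each step feeds
# back the model's own softmax output from the previous step (the true equation
# is unknown at prediction time).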
def predictTesttime(feature, lstm_cell):
    feature = tf.add(tf.matmul(feature, Wf), bf)
    out, state = tf.contrib.rnn.static_rnn(lstm_cell, [feature], dtype=tf.float32)
    out = tf.reshape(out, [LSTM_size, -1])
    out = tf.add(tf.matmul(Wo, out), bo)
    out = tf.nn.softmax(out, dim=0)
    out_list = [out]
    for i in range(N_steps - 1):
        input_element = tf.add(tf.matmul(Wi, out), bi)
        input_element = tf.reshape(input_element, [1, LSTM_size])
        out, state = tf.contrib.rnn.static_rnn(lstm_cell, [input_element], initial_state=state, dtype=tf.float32)
        out = tf.reshape(out, [LSTM_size, -1])
        out = tf.add(tf.matmul(Wo, out), bo)
        out = tf.nn.softmax(out, dim=0)
        out_list.append(out)
    return out_list
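# Decode a sequence of per-step probability vectors into an equation string by
# taking the argmax symbol at each step.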
def one_hot_to_eq_str(one_hot_list):
    one_hot_list = one_hot_list[0]  # batch size is 1, so take the 0th (only) example
    N = len(one_hot_list)
    equation = ''
    for i in range(N):
        prediction = np.argmax(one_hot_list[i])
        equation += reverse_dict[prediction]
    return equation
#===========Setting up objects for LSTM==========
# input to the first LSTM cell (the feature vector)
feature = tf.placeholder(tf.float32,[1,N_feature])
# desired out values from each LSTM cell
target = tf.placeholder(tf.float32,[1,N_steps,N_vocab])
# output weights and biases (LSTM state to vocabulary softmax)
Wo = tf.Variable(tf.random_normal([N_vocab,LSTM_size]))
bo = tf.Variable(tf.zeros([N_vocab,1]))
# input weights and biases (previous token to LSTM input)
Wi = tf.Variable(tf.random_normal([LSTM_size,N_vocab]))
bi = tf.Variable(tf.zeros([LSTM_size,1]))
# feature weights and biases (feature vector to LSTM input)
Wf = tf.Variable(tf.random_normal([N_feature,LSTM_size]))
bf = tf.Variable(tf.zeros([1,LSTM_size]))
# define the basic lstm cell
lstm_cell = tf.contrib.rnn.BasicLSTMCell(LSTM_size)
#==================================================
loss = tf.constant(0.0)
#=========Outlists at training and testing time=====
out_list = predictTraintime(feature, target, lstm_cell)
out_list_tensor = tf.reshape(out_list,[1,N_steps,N_vocab])
out_list_run = predictTesttime(feature, lstm_cell)
out_list_run_tensor = tf.reshape(out_list_run,[1,N_steps,N_vocab])
#======L2 Loss======
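# Sums squared differences between the per-step softmax distributions and the
# one-hot targets, over all N_steps positions and the whole vocabulary.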
loss = loss + tf.reduce_sum(tf.square(tf.subtract(out_list_tensor,target)))
#======Cross entropy with Logits======
# loss = loss + tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=out_list_tensor))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# turn the equations into one-hot representations and reshape for TF
features_train = [np.reshape(np.array(f), (1, N_feature)) for f in feature_vector_train]
features_test = [np.reshape(np.array(f), (1, N_feature)) for f in feature_vector_test]
eq_one_hot_train = [np.reshape(np.array(get_one_hot(e)), (1, N_steps, N_vocab)) for e in equation_strings_train]
with tf.Session() as sess:
    #======Training the NN+LSTM on training data======
    sess.run(tf.global_variables_initializer())
    losses = []
    for i in range(N_epoch):
        epoch_loss = 0.0
        for m in range(N_train):
            _, loss_calc, out_list_calc = sess.run([optimizer, loss, out_list_tensor],
                feed_dict={feature: features_train[m], target: eq_one_hot_train[m]})
            epoch_loss += loss_calc
        losses.append(epoch_loss)
        sys.stdout.write("\repoch %s of %s. loss: %s" % (i + 1, N_epoch, epoch_loss))
        sys.stdout.flush()
    print("\n")
    #======Test error on the training (seen) data======
    def trainError(index):
        p = sess.run(out_list_run_tensor, feed_dict={feature: features_train[index]})
        eq_pred = one_hot_to_eq_str(p)
        suppliedString = ''.join(equation_strings_train[index]).replace('<eoe>', '')
        predictedString = eq_pred.replace('<eoe>', '')
        # print('--')
        # print("supplied feature vector for : %s" % (suppliedString))
        # print("predicted equation of : %s" % (predictedString))
        if (suppliedString == 'x') and (predictedString == 'x'):
            return (0, 1)
        elif (suppliedString == 'c') and (predictedString == 'c'):
            return (0, 1)
        elif predictedString == suppliedString:
            return (1, 0)
        return (0, 0)
    #======Test error on the test (unseen) data======
    def testError(index):
        p = sess.run(out_list_run_tensor, feed_dict={feature: features_test[index]})
        eq_pred = one_hot_to_eq_str(p)
        suppliedString = ''.join(equation_strings_test[index]).replace('<eoe>', '')
        predictedString = eq_pred.replace('<eoe>', '')
        print('--')
        print("supplied feature vector for : %s" % (suppliedString))
        print("predicted equation of : %s" % (predictedString))
        if (suppliedString == 'x') and (predictedString == 'x'):
            return (0, 1)
        elif (suppliedString == 'c') and (predictedString == 'c'):
            return (0, 1)
        elif predictedString == suppliedString:
            return (1, 0)
        return (0, 0)
    #==================Console output========================
    print('Testing on test data:')
    correctPreds = 0
    correctPredsX = 0
    for j in range(len(features_test)):
        output = testError(j)
        correctPreds += output[0]
        correctPredsX += output[1]
    print('Number of correct "x/c" predictions: %d' % correctPredsX)
    print('Number of correct predictions excluding "x/c": %d' % correctPreds)
    print('Total %d out of %d' % (correctPreds + correctPredsX, len(feature_vector_test)))
    print("\n")
    print('Now on original training data:')
    correctPreds = 0
    correctPredsX = 0  # reset so the training-set tally is not mixed with the test-set tally
    for j in range(len(features_train)):
        output = trainError(j)
        correctPreds += output[0]
        correctPredsX += output[1]
    print('Number of correct "x/c" predictions: %d' % correctPredsX)
    print('Number of correct predictions excluding "x/c": %d' % correctPreds)
    print('Total %d out of %d' % (correctPreds + correctPredsX, len(feature_vector_train)))
    new_examples = [''.join(ex).replace('<eoe>', '') for ex in
                    equation_strings_test if ex not in equation_strings_train]
    print('New functions were: ', new_examples)
    #============Optional: Save epoch loss to file===============
    g = open('data/seq_1_1500ex_L2_300ep.txt', 'w')
    for e in losses:
        g.write(str(e) + '\n')
    g.close()