Skip to content

Commit

Permalink
Added python eval script.
Browse files Browse the repository at this point in the history
  • Loading branch information
Russell committed Sep 2, 2015
1 parent c2a1cdf commit 8b9bb2b
Showing 1 changed file with 95 additions and 0 deletions.
95 changes: 95 additions & 0 deletions eval/python/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import argparse
import numpy as np

def main():
    """Load a vocab file and GloVe-style text vectors, L2-normalize the
    vectors, and run the word-analogy evaluation.

    Command-line flags:
        --vocab_file    one word per line; first whitespace token is the word
        --vectors_file  word followed by its float components, space-separated
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab_file', default='vocab.txt', type=str)
    parser.add_argument('--vectors_file', default='vectors.txt', type=str)
    args = parser.parse_args()

    with open(args.vocab_file, 'r') as f:
        # first token on each line is the word; any trailing count is ignored
        words = [line.rstrip().split(' ')[0] for line in f]
    with open(args.vectors_file, 'r') as f:
        vectors = {}
        for line in f:
            vals = line.rstrip().split(' ')
            # list comprehension, not map(): Python 3's map() is lazy and has
            # no len(), which broke the vector_dim lookup below
            vectors[vals[0]] = [float(x) for x in vals[1:]]

    vocab_size = len(words)
    vocab = {w: idx for idx, w in enumerate(words)}
    ivocab = {idx: w for idx, w in enumerate(words)}

    vector_dim = len(vectors[ivocab[0]])
    W = np.zeros((vocab_size, vector_dim))
    # .items() replaces the Python-2-only .iteritems()
    for word, v in vectors.items():
        if word == '<unk>':
            continue
        W[vocab[word], :] = v

    # normalize each word vector to unit L2 norm so that dot products
    # against W are cosine similarities
    d = np.sum(W ** 2, 1) ** 0.5
    W_norm = (W.T / d).T
    evaluate_vectors(W_norm, vocab, ivocab)

def evaluate_vectors(W, vocab, ivocab, prefix='./eval/question-data/'):
    """Evaluate the trained word vectors on the Google analogy tasks.

    For each question ``a b c d`` the prediction is the word whose vector
    is closest (by dot product) to ``W[b] - W[a] + W[c]``, excluding the
    three question words themselves.

    Parameters:
        W: ndarray of shape (vocab_size, dim), one row per word. Rows
            should be L2-normalized so dot products are cosine similarities.
        vocab: dict mapping word -> row index in W.
        ivocab: dict mapping row index -> word (kept for interface
            compatibility; not used in the computation).
        prefix: directory containing the analogy question files.

    Returns:
        (correct_tot, count_tot, full_count) — correctly answered
        questions, questions with all four words in-vocabulary, and all
        questions including those with unknown words.
    """
    filenames = [
        'capital-common-countries.txt', 'capital-world.txt', 'currency.txt',
        'city-in-state.txt', 'family.txt', 'gram1-adjective-to-adverb.txt',
        'gram2-opposite.txt', 'gram3-comparative.txt', 'gram4-superlative.txt',
        'gram5-present-participle.txt', 'gram6-nationality-adjective.txt',
        'gram7-past-tense.txt', 'gram8-plural.txt', 'gram9-plural-verbs.txt',
    ]

    # to avoid memory overflow, could be increased/decreased
    # depending on system and vocab size
    split_size = 100

    correct_tot = 0  # count correct questions
    count_tot = 0    # count all questions with all four words in-vocabulary
    full_count = 0   # count all questions, including those with unknown words

    for filename in filenames:
        with open('%s/%s' % (prefix, filename), 'r') as f:
            full_data = [line.rstrip().split(' ') for line in f]
        full_count += len(full_data)
        # keep only questions whose four words are all in the vocabulary
        data = [x for x in full_data if all(word in vocab for word in x)]

        if not data:
            # empty or all-OOV file: the original crashed here unpacking an
            # empty indices array; skip instead
            continue

        indices = np.array([[vocab[word] for word in row] for row in data])
        ind1, ind2, ind3, ind4 = indices.T

        predictions = np.zeros((len(indices),))
        num_iter = int(np.ceil(len(indices) / float(split_size)))
        # range() replaces the Python-2-only xrange()
        for j in range(num_iter):
            subset = np.arange(j * split_size,
                               min((j + 1) * split_size, len(ind1)))

            # analogy offset: b - a + c should land near d
            pred_vec = (W[ind2[subset], :] - W[ind1[subset], :]
                        + W[ind3[subset], :])
            # cosine similarity if input W has been normalized
            dist = np.dot(W, pred_vec.T)

            # exclude the three question words from the candidate set
            # (np.inf: the np.Inf alias was removed in NumPy 2.0)
            for k in range(len(subset)):
                dist[ind1[subset[k]], k] = -np.inf
                dist[ind2[subset[k]], k] = -np.inf
                dist[ind3[subset[k]], k] = -np.inf

            # predicted word index
            predictions[subset] = np.argmax(dist, 0).flatten()

        val = (ind4 == predictions)  # correct predictions
        count_tot += len(ind1)
        correct_tot += int(np.sum(val))
        print('ACCURACY TOP1: %.2f%% (%d/%d)' %
              (np.mean(val) * 100, np.sum(val), len(val)))

    # guard the summary prints against division by zero when nothing
    # could be evaluated (original raised ZeroDivisionError)
    if count_tot > 0:
        print('Total accuracy: %.2f%%' %
              (100 * correct_tot / float(count_tot)))
    if full_count > 0:
        print('Questions seen/total: %.2f%% (%d/%d)' %
              (100 * count_tot / float(full_count), count_tot, full_count))
    return correct_tot, count_tot, full_count


if __name__ == "__main__":
main()

0 comments on commit 8b9bb2b

Please sign in to comment.