-
Notifications
You must be signed in to change notification settings - Fork 1
/
text_classification_with_an_rnn.py
73 lines (52 loc) · 2.11 KB
/
text_classification_with_an_rnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
"""Text classification with an RNN.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1jWZtkV3DksDsOTAjonbu74hKDp7I3Q3H
"""
!pip install -q tf-nightly
import tensorflow_datasets as tfds
import tensorflow as tf
import matplotlib.pyplot as plt
def plot_graphs(history, metric):
plt.plot(history.history[metric])
plt.plot(history.history['val_'+metric], '')
plt.xlabel("Epochs")
plt.ylabel(metric)
plt.legend([metric, 'val_'+metric])
plt.show()
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
as_supervised=True)
train_examples, test_examples = dataset['train'], dataset['test']
encoder = info.features['text'].encoder
print('Vocabulary size: {}'.format(encoder.vocab_size))
sample_string = 'Hello TensorFlow.'
encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))
original_string = encoder.decode(encoded_string)
print('The original string: "{}"'.format(original_string))
assert original_string == sample_string
for index in encoded_string:
print('{} ----> {}'.format(index, encoder.decode([index])))
BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = (train_examples
.shuffle(BUFFER_SIZE)
.padded_batch(BATCH_SIZE, padded_shapes=([None],[])))
test_dataset = (test_examples
.padded_batch(BATCH_SIZE, padded_shapes=([None],[])))
model = tf.keras.Sequential([
tf.keras.layers.Embedding(encoder.vocab_size, 64),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(1)
])
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
optimizer=tf.keras.optimizers.Adam(1e-4),
metrics=['accuracy'])
history = model.fit(train_dataset, epochs=10,
validation_data=test_dataset,
validation_steps=30)
test_loss, test_acc = model.evaluate(test_dataset)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))