-
Notifications
You must be signed in to change notification settings - Fork 4
/
play.py
executable file
·171 lines (153 loc) · 5.46 KB
/
play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python3
"""
Play OpenAI gym's games using the keyboard.
"""
import sys
import os
from os.path import join as pjoin
import time
import math
import random
import logging
import argparse
import json
import numpy as np
import gym
from gym import wrappers
from pynput.keyboard import Key, Listener
class InputState:
    """Mutable keyboard/action state shared between the key listener and the game loop.

    Attributes are plain and public; they are read and written directly by the
    listener callbacks and by ``get_action``.
    """

    def __init__(self, action_space, keymap):
        """Store the environment's action space and the keymap; start with no keys held."""
        # Configuration supplied by the caller.
        self.action_space, self.keymap = action_space, keymap

        # Keys currently held down, and the single key used to pick an action.
        self.pressed_keys, self.pressed_key = set(), None

        # Last action returned by get_action (None until the first call).
        self.recent_action = None

        # Set to True when the user asks to stop playing.
        self.user_stop = False
def get_action(input_state):
    """Translate the currently pressed key into an action for the environment.

    The key in ``input_state.pressed_key`` is reduced to a name (``Key.name``
    for special keys, ``.char`` for character keys) and resolved as follows:

    1. If the name is a key of ``input_state.keymap``, the mapped value is used
       (an exact match is tried first, then a lower-cased match so that
       Shift / Caps Lock do not disable lower-case keymap entries).
    2. Otherwise, if the name is a decimal number smaller than the size of a
       ``gym.spaces.Discrete`` action space, that number is the action.
    3. Otherwise ``keymap['default']`` is used.

    The resulting value may be one of the meta-actions ``'same'``, ``'next'``,
    ``'prev'``, ``'random'`` or ``'rand'``, which are expanded using
    ``input_state.recent_action`` and ``input_state.action_space``.

    Args:
        input_state: object exposing ``pressed_key``, ``keymap`` (must contain
            ``'default'``), ``action_space`` and ``recent_action``.

    Returns:
        The action chosen for this step. It is also stored in
        ``input_state.recent_action``.

    Raises:
        KeyError: if ``keymap`` has no ``'default'`` entry.
    """
    key = input_state.pressed_key
    key_name = None
    if key is not None:
        if isinstance(key, Key):
            key_name = key.name
        else:
            try:
                key_name = key.char
            except AttributeError:
                logging.warning('Invalid key {}'.format(repr(repr(key))))

    keymap = input_state.keymap
    action_space = input_state.action_space
    action = keymap['default']

    if key_name is not None:
        if key_name in keymap:
            action = keymap[key_name]
        elif key_name.lower() in keymap:
            # Fallback for upper-case characters produced while Shift or
            # Caps Lock is active.
            action = keymap[key_name.lower()]
        elif key_name.isdecimal():
            # isdecimal() rather than isdigit(): the latter also accepts
            # characters such as superscripts, for which int() raises.
            n = int(key_name)
            if isinstance(action_space, gym.spaces.Discrete) and n < action_space.n:
                action = n

    if action == 'same':
        action = input_state.recent_action
        if action is None:
            # No previous action to repeat yet; returning None would be
            # passed on to env.step, so pick a valid action instead.
            action = action_space.sample()
    elif action == 'next' or action == 'prev':
        delta = 1 if action == 'next' else -1
        if isinstance(action_space, gym.spaces.Discrete):
            if input_state.recent_action is None:
                action = 0
            else:
                # Wrap around at both ends of the discrete range.
                action = (input_state.recent_action + delta) % action_space.n
        else:
            logging.warning(
                "'{}' does not make sense for continuous action space".format(action))
            action = action_space.sample()
    elif action == 'random' or action == 'rand':
        action = action_space.sample()

    input_state.recent_action = action
    return action
def main():
    """Parse the command line, then play one gym game driven by the keyboard.

    A global keyboard listener records which keys are held; on every frame the
    environment is rendered, ``get_action`` converts the held key into an
    action, and the environment is stepped. The loop ends when the episode is
    done (unless ``--force-continue`` is given) or when Ctrl+C is detected by
    the listener. If ``--output`` is given, the observations, actions and
    rewards of the session are saved to that directory together with a
    ``metadata.json`` file.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('game', help='Name of the game')
    parser.add_argument('--force-continue', action='store_true', default=False,
        help="Keep playing even after 'Game over'")
    parser.add_argument('--delay', type=float, help='Extra delay between frames in milliseconds')
    parser.add_argument('-o', '--output', help='Output directory path')
    args = parser.parse_args()

    keymap = _load_keymap(args.game)

    # Initialization
    env = gym.make(args.game)
    state = env.reset()
    done = False
    t = 0
    score = 0
    input_state = InputState(env.action_space, keymap)
    print('Action space:', env.action_space)
    print('Observation space:', env.observation_space)

    out_dir = args.output
    if out_dir is not None:
        os.makedirs(out_dir, exist_ok=True)
    # The recording starts with the initial observation, so `states` ends up
    # one element longer than `actions` and `rewards`.
    states = [state]
    actions = []
    rewards = []

    def keypress_callback(key):
        # The most recently pressed key decides the action.
        input_state.pressed_key = key
        input_state.pressed_keys.add(key)

    def keyrelease_callback(key):
        # discard() instead of remove(): a release can arrive for a key whose
        # press was never recorded (e.g. it was already held when the listener
        # started), and a KeyError here would propagate out of the callback.
        input_state.pressed_keys.discard(key)
        if input_state.pressed_keys:
            # Fall back to an arbitrary key that is still held.
            input_state.pressed_key = next(iter(input_state.pressed_keys))
        else:
            input_state.pressed_key = None

        # Check for Ctrl+C: 'c' was released while only a Ctrl key is held.
        # Special keys have no `.char`, hence the getattr default.
        # NOTE(review): '\x03' and Key.ctrl_l are accepted because some
        # platforms appear to report them for Ctrl+C -- confirm against pynput.
        char = getattr(key, 'char', None)
        ctrl_only = ({Key.ctrl}, {Key.ctrl_l}, {Key.ctrl_r})
        if char in ('c', '\x03') and input_state.pressed_keys in ctrl_only:
            print('Ctrl+C')
            input_state.user_stop = True

    try:
        with Listener(on_press=keypress_callback, on_release=keyrelease_callback):
            while not input_state.user_stop and (args.force_continue or not done):
                env.render()
                if args.delay is not None:
                    time.sleep(args.delay / 1000)  # milliseconds -> seconds
                action = get_action(input_state)
                state, reward, done, info = env.step(action)
                if out_dir is not None:
                    states.append(state)
                    rewards.append(reward)
                    actions.append(action)
                t += 1
                score += reward
    finally:
        # Release the environment even if rendering or stepping failed.
        env.close()

    print('Time:', t)
    print('Score:', score)

    if out_dir is not None:
        _save_episode(out_dir, states, actions, rewards, t, score, done)


def _load_keymap(game):
    """Return the keymap for `game`, read from ``keymaps/<game>.json`` next to this script.

    Keys of the JSON object are lower-cased. If the file does not exist an
    empty keymap is used. A ``'default'`` entry of ``'random'`` is added when
    the keymap does not define one.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    keymap_fpath_json = os.path.join(base_dir, 'keymaps', game) + '.json'
    try:
        with open(keymap_fpath_json) as fobj:
            print('Using JSON keymap', file=sys.stderr)
            keymap = {key.lower(): value for key, value in json.load(fobj).items()}
    except FileNotFoundError:
        logging.warning('No keymap found')
        keymap = {}
    if 'default' not in keymap:
        keymap['default'] = 'random'
    return keymap


def _save_episode(out_dir, states, actions, rewards, t, score, done):
    """Write the recorded session to `out_dir` as ``.npy`` arrays plus ``metadata.json``."""
    states = np.array(states)
    rewards = np.array(rewards)
    actions = np.array(actions)
    # `score` and `done` may be NumPy scalars (they derive from what env.step
    # returned), which the json module refuses to serialize; convert them to
    # the equivalent built-in values first.
    if isinstance(score, np.generic):
        score = score.item()
    metadata = {
        'time': t,
        'score': score,
        'done': bool(done),
        'observation_shape': list(states.shape[1:]),
        'observation_dtype': states.dtype.name,
        'action_shape': list(actions.shape[1:]),
        'action_dtype': actions.dtype.name,
        'reward_dtype': rewards.dtype.name,
    }
    with open(pjoin(out_dir, 'metadata.json'), 'w') as fobj:
        json.dump(metadata, fobj, indent=4, sort_keys=True)
    np.save(pjoin(out_dir, 'states.npy'), states)
    np.save(pjoin(out_dir, 'actions.npy'), actions)
    np.save(pjoin(out_dir, 'rewards.npy'), rewards)
# Run the keyboard player only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()