-
Notifications
You must be signed in to change notification settings - Fork 0
/
logger.py
218 lines (181 loc) · 7.84 KB
/
logger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
from __future__ import print_function
import os
import subprocess
import time
import pandas as pd
from datetime import datetime
from shutil import copyfile
class Logger(object):
    """
    Buffers training-session messages and scalar variables and writes them
    into a log directory.

    The logger class is still very basic right now and can only deal with
    homogeneous ice.

    Parameters
    ----------
    logdir : string
        Path to the logdir to write to. If None a directory inside ./log gets
        created as logdir. Its name is the current UTC Unix timestamp.
    overwrite : boolean
        If True the logdir will get overwritten if it already exists. Caution
        is advised, it is always recommended to set this parameter to False in
        which case an exception is raised if the logdir already exists.
    log_version : boolean
        If True the current git HEAD and `git status` output are written to
        version.log inside the logdir. This is best effort: a failure is
        reported through Logger.message instead of aborting construction.

    Raises
    ------
    LogdirAlreadyExistsError
        If the logdir already exists and overwrite is False.
    """

    # ---------------------------- Initialization -----------------------------
    def __init__(self, logdir=None, overwrite=False, log_version=True):
        """Prepare the logdir and the in-memory buffers (see class doc)."""
        self._start_time = datetime.utcnow()

        # The buffers have to exist before anything below can call
        # self.message(), otherwise the error handlers themselves would fail.
        self._session_buffer = "Starting at " \
            + self._start_time.strftime("%a %b %d %H:%M:%S %Z %Y UTC") + '\n'
        self._variables = []
        self._print_variables = set()
        self._data_buffer = pd.DataFrame(columns=self._variables)

        # set default logdir
        if logdir is None:
            # _start_time is naive UTC, so subtract the epoch directly;
            # time.mktime would wrongly interpret it as local time.
            epoch_seconds = int(
                (self._start_time - datetime(1970, 1, 1)).total_seconds())
            logdir = './log/{}/'.format(epoch_seconds)
        # make sure the path ends with a separator so that a logdir given
        # without one (e.g. 'log/run1') still refers to a directory
        logdir = os.path.join(logdir, '')

        # check if the logdir already exists or create it
        if os.path.exists(logdir):
            if not overwrite:
                raise LogdirAlreadyExistsError("If you want to overwrite the "
                                               "existing log set "
                                               "overwrite=True.")
            # clean the existing logdir if overwrite is True
            for stale_name in ('session.log', 'variables.hdf5'):
                stale_path = os.path.join(logdir, stale_name)
                if os.path.isfile(stale_path):
                    os.unlink(stale_path)
        else:
            os.makedirs(logdir)

        # copy the currently used settings into the logdir
        copyfile('./settings.py', os.path.join(logdir, 'settings.py'))

        self._logdir = logdir

        # create version logfile
        if log_version:
            self._write_version_log()

    def _write_version_log(self):
        """Write git HEAD and `git status` to version.log (best effort)."""
        try:
            head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD']).decode('utf-8')
            status = subprocess.check_output(
                ['git', 'status']).decode('utf-8')
        except (OSError, subprocess.CalledProcessError) as e:
            # git missing or not inside a repository
            self.message("Logger could not query git: " + str(e))
            return
        version_log = "Git HEAD points at " + head \
            + "Git Status says:\n" + status
        try:
            version_path = os.path.join(self._logdir, 'version.log')
            with open(version_path, 'w') as version_logfile:
                version_logfile.write(version_log)
        except Exception as e:
            self.message("Logger could not write to file: " + str(e))

    def register_variables(self, variables, print_variables=None,
                           print_all=False):
        """
        Registers the variables which are supposed to get logged. This method
        can only be called once per session for correct behavior.

        Parameters
        ----------
        variables : list of strings
            A list which contains the names of all the variables which are
            supposed to get logged. Variables must be scalar.
        print_variables : list of strings
            The variables which are supposed to get printed on every step if
            Logger.log is called with printing=True. Must be a subset of
            variables. None means that no variable gets printed.
        print_all : boolean
            If true make print_variables = variables. Overrides the
            print_variables parameter.

        Raises
        ------
        NotASubsetError
            If print_variables is not a subset of variables.
        """
        # keep a private copy so later changes by the caller cannot
        # desynchronize the registered names from the data buffer columns
        variables = list(variables)
        if print_all:
            print_variables = variables
        elif print_variables is None:
            print_variables = []
        elif not set(print_variables) <= set(variables):
            # verify that print_variables is a subset of variables
            raise NotASubsetError("print_variables must be a subset of "
                                  "variables.")
        self._variables = variables
        self._print_variables = set(print_variables)
        # initialize pandas dataframe buffer
        self._data_buffer = pd.DataFrame(columns=variables)

    # ----------------------- Writing to Files --------------------------------
    def write(self):
        """
        Writes the current buffers to files.

        The session buffer is appended to session.log and the buffered
        variable rows are appended to the 'Variables' table of variables.hdf5.
        A failure is reported through Logger.message and does not raise.
        Should only be used sparingly to not waste too many resources.
        """
        try:
            # write print buffer to session log
            session_path = os.path.join(self._logdir, 'session.log')
            with open(session_path, 'a') as session_logfile:
                session_logfile.write(self._session_buffer)
            self._session_buffer = ""
            # write all variables to hdf5 store; nothing to do without rows
            if len(self._data_buffer) > 0:
                store = pd.HDFStore(
                    os.path.join(self._logdir, 'variables.hdf5'))
                try:
                    store.append('Variables', self._data_buffer, format='t',
                                 data_columns=True)
                finally:
                    # never leave the store open, even if append fails
                    store.close()
        except Exception as e:
            self.message("Logger could not write to file: " + str(e))
        # reset the data buffer TODO: better way?
        self._data_buffer = pd.DataFrame(columns=self._variables)

    # -------------------- Public Logging Methods -----------------------------
    def _elapsed(self):
        """Return the session runtime as (hours, minutes, seconds)."""
        session_time = datetime.utcnow() - self._start_time
        # total_seconds() keeps counting past 24 hours, .seconds would wrap
        hours, remainder = divmod(int(session_time.total_seconds()), 3600)
        minutes, seconds = divmod(remainder, 60)
        return hours, minutes, seconds

    def log(self, step, variables, printing=True):
        """
        Very basic and specialized logging method.

        Parameters
        ----------
        step : integer
            The current training step.
        variables : list of scalar numbers
            A list with one value for each registered variable in the same
            order they have been registered. The variables have to be
            registered first using Logger.register_variables.
        printing : boolean
            Whether or not to print the logged step.

        Raises
        ------
        InvalidNumberOfVariables
            If the number of values differs from the number of registered
            variables.
        """
        if len(variables) != len(self._variables):
            raise InvalidNumberOfVariables("The number of parsed variables "
                                           "does not equal the number of "
                                           "registered variables")
        hours, minutes, seconds = self._elapsed()
        parts = ["[{:02d}:{:02d}:{:02d} {:08d}]".format(
            hours, minutes, seconds, step)]
        for name, value in zip(self._variables, variables):
            if name in self._print_variables:
                # fixed point for values above 1, scientific otherwise
                if value > 1:
                    parts.append(" {}: {:1.3f}".format(name, value))
                else:
                    parts.append(" {}: {:1.3e}".format(name, value))
        line = "".join(parts)
        # append row to data buffer
        self._data_buffer.loc[step] = variables
        if printing:
            print(line)
        self._session_buffer += line + '\n'

    def message(self, message, step=None, printing=True):
        """
        Logs and optionally prints a message.

        Parameters
        ----------
        message : String
            The message to log
        step : Integer
            Current training step or None. Step 0 is a valid step.
        printing : Boolean
            If true the message is also printed to the screen
        """
        hours, minutes, seconds = self._elapsed()
        if step is not None:
            prefix = "[{:02d}:{:02d}:{:02d} {:08d}] ".format(
                hours, minutes, seconds, step)
        else:
            prefix = "[{:02d}:{:02d}:{:02d}] ".format(
                hours, minutes, seconds)
        line = prefix + message
        self._session_buffer += line + '\n'
        if printing:
            print(line)
# -------------------------------- Exceptions ---------------------------------
class LogdirAlreadyExistsError(Exception):
    """Raised when the logdir already exists and overwrite is not set."""
class NotASubsetError(Exception):
    """Raised when print_variables is not a subset of the variables."""
class InvalidNumberOfVariables(Exception):
    """Raised when the logged values do not match the registered count."""