forked from hongzimao/decima-sim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
average_reward.py
executable file
·38 lines (33 loc) · 1.24 KB
/
average_reward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class AveragePerStepReward(object):
def __init__(self, size):
self.size = size
self.count = 0
self.reward_record = []
self.time_record = []
self.reward_sum = 0
self.time_sum = 0
def add(self, reward, time):
if self.count >= self.size:
stale_reward = self.reward_record.pop(0)
stale_time = self.time_record.pop(0)
self.reward_sum -= stale_reward
self.time_sum -= stale_time
else:
self.count += 1
self.reward_record.append(reward)
self.time_record.append(time)
self.reward_sum += reward
self.time_sum += time
def add_list(self, list_reward, list_time):
assert len(list_reward) == len(list_time)
for i in range(len(list_reward)):
self.add(list_reward[i], list_time[i])
def add_list_filter_zero(self, list_reward, list_time):
assert len(list_reward) == len(list_time)
for i in range(len(list_reward)):
if list_time[i] != 0:
self.add(list_reward[i], list_time[i])
else:
assert list_reward[i] == 0
def get_avg_per_step_reward(self):
return float(self.reward_sum) / float(self.time_sum)