forked from Cernewein/heating-RL-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
122 lines (109 loc) · 5.09 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from DQN import DAgent
import sys
from environment import Building
from matplotlib import style
style.use('ggplot')
from vars import *
from itertools import count
import pickle as pkl
import os
import argparse
import sys
import torch
import pandas as pd
from train_dqn import train_dqn
from train_ddpg import train_ddpg
import numpy as np
def _str_to_bool(value):
    """Return True only when *value* spells ``true`` (case-insensitive)."""
    return str(value).lower() == 'true'


def parse_args(argv=None):
    """Parse the command-line options of the training/evaluation script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what a
            plain ``parse_args()`` call did before this parameter existed.

    Returns:
        An ``argparse.Namespace`` with the attributes ``ckpt``,
        ``model_name``, ``dynamic``, ``soft``, ``eval``, ``model_type`` and
        ``noisy``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", default=None)
    parser.add_argument("--model_name", default='')
    # The boolean options take an explicit value (e.g. ``--eval True``);
    # anything other than "true" in any letter case is parsed as False.
    parser.add_argument("--dynamic", default=False, type=_str_to_bool)
    parser.add_argument("--soft", default=False, type=_str_to_bool)
    parser.add_argument("--eval", default=False, type=_str_to_bool)
    parser.add_argument("--model_type", default='DDPG')
    parser.add_argument("--noisy", default=False, type=_str_to_bool)
    return parser.parse_args(argv)
def _to_agent_state(brain, raw_state, observe=True):
    """Convert a raw observation into the normalized, batched tensor fed to the agent."""
    tensor = torch.tensor(raw_state, dtype=torch.float, device=device)
    if observe:
        # The normalizer is an online algorithm: feed it the new sample
        # before normalizing with its statistics.
        brain.normalizer.observe(tensor)
    return brain.normalizer.normalize(tensor).unsqueeze(0)


def _save_eval_frame(columns, file_name):
    """Pickle *columns* as a pandas DataFrame to ``<cwd>/data/output/<file_name>``."""
    frame = pd.DataFrame()
    for column_name, values in columns.items():
        frame[column_name] = values
    output_dir = os.path.join(os.getcwd(), 'data', 'output')
    # Create the directory if needed so a finished evaluation is not lost
    # to a missing folder at save time.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, file_name), 'wb') as f:
        pkl.dump(frame, f)


def _evaluate_episode(brain, model_name):
    """Roll the agent through one evaluation episode and save the trajectory."""
    env = Building(dynamic=True, eval=True)
    # The first row holds the environment values read right after
    # construction (before reset) together with a zero action and reward.
    inside_temperatures = [env.inside_temperature]
    ambient_temperatures = [env.ambient_temperature]
    prices = [env.price]
    actions = [0]
    rewards = [0]
    print('Starting evaluation of the model')
    state = _to_agent_state(brain, env.reset())
    for _ in range(NUM_TIME_STEPS):
        action = brain.select_action(state).type(torch.FloatTensor).item()
        # The price is logged before stepping, i.e. the price that was
        # current when the action was chosen.
        prices.append(env.price)
        actions.append(action)
        next_state, reward, done = env.step(action)
        rewards.append(reward)
        inside_temperatures.append(env.inside_temperature)
        ambient_temperatures.append(env.ambient_temperature)
        if done:
            # There is no next state to act on; stop instead of handing
            # None to select_action on the following iteration.
            break
        state = _to_agent_state(brain, next_state)
    _save_eval_frame(
        {
            'Inside Temperatures': inside_temperatures,
            'Ambient Temperatures': ambient_temperatures,
            'Prices': prices,
            'Actions': actions,
            'Rewards': rewards,
        },
        model_name + '_eval.pkl',
    )


def _evaluate_policy_grid(brain, model_name):
    """Query the agent on a grid of inside temperature, ambient temperature and price.

    Every grid point is stored together with the action the agent selects.
    The third state entry is held at 0 for the whole sweep.
    """
    inside_temperatures = []
    ambient_temperatures = []
    prices = []
    actions = []
    ambient_grid = np.arange(-5, 5, 1/10)
    for inside_temp in np.arange(19, 23, 1/10):
        for ambient_temp in ambient_grid:
            for price in range(0, 60):
                # Synthetic states are only normalized; they are not fed to
                # the normalizer's running statistics.
                state = _to_agent_state(
                    brain, [inside_temp, ambient_temp, 0, price], observe=False)
                action = brain.select_action(state).type(torch.FloatTensor).item()
                inside_temperatures.append(inside_temp)
                ambient_temperatures.append(ambient_temp)
                prices.append(price)
                actions.append(action)
    # NOTE(review): unlike the episode file, this name has no underscore
    # between the model name and the suffix; kept as-is so existing readers
    # of the file keep working.
    _save_eval_frame(
        {
            'Inside Temperatures': inside_temperatures,
            'Ambient Temperatures': ambient_temperatures,
            'Prices': prices,
            'Actions': actions,
        },
        model_name + 'policy_eval.pkl',
    )


def run(ckpt, model_name, dynamic, soft, eval, model_type, noisy):
    """Train an agent, or evaluate a previously saved one.

    Args:
        ckpt: Path of a saved agent. Forwarded to the training function when
            training; required when ``eval`` is true.
        model_name: Forwarded to the training function; in evaluation mode
            it is the prefix of the output file names.
        dynamic: Forwarded to the training function. The evaluation
            environment is always built with ``dynamic=True``.
        soft: Forwarded to ``train_dqn`` only.
        eval: When false, train; when true, evaluate the agent from ``ckpt``.
        model_type: ``'DQN'`` selects ``train_dqn``; any other value selects
            ``train_ddpg``.
        noisy: Forwarded to ``train_ddpg`` only.

    In evaluation mode two pickled DataFrames are written below
    ``<cwd>/data/output/``: ``<model_name>_eval.pkl`` (episode trajectory)
    and ``<model_name>policy_eval.pkl`` (policy grid sweep).
    """
    if not eval:
        if model_type == 'DQN':
            train_dqn(ckpt, model_name, dynamic, soft)
        else:
            train_ddpg(ckpt, model_name, dynamic, noisy)
        return

    if not ckpt:
        print('If no training should be performed, then please choose a model that should be evaluated')
        return

    # NOTE: torch.load unpickles the file, so only load checkpoints that
    # come from a trusted source.
    brain = torch.load(ckpt, map_location=torch.device('cpu'))
    # Switch off every source of exploration for the evaluation.
    brain.epsilon = 0
    brain.eps_end = 0
    brain.add_noise = False

    _evaluate_episode(brain, model_name)
    print('Finished the evaluation on January \n' +
          'Starting policy evaluation')
    _evaluate_policy_grid(brain, model_name)
# Script entry point: every parsed command-line option is handed to run()
# as a keyword argument of the same name.
if __name__ == '__main__':
    run(**vars(parse_args()))