-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.m
79 lines (61 loc) · 2.7 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
% Manuel Meraz
% EECS 270 Robot Algorithms
% Markov Decision Processes Controller for Simple Inverted Pendulum

% If a previous policy has been generated, load it from Policies.mat.
% NOTE: dot-indexing a function's return value (load(...).Policies) is not
% valid MATLAB, so the original always threw here and the catch branch ran
% even when the file existed. Load into a temporary struct first.
try
    loaded = load('Policies.mat', 'Policies');
    Policies = loaded.Policies;
catch
    disp('File does not exist');
    % Start with an empty cell so later Policies{...} = Policy assignments
    % work without an undefined-variable error.
    Policies = {};
end
% params is a struct holding all MDP parameters.
% setPoint is the angle (rad) the inverted pendulum should balance at.
params.setPoint = 0;

% Depth of the recursion tree; kept low for faster computation.
params.depthLimit = 2;

% State-space bounds: row 1 is theta, row 2 is thetaDot.
params.stateBounds = [params.setPoint - pi/4, params.setPoint + pi/4; -5, 5];

params.numStates = 15;   % fewer states compute faster and work well enough
params.discount = 0.95;  % high discount: future rewards matter more
params.dt = 0.1;         % time step size (seconds)
params.dimensions = 2;   % dimensionality of the state vector; don't modify

% Noise contains all noise parameters (zero-mean, fixed covariance).
noise.mu = zeros(params.dimensions, 1);
noise.covariance = [0.01, 0; 0, 0.001];

% Discretization step per dimension: (upper bound - lower bound) / numStates,
% computed for every dimension at once instead of looping.
params.stepSize = ...
    (params.stateBounds(:, 2) - params.stateBounds(:, 1)) / params.numStates;
% Simulation parameters.
sim.interval = 100;       % width of the plot window for the live view
sim.maxIterations = 500;  % simulation stops after this many iterations
sim.thetaNaught = params.setPoint;  % start centered on the set point
sim.thetaDotNaught = 0;             % start the sim standing still

% Deviation from the set point at which the sim declares the policy failed;
% larger values give more wiggle room.
sim.fail.upperBound = params.setPoint + pi/4;
sim.fail.lowerBound = params.setPoint - pi/4;

% When true, noise is injected into the simulation; false shows no noise.
sim.addNoise = true;

% Set of actions; a higher-resolution action set helps improve balancing.
A = [-100, -35, -10, -5, -1, 0, 1, 5, 10, 35, 100];

% Set of states: row 1 holds theta samples, row 2 holds thetaDot samples,
% each spanning its bounds with numStates evenly spaced points.
S = zeros(params.dimensions, params.numStates);
for d = 1:params.dimensions
    S(d, :) = linspace(params.stateBounds(d, 1), ...
                       params.stateBounds(d, 2), params.numStates);
end
% Reuse a cached policy for this (numStates, depthLimit, setPoint) combination
% if one exists; otherwise generate one with MDP and cache it.
% NOTE(review): the cell index params.setPoint + 1 assumes setPoint is a
% non-negative integer; a fractional or negative set point would error here
% and always regenerate — confirm if other set points are intended.
try
    Policy = Policies{params.numStates, params.depthLimit, params.setPoint + 1};
    % Explicit emptiness check (the original probed Policy(1,1) and relied on
    % an indexing error); raise to fall through to the regeneration path.
    if isempty(Policy)
        error('Cached policy slot is empty');
    end
catch
    disp('Policy does not exist. Generating one for the number of states');
    % Policy has one entry per state: the optimal action from A for the
    % corresponding column of state s in the set S.
    Policy = MDP(params, noise, S, A);
    Policies{params.numStates, params.depthLimit, params.setPoint + 1} = Policy;
    save('Policies.mat', 'Policies');
end

% All plotting is handled inside startSimulation.
startSimulation(sim, params, noise, Policy, S);