-
Notifications
You must be signed in to change notification settings - Fork 0
/
diag_util.jl
67 lines (55 loc) · 2.57 KB
/
diag_util.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
    get_do_nothing_plan_data(other_plan_data, util_model)

Build the reward table for the "no impl" plan from the state histories stored in
`other_plan_data`.

Rows whose `:state` entry is missing are dropped. For every remaining row the state
sequence, minus its last element, is scored with `expectedutility` under `util_model`
for a default-constructed `ImplementEvalAction`.

# Returns
A data frame holding only three columns:
- `:actual_reward`: per-row vector of utilities of the states themselves,
- `:actual_ex_ante_reward`: the same, but each state is first passed through `dgp`,
- `:plan_type`: the constant string `"no impl"`.
"""
function get_do_nothing_plan_data(other_plan_data, util_model)
    # NOTE(review): presumably `ImplementEvalAction()` with no arguments is the action
    # that implements nothing -- confirm against its definition.
    noop_action = ImplementEvalAction()
    complete_runs = dropmissing(other_plan_data, :state)

    return @select(
        complete_runs,
        :actual_reward = map(
            sim -> expectedutility.(Ref(util_model), sim[Not(end)], Ref(noop_action)),
            :state
        ),
        :actual_ex_ante_reward = map(
            sim -> expectedutility.(Ref(util_model), dgp.(sim[Not(end)]), Ref(noop_action)),
            :state
        ),
        :plan_type = "no impl"
    )
end
"""
    get_best_plan_data(other_plan_data, util_model)

Build the reward table for the "best" plan from the state histories stored in
`other_plan_data`.

Rows whose `:state` entry is missing are dropped; each remaining state sequence is
handed to `get_program_reward`.

NOTE(review): `util_model` is accepted but never referenced in this body;
`get_program_reward` is called without a model argument, so whatever model it uses
does not come from this parameter. Confirm that this is intended.

# Returns
A data frame holding only three columns:
- `:actual_reward`: `get_program_reward` of each state sequence,
- `:actual_ex_ante_reward`: the same with `eval_getter = dgp`,
- `:plan_type`: the constant string `"best"`.
"""
function get_best_plan_data(other_plan_data, util_model)
    complete_runs = dropmissing(other_plan_data, :state)

    return @select(
        complete_runs,
        :actual_reward = [get_program_reward(sim) for sim in :state],
        :actual_ex_ante_reward = [get_program_reward(sim, eval_getter = dgp) for sim in :state],
        :plan_type = "best"
    )
end
"""
    calculate_util_diff(planned_reward, baseline_reward; accum = false, maxstep = nothing)

Return the element-wise difference `planned - baseline` for every positionally paired
reward series in `planned_reward` and `baseline_reward`.

# Arguments
- `planned_reward`: collection of reward series.
- `baseline_reward`: collection of reward series to subtract, paired by position.

# Keywords
- `accum`: when `true`, each series is replaced by its cumulative sum before the
  subtraction.
- `maxstep`: when not `nothing`, only the first `maxstep` entries of each series are
  used. Every series is truncated *before* the subtraction, so a planned and a baseline
  series only need at least `maxstep` entries each; they do not need equal lengths.
  For equal-length series the result is the same as subtracting first and truncating
  afterwards.

# Returns
A collection with one difference series per pair.

# Throws
- `BoundsError` if `maxstep` exceeds the length of a series.
- `DimensionMismatch` if `maxstep` is `nothing` and a pair has different lengths.
"""
function calculate_util_diff(planned_reward, baseline_reward; accum = false, maxstep = nothing)
    # Running totals when `accum` is set, otherwise the raw per-step values.
    series = accum ? cumsum : identity
    # The cumulative sum is taken over the full series and truncated afterwards, so the
    # retained prefix is exactly what the full-length computation would have produced.
    clip = isnothing(maxstep) ? identity : (r -> r[1:maxstep])

    return map(planned_reward, baseline_reward) do p, n
        clip(series(p)) - clip(series(n))
    end
end
"""
    calculate_util_diff_summ(util_diff)

Summarise a collection of utility-difference series step by step.

`util_diff` is re-nested with `SplitApplyCombine.invert`, so that the values belonging
to the same step end up together; each step is then reduced to its mean and to its
0.25 / 0.5 / 0.75 quantiles.

The quantiles are computed over `skipmissing` values, but the mean is not: a step that
contains a `missing` yields a `missing` mean.

# Returns
A `DataFrame` with one row per step and the columns `:lb`, `:med`, `:ub` (the three
quantiles), `:step` (`1:nrow`) and `:mean`, in that order.
"""
function calculate_util_diff_summ(util_diff)
    per_step = SplitApplyCombine.invert(util_diff)

    step_means = map(mean, per_step)
    step_quartiles = map(per_step) do step_values
        quantile(skipmissing(step_values), [0.25, 0.5, 0.75])
    end

    # Re-nest once more so that each quantile level becomes one column.
    summary = DataFrame(SplitApplyCombine.invert(step_quartiles), [:lb, :med, :ub])
    insertcols!(summary, :step => 1:nrow(summary), :mean => step_means)
    return summary
end
"""
    get_program_reward(sim_states; eval_getter = identity, model = util_model)

Return the reward series of the best fixed action for one simulated state sequence.

Every action produced by a `SelectProgramSubsetActionSetFactory` built from the number
of programs in the first state and the constant `1` is scored on all states except the
last one. The series whose sum is largest is returned.

# Arguments
- `sim_states`: state sequence of one simulation; must be non-empty, since the first
  element is inspected.

# Keywords
- `eval_getter`: applied to every state before it is scored (default `identity`).
- `model`: utility model passed to `expectedutility`. It defaults to the global
  `util_model`, which is what this function always used; pass it explicitly to avoid
  depending on that global.

# Returns
The vector of per-state expected utilities of the action with the largest total.
"""
function get_program_reward(sim_states; eval_getter = identity, model = util_model)
    nprograms = FundingPOMDPs.numprograms(first(sim_states))
    # NOTE(review): presumably the second argument `1` restricts the factory to actions
    # that implement a single program -- confirm against the factory's definition.
    implement_only_asf = SelectProgramSubsetActionSetFactory(nprograms, 1)

    # The evaluated states do not depend on the action, so compute them once.
    # NOTE(review): this assumes `eval_getter` returns the same value on repeated calls.
    evaluated_states = eval_getter.(sim_states[Not(end)])

    fixed_action_reward = [
        expectedutility.(Ref(model), evaluated_states, Ref(a))
        for a in FundingPOMDPs.actions(implement_only_asf)
    ]

    # `argmax(f, itr)` returns the element of `itr` that maximises `f`, i.e. the reward
    # series itself rather than its index.
    return argmax(sum, fixed_action_reward)
end
"""
    summarize_util_diff(sim_data, compare_to, best_reward = nothing; ex_ante, maxstep, accum)

Summarise, per algorithm, the utility difference between its rewards and `compare_to`.

`sim_data` is grouped by `:plan_type`; from every group the reward column is taken
(`:actual_ex_ante_reward` when `ex_ante` is true, `:actual_reward` otherwise). Series
shorter than `maxstep` are discarded, the remaining ones are differenced against
`compare_to` with `calculate_util_diff` and condensed with `calculate_util_diff_summ`.
The per-algorithm summaries are stacked into one table with an `:algo` label column.

# Arguments
- `sim_data`: data frame with a `:plan_type` column and the reward columns above.
- `compare_to`: baseline reward series, passed unchanged to `calculate_util_diff`.
- `best_reward`: optional extra collection of reward series, summarised like a plan
  group and labelled `"ex post best"`. With the default `nothing` it is skipped; the
  placeholder used to be appended regardless and made the function fail.

# Keywords
- `ex_ante`: selects the reward column (see above).
- `maxstep`: minimum series length and number of steps kept.
- `accum`: forwarded to `calculate_util_diff`.

# Returns
The vertically concatenated summaries, with `:algo` identifying the source.
"""
function summarize_util_diff(sim_data, compare_to, best_reward = nothing; ex_ante, maxstep, accum)
    reward_col = ex_ante ? :actual_ex_ante_reward : :actual_reward

    # Iterate over the groups instead of broadcasting over the grouped data frame.
    plan_rewards = [group[:, reward_col] for group in groupby(sim_data, :plan_type)]

    # NOTE(review): the labels are positional -- this assumes `groupby` yields exactly
    # these four plan types in this order; verify against the `:plan_type` values.
    algo_labels = ["greedy", "planned", "random", "freq"]

    all_rewards = if isnothing(best_reward)
        plan_rewards
    else
        push!(algo_labels, "ex post best")
        vcat(plan_rewards, [best_reward])
    end

    summaries = map(all_rewards) do algo_rewards
        # NOTE(review): only this side is filtered; `compare_to` is left untouched, so
        # dropping a short series shifts the positional pairing with the baseline.
        long_enough = filter(r -> length(r) >= maxstep, algo_rewards)
        util_diff = calculate_util_diff(long_enough, compare_to; accum = accum, maxstep = maxstep)
        calculate_util_diff_summ(util_diff)
    end

    return vcat(summaries...; source = :algo => algo_labels)
end