-
Notifications
You must be signed in to change notification settings - Fork 1
/
SampleGenerator.py
213 lines (185 loc) · 10.8 KB
/
SampleGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# Python module
import itertools
# Python extension modules (require extension installations)
import numpy as np
from scipy.stats import futil
from scipy.sparse.csgraph import _validation
from scipy.stats import uniform, norm, triang, lognorm, beta
## Sample generator
class SampleGenerator:
    """Generate sample multipliers and apply them to a mapped parameter set.

    Multipliers can be generated by:
      * Latin Hypercube sampling from one of the following distributions:
          1. Uniform distribution
          2. Normal (Gaussian) distribution
          3. Triangular distribution
          4. Lognormal distribution
          5. Beta distribution
      * Random (uniform distribution) sampling within 1 +/- bound
      * Full factorial combination of low, mid and high values

    The generated multipliers are applied to parameter values via multipy().
    """

    def _distributionArguments(self, distribution, settings):
        """Translate a distribution name and its settings into scipy.stats arguments.

        Args:
            distribution: one of 'uniform', 'normal', 'triangular', 'lognormal', 'beta'.
            settings: dictionary holding the settings for that distribution:
                uniform: 'lower', 'upper'; normal: 'mean', 'std_dev';
                triangular: 'a', 'b', 'c'; lognormal: 'lower', 'scale', 'sigma';
                beta: 'lower', 'upper', 'alpha', 'beta'.

        Returns:
            A tuple (scipy_distribution, shape_arguments, loc, scale), or None when
            the distribution name is not recognised.

        Raises:
            KeyError: when a required setting is missing.
        """
        if distribution == 'uniform':
            lower = settings['lower']
            return uniform, (), lower, settings['upper'] - lower
        if distribution == 'normal':
            return norm, (), settings['mean'], settings['std_dev']
        if distribution == 'triangular':
            a = settings['a']
            base = settings['b'] - a
            # float() avoids integer (floor) division under Python 2 when the settings are ints
            c_std = (settings['c'] - a) / float(base)
            return triang, (c_std,), a, base
        if distribution == 'lognormal':
            return lognorm, (settings['sigma'],), settings['lower'], settings['scale']
        if distribution == 'beta':
            lower = settings['lower']
            base = settings['upper'] - lower
            return beta, (settings['alpha'], settings['beta']), lower, base
        return None

    def _scaleValue(self, value, multiplier, as_integer):
        """Multiply a value by a multiplier, optionally rounding the product to an integer type."""
        scaled = value * multiplier
        if as_integer:
            # np.round handles plain Python numbers as well as numpy scalars and arrays
            scaled = np.round(scaled).astype(int)
        return scaled

    def generateLatinHypercubeSampledMultipliers(self, specification_map, number_samples):
        """Generate Latin Hypercube sampled multipliers for each parameter.

        Args:
            specification_map: dictionary mapping each parameter key to a dictionary
                with a 'distribution' name and the 'settings' for that distribution.
            number_samples: number of samples (strata) to generate.

        Returns:
            A list of number_samples dictionaries, each mapping parameter key to a
            sampled multiplier. Parameters with an unrecognised distribution are
            omitted from the dictionaries.
        """
        # Construct sets of stratified samples from the selected distribution for each parameter
        multiplier_sets = {}
        for key, specification in specification_map.items():
            # One random probability within each of number_samples equal-width strata of [0, 1)
            stratified_random_probabilities = (
                (np.arange(number_samples) + np.random.random(number_samples)) / number_samples)
            arguments = self._distributionArguments(
                specification['distribution'], specification.get('settings', {}))
            if arguments is None:
                continue
            # Convert the probabilities to samples via the inverse CDF
            scipy_distribution, shape_arguments, loc, scale = arguments
            multiplier_sets[key] = scipy_distribution.ppf(
                stratified_random_probabilities, *shape_arguments, loc=loc, scale=scale).tolist()
        # Randomly select from the sampled sets without replacement so that the strata
        # of the different parameters are paired at random
        sampled_multipliers = []
        for _ in range(number_samples):
            sampled_multiplier = {}
            for key, multiplier_set in multiplier_sets.items():
                random_index = np.random.randint(len(multiplier_set))
                sampled_multiplier[key] = multiplier_set.pop(random_index)
            sampled_multipliers.append(sampled_multiplier)
        return sampled_multipliers

    def generateRandomSampledMultipliers(self, specification_map, number_samples):
        """Generate uniformly random multipliers within 1 +/- bound for each parameter.

        Args:
            specification_map: dictionary mapping each parameter key to a dictionary
                holding its 'bound'.
            number_samples: number of samples to generate.

        Returns:
            A list of number_samples dictionaries mapping parameter key to multiplier.
        """
        sampled_multipliers = []
        for _ in range(number_samples):
            sampled_multiplier = {}
            for key, specification in specification_map.items():
                bound = specification['bound']
                sampled_multiplier[key] = np.random.uniform(1 - bound, 1 + bound)
            sampled_multipliers.append(sampled_multiplier)
        return sampled_multipliers

    def generateFullFactorialMultipliers(self, specification_map):
        """Generate every combination of lower, mid and upper multipliers.

        Args:
            specification_map: dictionary mapping each parameter key to a dictionary
                holding its 'bound'.

        Returns:
            A list of dictionaries (3 to the power of the number of parameters long)
            mapping parameter key to 1 - bound, 1, or 1 + bound.
        """
        key_set = []  # maintains key order
        lower_mid_upper_sets = []
        for key, specification in specification_map.items():
            bound = specification['bound']
            key_set.append(key)
            lower_mid_upper_sets.append([1 - bound, 1, 1 + bound])
        # Map each member of the cartesian product back to the parameter keys
        return [dict(zip(key_set, multiplier_set))
                for multiplier_set in itertools.product(*lower_mid_upper_sets)]

    def lowerThreshold(self, distribution, specification, tail_probability):
        """Calculate the lower threshold value for a given tail probability.

        Args:
            distribution: 'normal' or 'lognormal'.
            specification: dictionary of the distribution settings
                (normal: 'mean', 'std_dev'; lognormal: 'lower', 'scale', 'sigma').
            tail_probability: probability mass lying below the threshold.

        Returns:
            The threshold value, or None for any other distribution.
        """
        if distribution not in ('normal', 'lognormal'):
            return None
        scipy_distribution, shape_arguments, loc, scale = self._distributionArguments(
            distribution, specification)
        return scipy_distribution.ppf(tail_probability, *shape_arguments, loc=loc, scale=scale)

    def upperThreshold(self, distribution, specification, tail_probability):
        """Calculate the upper threshold value for a given tail probability.

        Takes the same arguments as lowerThreshold(), with tail_probability being
        the probability mass lying above the threshold.
        """
        return self.lowerThreshold(distribution, specification, 1 - tail_probability)

    def multipy(self, parameter_values, multipliers, parameter_data_types=None):
        """Modify parameter values using multipliers.

        The method name (sic) is retained for existing callers.

        Args:
            parameter_values: dictionary mapping parameter key to a value, or to a
                nested dictionary of values.
            multipliers: dictionary mapping parameter key to its multiplier.
            parameter_data_types: optional dictionary mapping parameter key to a data
                type name; values of type 'integer' are rounded and cast to int.

        Returns:
            A dictionary holding the modified values for the keys in multipliers only.

        Raises:
            KeyError: when a multiplier key is absent from parameter_values.
        """
        if parameter_data_types is None:
            parameter_data_types = {}
        modified_parameter_values = {}
        for key, multiplier in multipliers.items():
            as_integer = parameter_data_types.get(key) == 'integer'
            value = parameter_values[key]
            if isinstance(value, dict):  # nested
                modified_parameter_values[key] = {
                    nested_key: self._scaleValue(nested_value, multiplier, as_integer)
                    for nested_key, nested_value in value.items()}
            else:
                modified_parameter_values[key] = self._scaleValue(value, multiplier, as_integer)
        return modified_parameter_values

    def generateDistributionPlotValues(self, specification):
        """Generate probability density plot values for a specified distribution.

        Args:
            specification: dictionary with a 'distribution' name and the 'settings'
                for that distribution.

        Returns:
            A dictionary with lists 'x_values' and 'y_values' of equal length (up to
            1000 points). Points with a non-finite density (nan/inf) are removed.
            Both lists are empty when the distribution is not recognised.
        """
        sample_number = 1000
        distribution = specification['distribution']
        arguments = self._distributionArguments(distribution, specification.get('settings', {}))
        if arguments is None:
            return {'x_values': [], 'y_values': []}
        scipy_distribution, shape_arguments, loc, scale = arguments
        # Select the plotted x range for the distribution
        if distribution == 'normal':
            # Three standard deviations either side of the mean
            x_min = loc - 3*scale
            span = (loc + 3*scale) - x_min
        elif distribution == 'lognormal':
            # From the lower limit up to the value exceeded with probability 0.01
            x_min = loc
            span = lognorm.isf(0.01, *shape_arguments, loc=loc, scale=scale) - loc
        else:
            # Bounded distributions are plotted across their full base
            x_min = loc
            span = scale
        # float() avoids integer (floor) division under Python 2 when the settings are ints
        increment = span / float(sample_number)
        x_values = x_min + np.arange(sample_number) * increment
        y_values = scipy_distribution.pdf(x_values, *shape_arguments, loc=loc, scale=scale)
        # Remove any nan/inf values (and their x values)
        finite = np.isfinite(y_values)
        return {'x_values': x_values[finite].tolist(), 'y_values': y_values[finite].tolist()}
# END SampleGenerator