-
Notifications
You must be signed in to change notification settings - Fork 0
/
stat_plots.py
executable file
·188 lines (152 loc) · 6.15 KB
/
stat_plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python3
import json
import os
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import argparse
def load_data(folder: str = None) -> pd.DataFrame:
    """Load data from folder to Pandas.DataFrame.

    Read all JSON files (names ending in ``.json``) in the given folder and
    concatenate them into one DataFrame with a fresh index. Files are read in
    sorted name order so the row order of the result is reproducible. A file
    that cannot be read or parsed is reported and skipped; the remaining files
    are still loaded.

    Args:
        folder (str, optional): Folder to read data from. Defaults to None.

    Raises:
        IOError: If no folder is given.
        ValueError: If the folder cannot be listed or no JSON file in it
            could be loaded.

    Returns:
        pd.DataFrame: DataFrame containing data read from JSON files.
    """
    if not folder:
        raise IOError("Folder not specified")

    try:
        names = sorted(f for f in os.listdir(folder) if f.endswith(".json"))
    except OSError as err:
        # An unreadable folder has always ended in a ValueError (from the
        # empty concat); keep that type but say what actually went wrong.
        raise ValueError(
            "Could not list folder {}: {}".format(folder, err)) from err

    dfs = []
    for name in names:
        path = os.path.join(folder, name)
        # Guard each file separately so one broken file does not discard
        # every file that would have been read after it.
        try:
            dfs.append(pd.read_json(path))
        except (OSError, ValueError) as err:
            print("error occurred while loading {}: {}".format(path, err))

    if not dfs:
        raise ValueError(
            "No JSON data could be loaded from {}".format(folder))
    return pd.concat(dfs, ignore_index=True)
def plot_gen_fitness(df: pd.DataFrame, fig_location: str = None,
                     show_plot: bool = False, show_swarm: bool = False):
    """Generate boxplot from provided DataFrame displaying distribution of fitness value
    for each generation.

    When the highest generation number exceeds 50, only generations divisible
    by 5 are plotted. If the plotted data still reaches past generation 50,
    only every 5th tick label is left visible.

    Args:
        df (pd.DataFrame): DataFrame containing data to be plotted. The
            columns "gen" and "fitness" are read.
        fig_location (str, optional): Path where to store generated plot. Defaults to None.
        show_plot (bool, optional): Show plot on display. Defaults to False.
        show_swarm (bool, optional): Plot swarmplot over boxplot data. Defaults to False.
    """
    # Create plot object and set data to plot
    sns.set_theme(style="white", context="paper")
    fig, ax = plt.subplots(1, 1, figsize=(7, 5))

    data = df
    if df["gen"].max() > 50:
        # Thin long runs: keep only every 5th generation.
        data = df[df["gen"] % 5 == 0]

    sns.boxplot(data=data, x="gen", y="fitness", ax=ax, color="white")
    if show_swarm:
        # Use the same (possibly thinned) data and the same axes as the
        # boxplot so each swarm sits on the box of its own generation.
        sns.swarmplot(data=data, x="gen", y="fitness", color=".25", ax=ax)

    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")

    if data["gen"].max() > 50:
        # Only every 5th tick label is kept visible.
        for ind, label in enumerate(ax.get_xticklabels()):
            label.set_visible(ind % 5 == 0)

    # If figure location is specified, try to save plot
    if fig_location is not None:
        try:
            fig.savefig(fig_location)
        except OSError:
            # Best effort: missing directory, permission problems, etc.
            print("Could not save figure to {}".format(fig_location))

    if show_plot:
        plt.show()
def plot_mean_fitness(df: pd.DataFrame, fig_location: str = None,
                      show_plot: bool = False):
    """Generate basic plot showing mean fitness value for given generations.

    Args:
        df (pd.DataFrame): DataFrame containing data to be plotted. The
            columns "gen" and "fitness" are read.
        fig_location (str, optional): Path where to store generated plot. Defaults to None.
        show_plot (bool, optional): Show plot on display. Defaults to False.
    """
    sns.set_theme(style="white", context="paper")
    fig, ax = plt.subplots(1, 1, figsize=(7, 5))

    # Average only the fitness column. Averaging every column fails on
    # recent pandas when the frame also holds non-numeric columns, and the
    # plot only ever uses the fitness mean anyway.
    mean_fitness = df.groupby("gen")["fitness"].mean().reset_index(name="mean")
    mean_fitness.plot(ax=ax, x="gen", y="mean")

    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")

    # If figure location is specified, try to save plot
    if fig_location is not None:
        try:
            fig.savefig(fig_location)
        except OSError:
            # Best effort: missing directory, permission problems, etc.
            print("Could not save figure to {}".format(fig_location))

    if show_plot:
        plt.show()
def get_best_code(df: pd.DataFrame) -> str:
    """Return string containing code of best individual.

    The best individual is the first record with status "result" whose
    fitness equals the lowest fitness among all such records. A newline is
    inserted after every ';' of its code.

    Args:
        df (pd.DataFrame): Dataframe of evolution run. The columns "status",
            "fitness" and "phenotype" are read; each phenotype is indexed
            with the key "code".

    Raises:
        ValueError: If there is no "result" record with a comparable fitness.

    Returns:
        str: Return code of best individual.
    """
    # Filter all result records in dataframe
    results = df[df["status"] == "result"]
    # Take record(s) with lowest fitness value
    best = results[results["fitness"] == results["fitness"].min()]
    if best.empty:
        raise ValueError("No result record found in data")
    # Read the code straight from the nested record of the first best row;
    # there is no need to expand every phenotype into columns.
    code = best["phenotype"].iloc[0]["code"]
    return code.replace(';', ';\n')
def store_data(df: pd.DataFrame, path: str):
    """Write a DataFrame to a gzip-compressed pickle file.

    The suffix ".pkl.gz" is always appended to the given path.

    Args:
        df (pd.DataFrame): DataFrame to be stored.
        path (str): Target path without the ".pkl.gz" suffix.
    """
    target = f"{path}.pkl.gz"
    df.to_pickle(target, compression="gzip")
def load_pkl_data(path: str = None) -> pd.DataFrame:
    """Read a DataFrame back from a gzip-compressed pickle file.

    Note: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.

    Args:
        path (str, optional): Path to the pickle file. Defaults to None.

    Raises:
        IOError: If path is None or empty.

    Returns:
        pd.DataFrame: The DataFrame stored in the file.
    """
    if path:
        return pd.read_pickle(path, compression="gzip")
    raise IOError("No file given")
if __name__ == "__main__":
    # Command-line entry point: load the data, draw one plot, then
    # optionally store the data and print the best individual's code.
    parser = argparse.ArgumentParser()
    parser.add_argument("--fig_location", "-f", type=str,
                        help="Path to filename")
    parser.add_argument("--show_plot", "-d",
                        action="store_true", help="Display plot on screen")
    parser.add_argument("--show_swarm", "-s", action="store_true",
                        help="Display swarmplot over boxplot")
    # Required: without an input there is nothing to plot, and a missing
    # value would otherwise fail later with an AttributeError on None.
    parser.add_argument("--input", "-i", type=str, required=True,
                        help="Data input. Can be folder or "
                        "pickle file compressed with gzip")
    parser.add_argument("--output_file", "-o", type=str,
                        help="Output file path and name")
    parser.add_argument("--code", "-c", action='store_true',
                        help="Display code of best individual")
    parser.add_argument("--mean", "-m", action="store_true",
                        help="Show mean instead of boxplot")
    args = parser.parse_args()

    # Anything not named *.pkl.gz is treated as a folder of JSON files.
    if args.input.endswith(".pkl.gz"):
        data = load_pkl_data(args.input)
    else:
        data = load_data(args.input)

    if args.mean:
        plot_mean_fitness(data, args.fig_location, args.show_plot)
    else:
        plot_gen_fitness(data, args.fig_location, args.show_plot,
                         args.show_swarm)

    if args.output_file:
        store_data(data, args.output_file)

    if args.code:
        print("Code:\n{}".format(get_best_code(data)))