-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathadditional_code.py
More file actions
324 lines (276 loc) · 14.3 KB
/
additional_code.py
File metadata and controls
324 lines (276 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
import pandas as pd
import matplotlib.pyplot as plt
def separate_micro_objectives_and_behaviors_by_mean(df: pd.DataFrame) -> pd.DataFrame:
    """Reshapes the per-column means of __df__ into a long-format DataFrame of the form:
    | Micro Objective | Micro Behavior | value |
    |                 |                |       |
    ...
    Every column label of __df__ must have the form "<micro objective>.<micro behavior>"
    (exactly one dot), otherwise the unpacking of the split label raises ValueError.
    Each 'value' is the share (in percent, rounded to 2 decimals) of that column's
    mean in the sum of all column means.
    Returns the new DataFrame
    """
    # Mean of the normalized samples (not the normalization of the means).
    # NOTE(review): normalize() is defined elsewhere; presumably it rescales to the 0-100 range.
    column_means = normalize(df, 0, 100).mean()
    # Express every mean as a percentage of the total
    percentages = (100. * column_means / column_means.sum()).round(2)

    rows = []
    for column_label, percentage in percentages.items():
        # The label encodes both levels, separated by a dot
        objective, behavior = column_label.split('.')
        rows.append({'Micro Objective': objective, 'Micro Behavior': behavior, 'value': percentage})
    return pd.DataFrame(rows)
def get_tonal_colors(values_to_color: list) -> list:
    """
    Maps every entry of __values_to_color__ to its tonal color in
    behavior_catalog_tonal_colormap, preserving the input order.
    Entries missing from the colormap are painted white ("#FFFFFF").
    """
    colormap = behavior_catalog_tonal_colormap
    return [colormap[value] if value in colormap else "#FFFFFF" for value in values_to_color]
def get_hatches(values_to_hatch: list) -> list:
    """
    Maps every entry of __values_to_hatch__ to its hatch pattern in
    behavior_catalog_hatchmap, preserving the input order.
    Entries missing from the hatch map get the empty pattern ("").
    """
    hatchmap = behavior_catalog_hatchmap
    return [hatchmap[value] if value in hatchmap else "" for value in values_to_hatch]
def generate_heatmap_per_micro_objective(df: pd.DataFrame, micro_objectives, title:str) -> None:
    """
    Draws one PDF heatmap per entry of __micro_objectives__.
    Each heatmap only contains the columns of __df__ whose label starts with the
    micro objective; the figure title is "<title> <micro objective>" and the
    output file is "<title> <micro objective> heatmap.pdf".
    """
    for micro_objective in micro_objectives:
        own_columns = [column for column in df if column.startswith(micro_objective)]
        heatmap_title = title + " " + str(micro_objective)
        generate_pdf_heatmap(df[own_columns], heatmap_title, heatmap_title + " heatmap.pdf")
def generate_pdf_heatmap(df: pd.DataFrame, fig_title: str, fig_name: str, fig_type: str = "", categories = None) -> None:
    """
    Draws __df__ as a seaborn heatmap and saves it to __fig_name__.
    The figure is sized at 0.5 inches per row of __df__ (width) and 0.5 inches
    per column of __df__ (height).
    __fig_type__ selects the flavour of the plot:
    - "spawned_processes": only the 'Spawned Processes' column is drawn, as a
      single row of integer annotations with the color scale fixed to 0-25.
    - "micro-behavior": the normalized data is drawn and red horizontal lines
      separate the groups of columns whose label contains each entry of
      __categories__ (which must be iterable in this mode).
    - anything else (including "micro-objective"): the normalized data is drawn.
    """
    width_inches = 0.5 * len(df.index)  # one slot per sample
    height_inches = 0.5 * len(df.columns)  # one slot per micro objective/behavior
    plt.figure(figsize = (width_inches, height_inches))

    if fig_type == "spawned_processes":
        # Heatmap requires a 2D array, hence the extra pair of brackets
        # https://stackoverflow.com/questions/57888688/inconsistent-shape-between-the-condition-and-the-input-while-using-seaborn#comment102199104_57888688
        spawned = np.asarray([df['Spawned Processes']])
        heatmap = sns.heatmap(spawned, square=True, annot=True, annot_kws={'size': 9}, cbar_kws={'shrink': .5}, fmt='d', cbar=False, cmap="rocket", vmin=0, vmax=25)
        # Hide every tick and tick label: https://stackoverflow.com/a/53304154
        plt.tick_params(axis='both', which='major', labelsize=6, labelbottom=False, labelleft=False, bottom=False, left=False, top=False)
        plt.title(fig_title, fontsize=13)
        # bbox_inches="tight": https://stackoverflow.com/a/49201252
        heatmap.get_figure().savefig(fig_name, bbox_inches="tight")
        plt.close()
        return

    if fig_type == "micro-behavior":
        # Running count of the columns belonging to each category; every partial
        # total is the position of a separator line. The last total is dropped,
        # so no line is drawn after the final group.
        separator_positions = []
        columns_seen = 0
        for category in categories:
            columns_seen += sum(category in column for column in df.columns)
            separator_positions.append(columns_seen)
        separator_positions = separator_positions[:-1]

    # NOTE(review): normalize() is defined elsewhere; presumably it rescales to the 0-100 range.
    df = normalize(df, 0, 100)

    # ATTENTION: df is transposed before drawing, so its columns become the heatmap rows
    heatmap = sns.heatmap(df.T, square=True, annot=True, annot_kws={'size': 5}, cbar_kws={'shrink': .5}, fmt='.3f', cbar=True, cmap="magma")
    if fig_type == "micro-behavior":
        axes = heatmap.axes
        axes.hlines(separator_positions, *axes.get_xlim(), color="Red")
    # https://stackoverflow.com/a/53304154
    plt.tick_params(axis='both', which='major', labelsize=6, labelbottom=True, bottom=True, left=True, top=False)
    plt.title(fig_title, fontsize=16)
    plt.xlabel("Sample", fontsize=8)
    plt.ylabel("Behavior", fontsize=8)
    # bbox_inches="tight": https://stackoverflow.com/a/49201252
    heatmap.get_figure().savefig(fig_name, bbox_inches="tight")
    plt.close()
def generate_pdf_nestedpie(df: pd.DataFrame, fig_title: str, fig_name: str) -> None:
    """
    Draws a two-ring nested pie chart and saves it as a PDF in __fig_name__.
    The outer ring shows the total 'value' per 'Micro Objective'; the inner ring
    shows the 'value' of every ('Micro Objective', 'Micro Behavior') pair.
    __df__ must provide the columns 'Micro Objective', 'Micro Behavior' and 'value'.
    Inspired by: https://stackoverflow.com/a/67221817
    """
    # Drop the rows whose value is 0
    df = df[df.value != 0]
    # Aggregate the 'value' column only. Summing the whole frame would also "sum"
    # the text column 'Micro Behavior' (recent pandas concatenates the strings),
    # and those strings would end up among the wedge sizes.
    outer_ring = df.groupby('Micro Objective')['value'].sum()
    inner_ring = df.groupby(['Micro Objective', 'Micro Behavior'])['value'].sum()

    # Create a nested pie chart
    _, ax = plt.subplots(figsize=(15, 15))
    size = 0.3  # width of each ring

    # Outer ring
    # More info about the values returned by ax.pie: https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.pie.html
    ax.pie(outer_ring.values, radius=1,
           autopct='%.3f',
           pctdistance=1.05,
           textprops=dict(weight='bold'),
           wedgeprops=dict(width=size, edgecolor='w'),
           colors=get_basic_colors(outer_ring.index))

    # Inner ring: labelled with the micro behavior (second level of the group index)
    inner_labels = inner_ring.index.get_level_values(1)
    ax.pie(inner_ring.values, radius=1-size,
           labels=inner_labels,
           autopct='%.3f',
           pctdistance=0.75,
           labeldistance=1.05,
           rotatelabels=True,
           textprops=dict(fontsize=8),
           wedgeprops=dict(width=size, edgecolor='w'),
           colors=get_basic_colors(inner_labels))

    ax.set(aspect="equal")
    # Add legend
    ax.legend(outer_ring.index, title="Micro Objectives")
    plt.title(fig_title, fontsize=13)
    # Save the graph
    plt.savefig(fig_name, format="pdf", bbox_inches="tight")
    plt.close() # So data does not get mixed up
def generate_piechart_per_micro_objective(df: pd.DataFrame, micro_objectives, title:str) -> None:
    """
    Draws one PDF pie chart per entry of __micro_objectives__.
    Each chart only receives the columns of __df__ whose label starts with the
    micro objective. The text up to and including the first ']' of the micro
    objective (its ID) is left out of the figure title and the file name; a
    micro objective without ']' raises ValueError.
    """
    for micro_objective in micro_objectives:
        own_columns = [column for column in df if column.startswith(micro_objective)]
        id_end = micro_objective.index(']')
        name_without_id = micro_objective[id_end + 1:].strip()
        chart_title = title + "\n" + str(name_without_id) + " Micro-objective"
        pdf_name = title + " " + str(name_without_id) + " pie.pdf"
        generate_pdf_piechart(df[own_columns], chart_title, pdf_name, micro_objective)
#def generate_pdf_piechart(values: list, colors: list, fig_title: str, fig_name: str, legend_title: str) -> None:
def generate_pdf_piechart(df: pd.DataFrame, fig_title: str, fig_name: str, micro_objective: str = None) -> None:
    """
    Draws the mean of every column of __df__ as a pie chart and saves it as a
    PDF in __fig_name__.
    Columns whose normalized mean is 0 are left out and the remaining means are
    shown as percentages of their sum.
    When __micro_objective__ is None the slices are micro objectives and
    __fig_title__ is used as title. Otherwise the slices are the micro behaviors
    of that micro objective: the "<micro_objective>." prefix is removed from the
    labels, the biggest slice is exploded and outlined in black, and the title
    is the micro objective name without its ID (__fig_title__ is not used).
    Nothing is drawn (and no file is written) when there is no data to show.
    Every label must contain a ']' closing its ID, otherwise ValueError is raised.
    """
    # If the sum of every value in the DataFrame is 0, it means there is no
    # micro behavior from this micro objective present during the execution
    # In such case, exit the function because there is nothing to draw
    if df.sum().sum() == 0.0:
        return

    # Mean of the normalized samples
    # NOTE(review): normalize() is defined elsewhere; presumably it rescales to the 0-100 range.
    mean_df = normalize(df, 0, 100).mean()
    # Delete the entries whose value is 0
    mean_df = mean_df[mean_df.values != 0]
    # Every mean may have been filtered out; np.argmax would raise ValueError on
    # an empty array, so stop here as in the "nothing to draw" case above
    if mean_df.empty:
        return
    # Transform data into percentage
    mean_df = (100. * mean_df / mean_df.sum()).round(2)
    # Position of the biggest value so it can be exploded in the pie chart
    # (mean_df.values is a np.array, hence np.argmax instead of max()/index())
    biggest_value_index = np.argmax(mean_df.values)

    if micro_objective is None:
        explode = None
        legend_title = "Micro Objectives"
    else:
        # Rename the labels by deleting their category, in a single pass
        category_prefix = micro_objective + "."
        mean_df = mean_df.rename(index=lambda label: label.removeprefix(category_prefix)) # Python +3.9 https://stackoverflow.com/a/1038845
        explode = [0.0] * len(mean_df.index)
        explode[biggest_value_index] = 0.1
        legend_title = "Micro Behaviors"
    color = get_basic_colors(mean_df.index)

    # Delete the ID from the legend labels
    labels = [label[label.index(']')+1:].strip() for label in mean_df.index]
    # With autopct set, plt.pie returns (patches, texts, autotexts); only the patches are needed
    patches = plt.pie(mean_df.values, radius=1,
                      autopct='%1.2f%%',
                      explode=explode,
                      labeldistance=1.05,
                      textprops=dict(size=7),
                      wedgeprops=dict(edgecolor='w'),
                      colors=color,
                      )[0]
    # Outline the slice corresponding to the biggest value (the exploded one)
    # Only for micro-behaviors
    if micro_objective is not None:
        patches[biggest_value_index].set_path_effects([PathEffects.Stroke(linewidth=1, foreground='black')])

    # Add legend
    plt.legend(labels, title=legend_title, loc="center", fontsize='small', ncols=2, bbox_to_anchor = (.5, -.1))
    if micro_objective:
        plt.title(micro_objective[micro_objective.index(']')+1:].strip(), fontsize=13)
    else:
        plt.title(fig_title, fontsize=13)
    # Save the graph
    plt.savefig(fig_name, format="pdf", bbox_inches="tight")
    plt.close() # So data does not get mixed up
def generate_pdf_stackedbars(df: pd.DataFrame, fig_title: str, fig_name: str) -> None:
    """
    Draws __df__ as a stacked bar chart (one bar per micro objective, one segment
    per micro behavior) and saves it as a PDF in __fig_name__.
    __df__ must be in long format with the columns 'Micro Objective',
    'Micro Behavior' and 'value'. The IDs (the text up to and including the
    first ']') are removed from the names shown in the chart; a name without
    ']' raises ValueError. No legend is drawn.
    """
    def strip_id(label: str) -> str:
        """Returns __label__ without its leading "[ID]" part."""
        return label[label.index(']')+1:].strip()

    # Pivot the DataFrame: micro objectives as rows, micro behaviors as columns
    df_pivot = df.pivot(index='Micro Objective', columns='Micro Behavior', values='value')
    # Colors are requested before the IDs are removed from the column names.
    # NOTE(review): presumably the color lookup is keyed by the ID-carrying names - confirm.
    colors = get_basic_colors(df_pivot.columns)
    # Remove IDs both from index (micro-objectives) and columns (micro-behaviors) in a single pass
    df_pivot = df_pivot.rename(index=strip_id, columns=strip_id)

    # Plot the stacked bar chart; legend=False replaces creating and then removing the legend
    ax = df_pivot.plot(kind='bar', stacked=True, color=colors, yticks=[0, 25, 50, 75, 100], legend=False)
    # Customize the plot
    ax.set_ylabel('Matches')
    ax.set_xlabel('Micro Objective')
    ax.set_title(fig_title, fontsize=13)
    # Save the graph
    plt.savefig(fig_name, format="pdf", bbox_inches="tight")
    plt.close() # So data does not get mixed up
def generate_radarchart_per_micro_objective(df: pd.DataFrame, micro_objectives, title:str) -> None:
    """
    Draws one PDF radar chart per entry of __micro_objectives__.
    The data of each chart comes from generate_dataframe_specific_micro_objective().
    The text up to and including the first ']' of the micro objective (its ID)
    is left out of the figure title and the file name; a micro objective
    without ']' raises ValueError.
    """
    for micro_objective in micro_objectives:
        objective_df = generate_dataframe_specific_micro_objective(df, micro_objective)
        id_end = micro_objective.index(']')
        name_without_id = micro_objective[id_end + 1:].strip()
        chart_title = title + "\n" + str(name_without_id) + " Micro-objective"
        pdf_name = title + " " + str(name_without_id) + " radar.pdf"
        generate_pdf_radarchart(objective_df, chart_title, pdf_name, micro_objective)