-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patha_02_createRawTrainData.py
More file actions
661 lines (486 loc) · 22.2 KB
/
a_02_createRawTrainData.py
File metadata and controls
661 lines (486 loc) · 22.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file creates a summary table with the relevant parameters of each test.
The result is a set of csv files with the relevant processed data.
Created on Wed May 22 13:26:28 2024
@author: miguelgomez
"""
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
def do_pairplot(dataframe, subset, hue, output_dir, filename):
    '''
    Draw a seaborn pairplot of selected columns and save it as a PDF.

    dataframe:  table holding the data to plot
    subset:     list of column names passed to the pairplot (must include hue)
    hue:        name of the column used to colour the points
    output_dir: folder where the figure is written
    filename:   name of the output file, without the '.pdf' extension

    Note that the matplotlib rc settings changed here are global: text is
    rendered through LaTeX with a serif font for every later figure as well.
    '''
    rc_settings = (
        ('text', {'usetex': True}),
        ('font', {'family': 'serif'}),
    )
    for group, options in rc_settings:
        plt.rc(group, **options)

    selected = dataframe[subset]
    sns.pairplot(selected, hue=hue, height=1.5, plot_kws={'alpha': 0.5})

    # savefig acts on the current figure, i.e. the pairplot that was just drawn
    pdf_path = os.path.join(output_dir, f'{filename}.pdf')
    plt.savefig(pdf_path, bbox_inches='tight')
    plt.close()
def load_json(json_dir):
    '''
    Read the JSON file located at json_dir and return the parsed content
    (a dictionary for the test files used by this script).
    '''
    with open(json_dir, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def get_rect_props(rawdata, testID):
    '''
    Collect the properties of a rectangular column test in a flat list.

    The list starts with testID, followed by the values of rawdata read in the
    fixed key order given below. A missing key raises KeyError.
    '''
    ordered_keys = (
        # String data
        'Name', 'Type', 'TestConfiguration', 'P_Delta', 'FailureType',
        # Axial load
        'AxLoad',
        # Overall shape data
        'Width', 'Depth', 'L_Inflection',
        # Material strength properties
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut',
        # Reinforcement details
        'dlb', 'dlb_c', 'nlb',
        'cc_per',    # clear cover in the direction perpendicular to the load
        'nib_per',   # number of intermediate bars perpendicular to the load
        'cc_par',    # clear cover in the direction parallel to the load
        'nib_par',   # number of intermediate bars parallel to the load
        'nsl',       # number of shear legs
        'dtb_rcs',   # transverse bar diameter in the region of close spacing of stirrups
        's_rcs',     # transverse bar spacing in the region of close spacing of stirrups
    )
    return [testID] + [rawdata[key] for key in ordered_keys]
def get_spiral_props(rawdata, testID):
    '''
    Collect the properties of a spiral column test in a flat list.

    The list starts with testID, followed by the values of rawdata read in the
    fixed key order given below. A missing key raises KeyError.
    '''
    ordered_keys = (
        # String data
        'Name', 'Type', 'TestConfiguration', 'P_Delta', 'FailureType',
        # Axial load
        'AxLoad',
        # Overall shape data
        'Diameter', 'L_Inflection',
        # Material strength properties
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut',
        # Reinforcement details
        'dlb', 'nlb', 'cc', 'dsp', 's',
    )
    return [testID] + [rawdata[key] for key in ordered_keys]
def get_nd_params(test_data):
    '''
    Compute the nondimensional physical parameters of a reinforced concrete column.

    test_data is the record of a single test (a dict or a pandas Series).
    For spiral columns (test_data['type'] == 'Spiral'), the properties are:
        'id', 'name', 'type', 'testcf', 'pd', 'ft', 'axl', 'diam', 'l',
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut', 'dlb', 'nlb',
        'cc', 'dsp', 's'
    Any other type is treated as rectangular, with properties:
        'id', 'name', 'type', 'testcf', 'pd', 'ft', 'axl', 'w', 'd', 'l',
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut', 'dlb', 'dlb_c', 'nlb',
        'cc_per', 'nib_per', 'cc_par', 'nib_par', 'nsl', 'dtb_rcs', 's_rcs'

    It returns the list [ar, lrr, trr, alr, tsr, vpvs]:
    - ar:   aspect ratio, l over the section depth (diam or d)
    - lrr:  longitudinal reinforcement ratio scaled by fyl / fpc
    - trr:  transverse reinforcement ratio scaled by fyt / fpc
            (fyl is used when fyt is zero; 0 when the spacing is zero)
    - alr:  axial load ratio, axl / (fpc * Ag / 1000)
    - tsr:  transverse spacing ratio, s / (6 * transverse bar diameter)
            (2.0 when the spacing is zero)
    - vpvs: get_moment_strength(test_data) / get_shear_strength(test_data)

    A zero fpc is replaced by 30 (MPa) on a copy of test_data, so the record
    passed by the caller is not modified.
    '''
    # Check if fpc is zero; if so, use 30 MPa (on a copy, see docstring)
    if test_data['fpc'] == 0:
        test_data = test_data.copy()
        test_data['fpc'] = 30

    fpc = test_data['fpc']
    fyl = test_data['fyl']
    # fyt = 0, then use fyl
    fyt = fyl if test_data['fyt'] == 0 else test_data['fyt']

    if test_data['type'] == 'Spiral':
        # ---------------------- Spiral Column -----------------------------
        depth = test_data['diam']
        width = test_data['diam']
        ag = np.pi * depth ** 2 / 4          # gross area (mm2)
        spacing = test_data['s']
        d_trans = test_data['dsp']
        asv = np.pi * d_trans ** 2 / 2       # area of two legs of spiral reinf (mm2)
    else:
        # ---------------------- Rectangular Column ------------------------
        depth = test_data['d']
        width = test_data['w']
        ag = width * depth                   # gross area (mm2)
        # Properties in the region of close spacing of stirrups
        spacing = test_data['s_rcs']
        d_trans = test_data['dtb_rcs']
        # Number of shear legs times the area of one transverse bar
        asv = test_data['nsl'] * np.pi * d_trans ** 2 / 4

    # (1) Aspect ratio
    ar = 1 / (depth / test_data['l'])

    # (2) Longitudinal reinforcement ratio (bar area times number of bars over Ag)
    a_long = test_data['nlb'] * np.pi * (test_data['dlb']) ** 2 / 4   # (mm2)
    rhol = a_long / ag
    lrr = rhol * (fyl / fpc)

    # (3) Transverse reinforcement ratio. The zero-spacing case must be decided
    # BEFORE dividing: previously the division ran first (spiral) or overwrote
    # the zero value (rectangular), raising ZeroDivisionError for such tests.
    if spacing == 0:
        rhot = 0
    else:
        rhot = asv / (width * spacing)
    trr = rhot * (fyt / fpc)

    # (4) Axial load ratio
    ax_cap = fpc * ag / 1000   # kN
    alr = test_data['axl'] / ax_cap

    # (5) Transverse spacing ratio
    if spacing == 0:
        tsr = 2.0
    else:
        tsr = spacing / (6 * d_trans)

    # (6) Estimate of the shear strength and the moment strength
    Vs = get_shear_strength(test_data)
    Vp = get_moment_strength(test_data)
    vpvs = Vp / Vs

    return [ar, lrr, trr, alr, tsr, vpvs]
def get_shear_strength(test_data):
    '''
    Shear strength estimate following the equations of Sezen and Moehle (2004).

    test_data is the record of a single test (dict or pandas Series). Spiral
    columns use 'diam', 'dsp' and 's' with two shear legs; any other type uses
    'w', 'd', 'dtb_rcs', 's_rcs' and 'nsl'. Returns the sum of the concrete
    and the transverse reinforcement contributions.
    '''
    is_spiral = test_data['type'] == 'Spiral'

    # Material properties (MPa)
    fpc = test_data['fpc']
    fyl = test_data['fyl']
    fyt = test_data['fyt']

    # Geometry and transverse reinforcement (mm, mm2)
    if is_spiral:
        depth = test_data['diam']
        bar_diam = test_data['dsp']
        spacing = test_data['s']
        gross_area = np.pi * (depth) ** 2 / 4
        n_legs = 2
    else:
        width = test_data['w']
        depth = test_data['d']
        # Properties in the region of close spacing of stirrups
        bar_diam = test_data['dtb_rcs']
        spacing = test_data['s_rcs']
        gross_area = width * depth
        n_legs = test_data['nsl']
    length = test_data['l']

    # Axial load (N)
    axial = test_data['axl'] * 1000

    # k factor in Sezen and Moehle (2004) for ductility-displacement dependence
    # of the shear strength; here it scales only the reinforcement term
    k_factor = 0.9

    # a/d ratio, kept within [2.0, 4.0]
    a_over_d = min(max(length / depth, 2.0), 4.0)

    # Mean shear stress at the onset of shear cracking
    half_root = 0.5 * np.sqrt(fpc)
    vc = half_root / (a_over_d) * np.sqrt(1 + axial / (half_root * gross_area))

    # Concrete contribution to the shear strength
    concrete_part = vc * 0.8 * gross_area / 1000

    # Transverse reinforcement contribution (zero when there is no spacing)
    av = n_legs * np.pi * (bar_diam) ** 2 / 4
    if spacing == 0:
        steel_part = 0
    else:
        # fyt = 0, then use fyl
        yield_stress = fyl if fyt == 0 else fyt
        steel_part = k_factor * (av * yield_stress * depth / spacing) / 1000

    return concrete_part + steel_part
def get_moment_strength(test_data, props='expected'):
    '''
    Probable moment strength of the column, returned as the corresponding
    shear demand (moment divided by the length 'l').
    Ref: Restrepo and Rodriguez (2013)

    props selects the factors applied to the material strengths: 'expected'
    (for experimental data, where the concrete and steel strengths were
    obtained from measurements) or any other value for nominal (design) values.
    '''
    # Strength factors: (lam_h, lam_co)
    if props == 'expected':
        lam_h, lam_co = 1.15, 1.0
    else:
        lam_h, lam_co = 1.25, 1.7

    # Data shared by both section types
    fpc = test_data['fpc']          # (MPa)
    fyl = test_data['fyl']          # (MPa)
    length = test_data['l']         # (mm)
    dlb = test_data['dlb']          # (mm)
    nlb = test_data['nlb']          # number of longitudinal bars
    p = test_data['axl'] * 1000     # axial load (N)

    if test_data['type'] == 'Spiral':
        diam = test_data['diam']                                 # (mm)
        ag = np.pi * (diam) ** 2 / 4                             # (mm2)
        al = np.pi * nlb * (dlb) ** 2 / 4                        # (mm2)
        # Distance to the neutral axis (mm)
        xc = diam * (0.32 * p / (lam_co * ag * fpc) + 0.1)
        arm = 1/2 - xc/diam
        rhol = al / ag                                           # longitudinal reinf. ratio (-)
        steel_term = rhol * fyl / fpc * (0.23 + 1/3 * arm)
        axial_term = p / (ag * fpc) * arm
        mcd = np.pi / 4 * (lam_h * steel_term + axial_term)
        moment = mcd * fpc * diam ** 3
    else:
        # If the column is rectangular
        d = test_data['d']                                       # (mm)
        w = test_data['w']                                       # (mm)
        ag = w * d                                               # (mm2)
        al = np.pi * nlb * (dlb ** 2 / 4)                        # (mm2)
        # Distance to the neutral axis (mm)
        xc = d * (0.34 * p / (lam_co * ag * fpc) + 0.07)
        arm = 1/2 - xc/d
        rhol = al / ag                                           # longitudinal reinf. ratio (-)
        steel_term = rhol * fyl / fpc * (0.30 + 1/4 * arm)
        axial_term = p / (ag * fpc) * arm
        mcd = lam_h * steel_term + axial_term
        moment = mcd * fpc * w * d ** 2

    return (moment / length) / 1000
if __name__ == '__main__':
    # :::
    # General Settings
    # :::
    do_pairplots = True
    # Define input and output locations (relative to the current working directory)
    cwd = os.getcwd()
    output_dir = os.path.join(cwd, 'gp_training_data', 'raw')
    input_dir = os.path.join(cwd, 'test_data')
    # The figures and csv files below are written into output_dir, so make
    # sure it exists before anything is saved
    os.makedirs(output_dir, exist_ok=True)
    # Check if the output files already exist (names must match the ones
    # written at the end of the script). The early exit is currently disabled,
    # so the files are regenerated and overwritten anyway.
    output_file_list = ['DataSpiral.csv', 'DataSpiralWnd.csv', 'DataRect.csv', 'DataRectWnd.csv', 'DataAll_NDonly.csv']
    if all(os.path.exists(os.path.join(output_dir, f)) for f in output_file_list):
        print('Output files already exist. Exiting...')
        # Here, add the post-processing plots (maybe)
        #exit()

    # :::
    # Create DataFrames with raw training data
    # :::
    # Properties of spiral columns
    spiral_cols = [
        'UniqueId', 'name', 'type', 'testcf', 'pd', 'ft', 'axl', 'diam', 'l',
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut', 'dlb', 'nlb',
        'cc', 'dsp', 's'
    ]
    # Properties of rectangular columns
    rect_cols = [
        'UniqueId', 'name', 'type', 'testcf', 'pd', 'ft', 'axl', 'w', 'd', 'l',
        'fpc', 'fyl', 'fsul', 'fyt', 'fsut', 'dlb', 'dlb_c', 'nlb',
        'cc_per', 'nib_per', 'cc_par', 'nib_par', 'nsl', 'dtb_rcs', 's_rcs'
    ]
    # Create dataframes for rectangular and spiral columns
    data_rect = pd.DataFrame(columns=rect_cols)
    data_spiral = pd.DataFrame(columns=spiral_cols)
    # Create dataframes for the nondimensional parameters, in the order
    # returned by get_nd_params
    columns = ['UniqueId', 'ar', 'lrr', 'srr', 'alr', 'sdr', 'smr']
    ndparams_spiral = pd.DataFrame(columns=columns)
    ndparams_rect = pd.DataFrame(columns=columns)

    # Read the filenames.txt file in the input_dir
    with open(os.path.join(input_dir, 'filenames.txt'), 'r') as f:
        test_files = f.read().splitlines()
    # Check that all files in test_files exist in the input_dir
    for f in test_files:
        if not os.path.exists(os.path.join(input_dir, f)):
            print('File not found:', f)
            print('Go back and run a_01_get_data_peer.py')
            # Non-zero status so a calling shell/pipeline sees the failure
            raise SystemExit(1)

    # testID is the UniqueId
    # NOTE(review): the range of ids is hard-coded and independent of
    # filenames.txt - confirm that test_001.json ... test_416.json all exist.
    for testID in range(1, 417):
        # (1) Load json file to dictionary
        json_dir = os.path.join(input_dir, 'test_' + str(testID).zfill(3) + '.json')
        rawdata = load_json(json_dir)
        # (2) Get test type
        test_type = rawdata['Type']
        # (3) Turn dict into list with properties
        try:
            if test_type == 'Spiral':
                # Get the properties of the spiral column in a list
                props = get_spiral_props(rawdata, testID)
                # Append the properties to the dataframe
                data_spiral.loc[len(data_spiral)] = props
                # Get nondimensional parameters for the spiral column
                ndparams_ii = get_nd_params(data_spiral.loc[len(data_spiral)-1])
                # Add the UniqueId to the ndparams_ii list
                ndparams_ii = [testID] + ndparams_ii
                # Append at the end of the dataframe
                ndparams_spiral.loc[len(ndparams_spiral)] = ndparams_ii
            else:
                # Get the properties of the rectangular column in a list
                props = get_rect_props(rawdata, testID)
                # Append the properties to the dataframe
                data_rect.loc[len(data_rect)] = props
                # Get nondimensional parameters for the rectangular column
                ndparams_ii = get_nd_params(data_rect.loc[len(data_rect)-1])
                # Add the UniqueId to the ndparams_ii list
                ndparams_ii = [testID] + ndparams_ii
                # Append at the end of the dataframe
                ndparams_rect.loc[len(ndparams_rect)] = ndparams_ii
        except Exception as err:
            # Best effort: report the problematic test and keep processing the rest
            print('Error in test', testID, 'Y', err)
            continue

    # Check if the calibrations_ok.csv file exists in the input folder
    if not os.path.exists(os.path.join(input_dir, 'calibrations_ok.csv')):
        print('File not found: calibrations_ok.csv. Check calibrations and generate such file.')
        raise SystemExit(1)
    # Read the calibrations_ok.csv file (use or don't use)
    ok_cals = pd.read_csv(os.path.join(input_dir, 'calibrations_ok.csv'))
    # Extract rows where ok_cals in type are Rectangular/Spiral
    use_rect_data = ok_cals[ok_cals['type'] == 'Rectangular']
    use_spiral_data = ok_cals[ok_cals['type'] == 'Spiral']
    # Merge use_rect_data and data_rect by UniqueId column / use_spiral_data and data_spiral
    # Change the name of use_rect_data column "type" to "type_use"
    use_rect_data = use_rect_data.rename(columns={'type': 'type_use'})
    use_spiral_data = use_spiral_data.rename(columns={'type': 'type_use'})
    # Now, merge.
    data_rect = data_rect.merge(use_rect_data, on='UniqueId', how='left')
    data_spiral = data_spiral.merge(use_spiral_data, on='UniqueId', how='left')
    # Check that, for all entries in the merged dataframes, the type and type_use columns match.
    # Explicit raises are used (instead of assert) so the check also runs under "python -O".
    if not (data_rect['type'] == data_rect['type_use']).all():
        raise ValueError("Type mismatch in rectangular data")
    if not (data_spiral['type'] == data_spiral['type_use']).all():
        raise ValueError("Type mismatch in spiral data")
    print("Type checks passed: all type and type_use columns match.")

    # Merge the nondimensional parameters with the main dataframes on the UniqueId column
    # This can't be done on a bigger full dataframe for both column types, because the parameters in data_rect and data_spiral are not guaranteed to match
    data_rect_wnd = pd.merge(data_rect, ndparams_rect, on='UniqueId', how='left')
    data_spiral_wnd = pd.merge(data_spiral, ndparams_spiral, on='UniqueId', how='left')
    # Keep only the rows where the use column is 1
    data_spiral_wnd = data_spiral_wnd[data_spiral_wnd['use'] == 1]
    data_rect_wnd = data_rect_wnd[data_rect_wnd['use'] == 1]

    # Generate pairplots (if requested)
    if do_pairplots:
        # Do pairplot for spiral columns
        do_pairplot(data_spiral_wnd, ['ar', 'lrr', 'srr', 'alr', 'sdr', 'smr', 'ft'], 'ft', output_dir, 'pairplot_spiral')
        # Do pairplot for rectangular columns
        do_pairplot(data_rect_wnd, ['ar', 'lrr', 'srr', 'alr', 'sdr', 'smr', 'ft'], 'ft', output_dir, 'pairplot_rectangular')

    # Restart index before saving
    data_rect = data_rect.reset_index(drop=True)
    data_spiral = data_spiral.reset_index(drop=True)
    data_spiral_wnd = data_spiral_wnd.reset_index(drop=True)
    data_rect_wnd = data_rect_wnd.reset_index(drop=True)

    # :::
    # Save Files
    # :::
    # Store dataframe into a csv file inside the output folder
    data_rect.to_csv(os.path.join(output_dir, 'DataRect.csv'))
    data_spiral.to_csv(os.path.join(output_dir, 'DataSpiral.csv'))
    # Store dataframe with the newly added columns
    data_spiral_wnd.to_csv(os.path.join(output_dir, 'DataSpiralWnd.csv'))
    data_rect_wnd.to_csv(os.path.join(output_dir, 'DataRectWnd.csv'))

    # Merge the two dataframes
    data_wnd = pd.concat([data_spiral_wnd, data_rect_wnd])
    # New dataframe with columns: ['UniqueId', 'Name', 'Type', 'FailureType', 'ar', 'lrr', 'srr', 'alr', 'sdr', 'smr']
    merged_data = data_wnd[['UniqueId', 'name', 'type', 'ft', 'ar', 'lrr', 'srr', 'alr', 'sdr', 'smr']].copy()
    merged_data.columns = ['UniqueId', 'Name', 'Type', 'FailureType', 'ar', 'lrr', 'srr', 'alr', 'sdr', 'smr']
    # Change type of UniqueId to integer
    merged_data['UniqueId'] = merged_data['UniqueId'].astype(int)
    # The following lines are for correspondence between new dataset and old DesignSafe Id
    # Sort merged_data by UniqueId
    merged_data = merged_data.sort_values(by='UniqueId')
    # Restart index in merged_data
    merged_data = merged_data.reset_index(drop=True)
    # Disabled legacy step (kept for reference): reorder merged_data so that it
    # follows the row order of the old merged_data.csv
    #   # Load merged_data.csv from old folder
    #   old_merged_data = pd.read_csv('old/merged_data.csv')
    #   # Store index as new column in old_merged_data
    #   old_merged_data['id'] = old_merged_data.index
    #   # Sort old_merged_data by UniqueId
    #   old_merged_data = old_merged_data.sort_values(by='UniqueId')
    #   # Restart index in old_merged_data
    #   old_merged_data = old_merged_data.reset_index(drop=True)
    #   # Add the id column to merged_data
    #   merged_data['id'] = old_merged_data['id']
    #   # Sort merged_data by id
    #   merged_data = merged_data.sort_values(by='id')
    #   # Drop the id column and reset index
    #   merged_data = merged_data.drop(columns='id')
    #   merged_data = merged_data.reset_index(drop=True)

    # Store merged_data into a csv file
    merged_data.to_csv(os.path.join(output_dir, 'DataAll_NDonly.csv'))

    # Do pairplot for all columns
    # Add additional input parameters to the pairplot
    if do_pairplots:
        additional_params = ['ar', 'lrr', 'srr', 'alr', 'sdr', 'smr', 'FailureType', 'Name', 'Type']
        do_pairplot(merged_data, additional_params, 'FailureType', output_dir, 'pairplot_all')

    # Count how many of each FailureType (computed once, then printed and saved)
    failure_modes = ['Flexure-Shear', 'Flexure', 'Shear']
    mode_counts = {
        mode: int((merged_data['FailureType'] == mode).sum())
        for mode in failure_modes
    }
    for mode in failure_modes:
        print(f"Count of {mode}: {mode_counts[mode]}")
    # Print total
    total_count = merged_data.shape[0]
    print(f"Total count: {total_count}")
    # Save the information in a text file
    with open(os.path.join(output_dir, 'failure_counts.txt'), 'w') as f:
        f.write(f"Total count: {total_count}\n")
        for mode in failure_modes:
            f.write(f"Count of {mode}: {mode_counts[mode]}\n")