-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTSNE-CWE.py
More file actions
83 lines (68 loc) · 2.32 KB
/
TSNE-CWE.py
File metadata and controls
83 lines (68 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D
# ---------------------------------------------------------------------------
# Data preparation: load the scores, keep the five most frequent CWEs, add a
# per-row disagreement metric, and embed the score vectors in 2-D with t-SNE.
# ---------------------------------------------------------------------------

# Load data
df = pd.read_csv("normalized-data.csv")

# Scoring-system columns. They serve both as the inputs to the disagreement
# metric and as the feature matrix handed to t-SNE below.
scoring_systems = ["CVSS-B", "Exploitability-Index", "SSVC-L", "SSVC-M", "SSVC-H", "EPSS"]

# Clean and filter CWEs.
# A row may carry several comma-separated CWEs: split the string and explode
# so that each CWE gets its own row (the other columns are repeated on every
# exploded row). Building the list column with .assign() avoids writing into
# the frame returned by dropna().
df = (
    df.dropna(subset=["CWEs"])
    .assign(CWEs=lambda frame: frame["CWEs"].astype(str).str.split(","))
    .explode("CWEs")
    .reset_index(drop=True)
)
df["CWEs"] = df["CWEs"].str.strip()

# Filter out missing/irrelevant CWEs (placeholder labels and empty strings
# left behind by the split, e.g. from a trailing comma).
excluded = {"NVD-CWE-noinfo", "NVD-CWE-Other", ""}
df = df[~df["CWEs"].isin(excluded)]

# Find top 5 CWEs by frequency and keep only the rows that belong to them.
top_cwes = df["CWEs"].value_counts().nlargest(5).index.tolist()
df = df[df["CWEs"].isin(top_cwes)]

# Drop rows with missing score data. The explicit copy makes `df` an
# independent frame, so the column assignments below cannot trigger pandas'
# SettingWithCopyWarning after the chained filtering above.
df = df.dropna(subset=scoring_systems).copy()

# Calculate a simple score agreement metric: the row-wise standard deviation
# across the scoring systems (pandas' default sample std). Larger values mean
# the systems disagree more about that row.
df["score_agreement"] = df[scoring_systems].std(axis=1)

# Apply t-SNE.
# scikit-learn requires perplexity < n_samples, so clamp the preferred value
# of 30 for small filtered datasets and fail with a clear message when there
# is nothing meaningful to embed.
n_samples = len(df)
if n_samples < 2:
    raise ValueError(
        f"t-SNE needs at least 2 rows after filtering, but only {n_samples} remain"
    )
tsne = TSNE(n_components=2, perplexity=min(30, n_samples - 1), random_state=42)
df[['tsne_x', 'tsne_y']] = tsne.fit_transform(df[scoring_systems])
# ---------------------------------------------------------------------------
# Plot the t-SNE embedding: marker shape encodes the CWE, colour encodes the
# score disagreement (standard deviation across scoring systems).
# ---------------------------------------------------------------------------

# Marker styles: one marker per CWE, cycling if there are more CWEs than markers.
marker_styles = ['o', 's', '^', 'D', 'P']
cwe_marker_map = {cwe: marker_styles[i % len(marker_styles)] for i, cwe in enumerate(top_cwes)}

# Shared colour limits. Every CWE is drawn by its own scatter() call; without
# common vmin/vmax each call would autoscale its colours independently, so the
# same colour would mean different disagreement values for different CWEs and
# the colorbar would describe only the last CWE drawn.
agreement_min = df["score_agreement"].min()
agreement_max = df["score_agreement"].max()

# Plot
fig, ax = plt.subplots(figsize=(10, 8))
points = None  # handle of the most recent scatter, reused for the colorbar
for cwe in top_cwes:
    subset = df[df["CWEs"] == cwe]
    points = ax.scatter(
        subset["tsne_x"], subset["tsne_y"],
        c=subset["score_agreement"],
        cmap="coolwarm",
        vmin=agreement_min,
        vmax=agreement_max,
        marker=cwe_marker_map[cwe],
        label=cwe,
        edgecolor='k',
        s=80,
        alpha=0.8,
    )

# Colorbar with larger font. All scatters share the same colormap and limits,
# so any one of them is a valid mappable for the whole figure.
if points is not None:
    cbar = fig.colorbar(points, ax=ax)
    cbar.set_label("Score Std Dev (Disagreement)", fontsize=17)
    cbar.ax.tick_params(labelsize=16)

# Axis labels
plt.xlabel("t-SNE Compressed Dimension 1", fontsize=17)
plt.ylabel("t-SNE Compressed Dimension 2", fontsize=17)
plt.xticks(fontsize=17)
plt.yticks(fontsize=17)

# Legend with larger font. Proxy handles with a neutral grey face are used
# because the real points are coloured by disagreement, not by CWE; only the
# marker shape identifies the CWE.
legend_elements = [
    Line2D([0], [0], marker=marker, color='w', label=cwe,
           markerfacecolor='gray', markeredgecolor='k', markersize=12)
    for cwe, marker in cwe_marker_map.items()
]
plt.legend(
    handles=legend_elements,
    title="CWE",
    title_fontsize=17,
    fontsize=17,
    loc="best"
)

plt.tight_layout()
# Save before show(): the figure may be discarded once the window is closed.
plt.savefig("tsne_cwe_agreement.png")
plt.show()