-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAnalyse.py
More file actions
152 lines (117 loc) · 4.58 KB
/
Analyse.py
File metadata and controls
152 lines (117 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# =============================
# Performance Analysis and Visualization of KT Models
# =============================
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# -----------------------------
# 1. Load Results from JSON Files
# -----------------------------
RESULTS_DIR = "final_results"

# Every training run leaves one `<model>_summary.json` in the results folder.
summary_files = [f for f in os.listdir(RESULTS_DIR) if f.endswith('_summary.json')]

# Aggregate all results from JSON into a single DataFrame
all_results = []
for summary_file in summary_files:
    # Model name is encoded in the file name, e.g. "dkt_summary.json" -> "DKT".
    model_name = summary_file.replace('_summary.json', '').upper()
    with open(os.path.join(RESULTS_DIR, summary_file), 'r') as handle:
        records = json.load(handle)
    # Tag each record with its model before stacking everything together.
    for record in records:
        record['model'] = model_name
    all_results.append(pd.DataFrame(records))
df = pd.concat(all_results, ignore_index=True)

# Sort values for cleaner plotting
df.sort_values(by=['dataset', 'model'], inplace=True)
sns.set(style="whitegrid")
# -----------------------------
# 2. Heatmap: AUC per Dataset and Model
# -----------------------------
# One row per dataset, one column per model, each cell = average AUC.
auc_matrix = df.pivot(index='dataset', columns='model', values='avg_auc')

plt.figure(figsize=(10, 5))
ax = sns.heatmap(auc_matrix, annot=True, cmap="YlGnBu")
ax.set_title("AUC per Dataset and Model")
plt.tight_layout()
plt.savefig("auc_dataset_model.pdf")
# -----------------------------
# 3. Prepare CSV for MCM (Multiple Comparison Matrix)
# -----------------------------
METRIC = "avg_auc"  # Can also use 'avg_f1' or others

# The summary files were already loaded into `df` in section 1; reuse that
# DataFrame instead of re-reading every *_summary.json from disk (the
# original re-read and re-parsed all files here for no benefit).
df_metric = (
    df[['dataset', 'model', METRIC]]
    .rename(columns={'dataset': 'Dataset', 'model': 'Model'})
)
# pivot_table averages duplicates if a (Dataset, Model) pair appears twice.
df_pivot = df_metric.pivot_table(index="Dataset", columns="Model", values=METRIC)
df_pivot.reset_index(inplace=True)

# Export to CSV for MCM analysis.
# Path kept as a literal because section 4 re-reads this exact string.
csv_output = "final_results/mcm_auc_input.csv"
df_pivot.to_csv(csv_output, index=False)
print(f"CSV ready for MCM: {csv_output}")
# -----------------------------
# 4. Generate Multiple Comparison Matrix (MCM)
# -----------------------------
# Third-party tool that runs pairwise statistical comparisons between the
# model columns and renders the outcome as a heatmap in `output_dir`.
from multi_comp_matrix import MCM
# Load CSV results
# Re-reads the CSV written in section 3, so this section can also be run
# standalone against a previously generated file.
df_results = pd.read_csv('final_results/mcm_auc_input.csv')
# MCM expects one score column per model; 'Dataset' is a label, not a score.
df_results.drop('Dataset', axis=1, inplace=True)
# Define output directory
output_dir = 'final_results/results'
# Run the MCM analysis and generate heatmap
# NOTE(review): keyword semantics (fig_size as a "W,H" string, used_statistic
# as the axis label) follow the multi_comp_matrix API — confirm against that
# library's documentation before changing them.
MCM.compare(
    output_dir=output_dir,
    df_results=df_results,
    pdf_savename="heatmap",
    png_savename="heatmap",
    fig_size="14,6",
    used_statistic="AUC"
)
# -----------------------------
# 5. Bubble plot
# -----------------------------
# BUG FIX: `df_mean` was referenced below but never defined, so this whole
# section raised a NameError. Build it here: one row per model, with its
# metrics averaged over all datasets.
# NOTE(review): assumes the summary JSONs provide 'avg_inference_time' and
# 'model_size' alongside 'avg_auc' — confirm against the result files.
df_mean = df.groupby('model', as_index=False)[
    ['avg_auc', 'avg_inference_time', 'model_size']
].mean()

# Keep only the models we want to compare.
df_plot = df_mean[df_mean['model'].isin(
    ['FCN', 'LSTM', 'RNN', 'DKTPLUS', 'SAKT', 'KQN', 'DKVMN']
)].copy()

plt.figure(figsize=(12, 8))

# A distinct color per model.
colors = plt.cm.tab10(np.linspace(0, 1, len(df_plot)))

# Map model sizes onto a visible marker-area range.
# (Local import kept so this section stays runnable on its own; fitting on
# the raw ndarray avoids feature-name warnings when transforming below.)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(500, 2000))
bubble_sizes = scaler.fit_transform(df_plot[['model_size']].to_numpy()).flatten()

# Plot each model individually so each gets its own color and label.
for i, (_, row) in enumerate(df_plot.iterrows()):
    plt.scatter(
        row['avg_inference_time'],
        row['avg_auc'],
        # BUG FIX: index positionally (i), not by DataFrame label: after the
        # isin() filter the labels are no longer 0..n-1 and would mis-index
        # (or overflow) the numpy array of bubble sizes.
        s=bubble_sizes[i],
        color=colors[i],
        edgecolors='black',
        alpha=0.6
    )
    # Annotate with model name + parameter count.
    label = f"{row['model']} ({int(row['model_size']):,})"
    plt.text(row['avg_inference_time'] + 0.05, row['avg_auc'], label, fontsize=9)

# Axes and title with enlarged fonts.
plt.xlabel("Average Inference Time (seconds)", fontsize=16)
plt.ylabel("Mean AUC", fontsize=16)
plt.title("Models Comparison- Inference Time vs AUC", fontsize=20)

# Proxy legend entries for model sizes. BUG FIX: push the reference sizes
# through the SAME fitted scaler as the real bubbles (the original used an
# unrelated sqrt scale, so the legend did not match the plot). Clip to a
# small positive area since sizes outside the fitted range can extrapolate
# below zero, which matplotlib rejects.
for size in [1e3, 1e4, 1e5, 1e6]:
    legend_area = float(np.clip(scaler.transform([[size]]).item(), 10.0, None))
    plt.scatter([], [], s=legend_area, label=f"{int(size):,} params",
                color='gray', alpha=0.4, edgecolors='k')
plt.legend(scatterpoints=1, frameon=True, labelspacing=1, title="Model size",
           fontsize=10, title_fontsize=11)
plt.grid(True)
plt.tight_layout()
plt.savefig("bubble_plot.pdf")
plt.show()