-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathperformance-metrics-LLM.py
More file actions
49 lines (44 loc) · 2.74 KB
/
performance-metrics-LLM.py
File metadata and controls
49 lines (44 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import matplotlib.pyplot as plt
import pandas as pd
# Evaluation metrics for BERT-base, RoBERTa-base, RoBERTa-large, and GPT-2,
# recorded at 16 checkpoints. All values are fractions in [0, 1]; a 0.0 entry
# marks a degenerate checkpoint. Each spec: (name, plot colour, precision,
# recall, F1) with one entry per checkpoint.
_MODEL_SPECS = (
    (
        "BERT",
        "green",
        [0, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.0, 0.6331, 0.9908, 0.9964, 0.9964, 1.000, 1.000, 1.000, 0.9964, 0.8342],
        [0, 1.000, 1.000, 1.000, 1.000, 1.000, 0.0, 0.9982, 0.9730, 0.9928, 0.9928, 0.9964, 0.9946, 0.9946, 0.9946, 0.9153],
        [0, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.0, 0.7748, 0.9818, 0.9946, 0.9946, 0.9982, 0.9973, 0.9973, 0.9946, 0.8729],
    ),
    (
        "RoBERTa",
        "orange",
        [0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.0, 0.6321, 0.6321, 0.9554, 0.9610, 0.9836, 0.9734, 0.9558, 0.9709, 0.6321],
        [1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.0, 1.000, 1.000, 0.9640, 0.9766, 0.9730, 0.9874, 0.9730, 0.9604, 1.000],
        [0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.0, 0.7746, 0.7746, 0.9596, 0.9687, 0.9783, 0.9803, 0.9643, 0.9656, 0.7746],
    ),
    (
        "RoBERTa-large",
        "blue",
        [0.0, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.9804, 0.9856, 0.9946, 0.9910, 0.9875, 0.9856, 0.8896],
        [0.0, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.9892, 0.9892, 0.9964, 0.9964, 0.9928, 0.9856, 0.9586],
        [0.0, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.9848, 0.9874, 0.9955, 0.9937, 0.9901, 0.9856, 0.9228],
    ),
    (
        "GPT-2",
        "red",
        [0.5185, 0.9189, 0.6321, 0.9399, 0.9292, 0.9602, 0.9598, 0.9786, 0.9892, 0.9553, 0.9726, 0.9817, 0.9781, 0.9676, 0.9620, 0.6613],
        [0.0252, 0.9387, 1.000, 0.9297, 0.9694, 0.9568, 0.9459, 0.9910, 0.9928, 0.9622, 0.9604, 0.9658, 0.9658, 0.9676, 0.9568, 0.9712],
        [0.0481, 0.9287, 0.7746, 0.9348, 0.9489, 0.9585, 0.9528, 0.9848, 0.9910, 0.9587, 0.9665, 0.9737, 0.9719, 0.9676, 0.9593, 0.7869],
    ),
)

# Keyed by model name; each value maps "Color" / "Precision" / "Recall" /
# "F1 Score" exactly as the plotting code below expects.
data_models = {
    name: {"Color": color, "Precision": precision, "Recall": recall, "F1 Score": f1}
    for name, color, precision, recall, f1 in _MODEL_SPECS
}
# Checkpoint indices 1..16 — one x position per recorded metric entry.
steps = range(1, 17)

# Overlay Precision / Recall / F1 curves for all four models on one axes.
# Colour distinguishes the model; marker + linestyle distinguish the metric.
plt.figure(figsize=(12, 6))
for model, metrics in data_models.items():
    color = metrics["Color"]
    plt.plot(steps, metrics["Precision"], marker='s', linestyle='--', linewidth=2, color=color, label=f"{model} Precision")
    plt.plot(steps, metrics["Recall"], marker='^', linestyle='-.', linewidth=2, color=color, label=f"{model} Recall", alpha=0.8)
    plt.plot(steps, metrics["F1 Score"], marker='d', linestyle=':', linewidth=2, color=color, label=f"{model} F1 Score", alpha=0.6)
plt.xlabel("Step")
# BUG FIX: the metric values are fractions in [0, 1], not percentages,
# so the y-axis must not be labelled "(%)".
plt.ylabel("Score")
plt.title("Precision, Recall, and F1 Score Comparison: BERT, RoBERTa, RoBERTa-large, GPT-2")
plt.legend()
plt.grid(True)
plt.show()