kimic committed on
Commit
f465598
1 Parent(s): e2f172a

Added confusion matrix (cm) and updated graph titles for clarity

Browse files
analysis.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
inference_analysis.ipynb ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "C:\\Users\\kimi\\AppData\\Local\\Temp\\ipykernel_10980\\672662318.py:5: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n",
13
+ " plt.style.use(\"seaborn-whitegrid\")\n"
14
+ ]
15
+ },
16
+ {
17
+ "data": {
18
+ "image/png": "",
19
+ "text/plain": [
20
+ "<Figure size 800x600 with 2 Axes>"
21
+ ]
22
+ },
23
+ "metadata": {},
24
+ "output_type": "display_data"
25
+ }
26
+ ],
27
+ "source": [
28
+ "import matplotlib.pyplot as plt\n",
29
+ "import seaborn as sns\n",
30
+ "import pandas as pd\n",
31
+ "\n",
32
+ "plt.style.use(\"seaborn-whitegrid\")\n",
33
+ "\n",
34
+ "version = 3\n",
35
+ "\n",
36
+ "# Read confusion matrix from CSV\n",
37
+ "cm_df = pd.read_csv(\n",
38
+ " f\"./output/version_{version}/confusion_matrix_inference_{version}.csv\"\n",
39
+ ")\n",
40
+ "cm = cm_df.values\n",
41
+ "\n",
42
+ "# Plotting\n",
43
+ "plt.figure(figsize=(8, 6))\n",
44
+ "sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\")\n",
45
+ "plt.title(\"Confusion Matrix (DistilBERT, Holdout Set)\")\n",
46
+ "plt.ylabel(\"True label\")\n",
47
+ "plt.xlabel(\"Predicted label\")\n",
48
+ "plt.show()"
49
+ ]
50
+ }
51
+ ],
52
+ "metadata": {
53
+ "kernelspec": {
54
+ "display_name": "torch",
55
+ "language": "python",
56
+ "name": "python3"
57
+ },
58
+ "language_info": {
59
+ "codemirror_mode": {
60
+ "name": "ipython",
61
+ "version": 3
62
+ },
63
+ "file_extension": ".py",
64
+ "mimetype": "text/x-python",
65
+ "name": "python",
66
+ "nbconvert_exporter": "python",
67
+ "pygments_lexer": "ipython3",
68
+ "version": "3.10.11"
69
+ }
70
+ },
71
+ "nbformat": 4,
72
+ "nbformat_minor": 2
73
+ }
inference_main.py CHANGED
@@ -6,7 +6,7 @@ from transformers import (
6
  )
7
  from datasets import Dataset, load_from_disk
8
  import pandas as pd
9
- from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
10
  from functools import partial
11
  import os
12
 
@@ -107,9 +107,20 @@ if __name__ == "__main__":
107
 
108
  trainer = Trainer(model=model)
109
  predictions = trainer.predict(dataset)
110
- accuracy = accuracy_score(predictions.label_ids, predictions.predictions.argmax(-1))
111
- f1 = f1_score(predictions.label_ids, predictions.predictions.argmax(-1))
112
- auc_roc = roc_auc_score(predictions.label_ids, predictions.predictions.argmax(-1))
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  print(f"Accuracy: {accuracy}")
115
  print(f"F1 Score: {f1}")
 
6
  )
7
  from datasets import Dataset, load_from_disk
8
  import pandas as pd
9
+ from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
10
  from functools import partial
11
  import os
12
 
 
107
 
108
  trainer = Trainer(model=model)
109
  predictions = trainer.predict(dataset)
110
+
111
+ # Compute metrics
112
+ true_labels = predictions.label_ids
113
+ pred_labels = predictions.predictions.argmax(-1)
114
+ accuracy = accuracy_score(true_labels, pred_labels)
115
+ f1 = f1_score(true_labels, pred_labels)
116
+ auc_roc = roc_auc_score(true_labels, predictions.predictions[:, 1])
117
+
118
+ # Generate and save confusion matrix
119
+ cm = confusion_matrix(true_labels, pred_labels)
120
+ cm_df = pd.DataFrame(cm)
121
+ cm_filename = f"./output/version_{version}/confusion_matrix_inference_{version}.csv"
122
+ cm_df.to_csv(cm_filename, index=False)
123
+ print(f"Confusion Matrix saved to {cm_filename}")
124
 
125
  print(f"Accuracy: {accuracy}")
126
  print(f"F1 Score: {f1}")
output/version_3/confusion_matrix_inference_3.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e17b0d80093e9c44f667cde9f7806d424a5c4eb8401a51ebdc4822a5a195e4
3
+ size 20
train_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff