Added test model performance code
app.py
CHANGED
@@ -4,8 +4,8 @@ import numpy as np
 from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
 # Define global variables
-
-
+FINE_TUNED_MODEL = "andyqin18/test-finetuned"
+NUM_SAMPLE_TEXT = 10
 
 # Define analyze function
 def analyze(model_name: str, text: str, top_k=1) -> dict:
@@ -24,7 +24,7 @@ st.write("You can choose to use my fine-tuned model or pre-trained models.")
 
 # Model hub
 model_descrip = {
-
+    FINE_TUNED_MODEL: "This is a customized BERT-base finetuned model that detects multiple toxicity for a text. \
                     Labels: toxic, severe_toxic, obscene, threat, insult, identity_hate",
     "distilbert-base-uncased-finetuned-sst-2-english": "This model is a fine-tune checkpoint of DistilBERT-base-uncased, fine-tuned on SST-2. \
                     Labels: POSITIVE; NEGATIVE ",
@@ -50,7 +50,7 @@ if st.button("Analyze"):
     else:
         with st.spinner("Hang on.... Analyzing..."):
            # If fine-tuned
-            if user_model ==
+            if user_model == FINE_TUNED_MODEL:
                result = analyze(user_model, user_input, top_k=2) # Top 2 labels with highest score
                result_dict = {
                    "Text": [user_input],
@@ -84,7 +84,7 @@ if st.button("Analyze"):
        }
 
        for text in sample_texts:
-            result = analyze(
+            result = analyze(FINE_TUNED_MODEL, text[:50], top_k=2)
            init_table_dict["Text"].append(text[:50])
            init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
            init_table_dict["Highest Score"].append(result[0][0]['score'])
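With this change, every model lookup in app.py goes through the FINE_TUNED_MODEL constant. For reference, a minimal sketch of the call pattern the table-building code above relies on; analyze() mirrors the helper defined in this repo, while the example text and the final print are invented for illustration:

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

FINE_TUNED_MODEL = "andyqin18/test-finetuned"

def analyze(model_name: str, text: str, top_k=1):
    # Load the checkpoint and run a top-k text-classification pipeline,
    # mirroring analyze() as defined in app.py.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
    return classifier(text)

# As the table code above assumes, result[0] is the per-text list of
# {'label': ..., 'score': ...} dicts ordered by score, so result[0][0]
# is the highest-scoring label.
result = analyze(FINE_TUNED_MODEL, "example comment", top_k=2)
print(result[0][0]['label'], result[0][0]['score'])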
milestone3/milestone3.py
DELETED
@@ -1,41 +0,0 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
-
-def analyze(model_name: str, text: str, top_k=1) -> dict:
-    '''
-    Output result of sentiment analysis of a text through a defined model
-    '''
-    model = AutoModelForSequenceClassification.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
-    return classifier(text)
-
-
-user_input = "Go fuck yourself"
-user_model = "andyqin18/test-finetuned"
-
-# result = analyze(user_model, user_input, top_k=2)
-
-# print(result[0][0]['label'])
-
-import pandas as pd
-import numpy as np
-df = pd.read_csv("milestone3/comp/test_comment.csv")
-test_texts = df["comment_text"].values
-sample_texts = np.random.choice(test_texts, size=10, replace=False)
-init_table_dict = {
-    "Text": [],
-    "Highest Toxicity Class": [],
-    "Highest Score": [],
-    "Second Highest Toxicity Class": [],
-    "Second Highest Score": []
-}
-
-for text in sample_texts:
-    result = analyze(user_model, text, top_k=2)
-    init_table_dict["Text"].append(text[:50])
-    init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
-    init_table_dict["Highest Score"].append(result[0][0]['score'])
-    init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
-    init_table_dict["Second Highest Score"].append(result[0][1]['score'])
-
-print(init_table_dict)
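The deleted scratch script built init_table_dict as parallel column lists and only printed the raw dict. That dict-of-lists shape converts directly into a table; a quick sketch, with all values invented for illustration:

import pandas as pd

# Dict of equal-length column lists, same shape as init_table_dict above
# (values invented for illustration).
init_table_dict = {
    "Text": ["example comment"],
    "Highest Toxicity Class": ["toxic"],
    "Highest Score": [0.98],
    "Second Highest Toxicity Class": ["insult"],
    "Second Highest Score": [0.75],
}
print(pd.DataFrame(init_table_dict))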
milestone3/{fintune.py → vid_tutorial_code/crash_course_fintune.py}
RENAMED
File without changes

milestone3/{crash_course_vid.py → vid_tutorial_code/crash_course_vid.py}
RENAMED
File without changes
test_model.py
ADDED
@@ -0,0 +1,84 @@
+import numpy as np
+import pandas as pd
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from tqdm import tqdm
+
+
+# Global var
+TEST_SIZE = 1000
+FINE_TUNED_MODEL = "andyqin18/test-finetuned"
+
+
+# Define analyze function
+def analyze(text: str):
+    '''
+    Input: Text string
+    Output: Prediction array (6x1) with threshold prob > 0.5
+    '''
+    encoding = tokenizer(text, return_tensors="pt")
+    encoding = {k: v.to(model.device) for k,v in encoding.items()}
+    outputs = model(**encoding)
+    logits = outputs.logits
+    sigmoid = torch.nn.Sigmoid()
+    probs = sigmoid(logits.squeeze().cpu())
+    predictions = np.zeros(probs.shape)
+    predictions[np.where(probs >= 0.5)] = 1
+    return predictions
+
+
+# Read dataset and randomly select testing texts and respective labels
+df = pd.read_csv("milestone3/comp/train.csv")
+labels = df.columns[2:]
+num_label = len(labels)
+train_texts = df["comment_text"].values
+train_labels = df[labels].values
+
+np.random.seed(1)
+small_test_texts = np.random.choice(train_texts, size=TEST_SIZE, replace=False)
+
+np.random.seed(1)
+small_test_labels_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
+small_test_labels = train_labels[small_test_labels_idx, :]
+
+
+# Load model and tokenizer. Prepare for analysis loop
+model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
+tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)
+total_true = 0
+total_success = 0
+TP, FP, TN, FN = 0, 0, 0, 0
+
+
+# Analysis Loop
+for comment_idx in tqdm(range(TEST_SIZE), desc="Analyzing..."):
+    comment = small_test_texts[comment_idx]
+    target = small_test_labels[comment_idx]
+    result = analyze(comment[:500])
+
+    # Counting TP, FP, TN, FN
+    for i in range(num_label):
+        if result[i] == target[i]:
+            if result[i] == 1:
+                TP += 1
+            else:
+                TN += 1
+        else:
+            if result[i] == 1:
+                FP += 1
+            else:
+                FN += 1
+
+    # Counting success prediction of 1) each label, 2) label array
+    num_true = (result == target).sum()
+    if num_true == len(labels):
+        total_success += 1
+    total_true += num_true
+
+# Calculate performance
+performance = {}
+performance["label_accuracy"] = total_true/(len(labels) * TEST_SIZE)
+performance["prediction_accuracy"] = total_success/TEST_SIZE
+performance["precision"] = TP / (TP + FP)
+performance["recall"] = TP / (TP + FN)
+print(performance)
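test_model.py reports per-label accuracy, exact-match prediction accuracy, precision, and recall from the TP/FP/TN/FN counts accumulated in the loop. A tiny worked check of those formulas on an invented 2-comment x 6-label batch (all numbers hypothetical, vectorized here instead of the script's per-label loop):

import numpy as np

# Invented predictions vs. targets: 2 comments x 6 labels.
preds   = np.array([[1, 0, 0, 0, 0, 0],
                    [1, 1, 0, 0, 0, 0]])
targets = np.array([[1, 0, 0, 0, 0, 0],
                    [1, 0, 0, 0, 1, 0]])

TP = int(((preds == 1) & (targets == 1)).sum())  # 2
FP = int(((preds == 1) & (targets == 0)).sum())  # 1
FN = int(((preds == 0) & (targets == 1)).sum())  # 1

label_accuracy = (preds == targets).mean()                   # 10/12 label-level matches
prediction_accuracy = (preds == targets).all(axis=1).mean()  # 1/2 rows match exactly
precision = TP / (TP + FP)                                   # 2/3
recall = TP / (TP + FN)                                      # 2/3
print(label_accuracy, prediction_accuracy, precision, recall)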