# sentiment-analysis-app / test_model.py
# (Hugging Face file-page header residue, preserved as comments so the file
#  is valid Python: andyqin18's picture — Updated README — 66f2f1a —
#  raw history blame — No virus — 2.63 kB)
import numpy as np
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
# Global configuration
TEST_SIZE = 2000  # number of rows sampled from the training CSV for evaluation
FINE_TUNED_MODEL = "andyqin18/finetuned-bert-uncased"  # Hugging Face Hub model id
# Define analyze function
def analyze(text: str, threshold: float = 0.5) -> np.ndarray:
    """Run multi-label prediction on a single text.

    Uses the module-level ``model`` and ``tokenizer`` (loaded later in this
    script, before the first call).

    Args:
        text: Input text string.
        threshold: Probability cutoff for predicting a label. Defaults to
            0.5, matching the original hard-coded value.

    Returns:
        1-D numpy array of 0/1 predictions, one entry per label
        (6 for this model), set to 1 where sigmoid(logit) >= threshold.
    """
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    # Inference only: disable autograd so no gradient graph is built.
    with torch.no_grad():
        outputs = model(**encoding)
    # Independent per-label probabilities (multi-label, hence sigmoid not softmax).
    probs = torch.sigmoid(outputs.logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= threshold)] = 1
    return predictions
# Read dataset and randomly select testing texts and their respective labels.
# NOTE(review): relative path — assumes the script runs from the repo root.
df = pd.read_csv("milestone3/comp/train.csv")
labels = df.columns[2:]  # label columns follow the id and comment_text columns
num_label = len(labels)
train_texts = df["comment_text"].values
train_labels = df[labels].values

# Sample the row indices ONCE and use the same index array for both texts
# and labels. The original code drew texts and labels in two separate
# np.random.choice calls, kept aligned only by re-seeding with the same
# seed — fragile, and silently wrong if anything else touches the RNG in
# between. Since np.random.choice(a, ...) == a[np.random.choice(len(a), ...)]
# for the legacy RandomState, this yields the identical sample for seed 1.
np.random.seed(1)
small_test_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
small_test_texts = train_texts[small_test_idx]
small_test_labels = train_labels[small_test_idx, :]
# Load model and tokenizer from the Hugging Face Hub, then initialize the
# counters accumulated by the analysis loop below.
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)
total_true = 0     # correctly predicted individual labels, across all samples
total_success = 0  # samples where ALL labels were predicted correctly
TP, FP, TN, FN = 0, 0, 0, 0  # per-label confusion-matrix counts
# Analysis loop: score every sampled comment and accumulate confusion counts.
for comment, target in tqdm(zip(small_test_texts, small_test_labels),
                            total=TEST_SIZE, desc="Analyzing..."):
    # Truncate to the first 500 characters before prediction.
    result = analyze(comment[:500])
    # Tally TP / TN / FP / FN for each label position.
    for pred, truth in zip(result, target):
        if pred == truth:
            if pred == 1:
                TP += 1
            else:
                TN += 1
        elif pred == 1:
            FP += 1
        else:
            FN += 1
    # Per-label hits for this sample; a sample "succeeds" only when every
    # one of its labels matches.
    num_true = (result == target).sum()
    total_true += num_true
    if num_true == len(labels):
        total_success += 1
# Calculate performance metrics from the accumulated counts.
performance = {}
# Fraction of individual label predictions that were correct.
performance["label_accuracy"] = total_true / (num_label * TEST_SIZE)
# Fraction of samples with ALL labels predicted correctly.
performance["prediction_accuracy"] = total_success / TEST_SIZE
# Guard the denominators: if the model never predicts a positive label,
# TP + FP == 0 (and similarly TP + FN == 0 with no positive targets) —
# the original raised ZeroDivisionError in that case.
performance["precision"] = TP / (TP + FP) if (TP + FP) else 0.0  # label precision
performance["recall"] = TP / (TP + FN) if (TP + FN) else 0.0     # label recall
print(performance)