responsibleGPT / app.py
kyleledbetter's picture
feat(): Initial app commit
438c90e
raw
history blame
No virus
2.63 kB
import requests
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from sklearn.metrics import confusion_matrix
from datasets import load_dataset
def load_model(endpoint: str):
    """Load the tokenizer and sequence-classification model for ``endpoint``.

    Args:
        endpoint: Identifier handed unchanged to both ``from_pretrained``
            calls (for example a model name).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Tuple elements are evaluated left to right, so the tokenizer is still
    # loaded before the model.
    return (
        AutoTokenizer.from_pretrained(endpoint),
        AutoModelForSequenceClassification.from_pretrained(endpoint),
    )
def test_model(tokenizer, model, test_data: list, label_map: dict):
    """Classify every example in ``test_data`` and collect the predictions.

    Each text is tokenized and scored on its own (no batching). The index of
    the largest logit is translated to a label name through ``label_map``.

    Args:
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors="pt",
            truncation=True, padding=True)``; its result is unpacked as
            keyword arguments for ``model``.
        model: Callable whose return value exposes a ``logits`` tensor.
        test_data: Iterable of ``(text, true_label)`` pairs.
        label_map: Mapping from predicted class index (``int``) to label name.

    Returns:
        A list of ``(text, true_label, pred_label)`` tuples, in input order.

    Raises:
        KeyError: If the predicted class index is not a key of ``label_map``.
    """
    # Local import keeps this block self-contained; the "pt" tensors requested
    # from the tokenizer already require PyTorch to be installed.
    import torch

    results = []
    # Evaluation never calls backward(), so disable autograd: without this
    # every forward pass records a gradient graph that is immediately thrown
    # away, costing memory and time.
    with torch.no_grad():
        for text, true_label in test_data:
            inputs = tokenizer(text, return_tensors="pt",
                               truncation=True, padding=True)
            outputs = model(**inputs)
            # One text per call, so argmax yields a single-element tensor.
            pred_index = int(outputs.logits.argmax(dim=-1))
            results.append((text, true_label, label_map[pred_index]))
    return results


# Despite its ``test_`` prefix this is an evaluation helper, not a unit test.
# Opt out of pytest/nose collection so that importing it into a test module
# does not make the runner try to execute it with missing fixtures.
test_model.__test__ = False
def generate_report_card(results, label_map):
    """Render a confusion-matrix heatmap for ``results`` and display it.

    Args:
        results: Sequence of records where element 1 is the true label and
            element 2 is the predicted label.
        label_map: Mapping whose values are the label names; their order
            fixes the row/column order of the matrix and both axes.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """
    class_names = list(label_map.values())

    # Split the records into parallel true/predicted label columns.
    actual = []
    predicted = []
    for record in results:
        actual.append(record[1])
        predicted.append(record[2])

    # Rows of the matrix are true labels, columns are predicted labels.
    matrix = confusion_matrix(actual, predicted, labels=class_names)

    heatmap = go.Heatmap(
        z=matrix,
        x=class_names,
        y=class_names,
        colorscale='Viridis',
        colorbar=dict(title='Number of Samples'),
    )
    layout = go.Layout(
        title='Confusion Matrix',
        xaxis=dict(title='Predicted Labels'),
        # Reversed so the first label appears at the top, matrix-style.
        yaxis=dict(title='True Labels', autorange='reversed'),
    )

    fig = go.Figure(data=heatmap, layout=layout)
    fig.show()
def load_sst2_data(split="test"):
    """Load one split of GLUE SST-2 as ``(sentence, label_name)`` pairs.

    Label ``1`` becomes ``"positive"`` and label ``0`` becomes ``"negative"``.
    Examples carrying any other label value are dropped instead of being
    reported as ``"negative"``, and a warning is emitted when that happens.

    NOTE: per the GLUE dataset card, the "test" split is published without
    gold labels (every label is -1), so it yields no usable examples; request
    ``split="validation"`` (or ``"train"``) for labelled data.

    Args:
        split: Split name passed to ``load_dataset``.

    Returns:
        A list of ``(sentence, label_name)`` tuples for the labelled examples.
    """
    import warnings

    label_names = {0: "negative", 1: "positive"}
    dataset = load_dataset("glue", "sst2", split=split)

    data = []
    skipped = 0
    for item in dataset:
        name = label_names.get(item["label"])
        if name is None:
            # Placeholder/unknown label: counting it as "negative" would
            # silently corrupt any metric computed from the result.
            skipped += 1
            continue
        data.append((item["sentence"], name))

    if skipped:
        warnings.warn(
            f"Dropped {skipped} example(s) from SST-2 split {split!r} whose "
            "label is neither 0 nor 1 (the GLUE test split is unlabeled).",
            stacklevel=2,
        )
    return data
# Define your model endpoint and label map.
# To evaluate a different model, point ``model_endpoint`` at it and adjust
# ``label_map`` to that model's labels, e.g.:
# model_endpoint = "your-model-endpoint"
# label_map = {0: "label0", 1: "label1"}
model_endpoint = "distilbert-base-uncased-finetuned-sst-2-english"
label_map = {0: "negative", 1: "positive"}

# Load the model and tokenizer
tokenizer, model = load_model(model_endpoint)

# To use your own data instead, build a list of (text, true label) tuples:
# test_data = [
#     ("Sample text 1", "label0"),
#     ("Sample text 2", "label1"),
#     # Add more test samples here
# ]

# Load labelled evaluation data from the SST-2 dataset. The GLUE "test" split
# is published without gold labels (every label is -1), which would make the
# confusion matrix meaningless, so evaluate on the "validation" split.
test_data = load_sst2_data(split="validation")

# Use a smaller subset of test_data for a quicker demonstration (optional)
test_data = test_data[:100]

# Test the model and generate results
results = test_model(tokenizer, model, test_data, label_map)

# Generate the visual report card
generate_report_card(results, label_map)