import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
import pandas as pd
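
# Load the Bitext customer-support dataset from the Hugging Face Hub and
# flatten its train split into a pandas DataFrame.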
ds = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")
df = ds['train'].to_pandas()

# Map each intent string to an integer id (and back) so the classifier's
# numeric predictions can be translated into intent names.
label2id = {label: idx for idx, label in enumerate(df['intent'].unique())}
id2label = {idx: label for label, idx in label2id.items()}

# Attach the numeric label and keep only the columns the app needs.
df['label'] = df['intent'].map(label2id)
df = df[['instruction', 'label', 'intent', 'response']]
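
# Load the fine-tuned RoBERTa sequence classifier and its tokenizer from the
# Hugging Face Hub. Streamlit reruns this script on every interaction, so
# moving these loads into a function decorated with @st.cache_resource could
# avoid reloading the model on every rerun.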
tokenizer = AutoTokenizer.from_pretrained("Chillyblast/Roberta_Question_Answer")
model = AutoModelForSequenceClassification.from_pretrained("Chillyblast/Roberta_Question_Answer")
model.eval()
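

# Classify a user question and look up a stored response for the predicted intent.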
def get_intent_and_response(instruction):
    # Tokenize the question into fixed-length (128-token) tensors.
    inputs = tokenizer(instruction, return_tensors="pt", truncation=True, padding='max_length', max_length=128)

    # Run the classifier without gradient tracking and pick the highest-scoring class.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_label_id = torch.argmax(logits, dim=1).item()

    predicted_intent = id2label[predicted_label_id]

    # Reply with the first stored response recorded for the predicted intent.
    response = df[df['intent'] == predicted_intent].iloc[0]['response']
    return predicted_intent, response
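

# Streamlit UI: collect a question, classify it, and display the predicted
# intent together with the matching response.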
st.title("Customer Support Chatbot")
st.write("Ask a question, and I'll do my best to help you.")

instruction = st.text_input("You:")
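
# When the user presses Submit, classify the question and show the result.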
if st.button("Submit"):
    if instruction:
        predicted_intent, response = get_intent_and_response(instruction)
        st.write(f"**Predicted Intent:** {predicted_intent}")
        st.write(f"**Assistant:** {response}")
    else:
        st.write("Please enter an instruction.")
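
# The Exit button only prints a message; Streamlit reruns the script on each
# interaction, so there is no session to close here.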
if st.button("Exit"):
    st.write("Exiting the chat.")