# Spaces: Sleeping
import gradio as gr | |
from transformers import pipeline | |
import numpy as np | |
import pandas as pd | |
from sklearn.metrics.pairwise import cosine_similarity | |
from InstructorEmbedding import INSTRUCTOR | |
# Zero-shot classifier; predict() falls back to it when the best corpus
# match scores below the caller's threshold.
# Disabled alternative: pipeline(model="facebook/bart-large-mnli")
pipe = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
)

# Embedding model shared by the corpus (encoded once below) and by every
# incoming query.
model = INSTRUCTOR('hkunlp/instructor-large')

# The intent file is ';'-delimited; the "description" and "message" columns
# are the ones read below.
df = pd.read_csv('intent.csv', delimiter=';')


def _instruction_pair(row):
    """Return ``[instruction, message]`` for one row of the intent table."""
    instruction = f'Represent the document for retrieval of {row["description"]} information : '
    return [instruction, row["message"]]


# One [instruction, text] pair per CSV row, in file order, so an index into
# corpus_embeddings is also an index into data.
data = [_instruction_pair(row) for _, row in df.iterrows()]
corpus_embeddings = model.encode(data)
def predict(question, lower_threshold, tags, multi_label):
    """Answer *question* from the intent corpus, else classify it zero-shot.

    The question is embedded and compared by cosine similarity against
    ``corpus_embeddings``. When the best match scores at least
    ``lower_threshold`` the message stored for that corpus entry is returned;
    otherwise the question is classified against the labels in ``tags``.

    Args:
        question: Text to answer or classify.
        lower_threshold: Minimum similarity required to use the retrieved
            entry; anything ``float()`` accepts.
        tags: Comma-separated candidate labels for the fallback classifier.
            Surrounding whitespace is stripped and blank entries are ignored.
        multi_label: Forwarded to the classifier's ``multi_label`` option.

    Returns:
        A dict. On retrieval it holds ``chatbot_response`` and
        ``query_similarity_score``. On fallback it is the classifier's result
        with ``query_similarity_score`` added.

    Raises:
        ValueError: If the fallback classifier is needed but ``tags`` contains
            no non-blank label.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    # A single query yields a single row of similarities; work on that row.
    scores = cosine_similarity(model.encode(query), corpus_embeddings)[0]
    best_id = int(np.argmax(scores))
    # Convert to a built-in float: the matrix element may be a NumPy scalar,
    # which the standard json encoder cannot serialize.
    best_score = float(scores[best_id])

    if best_score < float(lower_threshold):
        labels = [label.strip() for label in tags.split(",") if label.strip()]
        if not labels:
            # Fail early with an actionable message instead of letting an
            # empty label list reach the classifier.
            raise ValueError(
                "No candidate labels given: provide at least one "
                "comma-separated tag for zero-shot classification."
            )
        ans = pipe(question, candidate_labels=labels, multi_label=multi_label)
        ans['query_similarity_score'] = best_score
        return ans

    return {"chatbot_response": data[best_id][-1], 'query_similarity_score': best_score}
# Web UI for predict(): inputs are listed in the order of its parameters
# (question, lower_threshold, tags, multi_label); the returned dict is
# rendered as JSON.
demo = gr.Interface(
    fn=predict,
    inputs=[
        "text",
        gr.Slider(0.0, 1.0),
        "text",
        gr.Checkbox(label='Allow multiple true classes'),
    ],
    outputs="json",
)
demo.launch()