|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
import pandas as pd |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from InstructorEmbedding import INSTRUCTOR |
|
|
|
|
|
# Zero-shot classifier used by predict() when retrieval confidence is too low.
pipe = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
)

# Embedding model used for both the corpus rows and incoming questions.
model = INSTRUCTOR('hkunlp/instructor-large')

# Intent table; the code below reads its "description" and "message" columns.
df = pd.read_csv('intent.csv', delimiter=';')

# One [instruction, text] pair per CSV row. The instruction embeds the row's
# description; the text (last element) is the message predict() returns.
data = []
for _, row in df.iterrows():
    instruction = f'Represent the document for retrieval of {row["description"]} information : '
    data.append([instruction, row["message"]])

# Encoded once at import time so each request only has to embed the question.
corpus_embeddings = model.encode(data)
|
|
|
|
|
def predict(question, lower_threshold, tags, multi_label):
    """Answer *question* from the intent corpus, or classify it zero-shot.

    The question is embedded and compared by cosine similarity against the
    precomputed ``corpus_embeddings``. When the best match scores below
    ``lower_threshold`` the question is instead classified against the
    labels in ``tags`` by the zero-shot pipeline; otherwise the message of
    the best-matching corpus row is returned.

    Args:
        question: Free-text user query.
        lower_threshold: Similarity below which retrieval is rejected;
            anything ``float()`` accepts.
        tags: Comma-separated candidate labels. Surrounding whitespace is
            stripped and blank entries are ignored.
        multi_label: Forwarded to the pipeline's ``multi_label`` argument.

    Returns:
        A dict that always carries ``query_similarity_score`` (a plain
        ``float``). On a retrieval hit it also has ``chatbot_response``;
        otherwise it is the pipeline's result dict. If no usable label was
        supplied the pipeline is skipped and an empty classification
        (``sequence``, empty ``labels`` and ``scores``) is returned.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # Exactly one query is encoded, so only the first row is meaningful.
    # Indexing it explicitly avoids relying on argmax's flattened index.
    scores = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    best_doc_id = int(np.argmax(scores))
    # Plain float: the result is rendered as JSON, and numpy scalars are not
    # serialisable by every JSON encoder.
    best_score = float(scores[best_doc_id])

    if best_score < float(lower_threshold):
        candidate_labels = [
            label.strip() for label in (tags or "").split(",") if label.strip()
        ]
        if not candidate_labels:
            # The pipeline raises on an empty label list; report an empty
            # classification instead of surfacing an error to the UI.
            return {
                "sequence": question,
                "labels": [],
                "scores": [],
                "query_similarity_score": best_score,
            }
        ans = pipe(question, candidate_labels=candidate_labels, multi_label=multi_label)
        ans['query_similarity_score'] = best_score
        return ans

    return {"chatbot_response": data[best_doc_id][-1], 'query_similarity_score': best_score}
|
|
|
|
|
|
|
|
|
|
|
# Input components map positionally onto predict's parameters:
# question, lower_threshold, tags, multi_label.
demo = gr.Interface(
    fn=predict,
    inputs=[
        "text",
        gr.Slider(0.0, 1.0),
        "text",
        gr.Checkbox(label='Allow multiple true classes'),
    ],
    outputs="json",
)
demo.launch()
|
|
|
|