NS-Sales-CallAI / app.py
Anustup's picture
Create app.py
0919045
raw history blame
No virus
3.63 kB
import csv
import spacy
import glob
import datetime
import pandas as pd
import gradio as gr
from transformers import pipeline
from huggingface_hub import hf_hub_download
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Hugging Face model id used for each supported language code.
models = {
    'en': 'facebook/bart-large-mnli',
}

# Sentence template the zero-shot pipeline fills in with each candidate label.
hypothesis_templates = {
    'en': 'This example is {}.',
}

# Load the NLI checkpoint and its tokenizer exactly once. They are used
# directly for premise/hypothesis inference and are also handed to the
# zero-shot pipeline below, so the weights are not loaded a second time.
nli_model = AutoModelForSequenceClassification.from_pretrained(models['en'])
tokenizer = AutoTokenizer.from_pretrained(models['en'])

# Zero-shot classification pipeline per language, built on the shared model.
classifiers = {
    'en': pipeline(
        "zero-shot-classification",
        model=nli_model,
        tokenizer=tokenizer,
        hypothesis_template=hypothesis_templates['en'],
    ),
}

# Names reported for the model's output classes, indexed by class id.
# NOTE(review): presumably mirrors the checkpoint's class order
# (contradiction, neutral, entailment) -- verify against the model config.
labels = ["contradicts_hypothesis", "Neutral", "Entails_hypothesis"]
def prep_examples():
    """Return the built-in demo rows.

    Each row is ``[text, label_text, flag]``: the label text repeats the
    example text and the flag is always ``False``.
    """
    # Spelling of the phrases is kept exactly as in the original data.
    phrases = (
        "EMI can be skipped",
        "minimum package guranteed",
        "100% placement gurantee",
    )
    return [[phrase, phrase, False] for phrase in phrases]
def inference_hypothesis(premise, hypothesis, labels):
    """Run the NLI model on one premise/hypothesis pair.

    Args:
        premise: Text to be judged.
        hypothesis: Statement tested against the premise.
        labels: Sequence of label names indexed by the model's class id.

    Returns:
        A tuple ``(premise, hypothesis, predicted_label, logits)`` where
        ``predicted_label`` is the entry of ``labels`` with the highest
        softmax probability and ``logits`` is the raw model output tensor
        for the pair.
    """
    # Local import: the file does not import torch at module level.
    import torch

    # `truncation=` is the supported spelling of the deprecated
    # `truncation_strategy=` keyword; 'only_first' trims only the premise
    # when the encoded pair is too long.
    input_ids = tokenizer.encode(
        premise, hypothesis, return_tensors='pt', truncation='only_first'
    )

    # Inference only: skip autograd bookkeeping, and follow the model's
    # device instead of assuming CPU.
    with torch.no_grad():
        logits = nli_model(input_ids.to(nli_model.device))[0]

    probs = logits.softmax(dim=1)
    # A single pair is encoded, so row 0 is the only row; convert the class
    # id to a plain int before indexing the label list.
    predicted_label = labels[probs[0].argmax().item()]
    return premise, hypothesis, predicted_label, logits
def sequence_to_classify(sequence, hypothesis_df, multi_label):
    """Check a text against every hypothesis whose keyword it mentions.

    Args:
        sequence: Text to analyse.
        hypothesis_df: Uploaded file object; its ``.name`` is read as a CSV
            with the columns ``filtering_keyword``, ``hypothesis`` and
            ``expected_inference``.
        multi_label: Currently unused; kept so the interface wiring that
            passes three inputs keeps working.

    Returns:
        A dict keyed by ``sequence`` whose value holds the ``hypothesis``,
        predicted ``label`` and raw ``score`` of a row whose prediction
        equals its ``expected_inference``. Empty when nothing matched.
    """
    hypothesis_df = pd.read_csv(hypothesis_df.name)
    sequence_lower = sequence.lower()
    inference_output = {}

    # Convert each column once instead of on every loop iteration.
    rows = zip(
        hypothesis_df.filtering_keyword.tolist(),
        hypothesis_df.hypothesis.tolist(),
        hypothesis_df.expected_inference.tolist(),
    )
    for keyword, hypothesis, expected in rows:
        # Blank CSV cells are read as NaN (a float); skip them rather than
        # crashing on `.lower()`.
        if not isinstance(keyword, str):
            continue
        if keyword.lower() not in sequence_lower:
            continue

        premise, tested_hypothesis, label, score = inference_hypothesis(
            sequence, hypothesis, labels
        )
        if label == expected:
            # NOTE(review): the key is the premise, which is the same for
            # every row, so a later match overwrites an earlier one. Kept
            # to preserve the return shape -- confirm whether all matches
            # should be reported.
            inference_output[premise] = {
                "hypothesis": tested_hypothesis,
                "label": label,
                "score": score,
            }
    # The statements that used to follow this return were unreachable and
    # referenced an undefined `response`; they have been removed.
    return inference_output
def csv_to_df(file):
    """Read ``file`` with ``pandas.read_csv`` and return the DataFrame."""
    frame = pd.read_csv(file)
    return frame
def csv_to_json(df):
    """Serialise ``df`` to a JSON string with one object per row."""
    records_json = df.to_json(orient="records")
    return records_json
# Input widgets, listed in the positional order of sequence_to_classify's
# parameters: the text, the uploaded file, and the multi-label flag.
input_components = [
    gr.inputs.Textbox(
        lines=10,
        label="Please enter the text you would like to classify...",
        placeholder="Text here...",
    ),
    gr.inputs.File(),
    gr.inputs.Radio(
        choices=[False, True],
        label="Multi-label?",
    ),
]

# The function's result is shown in a plain text box.
output_component = gr.outputs.Textbox()

iface = gr.Interface(
    fn=sequence_to_classify,
    inputs=input_components,
    outputs=output_component,
    title="Sales Call Analysis AI - NS AI LABS",
    description="Off-the-shelf NLP classifier with no domain or task-specific training.",
)

# Start the web UI as soon as the module is executed.
iface.launch()