# NOTE(review): the lines below were scraping artifacts from the hosting page
# (Hugging Face Space status banner, file size, and the editor's line-number
# gutter), not Python source. Preserved as a comment so the file parses:
#   Spaces: Runtime error / Runtime error / File size: 3,630 Bytes / 0919045
#   1 2 3 ... 99  (line-number gutter)
import csv
import spacy
import glob
import datetime
import pandas as pd
import gradio as gr
from transformers import pipeline
from huggingface_hub import hf_hub_download
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Zero-shot model checkpoint per language code (only English configured).
models = {
'en': 'facebook/bart-large-mnli'
}
# Hypothesis template handed to the zero-shot pipeline; "{}" is filled with
# each candidate label at inference time.
hypothesis_templates = {
'en': 'This example is {}.'
}
# Zero-shot classification pipelines, keyed by language code.
# NOTE: built at import time — this downloads/loads the model immediately.
classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['en'],
model=models['en'])
}
# Raw NLI model + tokenizer for direct premise/hypothesis inference
# (used by inference_hypothesis below, bypassing the pipeline).
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
# Human-readable names for the model's three output logits, in output order
# (presumably MNLI order: contradiction, neutral, entailment — TODO confirm).
labels=["contradicts_hypothesis","Neutral","Entails_hypothesis"]
def prep_examples():
    """Build the demo rows shown in the Gradio examples panel.

    Returns a list of ``[sequence, hypothesis, multi_label]`` rows where each
    premise is paired with itself as the hypothesis and multi-label is off.
    """
    # Spelling of the premises is preserved verbatim from the sales-call data.
    premises = (
        "EMI can be skipped",
        "minimum package guranteed",
        "100% placement gurantee",
    )
    return [[text, text, False] for text in premises]
def inference_hypothesis(premise, hypothesis, labels):
    """Run the NLI model on one (premise, hypothesis) pair.

    Parameters
    ----------
    premise : str
        Text to test the hypothesis against.
    hypothesis : str
        Hypothesis sentence.
    labels : list[str]
        Names for the model's three logits, in the model's output order.

    Returns
    -------
    tuple
        ``(premise, hypothesis, predicted_label, logits)`` where
        ``predicted_label`` is ``labels[argmax]`` and ``logits`` is the raw
        1x3 logit tensor.
    """
    # BUG FIX: `truncation_strategy=` is a deprecated kwarg that modern
    # tokenizers silently ignore; `truncation='only_first'` actually
    # truncates the premise when the pair exceeds the model's max length.
    encoded = tokenizer.encode(premise, hypothesis, return_tensors='pt',
                               truncation='only_first')
    logits = nli_model(encoded.to("cpu"))[0]
    # The original sliced logits[:, [0, 1, 2]], a no-op copy of all three
    # columns; use the logits directly.
    probs = logits.softmax(dim=1)
    return premise, hypothesis, labels[probs.argmax()], logits
def sequence_to_classify(sequence, hypothesis_df, multi_label):
    """Classify ``sequence`` against hypotheses from an uploaded CSV.

    Parameters
    ----------
    sequence : str
        Transcript text to analyse.
    hypothesis_df : uploaded file object exposing ``.name`` (a CSV path)
        CSV with columns ``filtering_keyword``, ``hypothesis`` and
        ``expected_inference``.
    multi_label : bool
        Accepted for interface compatibility; not used by the current logic.

    Returns
    -------
    dict
        Maps the premise text to ``{"hypothesis", "label", "score"}`` for
        each CSV row whose keyword occurs in the sequence and whose NLI
        label matches the expected inference. Since the premise is always
        ``sequence`` itself, later matches overwrite earlier ones.
    """
    rules = pd.read_csv(hypothesis_df.name)
    sequence_lower = sequence.lower()
    results = {}
    # Iterate rows directly instead of re-materialising each column with
    # .tolist() on every access, as the original did.
    for row in rules.itertuples(index=False):
        if row.filtering_keyword.lower() not in sequence_lower:
            continue
        premise, hyp, label, logits = inference_hypothesis(
            sequence, row.hypothesis, labels)
        if label == row.expected_inference:
            results[premise] = {"hypothesis": hyp,
                                "label": label,
                                "score": logits}
    return results
    # BUG FIX: the original continued past this return with unreachable code
    # that referenced an undefined `response` variable (a leftover from a
    # pipeline-based implementation); it has been removed. The unused locals
    # `lang`, `classifier` and `label_clean` were removed for the same reason.
def csv_to_df(file):
    """Parse a CSV file (path or file-like object) into a pandas DataFrame."""
    dataframe = pd.read_csv(file)
    return dataframe
def csv_to_json(df):
    """Serialize a DataFrame to a JSON string, one object per row."""
    records_json = df.to_json(orient="records")
    return records_json
# Wire the classifier into a Gradio UI: a free-text box for the transcript,
# a CSV upload with the hypothesis rules, and a multi-label toggle; the raw
# result dict is rendered into a textbox. (Uses the legacy gr.inputs/
# gr.outputs API, consistent with the gradio version this file targets.)
iface = gr.Interface(
    title="Sales Call Analysis AI - NS AI LABS",
    description="Off-the-shelf NLP classifier with no domain or task-specific training.",
    fn=sequence_to_classify,
    inputs=[gr.inputs.Textbox(lines=10,
                              label="Please enter the text you would like to classify...",
                              placeholder="Text here..."),
            gr.inputs.File(),
            gr.inputs.Radio(choices=[False, True],
                            label="Multi-label?")],
    outputs=gr.outputs.Textbox())
# BUG FIX: removed a stray trailing "|" after launch() (a line-number-gutter
# artifact from the page scrape) that made the whole file a SyntaxError.
iface.launch()