Spaces:

Anustup
/

NS-Sales-CallAI

Runtime error

App Files Files Community

NS-Sales-CallAI / app.py

Anustup

Create app.py

0919045 over 1 year ago

raw history blame

No virus

3.63 kB

	import csv
	import spacy
	import glob
	import datetime
	import pandas as pd
	import gradio as gr
	from transformers import pipeline
	from huggingface_hub import hf_hub_download
	from transformers import AutoModelForSequenceClassification, AutoTokenizer

	# Zero-shot classification configuration, keyed by language code.
	models = {"en": "facebook/bart-large-mnli"}
	hypothesis_templates = {"en": "This example is {}."}

	# One zero-shot pipeline per configured language.
	classifiers = {
	    lang: pipeline(
	        "zero-shot-classification",
	        hypothesis_template=hypothesis_templates[lang],
	        model=checkpoint,
	    )
	    for lang, checkpoint in models.items()
	}

	# Stand-alone NLI model and tokenizer used by inference_hypothesis().
	nli_model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
	tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")

	# Names looked up by the winning class index (0, 1, 2) of the NLI logits.
	labels = ["contradicts_hypothesis", "Neutral", "Entails_hypothesis"]

	def prep_examples():
	    """Build the canned example rows.

	    Returns:
	        A list with one ``[text, label_text, False]`` row per sample phrase;
	        the label text is the same string as the text itself.
	    """
	    # Spelling of the phrases is kept exactly as shipped: they are runtime
	    # strings, not documentation.
	    phrases = (
	        "EMI can be skipped",
	        "minimum package guranteed",
	        "100% placement gurantee",
	    )
	    return [[phrase, phrase, False] for phrase in phrases]


	def inference_hypothesis(premise, hypothesis, labels):
	    """Score a premise/hypothesis pair with the module-level NLI model.

	    Args:
	        premise: Text used as the first segment of the encoded pair.
	        hypothesis: Statement used as the second segment of the pair.
	        labels: Sequence of names indexed by the winning class id (0, 1 or 2).

	    Returns:
	        A tuple ``(premise, hypothesis, label, logits)`` where ``label`` is
	        the entry of ``labels`` at the highest-probability class and
	        ``logits`` is the raw score tensor for columns 0-2 of the model
	        output.
	    """
	    # Local import: the file does not import torch at module level.
	    import torch

	    # `truncation` is the supported spelling of the deprecated
	    # `truncation_strategy` keyword; "only_first" shortens only the premise.
	    x = tokenizer.encode(
	        premise,
	        hypothesis,
	        return_tensors="pt",
	        truncation="only_first",
	    )

	    # Pure inference: skip autograd bookkeeping so the returned logits do not
	    # keep a computation graph alive.
	    with torch.no_grad():
	        logits = nli_model(x.to("cpu"))[0]

	    entail_contradiction_logits = logits[:, [0, 1, 2]]
	    probs = entail_contradiction_logits.softmax(dim=1)

	    # A single pair is encoded, so row 0 is the only row; convert to a plain
	    # int before indexing the Python sequence.
	    best_class = int(probs.argmax(dim=1)[0])
	    return premise, hypothesis, labels[best_class], entail_contradiction_logits

	def sequence_to_classify(sequence, hypothesis_df, multi_label):
	    """Run the NLI check for each CSV row whose keyword occurs in ``sequence``.

	    Args:
	        sequence: Text to analyse; keyword matching is case-insensitive.
	        hypothesis_df: The rules CSV, given either as a path string or as an
	            object whose ``name`` attribute is the path. The CSV must provide
	            the columns ``filtering_keyword``, ``hypothesis`` and
	            ``expected_inference``.
	        multi_label: Accepted so the signature matches the interface inputs;
	            not used by the computation.

	    Returns:
	        A dict keyed by the premise text. Its value holds the ``hypothesis``,
	        the predicted ``label`` and the ``score`` returned by
	        ``inference_hypothesis`` for a row whose predicted label equals the
	        row's ``expected_inference``. Empty when no row qualifies.
	    """
	    csv_source = hypothesis_df if isinstance(hypothesis_df, str) else hypothesis_df.name
	    rules = pd.read_csv(csv_source)

	    # Lower-case the text once and read each column once instead of per row.
	    haystack = sequence.lower()
	    rows = zip(
	        rules.filtering_keyword.tolist(),
	        rules.hypothesis.tolist(),
	        rules.expected_inference.tolist(),
	    )

	    inference_output = {}
	    for keyword, hypothesis, expected in rows:
	        # An empty keyword cell is read as NaN; such a row can never match.
	        if pd.isna(keyword):
	            continue
	        if str(keyword).lower() not in haystack:
	            continue

	        premise, checked, label, score = inference_hypothesis(sequence, hypothesis, labels)
	        if label == expected:
	            # NOTE(review): the key is the premise, which is the same for
	            # every row, so a later qualifying row replaces an earlier one.
	            # Confirm whether all matches should be kept instead.
	            inference_output[premise] = {
	                "hypothesis": checked,
	                "label": label,
	                "score": score,
	            }
	    return inference_output
	def csv_to_df(file):
	    """Load a CSV into a pandas DataFrame.

	    Args:
	        file: Whatever ``pandas.read_csv`` accepts as its first argument.

	    Returns:
	        The parsed ``DataFrame``.
	    """
	    return pd.read_csv(file)

	def csv_to_json(df):
	    """Serialise a DataFrame as a JSON array with one object per row.

	    Args:
	        df: The ``DataFrame`` to serialise.

	    Returns:
	        The JSON text produced by ``df.to_json(orient="records")``.
	    """
	    return df.to_json(orient="records")

	# Web UI: a text box, the rules CSV upload and a multi-label toggle are
	# passed, in that order, to sequence_to_classify; the result is shown as text.
	# Components come from the top-level `gr` namespace because the
	# `gr.inputs` / `gr.outputs` namespaces are deprecated in Gradio 3 and no
	# longer exist in Gradio 4.
	iface = gr.Interface(
	    title="Sales Call Analysis AI - NS AI LABS",
	    description="Off-the-shelf NLP classifier with no domain or task-specific training.",
	    fn=sequence_to_classify,
	    inputs=[
	        gr.Textbox(
	            lines=10,
	            label="Please enter the text you would like to classify...",
	            placeholder="Text here...",
	        ),
	        gr.File(),
	        gr.Radio(choices=[False, True], label="Multi-label?"),
	    ],
	    outputs=gr.Textbox(),
	)

	iface.launch()