Spaces:

adlozano1
/

gibberish_detector

Runtime error

Al John Lexter Lozano

add DL model, fixed examples, add visual output

9a34627 over 2 years ago

4.18 kB

	from cProfile import label
	from fastapi import File
	import gradio as gr
	from gib_detect_module import detect
	import csv
	import torch
	import tensorflow as tf


	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	# Hub repository id shared by the classifier and its tokenizer, so the two
	# can never be loaded from different checkpoints by accident.
	_GIBBERISH_MODEL_ID = "madhurjindal/autonlp-Gibberish-Detector-492513457"

	# NOTE(review): use_auth_token=True presumably needs Hub credentials to be
	# available at import time -- confirm this repo actually requires them.
	DLmodel = AutoModelForSequenceClassification.from_pretrained(
	    _GIBBERISH_MODEL_ID,
	    use_auth_token=True,
	)
	tokenizer = AutoTokenizer.from_pretrained(
	    _GIBBERISH_MODEL_ID,
	    use_auth_token=True,
	)



	def greet(name: str) -> str:
	    """Return the greeting ``Hello <name>!!`` for the given name."""
	    prefix, suffix = "Hello ", "!!"
	    return prefix + name + suffix

	def detect_gibberish(line, f):
	    """Classify a line of text, or annotate a CSV file, with ``detect``.

	    ``line`` takes precedence: when it is non-empty the file is ignored.

	    Args:
	        line: Text to classify; may be empty or ``None``.
	        f: File object exposing a ``name`` path attribute, or ``None``.

	    Returns:
	        A 3-tuple ``(message, annotated_csv_path, label_data)``. Exactly one
	        of the first two slots is filled when an input was supplied; the
	        third slot is always ``None`` for this detector. When neither input
	        is supplied all three slots are ``None``.
	    """
	    if line:
	        verdict = "Valid!!!!" if detect(line) else "Bollocks Giberrish"
	        return verdict, None, None
	    if f:
	        return None, annotate_csv(f), None
	    # Nothing to process: still return one value per output slot instead of
	    # falling through to a bare ``None`` that callers cannot unpack.
	    return None, None, None




	def annotate_csv(f):
	    """Append the ``detect`` verdict for the first column to every CSV row.

	    Reads the file at ``f.name`` and writes the annotated rows to ``out.csv``
	    in the current working directory, overwriting any previous output.

	    Args:
	        f: Object whose ``name`` attribute is the path of the CSV to read.

	    Returns:
	        The output path, ``"out.csv"``.
	    """
	    # newline='' on both files lets the csv module do its own line-ending
	    # handling, which also keeps newlines inside quoted fields intact.
	    with open(f.name, newline='') as csvfile, \
	            open('out.csv', 'w', newline='') as csvout:
	        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
	        cwriter = csv.writer(csvout, delimiter=',',
	                             quotechar='"', quoting=csv.QUOTE_MINIMAL)
	        for row in creader:
	            # Blank input lines arrive as empty rows; copy them through
	            # unannotated instead of crashing on row[0].
	            if row:
	                row.append(str(detect(row[0])))
	            cwriter.writerow(row)
	    return "out.csv"


	def annotate_csv_deep(f):
	    """Append the model's top label and its confidence to every CSV row.

	    The first column of each row is classified with ``DLmodel``; the
	    winning label and its probability (formatted as a whole percentage) are
	    appended as two extra columns. Output goes to ``out.csv`` in the current
	    working directory, overwriting any previous output.

	    Args:
	        f: Object whose ``name`` attribute is the path of the CSV to read.

	    Returns:
	        The output path, ``"out.csv"``.
	    """
	    labels = DLmodel.config.id2label
	    with open(f.name, newline='') as csvfile, \
	            open('out.csv', 'w', newline='') as csvout:
	        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
	        cwriter = csv.writer(csvout, delimiter=',',
	                             quotechar='"', quoting=csv.QUOTE_MINIMAL)
	        for row in creader:
	            if not row:
	                # Blank input line: nothing to classify, copy it through.
	                cwriter.writerow(row)
	                continue

	            # Classify only the first column, like annotate_csv does.
	            # Passing the whole row would be treated as a batch of texts,
	            # so the flattened probabilities would no longer line up with
	            # the label ids.
	            inputs = tokenizer(row[0], return_tensors="pt")
	            with torch.no_grad():  # inference only, no autograd graph
	                outputs = DLmodel(**inputs)
	            probs = outputs.logits.softmax(dim=-1).flatten().tolist()
	            idx = probs.index(max(probs))

	            row.append(labels[idx])
	            row.append("{:.0%}".format(probs[idx]))
	            cwriter.writerow(row)
	    return "out.csv"


	def detect_gibberish_deep(line, f):
	    """Classify a line of text, or annotate a CSV file, with ``DLmodel``.

	    ``line`` takes precedence: when it is non-empty the file is ignored.

	    Args:
	        line: Text to classify; may be empty or ``None``.
	        f: File object exposing a ``name`` path attribute, or ``None``.

	    Returns:
	        A 3-tuple ``(summary, annotated_csv_path, label_probabilities)``.
	        For text input, ``summary`` has one ``"<label> : <percent>"`` line
	        per class and ``label_probabilities`` maps each label to its
	        probability. For file input only the path slot is filled. When
	        neither input is supplied all three slots are ``None``.
	    """
	    if line:
	        inputs = tokenizer(line, return_tensors="pt")
	        labels = DLmodel.config.id2label
	        with torch.no_grad():  # inference only, no autograd graph
	            outputs = DLmodel(**inputs)
	        probs = outputs.logits.softmax(dim=-1).flatten().tolist()
	        # Look labels up by class id so the pairing does not depend on the
	        # iteration order of the id2label mapping.
	        output = {labels[i]: p for i, p in enumerate(probs)}
	        readable_output = "".join(
	            "{} : {:.0%}\n".format(k, v) for k, v in output.items()
	        )
	        return readable_output, None, output
	    if f:
	        return None, annotate_csv_deep(f), None
	    # Nothing to process: still return one value per output slot instead of
	    # falling through to a bare ``None`` that callers cannot unpack.
	    return None, None, None



	def detect_gibberish_abstract(model, line, f):
	    """Route the request to the detector selected by ``model``.

	    The exact string ``"Deep Learning Model"`` selects
	    ``detect_gibberish_deep``; any other value falls back to
	    ``detect_gibberish``. The chosen detector's result is returned as is.
	    """
	    wants_deep = model == "Deep Learning Model"
	    handler = detect_gibberish_deep if wants_deep else detect_gibberish
	    return handler(line, f)


	# ---- Input components ----
	inputLine = gr.inputs.Textbox(
	    lines=1,
	    placeholder="Input text here, if both text and file have values, only the text input will be processed.",
	    default="",
	    label="Text",
	    optional=False,
	)
	inputFile = gr.inputs.File(
	    file_count="single",
	    type="file",
	    label="File to Annotate",
	    optional=True,
	)

	# Dropdown values; detect_gibberish_abstract compares against these strings.
	choices = ["Deep Learning Model", "Markov Chain"]
	inputModel = gr.inputs.Dropdown(choices)

	# ---- Output components ----
	# outputLine is created but not passed to the Interface below, which uses
	# the "text" shortcut for its first output.
	outputLine = gr.outputs.Textbox(type="auto", label=None)
	outputFile = gr.outputs.File(label="Annotated CSV")
	label = gr.outputs.Label(num_top_classes=4)

	# Example rows are [model, text, file]: first every model with each sample
	# word, then every model with each sample file and no text.
	examples = [
	    [model_name, sample_word, "demo_blank.csv"]
	    for model_name in choices
	    for sample_word in ("quetzalcoatl", "aasdf", "Covfefe")
	] + [
	    [model_name, "", sample_file]
	    for model_name in choices
	    for sample_file in ("demo_bad.txt", "demo_mixed.txt")
	]

	iface = gr.Interface(
	    fn=[detect_gibberish_abstract],
	    inputs=[inputModel, inputLine, inputFile],
	    outputs=["text", outputFile, label],
	    examples=examples,
	    allow_flagging='never',
	)
	iface.launch()