# Hosting-page header captured together with the file (not program code):
# Al John Lexter Lozano
# add DL model, fixed examples, add visual output
# 9a34627
# raw
# history blame
# 4.18 kB
from cProfile import label
from fastapi import File
import gradio as gr
from gib_detect_module import detect
import csv
import torch
import tensorflow as tf
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Hub identifier of the checkpoint; the model and its tokenizer must come from
# the same checkpoint, so the id is spelled out once and shared.
_GIBBERISH_CHECKPOINT = "madhurjindal/autonlp-Gibberish-Detector-492513457"

# Both objects are loaded once at import time and reused by every request.
# NOTE(review): use_auth_token=True presumably relies on locally stored
# Hugging Face credentials -- confirm they are available where this runs.
DLmodel = AutoModelForSequenceClassification.from_pretrained(
    _GIBBERISH_CHECKPOINT,
    use_auth_token=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    _GIBBERISH_CHECKPOINT,
    use_auth_token=True,
)
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given string.

    ``name`` must be a ``str``; any other type raises ``TypeError`` because
    the pieces are combined as strings without conversion.
    """
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
def detect_gibberish(line, f):
    """Classify a line of text, or annotate a CSV file, using ``detect``.

    Args:
        line: Text to classify. When truthy it takes precedence and ``f`` is
            ignored.
        f: Uploaded file object exposing a ``.name`` path; only used when
            ``line`` is empty.

    Returns:
        A 3-tuple ``(message, csv_path, label_data)``, one value per output
        of the interface this function feeds:

        * text given -> ``("Valid!!!!" or "Bollocks Giberrish", None, None)``
        * file given -> ``(None, "out.csv", None)``
        * neither    -> ``(None, None, None)``
    """
    if line:
        message = "Valid!!!!" if detect(line) else "Bollocks Giberrish"
        return message, None, None
    if f:
        return None, annotate_csv(f), None
    # Neither input was supplied. Falling off the end would return a bare
    # None, which cannot be spread over the three declared outputs, so hand
    # back an explicit empty result instead.
    return None, None, None
def annotate_csv(f):
    """Copy a CSV to ``out.csv``, appending the ``detect`` verdict per row.

    The first column of every non-empty row is passed to ``detect`` and the
    stringified result is appended as a new last column. Blank lines in the
    input are written through unchanged.

    Args:
        f: Object whose ``.name`` attribute is the path of the input CSV.

    Returns:
        The output path, always ``"out.csv"`` relative to the current
        working directory (overwritten on every call).
    """
    # newline='' lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are read back intact.
    with open(f.name, newline='') as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                # csv.reader yields [] for a blank line; there is no first
                # column to classify, so pass the row through as-is instead
                # of failing on row[0].
                if row:
                    row.append(str(detect(row[0])))
                cwriter.writerow(row)
    return "out.csv"
def annotate_csv_deep(f):
    """Copy a CSV to ``out.csv``, appending the model's label and confidence.

    The first column of every non-empty row is classified with ``DLmodel``.
    Two columns are appended: the highest-probability label (looked up in
    ``DLmodel.config.id2label``) and that probability formatted as a whole
    percentage. Blank lines in the input are written through unchanged.

    Args:
        f: Object whose ``.name`` attribute is the path of the input CSV.

    Returns:
        The output path, always ``"out.csv"`` relative to the current
        working directory (overwritten on every call).
    """
    labels = DLmodel.config.id2label
    # newline='' lets the csv module do its own newline handling.
    with open(f.name, newline='') as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                if not row:
                    # Blank input line: nothing to classify.
                    cwriter.writerow(row)
                    continue
                # Tokenize only the first column. Passing the whole row (a
                # list) makes the tokenizer treat every column as a separate
                # batch item, so the flattened probabilities no longer line
                # up with the label ids.
                inputs = tokenizer(row[0], return_tensors="pt")
                # Inference only: skip building the autograd graph.
                with torch.no_grad():
                    outputs = DLmodel(**inputs)
                probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
                idx = probs.index(max(probs))
                row.append(labels[idx])
                row.append("{:.0%}".format(probs[idx]))
                cwriter.writerow(row)
    return "out.csv"
def detect_gibberish_deep(line, f):
    """Classify a line of text, or annotate a CSV file, using ``DLmodel``.

    Args:
        line: Text to classify. When truthy it takes precedence and ``f`` is
            ignored.
        f: Uploaded file object exposing a ``.name`` path; only used when
            ``line`` is empty.

    Returns:
        A 3-tuple ``(summary, csv_path, label_scores)``, one value per output
        of the interface this function feeds:

        * text given -> ``(summary, None, scores)`` where ``scores`` maps
          each label name to its probability and ``summary`` lists them as
          ``"<label> : <percent>"`` lines, each ending in a newline
        * file given -> ``(None, "out.csv", None)``
        * neither    -> ``(None, None, None)``
    """
    if line:
        inputs = tokenizer(line, return_tensors="pt")
        labels = DLmodel.config.id2label
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
        # Look labels up by class index (as annotate_csv_deep does) rather
        # than relying on the iteration order of the id2label mapping.
        output = {labels[i]: p for i, p in enumerate(probs)}
        readable_output = "".join(
            "{} : {:.0%}\n".format(k, v) for k, v in output.items()
        )
        return readable_output, None, output
    if f:
        return None, annotate_csv_deep(f), None
    # Neither input was supplied. Falling off the end would return a bare
    # None, which cannot be spread over the three declared outputs.
    return None, None, None
def detect_gibberish_abstract(model, line, f):
    """Route a request to the backend selected in the model dropdown.

    Args:
        model: Selected backend name. ``"Deep Learning Model"`` selects
            ``detect_gibberish_deep``; any other value selects
            ``detect_gibberish``.
        line: Text input, forwarded unchanged.
        f: File input, forwarded unchanged.

    Returns:
        Whatever the selected backend returns.
    """
    handler = (
        detect_gibberish_deep
        if model == "Deep Learning Model"
        else detect_gibberish
    )
    return handler(line, f)
# ---------------------------------------------------------------------------
# Gradio UI wiring
# ---------------------------------------------------------------------------

# Input widgets. The interface passes them to detect_gibberish_abstract in
# the order (model, text, file).
inputLine = gr.inputs.Textbox(
    lines=1,
    placeholder="Input text here, if both text and file have values, only the text input will be processed.",
    default="",
    label="Text",
    optional=False,
)
inputFile = gr.inputs.File(
    file_count="single",
    type="file",
    label="File to Annotate",
    optional=True,
)
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices)

# Output widgets. outputLine is defined but the interface below uses the
# "text" shortcut for its first output instead.
outputLine = gr.outputs.Textbox(type="auto", label=None)
outputFile = gr.outputs.File(label="Annotated CSV")
label = gr.outputs.Label(num_top_classes=4)

# Example rows in input order [model, text, file]: first every backend with
# each sample word (paired with the blank demo file), then every backend
# with each sample file and no text.
examples = [
    [backend, sample_word, "demo_blank.csv"]
    for backend in choices
    for sample_word in ("quetzalcoatl", "aasdf", "Covfefe")
] + [
    [backend, "", sample_file]
    for backend in choices
    for sample_file in ("demo_bad.txt", "demo_mixed.txt")
]

# Earlier interface that called detect_gibberish directly, kept for reference:
#iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')
#iface.launch()
iface = gr.Interface(
    fn=[detect_gibberish_abstract],
    inputs=[inputModel, inputLine, inputFile],
    outputs=["text", outputFile, label],
    examples=examples,
    allow_flagging='never',
)
iface.launch()