# Source: Hugging Face Space file view (author: miscjose).
# Commit: "Initial Commit" (5efc817), file size 7.15 kB.
import csv

import gradio as gr
from transformers import pipeline
# Text-classification pipelines, indexed first by annotation task and then by
# model family. Every pipeline is instantiated eagerly at import time, in the
# order the model ids are listed below.
pipelines_text = {
    task: {
        family: pipeline("text-classification", model=model_id)
        for family, model_id in model_ids.items()
    }
    for task, model_ids in {
        'Spam': {
            'BERT': "mariagrandury/distilbert-base-uncased-finetuned-sms-spam-detection",
            'RoBERTa': "mariagrandury/roberta-base-finetuned-sms-spam-detection",
        },
        'Sentiment': {
            'BERT': "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
            'RoBERTa': "cardiffnlp/twitter-roberta-base-sentiment-latest",
        },
        'Emotion': {
            'BERT': "bhadresh-savani/bert-base-go-emotion",
            'RoBERTa': "j-hartmann/emotion-english-distilroberta-base",
        },
    }.items()
}
def parseImage(file, radio):
    """Return the ``name`` attribute of the uploaded file object.

    ``radio`` is accepted but not used by this function.
    """
    uploaded_name = file.name
    return uploaded_name
# Fixed number of textbox components created in each UI column; it is also the
# slider's upper bound, so at most this many boxes can be shown at once.
max_textboxes = 100
def change_textboxes(n):
    """Build updates that show the first ``n`` textboxes and hide the rest.

    Args:
        n: Requested number of visible textboxes (the slider value). It is
            converted with ``int`` and clamped to ``[0, max_textboxes]``.

    Returns:
        A list of exactly ``max_textboxes`` ``gr.Textbox.update`` results:
        the first ``n`` with ``visible=True``, the remainder with
        ``visible=False``. All of them set ``interactive=True``.
    """
    # Convert once so both halves use the same count: list repetition needs an
    # int, and the two halves must always add up to max_textboxes entries.
    shown = max(0, min(int(n), max_textboxes))
    hidden = max_textboxes - shown
    return (
        [gr.Textbox.update(visible=True, interactive=True)] * shown
        + [gr.Textbox.update(visible=False, interactive=True)] * hidden
    )
def parseText(text_upload_file, delimeter_dropdown):
    """Split an uploaded text file into one value per textbox.

    Args:
        text_upload_file: Uploaded file object; only its ``name`` attribute
            is used, as the path passed to ``open``.
        delimeter_dropdown: One of ``'New Line'``, ``'Tab'`` or ``'Comma'``.

    Returns:
        A list of exactly ``max_textboxes`` strings: the file's segments in
        order, padded with ``''``. Segments beyond ``max_textboxes`` are
        dropped.

    Raises:
        KeyError: If ``delimeter_dropdown`` is not a known delimiter name.
    """
    delimeter_mapping = {'New Line': '\n', 'Tab': '\t', 'Comma': ','}
    delimeter = delimeter_mapping[delimeter_dropdown]

    with open(text_upload_file.name, 'r', encoding='utf-8') as f:
        text_upload = f.read()

    # There are only max_textboxes boxes to fill, so a longer file is
    # truncated rather than indexing past the end of the list.
    segments = text_upload.split(delimeter)[:max_textboxes]
    return segments + [''] * (max_textboxes - len(segments))
def annotateText(text_boxes_slider, annotation_radio, model_dropdown, *text_boxes_texbox):
    """Classify the selected textboxes and export the results to a CSV file.

    Args:
        text_boxes_slider: Number of leading textboxes to send to the model.
            Converted with ``int`` and clamped to the number of boxes.
        annotation_radio: Task key into ``pipelines_text``.
        model_dropdown: Model key within the chosen task.
        *text_boxes_texbox: Current content of every input textbox.

    Returns:
        A flat list: the CSV path ``'./annotations.csv'``, then every input
        text, then ``max_textboxes`` labels, then ``max_textboxes`` scores
        (labels and scores are ``''`` for boxes that were not classified).

    Raises:
        KeyError: If the task or model key is not in ``pipelines_text``.

    Side effects:
        Overwrites ``annotations.csv`` in the current working directory with
        one row per non-empty textbox, including non-empty boxes that were
        not classified (their annotation and confidence stay empty).
    """
    texts = list(text_boxes_texbox)
    res_label = [''] * max_textboxes
    res_score = [''] * max_textboxes

    # The slider value may arrive as a float; slicing needs an int, and it
    # must never exceed the number of boxes or result slots.
    n_active = max(0, min(int(text_boxes_slider), len(texts), max_textboxes))

    # predictions
    pipe = pipelines_text[annotation_radio][model_dropdown]
    active_texts = texts[:n_active]
    predictions = pipe(active_texts) if active_texts else []

    for idx, pred in enumerate(predictions):
        # special case for spam (might change later)
        if annotation_radio == 'Spam':
            res_label[idx] = 'Not Spam' if pred['label'] == 'LABEL_0' else 'Spam'
        else:
            res_label[idx] = pred['label']
        res_score[idx] = f"{pred['score']:.2f}"

    # csv.writer quotes fields containing commas, quotes or newlines, which
    # free-form text routinely does; '\n' keeps the line endings unchanged.
    with open('annotations.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['text', 'annotation', 'confidence'])
        for text, label, score in zip(texts, res_label, res_score):
            if text:
                writer.writerow([text, label, score])

    return ['./annotations.csv'] + texts + res_label + res_score
def _set_output_visibility(n):
    """Show the first ``n`` output textboxes and hide the rest.

    Only ``visible`` is updated, so the read-only output boxes keep the
    ``interactive=False`` they were created with.
    """
    shown = int(n)
    return [gr.Textbox.update(visible=idx < shown) for idx in range(max_textboxes)]


# Layout and event wiring for the annotation app. Each column holds
# max_textboxes textboxes, of which the first 3 start out visible; the slider
# controls how many are shown.
with gr.Blocks() as demo:
    gr.Markdown("# Data Annotation Tool")
    gr.Markdown('Upload a file or enter text in the Data Viewer section. Sample files are at the end of the page.')
    with gr.Tab("Text"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Upload")
                text_upload_file = gr.File(file_types=['text'])
                # A default is required: parseText looks the selection up in
                # a dict and raises KeyError when nothing is selected.
                delimeter_dropdown = gr.Dropdown(choices=['New Line', 'Tab', 'Comma'], label='Delimeter', value='New Line')
                text_upload_button = gr.Button('Parse File')
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Viewer")
                # slider component
                text_boxes_slider = gr.Slider(1, max_textboxes, value=3, step=1)
                # text box components (3 visible and max_textboxes-3 not visible)
                text_boxes_texbox = (
                    [gr.Textbox(show_label=False, interactive=True) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False) for i in range(max_textboxes - 3)]
                )
                # Both selections index pipelines_text, so each default must
                # be one of that component's own choices.
                annotation_radio = gr.Radio(choices=['Spam', 'Sentiment', 'Emotion'], label='Annotation', value='Spam')
                model_dropdown = gr.Dropdown(choices=['BERT', 'RoBERTa'], label='Model', value='RoBERTa')
                text_submit_button = gr.Button('Annotate Data')
        with gr.Row():
            gr.Markdown("## Data Output")
        with gr.Row():
            with gr.Column(scale=6):
                gr.Markdown("Text")
                text_output_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )
            with gr.Column(scale=1):
                gr.Markdown("Annotations")
                text_output_annotations_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )
            with gr.Column(scale=1):
                gr.Markdown("Confidence")
                text_output_confidence_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )
        text_ouput_file = gr.File(label='File Output', file_types=['csv'])
        gr.Markdown("## Test Examples")
        with gr.Row():
            with gr.Column():
                gr.Examples(
                    examples=[
                        ['./examples/text/spam.txt', 'New Line'],
                        ['./examples/text/sentiment.txt', 'New Line'],
                        ['./examples/text/emotion.txt', 'New Line'],
                    ],
                    fn=parseText,
                    inputs=[text_upload_file, delimeter_dropdown],
                    outputs=text_boxes_texbox,
                    cache_examples=True,
                )
        # event listeners
        text_upload_button.click(fn=parseText, inputs=[text_upload_file, delimeter_dropdown], outputs=text_boxes_texbox)
        # Input boxes stay editable; output boxes only change visibility so
        # moving the slider does not make them editable.
        text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=text_boxes_texbox)
        text_boxes_slider.change(fn=_set_output_visibility, inputs=text_boxes_slider, outputs=text_output_boxes)
        text_boxes_slider.change(fn=_set_output_visibility, inputs=text_boxes_slider, outputs=text_output_annotations_boxes)
        text_boxes_slider.change(fn=_set_output_visibility, inputs=text_boxes_slider, outputs=text_output_confidence_boxes)
        # annotateText returns [csv path] + texts + labels + scores, which
        # must line up with this outputs list.
        text_submit_button.click(
            fn=annotateText,
            inputs=[text_boxes_slider, annotation_radio, model_dropdown] + text_boxes_texbox,
            outputs=[text_ouput_file] + text_output_boxes + text_output_annotations_boxes + text_output_confidence_boxes,
        )
    with gr.Tab("Image"):
        with gr.Row():
            gr.Markdown("## Coming Soon!")
        # Placeholder layout for the image tab (not enabled yet):
        # with gr.Row():
        #     file_image = gr.File(file_count=['directory'],file_types=['image'], label='File Upload')
        #     image = gr.Image()
        # with gr.Row():
        #     radio_image = gr.Radio(choices=['Object Detection'], label='Annotation')
        #     models_image = gr.Dropdown(choices=['DETR'], label='Model')
        # with gr.Row():
        #     button_image = gr.Button('Submit')
        # with gr.Row():
        #     output_image = gr.File(label='File Output', file_types=['image'])
        # image tab event listeners
        # button_image.click(fn=doImage, inputs=[file_image, radio_image], outputs=output_image)

if __name__ == "__main__":
    demo.launch()