"""Gradio demo that annotates short texts with pretrained text classifiers.

The "Text" tab lets a user upload a delimited text file (or type directly
into the text boxes), pick an annotation task and a model family, and get
back one label and one confidence score per text, plus a CSV download.
"""
import csv

import gradio as gr
from transformers import pipeline

# Number of text boxes created per column. Gradio needs a fixed set of
# components, so all of them are created up front and toggled visible/hidden.
max_textboxes = 100

# task name -> model family -> classification pipeline.
# Every pipeline is constructed here, at import time.
pipelines_text = {
    'Spam': {
        'BERT': pipeline("text-classification", model="mariagrandury/distilbert-base-uncased-finetuned-sms-spam-detection"),
        'RoBERTa': pipeline("text-classification", model="mariagrandury/roberta-base-finetuned-sms-spam-detection"),
    },
    'Sentiment': {
        'BERT': pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student"),
        'RoBERTa': pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest"),
    },
    'Emotion': {
        'BERT': pipeline("text-classification", model="bhadresh-savani/bert-base-go-emotion"),
        'RoBERTa': pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base"),
    },
}


def parseImage(file, radio):
    """Return the path of the uploaded image file.

    Placeholder for the not-yet-implemented image tab; ``radio`` is unused.
    """
    return file.name


def change_textboxes(n):
    """Build visibility updates that show the first ``n`` text boxes.

    Args:
        n: Number of boxes to show (the slider value). Coerced to ``int``
            because list repetition rejects a float.

    Returns:
        A list of ``max_textboxes`` update dicts: ``n`` visible ones followed
        by hidden ones.
    """
    visible_count = int(n)
    # NOTE(review): the same updates are applied to the output columns too,
    # which sets interactive=True on boxes created with interactive=False.
    shown = [gr.Textbox.update(visible=True, interactive=True)] * visible_count
    hidden = [gr.Textbox.update(visible=False, interactive=True)] * (max_textboxes - visible_count)
    return shown + hidden


def parseText(text_upload_file, delimeter_dropdown):
    """Split an uploaded text file into one value per text box.

    Args:
        text_upload_file: Uploaded file object; its ``.name`` is the path read.
        delimeter_dropdown: One of ``'New Line'``, ``'Tab'``, ``'Comma'``.

    Returns:
        A list of exactly ``max_textboxes`` strings. Segments beyond
        ``max_textboxes`` are dropped; missing ones are empty strings.

    Raises:
        KeyError: If ``delimeter_dropdown`` is not a known delimiter name.
    """
    delimeter_mapping = {'New Line': '\n', 'Tab': '\t', 'Comma': ','}
    delimeter = delimeter_mapping[delimeter_dropdown]

    # Explicit encoding so the result does not depend on the platform locale.
    with open(text_upload_file.name, 'r', encoding='utf-8') as f:
        text_upload = f.read()

    # Truncate instead of indexing past the fixed number of text boxes.
    segments = text_upload.split(delimeter)[:max_textboxes]
    return segments + [''] * (max_textboxes - len(segments))


def annotateText(text_boxes_slider, annotation_radio, model_dropdown, *text_boxes_texbox):
    """Classify the active texts and write the results to ``annotations.csv``.

    Args:
        text_boxes_slider: Number of leading text boxes to classify.
        annotation_radio: Task name, a key of ``pipelines_text``.
        model_dropdown: Model family, a key of ``pipelines_text[task]``.
        *text_boxes_texbox: Current contents of all text boxes.

    Returns:
        A flat list: the CSV path, then every text, then ``max_textboxes``
        labels, then ``max_textboxes`` formatted scores, matching the order
        of the output components wired to this callback.

    Raises:
        KeyError: If the task or model name is unknown.
    """
    texts = list(text_boxes_texbox)
    labels = [''] * max_textboxes
    scores = [''] * max_textboxes

    # The slider value may arrive as a float; slicing needs an int.
    active_count = int(text_boxes_slider)

    pipe = pipelines_text[annotation_radio][model_dropdown]
    predictions = pipe(texts[:active_count])

    for idx, pred in enumerate(predictions):
        # special case for spam (might change later): the raw label
        # 'LABEL_0' is shown as 'Not Spam', anything else as 'Spam'.
        if annotation_radio == 'Spam':
            labels[idx] = 'Not Spam' if pred['label'] == 'LABEL_0' else 'Spam'
        else:
            labels[idx] = pred['label']
        scores[idx] = '{:.2f}'.format(pred['score'])

    # csv.writer quotes fields containing commas, quotes or newlines, which
    # plain string formatting would turn into a corrupt file.
    with open('annotations.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['text', 'annotation', 'confidence'])
        # Every non-empty text gets a row, including ones past the slider
        # count (those have an empty label and score).
        writer.writerows(row for row in zip(texts, labels, scores) if row[0])

    return ['./annotations.csv'] + texts + labels + scores


with gr.Blocks() as demo:
    gr.Markdown("# Data Annotation Tool")
    gr.Markdown('Upload a file or enter text in the Data Viewer section. Sample files are at the end of the page.')

    with gr.Tab("Text"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Upload")
                text_upload_file = gr.File(file_types=['text'])
                # Default selection so "Parse File" works without touching the dropdown.
                delimeter_dropdown = gr.Dropdown(choices=['New Line', 'Tab', 'Comma'], label='Delimeter', value='New Line')
                text_upload_button = gr.Button('Parse File')

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Viewer")
                # slider component
                text_boxes_slider = gr.Slider(1, max_textboxes, value=3, step=1)
                # text box components (3 visible and max_textboxes-3 not visible)
                text_boxes_texbox = (
                    [gr.Textbox(show_label=False, interactive=True) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False) for i in range(max_textboxes - 3)]
                )
                # Defaults must be valid choices: both are used as keys into pipelines_text.
                annotation_radio = gr.Radio(choices=['Spam', 'Sentiment', 'Emotion'], label='Annotation', value='Spam')
                model_dropdown = gr.Dropdown(choices=['BERT', 'RoBERTa'], label='Model', value='RoBERTa')
                text_submit_button = gr.Button('Annotate Data')

        with gr.Row():
            gr.Markdown("## Data Output")

        with gr.Row():
            with gr.Column(scale=6):
                gr.Markdown("Text")
                text_output_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )
            with gr.Column(scale=1):
                gr.Markdown("Annotations")
                text_output_annotations_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )
            with gr.Column(scale=1):
                gr.Markdown("Confidence")
                text_output_confidence_boxes = (
                    [gr.Textbox(show_label=False, interactive=False) for i in range(3)]
                    + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes - 3)]
                )

        text_ouput_file = gr.File(label='File Output', file_types=['csv'])

        gr.Markdown("## Test Examples")
        with gr.Row():
            with gr.Column():
                gr.Examples(
                    examples=[
                        ['./examples/text/spam.txt', 'New Line'],
                        ['./examples/text/sentiment.txt', 'New Line'],
                        ['./examples/text/emotion.txt', 'New Line'],
                    ],
                    fn=parseText,
                    inputs=[text_upload_file, delimeter_dropdown],
                    outputs=text_boxes_texbox,
                    cache_examples=True,
                )

        # event listeners
        text_upload_button.click(
            fn=parseText,
            inputs=[text_upload_file, delimeter_dropdown],
            outputs=text_boxes_texbox,
        )
        # The slider shows/hides the same number of boxes in all four columns.
        for boxes in (
            text_boxes_texbox,
            text_output_boxes,
            text_output_annotations_boxes,
            text_output_confidence_boxes,
        ):
            text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=boxes)
        text_submit_button.click(
            fn=annotateText,
            inputs=[text_boxes_slider, annotation_radio, model_dropdown] + text_boxes_texbox,
            outputs=[text_ouput_file] + text_output_boxes + text_output_annotations_boxes + text_output_confidence_boxes,
        )

    with gr.Tab("Image"):
        with gr.Row():
            gr.Markdown("## Coming Soon!")
        # with gr.Row():
        #     file_image = gr.File(file_count=['directory'],file_types=['image'], label='File Upload')
        #     image = gr.Image()
        # with gr.Row():
        #     radio_image = gr.Radio(choices=['Object Detection'], label='Annotation')
        #     models_image = gr.Dropdown(choices=['DETR'], label='Model')
        # with gr.Row():
        #     button_image = gr.Button('Submit')
        # with gr.Row():
        #     output_image = gr.File(label='File Output', file_types=['image'])
        # image tab event listeners
        # button_image.click(fn=doImage, inputs=[file_image, radio_image], outputs=output_image)


if __name__ == "__main__":
    demo.launch()