import logging
import os
from typing import List, Tuple

import gradio as gr
import pandas as pd
import spacy
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

# spaCy is used only for tokenization; download the Portuguese pipeline on first run.
try:
    nlp = spacy.load("pt_core_news_sm")
except Exception:
    os.system("python -m spacy download pt_core_news_sm")
    nlp = spacy.load("pt_core_news_sm")

# Pre-trained Portuguese POS tagger and its matching tokenizer.
model = AutoModelForTokenClassification.from_pretrained("Emanuel/porttagger-news-base")
tokenizer = AutoTokenizer.from_pretrained("Emanuel/porttagger-news-base")

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


def predict(text, nlp, logger=None) -> Tuple[List[str], List[str], List[str]]:
    """Tokenize `text` with spaCy and tag each token with the POS model.

    Returns parallel lists of tokens, predicted labels, and confidence scores
    (softmax probabilities formatted as percentages).
    """
    doc = nlp(text)
    tokens = [token.text for token in doc]

    if logger is not None:
        logger.info("Starting predictions for sentence: {}".format(text))

    input_tokens = tokenizer(
        tokens,
        return_tensors="pt",
        is_split_into_words=True,
        return_offsets_mapping=True,
        return_special_tokens_mask=True,
    )
    output = model(input_tokens["input_ids"])

    i_token = 0
    labels = []
    scores = []
    # Keep only the prediction for the first sub-word of each token and skip
    # special tokens ([CLS], [SEP], padding).
    for off, is_special_token, pred in zip(
        input_tokens["offset_mapping"][0],
        input_tokens["special_tokens_mask"][0],
        output.logits[0],
    ):
        if is_special_token or off[0] > 0:
            continue
        label = model.config.id2label[int(pred.argmax(dim=-1))]
        if logger is not None:
            logger.info("{}, {}, {}".format(off, tokens[i_token], label))
        labels.append(label)
        scores.append(
            "{:.2f}".format(100 * float(torch.softmax(pred, dim=-1).detach().max()))
        )
        i_token += 1

    return tokens, labels, scores


def text_analysis(text):
    tokens, labels, scores = predict(text, nlp, logger)
    pos_count = pd.DataFrame(
        {
            "token": tokens,
            "etiqueta": labels,
            "confiança": scores,
        }
    )
    # Interleave tokens with spaces so HighlightedText renders them separated.
    pos_tokens = []
    for token, label in zip(tokens, labels):
        pos_tokens.extend([(token, label), (" ", None)])

    return {
        output_highlighted: output_highlighted.update(visible=True, value=pos_tokens),
        output_df: output_df.update(visible=True, value=pos_count),
    }


css = open("style.css").read()
top_html = open("top.html").read()
bottom_html = open("bottom.html").read()

with gr.Blocks(css=css) as demo:
    gr.HTML(top_html)
    text = gr.Textbox(placeholder="Enter your text here...", label="Input")
    examples = gr.Examples(
        examples=[
            [
                "A população não poderia ter acesso a relatórios que explicassem, por exemplo, os motivos exatos de atrasos em obras de linhas e estações."
            ],
            ["Filme 'Star Wars : Os Últimos Jedi' ganha trailer definitivo; assista."],
        ],
        inputs=[text],
        label="Select an example",
    )
    output_highlighted = gr.HighlightedText(label="Colorful output", visible=False)
    output_df = gr.Dataframe(label="Tabular output", visible=False)
    submit_btn = gr.Button("Send")
    submit_btn.click(
        fn=text_analysis, inputs=text, outputs=[output_highlighted, output_df]
    )
    gr.HTML(bottom_html)

demo.launch(debug=True)
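
# A minimal standalone usage sketch (not part of the original app; the input
# sentence is illustrative only): once the model and spaCy pipeline above are
# loaded, `predict` can be called directly, without the Gradio UI.
#
#     tokens, labels, scores = predict("O gato dorme no sofá.", nlp, logger)
#     for token, label, score in zip(tokens, labels, scores):
#         print(f"{token}\t{label}\t{score}%")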