"""Gradio demo that highlights named entities found by a BERT NER model."""

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import gradio as gr

# Marker that prefixes a word fragment continuing the previous fragment.
SUBWORD_PREFIX = "##"

model_name = "valurank/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")


def rename_group(output_list):
    """Return copies of the entity dicts with ``entity_group`` renamed to ``entity``.

    Args:
        output_list: Iterable of entity dicts, each holding an
            ``"entity_group"`` key.

    Returns:
        A new list of new dicts; the input dicts are left untouched.

    Raises:
        KeyError: If an item has no ``"entity_group"`` key.
    """
    final_output = []
    for output in output_list:
        # Work on a shallow copy so the caller's dicts are not modified.
        renamed = dict(output)
        renamed["entity"] = renamed.pop("entity_group")
        final_output.append(renamed)
    return final_output


def remove_prefix(word, prefix):
    """Strip a leading ``prefix`` from ``word``.

    Args:
        word: Text of one entity fragment.
        prefix: Marker to strip, e.g. ``"##"``.

    Returns:
        ``word`` without its leading ``prefix``. When ``word`` does not start
        with ``prefix`` it is returned with a single space in front, so it can
        be appended to preceding text as a separate word.
    """
    # Only a *leading* marker denotes a continuation; a marker in the middle
    # of the word must not cause the text before it to be dropped.
    if prefix and word.startswith(prefix):
        return word[len(prefix):]
    return " " + word


def join_results(results):
    """Merge fragments that start with ``##`` into the entity before them.

    A fragment whose ``"word"`` starts with the continuation marker is glued
    onto the previous entity: the previous entity's ``"end"`` is extended, the
    marker-less text is appended to its ``"word"`` and its ``"score"`` becomes
    the lower of the two scores. The merged entity keeps every other field of
    the first fragment.

    Args:
        results: Sequence of entity dicts with ``"word"``, ``"end"`` and
            ``"score"`` keys.

    Returns:
        A new list of new dicts; the input dicts are left untouched. A
        continuation fragment with nothing before it is kept as-is.
    """
    joined_results = []
    for result in results:
        is_continuation = result["word"].startswith(SUBWORD_PREFIX)
        if is_continuation and joined_results:
            previous = joined_results[-1]
            previous["end"] = result["end"]
            previous["word"] += remove_prefix(result["word"], SUBWORD_PREFIX)
            # Be conservative: a merged entity is only as confident as its
            # least confident fragment.
            previous["score"] = min(previous["score"], result["score"])
        else:
            # Copy so that later merges do not modify the caller's dict.
            joined_results.append(dict(result))
    return joined_results


# Example input offered in the UI. The text is kept exactly as it was.
examples = [
    (
        " Texas A&M professor used chatbot chatbot to assess students' grades. "
        "The OpenAI chatbot is actually called ChatGPT and claims to have written every paper written by the bot. "
        "The bot isn’t made to detect material composed by AI, or even material produced by itself. "
        "Texas A&M University-Commerce said they are investigating the incident and developing policies related to AI in the classroom. "
        "The university denied that anyone had received a failing grade. "
        "The school also confirmed that several students had been cleared of any academic dishonesty. \n"
        "The use of AI in coursework is a rapidly changing issue that confronts all learning institutions."
    )
]


def ner(text):
    """Run entity recognition on ``text`` and shape the result for the UI.

    Args:
        text: The sentence(s) to analyse.

    Returns:
        A dict with the original ``"text"`` and the list of ``"entities"``
        (fragments merged, ``entity_group`` renamed to ``entity``), which is
        handed to the ``gr.HighlightedText`` output component.
    """
    output = nlp(text)
    output = join_results(output)
    output = rename_group(output)
    return {"text": text, "entities": output}


demo = gr.Interface(
    ner,
    gr.Textbox(placeholder="Enter sentence here..."),
    gr.HighlightedText(),
    examples=examples,
)

if __name__ == '__main__':
    demo.launch(debug=True)