Spaces:
Runtime error
Runtime error
File size: 3,314 Bytes
7f12d56 7ab4587 7f12d56 7ab4587 7f12d56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# Gradio
import gradio as gr
# Hugging Face libraries
from transformers import pipeline
from transformers import AutoTokenizer
# Checkpoint name shared by the NER pipeline and the tokenizer
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"
# Build the NER pipeline from the checkpoint
ner_task = pipeline(
    task="ner",
    model=model_checkpoint,
    aggregation_strategy="simple",
)
# Load the tokenizer that belongs to the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Example inputs offered in the demo
sentence1 = (
    "Herbert Akroyd Stuart patented the first diesel engine, 1890"
)
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = (
    "Fri May 10 Fred Astaire (Frederick Austerlitz) born in "
    "Omaha, Nebraska, 1899"
)
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
# Prediction callback used by the Gradio interface
def predict(sentence, min_score=0.8):
    """
    Tokenize the sentence and predict its named entities.

    Args:
        sentence: Text to analyse.
        min_score: Lowest score a prediction needs in order to be
            reported. Predictions scoring below it are skipped.
            Defaults to 0.8.

    Returns:
        A tuple ``(token_pieces, formatted_ner)``:
        ``token_pieces`` is the list of tokens the tokenizer produced for
        the sentence; ``formatted_ner`` is a text report with one numbered
        section per reported prediction, followed by the total count.
    """
    # Get the tokens from the tokenizer
    token_pieces = tokenizer(sentence).tokens()
    # Get the prediction of ner from the model
    result_ner = ner_task(sentence)

    # Collect the report pieces and join them once at the end.
    report_parts = []
    entities_count = 0
    for result in result_ner:
        # Skip predictions whose score is below the threshold
        if result['score'] < min_score:
            continue
        # Number only the predictions that are kept, starting at 1
        entities_count += 1
        report_parts.append(
            f"Number: {entities_count} \n"
            f"Entity: {result['entity_group']}\n"
            f"Word group: {result['word']}\n"
            f"Score: {result['score']}\n"
        )
        # Also include the raw prediction for reference
        report_parts.append(f"{result}\n\n")
    report_parts.append(
        f"Number of predicted entities: {entities_count}\n\n"
    )
    return token_pieces, "".join(report_parts)
# Main Gradio interface: one text input, two read-only text outputs
# (the tokens and the formatted entity report returned by predict).
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(
            label="Place your sentence here",
            lines=10,
            show_copy_button=True,
        ),
    ],
    outputs=[
        gr.TextArea(
            label="Tokens input to the model",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
        gr.TextArea(
            label="Prediction of entities",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
    ],
    # Each example is a one-element list because there is a single input.
    examples=[
        [sentence1], [sentence2], [sentence3], [sentence4],
        [sentence5], [sentence6], [sentence7], [sentence8],
    ],
    title="NER (Named Entities Recognition)",
    # The description text starts at column 0 on purpose so that no
    # leading whitespace ends up in the rendered string.
    description=f"""
## Using model {model_checkpoint} to predict entities type
<p style="font-size: 1.2rem;">Notes: </p>
<ul style="font-size: 1.2rem; list-style-type:square">
<li> The examples are from the calendar utility in Linux.
<li> The model cannot recognize date and time.
<li> It can recognize PER (person), LOC (location), ORG (organization) and MISC (miscellaneous)
entities.
</ul>
""",
)
# Start the web app
demo.launch()