liewchooichin committed on
Commit
7f12d56
1 Parent(s): aaff802

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio
2
+ import gradio as gr
3
+
4
+ # Hugging Face libraries
5
+ from transformers import pipeline
6
+ from transformers import AutoTokenizer
7
+
8
# Model checkpoint used for both the NER pipeline and the tokenizer below.
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

# Instantiate the NER pipeline. predict() reads the "entity_group", "word"
# and "score" keys of each result produced with this aggregation strategy.
ner_task = pipeline(
    model=model_checkpoint,
    task="ner",
    aggregation_strategy="simple",
)

# Instantiate the tokenizer separately so predict() can display the token
# pieces for the input sentence.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Sample sentences offered as clickable examples in the Gradio interface.
sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
# Adjacent string literals are used instead of a backslash-newline inside the
# literal: a backslash continuation would embed whatever indentation the
# continuation line has into the example text.
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = (
    "Fri May 10 Fred Astaire (Frederick Austerlitz) born in "
    "Omaha, Nebraska, 1899"
)
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
28
+
29
+
30
+ # Gradio interface
31
def predict(sentence):
    """
    Tokenize the sentence and predict its named entities.

    Uses the module-level ``tokenizer`` to split the sentence and the
    module-level ``ner_task`` pipeline to predict entities.

    Returns a 2-tuple:
      * the tokens the tokenizer reports for the sentence;
      * a text report giving the number of predicted entities, followed,
        for each entity, by its 1-based number, entity group, word group,
        score, and the raw result as returned by the pipeline.
    """
    # Tokens as produced by the tokenizer for this sentence.
    token_pieces = tokenizer(sentence).tokens()

    # Entity predictions for the same sentence.
    entities = ner_task(sentence)

    # Collect the report fragments and join them once at the end.
    fragments = [f"Number of predicted entities: {len(entities)}\n\n"]
    # Count from 1 so the numbering reads naturally.
    for number, entity in enumerate(entities, start=1):
        fragments.append(f"Number: {number} \n")
        fragments.append(f"Entity: {entity['entity_group']}\n")
        fragments.append(f"Word group: {entity['word']}\n")
        fragments.append(f"Score: {entity['score']}\n")
        # Raw result, for reference.
        fragments.append(f"{entity}\n\n")

    return token_pieces, "".join(fragments)
53
+
54
# Main Gradio interface: one input text area, two read-only output text
# areas (tokens and entity report), wired to predict().
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(
            label="Place your sentence here",
            lines=10,
            show_copy_button=True,
        ),
    ],
    outputs=[
        gr.TextArea(
            label="Tokens input to the model",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
        gr.TextArea(
            label="Prediction of entities",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
    ],
    # Each example is a one-element list because the interface has a single
    # input component.
    examples=[
        [sentence1], [sentence2], [sentence3], [sentence4],
        [sentence5], [sentence6], [sentence7], [sentence8],
    ],
    title="NER (Named Entities Recognition)",
    # The description is assembled from explicit per-line literals so that
    # its content does not depend on how this source file is indented.
    # Fixes relative to the previous text: "recongize" -> "recognize" and
    # "MIS" -> "MISC" (the label name used by CoNLL-03 models).
    description=(
        "\n"
        f"## Using model {model_checkpoint} to predict entities type\n"
        '<p style="font-size: 1.2rem;">Notes: </p>\n'
        '<ul style="font-size: 1.2rem; list-style-type:square">\n'
        "<li> The examples are from the calendar utility in Linux.\n"
        "<li> The model cannot recognize date and time.\n"
        "<li> It can recognize PER (person), LOC (location), "
        "ORG (organization) and MISC (miscellaneous)\n"
        "entities.\n"
        "</ul>\n"
    ),
)
# Launch at import/run time, as the original script does.
demo.launch()