hamdah926 committed on
Commit
265bcac
·
verified ·
1 Parent(s): 447e0db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -19
app.py CHANGED
@@ -8,25 +8,44 @@ from transformers import pipeline
8
 
9
  ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
10
 
11
- #Conveting the NER output into a DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
 
 
13
  def entities_to_df(text):
14
  all_entities = []
15
- #the NER model will be used on the input text
16
- entities = ner(text)
17
 
 
18
  for entity in entities:
 
19
  all_entities.append({
20
  "Entity": entity['word'],
21
- "Type" : entity['entity_group'],
22
  "Score": float((entity['score'])),
23
  "Start": entity['start'],
24
  "End": entity['end'],
25
- "Text": text,
26
  })
27
 
28
  df = pd.DataFrame(all_entities)
29
-
30
  #the df in the output did not round the score above so I rounded it after creating the df
31
  df['Score'] = df['Score'].round(4)
32
 
@@ -38,28 +57,25 @@ def highlight_entities(text):
38
  df = entities_to_df(text)
39
  highlighted_text = ""
40
  last_idx = 0
41
-
42
- # Iterating between the entities in the DF in order
43
  for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
44
  # Add the text before the entity
45
  highlighted_text += text[last_idx:entity['Start']]
46
-
47
- #highlighting the entities in RED by using HTML div and css and thiers types(per, org,loc or misc)
48
-
49
  highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
50
-
51
  #updating the index after the current entity
52
  last_idx = entity['End']
53
-
54
  # add the text after the last entity
55
  highlighted_text += text[last_idx:]
56
-
57
- # again we will use an HTML div to make the output looks better :)
58
  return f"<div>{highlighted_text}</div>"
59
 
60
  # The last function which will combine the two previous functions and will be used in the interface
61
  def NER_output(text):
62
- html = highlight_entities(text)
63
  df = entities_to_df(text)
64
  return html,df
65
 
@@ -68,9 +84,10 @@ default_value ="J.K. Rowling wrote the Harry Potter series, which was published
68
 
69
  # Gradio Interface
70
  demo = gr.Interface(
71
- fn=NER_output,
72
- inputs=gr.Textbox(label="Enter text:", lines=6, value = default_value),
73
- outputs=[gr.HTML(label="Entity Visualization"), gr.Dataframe(label="Entities in DataFrame format"),]
 
74
  #above, we used NER_output; since that function returns both the html and the df, there are two outputs
75
  #The first is gr.HTML and the second is gr.Dataframe
76
  )
 
8
 
9
  ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
10
 
11
def split_sentences(text, start, end, delimiter=","):
    """Return the delimiter-bounded segment of *text* around an entity span.

    The segment begins right after the last ``delimiter`` found before
    ``start`` (or at the beginning of ``text`` when there is none) and stops
    at the first ``delimiter`` found at or after ``end`` (or at the end of
    ``text`` when there is none). Leading and trailing whitespace is stripped.

    Args:
        text: The full input text.
        start: Offset of the entity in ``text``; only delimiters located
            before this offset are considered for the left boundary.
        end: Offset where the search for the right boundary begins
            (the entity's end offset as passed by the caller).
        delimiter: Separator string that bounds a segment. Defaults to a
            comma, which matches the original hard-coded behaviour.

    Returns:
        The stripped segment, excluding the bounding delimiters themselves.
    """
    # rfind yields -1 when no delimiter precedes the entity: start at index 0.
    # Otherwise skip past the whole delimiter (it may be longer than one char).
    before = text.rfind(delimiter, 0, start)
    segment_start = 0 if before == -1 else before + len(delimiter)

    # find yields -1 when no delimiter follows the entity: run to the end.
    after = text.find(delimiter, end)
    segment_end = len(text) if after == -1 else after

    return text[segment_start:segment_end].strip()
28
 
29
+
30
+ #Converting the NER output into a DataFrame:
31
  def entities_to_df(text):
32
  all_entities = []
33
+ entities = ner(text)#the NER model will be used on the input text
 
34
 
35
+ #putting the entities into a data frame with the needed keys and calling the split_sentences function for each entity in the for loop
36
  for entity in entities:
37
+ sentence = split_sentences(text, entity['start'], entity['end'])
38
  all_entities.append({
39
  "Entity": entity['word'],
40
+ "Type" : entity['entity_group'], #loc, org, per, misc
41
  "Score": float((entity['score'])),
42
  "Start": entity['start'],
43
  "End": entity['end'],
44
+ "Sentence": sentence,
45
  })
46
 
47
  df = pd.DataFrame(all_entities)
48
+
49
  #the df in the output did not round the score above so I rounded it after creating the df
50
  df['Score'] = df['Score'].round(4)
51
 
 
57
  df = entities_to_df(text)
58
  highlighted_text = ""
59
  last_idx = 0
60
+
61
+ # Iterating the DF rows in order
62
  for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
63
  # Add the text before the entity
64
  highlighted_text += text[last_idx:entity['Start']]
65
+ #highlighting each entity in red with an inline HTML div and CSS, followed by its type (per, org, loc or misc)
 
 
66
  highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
 
67
  #updating the index after the current entity
68
  last_idx = entity['End']
69
+
70
  # add the text after the last entity
71
  highlighted_text += text[last_idx:]
72
+
73
+ # again we will use an HTML div block to make the output look better :)
74
  return f"<div>{highlighted_text}</div>"
75
 
76
  # The last function which will combine the two previous functions and will be used in the interface
77
  def NER_output(text):
78
+ html = highlight_entities(text)
79
  df = entities_to_df(text)
80
  return html,df
81
 
 
84
 
85
  # Gradio Interface
86
  demo = gr.Interface(
87
+ fn=NER_output,
88
+ inputs=gr.Textbox(label="Enter text:", lines=6, value = default_value),
89
+ outputs=[gr.HTML(label="Entities Highlighted"), gr.Dataframe(label="Entities in DataFrame format")],
90
+ title = "NER model with highlighted entities"
91
  #above, we used the NER_output, and since that function return the html and the df there will be two outputs
92
  #The first is gr.HTML and the second gr.Datagrame
93
  )