Spaces:

hamdah926
/

NER_model_with_gradio

Sleeping

App Files Files Community

hamdah926 committed on Sep 16, 2024

Commit

265bcac

verified ·

1 Parent(s): 447e0db

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -19

app.py CHANGED Viewed

@@ -8,25 +8,44 @@ from transformers import pipeline
 ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
-#Conveting the NER output into a DataFrame:
 def entities_to_df(text):
   all_entities = []
-  #the NER model will be used on the input text
-  entities = ner(text)
   for entity in entities:
     all_entities.append({
           "Entity": entity['word'],
-          "Type" : entity['entity_group'],
           "Score": float((entity['score'])),
           "Start": entity['start'],
           "End": entity['end'],
-          "Text": text,
       })
   df = pd.DataFrame(all_entities)
   #the df in the output did not round the score above so I rounded it after creating the df
   df['Score'] = df['Score'].round(4)
@@ -38,28 +57,25 @@ def highlight_entities(text):
     df = entities_to_df(text)
     highlighted_text = ""
     last_idx = 0
-    # Iterating between the entities in the DF in order
     for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
         # Add the text before the entity
         highlighted_text += text[last_idx:entity['Start']]
-        #highlighting the entities in RED by using HTML div and css and thiers types(per, org,loc or misc)
         highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
         #updating the index after the current entity
         last_idx = entity['End']
     # add the text after the last entity
     highlighted_text += text[last_idx:]
-    # again we will use an HTML div to make the output looks better :)
     return f"<div>{highlighted_text}</div>"
 # The last function which will combine the two previous functions and will be used in the interface
 def NER_output(text):
-    html = highlight_entities(text)
     df = entities_to_df(text)
     return html,df
@@ -68,9 +84,10 @@ default_value ="J.K. Rowling wrote the Harry Potter series, which was published
 # Gradio Interface
 demo = gr.Interface(
-    fn=NER_output,
-    inputs=gr.Textbox(label="Enter text:", lines=6, value = default_value),
-    outputs=[gr.HTML(label="Entity Visualization"), gr.Dataframe(label="Entities in DataFrame format"),]
     #above, we used NER_output, and since that function returns the html and the df there will be two outputs
     #The first is gr.HTML and the second is gr.Dataframe
 )

 ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
+#a function that returns the comma-delimited part of the text that contains an entity.
+#the part runs from the start of the text to the next comma, from comma to comma, or from the last comma to the end of the text
+def split_sentences(text, start, end):
+    #comma before entity
+    start_comma = text.rfind(',', 0, start)
+    if start_comma == -1: #if rfind did not find a comma before the entity:
+        start_comma = 0  #start from the beginning (first sentence)
+    else:
+        start_comma += 1  #if comma found, then start from the char after the comma
+    # comma after the entity
+    end_comma = text.find(',', end)
+    if end_comma == -1:
+        return text[start_comma:].strip() #if it did not find a comma, return the text from the last comma to the end
+    else: #if it did find a comma, go to that comma
+        return  text[start_comma:end_comma].strip()
+#Converting the NER output into a DataFrame:
 def entities_to_df(text):
   all_entities = []
+  entities = ner(text)#the NER model will be used on the input text
+#putting the entities into a data frame with the needed keys and calling the split_sentences function in the for loop
   for entity in entities:
+    sentence = split_sentences(text, entity['start'], entity['end'])
     all_entities.append({
           "Entity": entity['word'],
+          "Type" : entity['entity_group'], #loc, org, per, misc
           "Score": float((entity['score'])),
           "Start": entity['start'],
           "End": entity['end'],
+          "Sentence": sentence,
       })
   df = pd.DataFrame(all_entities)
   #the df in the output did not round the score above so I rounded it after creating the df
   df['Score'] = df['Score'].round(4)
     df = entities_to_df(text)
     highlighted_text = ""
     last_idx = 0
+    # Iterating the DF rows in order
     for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
         # Add the text before the entity
         highlighted_text += text[last_idx:entity['Start']]
+        #highlighting the entities in RED using an HTML div with CSS, followed by their types (per, org, loc or misc)
         highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
         #updating the index after the current entity
         last_idx = entity['End']
     # add the text after the last entity
     highlighted_text += text[last_idx:]
+    # again we will use an HTML div block to make the output look better :)
     return f"<div>{highlighted_text}</div>"
 # The last function which will combine the two previous functions and will be used in the interface
 def NER_output(text):
+    html = highlight_entities(text)
     df = entities_to_df(text)
     return html,df
 # Gradio Interface
 demo = gr.Interface(
+    fn=NER_output,
+    inputs=gr.Textbox(label="Enter text:", lines=6, value = default_value),
+    outputs=[gr.HTML(label="Entities Highlighted"), gr.Dataframe(label="Entities in DataFrame format")],
+    title = "NER model with highlighted entities"
     #above, we used NER_output, and since that function returns the html and the df there will be two outputs
     #The first is gr.HTML and the second is gr.Dataframe
 )