a-menu committed on
Commit
21d0900
1 Parent(s): 047434d

apply ner on tail

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -447,23 +447,36 @@ def entities_to_xml(xml_content, ner):
447
  # Iterate through all descendants in the <body>
448
  for descendant in descendants:
449
 
 
450
  if descendant.text:
451
- # Apply ner model on the text of the descendant
452
  doc = get_doc(ner, descendant.text)
453
- # Make a reversed list of the entities based on their position
454
  entities = get_entities(doc)
455
  entities.sort(key=lambda ent: ent[2], reverse=True)
456
 
457
  for ent in entities:
458
- # Create an XML element with the appropriate tag
459
- # Use the position of the entity to replace the right target only
460
  xml_tag = xml_mapping(ent[0], ent[1])
461
  start_index = ent[2]
462
  end_index = ent[3]
463
  descendant.text = (
464
- descendant.text[:start_index]
465
- + etree.tostring(xml_tag, encoding="unicode")
466
- + descendant.text[end_index:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  )
468
 
469
  # Export modified XML content
 
447
  # Iterate through all descendants in the <body>
448
  for descendant in descendants:
449
 
450
+ # Apply ner model on the text of the descendant
451
  if descendant.text:
 
452
  doc = get_doc(ner, descendant.text)
 
453
  entities = get_entities(doc)
454
  entities.sort(key=lambda ent: ent[2], reverse=True)
455
 
456
  for ent in entities:
 
 
457
  xml_tag = xml_mapping(ent[0], ent[1])
458
  start_index = ent[2]
459
  end_index = ent[3]
460
  descendant.text = (
461
+ descendant.text[:start_index]
462
+ + etree.tostring(xml_tag, encoding="unicode")
463
+ + descendant.text[end_index:]
464
+ )
465
+
466
+ # Apply ner model on the tail of the descendant
467
+ if descendant.tail:
468
+ doc_tail = get_doc(ner, descendant.tail)
469
+ entities_tail = get_entities(doc_tail)
470
+ entities_tail.sort(key=lambda ent: ent[2], reverse=True)
471
+
472
+ for ent_tail in entities_tail:
473
+ xml_tag_tail = xml_mapping(ent_tail[0], ent_tail[1])
474
+ start_index_tail = ent_tail[2]
475
+ end_index_tail = ent_tail[3]
476
+ descendant.tail = (
477
+ descendant.tail[:start_index_tail]
478
+ + etree.tostring(xml_tag_tail, encoding="unicode")
479
+ + descendant.tail[end_index_tail:]
480
  )
481
 
482
  # Export modified XML content