Spaces:
Sleeping
Sleeping
apply ner on tail
Browse files
app.py
CHANGED
@@ -447,23 +447,36 @@ def entities_to_xml(xml_content, ner):
|
|
447 |
# Iterate through all descendants in the <body>
|
448 |
for descendant in descendants:
|
449 |
|
|
|
450 |
if descendant.text:
|
451 |
-
# Apply ner model on the text of the descendant
|
452 |
doc = get_doc(ner, descendant.text)
|
453 |
-
# Make a reversed list of the entities based on their position
|
454 |
entities = get_entities(doc)
|
455 |
entities.sort(key=lambda ent: ent[2], reverse=True)
|
456 |
|
457 |
for ent in entities:
|
458 |
-
# Create an XML element with the appropriate tag
|
459 |
-
# Use the position of the entity to replace the right target only
|
460 |
xml_tag = xml_mapping(ent[0], ent[1])
|
461 |
start_index = ent[2]
|
462 |
end_index = ent[3]
|
463 |
descendant.text = (
|
464 |
-
|
465 |
-
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
)
|
468 |
|
469 |
# Export modified XML content
|
|
|
447 |
# Iterate through all descendants in the <body>
|
448 |
for descendant in descendants:
|
449 |
|
450 |
+
# Apply ner model on the text of the descendant
|
451 |
if descendant.text:
|
|
|
452 |
doc = get_doc(ner, descendant.text)
|
|
|
453 |
entities = get_entities(doc)
|
454 |
entities.sort(key=lambda ent: ent[2], reverse=True)
|
455 |
|
456 |
for ent in entities:
|
|
|
|
|
457 |
xml_tag = xml_mapping(ent[0], ent[1])
|
458 |
start_index = ent[2]
|
459 |
end_index = ent[3]
|
460 |
descendant.text = (
|
461 |
+
descendant.text[:start_index]
|
462 |
+
+ etree.tostring(xml_tag, encoding="unicode")
|
463 |
+
+ descendant.text[end_index:]
|
464 |
+
)
|
465 |
+
|
466 |
+
# Apply ner model on the tail of the descendant
|
467 |
+
if descendant.tail:
|
468 |
+
doc_tail = get_doc(ner, descendant.tail)
|
469 |
+
entities_tail = get_entities(doc_tail)
|
470 |
+
entities_tail.sort(key=lambda ent: ent[2], reverse=True)
|
471 |
+
|
472 |
+
for ent_tail in entities_tail:
|
473 |
+
xml_tag_tail = xml_mapping(ent_tail[0], ent_tail[1])
|
474 |
+
start_index_tail = ent_tail[2]
|
475 |
+
end_index_tail = ent_tail[3]
|
476 |
+
descendant.tail = (
|
477 |
+
descendant.tail[:start_index_tail]
|
478 |
+
+ etree.tostring(xml_tag_tail, encoding="unicode")
|
479 |
+
+ descendant.tail[end_index_tail:]
|
480 |
)
|
481 |
|
482 |
# Export modified XML content
|