Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- IBO_to_XML.py +135 -0
- NER_Distiller.py +138 -0
- XML_to_HTML.py +32 -0
IBO_to_XML.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# By Wasim Khatib
|
| 2 |
+
# Version 2.0
|
| 3 |
+
# This function takes a list of annotated entities, in this format: [["صرح","O"],
|
| 4 |
+
# ["رئيس","B-OCC"], ["نقابة","B-OCC B-ORG"],
|
| 5 |
+
# ["العاملين","I-OCC B-ORG"], ["في","I-OCC I-ORG"], ["جامعة","I-OCC I-ORG B-ORG"],
|
| 6 |
+
# ["بيرزيت","I-OCC I-ORG I-ORG B-LOC"],["ان","O"], ["غدا","O"], ["هو","O"], ["يوم","B-DATE"],["الخميس","I-DATE"]]
|
| 7 |
+
# It then returns the text as XML in this format: صرح <OCC> رئيس <ORG> نقابة العاملين </ORG> </OCC> يوم في <ORG>
|
| 8 |
+
# جامعة <LOC> بيرزيت </LOC> </ORG> ان غدا هو <DATE> يوم الخميس </DATE>
|
| 9 |
+
# This function assumes the input is well-formed: every tag is non-empty and starts with B- or I-.
|
| 10 |
+
# I- tags that have no matching B- tag are ignored.
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def IBO_to_XML(temp):
    """Render IOB-tagged tokens as text with inline XML entity tags.

    Args:
        temp: Sequence of ``[word, tags]`` pairs. ``tags`` is a
            space-separated string of ``O``, ``B-<TYPE>`` or ``I-<TYPE>``
            labels, one label per nesting level. The pairs are passed
            through ``sortTags`` before conversion.

    Returns:
        The words joined by single spaces, with ``<TYPE>`` inserted where a
        ``B-`` label opens an entity and ``</TYPE>`` where it ends. Leading
        and trailing whitespace is stripped.
    """
    pieces = []
    # open_tags[level] holds the entity type currently open at that nesting
    # level, or "" when nothing is open there.
    open_tags = [""]

    for word_position, entity in enumerate(sortTags(temp)):
        for level, tag in enumerate(str(entity[1]).split()):
            # Grow the stack the first time a deeper nesting level is seen.
            if level >= len(open_tags):
                open_tags.append("")

            if tag == "O":
                # An "O" on the very first word has nothing to close.
                if word_position != 0:
                    # Close innermost first so the XML stays well nested.
                    for j in range(len(open_tags) - 1, -1, -1):
                        if open_tags[j] != "":
                            pieces.append("</" + str(open_tags[j]) + ">")
                            open_tags[j] = ""
                continue

            parts = tag.split("-")
            if len(parts) != 2:
                # Not of the form B-<TYPE> / I-<TYPE>: ignored.
                continue

            if parts[0] == "B":
                # A new entity at this level replaces whatever was open here.
                if open_tags[level] != "":
                    pieces.append("</" + str(open_tags[level]) + ">")
                open_tags[level] = parts[1]
                pieces.append("<" + str(open_tags[level]) + ">")
            elif parts[0] == "I" and word_position != 0:
                # Continuation: walk down from this level until the matching
                # open entity is found, closing any non-matching ones.
                for j in range(level, len(open_tags)):
                    if open_tags[j] == tag[2:]:
                        break
                    if open_tags[j] != "":
                        pieces.append("</" + str(open_tags[j]) + ">")
                        open_tags[j] = ""

        pieces.append(str(entity[0]))

    # Close whatever is still open at end of input, innermost first. Closing
    # outermost-first here produced mis-nested output such as
    # "<OCC> a <ORG> b </OCC> </ORG>".
    for j in range(len(open_tags) - 1, -1, -1):
        if open_tags[j] != "":
            pieces.append("</" + str(open_tags[j]) + ">")

    return " ".join(pieces).strip()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def sortTags(entities):
    """Normalise each token's tag string so its labels line up with the previous token.

    For every token after the first:

    * for each ``I-<TYPE>`` label, if the previous (already normalised)
      token mentions ``<TYPE>`` more often than this token does, extra
      ``I-<TYPE>`` labels are added until the counts match;
    * labels are sorted in descending order, so ``I-`` labels precede
      ``B-`` labels;
    * unless either token carries an ``O`` label, the ``I-`` labels are
      reordered to follow the order of the previous token's labels.

    Args:
        entities: Sequence of ``[word, tags]`` pairs, ``tags`` being a
            space-separated label string.

    Returns:
        A new list of ``[word, tags]`` lists with normalised tag strings.
        The input sequence and its pairs are left untouched.
    """
    result = []
    for position, entity in enumerate(entities):
        tags = entity[1].split()
        # Always compare against the *normalised* previous token.
        previous_tags = result[position - 1][1].split() if position != 0 else []

        if position != 0:
            # ``tags`` may grow during this scan; appended labels are scanned
            # too, which is what pads the count up to the previous token's.
            cursor = 0
            while cursor < len(tags):
                tag = tags[cursor]
                cursor += 1
                if tag[0:2] != "I-":
                    continue
                entity_type = tag.split("-")[1]
                # Substring match, so B-<TYPE> and I-<TYPE> both count.
                count_here = sum(1 for label in tags if entity_type in label)
                count_before = sum(1 for label in previous_tags if entity_type in label)
                if count_before > count_here:
                    tags.append("I-" + entity_type)

        # Descending order puts "I-..." labels ahead of "B-..." labels.
        tags.sort(reverse=True)

        if position != 0:
            ordered = []
            if "O" not in tags and "O" not in previous_tags:
                for slot, previous_tag in enumerate(previous_tags):
                    for j, candidate in enumerate(tags):
                        if candidate[0:2] == "I-" and candidate[2:] == previous_tag[2:]:
                            # Place the continuation at the previous token's level.
                            ordered.insert(slot, tags.pop(j))
                            break
                        if candidate[0:2] == "B-":
                            # Only I- labels (which sort first) are reordered.
                            break
            tags = ordered + tags

        # Copy the pair so the caller's data is never modified; this also
        # lets tuple pairs be accepted.
        updated = list(entity)
        updated[1] = " ".join(tags).strip()
        result.append(updated)
    return result
|
NER_Distiller.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# By Wasim Khatib
|
| 2 |
+
# Version 2.0
|
| 3 |
+
# This function takes a list of annotated entities, in this format: [["صرح","O"], ["رئيس","B-OCC"], ["نقابة","B-OCC B-ORG"],
|
| 4 |
+
# ["العاملين","I-OCC B-ORG"], ["في","I-OCC I-ORG"], ["جامعة","I-OCC I-ORG B-ORG"],
|
| 5 |
+
# ["بيرزيت","I-OCC I-ORG I-ORG B-LOC"],["ان","O"], ["غدا","O"], ["هو","O"], ["يوم","B-DATE"],["الخميس","I-DATE"]]
|
| 6 |
+
# It then returns a list of the distilled entities with their tags and positions (start, end), such as
|
| 7 |
+
# [[" رئيس نقابة العاملين في جامعة بيرزيت", OCC,1,7],
|
| 8 |
+
# [" نقابة العاملين في جامعة بيرزيت", ORG,2,7], [" جامعة بيرزيت", ORG,5,7],["يوم الخميس", DATE,10,11]]
|
| 9 |
+
def distill_entities(entities):
    """Collect the entity spans described by IOB-tagged tokens.

    Args:
        entities: Sequence of ``[word, tags]`` pairs, ``tags`` being a
            space-separated string of ``O`` / ``B-<TYPE>`` / ``I-<TYPE>``
            labels (one per nesting level). The pairs are first passed
            through ``sortTags``.

    Returns:
        A list of ``[text, type, start, end]`` entries sorted by ``start``,
        where ``start`` and ``end`` are 0-based word indices.
    """
    collected = []
    # One slot per nesting level: [accumulated text, type, start, end].
    open_slots = [["", "", 0, 0]]

    def snapshot(slot):
        # Output copy of a slot, with the trailing separator trimmed.
        return [slot[0].strip(), slot[1], slot[2], slot[3]]

    def flush(slot, position):
        # Emit the slot if it holds an entity, then reset it.
        if slot[1] != "":
            collected.append(snapshot(slot))
        slot[0] = ""
        slot[1] = ""
        slot[2] = position
        slot[3] = position

    for position, entity in enumerate(sortTags(entities)):
        for level, tag in enumerate(str(entity[1]).split()):
            # Add a slot the first time a deeper nesting level appears.
            if level >= len(open_slots):
                open_slots.append(["", "", 0, 0])

            if tag == "O":
                # Outside any entity: everything open ends here. An "O" on
                # the first word has nothing to close.
                if position != 0:
                    for slot in open_slots:
                        flush(slot, position)
                continue

            parts = tag.split("-")
            if len(parts) != 2:
                # Not of the form B-<TYPE> / I-<TYPE>: ignored.
                continue

            if parts[0] == "B":
                slot = open_slots[level]
                # A new entity at this level ends the one that was open.
                if slot[1] != "":
                    collected.append(snapshot(slot))
                slot[0] = str(entity[0]) + " "
                slot[1] = parts[1]
                slot[2] = position
                slot[3] = position
            elif parts[0] == "I" and position != 0:
                for slot in open_slots[level:]:
                    # The end-index check stops one word being added twice to
                    # the same slot when a type repeats across levels.
                    if slot[1] == tag[2:] and slot[3] != position:
                        slot[0] += str(entity[0]) + " "
                        slot[3] += 1
                        break
                    flush(slot, position)

    # Entities still open at end of input.
    for slot in open_slots:
        if slot[1] != "":
            collected.append(snapshot(slot))

    return sorted(collected, key=lambda item: item[2])
|
| 80 |
+
|
| 81 |
+
def sortTags(entities):
    """Normalise each token's tag string so its labels line up with the previous token.

    For every token after the first:

    * for each ``I-<TYPE>`` label, if the previous (already normalised)
      token mentions ``<TYPE>`` more often than this token does, extra
      ``I-<TYPE>`` labels are added until the counts match;
    * labels are sorted in descending order, so ``I-`` labels precede
      ``B-`` labels;
    * unless either token carries an ``O`` label, the ``I-`` labels are
      reordered to follow the order of the previous token's labels.

    Args:
        entities: Sequence of ``[word, tags]`` pairs, ``tags`` being a
            space-separated label string.

    Returns:
        A new list of ``[word, tags]`` lists with normalised tag strings.
        The input sequence and its pairs are left untouched.
    """
    result = []
    for position, entity in enumerate(entities):
        tags = entity[1].split()
        # Always compare against the *normalised* previous token.
        previous_tags = result[position - 1][1].split() if position != 0 else []

        if position != 0:
            # ``tags`` may grow during this scan; appended labels are scanned
            # too, which is what pads the count up to the previous token's.
            cursor = 0
            while cursor < len(tags):
                tag = tags[cursor]
                cursor += 1
                if tag[0:2] != "I-":
                    continue
                entity_type = tag.split("-")[1]
                # Substring match, so B-<TYPE> and I-<TYPE> both count.
                count_here = sum(1 for label in tags if entity_type in label)
                count_before = sum(1 for label in previous_tags if entity_type in label)
                if count_before > count_here:
                    tags.append("I-" + entity_type)

        # Descending order puts "I-..." labels ahead of "B-..." labels.
        tags.sort(reverse=True)

        if position != 0:
            ordered = []
            if "O" not in tags and "O" not in previous_tags:
                for slot, previous_tag in enumerate(previous_tags):
                    for j, candidate in enumerate(tags):
                        if candidate[0:2] == "I-" and candidate[2:] == previous_tag[2:]:
                            # Place the continuation at the previous token's level.
                            ordered.insert(slot, tags.pop(j))
                            break
                        if candidate[0:2] == "B-":
                            # Only I- labels (which sort first) are reordered.
                            break
            tags = ordered + tags

        # Copy the pair so the caller's data is never modified; this also
        # lets tuple pairs be accepted.
        updated = list(entity)
        updated[1] = " ".join(tags).strip()
        result.append(updated)
    return result
|
XML_to_HTML.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def NER_XML_to_HTML(xml):
    """Turn NER XML markup into HTML ``<span>`` markup.

    Args:
        xml: Text containing upper-case entity tags such as
            ``<ORG> ... </ORG>``.

    Returns:
        The text with every upper-case closing tag replaced by ``</span>``
        and every known opening tag replaced by a ``<span>`` carrying a
        ``ner_*`` CSS class and a ``data-entity`` attribute. Opening tags
        that are not in the table are left unchanged.
    """
    # Opening-tag pattern -> replacement span, applied in this order.
    opening_spans = (
        (r'<PERS>', '<span class="ner_pers" data-entity="PERS">'),
        (r'<GROUP>', '<span class="ner_group" data-entity="NORP">'),
        (r'<OCC>', '<span class="ner_occ" data-entity="OCC">'),
        (r'<ORG>', '<span class="ner_org" data-entity="ORG">'),
        (r'<LOC>', '<span class="ner_loc" data-entity="LOC">'),
        (r'<GPE>', '<span class="ner_gpe" data-entity="GPE">'),
        (r'<FAC>', '<span class="ner_fac" data-entity="FAC">'),
        (r'<EVENT>', '<span class="ner_event" data-entity="EVENT">'),
        (r'<DATE>', '<span class="ner_date" data-entity="DATE">'),
        (r'<TIME>', '<span class="ner_time" data-entity="TIME">'),
        (r'<CARDINAL>', '<span class="ner_cardinal" data-entity="CARDINAL">'),
        (r'<ORDINAL>', '<span class="ner_ordinal" data-entity="ORDINAL">'),
        (r'<PERCENT>', '<span class="ner_percent" data-entity="PERCENT">'),
        (r'<QUANTITY>', '<span class="ner_quantity" data-entity="QUANTITY">'),
        (r'<UNIT>', '<span class="ner_unit" data-entity="UNIT">'),
        (r'<MONEY>', '<span class="ner_money" data-entity="MONEY">'),
        (r'<CURR>', '<span class="ner_currency" data-entity="CURRENCY">'),
        (r'<LANGUAGE>', '<span class="ner_language" data-entity="LANGUAGE">'),
        (r'<PRODUCT>', '<span class="ner_product" data-entity="PRODUCT">'),
        (r'<WORKOFART>', '<span class="ner_work_of_art" data-entity="WORK_OF_ART">'),
        (r'<LAW>', '<span class="ner_law" data-entity="LAW">'),
    )

    # Drop the underscores first so the [A-Z]+ closing-tag pattern below also
    # matches this tag.
    markup = re.sub(r'WORK_OF_ART', 'WORKOFART', xml)

    # Every closing tag becomes a closing span.
    markup = re.sub(r'</[A-Z]+>', '</span>', markup)

    # Each known opening tag becomes a span with its CSS class.
    for pattern, span in opening_spans:
        markup = re.sub(pattern, span, markup)
    return markup
|