Sadashiv committed on
Commit
bea627b
1 Parent(s): fb25b6b

first commit

Browse files
Files changed (3) hide show
  1. app.py +10 -0
  2. requirements.txt +4 -0
  3. utils.py +84 -0
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from annotated_text import annotated_text
3
+ from utils import ner_extraction
4
+
5
+
6
# Sample sentence used to exercise the extractor.
input_text = 'Bill Gates lives in USA'

# Bind the instance to its own name: reusing ``ner_extraction`` here would
# shadow the imported class and make it impossible to build a second instance.
extractor = ner_extraction(input_text)

# Print the text split into plain strings and (word, tag, colour) tuples.
print(extractor.entity_position())
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ requests
4
+ python-dotenv
utils.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
# Load variables from a local .env file into the process environment.
load_dotenv()

# Access token for the inference endpoint, read from the "API" variable.
# ``None`` when the variable is not set.
API_KEY = os.getenv("API")

API_URL = "https://api-inference.huggingface.co/models/Sadashiv/BERT-ner"

# Only send an Authorization header when a token is configured; otherwise the
# request would carry the literal string "Bearer None".
headers = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}

# Hex colour associated with each entity tag; looked up by entity group name.
tag_color_combination = {
    'O': '#FF5733',
    'PER': '#35B7FF',
    'ORG': '#00FF00',
    'LOC': '#FFA500',
    'MISC': '#BA55D3',
}
18
+
19
+
20
class ner_extraction:
    """Run named-entity recognition on a text through the endpoint at ``API_URL``.

    The response is expected to be a list of mappings that each provide the
    keys ``start``, ``end``, ``word`` and ``entity_group``. The helper methods
    turn that list into character spans, coloured entity tuples, and the
    original text split around the entities.
    """

    def __init__(self, input_text):
        """Store the text to analyse.

        Args:
            input_text: The string that is sent to the model.
        """
        self.input_text = input_text
        # ``(text, entities)`` of the last successful request, so the helper
        # methods share one API call instead of issuing one each.
        self._cache = None

    def query(self):
        """POST the text to the inference endpoint and return the decoded JSON.

        Returns:
            The JSON body of the response, decoded into Python objects.

        Raises:
            requests.exceptions.RequestException: On connection problems or
                when the endpoint does not answer within the timeout.
        """
        response = requests.post(
            API_URL,
            headers=headers,
            # The hosted inference API documents its payload as
            # ``{"inputs": <text>}`` rather than a bare JSON string.
            json={"inputs": self.input_text},
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        return response.json()

    def _entities(self):
        """Return the entity list for the current text, querying at most once.

        Raises:
            RuntimeError: If the endpoint answered with something other than
                a list (for example an error object).
        """
        # ``getattr`` keeps this working for instances built without __init__.
        cache = getattr(self, "_cache", None)
        if cache is not None and cache[0] == self.input_text:
            return cache[1]

        output = self.query()
        if not isinstance(output, list):
            # Surface the payload instead of failing later with a KeyError.
            raise RuntimeError(
                f"Unexpected response from NER endpoint: {output!r}"
            )
        self._cache = (self.input_text, output)
        return output

    def _segments(self):
        """Split the text into ``(piece, entity_index)`` pairs in reading order.

        ``entity_index`` is the entity's position in the response list, or
        ``None`` for plain text between, before or after the entities.
        """
        text = self.input_text
        pieces = []
        cursor = 0  # first character not yet emitted
        for index, entity in enumerate(self._entities()):
            start, end = entity["start"], entity["end"]
            if start > cursor:
                # Plain text preceding this entity, including any text
                # before the very first entity.
                pieces.append((text[cursor:start], None))
            pieces.append((text[start:end], index))
            cursor = max(cursor, end)
        if cursor < len(text):
            # Trailing text, or the whole text when no entity was found.
            pieces.append((text[cursor:], None))
        return pieces

    def entity_position_locator(self):
        """Return the character span of every detected entity.

        Returns:
            A dict mapping the entity's index in the response to
            ``{"start": ..., "end": ...}``.
        """
        return {
            index: {"start": entity["start"], "end": entity["end"]}
            for index, entity in enumerate(self._entities())
        }

    def entity_update(self):
        """Return one ``(word, entity_group, colour)`` tuple per entity.

        The colour comes from ``tag_color_combination`` and is ``None`` for a
        group that has no entry there.
        """
        return [
            (
                entity["word"],
                entity["entity_group"],
                tag_color_combination.get(entity["entity_group"]),
            )
            for entity in self._entities()
        ]

    def text_list(self):
        """Return the input text cut into entity and non-entity pieces.

        Returns:
            A list of substrings of ``input_text`` in reading order. Empty
            gaps between adjacent entities are omitted.
        """
        return [piece for piece, _ in self._segments()]

    def entity_position(self):
        """Return the text with each entity replaced by its annotation tuple.

        Returns:
            A tuple whose items are plain strings for ordinary text and
            ``(word, entity_group, colour)`` tuples for entities.
        """
        entity_list = self.entity_update()
        # Pair by span index rather than by comparing lower-cased strings,
        # which breaks whenever the model's ``word`` differs from the source.
        return tuple(
            piece if index is None else entity_list[index]
            for piece, index in self._segments()
        )