Spaces:
Runtime error
Runtime error
Commit
·
58ef0b0
1
Parent(s):
8ee1150
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,78 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
text = st.text_area('enter some text: ')
|
6 |
|
7 |
if text:
|
8 |
-
|
9 |
-
st.json(
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
|
4 |
+
# Model directories and tokenizer passed to the two NER pipelines.
MENDOBERT_MODEL_PATH = "/ner-app/mendobert/"
BASE_MODEL_PATH = "/ner-app/base-model/"
TOKENIZER_NAME = "indolem/indobert-base-uncased"

# Raw pipeline label prefix -> tag shown to the user, checked in this order.
LABEL_PREFIX_TO_TAG = (
    ("LABEL_0", "O"),
    ("LABEL_1", "B"),
    ("LABEL_2", "I"),
)


def merge_subword_tokens(ner_results):
    """Merge "##"-prefixed continuation tokens into the token before them.

    Args:
        ner_results: Iterable of dicts with the keys "start", "word",
            "entity", "index" and "score" (the per-token output of the
            NER pipeline).

    Returns:
        A new list of dicts with the keys "start", "end", "entity", "index",
        "score" and "word". "end" is recomputed as the token's start plus
        the length of its word with "##" markers removed. A merged entry
        keeps the entity, index and score of its first piece.
    """
    merged = []
    for result in ner_results:
        piece = result["word"].replace("##", "")
        end = result["start"] + len(piece)

        # Only extend the previous entry when there is one: a continuation
        # piece arriving first would otherwise raise IndexError.
        if result["word"].startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += piece
        else:
            merged.append({
                "start": result["start"],
                "end": end,
                "entity": result["entity"],
                "index": result["index"],
                "score": result["score"],
                "word": result["word"],
            })
    return merged


def simplify_label(entity):
    """Map a raw "LABEL_n" entity name to "O", "B" or "I".

    Labels that match none of the known prefixes are returned unchanged.
    """
    for prefix, tag in LABEL_PREFIX_TO_TAG:
        if entity.startswith(prefix):
            return tag
    return entity


def describe_entities(ner_results, text):
    """Build the display lines for every token not tagged as outside ("O").

    Args:
        ner_results: Per-token NER pipeline output for ``text``.
        text: The string the pipeline was run on; the displayed word is
            sliced from it using the merged start/end offsets.

    Returns:
        A list of strings, one per kept token, in input order.
    """
    lines = []
    for token in merge_subword_tokens(ner_results):
        tag = simplify_label(token["entity"])
        if tag.startswith("O"):
            continue
        start, end = token["start"], token["end"]
        lines.append(
            f"Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}"
        )
    return lines


def main():
    """Read text from the user and show both models' entities as JSON."""
    # The input must be read before any model is called on it; the previous
    # ordering referenced `text` before it was assigned.
    text = st.text_area('enter some text: ')
    if not text:
        return

    # Load the pre-trained NER models.
    model = pipeline("ner", model=MENDOBERT_MODEL_PATH, tokenizer=TOKENIZER_NAME)
    basemodel = pipeline("ner", model=BASE_MODEL_PATH, tokenizer=TOKENIZER_NAME)

    mendo = describe_entities(model(text), text)
    base = describe_entities(basemodel(text), text)

    st.json(base)
    st.json(mendo)


# Streamlit executes the script as `__main__`, so the app still runs on every
# rerun while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()
|