glitch0011 committed on
Commit
58ef0b0
·
1 Parent(s): 8ee1150

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -3
app.py CHANGED
@@ -1,9 +1,78 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
 
4
- pipe = pipeline('sentiment-analysis')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  text = st.text_area('enter some text: ')
6
 
7
  if text:
8
- out = pipe(text)
9
- st.json(out)
 
1
  import streamlit as st
2
  from transformers import pipeline
3
 
4
# Prefix of the raw classifier label -> short tag shown to the user.
# Checked in order with ``str.startswith``, as in the original script.
LABEL_TAGS = (("LABEL_0", "O"), ("LABEL_1", "B"), ("LABEL_2", "I"))


def merge_subword_tokens(ner_results: list) -> list:
    """Merge ``##``-prefixed word pieces into the token that precedes them.

    Each item of *ner_results* is a dict with at least the keys ``start``,
    ``word``, ``entity``, ``index`` and ``score``. The returned list holds
    new dicts (the inputs are not modified) with an added ``end`` key computed
    as ``start + len(word without "##")``. A merged entry keeps the entity,
    index and score of its first piece.
    """
    merged = []
    for token in ner_results:
        piece = token["word"].replace("##", "")
        end = token["start"] + len(piece)

        # A continuation piece extends the previous word. If there is no
        # previous word (the sequence starts with a "##" piece) it opens a
        # new entry instead of raising IndexError.
        if token["word"].startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += piece
        else:
            merged.append({
                'start': token["start"],
                'end': end,
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': token["word"],
            })
    return merged


def simplify_label(label: str) -> str:
    """Map a raw ``LABEL_n`` name to ``O``/``B``/``I``; return others as-is."""
    for prefix, tag in LABEL_TAGS:
        if label.startswith(prefix):
            return tag
    return label


def describe_entities(ner_results: list, text: str) -> list:
    """Return one summary string per merged token whose tag is not ``O``.

    *text* is the string the model was run on; the reported word is sliced
    out of it using the merged ``start``/``end`` offsets.
    """
    lines = []
    for result in merge_subword_tokens(ner_results):
        result["entity"] = simplify_label(result["entity"])
        if not result["entity"].startswith("O"):
            lines.append(f"""Entity: {result["entity"]}, Start:{result["start"]}, End:{result["end"]}, word:{text[result["start"]:result["end"]]}""")
    return lines


def main():
    """Load both NER models, read the user's text and show the entities."""
    # Load the pre-trained NER models.
    # NOTE(review): Streamlit re-executes the script on every interaction, so
    # both pipelines are rebuilt each run; consider Streamlit's resource
    # caching if start-up time becomes a problem.
    model = pipeline("ner", model="/ner-app/mendobert/", tokenizer="indolem/indobert-base-uncased")
    basemodel = pipeline("ner", model="/ner-app/base-model/", tokenizer="indolem/indobert-base-uncased")

    # The text must be read before inference: the previous version called the
    # models on `text` before it was assigned, which raised NameError.
    text = st.text_area('enter some text: ')

    if text:
        # MendoBERT
        mendo = describe_entities(model(text), text)
        # Base Model
        base = describe_entities(basemodel(text), text)

        st.json(base)
        st.json(mendo)


# Streamlit runs the script as "__main__", so the app still starts normally,
# while the helpers above stay importable without loading any model.
if __name__ == "__main__":
    main()