Spaces:
Sleeping
Sleeping
Commit
•
aa15917
1
Parent(s):
c94987d
update_sentence_input
Browse files
app.py
CHANGED
@@ -1,60 +1,99 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import pickle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
# Define the predict function
|
7 |
-
def predict(x):
|
8 |
-
x1 = hiddenLayer1(x)
|
9 |
-
final = outputLayer([x1] + x)
|
10 |
-
return final.data
|
11 |
|
12 |
-
|
|
|
13 |
def loadModel():
|
14 |
-
neuron1weightsbias
|
15 |
-
with open(f'
|
16 |
neuron1weightsbias = pickle.load(file)
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
if i!='0' and i!='1':
|
42 |
-
st.write("Please input Binary number only")
|
43 |
-
flag = 1
|
44 |
else:
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
else:
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""usemodel.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B
|
8 |
+
"""
|
9 |
+
|
10 |
import pickle
|
11 |
+
from mygrad import Neuron, Value
|
12 |
+
import streamlit as st
|
13 |
+
def convertToOneHotEncode(tags):
    """Encode a (previous_tag, current_tag) pair as one flat one-hot vector.

    The first tag indexes a 5-slot vector directly (0-based, 0 is the
    start-of-sentence sentinel); the second indexes a 4-slot vector
    1-based. Returns a list of 9 ints with exactly two 1s.
    """
    left = [0] * 5
    right = [0] * 4
    left[tags[0]] = 1
    right[tags[1] - 1] = 1
    return left + right
|
25 |
def loadModel():
    """Reconstruct the trained chunk-tagging neuron from pickled weights.

    Reads 'weights.pkl' — expected to be a flat sequence of the 10 input
    weights followed by the bias — and installs those values on a fresh
    10-input Neuron.

    Returns:
        Neuron: the rehydrated model, ready to be called on a 10-element input.
    """
    # NOTE(review): pickle.load executes arbitrary code on malicious input;
    # only ever load the project's own local artifact here.
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    # All but the last value are weights; the last value is the bias.
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron
|
34 |
+
|
35 |
+
import json
|
36 |
+
def loadjson(filepath):
    """Parse a JSON-Lines file: one JSON document per line.

    Returns a list with one decoded object per line, in file order.
    """
    # Binary mode is deliberate: json.loads accepts bytes and handles
    # the UTF-8 decoding (including a BOM) itself.
    with open(filepath, 'rb') as file:
        return [json.loads(line) for line in file]
|
42 |
+
|
43 |
+
# Corpus load: JSONL records carrying 'pos_tags' and 'chunk_tags' lists.
data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
# NOTE(review): X and Y are built here but never read again in this file —
# presumably leftovers from the training notebook; confirm before removing.
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]

# The single recurrent neuron used by predictsentence() below.
n = loadModel()
|
49 |
+
|
50 |
+
def predictsentence(postagsOfSentence):
    """Predict a 0/1 chunk tag for each word of a POS-tagged sentence.

    Threads the previous prediction through the module-level neuron `n`
    (a simple recurrence), thresholding each output at 0.5.
    Returns None for empty input.
    """
    if not postagsOfSentence:
        return
    # Prepend the sentinel tag 0 so the first word has a "previous" tag.
    padded = [0] + postagsOfSentence
    # One one-hot feature vector per (previous, current) tag pair.
    encoded = [
        convertToOneHotEncode([prev, cur])
        for prev, cur in zip(padded, padded[1:])
    ]
    state = Value(0)
    chunks = []
    for features in encoded:
        # Feed last output back in as the first input element.
        state = n([state] + features)
        chunks.append(1 if state.data > 0.5 else 0)
    return chunks
|
70 |
+
def input_(input):
    """Tokenize a sentence and map each token's POS tag to a small id.

    Args:
        input: raw sentence text; falsy input yields None.

    Returns:
        list[int] | None: one id per token — NN -> 1, DT -> 2, JJ -> 3,
        any other tag -> 4 — or None for empty input.
    """
    if not input:
        return
    tokens = word_tokenize(input)
    tagged = nltk.pos_tag(tokens)
    # Dict lookup replaces the original if/elif chain; unknown tags
    # collapse to the catch-all id 4, exactly as before.
    tag_ids = {'NN': 1, 'DT': 2, 'JJ': 3}
    return [tag_ids.get(tag, 4) for _, tag in tagged]
|
86 |
+
# --- Streamlit UI: read a sentence, tag it, show the chunk predictions. ---
st.title('Chunk tagging')
# NOTE(review): the prompt says "pos tags" but input_() tokenizes and
# POS-tags raw text itself — the user should type a plain sentence; confirm
# and consider rewording the label.
# NOTE(review): `input` shadows the builtin of the same name from here on.
input = st.text_input('Input the pos tags')

# NOTE(review): imports placed mid-script; they run before first use so this
# works, but they belong at the top of the file.
import nltk
from nltk.tokenize import word_tokenize

# Pipeline: sentence -> POS-tag ids -> per-word 0/1 chunk tags.
inputs = input_(input)
output = predictsentence(inputs)
st.write(output)

# import pandas as pd
# data = output
# df = pd.DataFrame.from_dict(data)
# st.dataframe(df)
|