# -*- coding: utf-8 -*-
"""usemodel.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B
"""
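# NOTE: `mygrad` below is assumed to be a small local, micrograd-style module,
# not the PyPI package of the same name. Its source is not part of this file;
# based purely on how it is used here, it is expected to look roughly like the
# hypothetical sketch below (names and details are assumptions, not the actual
# implementation):
#
#   class Value:
#       """Scalar value; .data holds the float."""
#       def __init__(self, data):
#           self.data = data
#
#   class Neuron:
#       """Single unit with `nin` weights, a bias, and a squashing activation."""
#       def __init__(self, nin):
#           self.w = [Value(0.0) for _ in range(nin)]
#           self.b = Value(0.0)
#       def __call__(self, x):
#           # returns a Value whose .data is compared against the 0.5
#           # threshold in predictsentence below
#           ...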
import json
import pickle

import nltk
import streamlit as st
from nltk.tokenize import word_tokenize

from mygrad import Neuron, Value

# NLTK resources needed for tokenization and POS tagging.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


def convertToOneHotEncode(tags):
    """One-hot encode a (previous tag, current tag) pair into a 9-dim vector."""
    tag1, tag2 = tags[0], tags[1]
    vec1 = [0] * 5   # previous tag: 0 (sentence start) or 1-4
    vec2 = [0] * 4   # current tag: 1-4
    vec1[tag1] = 1
    vec2[tag2 - 1] = 1
    vec1.extend(vec2)
    return vec1


def loadModel():
    """Load the pickled weights and bias into a single 10-input neuron."""
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron


def loadjson(filepath):
    """Read a JSON Lines file into a list of dicts."""
    data = []
    with open(filepath, 'rb') as file:
        for line in file:
            data.append(json.loads(line))
    return data


data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
# Pooled POS and chunk tags from train and test; not used by the app itself.
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]

n = loadModel()


def predictsentence(postagsOfSentence):
    """Predict a 0/1 chunk tag for each token from its POS tag id sequence."""
    if postagsOfSentence:
        # Prepend 0 as the "tag" of the sentence-start position.
        postagsOfSentence = [0] + postagsOfSentence
    else:
        return
    # Build (previous tag, current tag) pairs, then one-hot encode them.
    xnew = []
    for ix in range(1, len(postagsOfSentence)):
        xnew.append([postagsOfSentence[ix - 1], postagsOfSentence[ix]])
    for ix, pair in enumerate(xnew):
        xnew[ix] = convertToOneHotEncode(pair)
    # Feed each encoded pair, together with the previous output, through the neuron.
    w = Value(0)
    chunks = []
    for wordpair in xnew:
        xinput = [w] + wordpair
        w = n(xinput)
        chunks.append(1 if w.data > 0.5 else 0)
    return chunks


def input_(sentence):
    """Tokenize a sentence, POS-tag it, and map the tags to the ids 1-4."""
    if not sentence:
        return
    tokens = word_tokenize(sentence)
    word_pos = nltk.pos_tag(tokens)
    pos = [tag for _, tag in word_pos]
    for i in range(len(pos)):
        if pos[i] == 'NN':
            pos[i] = 1
        elif pos[i] == 'DT':
            pos[i] = 2
        elif pos[i] == 'JJ':
            pos[i] = 3
        else:
            pos[i] = 4
    return pos


st.title('Chunk tagging')
sentence = st.text_input('Input the sentence')

inputs = input_(sentence)
output = predictsentence(inputs)
st.write(output)

# import pandas as pd
# data = output
# df = pd.DataFrame.from_dict(data)
# st.dataframe(df)
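# Usage sketch (assumptions: this script is saved as usemodel.py, and
# weights.pkl plus data/train.jsonl and data/test.jsonl sit next to it;
# `mygrad` must be importable as described at the top of the file):
#
#   pip install streamlit nltk
#   streamlit run usemodel.py
#
# Type a sentence into the text box; the app writes a list with one 0/1
# chunk tag per token.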