# -*- coding: utf-8 -*-
"""usemodel.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B
"""
import json
import pickle

import nltk
import streamlit as st
from nltk.tokenize import word_tokenize

from mygrad import Neuron, Value

# NLTK resources needed for tokenisation and POS tagging.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def convertToOneHotEncode(tags):
    """One-hot encode a (previous tag, current tag) pair into a 9-dim vector."""
    tag1, tag2 = tags[0], tags[1]
    vec1 = [0] * 5  # previous tag: 0 (sentence start) or 1..4
    vec2 = [0] * 4  # current tag: 1..4
    vec1[tag1] = 1
    vec2[tag2 - 1] = 1
    return vec1 + vec2
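# For example, the pair (sentence start, tag 2) encodes as:
#   convertToOneHotEncode([0, 2])  ->  [1, 0, 0, 0, 0,  0, 1, 0, 0]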
def loadModel():
    # weights.pkl is expected to hold a flat list: 10 weights followed by 1 bias.
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron
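# `mygrad` is a local module; it is assumed to expose a micrograd-style API in
# which `Value` wraps a scalar with autograd support and `Neuron(nin)` holds
# `nin` weight Values plus a bias and is callable on a list of inputs. A rough
# sketch of the assumed interface (not the actual implementation):
#
#   class Neuron:
#       def __init__(self, nin):
#           self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
#           self.b = Value(0.0)
#       def __call__(self, x):
#           act = sum(wi * xi for wi, xi in zip(self.w, x)) + self.b
#           return act.sigmoid()  # squashed output, given the 0.5 threshold below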
def loadjson(filepath):
    """Read a JSON Lines file: one JSON object per line."""
    data = []
    with open(filepath, 'r') as file:
        for line in file:
            data.append(json.loads(line))
    return data
# Each line of the .jsonl files is assumed to hold one JSON object with
# 'pos_tags' and 'chunk_tags' lists for a sentence.
data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
# X and Y gather the tags across both splits; they are unused by the app
# itself and are only carried over from the training notebook.
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]
n = loadModel()
def predictsentence(postagsOfSentence):
    if not postagsOfSentence:
        return
    # Prepend 0 as the "sentence start" tag so the first word has a predecessor.
    postagsOfSentence = [0] + postagsOfSentence
    # Build (previous tag, current tag) pairs and one-hot encode each pair.
    xnew = [convertToOneHotEncode([postagsOfSentence[ix - 1], postagsOfSentence[ix]])
            for ix in range(1, len(postagsOfSentence))]
    # The neuron is applied recurrently: its previous raw output `w` is fed
    # back as the first of the 10 inputs (1 + 5 + 4 one-hot dimensions).
    w = Value(0)
    chunks = []
    for wordpair in xnew:
        xinput = [w] + wordpair
        w = n(xinput)
        chunks.append(1 if w.data > 0.5 else 0)
    return chunks
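# Given tag ids (1=NN, 2=DT, 3=JJ, 4=other), this yields one 0/1 chunk
# decision per input tag, e.g. predictsentence([2, 3, 1]) returns a list of
# three 0/1 values (the exact values depend on the trained weights).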
def input_(text):
    if not text:
        return
    # Tokenise and POS-tag with NLTK, then collapse the Penn Treebank tags
    # down to the four ids the model was trained on.
    tokens = word_tokenize(text)
    word_pos = nltk.pos_tag(tokens)
    tag_to_id = {'NN': 1, 'DT': 2, 'JJ': 3}  # every other tag maps to 4
    return [tag_to_id.get(tag, 4) for _, tag in word_pos]
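# For example, assuming NLTK tags "the red car" as DT/JJ/NN,
# input_("the red car") returns [2, 3, 1].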
st.title('Chunk tagging')
sentence = st.text_input('Input the sentence')
inputs = input_(sentence)
output = predictsentence(inputs)
st.write(output)
# Optional: render the output as a table instead.
# import pandas as pd
# df = pd.DataFrame(output)
# st.dataframe(df)
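# Run locally with:  streamlit run app.py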