# -*- coding: utf-8 -*-
"""usemodel.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B | |
""" | |
import json
import pickle

import nltk
import streamlit as st
from nltk.tokenize import word_tokenize

from mygrad import Neuron, Value

# The tokenizer and POS-tagger models are required by input_() below.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def convertToOneHotEncode(tags):
    """One-hot encode a (previous tag, current tag) pair into a 9-dim vector.

    The previous tag ranges over 0-4 (0 is the start-of-sentence padding),
    the current tag over 1-4, so the two one-hot segments have sizes 5 and 4.
    """
    tag1, tag2 = tags
    vec1 = [0] * 5
    vec2 = [0] * 4
    vec1[tag1] = 1
    vec2[tag2 - 1] = 1
    vec1.extend(vec2)
    return vec1
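# Quick sanity check of the encoding (hedged example, not executed by the app):
# the pair (0, 1), i.e. start padding followed by an NN tag, sets the first
# slot of each segment:
# >>> convertToOneHotEncode([0, 1])
# [1, 0, 0, 0, 0, 1, 0, 0, 0]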
def loadModel():
    """Rebuild the trained single neuron from its pickled weights.

    weights.pkl stores a flat list: the 10 input weights followed by the bias.
    """
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron
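# Hedged sketch of how weights.pkl could have been written after training,
# assuming the same Neuron/Value API (w is a list of Value, b is a Value,
# each with a .data attribute); this is not executed here:
# with open('weights.pkl', 'wb') as file:
#     pickle.dump([v.data for v in neuron.w] + [neuron.b.data], file)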
def loadjson(filepath):
    """Load a JSON Lines file (one JSON object per line) into a list."""
    data = []
    with open(filepath, 'rb') as file:
        for line in file:
            data.append(json.loads(line))
    return data
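# Each line of the .jsonl files is expected to look roughly like this
# (hedged, inferred from the keys used below):
# {"pos_tags": [2, 3, 1], "chunk_tags": [1, 1, 1]}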
# The data and X/Y below are loaded but not used by the app; they are kept
# from the original notebook for reference.
data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]
n = loadModel() | |
def predictsentence(postagsOfSentence):
    """Predict a binary chunk tag for each word from its POS-tag id sequence.

    The neuron is applied recurrently: its previous output is fed back in
    alongside the one-hot encoding of each (previous, current) tag pair.
    """
    if not postagsOfSentence:
        return
    # Prepend 0 as the start-of-sentence padding tag.
    postagsOfSentence = [0] + postagsOfSentence
    # Build a (previous tag, current tag) pair for every word.
    xnew = []
    for ix in range(1, len(postagsOfSentence)):
        xnew.append([postagsOfSentence[ix - 1], postagsOfSentence[ix]])
    for ix, pair in enumerate(xnew):
        xnew[ix] = convertToOneHotEncode(pair)
    w = Value(0)  # previous output, fed back as the first input
    chunks = []
    for wordpair in xnew:
        xinput = [w] + wordpair
        w = n(xinput)
        # Threshold the neuron's activation at 0.5 for the binary chunk tag.
        chunks.append(1 if w.data > 0.5 else 0)
    return chunks
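# Example (hedged): for "the big dog" the tag ids from input_() below are
# [2, 3, 1] (DT, JJ, NN), so predictsentence([2, 3, 1]) returns one 0/1
# chunk tag per word, e.g. [1, 1, 1] if every activation exceeds 0.5.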
def input_(text):
    """Tokenize a sentence, POS-tag it, and map each tag to a small integer.

    Only NN, DT, and JJ get their own ids (1-3); every other tag maps to 4.
    """
    if not text:
        return
    tokens = word_tokenize(text)
    word_pos = nltk.pos_tag(tokens)
    pos = [tag for _, tag in word_pos]
    for i in range(len(pos)):
        if pos[i] == 'NN':
            pos[i] = 1
        elif pos[i] == 'DT':
            pos[i] = 2
        elif pos[i] == 'JJ':
            pos[i] = 3
        else:
            pos[i] = 4
    return pos
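# Example (hedged): input_('the big dog') tokenizes to ['the', 'big', 'dog'],
# which NLTK tags as DT, JJ, NN, giving the id sequence [2, 3, 1].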
st.title('Chunk tagging')
sentence = st.text_input('Input a sentence')
inputs = input_(sentence)
output = predictsentence(inputs)
st.write(output)
# import pandas as pd | |
# data = output | |
# df = pd.DataFrame.from_dict(data) | |
# st.dataframe(df) | |