# -*- coding: utf-8 -*-
"""usemodel.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B | |
""" | |
import json
import pickle

import nltk
import streamlit as st
from nltk.tokenize import word_tokenize

from mygrad import Neuron, Value

# The tokenizer and POS-tagger models are required by input_() below.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def convertToOneHotEncode(tags):
    """One-hot encode a (previous tag, current tag) pair into a 9-dim vector.

    The previous tag ranges over 0-4 (0 is the start-of-sentence padding),
    the current tag over 1-4, so the two one-hot segments have sizes 5 and 4.
    """
    tag1, tag2 = tags
    vec1 = [0] * 5
    vec2 = [0] * 4
    vec1[tag1] = 1
    vec2[tag2 - 1] = 1
    vec1.extend(vec2)
    return vec1
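# Quick sanity check of the encoding (hedged example, not executed by the app):
# the pair (0, 1), i.e. start padding followed by an NN tag, sets the first
# slot of each segment:
# >>> convertToOneHotEncode([0, 1])
# [1, 0, 0, 0, 0, 1, 0, 0, 0]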
def loadModel():
    """Rebuild the trained single neuron from its pickled weights.

    weights.pkl stores a flat list: the 10 input weights followed by the bias.
    """
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron
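# Hedged sketch of how weights.pkl could have been written after training,
# assuming the same Neuron/Value API (w is a list of Value, b is a Value,
# each with a .data attribute); this is not executed here:
# with open('weights.pkl', 'wb') as file:
#     pickle.dump([v.data for v in neuron.w] + [neuron.b.data], file)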
def loadjson(filepath):
    """Load a JSON Lines file (one JSON object per line) into a list."""
    data = []
    with open(filepath, 'rb') as file:
        for line in file:
            data.append(json.loads(line))
    return data
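# Each line of the .jsonl files is expected to look roughly like this
# (hedged, inferred from the keys used below):
# {"pos_tags": [2, 3, 1], "chunk_tags": [1, 1, 1]}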
# The data and X/Y below are loaded but not used by the app; they are kept
# from the original notebook for reference.
data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]
n = loadModel() | |
def predictsentence(postagsOfSentence):
    """Predict a binary chunk tag for each word from its POS-tag id sequence.

    The neuron is applied recurrently: its previous output is fed back in
    alongside the one-hot encoding of each (previous, current) tag pair.
    """
    if not postagsOfSentence:
        return
    # Prepend 0 as the start-of-sentence padding tag.
    postagsOfSentence = [0] + postagsOfSentence
    # Build a (previous tag, current tag) pair for every word.
    xnew = []
    for ix in range(1, len(postagsOfSentence)):
        xnew.append([postagsOfSentence[ix - 1], postagsOfSentence[ix]])
    for ix, pair in enumerate(xnew):
        xnew[ix] = convertToOneHotEncode(pair)
    w = Value(0)  # previous output, fed back as the first input
    chunks = []
    for wordpair in xnew:
        xinput = [w] + wordpair
        w = n(xinput)
        # Threshold the neuron's activation at 0.5 for the binary chunk tag.
        chunks.append(1 if w.data > 0.5 else 0)
    return chunks
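# Example (hedged): for "the big dog" the tag ids from input_() below are
# [2, 3, 1] (DT, JJ, NN), so predictsentence([2, 3, 1]) returns one 0/1
# chunk tag per word, e.g. [1, 1, 1] if every activation exceeds 0.5.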
def input_(text):
    """Tokenize a sentence, POS-tag it, and map each tag to a small integer.

    Only NN, DT, and JJ get their own ids (1-3); every other tag maps to 4.
    """
    if not text:
        return
    tokens = word_tokenize(text)
    word_pos = nltk.pos_tag(tokens)
    pos = [tag for _, tag in word_pos]
    for i in range(len(pos)):
        if pos[i] == 'NN':
            pos[i] = 1
        elif pos[i] == 'DT':
            pos[i] = 2
        elif pos[i] == 'JJ':
            pos[i] = 3
        else:
            pos[i] = 4
    return pos
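# Example (hedged): input_('the big dog') tokenizes to ['the', 'big', 'dog'],
# which NLTK tags as DT, JJ, NN, giving the id sequence [2, 3, 1].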
st.title('Chunk tagging')
sentence = st.text_input('Input a sentence')
inputs = input_(sentence)
output = predictsentence(inputs)
st.write(output)
# import pandas as pd | |
# data = output | |
# df = pd.DataFrame.from_dict(data) | |
# st.dataframe(df) | |