# -*- coding: utf-8 -*-
"""usemodel.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1c8Qtf9TWr3apElEv2uDgCD_MQwHnmw0B
"""
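# NOTE: `mygrad` below is assumed to be a small local, micrograd-style module,
# not the PyPI package of the same name. Its source is not part of this file;
# based purely on how it is used here, it is expected to look roughly like the
# hypothetical sketch below (names and details are assumptions, not the actual
# implementation):
#
#   class Value:
#       """Scalar value; .data holds the float."""
#       def __init__(self, data):
#           self.data = data
#
#   class Neuron:
#       """Single unit with `nin` weights, a bias, and a squashing activation."""
#       def __init__(self, nin):
#           self.w = [Value(0.0) for _ in range(nin)]
#           self.b = Value(0.0)
#       def __call__(self, x):
#           # returns a Value whose .data is compared against the 0.5
#           # threshold in predictsentence below
#           ...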
import json
import pickle

import nltk
import streamlit as st
from nltk.tokenize import word_tokenize

from mygrad import Neuron, Value

# NLTK resources needed for tokenization and POS tagging.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


def convertToOneHotEncode(tags):
    """One-hot encode a (previous tag, current tag) pair into a 9-dim vector."""
    tag1, tag2 = tags[0], tags[1]
    vec1 = [0] * 5   # previous tag: 0 (sentence start) or 1-4
    vec2 = [0] * 4   # current tag: 1-4
    vec1[tag1] = 1
    vec2[tag2 - 1] = 1
    vec1.extend(vec2)
    return vec1


def loadModel():
    """Load the pickled weights and bias into a single 10-input neuron."""
    with open('weights.pkl', 'rb') as file:
        neuron1weightsbias = pickle.load(file)
    neuron = Neuron(10)
    neuron.w = [Value(i) for i in neuron1weightsbias[:-1]]
    neuron.b = Value(neuron1weightsbias[-1])
    return neuron


def loadjson(filepath):
    """Read a JSON Lines file into a list of dicts."""
    data = []
    with open(filepath, 'rb') as file:
        for line in file:
            data.append(json.loads(line))
    return data


data = loadjson('data/train.jsonl')
data2 = loadjson('data/test.jsonl')
# Pooled POS and chunk tags from train and test; not used by the app itself.
X = [element['pos_tags'] for element in data] + [element['pos_tags'] for element in data2]
Y = [element['chunk_tags'] for element in data] + [element['chunk_tags'] for element in data2]

n = loadModel()


def predictsentence(postagsOfSentence):
    """Predict a 0/1 chunk tag for each token from its POS tag id sequence."""
    if postagsOfSentence:
        # Prepend 0 as the "tag" of the sentence-start position.
        postagsOfSentence = [0] + postagsOfSentence
    else:
        return
    # Build (previous tag, current tag) pairs, then one-hot encode them.
    xnew = []
    for ix in range(1, len(postagsOfSentence)):
        xnew.append([postagsOfSentence[ix - 1], postagsOfSentence[ix]])
    for ix, pair in enumerate(xnew):
        xnew[ix] = convertToOneHotEncode(pair)
    # Feed each encoded pair, together with the previous output, through the neuron.
    w = Value(0)
    chunks = []
    for wordpair in xnew:
        xinput = [w] + wordpair
        w = n(xinput)
        chunks.append(1 if w.data > 0.5 else 0)
    return chunks


def input_(sentence):
    """Tokenize a sentence, POS-tag it, and map the tags to the ids 1-4."""
    if not sentence:
        return
    tokens = word_tokenize(sentence)
    word_pos = nltk.pos_tag(tokens)
    pos = [tag for _, tag in word_pos]
    for i in range(len(pos)):
        if pos[i] == 'NN':
            pos[i] = 1
        elif pos[i] == 'DT':
            pos[i] = 2
        elif pos[i] == 'JJ':
            pos[i] = 3
        else:
            pos[i] = 4
    return pos


st.title('Chunk tagging')
sentence = st.text_input('Input the sentence')

inputs = input_(sentence)
output = predictsentence(inputs)
st.write(output)

# import pandas as pd
# data = output
# df = pd.DataFrame.from_dict(data)
# st.dataframe(df)
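# Usage sketch (assumptions: this script is saved as usemodel.py, and
# weights.pkl plus data/train.jsonl and data/test.jsonl sit next to it;
# `mygrad` must be importable as described at the top of the file):
#
#   pip install streamlit nltk
#   streamlit run usemodel.py
#
# Type a sentence into the text box; the app writes a list with one 0/1
# chunk tag per token.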