#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import pickle
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from src.tfidf import set_paths, read_data, preprocess, training_utils, tfidf
from src.check import find_closest_match
from src.check import is_str_in
#tfidf = TfidfVectorizer(max_features=300, stop_words="english")
import streamlit as st

# Page header shown at the top of the app.
st.title("Expense Tagging")
st.subheader("Upload a txt file with each line containing a brand, we'll tell you their categories")

# Known brands and their categories, read from the bundled JSON lookup.
# Every top-level value must provide a "name" and a "category" entry. The two
# lists are index-aligned: brands[i] belongs to categories[i].
file_name = "src/brands.json"
# JSON is UTF-8 by specification, so do not depend on the platform default.
with open(file_name, 'r', encoding='utf-8') as f:
    data = json.load(f)
brands = [entry["name"] for entry in data.values()]
categories = [entry["category"] for entry in data.values()]

#i = input()
#print(len(brands), len(categories))
#bo,ind = is_str_in(i,brands)
#a,b,_,_ = find_closest_match(i,brands)
#print(a,b,brands[b])
#if bo:
#    print(categories[ind])
#print(categories[b])
# Model used as a fallback for brands that are missing from the lookup table.
# NOTE(security): unpickling executes arbitrary code, so these artifacts must
# only ever be files shipped with the project, never user-supplied data.
filename = 'src/Models/final_lr1.sav'
# Use a context manager so the file handle is closed right after loading.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Vectorizer whose transform() output is fed to loaded_model.predict().
with open('src/Models/tfidf2.pickle', 'rb') as to_read:
    fitted_tfidf = pickle.load(to_read)

# Translates the model's integer label into a human-readable category name.
map_dict = {
    0: "Food and Groceries",
    1: "Medical and Healthcare",
    2: "Education",
    3: "Lifestyle and Entertainment",
    4: "Travel & Transportation",
    5: "Clothing",
}

def predict_model(brand):
    """Return the expense category for a single brand name.

    A brand that ``is_str_in`` reports as present in ``brands`` gets the
    category stored at the reported index of ``categories``. Any other brand
    is transformed with ``fitted_tfidf`` and classified by ``loaded_model``;
    the first predicted label is translated through ``map_dict``.

    Args:
        brand: Brand name to categorize.

    Returns:
        The category associated with ``brand``.
    """
    found, position = is_str_in(brand, brands)
    if found:
        # Known brand: the lookup table is authoritative.
        return categories[position]

    # Unknown brand: fall back to the model's prediction.
    features = fitted_tfidf.transform([brand])
    label = loaded_model.predict(features)[0]
    return map_dict[label]
                # print(loaded_model.predict(w))
                #out = categories[out]

import time
# brand = st.text_input("Enter the name of the brand")
#     submit = st.form_submit_button('Submit')
# Upload widget: the script body below runs only once a file has been provided.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise stay glued to the first brand and defeat matching.
    lines = uploaded_file.getvalue().decode('utf-8-sig').splitlines()

    # One brand per line. Surrounding whitespace is removed and blank lines
    # are skipped so they are neither classified nor written to the output.
    stripped_lines = (line.strip() for line in lines)
    brand_list = [brand for brand in stripped_lines if brand]

    st.subheader("Output File")
    with st.spinner(text="This may take a moment..."):
        # NOTE(review): this fixed delay looks like it exists only to keep the
        # spinner visible; confirm whether it is still wanted.
        time.sleep(2)
        # Each output line has the form "<brand> -> <category>".
        out_list = [brand + " -> " + predict_model(brand) for brand in brand_list]

    out = "\n".join(out_list)
    st.download_button('Download Outputs', out)
    
#'''

#while True:
    #w =input()
    #if w == 'b':
    #    break
#'''