Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| import json | |
| import pickle | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import classification_report | |
| from src.tfidf import set_paths, read_data, preprocess, training_utils, tfidf | |
| from src.check import find_closest_match | |
| from src.check import is_str_in | |
| #tfidf = TfidfVectorizer(max_features=300, stop_words="english") | |
| import streamlit as st | |
# Page header shown at the top of the Streamlit app.
st.title("Expense Tagging")
st.subheader("Upload a txt file with each line containing a brand, we'll tell you their categories")

# Load the brand table. Each value of the top-level JSON object is read for
# its "name" and "category" fields; `brands` and `categories` are parallel
# lists, so index i in one corresponds to index i in the other.
file_name = "src/brands.json"
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# depending on the platform's default locale encoding.
with open(file_name, 'r', encoding="utf-8") as f:
    data = json.load(f)
brands = [entry["name"] for entry in data.values()]
categories = [entry["category"] for entry in data.values()]
| #i = input() | |
| #print(len(brands), len(categories)) | |
| #bo,ind = is_str_in(i,brands) | |
| #a,b,_,_ = find_closest_match(i,brands) | |
| #print(a,b,brands[b]) | |
| #if bo: | |
| # print(categories[ind]) | |
| #print(categories[b]) | |
# Load the pickled classifier and the fitted TF-IDF vectorizer from disk.
# NOTE: unpickling executes arbitrary code embedded in the file, so these
# paths must only ever point at model files shipped with the app.
filename = 'src/Models/final_lr1.sav'
# Use a context manager so the file handle is closed once the model is read
# (the bare open() call previously left it open).
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
with open('src/Models/tfidf2.pickle', 'rb') as to_read:
    fitted_tfidf = pickle.load(to_read)

# Maps the classifier's integer prediction to a human-readable category name.
map_dict = {
    0: "Food and Groceries",
    1: "Medical and Healthcare",
    2: "Education",
    3: "Lifestyle and Entertainment",
    4: "Travel & Transportation",
    5: "Clothing",
}
def predict_model(brand):
    """Return the category for a single brand name.

    The brand is first checked against the known ``brands`` list via
    ``is_str_in``; when that reports a hit, the category stored at the
    returned index is used. Otherwise the brand is vectorized with the
    fitted TF-IDF transformer, classified by the loaded model, and the
    predicted label is translated through ``map_dict``.

    NOTE(review): ``is_str_in`` is defined in ``src.check``; it is assumed
    to return a (found, index) pair -- confirm against its implementation.
    """
    found, position = is_str_in(brand, brands)
    if found:
        return categories[position]

    # No match in the known brands: fall back to the trained classifier.
    features = fitted_tfidf.transform([brand])
    label = loaded_model.predict(features)[0]
    return map_dict[label]
| # print(loaded_model.predict(w)) | |
| #out = categories[out] | |
| import time | |
| # brand = st.text_input("Enter the name of the brand") | |
| # submit = st.form_submit_button('Submit') | |
# Let the user upload a text file with one brand name per line, then offer
# the "<brand> -> <category>" results as a download.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # "utf-8-sig" decodes ordinary UTF-8 too, and additionally drops a
    # leading byte-order mark that would otherwise stay attached to the
    # first brand name.
    raw_lines = uploaded_file.getvalue().decode('utf-8-sig').splitlines()
    # Trim surrounding whitespace and skip blank lines so they do not
    # produce meaningless " -> <category>" rows in the output.
    brand_list = [line.strip() for line in raw_lines if line.strip()]
    st.subheader("Output File")
    with st.spinner(text="This may take a moment..."):
        # Fixed pause kept from the original script before predictions run.
        time.sleep(2)
        out_list = [brand + " -> " + predict_model(brand) for brand in brand_list]
        out = "\n".join(out_list)
    st.download_button('Download Outputs', out)
| #''' | |
| #while True: | |
| #w =input() | |
| #if w == 'b': | |
| # break | |
| #''' |