Spaces:

nirmalya8
/

expense_tagging

Runtime error

App Files Files Community

expense_tagging / app.py

nirmalya8

Updated Readme

d52fe35 over 2 years ago

raw

history blame contribute delete

3.27 kB

	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	import json
	import pickle
	import os
	import pandas as pd
	import numpy as np
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report
	from src.tfidf import set_paths, read_data, preprocess, training_utils, tfidf
	from src.check import find_closest_match
	from src.check import is_str_in
	#tfidf = TfidfVectorizer(max_features=300, stop_words="english")
	import streamlit as st

	# Page header shown at the top of the Streamlit app.
	st.title("Expense Tagging")
	st.subheader("Upload a txt file with each line containing a brand, we'll tell you their categories")

	# Load the table of known brands. Each value in the JSON object is expected
	# to carry a "name" and a "category" field.
	file_name = "src/brands.json"
	# Explicit UTF-8 so reading does not depend on the platform's default
	# encoding (JSON text is UTF-8 by specification).
	with open(file_name, 'r', encoding="utf-8") as f:
		data = json.load(f)

	# Parallel lists: brands[i] and categories[i] describe the same entry.
	brands = [entry["name"] for entry in data.values()]
	categories = [entry["category"] for entry in data.values()]

	#i = input()
	#print(len(brands), len(categories))
	#bo,ind = is_str_in(i,brands)
	#a,b,_,_ = find_closest_match(i,brands)
	#print(a,b,brands[b])
	#if bo:
	# print(categories[ind])
	#print(categories[b])
	# NOTE(review): unpickling can execute arbitrary code. These artifacts are
	# read from the app's own src/Models directory; never point these paths at
	# files that come from users.
	filename = 'src/Models/final_lr1.sav'
	# Context manager so the file handle is closed once the model is loaded
	# (the bare open() previously left it to the garbage collector).
	with open(filename, 'rb') as model_file:
		loaded_model = pickle.load(model_file)

	# Vectorizer used to turn a brand string into the features the model
	# consumes (see its .transform() call in predict_model).
	with open('src/Models/tfidf2.pickle', 'rb') as to_read:
		fitted_tfidf = pickle.load(to_read)

	# Maps the label returned by loaded_model.predict() to a category name.
	map_dict = {
		0: "Food and Groceries",
		1: "Medical and Healthcare",
		2: "Education",
		3: "Lifestyle and Entertainment",
		4: "Travel & Transportation",
		5: "Clothing",
	}

	def predict_model(brand):
		"""Return the expense category for a single brand string.

		The brand is first looked up in the known ``brands`` list via
		``is_str_in``; on a hit the category stored at the same position in
		``categories`` is returned. Otherwise the brand is vectorized with
		``fitted_tfidf`` and classified by ``loaded_model``, and the predicted
		label is translated to a category name through ``map_dict``.
		"""
		found, position = is_str_in(brand, brands)
		if found:
			# Known brand: use the curated category directly.
			return categories[position]

		# Unknown brand: fall back to the trained classifier.
		features = fitted_tfidf.transform([brand])
		label = loaded_model.predict(features)[0]
		return map_dict[label]
	# print(loaded_model.predict(w))
	#out = categories[out]

	import time
	# brand = st.text_input("Enter the name of the brand")
	# submit = st.form_submit_button('Submit')
	def _read_brand_lines(raw):
		"""Decode uploaded bytes and return the non-empty, stripped lines.

		``utf-8-sig`` is used so a leading byte-order mark (as written by some
		Windows editors) does not end up glued to the first brand name.

		Raises:
			UnicodeDecodeError: if ``raw`` is not valid UTF-8.
		"""
		stripped = (line.strip() for line in raw.decode('utf-8-sig').splitlines())
		# Blank lines carry no brand; skipping them avoids " -> <category>" rows.
		return [line for line in stripped if line]


	uploaded_file = st.file_uploader("Choose a file")
	if uploaded_file is not None:
		try:
			brand_list = _read_brand_lines(uploaded_file.getvalue())
		except UnicodeDecodeError:
			# Report the problem in the UI instead of crashing the whole app.
			st.error("Could not read the uploaded file: it must be UTF-8 encoded text.")
		else:
			st.subheader("Output File")
			# The spinner wraps the real work; no artificial delay is needed,
			# and Streamlit reruns this script on every interaction.
			with st.spinner(text="This may take a moment..."):
				out_list = [brand + " -> " + predict_model(brand) for brand in brand_list]

			# One "brand -> category" row per input line, offered as a download.
			out = "\n".join(out_list)
			st.download_button('Download Outputs', out)

	#'''

	#while True:
	#w =input()
	#if w == 'b':
	# break
	#'''