Spaces:

madebybread
/

brightly-ai

Paused

App Files Files Community

brightly-ai / run.py

beweinreich

fixes for wweia categories

a4b0df8 26 days ago

raw

history blame

No virus

1.93 kB

	import time
	import cProfile
	import pstats
	import pandas as pd
	from algo import Algo
	from db.db_utils import get_connection

	def build_input_rows(df_input):
	    """Convert the raw CSV frame into the row tuples passed to ``match_words``.

	    Args:
	        df_input: DataFrame containing ``description``, ``donor``, ``date``
	            and ``weight`` columns.

	    Returns:
	        A list of ``(description, run_row, donor, date, weight)`` tuples in
	        frame order. The four column values are cast to ``str``. ``run_row``
	        starts at 2 for the first record -- presumably so it lines up with
	        the 1-based line number in the CSV file, where line 1 is the header.
	    """
	    columns = ('description', 'donor', 'date', 'weight')
	    descs, donors, dates, weights = (
	        df_input[column].astype(str).tolist() for column in columns
	    )
	    return [
	        (desc, run_row, donor, date, weight)
	        for run_row, (desc, donor, date, weight)
	        in enumerate(zip(descs, donors, dates, weights), start=2)
	    ]


	def resume_offset(last_row):
	    """Return the index into the input rows at which processing should start.

	    Args:
	        last_row: The row fetched for the highest ``run_row`` already stored
	            for this run key (a sequence whose first item is that number),
	            or ``None`` when nothing has been processed yet.

	    Returns:
	        ``0`` for a fresh run, otherwise the list index of the record that
	        follows the last processed one.
	    """
	    if not last_row:
	        # Fresh run: start with the very first data record. The header line
	        # is already consumed by pd.read_csv, so nothing must be skipped.
	        return 0
	    # A record with run_row R sits at list index R - 2, so the next
	    # unprocessed record is at index R - 1.
	    return max(last_row[0] - 1, 0)


	def main():
	    """Run the matching algorithm over one raw CSV file, resuming if possible."""
	    # raw_file_name = 'food-forward-2022-raw-data.csv'
	    raw_file_name = 'MFB-2023-raw-data.csv'

	    # chop off the extension (only the last dot-suffix) for the results run key
	    result_file_name = raw_file_name.rsplit('.', 1)[0]
	    run_key = f"{result_file_name}-{int(time.time())}"

	    # override: set an existing run key here to resume an interrupted run
	    # run_key = 'food-forward-2022-raw-data-1719334900'

	    db_conn = get_connection()
	    try:
	        db_cursor = db_conn.cursor()

	        # find the highest row number already processed for this run key
	        db_cursor.execute('SELECT run_row FROM results WHERE run_key = %s ORDER BY run_row DESC', (run_key,))
	        last_row = db_cursor.fetchone()

	        algo = Algo(db_conn, run_key)
	        input_file_path = f'./raw/{raw_file_name}'
	        df_input = pd.read_csv(input_file_path)

	        input_data = build_input_rows(df_input)

	        # run_row is the last row from the CSV file that was processed,
	        # so let's offset from there
	        start_index = resume_offset(last_row)

	        print(f"Processing {len(input_data)} rows")
	        print(f"Starting at row #{start_index}")

	        input_data = input_data[start_index:]

	        # limit to 100 rows
	        # input_data = input_data[:100]

	        # profiler = cProfile.Profile()
	        # profiler.enable()

	        algo.match_words(input_data)

	        # profiler.disable()

	        # # Print profiling results
	        # stats = pstats.Stats(profiler).sort_stats('cumtime')
	        # stats.print_stats(20) # Print top 20 results

	        # algo.match_words([['bananas']])
	    finally:
	        # Always release the connection, even when reading the CSV or the
	        # matching run fails part-way through.
	        db_conn.close()


	if __name__ == "__main__":
	    main()