# run.py
#
# Entry-point script: scan ./raw for data CSVs and enqueue an RQ
# `process_file` job for every file whose run is not yet complete
# (as reported by `file_is_complete` against the database).
import os
import time
import cProfile
import pstats

import pandas as pd
from dotenv import load_dotenv
from redis import Redis
from rq import Queue

from algo import Algo
from db.db_utils import get_connection
from tasks import process_file, file_is_complete

load_dotenv()

# Fail fast with a KeyError if the Redis connection string is not configured.
REDIS_URL = os.environ['REDIS_URL']
WORKER_TIMEOUT = 7200  # 2 hours

redis_conn = Redis.from_url(REDIS_URL)
q = Queue('default', connection=redis_conn)

if __name__ == "__main__":
    db_conn = get_connection()
    try:
        db_cursor = db_conn.cursor()

        # Every CSV in ./raw is a candidate run; test.csv is excluded.
        raw_files = [f for f in os.listdir('./raw') if f != 'test.csv']

        for raw_file_name in raw_files:
            # run_key is the file name up to the first dot, matching how
            # completeness is tracked in the database.
            run_key = raw_file_name.split('.')[0]
            csv_complete, _ = file_is_complete(raw_file_name, run_key, db_cursor)
            if not csv_complete:
                job = q.enqueue(process_file, raw_file_name, job_timeout=WORKER_TIMEOUT)
                print(f"Task enqueued with job ID: {job.id}")
    finally:
        # Close the connection even if enqueuing or the completeness
        # check raises — the original code leaked it on any exception.
        db_conn.close()