# Source: brightly-ai / run.py (commit 4c8bae0, "uncomment inserts", 1.41 kB)
# run.py
import os
import time
import cProfile
import pstats
import pandas as pd
from dotenv import load_dotenv
from algo import Algo
from db.db_utils import get_connection
from tasks import process_file, file_is_complete
from redis import Redis
from rq import Queue
# Upper bound, in seconds, that a single queued job may run (2 hours).
WORKER_TIMEOUT = 2 * 60 * 60

# Pull variables from a local .env file into the process environment before
# anything below reads it.
load_dotenv()

# Mandatory setting: a missing REDIS_URL raises KeyError at import time.
REDIS_URL = os.environ["REDIS_URL"]

# One shared Redis connection backing the 'default' RQ queue.
redis_conn = Redis.from_url(REDIS_URL)
q = Queue("default", connection=redis_conn)
if __name__ == "__main__":
    # Script entry point: enqueue one background job for every raw file that
    # file_is_complete() does not report as complete.
    db_conn = get_connection()
    try:
        db_cursor = db_conn.cursor()

        # Every entry in ./raw is a candidate except the test.csv fixture.
        # os.listdir() returns entries in arbitrary order, so sort them to
        # make the enqueue order reproducible between runs.
        raw_files = sorted(f for f in os.listdir('./raw') if f != 'test.csv')

        for raw_file_name in raw_files:
            # The run key is the part of the file name before the first dot.
            run_key = raw_file_name.split('.')[0]

            # Only the first element of the returned pair is needed here.
            csv_complete, _ = file_is_complete(raw_file_name, run_key, db_cursor)
            if csv_complete:
                continue

            job = q.enqueue(process_file, raw_file_name, job_timeout=WORKER_TIMEOUT)
            print(f"Task enqueued with job ID: {job.id}")
    finally:
        # Always release the connection, even when listing the directory,
        # the completeness check, or the enqueue call raises.
        db_conn.close()