Spaces:
Paused
Paused
Commit
•
a216741
1
Parent(s):
efa7589
need to figure out how to make this more performant
Browse files- db/db_utils.py +17 -0
- requirements.txt +0 -1
- run.py +4 -4
db/db_utils.py
CHANGED
@@ -101,6 +101,23 @@ def get_mapping_from_db(cursor, cleaned_word):
|
|
101 |
return dict(zip(columns, row))
|
102 |
return None
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
def get_dictionary_data_from_db(cursor, dictionary_word):
|
105 |
cursor.execute('SELECT * FROM dictionary WHERE description = %s', (dictionary_word,))
|
106 |
row = cursor.fetchone()
|
|
|
101 |
return dict(zip(columns, row))
|
102 |
return None
|
103 |
|
104 |
+
def get_batch_mapping_from_db(cursor, cleaned_words):
    """Fetch mapping rows for many cleaned words in a single query.

    Parameters:
        cursor: DB-API cursor (psycopg2-style; uses ``%s`` placeholders).
        cleaned_words: sequence of cleaned-word strings to look up.

    Returns:
        dict mapping each found ``cleaned_word`` to its full row as a
        ``{column_name: value}`` dict. Words with no matching row are
        simply absent. Returns ``{}`` for empty input or no matches.
    """
    if not cleaned_words:
        return {}

    # Build one IN (...) query with a placeholder per word: a single
    # round trip instead of one query per word (the point of this
    # batch helper).
    placeholders = ', '.join(['%s'] * len(cleaned_words))
    query = f'SELECT * FROM mappings WHERE cleaned_word IN ({placeholders})'

    cursor.execute(query, tuple(cleaned_words))
    rows = cursor.fetchall()

    if rows:
        columns = [col[0] for col in cursor.description]
        # Hoist the column-index lookup out of the comprehension:
        # columns.index() is O(#columns) and was previously re-evaluated
        # for every fetched row.
        key_idx = columns.index('cleaned_word')
        return {row[key_idx]: dict(zip(columns, row)) for row in rows}

    return {}
|
120 |
+
|
121 |
def get_dictionary_data_from_db(cursor, dictionary_word):
|
122 |
cursor.execute('SELECT * FROM dictionary WHERE description = %s', (dictionary_word,))
|
123 |
row = cursor.fetchone()
|
requirements.txt
CHANGED
@@ -7,7 +7,6 @@ openai==1.34.0
|
|
7 |
pluralizer==1.2.0
|
8 |
psutil==6.0.0
|
9 |
psycopg2-binary==2.9.9
|
10 |
-
asyncpg==0.29.0
|
11 |
python-dotenv==1.0.1
|
12 |
python-Levenshtein==0.25.1
|
13 |
requests==2.32.3
|
|
|
7 |
pluralizer==1.2.0
|
8 |
psutil==6.0.0
|
9 |
psycopg2-binary==2.9.9
|
|
|
10 |
python-dotenv==1.0.1
|
11 |
python-Levenshtein==0.25.1
|
12 |
requests==2.32.3
|
run.py
CHANGED
@@ -8,15 +8,15 @@ from db.db_utils import get_connection
|
|
8 |
if __name__ == "__main__":
|
9 |
db_conn = get_connection()
|
10 |
db_cursor = db_conn.cursor()
|
11 |
-
raw_file_name = 'food-forward-2022-raw-data.csv'
|
12 |
-
|
13 |
|
14 |
# chop off the extension for the results run key
|
15 |
result_file_name = raw_file_name.split('.')[0]
|
16 |
run_key = f"{result_file_name}-{int(time.time())}"
|
17 |
|
18 |
# override
|
19 |
-
run_key = 'food-forward-2022-raw-data-1719334900'
|
20 |
|
21 |
# find the number of rows that were already processed associated with this run key
|
22 |
db_cursor.execute('SELECT run_row FROM results WHERE run_key = %s ORDER BY run_row DESC', (run_key,))
|
@@ -36,7 +36,7 @@ if __name__ == "__main__":
|
|
36 |
last_row_num = last_row[0] - 1
|
37 |
|
38 |
print(f"Processing {len(input_data)} rows")
|
39 |
-
print(f"
|
40 |
|
41 |
input_data = input_data[last_row_num:]
|
42 |
|
|
|
8 |
if __name__ == "__main__":
|
9 |
db_conn = get_connection()
|
10 |
db_cursor = db_conn.cursor()
|
11 |
+
# raw_file_name = 'food-forward-2022-raw-data.csv'
|
12 |
+
raw_file_name = 'MFB-2023-raw-data.csv'
|
13 |
|
14 |
# chop off the extension for the results run key
|
15 |
result_file_name = raw_file_name.split('.')[0]
|
16 |
run_key = f"{result_file_name}-{int(time.time())}"
|
17 |
|
18 |
# override
|
19 |
+
# run_key = 'food-forward-2022-raw-data-1719334900'
|
20 |
|
21 |
# find the number of rows that were already processed associated with this run key
|
22 |
db_cursor.execute('SELECT run_row FROM results WHERE run_key = %s ORDER BY run_row DESC', (run_key,))
|
|
|
36 |
last_row_num = last_row[0] - 1
|
37 |
|
38 |
print(f"Processing {len(input_data)} rows")
|
39 |
+
print(f"Starting at row #{last_row_num}")
|
40 |
|
41 |
input_data = input_data[last_row_num:]
|
42 |
|