beweinreich committed
Commit ee698cf
1 Parent(s): 22ad617
add_mappings_to_embeddings.py CHANGED
@@ -1,7 +1,9 @@
 import pickle
 import os
-from similarity_fast import SimilarityFast
+import psycopg2
 import pandas as pd
+from psycopg2.extras import DictCursor
+from similarity_fast import SimilarityFast
 from utils import generate_embedding
 from db.db_utils import get_connection
 
@@ -25,7 +27,7 @@ def update_data(data, new_data):
 pickle_file_paths = ['./embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl', './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl']
 
 db_conn = get_connection()
-db_cursor = db_conn.cursor()
+db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 # select all mappings that have not been reviewed
 db_cursor.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
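Note: the change above swaps the default tuple cursor for psycopg2's DictCursor so query results can be read by column name instead of positional index. A minimal sketch of that behaviour, using an illustrative connection string and the query from this file (the repo itself obtains its connection from db.db_utils.get_connection):

import psycopg2
from psycopg2.extras import DictCursor

conn = psycopg2.connect("dbname=example")        # illustrative; not this repo's connection setup
cur = conn.cursor(cursor_factory=DictCursor)     # rows come back as dict-like DictRow objects
cur.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
for row in cur.fetchall():
    # A plain cursor only allows row[0] / row[1]; DictCursor also allows access by column name.
    print(row['input_word'], row['dictionary_word'])
conn.close()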
category_mapper.py CHANGED
@@ -1,10 +1,12 @@
 import os
 import csv
 import json
+import psycopg2
 import pandas as pd
 from tqdm import tqdm
 from openai import OpenAI
 from dotenv import load_dotenv
+from psycopg2.extras import DictCursor
 from db.db_utils import get_connection
 
 
@@ -14,7 +16,7 @@ api_key = os.getenv("OPENAI_API_KEY")
 client = OpenAI(api_key=api_key)
 
 db_conn = get_connection()
-db_cursor = db_conn.cursor()
+db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 # Load your Excel file
 file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv'
@@ -56,7 +58,7 @@ db_cursor.execute('SELECT * FROM dictionary where wweia_category is null')
 rows = db_cursor.fetchall()
 
 for row in tqdm(rows, desc="Processing"):
-    # Get the food item and category
+    print()
     fdc_id = row['fdc_id']
     food_item = row['description']
     category = row['food_category']
@@ -71,7 +73,13 @@ for row in tqdm(rows, desc="Processing"):
     best_category = find_best_category(food_item, category, spreadsheet)
     print(f"Q: '{food_item}'")
    print(f"A: '{best_category}'")
-    print()
 
     if best_category:
         db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
+        db_conn.commit()
+    else:
+        print(f"Failed to find a category for '{food_item}'")
+
+
+
+db_conn.close()
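Note: beyond the same DictCursor switch, this file now commits each successful UPDATE as it happens, prints a message when no category is found, and closes the connection once the loop finishes. A minimal sketch of that per-row commit pattern (the helper name and its arguments are illustrative, not part of the commit):

def save_category(db_conn, db_cursor, fdc_id, best_category):
    # Committing per row means a crash mid-run loses at most the current update.
    if best_category:
        db_cursor.execute(
            'UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s',
            (best_category, fdc_id),
        )
        db_conn.commit()
    else:
        print(f"Failed to find a category for fdc_id {fdc_id}")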