Spaces:
Paused
Paused
Commit
•
ee698cf
1
Parent(s):
22ad617
bug fixes
Browse files
- add_mappings_to_embeddings.py +4 -2
- category_mapper.py +11 -3
add_mappings_to_embeddings.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import pickle
|
2 |
import os
|
3 |
-
|
4 |
import pandas as pd
|
|
|
|
|
5 |
from utils import generate_embedding
|
6 |
from db.db_utils import get_connection
|
7 |
|
@@ -25,7 +27,7 @@ def update_data(data, new_data):
|
|
25 |
pickle_file_paths = ['./embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl', './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl']
|
26 |
|
27 |
db_conn = get_connection()
|
28 |
-
db_cursor = db_conn.cursor()
|
29 |
|
30 |
# select all mappings that have been reviewed (reviewed = 1)
|
31 |
db_cursor.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
|
|
|
1 |
import pickle
|
2 |
import os
|
3 |
+
import psycopg2
|
4 |
import pandas as pd
|
5 |
+
from psycopg2.extras import DictCursor
|
6 |
+
from similarity_fast import SimilarityFast
|
7 |
from utils import generate_embedding
|
8 |
from db.db_utils import get_connection
|
9 |
|
|
|
27 |
pickle_file_paths = ['./embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl', './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl']
|
28 |
|
29 |
db_conn = get_connection()
|
30 |
+
db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
|
31 |
|
32 |
# select all mappings that have been reviewed (reviewed = 1)
|
33 |
db_cursor.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
|
category_mapper.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
import os
|
2 |
import csv
|
3 |
import json
|
|
|
4 |
import pandas as pd
|
5 |
from tqdm import tqdm
|
6 |
from openai import OpenAI
|
7 |
from dotenv import load_dotenv
|
|
|
8 |
from db.db_utils import get_connection
|
9 |
|
10 |
|
@@ -14,7 +16,7 @@ api_key = os.getenv("OPENAI_API_KEY")
|
|
14 |
client = OpenAI(api_key=api_key)
|
15 |
|
16 |
db_conn = get_connection()
|
17 |
-
db_cursor = db_conn.cursor()
|
18 |
|
19 |
# Load your CSV file
|
20 |
file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv'
|
@@ -56,7 +58,7 @@ db_cursor.execute('SELECT * FROM dictionary where wweia_category is null')
|
|
56 |
rows = db_cursor.fetchall()
|
57 |
|
58 |
for row in tqdm(rows, desc="Processing"):
|
59 |
-
|
60 |
fdc_id = row['fdc_id']
|
61 |
food_item = row['description']
|
62 |
category = row['food_category']
|
@@ -71,7 +73,13 @@ for row in tqdm(rows, desc="Processing"):
|
|
71 |
best_category = find_best_category(food_item, category, spreadsheet)
|
72 |
print(f"Q: '{food_item}'")
|
73 |
print(f"A: '{best_category}'")
|
74 |
-
print()
|
75 |
|
76 |
if best_category:
|
77 |
db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import csv
|
3 |
import json
|
4 |
+
import psycopg2
|
5 |
import pandas as pd
|
6 |
from tqdm import tqdm
|
7 |
from openai import OpenAI
|
8 |
from dotenv import load_dotenv
|
9 |
+
from psycopg2.extras import DictCursor
|
10 |
from db.db_utils import get_connection
|
11 |
|
12 |
|
|
|
16 |
client = OpenAI(api_key=api_key)
|
17 |
|
18 |
db_conn = get_connection()
|
19 |
+
db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
|
20 |
|
21 |
# Load your CSV file
|
22 |
file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv'
|
|
|
58 |
rows = db_cursor.fetchall()
|
59 |
|
60 |
for row in tqdm(rows, desc="Processing"):
|
61 |
+
print()
|
62 |
fdc_id = row['fdc_id']
|
63 |
food_item = row['description']
|
64 |
category = row['food_category']
|
|
|
73 |
best_category = find_best_category(food_item, category, spreadsheet)
|
74 |
print(f"Q: '{food_item}'")
|
75 |
print(f"A: '{best_category}'")
|
|
|
76 |
|
77 |
if best_category:
|
78 |
db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
|
79 |
+
db_conn.commit()
|
80 |
+
else:
|
81 |
+
print(f"Failed to find a category for '{food_item}'")
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
db_conn.close()
|