beweinreich committed
Commit ee698cf
1 Parent(s): 22ad617
add_mappings_to_embeddings.py CHANGED
@@ -1,7 +1,9 @@
 import pickle
 import os
-from similarity_fast import SimilarityFast
+import psycopg2
 import pandas as pd
+from psycopg2.extras import DictCursor
+from similarity_fast import SimilarityFast
 from utils import generate_embedding
 from db.db_utils import get_connection
 
@@ -25,7 +27,7 @@ def update_data(data, new_data):
 pickle_file_paths = ['./embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl', './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl']
 
 db_conn = get_connection()
-db_cursor = db_conn.cursor()
+db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 # select all mappings that have not been reviewed
 db_cursor.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
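Note: the change above swaps the default tuple cursor for psycopg2's DictCursor so query results can be read by column name instead of positional index. A minimal sketch of that behaviour, using an illustrative connection string and the query from this file (the repo itself obtains its connection from db.db_utils.get_connection):

import psycopg2
from psycopg2.extras import DictCursor

conn = psycopg2.connect("dbname=example")        # illustrative; not this repo's connection setup
cur = conn.cursor(cursor_factory=DictCursor)     # rows come back as dict-like DictRow objects
cur.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1")
for row in cur.fetchall():
    # A plain cursor only allows row[0] / row[1]; DictCursor also allows access by column name.
    print(row['input_word'], row['dictionary_word'])
conn.close()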
category_mapper.py CHANGED
@@ -1,10 +1,12 @@
 import os
 import csv
 import json
+import psycopg2
 import pandas as pd
 from tqdm import tqdm
 from openai import OpenAI
 from dotenv import load_dotenv
+from psycopg2.extras import DictCursor
 from db.db_utils import get_connection
 
 
@@ -14,7 +16,7 @@ api_key = os.getenv("OPENAI_API_KEY")
 client = OpenAI(api_key=api_key)
 
 db_conn = get_connection()
-db_cursor = db_conn.cursor()
+db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 # Load your Excel file
 file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv'
@@ -56,7 +58,7 @@ db_cursor.execute('SELECT * FROM dictionary where wweia_category is null')
 rows = db_cursor.fetchall()
 
 for row in tqdm(rows, desc="Processing"):
-    # Get the food item and category
+    print()
     fdc_id = row['fdc_id']
     food_item = row['description']
     category = row['food_category']
@@ -71,7 +73,13 @@ for row in tqdm(rows, desc="Processing"):
     best_category = find_best_category(food_item, category, spreadsheet)
     print(f"Q: '{food_item}'")
    print(f"A: '{best_category}'")
-    print()
 
     if best_category:
         db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
+        db_conn.commit()
+    else:
+        print(f"Failed to find a category for '{food_item}'")
+
+
+
+db_conn.close()
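Note: beyond the same DictCursor switch, this file now commits each successful UPDATE as it happens, prints a message when no category is found, and closes the connection once the loop finishes. A minimal sketch of that per-row commit pattern (the helper name and its arguments are illustrative, not part of the commit):

def save_category(db_conn, db_cursor, fdc_id, best_category):
    # Committing per row means a crash mid-run loses at most the current update.
    if best_category:
        db_cursor.execute(
            'UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s',
            (best_category, fdc_id),
        )
        db_conn.commit()
    else:
        print(f"Failed to find a category for fdc_id {fdc_id}")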