import os import csv import json import psycopg2 import pandas as pd from tqdm import tqdm from openai import OpenAI from dotenv import load_dotenv from psycopg2.extras import DictCursor from db.db_utils import get_connection load_dotenv() api_key = os.getenv("OPENAI_API_KEY") client = OpenAI(api_key=api_key) db_conn = get_connection() db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) # Load your Excel file file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv' spreadsheet = pd.read_csv(file_path) def find_best_category(food_item, category, dataframe): filtered_df = dataframe[dataframe['closest_category'] == category] if not filtered_df.empty: descriptions = filtered_df['wweia_food_category_description'].tolist() prompt = ( f"Given the food item '{food_item}' and the category '{category}', choose the most appropriate category from the following options:\n{descriptions}\n\n" f"You should respond in json format with an object that has the key `guess`, and the value is the categoy." ) completion = client.chat.completions.create( messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt} ], model="gpt-3.5-turbo-1106", response_format={"type": "json_object"}, ) response = completion.choices[0].message.content parsed = parse_response(response) return parsed # Define the function to parse the GPT response def parse_response(response): try: result = json.loads(response) return result['guess'] except (json.JSONDecodeError, KeyError) as e: print(f"Error parsing response: {response} - {e}") return None # open up the current dictionary csv file db_cursor.execute('SELECT * FROM dictionary where wweia_category is null') rows = db_cursor.fetchall() for row in tqdm(rows, desc="Processing"): print() fdc_id = row['fdc_id'] food_item = row['description'] category = row['food_category'] if pd.notna(row['wweia_category']) and row['wweia_category'] != "" and row['wweia_category'] != "nan" and row is not None: # print(f"Skipping '{food_item}' as it already has a category {row['wweia_category']}") continue else: print(f"Processing '{food_item}'") # Find the best category for the food item best_category = find_best_category(food_item, category, spreadsheet) print(f"Q: '{food_item}'") print(f"A: '{best_category}'") if best_category: db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id)) db_conn.commit() else: print(f"Failed to find a category for '{food_item}'") db_conn.close()