"""Pre-seed a CSV of food-item variations by querying the OpenAI chat API.

Reads food descriptions from the ``description`` column of
``dictionary/dictionary.csv``, asks the model for similar food items for each
one, and writes the parsed answers to ``preseed.csv``.
"""

import csv
import json
import os
from typing import Any, Optional, Tuple

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI, OpenAIError

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

input_file_path = 'dictionary/dictionary.csv'
output_file_path = 'preseed.csv'

# Slice of the input descriptions processed in one run. The defaults take the
# first 1000 rows; set BATCH_START to 1000 to process rows 1000-1999, etc.
BATCH_START = 0
BATCH_SIZE = 1000


def query_gpt(food_item: str) -> Optional[str]:
    """Ask the chat model for variations of *food_item*.

    Args:
        food_item: Free-text food description to send to the model.

    Returns:
        The content of the first choice's message, as returned by the API.
        JSON mode is requested, so this is expected to be a JSON document,
        but it is passed through unparsed (and may be ``None``).

    Raises:
        openai.OpenAIError: Propagated unchanged from the client call.
    """
    # Only the last fragment interpolates a value; the rest are plain
    # literals joined by implicit concatenation.
    prompt = (
        "I'm attempting to pre-seed a database with similar items to known "
        "food items.\n\n"
        "I'm going to give you a string of text. I need you to find the food "
        "item, and come up with 5-10 variations of this food item that would "
        "have similar dry matter content.\n\n"
        "For example, if I give you: \"lemons, whole, canned, solids and "
        "liquids, with salt added\" you should know that the food item is "
        "\"lemon\" and you should give me a list of varieties of lemons, "
        "like: \"meyer lemons\", \"eureka lemons\", \"lisbon lemons\", "
        "etc.\n\n"
        "However, if I say \"eggplant\", you should not say \"eggplant "
        "dip\", because eggplant dip has a different dry matter content than "
        "eggplant.\n\n"
        "You should respond in json format with an object with three keys: "
        "\"original\", \"food_item\", and \"similar\". The \"original\" key "
        "should have the original food item, \"food_item\" should be the "
        "isolated food item, and the \"similar\" key should have a list of "
        "similar food items.\n\n"
        f"Your first string is: \"{food_item}\""
    )

    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )

    print("completion")
    print(completion)

    return completion.choices[0].message.content


def parse_response(response: Optional[str]) -> Tuple[Any, Any, Any]:
    """Extract the three expected fields from a model response.

    Args:
        response: Raw message content returned by :func:`query_gpt`.

    Returns:
        ``(original, food_item, similar)`` taken from the decoded JSON
        object, or ``(None, None, None)`` when the response cannot be decoded
        or does not have the expected keys.
    """
    try:
        result = json.loads(response)
        return result["original"], result["food_item"], result["similar"]
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        # TypeError covers a ``None`` response and JSON that decodes to a
        # non-object (list, string, number), neither of which can be indexed
        # by key.
        print(f"Error parsing response: {response} - {e}")
        return None, None, None


def main() -> None:
    """Query the model for each description in the batch and save the rows."""
    df_input = pd.read_csv(input_file_path)
    input_words = df_input['description'].astype(str).tolist()
    food_items = input_words[BATCH_START:BATCH_START + BATCH_SIZE]

    # An explicit encoding keeps the output independent of the platform's
    # locale default, so non-ASCII food names are written consistently.
    with open(output_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["original", "food_item", "similar"])

        for item in food_items:
            try:
                response = query_gpt(item)
            except OpenAIError as e:
                # One failed request should not discard the rest of the
                # batch; report it and move on to the next item.
                print(f"Error querying GPT for {item!r}: {e}")
                continue

            original, food_item, similar = parse_response(response)
            # Rows with any missing/empty field are skipped. ``similar`` is
            # written as-is, so a list ends up in the cell as its str() form.
            if original and food_item and similar:
                writer.writerow([original, food_item, similar])

    print(f"Food variations saved to {output_file_path}")


if __name__ == "__main__":
    main()