# Spaces: Paused
import os | |
import csv | |
import json | |
import pandas as pd | |
from openai import OpenAI | |
from dotenv import load_dotenv | |
# Populate the process environment from a local .env file, if one exists,
# before the API key is looked up.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Source dictionary: the 'description' column holds the raw food strings.
input_file_path = 'dictionary/dictionary.csv'
df_input = pd.read_csv(input_file_path)
# NOTE(review): astype(str) renders missing descriptions as the literal
# string "nan"; confirm the dictionary has no empty rows.
input_words = df_input['description'].astype(str).tolist()

# The dictionary is processed one batch at a time. To run the next batch,
# raise BATCH_OFFSET by BATCH_SIZE (0 -> 1000 -> 2000, ...).
BATCH_OFFSET = 0
BATCH_SIZE = 1000
food_items = input_words[BATCH_OFFSET:BATCH_OFFSET + BATCH_SIZE]
# Ask the chat model for variations of a single food description
def query_gpt(food_item):
    """Request similar-dry-matter variations of one food item from the model.

    Builds a fixed instruction prompt ending with *food_item*, sends it
    through the module-level ``client`` with JSON-object response format
    requested, prints the raw completion for inspection, and returns the
    text content of the first choice.

    Args:
        food_item: The food description string to embed in the prompt.

    Returns:
        The message content of the first completion choice (expected to be
        a JSON document with "original", "food_item" and "similar" keys).
    """
    # Each entry is one paragraph of the prompt; paragraphs are separated
    # by a blank line when joined below.
    paragraphs = [
        "I'm attempting to pre-seed a database with similar items to known food items.",
        "I'm going to give you a string of text. I need you to find the food item, and come up with 5-10 variations of this food item that would have similar dry matter content.",
        "For example, if I give you: \"lemons, whole, canned, solids and liquids, with salt added\" you should know that the food item is \"lemon\" and you should give me a list of varieties of lemons, like: \"meyer lemons\", \"eureka lemons\", \"lisbon lemons\", etc.",
        "However, if I say \"eggplant\", you should not say \"eggplant dip\", because eggplant dip has a different dry matter content than eggplant.",
        "You should respond in json format with an object with three keys: \"original\", \"food_item\", and \"similar\". The \"original\" key should have the original food item, \"food_item\" should be the isolated food item, and the \"similar\" key should have a list of similar food items.",
        f"Your first string is: \"{food_item}\"",
    ]
    user_prompt = "\n\n".join(paragraphs)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    api_result = client.chat.completions.create(
        messages=chat_messages,
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )

    # Dump the full completion object so each call can be inspected in the log.
    print("completion")
    print(api_result)

    first_choice = api_result.choices[0]
    return first_choice.message.content
# Define the function to parse the GPT response
def parse_response(response):
    """Extract the three expected fields from the model's JSON reply.

    Args:
        response: The raw reply text. May be ``None`` or otherwise unusable;
            such input is reported and treated as a parse failure.

    Returns:
        A tuple ``(original, food_item, similar)`` taken from the decoded
        JSON object, or ``(None, None, None)`` when the reply is not valid
        JSON, is not a JSON object, or lacks one of the required keys.
    """
    try:
        result = json.loads(response)
        return result["original"], result["food_item"], result["similar"]
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        # TypeError covers a missing reply (None) and valid JSON that is not
        # an object (e.g. a list or a bare string), so one bad answer cannot
        # abort the whole batch.
        print(f"Error parsing response: {response} - {e}")
        return None, None, None
# Query the model for every selected food item and write the usable answers
# to preseed.csv, one row per item.
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform's locale and non-ASCII food names cannot fail to encode mid-run.
with open('preseed.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["original", "food_item", "similar"])
    for item in food_items:
        response = query_gpt(item)
        original, food_item, similar = parse_response(response)
        # Skip items whose reply could not be parsed or came back with an
        # empty field.
        if original and food_item and similar:
            # NOTE: "similar" is passed through as returned by the model
            # (typically a list); csv.writer stores non-string values using
            # their str() form.
            writer.writerow([original, food_item, similar])
print("Food variations saved to preseed.csv")