beweinreich's picture
ported everything into dictionary postgres
22ad617
raw
history blame
No virus
2.83 kB
import os
import csv
import json
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
# Read settings via python-dotenv, then build the OpenAI client from the
# OPENAI_API_KEY environment variable.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Source dictionary; only its 'description' column is used below.
input_file_path = 'dictionary/dictionary.csv'
df_input = pd.read_csv(input_file_path)
# NOTE(review): astype(str) turns a missing description (NaN) into the literal
# string "nan", which would then be sent to the model like any other item --
# confirm the column has no blanks.
input_words = df_input['description'].astype(str).tolist()

# Process the first 1000 descriptions in this run.
food_items = input_words[:1000]
# To process the next batch of 1000 instead, switch to this slice:
# food_items = input_words[1000:2000]
# Ask the chat model for variations of a single food description
def query_gpt(food_item):
    """Request similar-food variations for ``food_item`` from the chat API.

    Builds one user prompt asking the model to isolate the food item in the
    given text and list 5-10 variations with similar dry matter content,
    then sends it through the module-level ``client`` with a JSON-object
    response format requested.

    Args:
        food_item: The text to embed in the prompt.

    Returns:
        The ``content`` of the first choice's message, unparsed.
    """
    # Paragraphs of the prompt; they are joined with a blank line between them.
    paragraphs = [
        "I'm attempting to pre-seed a database with similar items to known food items.",
        "I'm going to give you a string of text. I need you to find the food item, and come up with 5-10 variations of this food item that would have similar dry matter content.",
        'For example, if I give you: "lemons, whole, canned, solids and liquids, with salt added" you should know that the food item is "lemon" and you should give me a list of varieties of lemons, like: "meyer lemons", "eureka lemons", "lisbon lemons", etc.',
        'However, if I say "eggplant", you should not say "eggplant dip", because eggplant dip has a different dry matter content than eggplant.',
        'You should respond in json format with an object with three keys: "original", "food_item", and "similar". The "original" key should have the original food item, "food_item" should be the isolated food item, and the "similar" key should have a list of similar food items.',
        f'Your first string is: "{food_item}"',
    ]
    prompt = "\n\n".join(paragraphs)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
        messages=chat_messages,
    )

    # Dump the raw completion object to stdout for inspection.
    print("completion")
    print(completion)

    first_choice = completion.choices[0]
    return first_choice.message.content
# Define the function to parse the GPT response
def parse_response(response):
    """Decode the model's JSON reply into ``(original, food_item, similar)``.

    Args:
        response: JSON text expected to hold an object with the keys
            "original", "food_item" and "similar". May be ``None``.

    Returns:
        A 3-tuple of the values stored under those keys, or
        ``(None, None, None)`` when the reply cannot be decoded, is not a
        JSON object, or lacks one of the keys. Failures are printed to
        stdout rather than raised.
    """
    try:
        result = json.loads(response)
        return result["original"], result["food_item"], result["similar"]
    # TypeError covers a None / non-text response as well as a decoded value
    # that is not a dict (list, string, number, null), so one bad reply
    # cannot abort the whole run.
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        print(f"Error parsing response: {response} - {e}")
        return None, None, None
# Query every food item and write the usable answers to preseed.csv.
# The encoding is pinned to UTF-8 so non-ASCII food names are written the
# same way on every platform instead of depending on the locale default.
with open('preseed.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["original", "food_item", "similar"])
    for item in food_items:
        response = query_gpt(item)
        original, food_item, similar = parse_response(response)
        # Skip replies that failed to parse or came back with an empty field.
        if not (original and food_item and similar):
            continue
        # NOTE: csv.writer stringifies non-string values with str(), so if
        # `similar` is a list it lands in the cell as its Python repr.
        writer.writerow([original, food_item, similar])

print("Food variations saved to preseed.csv")