File size: 2,832 Bytes
9189e38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import csv
import json
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv


# Load environment variables via python-dotenv so OPENAI_API_KEY can be read below.
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Food descriptions are read from the "description" column of the dictionary CSV.
input_file_path = 'dictionary/dictionary.csv'
df_input = pd.read_csv(input_file_path)
input_words = df_input['description'].astype(str).tolist()

# Process the first 1000 descriptions in this run.
food_items = input_words[:1000]

# To process the next batch instead, offset past the first 1000 descriptions:
# food_items = input_words[1000:2000]

# Ask the chat-completions API for variations of a single food description
def query_gpt(food_item):
    """Send one food description to the model and return its raw reply text.

    The prompt asks the model to isolate the food item contained in
    ``food_item`` and to list 5-10 variations with similar dry matter
    content, answering as a JSON object with the keys "original",
    "food_item" and "similar".

    Returns the ``content`` of the first choice's message, unparsed.
    """
    # Only the final fragment interpolates the input; the rest is fixed text.
    prompt_parts = [
        "I'm attempting to pre-seed a database with similar items to known food items.\n\n",
        "I'm going to give you a string of text. I need you to find the food item, and come up with 5-10 variations of this food item that would have similar dry matter content.\n\n",
        "For example, if I give you: \"lemons, whole, canned, solids and liquids, with salt added\" you should know that the food item is \"lemon\" and you should give me a list of varieties of lemons, like: \"meyer lemons\", \"eureka lemons\", \"lisbon lemons\", etc.\n\n",
        "However, if I say \"eggplant\", you should not say \"eggplant dip\", because eggplant dip has a different dry matter content than eggplant.\n\n",
        "You should respond in json format with an object with three keys: \"original\", \"food_item\", and \"similar\". The \"original\" key should have the original food item, \"food_item\" should be the isolated food item, and the \"similar\" key should have a list of similar food items.\n\n",
        f"Your first string is: \"{food_item}\"",
    ]
    prompt = "".join(prompt_parts)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        messages=chat_messages,
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )

    # Echo the full completion object to stdout for inspection.
    print("completion")
    print(completion)

    first_choice = completion.choices[0]
    return first_choice.message.content

# Define the function to parse the GPT response
def parse_response(response):
    """Extract the three expected fields from a JSON reply.

    Args:
        response: The raw reply text. Expected to be a JSON object with the
            keys "original", "food_item" and "similar".

    Returns:
        A tuple ``(original, food_item, similar)`` taken from the decoded
        object, or ``(None, None, None)`` when the reply cannot be used.
        Failures are reported on stdout rather than raised.
    """
    try:
        result = json.loads(response)
        return result["original"], result["food_item"], result["similar"]
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        # TypeError covers a non-string reply (e.g. None) and valid JSON that
        # is not an object (list, string, number), which cannot be indexed by
        # key; without it one bad reply would abort the whole run.
        print(f"Error parsing response: {response} - {e}")
        return None, None, None

# Open a CSV file to write the results.
# UTF-8 is set explicitly so food names containing non-ASCII characters are
# written the same way on every platform instead of depending on the locale
# default encoding (which can raise UnicodeEncodeError on some systems).
with open('preseed.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["original", "food_item", "similar"])

    for item in food_items:
        response = query_gpt(item)
        original, food_item, similar = parse_response(response)

        # parse_response yields (None, None, None) on failure; any row with a
        # missing or empty field is skipped rather than written.
        if original and food_item and similar:
            # NOTE: `similar` is a list, so csv.writer stores its str() form
            # in a single cell.
            writer.writerow([original, food_item, similar])

print("Food variations saved to preseed.csv")