Spaces:

madebybread
/

brightly-ai

Paused

App Files Files Community

brightly-ai / old_experiments /preseed.py

beweinreich

ported everything into dictionary postgres

22ad617 about 1 month ago

raw

history blame

No virus

2.83 kB

	import os
	import csv
	import json
	import pandas as pd
	from openai import OpenAI
	from dotenv import load_dotenv


	# Load environment variables (including OPENAI_API_KEY) from a .env file.
	load_dotenv()

	api_key = os.getenv("OPENAI_API_KEY")
	client = OpenAI(api_key=api_key)

	# Descriptions are read from the "description" column of the dictionary CSV
	# and coerced to str so every entry can be embedded in a prompt.
	input_file_path = 'dictionary/dictionary.csv'
	df_input = pd.read_csv(input_file_path)
	input_words = df_input['description'].astype(str).tolist()

	# Process the first 1000 descriptions in this run.
	food_items = input_words[:1000]

	# To process the next batch instead, skip the first 1000 descriptions:
	# food_items = input_words[1000:2000]

	# Define the function to query the GPT API
	def query_gpt(food_item):
	    """Ask the chat model for similar-dry-matter variations of one food item.

	    Builds a single user prompt around ``food_item``, sends it through the
	    module-level ``client`` with a JSON-object response format, prints the
	    raw completion, and hands back the text of the first choice.

	    Args:
	        food_item: Description text to embed in the prompt.

	    Returns:
	        The ``content`` of the first choice's message.
	    """
	    # The instruction text is fixed; only the last section carries the input.
	    sections = (
	        "I'm attempting to pre-seed a database with similar items to known food items.\n\n",
	        "I'm going to give you a string of text. I need you to find the food item, and come up with 5-10 variations of this food item that would have similar dry matter content.\n\n",
	        "For example, if I give you: \"lemons, whole, canned, solids and liquids, with salt added\" you should know that the food item is \"lemon\" and you should give me a list of varieties of lemons, like: \"meyer lemons\", \"eureka lemons\", \"lisbon lemons\", etc.\n\n",
	        "However, if I say \"eggplant\", you should not say \"eggplant dip\", because eggplant dip has a different dry matter content than eggplant.\n\n",
	        "You should respond in json format with an object with three keys: \"original\", \"food_item\", and \"similar\". The \"original\" key should have the original food item, \"food_item\" should be the isolated food item, and the \"similar\" key should have a list of similar food items.\n\n",
	        f"Your first string is: \"{food_item}\"",
	    )
	    prompt = "".join(sections)

	    chat_messages = [
	        {"role": "system", "content": "You are a helpful assistant."},
	        {"role": "user", "content": prompt},
	    ]
	    completion = client.chat.completions.create(
	        model="gpt-3.5-turbo-1106",
	        response_format={"type": "json_object"},
	        messages=chat_messages,
	    )

	    # Debug trace of the full API response.
	    print("completion")
	    print(completion)

	    first_choice = completion.choices[0]
	    return first_choice.message.content

	# Define the function to parse the GPT response
	def parse_response(response):
	    """Extract the (original, food_item, similar) triple from a JSON reply.

	    Args:
	        response: JSON text expected to encode an object with the keys
	            "original", "food_item" and "similar". May be None.

	    Returns:
	        A tuple ``(original, food_item, similar)`` read from the decoded
	        object, or ``(None, None, None)`` when the reply cannot be decoded,
	        is not a JSON object, or is missing one of the keys. Failures are
	        reported on stdout rather than raised.
	    """
	    try:
	        result = json.loads(response)
	        return result["original"], result["food_item"], result["similar"]
	    # TypeError covers a None/non-text response and valid JSON that is not an
	    # object (list, string, number); JSONDecodeError and KeyError miss both.
	    except (json.JSONDecodeError, KeyError, TypeError) as e:
	        print(f"Error parsing response: {response} - {e}")
	        return None, None, None

	# Open a CSV file to write the results
	# UTF-8 is pinned so non-ASCII food names are written the same way on every
	# platform instead of depending on the locale's default encoding.
	with open('preseed.csv', mode='w', newline='', encoding='utf-8') as file:
	    writer = csv.writer(file)
	    writer.writerow(["original", "food_item", "similar"])

	    # One API round-trip per description; rows are written as they arrive.
	    for item in food_items:
	        response = query_gpt(item)
	        original, food_item, similar = parse_response(response)

	        # Skip rows where parsing failed or any field came back empty.
	        if original and food_item and similar:
	            # NOTE(review): csv.writer stringifies non-string cells with
	            # str(), so a list in `similar` is stored as its Python repr --
	            # confirm that is what downstream readers expect.
	            writer.writerow([original, food_item, similar])

	# Reported after the with-block so the file is already flushed and closed.
	print("Food variations saved to preseed.csv")