Spaces:

madebybread
/

brightly-ai

Paused

App Files Files Community

brightly-ai / category_mapper.py

beweinreich

bug fixes

ee698cf about 1 month ago

raw

history blame

No virus

2.9 kB

	import os
	import csv
	import json
	import psycopg2
	import pandas as pd
	from tqdm import tqdm
	from openai import OpenAI
	from dotenv import load_dotenv
	from psycopg2.extras import DictCursor
	from db.db_utils import get_connection


	load_dotenv()

	api_key = os.getenv("OPENAI_API_KEY")
	client = OpenAI(api_key=api_key)

	db_conn = get_connection()
	db_cursor = db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

	# Load your Excel file
	file_path = './dictionary/final_corrected_wweia_food_category_complete - final_corrected_wweia_food_category_complete.csv'
	spreadsheet = pd.read_csv(file_path)

	def find_best_category(food_item, category, dataframe):
	filtered_df = dataframe[dataframe['closest_category'] == category]
	if not filtered_df.empty:
	descriptions = filtered_df['wweia_food_category_description'].tolist()

	prompt = (
	f"Given the food item '{food_item}' and the category '{category}', choose the most appropriate category from the following options:\n{descriptions}\n\n"
	f"You should respond in json format with an object that has the key `guess`, and the value is the categoy."
	)

	completion = client.chat.completions.create(
	messages=[
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": prompt}
	],
	model="gpt-3.5-turbo-1106",
	response_format={"type": "json_object"},
	)
	response = completion.choices[0].message.content
	parsed = parse_response(response)
	return parsed

	# Define the function to parse the GPT response
	def parse_response(response):
	try:
	result = json.loads(response)
	return result['guess']
	except (json.JSONDecodeError, KeyError) as e:
	print(f"Error parsing response: {response} - {e}")
	return None

	# open up the current dictionary csv file
	db_cursor.execute('SELECT * FROM dictionary where wweia_category is null')
	rows = db_cursor.fetchall()

	for row in tqdm(rows, desc="Processing"):
	print()
	fdc_id = row['fdc_id']
	food_item = row['description']
	category = row['food_category']

	if pd.notna(row['wweia_category']) and row['wweia_category'] != "" and row['wweia_category'] != "nan" and row is not None:
	# print(f"Skipping '{food_item}' as it already has a category {row['wweia_category']}")
	continue
	else:
	print(f"Processing '{food_item}'")

	# Find the best category for the food item
	best_category = find_best_category(food_item, category, spreadsheet)
	print(f"Q: '{food_item}'")
	print(f"A: '{best_category}'")

	if best_category:
	db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
	db_conn.commit()
	else:
	print(f"Failed to find a category for '{food_item}'")



	db_conn.close()