import os import csv import json import time import heapq import pandas as pd from openai import OpenAI from dotenv import load_dotenv from Levenshtein import distance from tqdm import tqdm from db.db_utils import get_connection from ask_gpt import query_gpt from similarity_fast import SimilarityFast # This iterates over every mapping and flags a row if it doesn't think the mapping is correct load_dotenv() api_key = os.getenv("OPENAI_API_KEY") client = OpenAI(api_key=api_key) def query_gpt(food_item, dictionary_word, similar_words_list): line_separated_words = '\n'.join(similar_words_list) prompt = ( f"""I have a particular item and I need to know if it's correctly associated with another item. It's ideally the same word, or semantically similar. I will also provide a list of other similar words that you could be a better fit. You should respond in JSON format with an object that has the key `guess`, and the value is the most similar food item. This is important: only return the mapped dictionary word, or a word from the list of similar words. The item is: "{food_item}" It has been mapped to: "{dictionary_word}" Here are some similar words: {line_separated_words} """) completion = client.chat.completions.create( messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt} ], model="gpt-3.5-turbo-1106", response_format={"type": "json_object"}, ) response = completion.choices[0].message.content parsed = parse_response(response) print(f"Food Item: '{food_item}'") print(f"Dictionary Word: '{dictionary_word}'") print(f"GPT Word: {parsed}") return parsed # Define the function to parse the GPT response def parse_response(response): try: result = json.loads(response) return result['guess'] except (json.JSONDecodeError, KeyError) as e: print(f"Error parsing response: {response} - {e}") return None db_conn = get_connection() db_cursor = db_conn.cursor() # Load the dictionary db_cursor.execute("SELECT description FROM dictionary") dictionary = db_cursor.fetchall() dictionary = [item[0] for item in dictionary] db_cursor.execute("SELECT input_word, dictionary_word FROM mappings where reviewed = false") results = db_cursor.fetchall() similarity_fast = SimilarityFast(db_cursor) for row in results: input_word = row[0] dictionary_word = row[1] print("") print("=====================================") print(f"Checking: '{input_word}'") print(f" -> Dictionary: '{dictionary_word}'") mapping = similarity_fast.find_most_similar_word(input_word) if mapping['dictionary_word'] != dictionary_word: # temp stopgap continue print(f"Updating: '{input_word}' to '{mapping['dictionary_word']}'") confirm = input(f"Press 'y' to confirm. Any other key to skip") if confirm.lower() == 'y': db_cursor.execute("UPDATE mappings SET dictionary_word = %s, reviewed = true WHERE input_word = %s", (mapping['dictionary_word'], input_word,)) db_conn.commit() else: db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) db_conn.commit() else: similar_words_list = mapping['similar_words'].split('|') similar_words_list.append('Non-Food Item') similar_words_list.append('Mixed Food Items') response = query_gpt(input_word, dictionary_word, similar_words_list) if response: if response in dictionary: if response == dictionary_word: print(" -> Correct") db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) db_conn.commit() elif response != dictionary_word: # temp stopgap continue print(f"Updating: '{input_word}' to '{response}'") confirm = input("Press 'y' to confirm, 'i' to ignore, 'd' to delete, 'm' for mixture, any other key to skip: ") if confirm.lower() == 'y': if response == 'Non-Food Item': sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, reviewed = true WHERE input_word = %s" else: sql = "UPDATE mappings SET dictionary_word = %s, reviewed = true, is_food = true WHERE input_word = %s" print(f" - Updating mapping with {response}") db_cursor.execute(sql, (response, input_word)) db_conn.commit() elif confirm.lower() == 'i': print(f" - Ignoring mapping") sql = "UPDATE mappings SET ignore = true, reviewed = true WHERE input_word = %s" db_cursor.execute(sql, (input_word,)) db_conn.commit() elif confirm.lower() == 'd': print(f" - Deleting mapping") sql = "DELETE FROM mappings WHERE input_word = %s" db_cursor.execute(sql, (input_word,)) db_conn.commit() elif confirm.lower() == 'm': print(f" - Mixed food items") sql = "UPDATE mappings SET reviewed = true, dictionary_word = 'Mixed Food Items', is_food = true WHERE input_word = %s" db_cursor.execute(sql, (input_word,)) db_conn.commit() else: db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) db_conn.commit() db_conn.close()