Spaces:
Paused
Paused
import os | |
import csv | |
import json | |
import time | |
import heapq | |
import pandas as pd | |
from openai import OpenAI | |
from dotenv import load_dotenv | |
from Levenshtein import distance | |
from tqdm import tqdm | |
from db.db_utils import get_connection | |
from ask_gpt import query_gpt | |
from similarity_fast import SimilarityFast | |
# This iterates over every mapping and flags a row if it doesn't think the mapping is correct | |
load_dotenv() | |
api_key = os.getenv("OPENAI_API_KEY") | |
client = OpenAI(api_key=api_key) | |
def query_gpt(food_item, dictionary_word, similar_words_list): | |
line_separated_words = '\n'.join(similar_words_list) | |
prompt = ( | |
f"""I have a particular item and I need to know if it's correctly associated with another item. It's ideally the same word, or semantically similar. | |
I will also provide a list of other similar words that you could be a better fit. | |
You should respond in JSON format with an object that has the key `guess`, and the value is the most similar food item. | |
This is important: only return the mapped dictionary word, or a word from the list of similar words. | |
The item is: "{food_item}" | |
It has been mapped to: "{dictionary_word}" | |
Here are some similar words: {line_separated_words} | |
""") | |
completion = client.chat.completions.create( | |
messages=[ | |
{"role": "system", "content": "You are a helpful assistant."}, | |
{"role": "user", "content": prompt} | |
], | |
model="gpt-3.5-turbo-1106", | |
response_format={"type": "json_object"}, | |
) | |
response = completion.choices[0].message.content | |
parsed = parse_response(response) | |
print(f"Food Item: '{food_item}'") | |
print(f"Dictionary Word: '{dictionary_word}'") | |
print(f"GPT Word: {parsed}") | |
return parsed | |
# Define the function to parse the GPT response | |
def parse_response(response): | |
try: | |
result = json.loads(response) | |
return result['guess'] | |
except (json.JSONDecodeError, KeyError) as e: | |
print(f"Error parsing response: {response} - {e}") | |
return None | |
db_conn = get_connection() | |
db_cursor = db_conn.cursor() | |
# Load the dictionary | |
db_cursor.execute("SELECT description FROM dictionary") | |
dictionary = db_cursor.fetchall() | |
dictionary = [item[0] for item in dictionary] | |
db_cursor.execute("SELECT input_word, dictionary_word FROM mappings where reviewed = false") | |
results = db_cursor.fetchall() | |
similarity_fast = SimilarityFast(db_cursor) | |
for row in results: | |
input_word = row[0] | |
dictionary_word = row[1] | |
print("") | |
print("=====================================") | |
print(f"Checking: '{input_word}'") | |
print(f" -> Dictionary: '{dictionary_word}'") | |
mapping = similarity_fast.find_most_similar_word(input_word) | |
if mapping['dictionary_word'] != dictionary_word: | |
# temp stopgap | |
continue | |
print(f"Updating: '{input_word}' to '{mapping['dictionary_word']}'") | |
confirm = input(f"Press 'y' to confirm. Any other key to skip") | |
if confirm.lower() == 'y': | |
db_cursor.execute("UPDATE mappings SET dictionary_word = %s, reviewed = true WHERE input_word = %s", (mapping['dictionary_word'], input_word,)) | |
db_conn.commit() | |
else: | |
db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) | |
db_conn.commit() | |
else: | |
similar_words_list = mapping['similar_words'].split('|') | |
similar_words_list.append('Non-Food Item') | |
similar_words_list.append('Mixed Food Items') | |
response = query_gpt(input_word, dictionary_word, similar_words_list) | |
if response: | |
if response in dictionary: | |
if response == dictionary_word: | |
print(" -> Correct") | |
db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) | |
db_conn.commit() | |
elif response != dictionary_word: | |
# temp stopgap | |
continue | |
print(f"Updating: '{input_word}' to '{response}'") | |
confirm = input("Press 'y' to confirm, 'i' to ignore, 'd' to delete, 'm' for mixture, any other key to skip: ") | |
if confirm.lower() == 'y': | |
if response == 'Non-Food Item': | |
sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, reviewed = true WHERE input_word = %s" | |
else: | |
sql = "UPDATE mappings SET dictionary_word = %s, reviewed = true, is_food = true WHERE input_word = %s" | |
print(f" - Updating mapping with {response}") | |
db_cursor.execute(sql, (response, input_word)) | |
db_conn.commit() | |
elif confirm.lower() == 'i': | |
print(f" - Ignoring mapping") | |
sql = "UPDATE mappings SET ignore = true, reviewed = true WHERE input_word = %s" | |
db_cursor.execute(sql, (input_word,)) | |
db_conn.commit() | |
elif confirm.lower() == 'd': | |
print(f" - Deleting mapping") | |
sql = "DELETE FROM mappings WHERE input_word = %s" | |
db_cursor.execute(sql, (input_word,)) | |
db_conn.commit() | |
elif confirm.lower() == 'm': | |
print(f" - Mixed food items") | |
sql = "UPDATE mappings SET reviewed = true, dictionary_word = 'Mixed Food Items', is_food = true WHERE input_word = %s" | |
db_cursor.execute(sql, (input_word,)) | |
db_conn.commit() | |
else: | |
db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,)) | |
db_conn.commit() | |
db_conn.close() |