File size: 5,917 Bytes
ea58a70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d76c731
00d89f2
 
 
ea58a70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00d89f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea58a70
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import csv
import json
import time
import heapq
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from Levenshtein import distance
from tqdm import tqdm
from db.db_utils import get_connection
from ask_gpt import query_gpt
from similarity_fast import SimilarityFast

# This iterates over every mapping and flags a row if it doesn't think the mapping is correct


# Load variables from a .env file (python-dotenv) before reading the key below.
load_dotenv()

# Module-level OpenAI client shared by the functions in this script; the key
# comes from the OPENAI_API_KEY environment variable (None if it is unset).
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)



def query_gpt(food_item, dictionary_word, similar_words_list):
    """Ask the chat model whether ``food_item`` is mapped to the right word.

    Args:
        food_item: The input word whose mapping is being checked.
        dictionary_word: The dictionary entry it is currently mapped to.
        similar_words_list: Iterable of alternative candidate words (strings)
            offered to the model.

    Returns:
        Whatever ``parse_response`` extracts from the model's JSON reply
        (the value of its ``guess`` key), or ``None`` if parsing failed.
    """
    # NOTE: this definition shadows the `query_gpt` name imported from
    # `ask_gpt` at the top of the file.
    line_separated_words = '\n'.join(similar_words_list)

    # The prompt text (including its indentation) is sent to the model as-is.
    prompt = f"""I have a particular item and I need to know if it's correctly associated with another item. It's ideally the same word, or semantically similar.

      I will also provide a list of other similar words that you could be a better fit.

      You should respond in JSON format with an object that has the key `guess`, and the value is the most similar food item.

      This is important: only return the mapped dictionary word, or a word from the list of similar words.

      The item is: "{food_item}"
      It has been mapped to: "{dictionary_word}"
      Here are some similar words: {line_separated_words}
      """

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    chat_result = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        # Request a JSON object so the reply can be decoded by parse_response.
        response_format={"type": "json_object"},
        messages=chat_messages,
    )

    guess = parse_response(chat_result.choices[0].message.content)

    # Echo the comparison for the operator watching the run.
    print(
        f"Food Item: '{food_item}'",
        f"Dictionary Word: '{dictionary_word}'",
        f"GPT Word: {guess}",
        sep="\n",
    )
    return guess

# Define the function to parse the GPT response
def parse_response(response):
    """Extract the ``guess`` value from a JSON-formatted GPT reply.

    Args:
        response: Raw message content from the model. Expected to be a JSON
            object string, but may be ``None`` or malformed.

    Returns:
        The value stored under the ``guess`` key, or ``None`` when the reply
        is missing, is not valid JSON, is not a JSON object, or has no
        ``guess`` key. Failures are reported on stdout rather than raised so
        that one bad reply does not abort the caller's loop.
    """
    try:
        return json.loads(response)['guess']
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        # TypeError covers a None/non-string response and JSON that decodes
        # to something other than an object (list, string, number).
        print(f"Error parsing response: {response} - {e}")
        return None


# Review pass over every mapping row with reviewed = false.
#
# For each row the fast similarity lookup is re-run. When it agrees with the
# stored dictionary word, GPT is asked to confirm; if GPT returns the same
# dictionary word the row is marked reviewed. Every other outcome is governed
# by INTERACTIVE_CORRECTIONS_ENABLED below.

# Temp stopgap kept from the original script: the interactive correction
# prompts are switched off, so rows whose mapping looks wrong are skipped and
# left unreviewed. Set to True to re-enable the prompts.
INTERACTIVE_CORRECTIONS_ENABLED = False

MARK_REVIEWED_SQL = "UPDATE mappings SET reviewed = true WHERE input_word = %s"

db_conn = get_connection()
try:
    db_cursor = db_conn.cursor()

    # Load the dictionary
    db_cursor.execute("SELECT description FROM dictionary")
    dictionary = [item[0] for item in db_cursor.fetchall()]
    # Set copy so the per-row membership test is O(1) instead of a list scan.
    dictionary_lookup = set(dictionary)

    db_cursor.execute("SELECT input_word, dictionary_word FROM mappings where reviewed = false")
    results = db_cursor.fetchall()

    similarity_fast = SimilarityFast(db_cursor)

    for input_word, dictionary_word in results:
        print("")
        print("=====================================")
        print(f"Checking: '{input_word}'")
        print(f" -> Dictionary: '{dictionary_word}'")

        mapping = similarity_fast.find_most_similar_word(input_word)
        suggested_word = mapping['dictionary_word']

        if suggested_word != dictionary_word:
            # The similarity lookup disagrees with the stored mapping.
            if not INTERACTIVE_CORRECTIONS_ENABLED:
                continue

            print(f"Updating: '{input_word}' to '{suggested_word}'")
            confirm = input("Press 'y' to confirm. Any other key to skip")
            if confirm.lower() == 'y':
                db_cursor.execute(
                    "UPDATE mappings SET dictionary_word = %s, reviewed = true WHERE input_word = %s",
                    (suggested_word, input_word),
                )
            else:
                db_cursor.execute(MARK_REVIEWED_SQL, (input_word,))
            db_conn.commit()
            continue

        # The similarity lookup agrees; ask GPT for a second opinion, offering
        # the similar words plus the two catch-all categories as alternatives.
        similar_words_list = mapping['similar_words'].split('|')
        similar_words_list.append('Non-Food Item')
        similar_words_list.append('Mixed Food Items')

        response = query_gpt(input_word, dictionary_word, similar_words_list)

        # Only act on a non-empty guess that is an actual dictionary entry.
        # The isinstance check keeps an unhashable guess (e.g. a JSON list)
        # from raising on the set lookup; such guesses are skipped.
        if not response or not isinstance(response, str) or response not in dictionary_lookup:
            continue

        if response == dictionary_word:
            print(" -> Correct")
            db_cursor.execute(MARK_REVIEWED_SQL, (input_word,))
            db_conn.commit()
            continue

        # GPT prefers a different dictionary entry.
        if not INTERACTIVE_CORRECTIONS_ENABLED:
            continue

        print(f"Updating: '{input_word}' to '{response}'")
        confirm = input("Press 'y' to confirm, 'i' to ignore, 'd' to delete, 'm' for mixture, any other key to skip: ").lower()
        if confirm == 'y':
            if response == 'Non-Food Item':
                sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, reviewed = true WHERE input_word = %s"
            else:
                sql = "UPDATE mappings SET dictionary_word = %s, reviewed = true, is_food = true WHERE input_word = %s"
            print(f" - Updating mapping with {response}")
            params = (response, input_word)
        elif confirm == 'i':
            print(" - Ignoring mapping")
            sql = "UPDATE mappings SET ignore = true, reviewed = true WHERE input_word = %s"
            params = (input_word,)
        elif confirm == 'd':
            print(" - Deleting mapping")
            sql = "DELETE FROM mappings WHERE input_word = %s"
            params = (input_word,)
        elif confirm == 'm':
            print(" - Mixed food items")
            sql = "UPDATE mappings SET reviewed = true, dictionary_word = 'Mixed Food Items', is_food = true WHERE input_word = %s"
            params = (input_word,)
        else:
            sql = MARK_REVIEWED_SQL
            params = (input_word,)

        db_cursor.execute(sql, params)
        db_conn.commit()
finally:
    # Close the connection even if a query, the GPT call or a prompt raises.
    db_conn.close()