Spaces:
Paused
Paused
Commit
•
019a7e5
1
Parent(s):
0ff64fc
improvements
Browse files
- audits/1720034413.csv +1 -0
- audits/1720034790.csv +2 -0
- audits/1720034901.csv +2 -0
- audits/1720035211.csv +16 -0
- audits/1720035904.csv +1 -0
- chatgpt_audit.py +30 -7
audits/1720034413.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
input_word,original_dictionary_word,new_dictionary_word
|
audits/1720034790.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
input_word,original_dictionary_word,new_dictionary_word
|
2 |
+
Tea,"Tea, bubble","Tea, hot, leaf, green"
|
audits/1720034901.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
input_word,original_dictionary_word,new_dictionary_word
|
2 |
+
Snack cakes,"Snacks, popcorn, cakes","Snack cake, chocolate"
|
audits/1720035211.csv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input_word,original_dictionary_word,new_dictionary_word
|
2 |
+
arrabiata sauce,"Beef and noodles, no sauce",Spaghetti sauce with meat
|
3 |
+
Canned Meats,"Meat, NFS","Luncheon meat, pork, canned"
|
4 |
+
Chips,Vegetable chips,"Potato chips, NFS"
|
5 |
+
graham crumbs,"Cereals ready-to-eat, QUAKER, HONEY GRAHAM OH!S",Graham crackers
|
6 |
+
Chedder Popcorn,"Popcorn, NFS","Popcorn, ready-to-eat, cheese flavored"
|
7 |
+
Rice Krispies,"Snacks, rice cakes, brown rice, corn","Snacks, KELLOGG, KELLOGG'S RICE KRISPIES TREATS Squares"
|
8 |
+
"Beverages, Propel, with electrolytes and sweeteners, fruit-flavored, water","Beverages, Mixed vegetable and fruit juice drink, with added nutrients","Beverages, Water with added vitamins and minerals, bottles, sweetened, assorted fruit flavors"
|
9 |
+
Spag. Sause,"Sauce, pasta, spaghetti/marinara, ready-to-serve",Spaghetti sauce
|
10 |
+
Baking mix,"Cake or cupcake, white with white icing, bakery",Baked Products
|
11 |
+
Cereal,"Cereal, other, plain","Cereal, other, NFS"
|
12 |
+
RiceCrisps,"Snacks, rice cakes, brown rice, sesame seed",Rice cake
|
13 |
+
Riced Cauliflower,Fried cauliflower,"Cauliflower, cooked, as ingredient"
|
14 |
+
Canned Juice,"Carrot juice, canned","Fruit juice, NFS"
|
15 |
+
Cliff,"Snacks, granola bars, soft, uncoated, plain",Clif Z bar
|
16 |
+
Crackers Bulk,"Cookies, graham crackers, plain or honey (includes cinnamon)","Crackers, wheat"
|
audits/1720035904.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
input_word,original_dictionary_word,new_dictionary_word
|
chatgpt_audit.py
CHANGED
@@ -10,6 +10,8 @@ from Levenshtein import distance
|
|
10 |
from tqdm import tqdm
|
11 |
from db.db_utils import get_connection
|
12 |
from ask_gpt import query_gpt
|
|
|
|
|
13 |
|
14 |
# For any unreviewed mappings, we ask chatgpt to consider:
|
15 |
# 1. The similar_words list
|
@@ -26,6 +28,15 @@ client = OpenAI(api_key=api_key)
|
|
26 |
|
27 |
output_file_path = f'./audits/{int(time.time())}.csv'
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def update_csv(results):
|
30 |
df_results = pd.DataFrame(results, columns=['input_word', 'original_dictionary_word', 'new_dictionary_word',])
|
31 |
df_results.to_csv(output_file_path, index=False)
|
@@ -89,7 +100,7 @@ dictionary = db_cursor.fetchall()
|
|
89 |
dictionary = [item[0] for item in dictionary]
|
90 |
|
91 |
# select all mappings that have not been reviewed
|
92 |
-
db_cursor.execute("SELECT input_word, dictionary_word, similar_words, is_food FROM mappings WHERE
|
93 |
results = db_cursor.fetchall()
|
94 |
|
95 |
# iterate through each row, grab the input_word and ask chatgpt to compare it to the dictionary_word
|
@@ -117,7 +128,7 @@ for row in results:
|
|
117 |
new_row = None
|
118 |
if response == dictionary_word and response in dictionary:
|
119 |
print(f" - Mapping is correct")
|
120 |
-
db_cursor.execute("UPDATE mappings SET
|
121 |
db_conn.commit()
|
122 |
else:
|
123 |
# We should update the mapping in the database
|
@@ -125,6 +136,18 @@ for row in results:
|
|
125 |
# We should set reviewed to 1
|
126 |
# first confirm that the response is in the dictionary
|
127 |
if response in dictionary:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
# prompt the user if we want to make the change or not
|
129 |
print("------")
|
130 |
print("Updating mapping to the following:")
|
@@ -142,9 +165,9 @@ for row in results:
|
|
142 |
|
143 |
if confirm.lower() == 'y':
|
144 |
if response == 'Non-Food Item':
|
145 |
-
sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, reviewed = true WHERE input_word = %s"
|
146 |
else:
|
147 |
-
sql = "UPDATE mappings SET dictionary_word = %s, reviewed = true, is_food = true WHERE input_word = %s"
|
148 |
|
149 |
print(f" - Updating mapping with {response}")
|
150 |
db_cursor.execute(sql, (response, input_word))
|
@@ -156,7 +179,7 @@ for row in results:
|
|
156 |
}
|
157 |
elif confirm.lower() == 'i':
|
158 |
print(f" - Ignoring mapping")
|
159 |
-
sql = "UPDATE mappings SET ignore = true, reviewed = true WHERE input_word = %s"
|
160 |
db_cursor.execute(sql, (input_word,))
|
161 |
db_conn.commit()
|
162 |
elif confirm.lower() == 'd':
|
@@ -166,11 +189,11 @@ for row in results:
|
|
166 |
db_conn.commit()
|
167 |
elif confirm.lower() == 'm':
|
168 |
print(f" - Heterogeneous Mixture")
|
169 |
-
sql = "UPDATE mappings SET reviewed = true, dictionary_word = 'Heterogeneous Mixture', is_food = true WHERE input_word = %s"
|
170 |
db_cursor.execute(sql, (input_word,))
|
171 |
db_conn.commit()
|
172 |
else:
|
173 |
-
db_cursor.execute("UPDATE mappings SET reviewed = true WHERE input_word = %s", (input_word,))
|
174 |
db_conn.commit()
|
175 |
else:
|
176 |
print(f" - Response {response} is not in the dictionary")
|
|
|
10 |
from tqdm import tqdm
|
11 |
from db.db_utils import get_connection
|
12 |
from ask_gpt import query_gpt
|
13 |
+
from utils import generate_embedding, cosine_similarity
|
14 |
+
from sentence_transformers import SentenceTransformer, util
|
15 |
|
16 |
# For any unreviewed mappings, we ask chatgpt to consider:
|
17 |
# 1. The similar_words list
|
|
|
28 |
|
29 |
output_file_path = f'./audits/{int(time.time())}.csv'
|
30 |
|
31 |
+
model_name = 'sentence-transformers/all-mpnet-base-v2'
|
32 |
+
model = SentenceTransformer(model_name)
|
33 |
+
|
34 |
+
def compare_embeddings(old_dictionary_word, new_dictionary_word):
|
35 |
+
old_embedding = generate_embedding(model, old_dictionary_word)
|
36 |
+
new_embedding = generate_embedding(model, new_dictionary_word)
|
37 |
+
cosine_similarity_score = cosine_similarity(old_embedding, new_embedding)
|
38 |
+
return cosine_similarity_score
|
39 |
+
|
40 |
def update_csv(results):
|
41 |
df_results = pd.DataFrame(results, columns=['input_word', 'original_dictionary_word', 'new_dictionary_word',])
|
42 |
df_results.to_csv(output_file_path, index=False)
|
|
|
100 |
dictionary = [item[0] for item in dictionary]
|
101 |
|
102 |
# select all mappings that have not been reviewed
|
103 |
+
db_cursor.execute("SELECT input_word, dictionary_word, similar_words, is_food FROM mappings WHERE gpt_reviewed = false and is_food = true and dictionary_word != 'Heterogeneous Mixture'")
|
104 |
results = db_cursor.fetchall()
|
105 |
|
106 |
# iterate through each row, grab the input_word and ask chatgpt to compare it to the dictionary_word
|
|
|
128 |
new_row = None
|
129 |
if response == dictionary_word and response in dictionary:
|
130 |
print(f" - Mapping is correct")
|
131 |
+
db_cursor.execute("UPDATE mappings SET gpt_reviewed = true WHERE input_word = %s", (input_word,))
|
132 |
db_conn.commit()
|
133 |
else:
|
134 |
# We should update the mapping in the database
|
|
|
136 |
# We should set reviewed to 1
|
137 |
# first confirm that the response is in the dictionary
|
138 |
if response in dictionary:
|
139 |
+
|
140 |
+
# If the response is similar to the original dictionary word, let's just skip it
|
141 |
+
compare_score = compare_embeddings(dictionary_word, response)
|
142 |
+
if compare_score > 0.8:
|
143 |
+
print(f" - Mapping is already similar to the dictionary word")
|
144 |
+
db_cursor.execute("UPDATE mappings SET gpt_reviewed = true WHERE input_word = %s", (input_word,))
|
145 |
+
db_conn.commit()
|
146 |
+
continue
|
147 |
+
else:
|
148 |
+
print(f" - Mapping is not similar to the dictionary word")
|
149 |
+
print(f" - Cosine Similarity: {compare_score}")
|
150 |
+
|
151 |
# prompt the user if we want to make the change or not
|
152 |
print("------")
|
153 |
print("Updating mapping to the following:")
|
|
|
165 |
|
166 |
if confirm.lower() == 'y':
|
167 |
if response == 'Non-Food Item':
|
168 |
+
sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, description = 'Non-Food Item', gpt_reviewed = true, reviewed = true WHERE input_word = %s"
|
169 |
else:
|
170 |
+
sql = "UPDATE mappings SET dictionary_word = %s, gpt_reviewed = true, reviewed = true, is_food = true WHERE input_word = %s"
|
171 |
|
172 |
print(f" - Updating mapping with {response}")
|
173 |
db_cursor.execute(sql, (response, input_word))
|
|
|
179 |
}
|
180 |
elif confirm.lower() == 'i':
|
181 |
print(f" - Ignoring mapping")
|
182 |
+
sql = "UPDATE mappings SET ignore = true, gpt_reviewed = true, reviewed = true WHERE input_word = %s"
|
183 |
db_cursor.execute(sql, (input_word,))
|
184 |
db_conn.commit()
|
185 |
elif confirm.lower() == 'd':
|
|
|
189 |
db_conn.commit()
|
190 |
elif confirm.lower() == 'm':
|
191 |
print(f" - Heterogeneous Mixture")
|
192 |
+
sql = "UPDATE mappings SET gpt_reviewed = true, reviewed = true, dictionary_word = 'Heterogeneous Mixture', is_food = true WHERE input_word = %s"
|
193 |
db_cursor.execute(sql, (input_word,))
|
194 |
db_conn.commit()
|
195 |
else:
|
196 |
+
db_cursor.execute("UPDATE mappings SET gpt_reviewed = true, reviewed = true WHERE input_word = %s", (input_word,))
|
197 |
db_conn.commit()
|
198 |
else:
|
199 |
print(f" - Response {response} is not in the dictionary")
|