Spaces:
Paused
Paused
from utils import clean_word | |
from mapping_template import heterogeneous_template, generic_template | |
from db.db_utils import get_connection, store_mapping_to_db | |
db_conn = get_connection() | |
db_cursor = db_conn.cursor() | |
# Remap certain foundation categories | |
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beverages' WHERE sr_legacy_food_category = 'Alcoholic Beverages'") | |
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beans and Lentils' WHERE sr_legacy_food_category = 'Legumes and Legume Products'") | |
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Condiments and Sauces' WHERE sr_legacy_food_category = 'Soups, Sauces, and Gravies'") | |
db_conn.commit() | |
# Add USDA Food Item | |
db_cursor.execute(""" | |
INSERT INTO "public"."dictionary" | |
("fdc_id", "description", "sr_legacy_food_category", "wweia_category", "water_content", "dry_matter_content", "leakage") | |
VALUES | |
(%s, %s, %s, %s, %s, %s, %s); | |
""", | |
( | |
9999000, | |
'USDA Food Item', | |
'Government Donation (Not Counted)', | |
'Government Donation (Not Counted)', | |
0, | |
0, | |
0, | |
) | |
) | |
db_conn.commit() | |
# Save all the foundation categories as additional dictionary items, where the dry_matter_content, water_content, and leakage are all the avg from the results table | |
# you will need to manually load an existings results.csv file into the results table for this to work... | |
db_cursor.execute(""" | |
CREATE SEQUENCE IF NOT EXISTS dictionary_fdc_id_seq | |
START WITH 9999001; | |
""") | |
db_cursor.execute(""" | |
INSERT INTO dictionary (fdc_id, description, sr_legacy_food_category, dry_matter_content, water_content, leakage) | |
WITH averages AS ( | |
SELECT sr_legacy_food_category, AVG(dry_matter_content) AS dry_matter_content, AVG(leakage) AS leakage | |
FROM results | |
WHERE sr_legacy_food_category IS NOT NULL | |
GROUP BY sr_legacy_food_category | |
ORDER BY sr_legacy_food_category | |
) | |
SELECT nextval('dictionary_fdc_id_seq'), | |
sr_legacy_food_category, | |
sr_legacy_food_category, | |
dry_matter_content, | |
1 - dry_matter_content AS water_content, | |
leakage | |
FROM averages; | |
""") | |
db_conn.commit() | |
# add a bunch of heterogenous mappings | |
heterogeneous_inputs = [ | |
"General Groceries", | |
"Grocery Assortment", | |
"Food Variety Pack", | |
"Miscellaneous Edibles", | |
"Assorted Grocery Items", | |
"Mixed Pantry Staples", | |
"Assorted Perishables", | |
"Grocery Mix", | |
"Variety Food Box", | |
"Miscellaneous Food Supplies", | |
"Mixed Canned Goods", | |
"Grocery Combo Pack", | |
"Food Item Collection", | |
"Mixed Frozen Foods", | |
"Assorted Non-Perishables", | |
"Grocery Selection", | |
"Variety Food Bundle", | |
"Food Item Mix", | |
"Miscellaneous Groceries", | |
"Mixed Dry Goods", | |
"Assorted Packaged Foods", | |
"Grocery Variety Box", | |
] | |
for item in heterogeneous_inputs: | |
mapping = heterogeneous_template(item) | |
store_mapping_to_db(db_cursor, db_conn, mapping) | |
# we also want to store all the foundation categories and with prefixes/suffix | |
qualifiers = [ | |
'Assorted', | |
'Mixed', | |
'Variety', | |
'Combo', | |
'Collection', | |
'Selection', | |
'Bundle', | |
'Pack', | |
'Box', | |
'Various', | |
'Miscellaneous', | |
'Misc', | |
'Mixture', | |
] | |
db_cursor.execute("SELECT sr_legacy_food_category, dry_matter_content, water_content, leakage FROM dictionary WHERE fdc_id > 9999000 ORDER BY sr_legacy_food_category") | |
categories = db_cursor.fetchall() | |
for item in categories: | |
category = item[0] | |
dry_matter_content = item[1] | |
water_content = item[2] | |
leakage = item[3] | |
for qualifier in qualifiers: | |
input_word = f"{qualifier} {category}" | |
print(f"Storing {input_word}") | |
cleaned_word = clean_word(input_word) | |
mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage) | |
store_mapping_to_db(db_cursor, db_conn, mapping) | |
input_word = f"{category} {qualifier}" | |
print(f"Storing {input_word}") | |
cleaned_word = clean_word(input_word) | |
mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage) | |
store_mapping_to_db(db_cursor, db_conn, mapping) | |
db_conn.close() |