from utils import clean_word from mapping_template import heterogeneous_template, generic_template from db.db_utils import get_connection, store_mapping_to_db db_conn = get_connection() db_cursor = db_conn.cursor() # Remap certain foundation categories db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beverages' WHERE sr_legacy_food_category = 'Alcoholic Beverages'") db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beans and Lentils' WHERE sr_legacy_food_category = 'Legumes and Legume Products'") db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Condiments and Sauces' WHERE sr_legacy_food_category = 'Soups, Sauces, and Gravies'") db_conn.commit() # Add USDA Food Item db_cursor.execute(""" INSERT INTO "public"."dictionary" ("fdc_id", "description", "sr_legacy_food_category", "wweia_category", "water_content", "dry_matter_content", "leakage") VALUES (%s, %s, %s, %s, %s, %s, %s); """, ( 9999000, 'USDA Food Item', 'Government Donation (Not Counted)', 'Government Donation (Not Counted)', 0, 0, 0, ) ) db_conn.commit() # Save all the foundation categories as additional dictionary items, where the dry_matter_content, water_content, and leakage are all the avg from the results table # you will need to manually load an existings results.csv file into the results table for this to work... db_cursor.execute(""" CREATE SEQUENCE IF NOT EXISTS dictionary_fdc_id_seq START WITH 9999001; """) db_cursor.execute(""" INSERT INTO dictionary (fdc_id, description, sr_legacy_food_category, dry_matter_content, water_content, leakage) WITH averages AS ( SELECT sr_legacy_food_category, AVG(dry_matter_content) AS dry_matter_content, AVG(leakage) AS leakage FROM results WHERE sr_legacy_food_category IS NOT NULL GROUP BY sr_legacy_food_category ORDER BY sr_legacy_food_category ) SELECT nextval('dictionary_fdc_id_seq'), sr_legacy_food_category, sr_legacy_food_category, dry_matter_content, 1 - dry_matter_content AS water_content, leakage FROM averages; """) db_conn.commit() # add a bunch of heterogenous mappings heterogeneous_inputs = [ "General Groceries", "Grocery Assortment", "Food Variety Pack", "Miscellaneous Edibles", "Assorted Grocery Items", "Mixed Pantry Staples", "Assorted Perishables", "Grocery Mix", "Variety Food Box", "Miscellaneous Food Supplies", "Mixed Canned Goods", "Grocery Combo Pack", "Food Item Collection", "Mixed Frozen Foods", "Assorted Non-Perishables", "Grocery Selection", "Variety Food Bundle", "Food Item Mix", "Miscellaneous Groceries", "Mixed Dry Goods", "Assorted Packaged Foods", "Grocery Variety Box", ] for item in heterogeneous_inputs: mapping = heterogeneous_template(item) store_mapping_to_db(db_cursor, db_conn, mapping) # we also want to store all the foundation categories and with prefixes/suffix qualifiers = [ 'Assorted', 'Mixed', 'Variety', 'Combo', 'Collection', 'Selection', 'Bundle', 'Pack', 'Box', 'Various', 'Miscellaneous', 'Misc', 'Mixture', ] db_cursor.execute("SELECT sr_legacy_food_category, dry_matter_content, water_content, leakage FROM dictionary WHERE fdc_id > 9999000 ORDER BY sr_legacy_food_category") categories = db_cursor.fetchall() for item in categories: category = item[0] dry_matter_content = item[1] water_content = item[2] leakage = item[3] for qualifier in qualifiers: input_word = f"{qualifier} {category}" print(f"Storing {input_word}") cleaned_word = clean_word(input_word) mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage, "Category") store_mapping_to_db(db_cursor, db_conn, mapping) input_word = f"{category} {qualifier}" print(f"Storing {input_word}") cleaned_word = clean_word(input_word) mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage, "Category") store_mapping_to_db(db_cursor, db_conn, mapping) db_conn.close()