brightly-ai / post_import_updates.py
beweinreich's picture
bugfix the post import script
ffcb4f2
raw
history blame
No virus
4.28 kB
from utils import clean_word
from mapping_template import heterogeneous_template, generic_template
from db.db_utils import get_connection, store_mapping_to_db
db_conn = get_connection()
db_cursor = db_conn.cursor()
# Remap certain foundation categories
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beverages' WHERE sr_legacy_food_category = 'Alcoholic Beverages'")
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beans and Lentils' WHERE sr_legacy_food_category = 'Legumes and Legume Products'")
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Condiments and Sauces' WHERE sr_legacy_food_category = 'Soups, Sauces, and Gravies'")
db_conn.commit()
# Add USDA Food Item
db_cursor.execute("""
INSERT INTO "public"."dictionary"
("fdc_id", "description", "sr_legacy_food_category", "wweia_category", "water_content", "dry_matter_content", "leakage")
VALUES
(%s, %s, %s, %s, %s, %s, %s);
""",
(
9999000,
'USDA Food Item',
'Government Donation (Not Counted)',
'Government Donation (Not Counted)',
0,
0,
0,
)
)
db_conn.commit()
# Save all the foundation categories as additional dictionary items, where the dry_matter_content, water_content, and leakage are all the avg from the results table
# you will need to manually load an existings results.csv file into the results table for this to work...
db_cursor.execute("""
CREATE SEQUENCE IF NOT EXISTS dictionary_fdc_id_seq
START WITH 9999001;
""")
db_cursor.execute("""
INSERT INTO dictionary (fdc_id, description, sr_legacy_food_category, dry_matter_content, water_content, leakage)
WITH averages AS (
SELECT sr_legacy_food_category, AVG(dry_matter_content) AS dry_matter_content, AVG(leakage) AS leakage
FROM results
WHERE sr_legacy_food_category IS NOT NULL
GROUP BY sr_legacy_food_category
ORDER BY sr_legacy_food_category
)
SELECT nextval('dictionary_fdc_id_seq'),
sr_legacy_food_category,
sr_legacy_food_category,
dry_matter_content,
1 - dry_matter_content AS water_content,
leakage
FROM averages;
""")
db_conn.commit()
# add a bunch of heterogenous mappings
heterogeneous_inputs = [
"General Groceries",
"Grocery Assortment",
"Food Variety Pack",
"Miscellaneous Edibles",
"Assorted Grocery Items",
"Mixed Pantry Staples",
"Assorted Perishables",
"Grocery Mix",
"Variety Food Box",
"Miscellaneous Food Supplies",
"Mixed Canned Goods",
"Grocery Combo Pack",
"Food Item Collection",
"Mixed Frozen Foods",
"Assorted Non-Perishables",
"Grocery Selection",
"Variety Food Bundle",
"Food Item Mix",
"Miscellaneous Groceries",
"Mixed Dry Goods",
"Assorted Packaged Foods",
"Grocery Variety Box",
]
for item in heterogeneous_inputs:
mapping = heterogeneous_template(item)
store_mapping_to_db(db_cursor, db_conn, mapping)
# we also want to store all the foundation categories and with prefixes/suffix
qualifiers = [
'Assorted',
'Mixed',
'Variety',
'Combo',
'Collection',
'Selection',
'Bundle',
'Pack',
'Box',
'Various',
'Miscellaneous',
'Misc',
'Mixture',
]
db_cursor.execute("SELECT sr_legacy_food_category, dry_matter_content, water_content, leakage FROM dictionary WHERE fdc_id > 9999000 ORDER BY sr_legacy_food_category")
categories = db_cursor.fetchall()
for item in categories:
category = item[0]
dry_matter_content = item[1]
water_content = item[2]
leakage = item[3]
for qualifier in qualifiers:
input_word = f"{qualifier} {category}"
print(f"Storing {input_word}")
cleaned_word = clean_word(input_word)
mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage)
store_mapping_to_db(db_cursor, db_conn, mapping)
input_word = f"{category} {qualifier}"
print(f"Storing {input_word}")
cleaned_word = clean_word(input_word)
mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage)
store_mapping_to_db(db_cursor, db_conn, mapping)
db_conn.close()