File size: 4,275 Bytes
1c8b854
 
 
 
 
 
 
 
 
 
 
 
 
ffcb4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c8b854
 
 
ffcb4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c8b854
 
ffcb4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c8b854
ffcb4f2
 
 
1c8b854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ffcb4f2
1c8b854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from utils import clean_word
from mapping_template import heterogeneous_template, generic_template
from db.db_utils import get_connection, store_mapping_to_db

db_conn = get_connection()
db_cursor = db_conn.cursor()

# Remap certain foundation categories
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beverages' WHERE sr_legacy_food_category = 'Alcoholic Beverages'")
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Beans and Lentils' WHERE sr_legacy_food_category = 'Legumes and Legume Products'")
db_cursor.execute("UPDATE dictionary SET sr_legacy_food_category = 'Condiments and Sauces' WHERE sr_legacy_food_category = 'Soups, Sauces, and Gravies'")
db_conn.commit()

# Add USDA Food Item
db_cursor.execute("""
  INSERT INTO "public"."dictionary"
  ("fdc_id", "description", "sr_legacy_food_category", "wweia_category", "water_content", "dry_matter_content", "leakage")
  VALUES
  (%s, %s, %s, %s, %s, %s, %s);
  """, 
  (
    9999000,
    'USDA Food Item',
    'Government Donation (Not Counted)',
    'Government Donation (Not Counted)',
    0,
    0,
    0,
  )
)
db_conn.commit()

# Save all the foundation categories as additional dictionary items, where the dry_matter_content, water_content, and leakage are all the avg from the results table

# you will need to manually load an existings results.csv file into the results table for this to work...
db_cursor.execute("""
    CREATE SEQUENCE IF NOT EXISTS dictionary_fdc_id_seq
    START WITH 9999001;
""")
db_cursor.execute("""
    INSERT INTO dictionary (fdc_id, description, sr_legacy_food_category, dry_matter_content, water_content, leakage)
    WITH averages AS (
        SELECT sr_legacy_food_category, AVG(dry_matter_content) AS dry_matter_content, AVG(leakage) AS leakage
        FROM results
        WHERE sr_legacy_food_category IS NOT NULL
        GROUP BY sr_legacy_food_category
        ORDER BY sr_legacy_food_category
    )
    SELECT nextval('dictionary_fdc_id_seq'), 
          sr_legacy_food_category, 
          sr_legacy_food_category,
          dry_matter_content, 
          1 - dry_matter_content AS water_content,
          leakage
    FROM averages;
""")
db_conn.commit()


# add a bunch of heterogenous mappings
heterogeneous_inputs = [
  "General Groceries",
  "Grocery Assortment",
  "Food Variety Pack",
  "Miscellaneous Edibles",
  "Assorted Grocery Items",
  "Mixed Pantry Staples",
  "Assorted Perishables",
  "Grocery Mix",
  "Variety Food Box",
  "Miscellaneous Food Supplies",
  "Mixed Canned Goods",
  "Grocery Combo Pack",
  "Food Item Collection",
  "Mixed Frozen Foods",
  "Assorted Non-Perishables",
  "Grocery Selection",
  "Variety Food Bundle",
  "Food Item Mix",
  "Miscellaneous Groceries",
  "Mixed Dry Goods",
  "Assorted Packaged Foods",
  "Grocery Variety Box",
]

for item in heterogeneous_inputs:
    mapping = heterogeneous_template(item)
    store_mapping_to_db(db_cursor, db_conn, mapping)

# we also want to store all the foundation categories and with prefixes/suffix

qualifiers = [
  'Assorted',
  'Mixed',
  'Variety',
  'Combo',
  'Collection',
  'Selection',
  'Bundle',
  'Pack',
  'Box',
  'Various',
  'Miscellaneous',
  'Misc',
  'Mixture',
]

db_cursor.execute("SELECT sr_legacy_food_category, dry_matter_content, water_content, leakage FROM dictionary WHERE fdc_id > 9999000 ORDER BY sr_legacy_food_category")
categories = db_cursor.fetchall()

for item in categories:
    category = item[0]
    dry_matter_content = item[1]
    water_content = item[2]
    leakage = item[3]

    for qualifier in qualifiers:
        input_word = f"{qualifier} {category}"
        print(f"Storing {input_word}")
        cleaned_word = clean_word(input_word)
        mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage)
        store_mapping_to_db(db_cursor, db_conn, mapping)
        
        input_word = f"{category} {qualifier}"
        print(f"Storing {input_word}")
        cleaned_word = clean_word(input_word)
        mapping = generic_template(input_word, cleaned_word, 1, 1, None, True, 1, category, category, dry_matter_content, water_content, leakage)
        store_mapping_to_db(db_cursor, db_conn, mapping)


db_conn.close()