# ecoChef / parser.py
# tejasashinde's picture
# Added project
# d9d1fcf
import dateparser
import re
from datetime import datetime
import json
import os
import inflect
# Load shelf-life data from spoilage_data.json (path is relative to the
# current working directory). parse_ingredients looks entries up by lowercase
# ingredient name and uses integer values as the estimated days left.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding.
with open("spoilage_data.json", "r", encoding="utf-8") as f:
    shelf_life_data = json.load(f)

# Shared inflect engine; parse_ingredients uses it to singularize names
# before retrying the shelf-life lookup.
p = inflect.engine()
def parse_ingredients(text: str) -> list:
    """Parse a comma-separated ingredient string into structured records.

    Each comma-separated item may carry a quantity, an optional unit and an
    optional expiry phrase, for example ``"2 kg rice"``, ``"milk 1l"`` or
    ``"eggs use by Friday"``. Blank items (empty input, trailing or doubled
    commas) are skipped.

    Args:
        text: Comma-separated ingredient descriptions.

    Returns:
        A list with one dict per non-blank item, with the keys:

        * ``raw`` - the stripped item text as given.
        * ``name`` - lowercase ingredient name.
        * ``quantity`` - quantity as a string (``"1"`` when none was found).
        * ``unit`` - lowercase unit, or ``""``.
        * ``expiry_date`` - ISO date string when an expiry phrase was parsed,
          otherwise ``None``.
        * ``days_left`` - days until the parsed expiry date, or the integer
          shelf-life value found in ``shelf_life_data``, or ``None``.
        * ``note`` - always ``"ok"``.
    """
    # Alternatives are tried left to right, so multi-word keywords must come
    # before their prefixes: with "exp" ahead of "exp dt", the latter could
    # never match and "dt" leaked into the date string.
    expiry_re = re.compile(
        r"\b(?:expiration date|exp dt|expiring|expires|expiry|expire|exp"
        r"|use by|best before|by|from|on)\b\s*(.*)",
        flags=re.IGNORECASE,
    )
    # Either "<qty>[unit] <name>" or "<name> <qty>[unit]". Quantities may be
    # decimal ("1.5 kg flour"); an integer-only pattern failed to match such
    # items and left them with an empty name.
    item_re = re.compile(
        r"(?:(\d+(?:\.\d+)?)\s*([a-zA-Z]+)?\s+([a-zA-Z ]+))"
        r"|([a-zA-Z ]+)\s+(\d+(?:\.\d+)?)([a-zA-Z]*)"
    )
    today = datetime.today().date()

    parsed = []
    for raw_line in (item.strip() for item in text.split(",")):
        if not raw_line:
            # Nothing to parse; avoids emitting a record with an empty name.
            continue

        line = raw_line
        expiry = None
        quantity = "1"
        unit = ""

        # Extract the expiry phrase. It is only removed from the line when
        # the text after the keyword actually parses as a date, so names that
        # merely contain a keyword (e.g. "corn on the cob") stay intact.
        expiry_match = expiry_re.search(line)
        if expiry_match:
            maybe_date = dateparser.parse(expiry_match.group(1))
            if maybe_date:
                expiry = maybe_date.date()
                line = line.replace(expiry_match.group(0), "").strip()

        # Extract quantity + unit + name.
        match = item_re.match(line)
        if match is None:
            # No recognizable quantity layout: keep the text before any digit.
            name_clean = re.sub(r"\d+.*", "", line).strip()
        elif match.group(1):
            quantity = match.group(1)
            unit = match.group(2) or ""
            name_clean = match.group(3).strip()
        else:
            name_clean = match.group(4).strip()
            quantity = match.group(5)
            unit = match.group(6) or ""

        # Calculate days left.
        if expiry:
            days_left = (expiry - today).days
        else:
            # Estimate from shelf-life data: exact name first, then singular.
            shelf_key = name_clean.lower()
            shelf_life = shelf_life_data.get(shelf_key)
            if not shelf_life and shelf_key:
                # Guard on a non-empty key: recent inflect releases appear to
                # reject empty words instead of returning False.
                singular_key = p.singular_noun(shelf_key) or shelf_key
                shelf_life = shelf_life_data.get(singular_key)
            days_left = shelf_life if isinstance(shelf_life, int) else None

        parsed.append({
            "raw": raw_line,
            "name": name_clean.lower(),
            "quantity": quantity,
            "unit": unit.lower(),
            "expiry_date": expiry.isoformat() if expiry else None,
            "days_left": days_left,
            "note": "ok",
        })
    return parsed