Spaces:

ccolas
/

TastyPiano

Runtime error

File size: 11,404 Bytes

93c029f

# This script loads the list and profiles of our ingredients selection.
# It defines rules to recognize ingredients from the list in recipes and the function to extract that information from ingredient strings.

import pandas as pd
from src.cocktails.config import INGREDIENTS_LIST_PATH, COCKTAILS_CSV_DATA
import numpy as np

# Load the ingredient profiles table; ingredient names are lower-cased so they can be
# compared against lower-cased recipe strings.
ingredient_profiles = pd.read_csv(INGREDIENTS_LIST_PATH)
ingredient_list = [ing.lower() for ing in ingredient_profiles['ingredient']]
n_ingredients = len(ingredient_list)
# position of each ingredient in ingredient_list
ingredient2ingredient_id = {ing: ing_id for ing_id, ing in enumerate(ingredient_list)}

ingredients_types = sorted(set(ingredient_profiles['type']))
# for each type, get all ingredients (kept in ingredient_list order).
# Names and types are paired row by row once, instead of looking every ingredient up
# again with list.index() for every type.
_ingredient_type_pairs = list(zip(ingredient_list, ingredient_profiles['type']))
ing_per_type = [[ing for ing, ing_type in _ingredient_type_pairs if ing_type == wanted_type]
                for wanted_type in ingredients_types]
ingredients_per_type = dict(zip(ingredients_types, ing_per_type))

bubble_ingredients = ['soda', 'ginger beer', 'tonic', 'sparkling wine']
# Rules to recognize ingredients in recipes (read by find_ingredient_from_str).
# Each ingredient maps to a list of alternative rules with an OR relation: only one needs to be satisfied.
# A rule is either a single expression (a string) or a nested [] of expressions;
# within a nested [], expressions apply with an AND relation: all of them need to be satisfied.
# ~ indicates that the following expression must NOT appear.
# A plain expression indicates that the expression MUST appear.
# Expressions are matched as substrings of the lower-cased ingredient string.
ingredient_search = {#'salt': ['salt'],
                     'lime juice': [['lime', '~soda', '~lemonade', '~cordial']],
                     'lemon juice': [['lemon', '~soda', '~lemonade']],
                     'angostura': [['angostura', '~orange'],
                                   ['bitter', '~campari', '~orange', '~red', '~italian', '~fernet']],
                     'orange bitters': [['orange', 'bitter', '~bittersweet']],
                     'orange juice': [['orange', '~bitter', '~jam', '~marmalade', '~liqueur', '~water'],
                                      ['orange', 'squeeze']],
                     'pineapple juice': [['pineapple']],
                     # 'apple juice': [['apple', 'juice', '~pine']],
                     'cranberry juice': [['cranberry', 'juice']],
                     'cointreau': ['cointreau', 'triple sec', 'grand marnier', 'curaçao', 'curacao'],
                     'luxardo maraschino': ['luxardo', 'maraschino', 'kirsch'],
                     'amaretto': ['amaretto'],
                     'benedictine': ['benedictine', 'bénédictine', 'bénedictine', 'benédictine'],
                     'campari': ['campari', ['italian', 'red', 'bitter'], 'aperol', 'bittersweet', 'aperitivo', 'orange-red'],
                     # 'campari': ['campari', ['italian', 'red', 'bitter']],
                     # 'crème de violette': [['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
                     # 'aperol': ['aperol', 'bittersweet', 'aperitivo', 'orange-red'],
                     'green chartreuse': ['chartreuse'],
                     'black raspberry liqueur': [['cassis', 'liqueur'],
                                                 ['black raspberry', 'liqueur'],
                                                 ['raspberry', 'liqueur'],
                                                 ['strawberry', 'liqueur'],
                                                 ['blackberry', 'liqueur'],
                                                 ['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
                     # 'simple syrup': [],
                     # 'drambuie': ['drambuie'],
                     # 'fernet branca': ['fernet', 'branca'],
                     'gin': [['gin', '~sloe', '~ginger']],
                     'vodka': ['vodka'],
                     'cuban rum': [['rum', 'puerto rican'], ['light', 'rum'], ['white', 'rum'], ['rum', 'havana', '~7'], ['rum', 'bacardi']],
                     'cognac': [['cognac', '~grand marnier', '~cointreau', '~orange']],
                     # 'bourbon': [['bourbon', '~liqueur']],
                     # 'tequila': ['tequila', 'pisco'],
                     # 'tequila': ['tequila'],
                     'scotch': ['scotch'],
                     'dark rum': [['rum', 'age', '~bacardi', '~havana'],
                                  ['rum', 'dark', '~bacardi', '~havana'],
                                  ['rum', 'old', '~bacardi', '~havana'],
                                  ['rum', 'old', '7'],
                                  ['rum', 'havana', '7'],
                                  ['havana', 'rum', 'especial']],
                     'absinthe': ['absinthe'],
                     'rye whiskey': ['rye', ['bourbon', '~liqueur']],
                     # 'rye whiskey': ['rye'],
                     'apricot brandy': [['apricot', 'brandy']],
                     # 'pisco': ['pisco'],
                     # 'cachaça': ['cachaça', 'cachaca'],
                     'egg': [['egg', 'white', '~yolk', '~whole']],
                     'soda': [['soda', 'water', '~lemon', '~lime']],
                     'mint': ['mint'],
                     'sparkling wine': ['sparkling wine', 'prosecco', 'champagne'],
                     'ginger beer': [['ginger', 'beer'], ['ginger', 'ale']],
                     'tonic': [['tonic'], ['7up'], ['sprite']],
                     # 'espresso': ['espresso', 'expresso', ['café', '~liqueur', '~cream'],
                     #              ['cafe', '~liqueur', '~cream'],
                     #              ['coffee', '~liqueur', '~cream']],
                     # 'southern comfort': ['southern comfort'],
                     # 'cola': ['cola', 'coke', 'pepsi'],
                     'double syrup': [['sugar','~raspberry'], ['simple', 'syrup'], ['double', 'syrup']],
                     # 'grenadine': ['grenadine', ['pomegranate', 'syrup']],
                     'grenadine': ['grenadine', ['pomegranate', 'syrup'], ['raspberry', 'syrup', '~black']],
                     'honey syrup': ['honey', ['maple', 'syrup']],
                     # 'raspberry syrup': [['raspberry', 'syrup', '~black']],
                     'dry vermouth': [['vermouth', 'dry'], ['vermouth', 'white'], ['vermouth', 'french'], 'lillet'],
                     'sweet vermouth': [['vermouth', 'sweet'], ['vermouth', 'red'], ['vermouth', 'italian']],
                     # 'lillet blanc': ['lillet'],
                     'water': [['water', '~sugar', '~coconut', '~soda', '~tonic', '~honey', '~orange', '~melon']]
                     }
# Check, at import time, that there is exactly one rule entry for every ingredient in the list
# (and no rule entry for an ingredient that is not in the list).
assert sorted(ingredient_list) == sorted(ingredient_search.keys()), 'ing search dict keys do not match ingredient list'

def get_ingredients_info():
    """Summarise which ingredients are used in the cocktail dataset.

    Reads the CSV at COCKTAILS_CSV_DATA and scans it with get_max_n_ingredients.

    Returns:
        A tuple ``(max_ingredients, ingredient_list, ind_alcohol)``: the largest number of
        ingredients found in a single recipe, the sorted list of ingredients that occur in
        the data, and the positions in that list of the ingredients that are a liquor, a
        liqueur, a vermouth or 'sparkling wine'.
    """
    cocktails = pd.read_csv(COCKTAILS_CSV_DATA)
    max_ingredients, used, liquors, liqueurs, vermouths = get_max_n_ingredients(cocktails)
    used_sorted = sorted(used)
    # sparkling wine is counted as alcohol although it is not one of the three scanned types
    alcoholic = liquors | liqueurs | vermouths | {'sparkling wine'}
    ind_alcohol = [pos for pos, name in enumerate(used_sorted) if name in alcoholic]
    return max_ingredients, used_sorted, ind_alcohol

def get_max_n_ingredients(data):
    """Scan every recipe of ``data`` and collect ingredient statistics.

    Args:
        data: table with a 'names' column (its length fixes the number of recipes scanned)
            and an 'ingredients_str' column holding formatted ingredient strings.

    Returns:
        A tuple ``(max_count, ingredient_set, alcohol_set, liqueur_set, vermouth_set)``:
        the largest ingredient count of a single recipe, the set of all ingredients seen,
        and the subsets of those listed under the 'liquor', 'liqueur' and 'vermouth' types.
    """
    recipes = np.array(data['ingredients_str'])
    ingredient_set, alcohol_set, liqueur_set, vermouth_set = set(), set(), set(), set()
    # ingredient type -> set collecting the ingredients of that type
    buckets = (('liquor', alcohol_set), ('liqueur', liqueur_set), ('vermouth', vermouth_set))
    max_count = 0
    for row in range(len(data['names'])):
        names, _ = extract_ingredients(recipes[row])
        if len(names) > max_count:
            max_count = len(names)
        for name in names:
            ingredient_set.add(name)
            for type_name, bucket in buckets:
                if name in ingredients_per_type[type_name]:
                    bucket.add(name)
    return max_count, ingredient_set, alcohol_set, liqueur_set, vermouth_set

def find_ingredient_from_str(ing_str):
    """Assign an ingredient string to one ingredient of the list, following ingredient_search.

    The string is lower-cased and every ingredient's rules are evaluated by substring
    matching. An ingredient matches when at least one of its rules is satisfied; a rule
    that is a list is satisfied only when all of its expressions are.

    Args:
        ing_str: free-text ingredient description from a recipe.

    Returns:
        A tuple ``(flag, ingredient)``:
          * exactly one ingredient matches -> ``(False, ingredient)``
          * no ingredient matches          -> ``(True, None)``
          * several ingredients match      -> ``(True, first matching ingredient)``, after
            printing the lower-cased string and every matching ingredient.
        NOTE(review): the flag is therefore True when the string could NOT be resolved
        unambiguously. The previous comment described the opposite polarity; callers
        presumably reject the cocktail when the flag is True - confirm against call sites.

    Raises:
        ValueError: if a rule is neither a string nor a list.
    """
    ing_str = ing_str.lower()

    def expression_holds(expression):
        # '~x' requires x to be absent, a plain 'x' requires x to be present
        if expression.startswith('~'):
            return expression[1:] not in ing_str
        return expression in ing_str

    def rule_holds(rule):
        if isinstance(rule, str):
            return expression_holds(rule)
        if isinstance(rule, list):
            return all(expression_holds(expression) for expression in rule)
        raise ValueError(f'ingredient rule must be a str or a list, got {type(rule).__name__}')

    matches = []
    for name in ingredient_list:
        # evaluate every rule (no short-circuit) so that a malformed rule is always reported
        rule_results = [rule_holds(rule) for rule in ingredient_search[name]]
        if any(rule_results):
            matches.append(name)

    if len(matches) > 1:
        # ambiguous: report the string and all candidates, keep the first one
        print(ing_str)
        for name in matches:
            print(name)
        return True, matches[0]
    if not matches:
        return True, None
    return False, matches[0]

def get_cocktails_per_ingredient(ing_strs):
    """Index cocktails by the ingredients they contain.

    Args:
        ing_strs: sequence of formatted ingredient strings, one per cocktail.

    Returns:
        A dict mapping every ingredient of ingredient_list to the list of positions (in
        ``ing_strs``) of the cocktails that contain it.
    """
    cocktails_per_ing = {name: [] for name in ingredient_list}
    for cocktail_idx, recipe in enumerate(ing_strs):
        for name in extract_ingredients(recipe)[0]:
            cocktails_per_ing[name].append(cocktail_idx)
    return cocktails_per_ing

def extract_ingredients(ingredient_str):
    """Parse a formatted ingredient string (reverse of format_ingredients).

    Args:
        ingredient_str: string of the form ``'[(name,quantity),(name,quantity),...]'``.

    Returns:
        A tuple ``(ingredients, quantities)`` of two parallel lists: the ingredient names
        and their quantities converted to float.
    """
    # drop the enclosing brackets, then split into alternating '(name' / 'quantity)' tokens
    tokens = ingredient_str[1:-1].split(',')
    n_pairs = len(tokens) // 2
    name_tokens = tokens[0:2 * n_pairs:2]
    quantity_tokens = tokens[1:2 * n_pairs:2]
    # strip the leading '(' of names and the trailing ')' of quantities
    ingredients = [token[1:] for token in name_tokens]
    quantities = [float(token[:-1]) for token in quantity_tokens]
    return ingredients, quantities

def format_ingredients(ingredients, quantities):
    """Format an ingredient string from ingredients and quantities (reverse of extract_ingredients).

    Args:
        ingredients: sequence of ingredient names; a single trailing space, if present,
            is removed from each name.
        quantities: sequence of quantities, paired with ``ingredients`` by position.

    Returns:
        A string of the form ``'[(name,quantity),(name,quantity),...]'``; ``'[]'`` when
        there is nothing to format.
    """
    pairs = []
    for ing, q in zip(ingredients, quantities):
        # endswith() also copes with an empty name, where ing[-1] would raise IndexError
        name = ing[:-1] if ing.endswith(' ') else ing
        pairs.append(f'({name},{q})')
    # join() keeps the opening bracket for empty input (trimming the trailing comma did not)
    return '[' + ','.join(pairs) + ']'


def get_ingredient_count(data):
    """Count, over the whole dataset, how many kept recipes use each ingredient.

    Args:
        data: table with 'names', 'to_keep' and 'ingredients_str' columns; rows whose
            'to_keep' value is falsy are ignored.

    Returns:
        A dict mapping every ingredient of ingredient_list to its number of occurrences.
    """
    ingredient_counts = {name: 0 for name in ingredient_list}
    for row in range(len(data['names'])):
        if not data['to_keep'][row]:
            continue
        recipe_ingredients, _ = extract_ingredients(data['ingredients_str'][row])
        for name in recipe_ingredients:
            ingredient_counts[name] += 1
    return ingredient_counts

def add_counts_to_ingredient_list(data):
    """Store each ingredient's occurrence count in the ingredient profiles file.

    Computes the counts with get_ingredient_count, writes them to the 'counts' column of
    ingredient_profiles (in ingredient_list order) and saves the table back to
    INGREDIENTS_LIST_PATH without the index column.

    Args:
        data: dataset table passed through to get_ingredient_count.
    """
    per_ingredient = get_ingredient_count(data)
    ingredient_profiles['counts'] = [per_ingredient[name] for name in ingredient_list]
    ingredient_profiles.to_csv(INGREDIENTS_LIST_PATH, index=False)