Spaces:
Runtime error
Runtime error
File size: 11,404 Bytes
93c029f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# This script loads the list and profiles of our ingredients selection.
# It defines rules to recognize ingredients from the list in recipes and the function to extract that information from ingredient strings.
import pandas as pd
from src.cocktails.config import INGREDIENTS_LIST_PATH, COCKTAILS_CSV_DATA
import numpy as np
# Ingredient profiles table; the code below reads its 'ingredient' and 'type' columns.
ingredient_profiles = pd.read_csv(INGREDIENTS_LIST_PATH)
# Lower-cased ingredient names, in file order.
ingredient_list = [ing.lower() for ing in ingredient_profiles['ingredient']]
n_ingredients = len(ingredient_list)
# Position of each ingredient in ingredient_list.
ingredient2ingredient_id = {ing: ing_id for ing_id, ing in enumerate(ingredient_list)}
ingredients_types = sorted(set(ingredient_profiles['type']))
# for each type, get all ingredients (names and types are paired row by row)
ing_per_type = [
    [ing for ing, ing_type in zip(ingredient_list, ingredient_profiles['type']) if ing_type == wanted_type]
    for wanted_type in ingredients_types
]
ingredients_per_type = dict(zip(ingredients_types, ing_per_type))
bubble_ingredients = ['soda', 'ginger beer', 'tonic', 'sparkling wine']
# Rules used to recognize each ingredient in a recipe's ingredient string.
# Every ingredient maps to a list of rules with an OR relation: only one rule needs to be satisfied.
# A rule is either a single expression (a str) or a list of expressions with an AND relation:
# all expressions of that inner list need to be satisfied.
# A plain expression MUST appear in the lower-cased ingredient string (substring search).
# An expression prefixed with ~ must NOT appear in it.
# Commented-out entries are disabled rules; the active keys must match ingredient_list exactly
# (this is checked right after the dict).
ingredient_search = {#'salt': ['salt'],
'lime juice': [['lime', '~soda', '~lemonade', '~cordial']],
'lemon juice': [['lemon', '~soda', '~lemonade']],
'angostura': [['angostura', '~orange'],
['bitter', '~campari', '~orange', '~red', '~italian', '~fernet']],
'orange bitters': [['orange', 'bitter', '~bittersweet']],
'orange juice': [['orange', '~bitter', '~jam', '~marmalade', '~liqueur', '~water'],
['orange', 'squeeze']],
'pineapple juice': [['pineapple']],
# 'apple juice': [['apple', 'juice', '~pine']],
'cranberry juice': [['cranberry', 'juice']],
'cointreau': ['cointreau', 'triple sec', 'grand marnier', 'curaçao', 'curacao'],
'luxardo maraschino': ['luxardo', 'maraschino', 'kirsch'],
'amaretto': ['amaretto'],
'benedictine': ['benedictine', 'bénédictine', 'bénedictine', 'benédictine'],
'campari': ['campari', ['italian', 'red', 'bitter'], 'aperol', 'bittersweet', 'aperitivo', 'orange-red'],
# 'campari': ['campari', ['italian', 'red', 'bitter']],
# 'crème de violette': [['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
# 'aperol': ['aperol', 'bittersweet', 'aperitivo', 'orange-red'],
'green chartreuse': ['chartreuse'],
'black raspberry liqueur': [['cassis', 'liqueur'],
['black raspberry', 'liqueur'],
['raspberry', 'liqueur'],
['strawberry', 'liqueur'],
['blackberry', 'liqueur'],
['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
# 'simple syrup': [],
# 'drambuie': ['drambuie'],
# 'fernet branca': ['fernet', 'branca'],
'gin': [['gin', '~sloe', '~ginger']],
'vodka': ['vodka'],
'cuban rum': [['rum', 'puerto rican'], ['light', 'rum'], ['white', 'rum'], ['rum', 'havana', '~7'], ['rum', 'bacardi']],
'cognac': [['cognac', '~grand marnier', '~cointreau', '~orange']],
# 'bourbon': [['bourbon', '~liqueur']],
# 'tequila': ['tequila', 'pisco'],
# 'tequila': ['tequila'],
'scotch': ['scotch'],
'dark rum': [['rum', 'age', '~bacardi', '~havana'],
['rum', 'dark', '~bacardi', '~havana'],
['rum', 'old', '~bacardi', '~havana'],
['rum', 'old', '7'],
['rum', 'havana', '7'],
['havana', 'rum', 'especial']],
'absinthe': ['absinthe'],
'rye whiskey': ['rye', ['bourbon', '~liqueur']],
# 'rye whiskey': ['rye'],
'apricot brandy': [['apricot', 'brandy']],
# 'pisco': ['pisco'],
# 'cachaça': ['cachaça', 'cachaca'],
'egg': [['egg', 'white', '~yolk', '~whole']],
'soda': [['soda', 'water', '~lemon', '~lime']],
'mint': ['mint'],
'sparkling wine': ['sparkling wine', 'prosecco', 'champagne'],
'ginger beer': [['ginger', 'beer'], ['ginger', 'ale']],
'tonic': [['tonic'], ['7up'], ['sprite']],
# 'espresso': ['espresso', 'expresso', ['café', '~liqueur', '~cream'],
# ['cafe', '~liqueur', '~cream'],
# ['coffee', '~liqueur', '~cream']],
# 'southern comfort': ['southern comfort'],
# 'cola': ['cola', 'coke', 'pepsi'],
'double syrup': [['sugar','~raspberry'], ['simple', 'syrup'], ['double', 'syrup']],
# 'grenadine': ['grenadine', ['pomegranate', 'syrup']],
'grenadine': ['grenadine', ['pomegranate', 'syrup'], ['raspberry', 'syrup', '~black']],
'honey syrup': ['honey', ['maple', 'syrup']],
# 'raspberry syrup': [['raspberry', 'syrup', '~black']],
'dry vermouth': [['vermouth', 'dry'], ['vermouth', 'white'], ['vermouth', 'french'], 'lillet'],
'sweet vermouth': [['vermouth', 'sweet'], ['vermouth', 'red'], ['vermouth', 'italian']],
# 'lillet blanc': ['lillet'],
'water': [['water', '~sugar', '~coconut', '~soda', '~tonic', '~honey', '~orange', '~melon']]
}
# Check that there is exactly one rule entry for every ingredient in the list (and no extra rule).
# Raised explicitly rather than with `assert` so the check still runs under `python -O`.
if sorted(ingredient_list) != sorted(ingredient_search.keys()):
    _without_rule = sorted(set(ingredient_list) - set(ingredient_search))
    _without_ingredient = sorted(set(ingredient_search) - set(ingredient_list))
    raise AssertionError(
        'ing search dict keys do not match ingredient list'
        f' (ingredients without rule: {_without_rule}; rules without ingredient: {_without_ingredient})'
    )
def get_ingredients_info():
    """Summarize the ingredients used in the cocktails dataset.

    Reads the CSV located at ``COCKTAILS_CSV_DATA`` and scans its recipes with
    ``get_max_n_ingredients``.

    Returns:
        A tuple ``(max_ingredients, ingredient_list, ind_alcohol)`` where
        ``max_ingredients`` is the largest number of ingredients found in one
        recipe, ``ingredient_list`` is the sorted list of ingredients found in
        the recipes, and ``ind_alcohol`` lists the positions in that sorted list
        of the ingredients that are liquors, liqueurs, vermouths or
        'sparkling wine'.
    """
    cocktails = pd.read_csv(COCKTAILS_CSV_DATA)
    max_ingredients, found, liquors, liqueurs, vermouths = get_max_n_ingredients(cocktails)
    sorted_ingredients = sorted(found)
    # Sparkling wine is treated as alcoholic regardless of its profile type.
    alcohol = sorted(liquors | liqueurs | vermouths | {'sparkling wine'})
    ind_alcohol = [position for position, name in enumerate(sorted_ingredients) if name in alcohol]
    return max_ingredients, sorted_ingredients, ind_alcohol
def get_max_n_ingredients(data):
    """Scan every recipe of ``data`` and collect ingredient statistics.

    Args:
        data: table indexable by column name, providing 'names' (used only for
            the number of recipes) and 'ingredients_str' (formatted ingredient
            strings readable by ``extract_ingredients``).

    Returns:
        A tuple ``(max_count, ingredient_set, liquor_set, liqueur_set,
        vermouth_set)``: the largest number of ingredients in a single recipe,
        the set of all ingredients met, and the subsets of those ingredients
        listed under the 'liquor', 'liqueur' and 'vermouth' types of
        ``ingredients_per_type``.
    """
    recipes = np.array(data['ingredients_str'])
    tracked_types = ('liquor', 'liqueur', 'vermouth')
    found_per_type = {type_name: set() for type_name in tracked_types}
    all_found = set()
    largest = 0
    for row in range(len(data['names'])):
        names, _ = extract_ingredients(recipes[row])
        largest = max(largest, len(names))
        for name in names:
            all_found.add(name)
            # An ingredient is recorded under each tracked type that lists it.
            for type_name in tracked_types:
                if name in ingredients_per_type[type_name]:
                    found_per_type[type_name].add(name)
    return (largest, all_found, found_per_type['liquor'],
            found_per_type['liqueur'], found_per_type['vermouth'])
def _rule_matches(rule, ing_str):
    """Tell whether one recognition rule is satisfied by the lower-cased ``ing_str``."""

    def expression_holds(expression):
        # '~xyz' holds when 'xyz' is absent from the string, 'xyz' when it is present.
        if expression[0] == '~':
            return expression[1:] not in ing_str
        return expression in ing_str

    if isinstance(rule, str):
        return expression_holds(rule)
    if isinstance(rule, list):
        # AND relation: every expression of the inner list must hold.
        return all(expression_holds(expression) for expression in rule)
    raise ValueError(f'rule must be a str or a list of str, got {type(rule).__name__}')


def find_ingredient_from_str(ing_str, *, ingredients=None, search_rules=None):
    """Assign an ingredient string to one of the known ingredients, if possible.

    The string is lower-cased, then every ingredient's rules are evaluated by
    substring search; an ingredient matches when at least one of its rules is
    satisfied (OR relation between rules).

    Args:
        ing_str: free-text ingredient description.
        ingredients: ingredient names to try, in priority order. Defaults to
            the module-level ``ingredient_list``.
        search_rules: mapping from ingredient name to its list of rules.
            Defaults to the module-level ``ingredient_search``.

    Returns:
        A tuple ``(flag, ingredient)``:

        * ``(False, name)`` when exactly one ingredient matches;
        * ``(True, None)`` when no ingredient matches;
        * ``(True, name)`` when several ingredients match; ``name`` is the
          first match and the string plus all candidates are printed.

        NOTE(review): the flag is True in the two problematic cases, which is
        the opposite of what the previous comment stated; callers presumably
        treat a True flag as "reject" -- confirm against the call sites.

    Raises:
        ValueError: if a rule is neither a str nor a list.
    """
    if ingredients is None:
        ingredients = ingredient_list
    if search_rules is None:
        search_rules = ingredient_search

    ing_str = ing_str.lower()
    # A list (not a generator) is passed to any() so that every rule is
    # evaluated and a malformed rule is always reported.
    matches = [
        name for name in ingredients
        if any([_rule_matches(rule, ing_str) for rule in search_rules[name]])
    ]

    if not matches:
        return True, None
    if len(matches) > 1:
        # Ambiguous string: show it together with all candidate ingredients.
        print(ing_str)
        for name in matches:
            print(name)
        return True, matches[0]
    return False, matches[0]
def get_cocktails_per_ingredient(ing_strs):
    """Map every known ingredient to the indices of the recipes that use it.

    Args:
        ing_strs: iterable of formatted ingredient strings, one per cocktail.

    Returns:
        A dict keyed by the names in ``ingredient_list``; each value is the list
        of positions in ``ing_strs`` whose recipe contains that ingredient
        (empty when no recipe uses it).
    """
    recipes_by_ingredient = {name: [] for name in ingredient_list}
    for recipe_idx, recipe in enumerate(ing_strs):
        names = extract_ingredients(recipe)[0]
        for name in names:
            recipes_by_ingredient[name].append(recipe_idx)
    return recipes_by_ingredient
def extract_ingredients(ingredient_str):
    """Parse a formatted ingredient string (reverse of ``format_ingredients``).

    The expected layout is ``[(name,quantity),(name,quantity),...]`` with no
    space around the commas.

    Args:
        ingredient_str: the formatted string.

    Returns:
        A tuple ``(ingredients, quantities)``: the list of ingredient names and
        the list of their quantities as floats, in the same order.
    """
    # Drop the surrounding brackets, then cut on commas: even tokens look like
    # '(name', odd tokens like 'quantity)'.
    tokens = ingredient_str[1:-1].split(',')
    n_pairs = len(tokens) // 2
    names = [token[1:] for token in tokens[0:2 * n_pairs:2]]
    amounts = [float(token[:-1]) for token in tokens[1:2 * n_pairs:2]]
    return names, amounts
def format_ingredients(ingredients, quantities):
    """Build a formatted ingredient string (reverse of ``extract_ingredients``).

    Args:
        ingredients: ingredient names; a single trailing space, if any, is
            removed from each name.
        quantities: quantities paired with ``ingredients`` (extra items of the
            longer sequence are ignored).

    Returns:
        A string of the form ``[(name,quantity),(name,quantity),...]``; empty
        inputs give ``'[]'``.
    """
    pairs = []
    for ing, q in zip(ingredients, quantities):
        # endswith() also copes with an empty name, where ing[-1] would raise.
        ingre = ing[:-1] if ing.endswith(' ') else ing
        pairs.append(f'({ingre},{q})')
    # join() keeps the opening bracket when there is nothing to format.
    return '[' + ','.join(pairs) + ']'
def get_ingredient_count(data):
    """Count in how many kept recipes each known ingredient appears.

    Args:
        data: table indexable by column name then row number, providing
            'names' (used only for the number of recipes), 'to_keep' (truthy
            for recipes to count) and 'ingredients_str' (formatted ingredient
            strings readable by ``extract_ingredients``).

    Returns:
        A dict mapping every name of ``ingredient_list`` to its number of
        occurrences over the recipes flagged 'to_keep'.
    """
    ingredient_counts = dict.fromkeys(ingredient_list, 0)
    for row in range(len(data['names'])):
        if not data['to_keep'][row]:
            continue
        names, _ = extract_ingredients(data['ingredients_str'][row])
        # Distinct loop names: the row index is no longer overwritten by the
        # ingredient being counted.
        for name in names:
            ingredient_counts[name] += 1
    return ingredient_counts
def add_counts_to_ingredient_list(data):
    """Store each ingredient's occurrence count in the ingredient profiles file.

    Computes the counts with ``get_ingredient_count``, writes them to the
    'counts' column of the module-level ``ingredient_profiles`` table (in the
    order of ``ingredient_list``) and saves that table back to
    ``INGREDIENTS_LIST_PATH`` without the index column.

    Args:
        data: cocktails table accepted by ``get_ingredient_count``.
    """
    ingredient_counts = get_ingredient_count(data)
    ingredient_profiles['counts'] = [ingredient_counts[name] for name in ingredient_list]
    ingredient_profiles.to_csv(INGREDIENTS_LIST_PATH, index=False)