AndriiPets committed on
Commit
03a8026
·
1 Parent(s): 309b734
.dockerignore ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ Dockerfile
3
+ .DS_Store
4
+ .gitignore
5
+ .dockerignore
6
+
7
+ /credentials
8
+ /cache
9
+ /store
10
+
11
+ /node_modules
12
+
13
+ # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
14
+
15
+ # General
16
+ *.DS_Store
17
+ .AppleDouble
18
+ .LSOverride
19
+
20
+ # Icon must end with two \r
21
+ Icon
22
+
23
+
24
+ # Thumbnails
25
+ ._*
26
+
27
+ # Files that might appear in the root of a volume
28
+ .DocumentRevisions-V100
29
+ .fseventsd
30
+ .Spotlight-V100
31
+ .TemporaryItems
32
+ .Trashes
33
+ .VolumeIcon.icns
34
+ .com.apple.timemachine.donotpresent
35
+
36
+ # Directories potentially created on remote AFP share
37
+ .AppleDB
38
+ .AppleDesktop
39
+ Network Trash Folder
40
+ Temporary Items
41
+ .apdisk
42
+
43
+ # https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
44
+
45
+ # Byte-compiled / optimized / DLL files
46
+ __pycache__/
47
+ *.py[cod]
48
+ *$py.class
49
+
50
+ # C extensions
51
+ *.so
52
+
53
+ # Distribution / packaging
54
+ .Python
55
+ build/
56
+ develop-eggs/
57
+ dist/
58
+ downloads/
59
+ eggs/
60
+ .eggs/
61
+ lib64/
62
+ parts/
63
+ sdist/
64
+ var/
65
+ wheels/
66
+ *.egg-info/
67
+ .installed.cfg
68
+ *.egg
69
+
70
+ # PyInstaller
71
+ # Usually these files are written by a python script from a template
72
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
73
+ *.manifest
74
+ *.spec
75
+
76
+ # Installer logs
77
+ pip-log.txt
78
+ pip-delete-this-directory.txt
79
+
80
+ # Unit test / coverage reports
81
+ htmlcov/
82
+ .tox/
83
+ .coverage
84
+ .coverage.*
85
+ .cache
86
+ nosetests.xml
87
+ coverage.xml
88
+ *.cover
89
+ .hypothesis/
90
+
91
+ # Translations
92
+ *.mo
93
+ *.pot
94
+
95
+ # Django stuff:
96
+ *.log
97
+ local_settings.py
98
+
99
+ # Flask stuff:
100
+ instance/
101
+ .webassets-cache
102
+
103
+ # Scrapy stuff:
104
+ .scrapy
105
+
106
+ # Sphinx documentation
107
+ docs/_build/
108
+
109
+ # PyBuilder
110
+ target/
111
+
112
+ # Jupyter Notebook
113
+ .ipynb_checkpoints
114
+
115
+ # pyenv
116
+ .python-version
117
+
118
+ # celery beat schedule file
119
+ celerybeat-schedule
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
.gitignore ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ .env
3
+ .flaskenv
4
+ *.pyc
5
+ *.pyo
6
+ env/
7
+ venv/
8
+ .venv/
9
+ env*
10
+ dist/
11
+ build/
12
+ *.egg
13
+ *.egg-info/
14
+ .tox/
15
+ .cache/
16
+ .pytest_cache/
17
+ .idea/
18
+ docs/_build/
19
+ .vscode
20
+
21
+ # Coverage reports
22
+ htmlcov/
23
+ .coverage
24
+ .coverage.*
25
+ *,cover
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /code

RUN pip install --upgrade pip

# Install the Rust toolchain via rustup.
# NOTE(review): presumably required to build a Python dependency from source — confirm it is still needed.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Copy only the requirements first so the dependency layer is cached
# independently of application source changes.
COPY ./requirements.txt /code/requirements.txt

# Make cargo/rustc visible to the pip build step below.
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

EXPOSE 7860

# `flask run` binds to 127.0.0.1:5000 by default, which is unreachable from
# outside the container and does not match the exposed port. Bind explicitly
# to all interfaces on the exposed port.
CMD ["flask", "run", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from flask import Flask
3
+ from generator import generation_function
4
+ from flask import request
5
+ from flask_cors import CORS
6
+ import asyncio
7
+
8
# Single Flask application instance; the route decorators below register on it.
app = Flask(__name__)
# Apply flask_cors with its default settings to the whole application.
CORS(app)
10
+
11
+
12
@app.route('/')
def hello():
    """Answer requests to the root URL with a fixed greeting string."""
    greeting = 'Hello world!'
    return greeting
15
+
16
+
17
@app.route('/generate', methods=['POST'])
def generate():
    """Generate a recipe from the JSON body of a POST request.

    The body must be a JSON object with a "prompts" entry, which is passed to
    ``generation_function`` as its first argument. A truthy "lang" entry is
    forwarded as the ``lang`` keyword; otherwise ``generation_function`` uses
    its own default language.

    Returns:
        The value returned by ``generation_function``, or an error object
        with HTTP status 400 when the body is not a JSON object or has no
        "prompts" entry.
    """
    request_data = request.get_json()

    # Reject malformed payloads explicitly instead of failing with a
    # TypeError/KeyError (HTTP 500) on the lookups below.
    if not isinstance(request_data, dict) or "prompts" not in request_data:
        return {
            "error": 'Request body must be a JSON object with a "prompts" field.'
        }, 400

    options = {}
    if request_data.get("lang"):
        options["lang"] = request_data["lang"]

    return generation_function(request_data["prompts"], **options)
30
+
31
+
32
# Start Flask's built-in server only when this file is executed directly,
# not when the module is imported.
if __name__ == '__main__':
    app.run()
generator.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, set_seed
2
+ from transformers import AutoTokenizer
3
+ import re
4
+ from utils import ext
5
+ from utils.ext import pure_comma_separation
6
+
7
+ from decouple import config
8
+ import os
9
+
10
+ from utils.api import generate_cook_image
11
+ from utils.translators.translate_recepie import translate_recepie
12
+ from utils.translators.translate_input import translate_input
13
+
14
+
15
# Hugging Face model identifier and pipeline task used for recipe generation.
model_name_or_path = "flax-community/t5-recipe-generation"
task = "text2text-generation"

# Both objects are created once, at import time, and reused by every call to
# generation_function below.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generator = pipeline(
    task,
    model=model_name_or_path,
    tokenizer=model_name_or_path,
)

# NOTE(review): `prefix` is not referenced by any function in this module —
# confirm whether inputs are expected to be prefixed with it before generation.
prefix = "items: "
23
+
24
+
25
# Keyword arguments for sampling-based generation. With return_tensors=True
# and return_text=False the pipeline yields token ids rather than text; this
# is the configuration generation_function passes to the pipeline.
chef_top = dict(
    max_length=512,
    min_length=64,
    no_repeat_ngram_size=3,
    do_sample=True,
    top_k=60,
    top_p=0.95,
    num_return_sequences=1,
    return_tensors=True,
    return_text=False,
)

# Keyword arguments for beam-search generation (not used by the functions in
# this module).
chef_beam = dict(
    max_length=512,
    min_length=64,
    no_repeat_ngram_size=3,
    early_stopping=True,
    num_beams=5,
    length_penalty=1.5,
    num_return_sequences=1,
)

# Plain sampling settings without the pipeline return-format switches (not
# used by the functions in this module).
generation_kwargs = dict(
    max_length=512,
    min_length=64,
    no_repeat_ngram_size=3,
    do_sample=True,
    top_k=60,
    top_p=0.95,
)
54
+
55
+
56
def load_api():
    """Read the API credentials through ``decouple.config``.

    Returns:
        A dict with "KEY" holding the API_KEY setting and "ID" holding the
        API_ID setting.
    """
    return {"KEY": config("API_KEY"), "ID": config("API_ID")}
60
+
61
+
62
def skip_special_tokens_and_prettify(text):
    """Split decoded model output into title, ingredients and directions.

    The text is scanned for ``label:`` sections. The "title:" body is kept as
    is, the "ingredients:" body is split on commas and the "directions:" body
    on full stops; any other label is ignored. Each part is then cleaned by
    the matching helper in ``utils.ext``.

    Returns:
        A dict with the keys "title", "ingredients" and "directions".
    """
    raw_title = ""
    raw_ingredients = []
    raw_directions = []

    # The appended '$' is a sentinel that lets the lookahead close the final
    # section, which has no following "label:" to stop at.
    section_pattern = r"(\w+:)(.+?(?=\w+:|\$))"

    for label, body in re.findall(section_pattern, text + '$'):
        if label == 'title:':
            raw_title = body
        elif label == 'ingredients:':
            raw_ingredients = [part.strip() for part in body.split(',')]
        elif label == 'directions:':
            raw_directions = [part.strip() for part in body.split('.')]

    clean_ingredients = ext.ingredients(raw_ingredients)
    clean_directions = ext.directions(raw_directions)
    clean_title = ext.title(raw_title)

    return {
        "title": clean_title,
        "ingredients": clean_ingredients,
        "directions": clean_directions,
    }
88
+
89
+
90
def generation_function(texts, lang="en"):
    """Generate a recipe for ``texts`` and attach an image URL.

    When ``lang`` is a supported language other than "en", the input is
    translated before generation and the parsed recipe is translated back
    afterwards. Any other value of ``lang`` is processed without translation.

    Args:
        texts: Prompt passed to the generation pipeline.
        lang: Language code of the request; "ru" and "en" are recognised.

    Returns:
        The parsed recipe dict ("title", "ingredients", "directions") with an
        added "image" entry holding the result of ``generate_cook_image``.
    """
    supported_langs = ['ru', 'en']
    credentials = load_api()
    needs_translation = lang != "en" and lang in supported_langs

    if needs_translation:
        texts = translate_input(texts, lang)

    # chef_top asks the pipeline for token ids instead of text, so the ids are
    # decoded here with the special tokens kept for the section parser.
    sampled = generator(texts, **chef_top)
    token_ids = sampled[0]["generated_token_ids"]
    decoded = tokenizer.decode(token_ids, skip_special_tokens=False)

    recipe = skip_special_tokens_and_prettify(decoded)

    if needs_translation:
        recipe = translate_recepie(recipe, lang)

    recipe["image"] = generate_cook_image(
        recipe['title'],
        app_id=credentials['ID'],
        app_key=credentials['KEY'],
    )

    return recipe
116
+
117
+
118
# Sample comma-separated ingredient prompts (not referenced by the functions
# in this module).
items = [
    "macaroni, butter, salt, bacon, milk, flour, pepper, cream corn",
    "provolone cheese, bacon, bread, ginger",
]
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Flask==2.2.3
2
+ Flask_Cors==3.0.10
3
+ python-decouple==3.8
4
+ Requests==2.28.2
5
+ transformers==4.27.4
6
+ sacremoses==0.0.53
7
+ torch==2.0.0
test.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+
4
# Scratch script: runs the recipe section parser on a fixed sample string.
text = 'title:macaroni and corn ingredients: 1 lb. macarooni 2 tbsp. butter 1 tsp salt 4 slices bacon 2 1/2 c. milk 2/3 c flour pepper to taste 1 can cream corn directions: cook macaronis in salted water until tender. fry bacon until crisp. drain on paper towel. melt butter in saucepan. add flour, salt and pepper. cook over low heat, stirring constantly, until mixture is smooth and bubbly. remove from heat. stir in milk. return to heat and bring to a boil. reduce heat and simmer until thickened. fold in corn and bacon. pour into greased baking dish. bake at 350 for 30 minutes.'

# Sentinel so the lookahead in the pattern can terminate the last section.
text += '$'

pattern = r"(\w+:)(.+?(?=\w+:|\$))"

data = {"title": "", "ingredients": [], "directions": []}

for label, body in re.findall(pattern, text):
    if label == 'title:':
        data["title"] = body
    elif label == 'ingredients:':
        data["ingredients"] = [piece.strip() for piece in body.split(',')]
    elif label == 'directions:':
        data["directions"] = [piece.strip() for piece in body.split('.')]

# NOTE(review): this writes the raw API_KEY value to stdout — confirm that is
# acceptable wherever this script is run.
print(os.getenv("API_KEY"))
utils/api.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import requests
3
+
4
+
5
def generate_cook_image(query, app_id, app_key, timeout=10):
    """Look up an image URL for ``query`` through the Edamam recipe search API.

    The lookup is best-effort: any failure yields ``None`` instead of raising.

    Args:
        query: Free-text search term (for example a recipe title).
        app_id: Edamam application id.
        app_key: Edamam application key.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The "image" value of one of the returned hits, or ``None`` when the
        request fails, the status is not 200, or no usable hit is present.
    """
    api_url = "https://api.edamam.com/api/recipes/v2"
    # Passing the values as params lets requests URL-encode them, so spaces,
    # '&' or '#' in the query cannot corrupt the query string.
    params = {
        "type": "public",
        "q": query,
        "app_id": app_id,
        "app_key": app_key,
        "field": "image",
    }

    try:
        # The timeout keeps an unresponsive API from blocking the caller
        # indefinitely.
        response = requests.get(api_url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None

        payload = response.json()
        if "hits" not in payload or not payload["hits"]:
            return None

        hits = payload["hits"]
        if len(hits) > 1:
            # NOTE(review): the lower bound of 1 means the first hit is never
            # chosen when several are returned — confirm this is intended.
            hit = hits[random.randint(1, min(5, len(hits) - 1))]
        else:
            hit = hits[0]

        if "recipe" not in hit or "image" not in hit["recipe"]:
            return None

        return hit["recipe"]["image"]
    except Exception:
        # Deliberately broad: every failure (network error, invalid JSON,
        # unexpected payload shape) is reported to the caller as None.
        return None
utils/ext.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Literal substring -> replacement pairs that `ingredient` always applies.
DEFAULT_MAP_DICT = dict([
    (" c ", " c. "),
    (", chopped", " (chopped)"),
    (", crumbled", " (crumbled)"),
    (", thawed", " (thawed)"),
    (", melted", " (melted)"),
])
11
+
12
+
13
def replace_regex(text, map_dict):
    """Replace every literal key of ``map_dict`` found in ``text`` by its value.

    Args:
        text: Value to process; it is converted with ``str()`` first.
        map_dict: Mapping of literal substrings to their replacements. Keys
            are escaped, so they are never interpreted as regex syntax.

    Returns:
        The converted text with all replacements applied in a single pass.
    """
    text = str(text)
    # An empty mapping would produce an empty pattern, which matches at every
    # position and makes the lookup below fail with KeyError('').
    if not map_dict:
        return text

    pattern = "|".join(map(re.escape, map_dict))
    return re.sub(pattern, lambda found: map_dict[found.group()], text)
16
+
17
+
18
def unique_list(seq):
    """Return the items of ``seq`` as a list without duplicates.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(seq))
22
+
23
+
24
def pure_comma_separation(list_str, return_list=True):
    """Normalise a comma-separated string into unique, trimmed items.

    The string is lower-cased and split on commas; each item is stripped,
    empty items are dropped and duplicates are removed with the first
    occurrence kept.

    Args:
        list_str: Comma-separated input string.
        return_list: When true return a list, otherwise a ", "-joined string.
    """
    stripped = (piece.strip() for piece in list_str.lower().split(","))
    cleaned = unique_list([piece for piece in stripped if piece])
    return cleaned if return_list else ", ".join(cleaned)
30
+
31
+
32
def ingredient(text, map_dict):
    """Normalise one ingredient string.

    Applies the replacements from ``map_dict`` together with
    ``DEFAULT_MAP_DICT``, rewrites a whole number followed by a fraction
    ("2 1/2") as "2+1/2", and collapses runs of whitespace.

    Args:
        text: Ingredient text to clean.
        map_dict: Extra literal replacements; may be empty or ``None``. It is
            not modified.

    Returns:
        The cleaned ingredient string.
    """
    # Merge into a new dict so the caller's mapping is never mutated. The
    # defaults are applied last, so they still win on a key clash.
    replacements = {**(map_dict or {}), **DEFAULT_MAP_DICT}

    text = replace_regex(text, replacements)
    text = re.sub(r"(\d)\s(\d\/\d)", r" \1+\2 ", text)
    # split() without arguments already discards empty and padded tokens.
    return " ".join(text.split())
42
+
43
+
44
def ingredients(text: list[str]):
    """Turn the comma-split ingredient fragments into one entry per ingredient.

    Individual ingredients are delimited by the ``<sep>`` token, while the
    incoming list is the result of splitting on commas, so one ingredient that
    contains a comma arrives spread over several fragments. The fragments are
    re-joined with ", " before splitting on ``<sep>`` so no text is lost.

    Args:
        text: Fragments of the ingredients section; may be empty.

    Returns:
        A list of ingredient strings with the ``<section>`` token removed and
        surrounding whitespace stripped. An empty input gives an empty list.
    """
    separator, section_end = '<sep>', '<section>'

    # Guard against output that had no ingredients section at all.
    if not text:
        return []

    joined = ", ".join(text)
    return [
        entry.replace(section_end, '').strip()
        for entry in joined.split(separator)
    ]
53
+
54
+
55
def directions(text_list: list[str]):
    """Clean the sentence fragments of the directions section.

    The last fragment is discarded and the ``<sep>`` token is removed from
    every remaining fragment, which is then stripped of surrounding
    whitespace.

    Args:
        text_list: Fragments of the directions section; may be empty. The
            list is not modified.

    Returns:
        The cleaned fragments, without the last one.
    """
    token = '<sep>'
    # Slicing drops the last fragment like the previous pop() did, but is
    # safe on an empty list and leaves the caller's list untouched.
    return [line.replace(token, '').strip() for line in text_list[:-1]]
66
+
67
+
68
def title(text: str):
    """Return ``text`` without the ``<section>`` token and outer whitespace."""
    without_token = text.replace('<section>', '')
    return without_token.strip()
utils/translators/en_ru/en_ru.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import FSMTForConditionalGeneration, FSMTTokenizer
3
# Checkpoint name for the translation model; the tokenizer and the model are
# both loaded from it once, when this module is imported.
mname = "facebook/wmt19-en-ru"
tokenizer = FSMTTokenizer.from_pretrained(mname)
model = FSMTForConditionalGeneration.from_pretrained(mname)
6
+
7
+
8
def translator_en_ru(text):
    """Translate ``text`` with the module-level FSMT model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        removed.
    """
    encoded = tokenizer.encode(text, return_tensors="pt")
    generated = model.generate(encoded)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
utils/translators/en_ru/ru_en.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import FSMTForConditionalGeneration, FSMTTokenizer
3
# Checkpoint name for the translation model; the tokenizer and the model are
# both loaded from it once, when this module is imported.
mname = "facebook/wmt19-ru-en"
tokenizer = FSMTTokenizer.from_pretrained(mname)
model = FSMTForConditionalGeneration.from_pretrained(mname)
6
+
7
+
8
def translate_ru_en(text):
    """Translate ``text`` with the module-level FSMT model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        removed.
    """
    encoded = tokenizer.encode(text, return_tensors="pt")
    generated = model.generate(encoded)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
utils/translators/translate_input.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from utils.translators.en_ru.ru_en import translate_ru_en
3
+
4
# Language code of the user input -> function used to translate that input.
translators = {"ru": translate_ru_en}


def translate_input(inputs, lang="ru"):
    """Translate ``inputs`` with the translator registered for ``lang``.

    Raises:
        KeyError: If no translator is registered for ``lang``.
    """
    return translators[lang](inputs)
utils/translators/translate_recepie.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from utils.translators.en_ru.en_ru import translator_en_ru
3
+
4
# Target language code -> function used to translate recipe text into it.
translators = {"ru": translator_en_ru}


def translate_recepie(recepie, lang="en"):
    """Translate the title, directions and ingredients of a recipe.

    Directions are joined with "|" and ingredients with "/" so each list is
    translated in a single call, then split again on the same delimiter.

    Args:
        recepie: Dict with "title" (str), "ingredients" and "directions"
            (lists of str).
        lang: Target language code. When no translator is registered for it
            (including the default "en"), the fields are returned
            untranslated.

    Returns:
        A new dict with the keys "title", "ingredients" and "directions".
    """
    data = {"title": "", "ingredients": [], "directions": []}

    translator = translators.get(lang)
    if translator is None:
        # Nothing to translate: return a copy instead of raising KeyError.
        data["title"] = recepie["title"]
        data["ingredients"] = list(recepie["ingredients"])
        data["directions"] = list(recepie["directions"])
        return data

    data["title"] = translator(recepie["title"])

    # Split on the same "|" delimiter used for joining; splitting on ","
    # never restored the individual steps.
    directions = "|".join(recepie["directions"])
    data["directions"] = [
        step.strip() for step in translator(directions).split('|')
    ]

    ingredients = "/".join(recepie["ingredients"])
    data["ingredients"] = [
        item.strip() for item in translator(ingredients).split('/')
    ]

    return data