Spaces:
Sleeping
Sleeping
AndriiPets
commited on
Commit
·
03a8026
1
Parent(s):
309b734
initial
Browse files- .dockerignore +142 -0
- .gitignore +25 -0
- Dockerfile +19 -0
- app.py +33 -0
- generator.py +121 -0
- requirements.txt +7 -0
- test.py +19 -0
- utils/api.py +27 -0
- utils/ext.py +71 -0
- utils/translators/en_ru/en_ru.py +14 -0
- utils/translators/en_ru/ru_en.py +13 -0
- utils/translators/translate_input.py +11 -0
- utils/translators/translate_recepie.py +22 -0
.dockerignore
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.git
|
2 |
+
Dockerfile
|
3 |
+
.DS_Store
|
4 |
+
.gitignore
|
5 |
+
.dockerignore
|
6 |
+
|
7 |
+
/credentials
|
8 |
+
/cache
|
9 |
+
/store
|
10 |
+
|
11 |
+
/node_modules
|
12 |
+
|
13 |
+
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
|
14 |
+
|
15 |
+
# General
|
16 |
+
*.DS_Store
|
17 |
+
.AppleDouble
|
18 |
+
.LSOverride
|
19 |
+
|
20 |
+
# Icon must end with two \r
|
21 |
+
Icon
|
22 |
+
|
23 |
+
|
24 |
+
# Thumbnails
|
25 |
+
._*
|
26 |
+
|
27 |
+
# Files that might appear in the root of a volume
|
28 |
+
.DocumentRevisions-V100
|
29 |
+
.fseventsd
|
30 |
+
.Spotlight-V100
|
31 |
+
.TemporaryItems
|
32 |
+
.Trashes
|
33 |
+
.VolumeIcon.icns
|
34 |
+
.com.apple.timemachine.donotpresent
|
35 |
+
|
36 |
+
# Directories potentially created on remote AFP share
|
37 |
+
.AppleDB
|
38 |
+
.AppleDesktop
|
39 |
+
Network Trash Folder
|
40 |
+
Temporary Items
|
41 |
+
.apdisk
|
42 |
+
|
43 |
+
# https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
|
44 |
+
|
45 |
+
# Byte-compiled / optimized / DLL files
|
46 |
+
__pycache__/
|
47 |
+
*.py[cod]
|
48 |
+
*$py.class
|
49 |
+
|
50 |
+
# C extensions
|
51 |
+
*.so
|
52 |
+
|
53 |
+
# Distribution / packaging
|
54 |
+
.Python
|
55 |
+
build/
|
56 |
+
develop-eggs/
|
57 |
+
dist/
|
58 |
+
downloads/
|
59 |
+
eggs/
|
60 |
+
.eggs/
|
61 |
+
lib64/
|
62 |
+
parts/
|
63 |
+
sdist/
|
64 |
+
var/
|
65 |
+
wheels/
|
66 |
+
*.egg-info/
|
67 |
+
.installed.cfg
|
68 |
+
*.egg
|
69 |
+
|
70 |
+
# PyInstaller
|
71 |
+
# Usually these files are written by a python script from a template
|
72 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
73 |
+
*.manifest
|
74 |
+
*.spec
|
75 |
+
|
76 |
+
# Installer logs
|
77 |
+
pip-log.txt
|
78 |
+
pip-delete-this-directory.txt
|
79 |
+
|
80 |
+
# Unit test / coverage reports
|
81 |
+
htmlcov/
|
82 |
+
.tox/
|
83 |
+
.coverage
|
84 |
+
.coverage.*
|
85 |
+
.cache
|
86 |
+
nosetests.xml
|
87 |
+
coverage.xml
|
88 |
+
*.cover
|
89 |
+
.hypothesis/
|
90 |
+
|
91 |
+
# Translations
|
92 |
+
*.mo
|
93 |
+
*.pot
|
94 |
+
|
95 |
+
# Django stuff:
|
96 |
+
*.log
|
97 |
+
local_settings.py
|
98 |
+
|
99 |
+
# Flask stuff:
|
100 |
+
instance/
|
101 |
+
.webassets-cache
|
102 |
+
|
103 |
+
# Scrapy stuff:
|
104 |
+
.scrapy
|
105 |
+
|
106 |
+
# Sphinx documentation
|
107 |
+
docs/_build/
|
108 |
+
|
109 |
+
# PyBuilder
|
110 |
+
target/
|
111 |
+
|
112 |
+
# Jupyter Notebook
|
113 |
+
.ipynb_checkpoints
|
114 |
+
|
115 |
+
# pyenv
|
116 |
+
.python-version
|
117 |
+
|
118 |
+
# celery beat schedule file
|
119 |
+
celerybeat-schedule
|
120 |
+
|
121 |
+
# SageMath parsed files
|
122 |
+
*.sage.py
|
123 |
+
|
124 |
+
# Environments
|
125 |
+
.env
|
126 |
+
.venv
|
127 |
+
env/
|
128 |
+
venv/
|
129 |
+
ENV/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
.gitignore
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.DS_Store
|
2 |
+
.env
|
3 |
+
.flaskenv
|
4 |
+
*.pyc
|
5 |
+
*.pyo
|
6 |
+
env/
|
7 |
+
venv/
|
8 |
+
.venv/
|
9 |
+
env*
|
10 |
+
dist/
|
11 |
+
build/
|
12 |
+
*.egg
|
13 |
+
*.egg-info/
|
14 |
+
.tox/
|
15 |
+
.cache/
|
16 |
+
.pytest_cache/
|
17 |
+
.idea/
|
18 |
+
docs/_build/
|
19 |
+
.vscode
|
20 |
+
|
21 |
+
# Coverage reports
|
22 |
+
htmlcov/
|
23 |
+
.coverage
|
24 |
+
.coverage.*
|
25 |
+
*,cover
|
Dockerfile
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for the Flask recipe-generation service.
FROM python:3.9

WORKDIR /code

RUN pip install --upgrade pip

# Install the Rust toolchain via rustup.
# NOTE(review): presumably needed to build a Python dependency from source - confirm.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Copy only the requirements first so the dependency layer is cached
# independently of application code changes.
COPY ./requirements.txt /code/requirements.txt

# Make cargo/rustc visible to the pip build step below.
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

EXPOSE 7860

# `flask run` binds to 127.0.0.1:5000 by default, which is unreachable from
# outside the container and does not match the exposed port; bind explicitly.
CMD ["flask", "run", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from flask import Flask
|
3 |
+
from generator import generation_function
|
4 |
+
from flask import request
|
5 |
+
from flask_cors import CORS
|
6 |
+
import asyncio
|
7 |
+
|
8 |
+
app = Flask(__name__)
|
9 |
+
CORS(app)
|
10 |
+
|
11 |
+
|
12 |
+
@app.route('/')
def hello():
    """Serve a fixed greeting on the root URL."""
    greeting = 'Hello world!'
    return greeting
|
15 |
+
|
16 |
+
|
17 |
+
@app.route('/generate', methods=['POST'])
def generate():
    """Generate a recipe from the JSON body of a POST request.

    The body must be a JSON object with a ``"prompts"`` entry; an optional
    truthy ``"lang"`` entry is forwarded to ``generation_function``.

    Returns:
        The value produced by ``generation_function``, or a JSON error
        object with status 400 when the body is unusable.
    """
    # silent=True yields None for a missing/invalid JSON body so that every
    # malformed request is reported through the same 400 response below.
    request_data = request.get_json(silent=True)

    if not isinstance(request_data, dict) or "prompts" not in request_data:
        error = {
            "error": 'Request body must be a JSON object with a "prompts" field.'
        }
        return error, 400

    text = request_data["prompts"]
    lang = request_data.get("lang")

    # Only pass lang when the client supplied one, so the callee's own
    # default language stays in effect otherwise.
    if lang:
        return generation_function(text, lang=lang)
    return generation_function(text)
|
30 |
+
|
31 |
+
|
32 |
+
if __name__ == '__main__':
|
33 |
+
app.run()
|
generator.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline, set_seed
|
2 |
+
from transformers import AutoTokenizer
|
3 |
+
import re
|
4 |
+
from utils import ext
|
5 |
+
from utils.ext import pure_comma_separation
|
6 |
+
|
7 |
+
from decouple import config
|
8 |
+
import os
|
9 |
+
|
10 |
+
from utils.api import generate_cook_image
|
11 |
+
from utils.translators.translate_recepie import translate_recepie
|
12 |
+
from utils.translators.translate_input import translate_input
|
13 |
+
|
14 |
+
|
15 |
+
model_name_or_path = "flax-community/t5-recipe-generation"
|
16 |
+
task = "text2text-generation"
|
17 |
+
|
18 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
|
19 |
+
generator = pipeline(task, model=model_name_or_path,
|
20 |
+
tokenizer=model_name_or_path)
|
21 |
+
|
22 |
+
prefix = "items: "
|
23 |
+
|
24 |
+
|
25 |
+
chef_top = {
|
26 |
+
"max_length": 512,
|
27 |
+
"min_length": 64,
|
28 |
+
"no_repeat_ngram_size": 3,
|
29 |
+
"do_sample": True,
|
30 |
+
"top_k": 60,
|
31 |
+
"top_p": 0.95,
|
32 |
+
"num_return_sequences": 1,
|
33 |
+
"return_tensors": True,
|
34 |
+
"return_text": False
|
35 |
+
}
|
36 |
+
chef_beam = {
|
37 |
+
"max_length": 512,
|
38 |
+
"min_length": 64,
|
39 |
+
"no_repeat_ngram_size": 3,
|
40 |
+
"early_stopping": True,
|
41 |
+
"num_beams": 5,
|
42 |
+
"length_penalty": 1.5,
|
43 |
+
"num_return_sequences": 1
|
44 |
+
}
|
45 |
+
|
46 |
+
generation_kwargs = {
|
47 |
+
"max_length": 512,
|
48 |
+
"min_length": 64,
|
49 |
+
"no_repeat_ngram_size": 3,
|
50 |
+
"do_sample": True,
|
51 |
+
"top_k": 60,
|
52 |
+
"top_p": 0.95
|
53 |
+
}
|
54 |
+
|
55 |
+
|
56 |
+
def load_api():
    """Read the API credentials through ``config``.

    Returns:
        A dict holding the ``API_KEY`` setting under ``"KEY"`` and the
        ``API_ID`` setting under ``"ID"``.
    """
    credentials = {}
    credentials["KEY"] = config("API_KEY")
    credentials["ID"] = config("API_ID")
    return credentials
|
60 |
+
|
61 |
+
|
62 |
+
def skip_special_tokens_and_prettify(text):
    """Split decoded model output into title, ingredients and directions.

    The text is scanned for ``label:`` sections. The ``title:``,
    ``ingredients:`` and ``directions:`` sections are kept (ingredients split
    on ',' and directions on '.'); any other label is ignored. The three
    fields are then passed through the ``ext`` clean-up helpers.

    Returns:
        A dict with the keys ``"title"``, ``"ingredients"`` and
        ``"directions"``.
    """
    section_pattern = r"(\w+:)(.+?(?=\w+:|\$))"
    parsed = {"title": "", "ingredients": [], "directions": []}

    # A trailing '$' is appended as an end marker so the lookahead in the
    # pattern can close the final section.
    for label, body in re.findall(section_pattern, text + '$'):
        if label == 'title:':
            parsed["title"] = body
        elif label == 'ingredients:':
            parsed["ingredients"] = [piece.strip() for piece in body.split(',')]
        elif label == 'directions:':
            parsed["directions"] = [piece.strip() for piece in body.split('.')]

    parsed["ingredients"] = ext.ingredients(parsed["ingredients"])
    parsed["directions"] = ext.directions(parsed["directions"])
    parsed["title"] = ext.title(parsed["title"])
    return parsed
|
88 |
+
|
89 |
+
|
90 |
+
def generation_function(texts, lang="en"):
    """Generate a recipe for *texts* and attach an image to it.

    For a supported non-English *lang* the input is translated before
    generation and the parsed recipe is translated afterwards; any other
    value of *lang* leaves both untouched.

    Returns:
        The recipe dict from ``skip_special_tokens_and_prettify`` (or its
        translation) with the result of ``generate_cook_image`` stored under
        ``"image"``.
    """
    supported_langs = ['ru', 'en']
    credentials = load_api()
    needs_translation = lang != "en" and lang in supported_langs

    prompt = translate_input(texts, lang) if needs_translation else texts

    outputs = generator(prompt, **chef_top)
    token_ids = outputs[0]["generated_token_ids"]

    # Special tokens are kept in the decoded text; the prettify step below
    # is responsible for dealing with them.
    decoded = tokenizer.decode(token_ids, skip_special_tokens=False)

    recipe = skip_special_tokens_and_prettify(decoded)
    if needs_translation:
        recipe = translate_recepie(recipe, lang)

    recipe["image"] = generate_cook_image(
        recipe['title'],
        app_id=credentials['ID'],
        app_key=credentials['KEY'],
    )
    return recipe
|
116 |
+
|
117 |
+
|
118 |
+
items = [
|
119 |
+
"macaroni, butter, salt, bacon, milk, flour, pepper, cream corn",
|
120 |
+
"provolone cheese, bacon, bread, ginger"
|
121 |
+
]
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Flask==2.2.3
|
2 |
+
Flask_Cors==3.0.10
|
3 |
+
python-decouple==3.8
|
4 |
+
Requests==2.28.2
|
5 |
+
transformers==4.27.4
|
6 |
+
sacremoses==0.0.53
|
7 |
+
torch==2.0.0
|
test.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import os
|
3 |
+
|
4 |
+
# Scratch check of the section-splitting regex against a sample of generated
# recipe text.
text = 'title:macaroni and corn ingredients: 1 lb. macarooni 2 tbsp. butter 1 tsp salt 4 slices bacon 2 1/2 c. milk 2/3 c flour pepper to taste 1 can cream corn directions: cook macaronis in salted water until tender. fry bacon until crisp. drain on paper towel. melt butter in saucepan. add flour, salt and pepper. cook over low heat, stirring constantly, until mixture is smooth and bubbly. remove from heat. stir in milk. return to heat and bring to a boil. reduce heat and simmer until thickened. fold in corn and bacon. pour into greased baking dish. bake at 350 for 30 minutes.'

# '$' is an end marker so the lookahead below can terminate the last section.
text = text + '$'

pattern = r"(\w+:)(.+?(?=\w+:|\$))"

data = {"title": "", "ingredients": [], "directions": []}

for match in re.findall(pattern, text):
    if match[0] == 'title:':
        data["title"] = match[1]
    elif match[0] == 'ingredients:':
        data["ingredients"] = [ing.strip() for ing in match[1].split(',')]
    elif match[0] == 'directions:':
        # Loop variable renamed from `dir`, which shadowed the builtin.
        data["directions"] = [step.strip() for step in match[1].split('.')]

# Report only whether the key is configured; printing its value would leak
# the secret into console output and logs.
print("API_KEY set:", os.getenv("API_KEY") is not None)
|
utils/api.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
import requests
|
3 |
+
|
4 |
+
|
5 |
+
def generate_cook_image(query, app_id, app_key):
    """Look up an image URL for *query* through the Edamam recipe search API.

    This is best effort: any HTTP, network or payload problem yields ``None``
    instead of an exception.

    Args:
        query: Search text sent as the ``q`` parameter.
        app_id: Sent as the ``app_id`` parameter.
        app_key: Sent as the ``app_key`` parameter.

    Returns:
        The ``image`` value of one of the returned hits, or ``None`` when
        the request fails or no usable hit is present.
    """
    api_url = "https://api.edamam.com/api/recipes/v2"
    # Passing the values as params lets requests URL-encode them; a query
    # containing spaces, '&' or '#' would otherwise corrupt the query string.
    params = {
        "type": "public",
        "q": query,
        "app_id": app_id,
        "app_key": app_key,
        "field": "image",
    }

    try:
        # The timeout keeps a stalled API call from blocking the caller
        # indefinitely.
        r = requests.get(api_url, params=params, timeout=10)
        if r.status_code != 200:
            return None
        rj = r.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON.
        return None

    hits = rj.get("hits") if isinstance(rj, dict) else None
    if not isinstance(hits, list) or not hits:
        return None

    # NOTE(review): with more than one hit, index 0 is never selected (the
    # range is 1..min(5, len - 1)). Kept as in the original - confirm whether
    # skipping the first hit is intended.
    if len(hits) > 1:
        hit = hits[random.randint(1, min(5, len(hits) - 1))]
    else:
        hit = hits[0]

    recipe = hit.get("recipe") if isinstance(hit, dict) else None
    if not isinstance(recipe, dict):
        return None

    return recipe.get("image")
|
utils/ext.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
|
4 |
+
DEFAULT_MAP_DICT = {
|
5 |
+
" c ": " c. ",
|
6 |
+
", chopped": " (chopped)",
|
7 |
+
", crumbled": " (crumbled)",
|
8 |
+
", thawed": " (thawed)",
|
9 |
+
", melted": " (melted)",
|
10 |
+
}
|
11 |
+
|
12 |
+
|
13 |
+
def replace_regex(text, map_dict):
    """Replace every occurrence of a key of *map_dict* in *text* by its value.

    Keys are matched literally (they are regex-escaped). *text* is converted
    with ``str()`` first.

    Args:
        text: Value to process.
        map_dict: Mapping of literal substrings to their replacements.

    Returns:
        The converted string with all replacements applied; unchanged when
        *map_dict* is empty.
    """
    text = str(text)
    if not map_dict:
        # An empty alternation matches the empty string at every position,
        # and looking "" up in map_dict would raise KeyError.
        return text

    pattern = "|".join(map(re.escape, map_dict))
    return re.sub(pattern, lambda m: map_dict[m.group()], text)
|
16 |
+
|
17 |
+
|
18 |
+
def unique_list(seq):
    """Return the items of *seq* without duplicates, in first-seen order."""
    kept = []
    already_seen = set()
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        kept.append(item)
    return kept
|
22 |
+
|
23 |
+
|
24 |
+
def pure_comma_separation(list_str, return_list=True):
    """Normalise a comma-separated string into unique, lower-cased items.

    Items are stripped of surrounding whitespace, empty items are dropped
    and duplicates are removed while keeping first-seen order.

    Returns:
        The list of items, or the items joined with ", " when
        *return_list* is false.
    """
    stripped = (piece.strip() for piece in list_str.lower().split(","))
    # dict.fromkeys de-duplicates while preserving insertion order.
    items = list(dict.fromkeys(piece for piece in stripped if piece))
    if not return_list:
        return ", ".join(items)
    return items
|
30 |
+
|
31 |
+
|
32 |
+
def ingredient(text, map_dict):
    """Normalise a single ingredient string.

    Applies the replacements from *map_dict* combined with
    ``DEFAULT_MAP_DICT``, rewrites a mixed number such as "2 1/2" to
    "2+1/2", and collapses runs of whitespace to single spaces.

    Args:
        text: Ingredient text.
        map_dict: Extra replacements; when empty only the defaults are used.

    Returns:
        The normalised string.
    """
    # Build a fresh mapping instead of update()-ing the argument, which
    # wrote the defaults into the caller's dict. Defaults still take
    # precedence on key clashes.
    if map_dict:
        replacements = {**map_dict, **DEFAULT_MAP_DICT}
    else:
        replacements = DEFAULT_MAP_DICT

    text = replace_regex(text, replacements)
    text = re.sub(r"(\d)\s(\d\/\d)", r" \1+\2 ", text)
    # split() without arguments drops empty pieces and all surrounding
    # whitespace, so joining normalises the spacing.
    return " ".join(text.split())
|
42 |
+
|
43 |
+
|
44 |
+
def ingredients(text: list[str]):
    """Split raw ingredient text into individual ingredient strings.

    Args:
        text: Pieces of the ingredients section. The pieces are re-joined
            with ", " before splitting on '<sep>', so an ingredient that
            spans several pieces is kept whole.

    Returns:
        One stripped string per '<sep>'-separated entry, with '<section>'
        markers removed; an empty list for empty input.
    """
    tokens = ['<sep>', '<section>']

    if not text:
        # Indexing text[0] raised IndexError when no ingredients were parsed.
        return []

    # Using only text[0] silently dropped every ingredient after the first
    # piece boundary; process the whole section instead.
    joined = ", ".join(text)

    return [part.replace(tokens[1], '').strip() for part in joined.split(tokens[0])]
|
53 |
+
|
54 |
+
|
55 |
+
def directions(text_list: list[str]):
    """Clean the direction steps of a recipe.

    The final element of *text_list* is always discarded.
    NOTE(review): presumably it is the residue after the last '.' of the
    generated text - confirm.

    Args:
        text_list: Raw direction pieces; not modified.

    Returns:
        A new list with '<sep>' removed and whitespace stripped from every
        remaining piece; an empty list when there is nothing to keep.
    """
    token = '<sep>'

    # Slicing instead of pop() leaves the caller's list untouched and does
    # not raise on an empty list.
    return [line.replace(token, '').strip() for line in text_list[:-1]]
|
66 |
+
|
67 |
+
|
68 |
+
def title(text: str):
    """Remove the '<section>' marker and surrounding whitespace from *text*."""
    marker = '<section>'
    without_marker = text.replace(marker, '')
    return without_marker.strip()
|
utils/translators/en_ru/en_ru.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
|
3 |
+
mname = "facebook/wmt19-en-ru"
|
4 |
+
tokenizer = FSMTTokenizer.from_pretrained(mname)
|
5 |
+
model = FSMTForConditionalGeneration.from_pretrained(mname)
|
6 |
+
|
7 |
+
|
8 |
+
def translator_en_ru(text):
    """Translate *text* with the module-level ``tokenizer`` and ``model``.

    Returns:
        The first generated sequence, decoded without special tokens.
    """
    encoded = tokenizer.encode(text, return_tensors="pt")
    generated = model.generate(encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
|
utils/translators/en_ru/ru_en.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
|
3 |
+
mname = "facebook/wmt19-ru-en"
|
4 |
+
tokenizer = FSMTTokenizer.from_pretrained(mname)
|
5 |
+
model = FSMTForConditionalGeneration.from_pretrained(mname)
|
6 |
+
|
7 |
+
|
8 |
+
def translate_ru_en(text):
    """Translate *text* with the module-level ``tokenizer`` and ``model``.

    Returns:
        The first generated sequence, decoded without special tokens.
    """
    encoded = tokenizer.encode(text, return_tensors="pt")
    generated = model.generate(encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
|
utils/translators/translate_input.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from utils.translators.en_ru.ru_en import translate_ru_en
|
3 |
+
|
4 |
+
translators = {"ru": translate_ru_en}
|
5 |
+
|
6 |
+
|
7 |
+
def translate_input(inputs, lang="ru"):
    """Translate *inputs* with the translator registered for *lang*.

    Raises:
        KeyError: If ``translators`` has no entry for *lang*.
    """
    translate = translators[lang]
    return translate(inputs)
|
utils/translators/translate_recepie.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from utils.translators.en_ru.en_ru import translator_en_ru
|
3 |
+
|
4 |
+
translators = {"ru": translator_en_ru}
|
5 |
+
|
6 |
+
|
7 |
+
def translate_recepie(recepie, lang="en"):
    """Translate the title, directions and ingredients of a recipe.

    Args:
        recepie: Mapping with a ``"title"`` string and ``"directions"`` /
            ``"ingredients"`` lists of strings.
        lang: Target language key in ``translators``. ``"en"`` means no
            translation is needed.

    Returns:
        *recepie* itself when *lang* is ``"en"``; otherwise a new dict with
        the translated ``"title"``, ``"ingredients"`` and ``"directions"``.

    Raises:
        KeyError: If *lang* is neither ``"en"`` nor a key of ``translators``.
    """
    if lang == "en":
        # The default language has no translator entry; looking it up
        # raised KeyError although there is nothing to translate.
        return recepie

    translator = translators[lang]

    def translate_items(items):
        # Translate item by item. The previous join/translate/split approach
        # relied on the translation model preserving the delimiter, and the
        # directions were joined with "|" but split on ",", so list
        # boundaries were lost. Empty items are passed through untouched.
        # This costs one model call per item.
        return [translator(item).strip() if item else item for item in items]

    data = {"title": "", "ingredients": [], "directions": []}

    data["title"] = translator(recepie["title"])
    data["directions"] = translate_items(recepie["directions"])
    data["ingredients"] = translate_items(recepie["ingredients"])

    return data
|