"""Recipe generation service built on the flax-community T5 recipe model.

Loads the text2text-generation pipeline once at import time, then exposes
``generation_function``, which turns a comma-separated ingredient list into
a structured recipe (title / ingredients / directions) plus an illustrative
cook image, with optional translation for non-English languages.
"""

import os

# FIX: this must be set BEFORE transformers is imported — the cache
# location is read when the library initialises, so setting it after the
# import (as the original code did) has no effect.
os.environ['TRANSFORMERS_CACHE'] = './cache'

import re

from transformers import pipeline, set_seed
from transformers import AutoTokenizer
from decouple import config

from utils import ext
from utils.ext import pure_comma_separation
from utils.api import generate_cook_image
from utils.translators.translate_recepie import translate_recepie
from utils.translators.translate_input import translate_input

model_name_or_path = "flax-community/t5-recipe-generation"
task = "text2text-generation"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generator = pipeline(task, model=model_name_or_path,
                     tokenizer=model_name_or_path)

# The model was trained on inputs of the form "items: <ingredients>".
prefix = "items: "

# Sampling-based decoding profile ("top" chef): more varied/creative output.
chef_top = {
    "max_length": 512,
    "min_length": 64,
    "no_repeat_ngram_size": 3,
    "do_sample": True,
    "top_k": 60,
    "top_p": 0.95,
    "num_return_sequences": 1,
    "return_tensors": True,
    "return_text": False,
}

# Beam-search decoding profile ("beam" chef): more conservative output.
chef_beam = {
    "max_length": 512,
    "min_length": 64,
    "no_repeat_ngram_size": 3,
    "early_stopping": True,
    "num_beams": 5,
    "length_penalty": 1.5,
    "num_return_sequences": 1,
}

generation_kwargs = {
    "max_length": 512,
    "min_length": 64,
    "no_repeat_ngram_size": 3,
    "do_sample": True,
    "top_k": 60,
    "top_p": 0.95,
}


def load_api():
    """Read the cook-image API credentials via python-decouple.

    Returns:
        dict: ``{"KEY": <API_KEY>, "ID": <API_ID>}`` in the shape expected
        by ``generate_cook_image``.
    """
    return {"KEY": config("API_KEY"), "ID": config("API_ID")}


def skip_special_tokens_and_prettify(text):
    """Parse raw decoded model output into a structured recipe dict.

    The model emits sections tagged ``title:``, ``ingredients:`` and
    ``directions:``. A ``$`` sentinel is appended so the regex lookahead
    can terminate the final section.

    Args:
        text: Raw string decoded from the generator (special tokens kept).

    Returns:
        dict with keys ``title`` (str), ``ingredients`` (list[str]) and
        ``directions`` (list[str]), each post-processed by ``utils.ext``.
    """
    data = {"title": "", "ingredients": [], "directions": []}
    text = text + '$'
    # Capture "<word>:" followed by everything up to the next section tag
    # (or the '$' sentinel at the end of the string).
    pattern = r"(\w+:)(.+?(?=\w+:|\$))"
    for match in re.findall(pattern, text):
        if match[0] == 'title:':
            data["title"] = match[1]
        elif match[0] == 'ingredients:':
            data["ingredients"] = [ing.strip() for ing in match[1].split(',')]
        elif match[0] == 'directions:':
            data["directions"] = [d.strip() for d in match[1].split('.')]
        # Unknown section tags are silently ignored.

    data["ingredients"] = ext.ingredients(data["ingredients"])
    data["directions"] = ext.directions(data["directions"])
    data["title"] = ext.title(data["title"])
    return data


def generation_function(texts, lang="en", chef="top"):
    """Generate a structured recipe (and a cook image) from ingredients.

    Args:
        texts: Comma-separated ingredient list.
        lang: Language code; only 'en' and 'ru' are handled. Non-English
            supported input is translated to English before generation and
            the resulting recipe is translated back afterwards.
        chef: Decoding profile — 'top' (sampling) or 'beam' (beam search).

    Returns:
        dict: structured recipe with an extra ``image`` key from
        ``generate_cook_image``.

    Raises:
        KeyError: if ``chef`` is not one of 'top' / 'beam'.
    """
    langs = ['ru', 'en']
    chefs = {'top': chef_top, 'beam': chef_beam}
    api_credentials = load_api()

    if lang != "en" and lang in langs:
        texts = translate_input(texts, lang)

    output_ids = generator(texts, **chefs[chef])[0]["generated_token_ids"]
    # skip_special_tokens=False is deliberate: the section tags that the
    # parser relies on would otherwise be stripped by the tokenizer.
    raw_recipe = tokenizer.decode(output_ids, skip_special_tokens=False)
    generated_recipe = skip_special_tokens_and_prettify(raw_recipe)

    # Query the image API with the English title, before any translation.
    original_title = generated_recipe["title"]
    if lang != "en" and lang in langs:
        generated_recipe = translate_recepie(generated_recipe, lang)

    cook_image = generate_cook_image(
        original_title,
        app_id=api_credentials['ID'],
        app_key=api_credentials['KEY'],
    )
    generated_recipe["image"] = cook_image
    return generated_recipe


# Sample ingredient lists for manual testing / demos.
items = [
    "macaroni, butter, salt, bacon, milk, flour, pepper, cream corn",
    "provolone cheese, bacon, bread, ginger",
]