Spaces:
Sleeping
Sleeping
VishalMysore
committed on
Commit
•
6699094
1
Parent(s):
b501ded
Create cook.py
Browse files
cook.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
from urllib.parse import quote
import re
import torch
from transformers import pipeline,AutoTokenizer, AutoModelForSeq2SeqLM
print("initializing")
# Chat-style text-generation pipeline for recipe generation.
# bfloat16 halves memory versus fp32 for the model weights.
pipe = pipeline("text-generation", model="VishalMysore/cookgptlama", torch_dtype=torch.bfloat16)
# NLLB-200 tokenizer/model pair used to translate generated recipes
# into the language selected in the UI dropdown.
nllbtokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-1.3B")
nllbmodel = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-1.3B")
print("initializing done")
|
11 |
+
|
12 |
+
def read_html_file(file_path):
    """Read an HTML file and return its minified, ASCII-only content.

    Drops non-ASCII characters, removes newlines, and collapses
    whitespace between adjacent tags so the markup can be embedded
    compactly in the Gradio UI.

    Returns the cleaned string, or None if the file is missing or
    cannot be read (an error message is printed in that case).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()
        html_content = html_content.encode('ascii', 'ignore').decode('ascii')
        html_content = html_content.replace("\n", "")
        # Fix: raw string for the pattern — the original ">\s+<" relies
        # on an invalid escape sequence (SyntaxWarning in Python 3.12+).
        html_content = re.sub(r">\s+<", "><", html_content)
        return html_content
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
|
26 |
+
def remove_text_after_marker(input_text, marker):
    """Return the portion of input_text before the first occurrence of marker.

    If marker does not occur, input_text is returned unchanged.
    """
    # str.partition returns the text before the separator in slot 0,
    # and the whole string there when the separator is absent — which
    # is exactly this function's contract.
    return input_text.partition(marker)[0]
|
39 |
+
def remove_text_after(input_text, substring):
    """Return everything following the first occurrence of substring.

    If substring does not occur, input_text is returned unchanged.
    """
    head, sep, tail = input_text.partition(substring)
    # sep is "" only when the substring was not found; in that case the
    # original text is returned untouched.
    return tail if sep else input_text
|
52 |
+
def remove_text(input_text):
    """Strip a hard-coded prompt echo from generated recipe text.

    Removes the first occurrence of the known system/user prompt string
    that the model sometimes echoes back; returns the input unchanged
    when that string is not present.
    """
    text_to_remove = "\nYou are my personal chef experienced in Indian spicy food</s>\n\nprovide me recipe of paneer bhurji with cook time </s>\n\nIngredients:"
    # str.replace with count=1 reproduces the original find/slice logic:
    # only the first occurrence is excised.
    return input_text.replace(text_to_remove, "", 1)
|
68 |
+
def translate(tgt_lang_id, text):
    """Translate text into the target language with NLLB-200.

    tgt_lang_id: an NLLB language code such as "hin_Deva".
    Returns the list of decoded strings produced by batch_decode
    (one entry per input sequence).
    """
    print('translating')
    # Map the language code to its token id so generation is forced to
    # begin in the target language.
    # NOTE(review): lang_code_to_id is deprecated in recent transformers
    # releases — verify against the pinned library version.
    forced_bos = nllbtokenizer.lang_code_to_id[tgt_lang_id]
    encoded = nllbtokenizer(text, return_tensors='pt', padding='longest')
    generated = nllbmodel.generate(**encoded, forced_bos_token_id=forced_bos)
    translated_text = nllbtokenizer.batch_decode(generated, skip_special_tokens=True)
    print('translating complete')
    print(translated_text)
    return translated_text
|
82 |
+
def askllmpipe(text):
    """Generate a recipe answer from the local chat LLM pipeline.

    Builds a chat prompt with a fixed "personal chef" system message,
    samples a completion, then trims the echoed prompt (everything up
    to the assistant tag) and anything after the first "###" delimiter.
    """
    print(text)
    system_msg = {"role": "system", "content": "You are my personal chef experienced in Indian spicy food"}
    user_msg = {"role": "user", "content": text}
    prompt = pipe.tokenizer.apply_chat_template([system_msg, user_msg], tokenize=False, add_generation_prompt=False)
    outputs = pipe(prompt, max_new_tokens=500, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    print('got the result from llm')
    result = outputs[0]["generated_text"]
    print('trimming')
    # Drop everything before (and including) the assistant marker.
    result = remove_text_after(result, "<|assistant|>")
    print('trimming part 2')
    # Drop everything from the first "###" onwards.
    result = remove_text_after_marker(result, "###")
    print('trimming part 2 done')
    return result
|
95 |
+
def answer(text):
    """Placeholder recipe formatter for the "Add Recipe" tab.

    Currently ignores its input and returns a fixed string.
    """
    # TODO(review): real formatting logic is not implemented yet.
    return "I want to eat raw salad with lots of onion"
|
97 |
+
|
98 |
+
def callchain(recipe, dropdown_input="Hindi"):
    """Generate a recipe and optionally translate it.

    recipe: the user's recipe query, passed to the LLM pipeline.
    dropdown_input: display name of the target language. Defaults to
        "Hindi" (the UI dropdown's default) so the function still works
        if the language component is not wired through the click event.

    Returns the generated recipe, translated unless the language is
    English (translate returns the list produced by batch_decode).
    """
    language_mapping = {"English": "eng_Latn", "Hindi": "hin_Deva", "Tamil": "tam_Taml", "Gujarati": "guj_Gujr", "Telugu": "tel_Telu"}
    # Fix: fall back to English for unknown languages. The previous
    # fallback of "" passed the non-English branch and then crashed
    # translate() with a KeyError on the empty language code.
    selected_language = language_mapping.get(dropdown_input, "eng_Latn")
    output_text = askllmpipe(recipe)
    # Only translate when the target language is not English.
    if selected_language != "eng_Latn":
        output_text = translate(selected_language, output_text)
    return output_text
|
107 |
+
|
108 |
+
# ---- Gradio UI ------------------------------------------------------------
# Shared HTML footer embedded in every tab (None if cookgpt.html missing).
html_content = read_html_file("cookgpt.html")
descriptionFindRecipe="## Welcome to CookGPT\n CookGPT is an innovative AI-based chef that combines the charm of traditional cooking with the efficiency of modern technology. Whether you're a culinary enthusiast, a busy professional, or someone looking for culinary inspiration, CookGPT is designed to make your cooking experience delightful, personalized, and effortless."
descriptionAddRecipe="## what your favorite? \n Users will have the capability to contribute their custom recipes by either adding them manually or uploading video content. These recipes will be subject to upvoting by other users, and those receiving positive feedback will be scheduled for fine-tuning our Language Model (LLM). This collaborative approach allows the community to actively participate in refining and enhancing the model based on the most appreciated and valued recipes."
descriptionFineTune="## Miss something \n Finetuning Parameters Customization, we will keep rebuilding the based model untill all the recipes are covered"
with gr.Blocks() as demo:
    with gr.Tab("Find Recipe"):
        gr.Markdown(descriptionFindRecipe)
        findRecipeText = gr.Textbox(label="Recipe Query", info="What do you want to eat today!")
        language = gr.Dropdown(
            ["English", "Hindi", "Tamil", "Telugu", "Gujarati"], label="Language", value="Hindi", info="Select your desired Language!"
        )
        findbtn = gr.Button(value="Find Recipe")
        findRecipeTextOutPut = gr.Textbox(label="Recipe Details")
        # Fix: callchain takes (recipe, dropdown_input) — the language
        # dropdown must be in the inputs list, otherwise clicking the
        # button raises a TypeError for the missing argument.
        findbtn.click(callchain, inputs=[findRecipeText, language], outputs=[findRecipeTextOutPut])
        examples = gr.Examples(examples=[["Provide me Recipe for Paneer Butter Masala.", "Tamil"], ["Provide me a Recipe for Aloo Kofta.", "Hindi"]],
                               inputs=[findRecipeText, language])
        gr.HTML(html_content)

    with gr.Tab("Add Recipe"):
        gr.Markdown(descriptionAddRecipe)
        addRecipe = gr.Textbox(label="Add Your Recipe", info="Favorite dish made by your Grandma!")
        formatedRecipe = gr.Textbox(label="Formated Recipe", info="Format the Recipe!")
        btn = gr.Button(value="Format Recipe")
        # answer() is currently a stub that returns a fixed string.
        btn.click(answer, inputs=[addRecipe], outputs=[formatedRecipe])
        gr.HTML(html_content)

    with gr.Tab("Fine Tune The model"):
        gr.Markdown(descriptionFineTune)
        btn = gr.Button(value="View Finetuning Dataset")
        gr.HTML(html_content)

demo.launch()
|