0.26 loading models on start
app.py CHANGED
@@ -24,11 +24,33 @@ models_available = [
     "mistralai/Mistral-7B-Instruct-v0.3",
 ]
 
-
+model_a_info = {"id": "NousResearch/Meta-Llama-3.1-8B-Instruct",
+                "name": "Meta Llama 3.1 8B Instruct"}
+model_b_info = {"id": "mistralai/Mistral-7B-Instruct-v0.3",
+                "name": "Mistral 7B Instruct v0.3"}
+
 device = "cuda"
 
-
-
+try:
+    tokenizer_a = AutoTokenizer.from_pretrained(model_a_info['id'])
+    model_a = AutoModelForCausalLM.from_pretrained(
+        model_a_info['id'],
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    #model_a.tie_weights()
+    tokenizer_b = AutoTokenizer.from_pretrained(model_b_info['id'])
+    model_b = AutoModelForCausalLM.from_pretrained(
+        model_b_info['id'],
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    model_b.tie_weights()
+except Exception as e:
+    logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
+
 
 def apply_chat_template(messages, add_generation_prompt=False):
     """
@@ -54,39 +76,6 @@ def apply_chat_template(messages, add_generation_prompt=False):
 
     return pharia_template
 
-@spaces.GPU()
-def load_model_a(model_id):
-    global tokenizer_a, model_a, model_id_a
-    try:
-        model_id_a = model_id  # need to access model_id with tokenizer
-        tokenizer_a = AutoTokenizer.from_pretrained(model_id)
-        model_a = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-        )
-        model_a.tie_weights()
-    except Exception as e:
-        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    return gr.update(label=model_id)
-
-@spaces.GPU()
-def load_model_b(model_id):
-    global tokenizer_b, model_b, model_id_b
-    try:
-        model_id_b = model_id
-        tokenizer_b = AutoTokenizer.from_pretrained(model_id)
-        model_b = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-        )
-        model_b.tie_weights()
-    except Exception as e:
-        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    return gr.update(label=model_id)
 
 @spaces.GPU()
 def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
@@ -208,10 +197,8 @@ with gr.Blocks() as demo:
     system_prompt = gr.Textbox(lines=1, label="System Prompt", value="You are a helpful chatbot. Write a Nike style ad headline about the shame of being second best", show_copy_button=True)
     with gr.Row(variant="panel"):
         with gr.Column():
-            model_dropdown_a = gr.Dropdown(label="Model A", choices=models_available, value=None)
             chatbot_a = gr.Chatbot(label="Model A", rtl=True, likeable=True, show_copy_button=True, height=500)
         with gr.Column():
-            model_dropdown_b = gr.Dropdown(label="Model B", choices=models_available, value=None)
             chatbot_b = gr.Chatbot(label="Model B", rtl=True, likeable=True, show_copy_button=True, height=500)
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
@@ -224,9 +211,6 @@
             top_p = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, label="Top-p", step=0.01)
             repetition_penalty = gr.Slider(minimum=0.1, maximum=2.0, value=1.1, label="Repetition Penalty", step=0.1)
 
-    model_dropdown_a.change(load_model_a, inputs=[model_dropdown_a], outputs=[chatbot_a])
-    model_dropdown_b.change(load_model_b, inputs=[model_dropdown_b], outputs=[chatbot_b])
-
     input_text.submit(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
     submit_btn.click(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
     clear_btn.click(clear, outputs=[chatbot_a, chatbot_b])
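For reference, the net effect of this commit is that both models are now loaded once at import time instead of lazily through the removed dropdown callbacks. Below is a minimal standalone sketch of the new startup path; the model ids are taken from the diff, while the load helper and the SPACER value are illustrative stand-ins, not names guaranteed by the app.

import logging
import traceback

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

SPACER = "-" * 30  # stand-in for the app's log separator

model_a_info = {"id": "NousResearch/Meta-Llama-3.1-8B-Instruct",
                "name": "Meta Llama 3.1 8B Instruct"}
model_b_info = {"id": "mistralai/Mistral-7B-Instruct-v0.3",
                "name": "Mistral 7B Instruct v0.3"}

def load(model_id):
    # fp16 weights, placed automatically across the available GPU(s)/CPU
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return tokenizer, model

try:
    tokenizer_a, model_a = load(model_a_info["id"])
    tokenizer_b, model_b = load(model_b_info["id"])
except Exception as e:
    logging.error(f"{SPACER} Error: {e}, Traceback {traceback.format_exc()}")

Design trade-off visible in the diff: eager loading removes the per-selection load latency of the old load_model_a/load_model_b handlers, at the cost of a slower cold start and of keeping both fp16 models resident in memory at once.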