thesven committed on
Commit
29655fa
1 Parent(s): 04e30d7
Files changed (1) hide show
  1. app.py +18 -9
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
5
 
6
  model_to_use = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"
7
 
@@ -10,7 +11,7 @@ tokenizer = None
10
  model = None
11
 
12
  @spaces.GPU
13
- def start():
14
  global tokenizer, model
15
  model_name_or_path = model_to_use
16
 
@@ -35,7 +36,7 @@ def start():
35
  def send_message(message, history):
36
  global tokenizer, model
37
  if tokenizer is None or model is None:
38
- start() # Ensure the model and tokenizer are initialized
39
 
40
  # Add the user's message to the history
41
  history.append(("User", message))
@@ -49,20 +50,28 @@ def send_message(message, history):
49
  # Add the model's response to the history
50
  history.append(("Bot", generated_text))
51
 
52
- return history, history
 
 
 
 
53
 
54
  with gr.Blocks() as demo:
55
  gr.Markdown("# Chat with the Model")
56
 
57
- start_button = gr.Button("Start Model")
58
- status_text = gr.Textbox(label="Status")
59
-
60
- start_button.click(start, inputs=None, outputs=status_text)
61
 
62
  chatbot = gr.Chatbot()
63
  message = gr.Textbox(label="Your Message")
64
- send_button = gr.Button("Send")
65
 
66
- send_button.click(send_message, inputs=[message, chatbot], outputs=[chatbot, chatbot])
 
 
 
 
 
67
 
 
 
68
  demo.launch()
 
2
  import spaces
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
+ import threading
6
 
7
  model_to_use = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"
8
 
 
11
  model = None
12
 
13
  @spaces.GPU
14
+ def load_model():
15
  global tokenizer, model
16
  model_name_or_path = model_to_use
17
 
 
36
  def send_message(message, history):
37
  global tokenizer, model
38
  if tokenizer is None or model is None:
39
+ return history # Return the existing history if the model is not loaded
40
 
41
  # Add the user's message to the history
42
  history.append(("User", message))
 
50
  # Add the model's response to the history
51
  history.append(("Bot", generated_text))
52
 
53
+ return history
54
+
55
+ def initialize():
56
+ # Function to run the model loading in a separate thread
57
+ threading.Thread(target=load_model).start()
58
 
59
  with gr.Blocks() as demo:
60
  gr.Markdown("# Chat with the Model")
61
 
62
+ status_text = gr.Textbox(label="Status", value="Loading model, please wait...")
63
+ send_button = gr.Button("Send", interactive=False) # Disable the send button initially
 
 
64
 
65
  chatbot = gr.Chatbot()
66
  message = gr.Textbox(label="Your Message")
 
67
 
68
+ def enable_send_button():
69
+ send_button.interactive = True
70
+ status_text.value = "Model loaded and ready!"
71
+
72
+ demo.load(_js="initialize(); enable_send_button();")
73
+ send_button.click(send_message, inputs=[message, chatbot], outputs=chatbot)
74
 
75
+ initialize() # Start model initialization on app load
76
+
77
  demo.launch()