thesven committed on
Commit
29655fa
1 Parent(s): 04e30d7
Files changed (1) hide show
  1. app.py +18 -9
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
5
 
6
  model_to_use = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"
7
 
@@ -10,7 +11,7 @@ tokenizer = None
10
  model = None
11
 
12
  @spaces.GPU
13
- def start():
14
  global tokenizer, model
15
  model_name_or_path = model_to_use
16
 
@@ -35,7 +36,7 @@ def start():
35
  def send_message(message, history):
36
  global tokenizer, model
37
  if tokenizer is None or model is None:
38
- start() # Ensure the model and tokenizer are initialized
39
 
40
  # Add the user's message to the history
41
  history.append(("User", message))
@@ -49,20 +50,28 @@ def send_message(message, history):
49
  # Add the model's response to the history
50
  history.append(("Bot", generated_text))
51
 
52
- return history, history
 
 
 
 
53
 
54
  with gr.Blocks() as demo:
55
  gr.Markdown("# Chat with the Model")
56
 
57
- start_button = gr.Button("Start Model")
58
- status_text = gr.Textbox(label="Status")
59
-
60
- start_button.click(start, inputs=None, outputs=status_text)
61
 
62
  chatbot = gr.Chatbot()
63
  message = gr.Textbox(label="Your Message")
64
- send_button = gr.Button("Send")
65
 
66
- send_button.click(send_message, inputs=[message, chatbot], outputs=[chatbot, chatbot])
 
 
 
 
 
67
 
 
 
68
  demo.launch()
 
2
  import spaces
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
+ import threading
6
 
7
  model_to_use = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"
8
 
 
11
  model = None
12
 
13
  @spaces.GPU
14
+ def load_model():
15
  global tokenizer, model
16
  model_name_or_path = model_to_use
17
 
 
36
  def send_message(message, history):
37
  global tokenizer, model
38
  if tokenizer is None or model is None:
39
+ return history # Return the existing history if the model is not loaded
40
 
41
  # Add the user's message to the history
42
  history.append(("User", message))
 
50
  # Add the model's response to the history
51
  history.append(("Bot", generated_text))
52
 
53
+ return history
54
+
55
+ def initialize():
56
+ # Function to run the model loading in a separate thread
57
+ threading.Thread(target=load_model).start()
58
 
59
  with gr.Blocks() as demo:
60
  gr.Markdown("# Chat with the Model")
61
 
62
+ status_text = gr.Textbox(label="Status", value="Loading model, please wait...")
63
+ send_button = gr.Button("Send", interactive=False) # Disable the send button initially
 
 
64
 
65
  chatbot = gr.Chatbot()
66
  message = gr.Textbox(label="Your Message")
 
67
 
68
+ def enable_send_button():
69
+ send_button.interactive = True
70
+ status_text.value = "Model loaded and ready!"
71
+
72
+ demo.load(_js="initialize(); enable_send_button();")
73
+ send_button.click(send_message, inputs=[message, chatbot], outputs=chatbot)
74
 
75
+ initialize() # Start model initialization on app load
76
+
77
  demo.launch()