5to9 committed
Commit 5d9e699
1 Parent(s): ecf24e8

0.26 loading models on start

Files changed (1):
  1. app.py +25 -41
app.py CHANGED
@@ -24,11 +24,33 @@ models_available = [
     "mistralai/Mistral-7B-Instruct-v0.3",
 ]
 
-#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model_a_info = {"id": "NousResearch/Meta-Llama-3.1-8B-Instruct",
+                "name": "Meta Llama 3.1 8B Instruct"}
+model_b_info = {"id": "mistralai/Mistral-7B-Instruct-v0.3",
+                "name": "Mistral 7B Instruct v0.3"}
+
 device = "cuda"
 
-tokenizer_a, model_a = None, None
-tokenizer_b, model_b = None, None
+try:
+    tokenizer_a = AutoTokenizer.from_pretrained(model_a_info['id'])
+    model_a = AutoModelForCausalLM.from_pretrained(
+        model_a_info['id'],
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    #model_a.tie_weights()
+    tokenizer_b = AutoTokenizer.from_pretrained(model_b_info['id'])
+    model_b = AutoModelForCausalLM.from_pretrained(
+        model_b_info['id'],
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    model_b.tie_weights()
+except Exception as e:
+    logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
+
 
 def apply_chat_template(messages, add_generation_prompt=False):
     """
@@ -54,39 +76,6 @@ def apply_chat_template(messages, add_generation_prompt=False):
 
     return pharia_template
 
-@spaces.GPU()
-def load_model_a(model_id):
-    global tokenizer_a, model_a, model_id_a
-    try:
-        model_id_a = model_id  # need to access model_id with tokenizer
-        tokenizer_a = AutoTokenizer.from_pretrained(model_id)
-        model_a = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-        )
-        model_a.tie_weights()
-    except Exception as e:
-        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    return gr.update(label=model_id)
-
-@spaces.GPU()
-def load_model_b(model_id):
-    global tokenizer_b, model_b, model_id_b
-    try:
-        model_id_b = model_id
-        tokenizer_b = AutoTokenizer.from_pretrained(model_id)
-        model_b = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-        )
-        model_b.tie_weights()
-    except Exception as e:
-        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    return gr.update(label=model_id)
 
 @spaces.GPU()
 def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
@@ -208,10 +197,8 @@ with gr.Blocks() as demo:
         system_prompt = gr.Textbox(lines=1, label="System Prompt", value="You are a helpful chatbot. Write a Nike style ad headline about the shame of being second best", show_copy_button=True)
     with gr.Row(variant="panel"):
         with gr.Column():
-            model_dropdown_a = gr.Dropdown(label="Model A", choices=models_available, value=None)
             chatbot_a = gr.Chatbot(label="Model A", rtl=True, likeable=True, show_copy_button=True, height=500)
         with gr.Column():
-            model_dropdown_b = gr.Dropdown(label="Model B", choices=models_available, value=None)
             chatbot_b = gr.Chatbot(label="Model B", rtl=True, likeable=True, show_copy_button=True, height=500)
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
@@ -224,9 +211,6 @@
                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, label="Top-p", step=0.01)
                repetition_penalty = gr.Slider(minimum=0.1, maximum=2.0, value=1.1, label="Repetition Penalty", step=0.1)
 
-    model_dropdown_a.change(load_model_a, inputs=[model_dropdown_a], outputs=[chatbot_a])
-    model_dropdown_b.change(load_model_b, inputs=[model_dropdown_b], outputs=[chatbot_b])
-
     input_text.submit(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
     submit_btn.click(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
     clear_btn.click(clear, outputs=[chatbot_a, chatbot_b])
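
The net effect of this commit: both arena models are loaded eagerly at import time inside a single try/except, so the per-model dropdowns, the @spaces.GPU() load_model_a/load_model_b handlers, and their .change() wiring become unnecessary and are removed. The sketch below illustrates that load-once-serve-many pattern in isolation. It is a minimal illustration under assumptions, not the app's code: MODEL_ID and run_one are hypothetical names, and the app's actual request path (generate_both) is not part of this diff; only the transformers calls shown (AutoTokenizer/AutoModelForCausalLM.from_pretrained, apply_chat_template, generate) are standard API.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"  # one of the two arena models

    # Eager load: paid once at startup instead of on the first message.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,  # half-precision weights to fit in VRAM
        device_map="auto",          # place weights on the available GPU(s)
    )

    def run_one(user_text, max_new_tokens=256):
        """One chat turn against the already-resident model (hypothetical helper)."""
        messages = [{"role": "user", "content": user_text}]
        input_ids = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,           # needed for temperature/top_p to take effect
            temperature=0.2,
            top_p=0.9,
            repetition_penalty=1.1,
        )
        # Decode only the newly generated tokens, not the echoed prompt.
        new_tokens = output_ids[0][input_ids.shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)

The trade-off this buys: startup is slower and both models stay resident in GPU memory for the whole session, but the first message no longer pays a model-load penalty, and the module-level tokenizer_a/model_a and tokenizer_b/model_b globals are no longer reassigned at runtime by two competing dropdown callbacks.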