MohamedRashad committed
Commit 2f1457b
Parent: f5e1c16

Refactor model ID handling in app.py and update requirements.txt

Files changed (1):
  app.py  +9 -7
app.py CHANGED
@@ -8,13 +8,13 @@ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENT
 
 models_available = [
     "MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct",
-    "silma-ai/SILMA-9B-Instruct-v0.1.1",
+    "silma-ai/SILMA-9B-Instruct-v1.0",
     "inceptionai/jais-adapted-7b-chat",
     # "inceptionai/jais-adapted-13b-chat",
     "inceptionai/jais-family-6p7b-chat",
     # "inceptionai/jais-family-13b-chat",
     "NousResearch/Meta-Llama-3.1-8B-Instruct",
-    "unsloth/gemma-2-9b-it",
+    # "unsloth/gemma-2-9b-it",
     "NousResearch/Meta-Llama-3-8B-Instruct",
 ]
 
@@ -23,6 +23,7 @@ tokenizer_b, model_b = None, None
 
 def load_model_a(model_id):
     global tokenizer_a, model_a
+    del tokenizer_a, model_a
     tokenizer_a = AutoTokenizer.from_pretrained(model_id)
     print(f"model A: {tokenizer_a.eos_token}")
     try:
@@ -45,6 +46,7 @@ def load_model_a(model_id):
 
 def load_model_b(model_id):
     global tokenizer_b, model_b
+    del tokenizer_b, model_b
    tokenizer_b = AutoTokenizer.from_pretrained(model_id)
     print(f"model B: {tokenizer_b.eos_token}")
     try:
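
The `del tokenizer_a, model_a` / `del tokenizer_b, model_b` lines added above unbind the previously selected model before the next one is loaded, so switching models in a dropdown does not keep two checkpoints referenced at once. On its own, though, `del` only drops the Python names; a fuller teardown would also run the garbage collector and release cached CUDA memory. A minimal sketch of that pattern (the `gc`/`torch.cuda` calls are an assumption layered on top of this commit, not part of it):

    import gc

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer_a, model_a = None, None

    def load_model_a(model_id):
        global tokenizer_a, model_a
        # Unbind the previous objects. Assigning None (instead of `del`) keeps
        # the globals defined even if the load below raises, so the next call
        # cannot fail with a NameError.
        tokenizer_a, model_a = None, None
        gc.collect()                  # collect the now-unreferenced model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached CUDA blocks to the driver
        tokenizer_a = AutoTokenizer.from_pretrained(model_id)
        model_a = AutoModelForCausalLM.from_pretrained(
            model_id, torch_dtype=torch.bfloat16, device_map="auto"
        )

One caveat in the committed version: after `del`, the global names no longer exist, so if `from_pretrained` raises before the reassignment, the next dropdown change hits a NameError on the `del` line. Assigning `None` as in the sketch sidesteps that.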
@@ -103,7 +105,8 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
         streamer=text_streamer_a,
         max_new_tokens=max_new_tokens,
         pad_token_id=tokenizer_a.eos_token_id,
-        do_sample=True if temperature > 0 else False,
+        do_sample=False,
+        # do_sample=True if temperature > 0 else False,
         temperature=temperature,
         top_p=top_p,
         repetition_penalty=repetition_penalty,
@@ -113,7 +116,7 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
         streamer=text_streamer_b,
         max_new_tokens=max_new_tokens,
         pad_token_id=tokenizer_b.eos_token_id,
-        do_sample=True if temperature > 0 else False,
+        do_sample=False,
         temperature=temperature,
         top_p=top_p,
         repetition_penalty=repetition_penalty,
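
Both `generate` calls now pass `do_sample=False`, i.e. greedy decoding. With sampling off, `temperature` and `top_p` have no effect (recent `transformers` releases warn if they are still passed), while `repetition_penalty` continues to apply because it is a logits processor rather than a sampling parameter. A short sketch of the difference, reusing the `tokenizer_a`/`model_a` pair from the sketch above:

    inputs = tokenizer_a("مرحبا", return_tensors="pt").to(model_a.device)

    # Greedy decoding (this commit): deterministic; temperature/top_p are ignored.
    greedy = model_a.generate(**inputs, max_new_tokens=64, do_sample=False,
                              repetition_penalty=1.1)

    # Sampling (what the commented-out line would restore): temperature/top_p apply.
    sampled = model_a.generate(**inputs, max_new_tokens=64, do_sample=True,
                               temperature=0.7, top_p=1.0, repetition_penalty=1.1)

    print(tokenizer_a.decode(greedy[0], skip_special_tokens=True))
    print(tokenizer_a.decode(sampled[0], skip_special_tokens=True))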
@@ -162,7 +165,6 @@ def clear():
     return [], []
 
 arena_notes = """Important Notes:
-- `gemma-2` model doesn't have system prompt, so it's make the system prompt field empty for the model to work.
 - Sometimes an error may occur when generating the response, in this case, please try again.
 """
 
@@ -184,8 +186,8 @@ with gr.Blocks(title="Arabic-ORPO-Llama3") as demo:
         input_text = gr.Textbox(lines=1, label="", value="مرحبا", rtl=True, text_align="right", scale=3, show_copy_button=True)
         with gr.Accordion(label="Generation Configurations", open=False):
             max_new_tokens = gr.Slider(minimum=128, maximum=4096, value=2048, label="Max New Tokens", step=128)
-            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, label="Temperature", step=0.01)
-            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top-p", step=0.01)
+            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature", step=0.01)
+            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, label="Top-p", step=0.01)
             repetition_penalty = gr.Slider(minimum=0.1, maximum=2.0, value=1.1, label="Repetition Penalty", step=0.1)
 
     model_dropdown_a.change(load_model_a, inputs=[model_dropdown_a], outputs=[chatbot_a])
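
The Temperature and Top-p defaults move to 0.7 and 1.0 here, but because the `generate` calls above now hard-code `do_sample=False`, those two sliders have no effect on the output until sampling is re-enabled. For context, a sketch of how the dropdowns in this hunk are typically built from `models_available` and wired to the loaders (the component layout is an assumption; only the first `.change` line appears in the diff):

    import gradio as gr

    with gr.Blocks(title="Arabic-ORPO-Llama3") as demo:
        with gr.Row():
            model_dropdown_a = gr.Dropdown(choices=models_available, value=models_available[0], label="Model A")
            model_dropdown_b = gr.Dropdown(choices=models_available, value=models_available[1], label="Model B")
        with gr.Row():
            chatbot_a = gr.Chatbot(label="Model A", rtl=True)
            chatbot_b = gr.Chatbot(label="Model B", rtl=True)
        # Changing a dropdown reloads that side's model and resets its chat view.
        model_dropdown_a.change(load_model_a, inputs=[model_dropdown_a], outputs=[chatbot_a])
        model_dropdown_b.change(load_model_b, inputs=[model_dropdown_b], outputs=[chatbot_b])

    demo.launch()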
 