NeuraFusionAI committed on
Commit 123ab4d
1 Parent(s): 29a70a8

first commit

Files changed (4)
  1. README.md +58 -5
  2. app.py +220 -0
  3. .gitattributes → gitattributes +0 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,66 @@
- ---
- title: Arabic Evaluation
- emoji: 🏃
  colorFrom: green
- colorTo: indigo
  sdk: gradio
- sdk_version: 4.42.0
  app_file: app.py
  pinned: false
  license: mit
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
+ ---
+ title: Arabic Chatbot Arena
+ emoji: 🏆🇦🇪
  colorFrom: green
+ colorTo: blue
  sdk: gradio
+ sdk_version: 4.28.3
  app_file: app.py
  pinned: false
  license: mit
+ tags:
+ - arabic
+ - nlp
+ - chatbot
+ - language-model
+ - comparison
+ ---
+
+ # Arabic Chatbot Arena
+
+ ## Overview
+
+ Welcome to the Arabic Chatbot Arena! This interactive Space lets you compare and evaluate Arabic language models side by side. Test various chatbots, analyze their responses, and contribute to the advancement of Arabic NLP.
+
+ ## Features
+
+ - **Model Selection**: Choose from a curated list of state-of-the-art Arabic language models.
+ - **Side-by-Side Comparison**: Evaluate two models simultaneously for easy comparison.
+ - **Customizable Prompts**: Use your own system prompts and user inputs to test specific scenarios.
+ - **Real-Time Generation**: Watch as the models generate responses in real time.
+ - **Adjustable Parameters**: Fine-tune generation settings like temperature, top-p, and max tokens.
+ - **RTL Support**: Full right-to-left text support for a natural Arabic writing experience (see the sketch after this list).
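+
+ The right-to-left experience is plain Gradio configuration rather than custom CSS. A minimal sketch (the labels and values are illustrative; the `rtl` and `text_align` flags are the ones the app relies on):
+
+ ```python
+ import gradio as gr
+
+ with gr.Blocks() as demo:
+     # rtl=True and text_align="right" make typing and reading Arabic feel natural.
+     prompt = gr.Textbox(label="System Prompt", rtl=True, text_align="right")
+     chat = gr.Chatbot(label="Model A", rtl=True)
+
+ demo.launch()
+ ```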
+
+ ## How to Use
+
+ 1. Select two models from the dropdown menus.
+ 2. (Optional) Customize the system prompt to set the context.
+ 3. Enter your question or prompt in Arabic.
+ 4. Click "Generate" to see both models respond.
+ 5. Compare the outputs and use the "Like" buttons to rate responses.
+ 6. Adjust generation parameters as needed for different results (see the sketch after this list).
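+
+ Each slider maps directly onto a `transformers` generation argument. A minimal sketch of the same flow outside the UI, assuming a machine with `transformers` and `accelerate` installed; the model is the smallest one hosted by the Space, and the prompt and parameter values are illustrative:
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_id = "inceptionai/jais-family-590m-chat"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True
+ )
+
+ messages = [{"role": "user", "content": "مرحبا"}]
+ input_ids = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt"
+ ).to(model.device)
+
+ # These keyword arguments correspond to the arena's sliders.
+ output = model.generate(
+     input_ids,
+     max_new_tokens=256,
+     do_sample=True,
+     temperature=0.7,
+     top_p=0.9,
+     repetition_penalty=1.1,
+ )
+ print(tokenizer.decode(output[0, input_ids.shape[-1]:], skip_special_tokens=True))
+ ```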
+
+ ## Models
+
+ This Space features a diverse selection of Arabic language models, including:
+
+ - MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct
+ - silma-ai/SILMA-9B-Instruct-v1.0
+ - inceptionai/jais-adapted-7b-chat
+ - ... (and many more)
+
+ ## Contributing
+
+ Your feedback is valuable! If you encounter any issues or have suggestions for improvement, please open an issue on the [GitHub repository](https://github.com/yourusername/arabic-chatbot-arena).
+
+ ## License
+
+ This project is licensed under the MIT License, as declared in the Space metadata above. See the [LICENSE](LICENSE) file for details.
+
+ ## Acknowledgements
+
+ Special thanks to all the model creators and the Arabic NLP community for their contributions to advancing Arabic language technology.
+
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,220 @@
+ import spaces
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ import torch
+ import gradio as gr
+ import os
+ from functools import lru_cache
+
+ from threading import Thread
+ import subprocess
+ import logging
+
+ # Install FlashAttention at runtime; the env flag skips the CUDA build so the
+ # prebuilt wheel is used. os.environ is merged in so pip keeps PATH and friends.
+ subprocess.run('pip install -U flash-attn --no-build-isolation', env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+ # Models offered in the two comparison dropdowns.
+ models_available = [
+     "MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct",
+     "silma-ai/SILMA-9B-Instruct-v1.0",
+     "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+     "MaziyarPanahi/calme-2.2-qwen2-72b",
+     "davidkim205/Rhea-72b-v0.5",
+     "dnhkng/RYS-XLarge",
+     "arcee-ai/Arcee-Nova",
+     "paloalma/TW3-JRGL-v2",
+     "freewheelin/free-evo-qwen72b-v0.8-re",
+     "dfurman/Qwen2-72B-Orpo-v0.1",
+     "MaziyarPanahi/calme-2.1-qwen2-72b",
+     "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3",
+     "inceptionai/jais-adapted-7b-chat",
+     "inceptionai/jais-family-6p7b-chat",
+     "inceptionai/jais-family-2p7b-chat",
+     "inceptionai/jais-family-1p3b-chat",
+     "inceptionai/jais-family-590m-chat",
+ ]
+
+ # Tokenizers and models are loaded lazily when a model is picked from a dropdown.
+ tokenizer_a, model_a = None, None
+ tokenizer_b, model_b = None, None
+ torch_dtype = torch.bfloat16
+ attn_implementation = "flash_attention_2"
+
+ def load_model_a(model_id):
+     global tokenizer_a, model_a
+     tokenizer_a = AutoTokenizer.from_pretrained(model_id)
+     print(f"model A: {tokenizer_a.eos_token}")
+     try:
+         # Prefer FlashAttention 2; not every architecture supports it.
+         model_a = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             torch_dtype=torch_dtype,
+             device_map="auto",
+             attn_implementation=attn_implementation,
+             trust_remote_code=True,
+         ).eval()
+     except Exception as e:
+         print(f"Using default attention implementation in {model_id}")
+         print(f"Error: {e}")
+         # Fall back to the default attention implementation.
+         model_a = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             torch_dtype=torch_dtype,
+             device_map="auto",
+             trust_remote_code=True,
+         ).eval()
+     model_a.tie_weights()
+     return gr.update(label=model_id)
+
+ def load_model_b(model_id):
+     # Mirrors load_model_a for the second dropdown slot.
+     global tokenizer_b, model_b
+     tokenizer_b = AutoTokenizer.from_pretrained(model_id)
+     print(f"model B: {tokenizer_b.eos_token}")
+     try:
+         model_b = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             torch_dtype=torch_dtype,
+             device_map="auto",
+             attn_implementation=attn_implementation,
+             trust_remote_code=True,
+         ).eval()
+     except Exception as e:
+         print(f"Error: {e}")
+         print(f"Using default attention implementation in {model_id}")
+         model_b = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             torch_dtype=torch_dtype,
+             device_map="auto",
+             trust_remote_code=True,
+         ).eval()
+     model_b.tie_weights()
+     return gr.update(label=model_id)
+
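+ # Both models generate concurrently: each model.generate call runs in its own
+ # thread and pushes text through a TextIteratorStreamer, while the main thread
+ # interleaves the two streams to update both chat panels in near real time.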
+ @spaces.GPU()
+ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
+     text_streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True)
+     text_streamer_b = TextIteratorStreamer(tokenizer_b, skip_prompt=True)
+
+     system_prompt_list = [{"role": "system", "content": system_prompt}] if system_prompt else []
+     input_text_list = [{"role": "user", "content": input_text}]
+
+     # Rebuild each model's chat history from its Gradio chatbot state.
+     chat_history_a = []
+     for user, assistant in chatbot_a:
+         chat_history_a.append({"role": "user", "content": user})
+         chat_history_a.append({"role": "assistant", "content": assistant})
+
+     chat_history_b = []
+     for user, assistant in chatbot_b:
+         chat_history_b.append({"role": "user", "content": user})
+         chat_history_b.append({"role": "assistant", "content": assistant})
+
+     # Message lists for model A and model B respectively.
+     base_messages = system_prompt_list + chat_history_a + input_text_list
+     new_messages = system_prompt_list + chat_history_b + input_text_list
+
+     input_ids_a = tokenizer_a.apply_chat_template(
+         base_messages,
+         add_generation_prompt=True,
+         return_tensors="pt"
+     ).to(model_a.device)
+
+     input_ids_b = tokenizer_b.apply_chat_template(
+         new_messages,
+         add_generation_prompt=True,
+         return_tensors="pt"
+     ).to(model_b.device)
+
+     generation_kwargs_a = dict(
+         input_ids=input_ids_a,
+         streamer=text_streamer_a,
+         max_new_tokens=max_new_tokens,
+         pad_token_id=tokenizer_a.eos_token_id,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+     )
+     generation_kwargs_b = dict(
+         input_ids=input_ids_b,
+         streamer=text_streamer_b,
+         max_new_tokens=max_new_tokens,
+         pad_token_id=tokenizer_b.eos_token_id,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+     )
+
+     # Run both generations in background threads so their streamers can be
+     # drained from this function without blocking each other.
+     thread_a = Thread(target=model_a.generate, kwargs=generation_kwargs_a)
+     thread_b = Thread(target=model_b.generate, kwargs=generation_kwargs_b)
+
+     thread_a.start()
+     thread_b.start()
+
+     # Append a new (user, empty assistant) turn to each panel, then fill the
+     # assistant side incrementally as tokens arrive.
+     chatbot_a.append([input_text, ""])
+     chatbot_b.append([input_text, ""])
+
+     finished_a = False
+     finished_b = False
+
+     # Round-robin over the two streamers, trimming everything after the EOS
+     # token and yielding after each chunk so Gradio re-renders both panels.
+     while not (finished_a and finished_b):
+         if not finished_a:
+             try:
+                 text_a = next(text_streamer_a)
+                 if tokenizer_a.eos_token in text_a:
+                     eot_location = text_a.find(tokenizer_a.eos_token)
+                     text_a = text_a[:eot_location]
+                     finished_a = True
+                 chatbot_a[-1][-1] += text_a
+                 yield chatbot_a, chatbot_b
+             except StopIteration:
+                 finished_a = True
+
+         if not finished_b:
+             try:
+                 text_b = next(text_streamer_b)
+                 if tokenizer_b.eos_token in text_b:
+                     eot_location = text_b.find(tokenizer_b.eos_token)
+                     text_b = text_b[:eot_location]
+                     finished_b = True
+                 chatbot_b[-1][-1] += text_b
+                 yield chatbot_a, chatbot_b
+             except StopIteration:
+                 finished_b = True
+
+     return chatbot_a, chatbot_b
+
+ def clear():
+     return [], []
+
+ arena_notes = """## Important Notes:
+ - Sometimes an error may occur while generating the response; if that happens, please try again.
+ """
+
+ with gr.Blocks() as demo:
+     with gr.Column():
+         gr.HTML("<center><h1>Arabic Chatbot Comparison</h1></center>")
+         gr.Markdown(arena_notes)
+         system_prompt = gr.Textbox(lines=1, label="System Prompt", value="أنت متحدث لبق باللغة العربية!", rtl=True, text_align="right", show_copy_button=True)
+         with gr.Row(variant="panel"):
+             with gr.Column():
+                 model_dropdown_a = gr.Dropdown(label="Model A", choices=models_available, value=None)
+                 chatbot_a = gr.Chatbot(label="Model A", rtl=True, likeable=True, show_copy_button=True, height=500)
+             with gr.Column():
+                 model_dropdown_b = gr.Dropdown(label="Model B", choices=models_available, value=None)
+                 chatbot_b = gr.Chatbot(label="Model B", rtl=True, likeable=True, show_copy_button=True, height=500)
+         with gr.Row(variant="panel"):
+             with gr.Column(scale=1):
+                 submit_btn = gr.Button(value="Generate", variant="primary")
+                 clear_btn = gr.Button(value="Clear", variant="secondary")
+             input_text = gr.Textbox(lines=1, label="", value="مرحبا", rtl=True, text_align="right", scale=3, show_copy_button=True)
+         with gr.Accordion(label="Generation Configurations", open=False):
+             max_new_tokens = gr.Slider(minimum=128, maximum=4096, value=2048, label="Max New Tokens", step=128)
+             temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature", step=0.01)
+             top_p = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, label="Top-p", step=0.01)
+             repetition_penalty = gr.Slider(minimum=0.1, maximum=2.0, value=1.1, label="Repetition Penalty", step=0.1)
+
+     # Selecting a model (re)loads it and relabels the corresponding panel.
+     model_dropdown_a.change(load_model_a, inputs=[model_dropdown_a], outputs=[chatbot_a])
+     model_dropdown_b.change(load_model_b, inputs=[model_dropdown_b], outputs=[chatbot_b])
+
+     # Enter in the textbox and the Generate button trigger the same handler.
+     input_text.submit(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
+     submit_btn.click(generate_both, inputs=[system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens, temperature, top_p, repetition_penalty], outputs=[chatbot_a, chatbot_b])
+     clear_btn.click(clear, outputs=[chatbot_a, chatbot_b])
+
+ if __name__ == "__main__":
+     demo.queue().launch()
.gitattributes → gitattributes RENAMED
File without changes
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers==4.44.1
+ torch
+ accelerate==0.33.0
+ sentencepiece==0.2.0
+ spaces