andrewsiah committed
Commit 23dea16 · verified · 1 Parent(s): 9a3150c

Upload folder using huggingface_hub

Files changed (10):
  1. .gitignore +3 -1
  2. README.md +15 -1
  3. chatbot.py +35 -7
  4. eval.py +288 -71
  5. eval_old.py +145 -0
  6. leaderboard.py +69 -0
  7. pyproject.toml +3 -0
  8. requirements.txt +3 -0
  9. uv.lock +0 -0
  10. vllm_inference.py +3 -1
.gitignore CHANGED
@@ -1,5 +1,7 @@
 .env
 .ai/
 .cursorrules
-gradio_cache_examples/
 __pycache__/
+gradio_cached_examples/
+supa.ipynb
+.venv/
README.md CHANGED
@@ -6,7 +6,7 @@ sdk_version: 4.44.0
 ---
 # Turing-Test-Prompt-Competition
 
-This project implements a chatbot using vLLM for inference and Streamlit for the user interface.
+This project implements a chatbot using vLLM for inference, Streamlit for the user interface, and Gradio for the evaluation interface.
 
 ## Setup and Deployment
 
@@ -38,6 +38,20 @@ To run the chatbot locally:
 ngrok http 8501
 ```
 
+### Running the Evaluation Interface
+
+To run the evaluation interface locally:
+
+1. Start the Gradio app:
+```
+gradio eval.py
+```
+
+2. To deploy to a Hugging Face Space, run:
+```
+gradio deploy
+```
+
 ## Project Structure
 
 - `download_llama.py`: Script to download the LLaMA model
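
Since eval.py and leaderboard.py (see their diffs below) load Supabase credentials with python-dotenv, running the evaluation interface locally presumably also needs a `.env` file next to the code. A sketch with placeholder values, not real credentials:

```
SUPABASE_URL=https://<your-project-ref>.supabase.co
SUPABASE_KEY=<your-anon-key>
```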
chatbot.py CHANGED
@@ -27,6 +27,20 @@ def get_completion(client, model_id, messages, args):
     except Exception as e:
         print(f"Error during API call: {e}")
         return None
+
+def save_configuration(config):
+    from supabase import create_client, Client
+
+    url: str = "https://rwtzkiofjrpekpcazdoa.supabase.co"
+    key: str = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InJ3dHpraW9manJwZWtwY2F6ZG9hIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjUyMDc0MTMsImV4cCI6MjA0MDc4MzQxM30.ey2PKyQkxlXorq_NnUQtbj08MgVW31h0pq1MYMgV9eU"
+    supabase: Client = create_client(url, key)
+    response = supabase.table("config").insert(config).execute()
+
+def clear_chat_history():
+    st.session_state.messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "assistant", "content": "Hello!"}
+    ]
 
 # App title
 st.set_page_config(page_title="Turing Test")
@@ -49,14 +63,32 @@ with st.sidebar:
     # Add system prompt input
     st.subheader('System Prompt')
     system_prompt = st.text_area("Enter a system prompt:",
-        "you are rolplaying as an old grandma",
+        "you are roleplaying as an old grandma",
         help="This message sets the behavior of the AI.")
     st.subheader('Models and parameters')
    selected_model = st.sidebar.selectbox('Choose a model', ['meta-llama/Meta-Llama-3.1-8B-Instruct'], key='selected_model')
     temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.8, step=0.1)
     top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.95, step=0.01)
     max_length = st.sidebar.slider('max_length', min_value=32, max_value=1024, value=32, step=8)
+    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
 
+    # Add submit button for configuration
+    submit_config = st.sidebar.button('Submit Configuration')
+
+
+    if submit_config:
+        # Save the current configuration to the database
+        config = {
+            "user_id": "123",
+            "prompt": system_prompt,
+            "model": selected_model,
+            "temperature": temperature,
+            "top_p": top_p,
+            "max_length": max_length
+        }
+        save_configuration(config)
+        st.sidebar.success("Configuration submitted successfully!")
+
 
 # Store chat history
 if "messages" not in st.session_state.keys():
@@ -70,12 +102,8 @@ for message in st.session_state.messages[1:]:
     with st.chat_message(message["role"]):
         st.write(message["content"])
 
-def clear_chat_history():
-    st.session_state.messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "assistant", "content": "Hello!"}
-    ]
-st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
+
+
 
 # Function for generating Llama2 response using OpenAI client API
 def generate_llama2_response(prompt_input, model, temperature, top_p, max_length):
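
Review note: the new `save_configuration` hardcodes the Supabase project URL and anon key, while eval.py and leaderboard.py (below) read `SUPABASE_URL` and `SUPABASE_KEY` from a `.env` file via python-dotenv. A minimal sketch of the same helper using the env-based pattern — the variable names follow eval.py; the rest is an assumption, not part of this commit:

```python
import os

from dotenv import load_dotenv  # python-dotenv, added to pyproject.toml in this commit
from supabase import Client, create_client

load_dotenv()  # expects SUPABASE_URL and SUPABASE_KEY in .env, as eval.py does

def save_configuration(config: dict) -> None:
    # Insert a prompt-configuration row into the Supabase `config` table,
    # reading credentials from the environment instead of source code.
    supabase: Client = create_client(
        os.environ["SUPABASE_URL"],
        os.environ["SUPABASE_KEY"],
    )
    supabase.table("config").insert(config).execute()
```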
eval.py CHANGED
@@ -1,8 +1,21 @@
 import gradio as gr
-
 import os
 import openai
 from dataclasses import dataclass
+from supabase import create_client, Client
+from uuid import UUID
+from dotenv import load_dotenv
+import random
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Initialize Supabase client
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY")
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+
+SHOW_CONFIG = True
 
 @dataclass
 class Args:
@@ -16,130 +29,334 @@ class Args:
     temperature: float = 0.8
     top_p: float = 0.95
 
-def get_completion(client, model_id, messages, args):
+def get_completion(client, config, messages):
+    print("GETTING COMPLETION")
     completion_args = {
-        "model": model_id,
+        "model": config['model'],
         "messages": messages,
-        "frequency_penalty": args.frequency_penalty,
-        "max_tokens": args.max_tokens,
-        "n": args.n,
-        "presence_penalty": args.presence_penalty,
-        "seed": args.seed,
-        "stop": args.stop,
-        "stream": args.stream,
-        "temperature": args.temperature,
-        "top_p": args.top_p,
-    }
-
-    completion_args = {
-        k: v for k, v in completion_args.items() if v is not None
+        "frequency_penalty": config.get('frequency_penalty', 0),
+        "max_tokens": config.get('max_length', 32),
+        "n": config.get('n', 1),
+        "presence_penalty": config.get('presence_penalty', 0),
+        "seed": config.get('seed', 42),
+        "stop": config.get('stop', None),
+        "stream": config.get('stream', False),
+        "temperature": config.get('temperature', 0.8),
+        "top_p": config.get('top_p', 0.95),
     }
 
     try:
+        print("TRYING TO GET COMPLETION")
         response = client.chat.completions.create(**completion_args)
+        print("GOT COMPLETION")
         return response
     except Exception as e:
         print(f"Error during API call: {e}")
         return None
 
-def chat_response(message, history, model):
+def get_two_random_configs(round_num: int):
+    print("GETTING TWO RANDOM CONFIGS")
+    # Fetch all configurations for the current round
+    response = supabase.table("configs")\
+        .select("*")\
+        .eq("round", round_num)\
+        .execute()
+
+    if not response.data or len(response.data) < 2:
+        return None, None
+
+    # Randomly select two unique configurations
+    selected_configs = random.sample(response.data, 2)
+    return selected_configs[0], selected_configs[1]
+
+def initialize_session(state):
+    print("INITIALIZING SESSION")
+    current_round = get_current_round()
+    if not current_round:
+        state.value["error"] = "Error: No active round found."
+        return
+
+    config_a, config_b = get_two_random_configs(round_num=current_round)
+    if not config_a or not config_b:
+        state.value["error"] = "Error: Not enough configurations available for voting."
+        return
+
+    state.value['config_a'] = config_a
+    state.value['config_b'] = config_b
+    state.value['conversation_a'] = []
+    state.value['conversation_b'] = []
+    state.value['round'] = current_round
+
+def chat_response_a(message, history):
+    print("CHAT RESPONSE A")
+    return chat_response(message, history, 'a')
+
+def chat_response_b(message, history):
+    print("CHAT RESPONSE B")
+    return chat_response(message, history, 'b')
+
+def chat_response(message, history, config_type):
+    # Access the state within the Blocks
+    current_state = demo.blocks['state'].value  # Accessing state correctly
+    print("CHAT RESPONSE")
+    config_a = current_state.get('config_a')
+    config_b = current_state.get('config_b')
+
+    # Handle initialization if configs are missing
+    if not config_a or not config_b:
+        initialize_session(demo.blocks['state'])
+        config_a = current_state.get('config_a')
+        config_b = current_state.get('config_b')
+        if not config_a or not config_b:
+            return "Error: Configurations not initialized sufficiently."
+
     # Set up OpenAI client
     openai_api_key = "super-secret-token"
+
     os.environ['OPENAI_API_KEY'] = openai_api_key
+
     openai.api_key = openai_api_key
     openai.api_base = "https://turingtest--example-vllm-openai-compatible-serve.modal.run/v1"
     client = openai.OpenAI(api_key=openai_api_key, base_url=openai.api_base)
 
-    # Prepare messages
-    messages = [{"role": "system", "content": "You are a helpful assistant."}]
-
-    # Convert history to the correct format
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+    # Append existing conversation
+    if config_type == 'a':
+        system_message = {"role": "system", "content": f"{config_a['sys_prompt']}"}
+        messages = [system_message]
+        for user_msg, assistant_msg in current_state['conversation_a']:
+            if user_msg:
+                messages.append({"role": "user", "content": user_msg})
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
+    else:
+        system_message = {"role": "system", "content": f"{config_b['sys_prompt']}"}
+        messages = [system_message]
+        for user_msg, assistant_msg in current_state['conversation_b']:
+            if user_msg:
+                messages.append({"role": "user", "content": user_msg})
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
 
     messages.append({"role": "user", "content": message})
 
-    # Set up arguments
-    args = Args()
-
-    # Use the correct model identifier
-    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+    # Determine which configuration to use
+    # config_id = config_a['id'] if config_type == 'a' else config_b['id']
 
     # Get completion
-    response = get_completion(client, model_id, messages, args)
-
-    if response and response.choices:
-        return response.choices[0].message.content
+    # response = get_completion(client, config_id, messages)
+    if config_type == 'a':
+        response = get_completion(client, config_a, messages)
     else:
-        return f"Error: Please retry or contact support if retried more than twice."
-
-def create_chat_interface(model):
-    return gr.ChatInterface(
-        fn=lambda message, history: chat_response(message, history, model),
-        chatbot=gr.Chatbot(height=400, label=f"Choice {model}"),
-        textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
-        # title=f"Choice {model}",
-        description="",
-        theme="dark",
-        # examples=[["what's up"]],
-        # cache_examples=True,
-        retry_btn=None,
-        undo_btn=None,
-        clear_btn=None,
+        response = get_completion(client, config_b, messages)
+
+    assistant_reply = (
+        response.choices[0].message.content if response and response.choices else
+        "Error: Please retry or contact support if retried more than twice."
     )
 
+    # Update the appropriate conversation state
+    if config_type == 'a':
+        current_state['conversation_a'].append((message, assistant_reply))
+    else:
+        current_state['conversation_b'].append((message, assistant_reply))
+
+    # Update the state
+    # demo.blocks['state'].update(current_state)
+    demo.blocks['state'].value = current_state
+
+    return assistant_reply
+
+def create_chat_interface(model_label):
+    print("CREATE CHAT INTERFACE")
+    if model_label == 'a':
+        return gr.ChatInterface(
+            fn=lambda message, history: (chat_response_a(message, history)),
+            chatbot=gr.Chatbot(height=400, label=f"Choice {model_label}"),
+            textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
+            description="",
+            theme="dark",
+            retry_btn=None,
+            undo_btn=None,
+            clear_btn=None,
+        )
+    else:
+        return gr.ChatInterface(
+            fn=lambda message, history: (chat_response_b(message, history)),
+            chatbot=gr.Chatbot(height=400, label=f"Choice {model_label}"),
+            textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
+            description="",
+            theme="dark",
+            retry_btn=None,
+            undo_btn=None,
+            clear_btn=None,
+        )
+
+def submit_vote(vote: str, state):
+    print("SUBMIT VOTE")
+
+    a_config_id = state.value['config_a']['id']
+    b_config_id = state.value['config_b']['id']
+    conversation_a = state.value.get('conversation_a', [])
+    conversation_b = state.value.get('conversation_b', [])
+
+    # Save conversations to Supabase
+    supabase.table("conversations").insert([
+        {
+            "user_id": None,  # No authentication, set to None or another identifier if available
+            "configuration_id": a_config_id,
+            "messages": conversation_a
+        },
+        {
+            "user_id": None,
+            "configuration_id": b_config_id,
+            "messages": conversation_b
+        }
+    ]).execute()
+
+    # Save vote to Supabase
+    supabase.table("votes").insert({
+        "a_config_id": str(a_config_id),
+        "b_config_id": str(b_config_id),
+        "voted_by_uid": None,  # No user ID since authentication is not implemented
+        "round": get_current_round(),  # Assuming Round 1; modify as needed
+        "is_tie": vote == "tie",
+        "a_wins": vote == "a",
+        "created_at": "now()"
+    }).execute()
+
+    # Update ELO ratings
+    # update_elo(a_config_id, b_config_id, vote)
+
+    # Reset conversations for next voting
+    state.value['conversation_a'] = []
+    state.value['conversation_b'] = []
+
+    return "Vote submitted!"
+
+def update_elo(a_config_id: UUID, b_config_id: UUID, vote: str):
+    print("UPDATE ELO")
+    a_elo_response = supabase.table("elos").select("rating").eq("user_id", a_config_id).single().execute()
+    b_elo_response = supabase.table("elos").select("rating").eq("user_id", b_config_id).single().execute()
+
+    if not a_elo_response.data or not b_elo_response.data:
+        return
+
+    a_elo = a_elo_response.data["rating"]
+    b_elo = b_elo_response.data["rating"]
+
+    if vote == "a":
+        a_new = a_elo + 10
+        b_new = b_elo - 10
+    elif vote == "b":
+        a_new = a_elo - 10
+        b_new = b_elo + 10
+    else:
+        # Tie: no change or minimal change
+        a_new = a_elo
+        b_new = b_elo
+
+    supabase.table("elos").update({"rating": a_new}).eq("user_id", a_config_id).execute()
+    supabase.table("elos").update({"rating": b_new}).eq("user_id", b_config_id).execute()
+
+def get_current_round():
+    print("GET CURRENT ROUND")
+    response = supabase.table("round_status").select("round").eq("is_eval_active", True).single().execute()
+    if response.data:
+        return response.data["round"]
+    return None
+
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), head=
     """
     <style>
     body {
         font-family: 'Calibri', sans-serif; /* Choose your desired font */
     }
     </style>
     """) as demo:
-    gr.Markdown("## Turing Test Prompt Competition")
+    gr.Markdown("## Turing Test Prompt Comp")
+
+    # State to hold current config IDs and separate conversations
+    state = gr.State({
+        "config_a": None,
+        "config_b": None,
+        "conversation_a": [],
+        "conversation_b": [],
+        "round": 1,
+        "error": None
+    })
+    demo.blocks['state'] = state  # Assign state to a key for easy access
 
+    initialize_session(state)
+
     with gr.Row():
         with gr.Column():
-            chat_a = create_chat_interface("A")
+            chat_a = create_chat_interface('a')
         with gr.Column():
-            chat_b = create_chat_interface("B")
+            chat_b = create_chat_interface('b')
 
     with gr.Row():
-        a_better = gr.Button("👉 A is better", scale=1)
-        b_better = gr.Button("👈 B is better", scale=1)
+        a_better = gr.Button("A is better 👈", scale=1)
         tie = gr.Button("🤝 Tie", scale=1)
-        both_bad = gr.Button("👎 Both are bad", scale=1)
+        b_better = gr.Button("👉 B is better", scale=1)
+
+    # Output component to display status messages
+    output_message = gr.Textbox(label="Status", interactive=False)
+
+    # Define separate functions for each vote type
+    def submit_vote_a():
+        return submit_vote('a', state)
 
+    def submit_vote_b():
+        return submit_vote('b', state)
+
+    def submit_vote_tie():
+        return submit_vote('tie', state)
+
+    # Connect buttons to their respective functions
+    a_better.click(
+        submit_vote_a,
+        inputs=None,
+        outputs=output_message
+    )
+    b_better.click(
+        submit_vote_b,
+        inputs=None,
+        outputs=output_message
+    )
+    tie.click(
+        submit_vote_tie,
+        inputs=None,
+        outputs=output_message
+    )
 
     prompt_input = gr.Textbox(placeholder="Message for both...", container=False)
     send_btn = gr.Button("Send to Both", variant="primary")
 
     def send_prompt(prompt):
-        # This function will now return the prompt for both chatbots
-        return prompt, prompt, gr.update(value=""), gr.update(value="")
+        current_state = state.value
+        # Append user's prompt to both conversations
+        if prompt:
+            current_state['conversation_a'].append((prompt, None))
+            current_state['conversation_b'].append((prompt, None))
+            state.update(current_state)
+        return "", ""
 
-    # Update the click and submit events
     send_btn.click(
         send_prompt,
-        inputs=[prompt_input],
+        inputs=prompt_input,
         outputs=[
-            chat_a.textbox,
-            chat_b.textbox,
             prompt_input,
             prompt_input
         ]
     )
     prompt_input.submit(
         send_prompt,
-        inputs=[prompt_input],
+        inputs=prompt_input,
        outputs=[
-            chat_a.textbox,
-            chat_b.textbox,
             prompt_input,
             prompt_input
         ]
     )
+
 if __name__ == "__main__":
     demo.launch(share=True)
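
A note on `update_elo` (left commented out at the call site in `submit_vote`): it applies a flat ±10 regardless of the opponents' current ratings. A standard Elo update scales the adjustment by the expected score instead; a sketch for comparison, where the K-factor of 32 is an assumption, not something this commit specifies:

```python
def elo_update(a_rating: float, b_rating: float, a_score: float, k: float = 32.0):
    # Standard Elo: a_score is 1.0 if A wins, 0.5 for a tie, 0.0 if A loses.
    expected_a = 1.0 / (1.0 + 10 ** ((b_rating - a_rating) / 400))
    delta = k * (a_score - expected_a)
    return a_rating + delta, b_rating - delta
```

With this form, a win against a much stronger opponent moves the ratings more than a win against a weaker one, which the fixed ±10 cannot express.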
eval_old.py ADDED
@@ -0,0 +1,145 @@
+import gradio as gr
+
+import os
+import openai
+from dataclasses import dataclass
+
+@dataclass
+class Args:
+    frequency_penalty: float = 0
+    max_tokens: int = 32
+    n: int = 1
+    presence_penalty: float = 0
+    seed: int = 42
+    stop: str = None
+    stream: bool = False
+    temperature: float = 0.8
+    top_p: float = 0.95
+
+def get_completion(client, model_id, messages, args):
+    completion_args = {
+        "model": model_id,
+        "messages": messages,
+        "frequency_penalty": args.frequency_penalty,
+        "max_tokens": args.max_tokens,
+        "n": args.n,
+        "presence_penalty": args.presence_penalty,
+        "seed": args.seed,
+        "stop": args.stop,
+        "stream": args.stream,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+    }
+
+    completion_args = {
+        k: v for k, v in completion_args.items() if v is not None
+    }
+
+    try:
+        response = client.chat.completions.create(**completion_args)
+        return response
+    except Exception as e:
+        print(f"Error during API call: {e}")
+        return None
+
+def chat_response(message, history, model):
+    # Set up OpenAI client
+    openai_api_key = "super-secret-token"
+    os.environ['OPENAI_API_KEY'] = openai_api_key
+    openai.api_key = openai_api_key
+    openai.api_base = "https://turingtest--example-vllm-openai-compatible-serve.modal.run/v1"
+    client = openai.OpenAI(api_key=openai_api_key, base_url=openai.api_base)
+
+    # Prepare messages
+    messages = [{"role": "system", "content": "You are a helpful assistant."}]
+
+    # Convert history to the correct format
+    for user_msg, assistant_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+
+    messages.append({"role": "user", "content": message})
+
+    # Set up arguments
+    args = Args()
+
+    # Use the correct model identifier
+    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+    # Get completion
+    response = get_completion(client, model_id, messages, args)
+
+    if response and response.choices:
+        return response.choices[0].message.content
+    else:
+        return f"Error: Please retry or contact support if retried more than twice."
+
+def create_chat_interface(model):
+    return gr.ChatInterface(
+        fn=lambda message, history: chat_response(message, history, model),
+        chatbot=gr.Chatbot(height=400, label=f"Choice {model}"),
+        textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
+        # title=f"Choice {model}",
+        description="",
+        theme="dark",
+        # examples=[["what's up"]],
+        # cache_examples=True,
+        retry_btn=None,
+        undo_btn=None,
+        clear_btn=None,
+    )
+
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), head=
+    """
+    <style>
+    body {
+        font-family: 'Calibri', sans-serif; /* Choose your desired font */
+    }
+    </style>
+    """) as demo:
+    gr.Markdown("## Turing Test Prompt Competition")
+
+    with gr.Row():
+        with gr.Column():
+            chat_a = create_chat_interface("A")
+        with gr.Column():
+            chat_b = create_chat_interface("B")
+
+    with gr.Row():
+        a_better = gr.Button("👉 A is better", scale=1)
+        b_better = gr.Button("👈 B is better", scale=1)
+        tie = gr.Button("🤝 Tie", scale=1)
+        both_bad = gr.Button("👎 Both are bad", scale=1)
+
+
+    prompt_input = gr.Textbox(placeholder="Message for both...", container=False)
+    send_btn = gr.Button("Send to Both", variant="primary")
+
+    def send_prompt(prompt):
+        # This function will now return the prompt for both chatbots
+        return prompt, prompt, gr.update(value=""), gr.update(value="")
+
+    # Update the click and submit events
+    send_btn.click(
+        send_prompt,
+        inputs=[prompt_input],
+        outputs=[
+            chat_a.textbox,
+            chat_b.textbox,
+            prompt_input,
+            prompt_input
+        ]
+    )
+    prompt_input.submit(
+        send_prompt,
+        inputs=[prompt_input],
+        outputs=[
+            chat_a.textbox,
+            chat_b.textbox,
+            prompt_input,
+            prompt_input
+        ]
+    )
+if __name__ == "__main__":
+    demo.launch(share=True)
leaderboard.py ADDED
@@ -0,0 +1,69 @@
+import gradio as gr
+import time
+from supabase import create_client, Client
+import os
+from dotenv import load_dotenv
+import pandas as pd
+
+# Load environment variables
+load_dotenv()
+
+# Initialize Supabase client
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY")
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+
+
+def get_active_round():
+    # Fetch the active round data and return both round ID and round number
+    response = supabase.table("round_status").select("id, round").eq("is_eval_active", True).single().execute()
+    if response.data:
+        return response.data['id'], response.data['round']  # Return both round ID and round number
+    return None, None
+
+
+def get_elo_ratings(round_id):
+    # Query the ELO ratings based on the round_id
+    response = supabase.table("elos").select("user_id, rating").eq("round", round_id).execute()
+
+    print("get_elo_ratings: ", response.data)
+    if response.data:
+        df = pd.DataFrame(response.data)
+        df = df.sort_values(by='rating', ascending=False)
+        print(df.head())
+        return df
+    return pd.DataFrame(columns=['user_id', 'rating'])
+
+
+def update_info():
+    # Get the active round ID and round number
+    round_id, round_number = get_active_round()
+    print("Active Round ID:", round_id, "Round Number:", round_number)  # This will print both round ID and round number
+    if round_id:
+        # Fetch the ELO ratings based on the round ID
+        elo_ratings = get_elo_ratings(round_id)
+        return f"Active Round: {round_number}", elo_ratings  # Display the round number in the UI
+    else:
+        return "No active round found", pd.DataFrame(columns=['user_id', 'rating'])
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Leaderboard")
+    round_info = gr.Textbox(label="")
+    elo_table = gr.DataFrame(label="ELO Ratings", headers=["User ID", "Rating"])
+
+    # Create a periodic update function
+    def periodic_update():
+        round_status, ratings = update_info()
+        return round_status, ratings
+
+    # Load initial values
+    demo.load(update_info, outputs=[round_info, elo_table])
+
+    # Use gr.Timer to trigger updates every 5 seconds
+    timer = gr.Timer(value=5, active=True)  # Set timer to tick every 5 seconds
+    timer.tick(periodic_update, outputs=[round_info, elo_table])
+
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch()
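
`get_elo_ratings` already returns the frame sorted by rating, so the table renders in leaderboard order; if an explicit position column is wanted, a small sketch (the `rank` column is an assumption, not part of the commit):

```python
import pandas as pd

def add_rank(df: pd.DataFrame) -> pd.DataFrame:
    # Prepend a 1-based rank column to a rating-sorted leaderboard frame.
    df = df.sort_values(by="rating", ascending=False).reset_index(drop=True)
    df.insert(0, "rank", df.index + 1)
    return df
```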
pyproject.toml CHANGED
@@ -6,7 +6,10 @@ readme = "README.md"
 requires-python = ">=3.9"
 dependencies = [
     "gradio>=4.44.0",
+    "jupyter>=1.1.1",
     "modal>=0.64.126",
     "openai>=1.46.1",
+    "python-dotenv>=1.0.1",
     "streamlit>=1.38.0",
+    "supabase>=2.7.4",
 ]
requirements.txt CHANGED
@@ -100,3 +100,6 @@ watchfiles==0.24.0
 websockets==12.0
 yarl==1.11.1
 zipp==3.20.2
+
+supabase~=2.7.4
+python-dotenv~=1.0.1
uv.lock CHANGED
The diff for this file is too large to render.
 
vllm_inference.py CHANGED
@@ -79,14 +79,16 @@ app = modal.App("example-vllm-openai-compatible")
 N_GPU = 1 # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count
 TOKEN = "super-secret-token" # auth token. for production use, replace with a modal.Secret
 
+SECONDS = 1
 MINUTES = 60 # seconds
 HOURS = 60 * MINUTES
 
+# TODO: Implement secrets https://modal.com/docs/guide/secrets
 
 @app.function(
     image=vllm_image,
     gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
-    container_idle_timeout=5 * MINUTES,
+    container_idle_timeout=3 * MINUTES,
     timeout=24 * HOURS,
     allow_concurrent_inputs=100,
     volumes={MODELS_DIR: volume},
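
On the new `# TODO: Implement secrets` line: Modal's documented pattern is to attach a named `modal.Secret` to the function and read it from the environment at runtime, instead of keeping `TOKEN = "super-secret-token"` in source. A sketch, assuming a secret named `vllm-auth` with an `AUTH_TOKEN` key has already been created (e.g. `modal secret create vllm-auth AUTH_TOKEN=...`):

```python
import os

import modal

app = modal.App("example-vllm-openai-compatible")

@app.function(secrets=[modal.Secret.from_name("vllm-auth")])
def serve():
    # AUTH_TOKEN is injected by Modal from the named secret at runtime.
    token = os.environ["AUTH_TOKEN"]
    print(f"auth token loaded ({len(token)} chars)")
```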