winglian committed on
Commit
ea2621e
•
1 Parent(s): 56e046b

cleanup, fix elo timestamp ordering, add guanaco 13b

Browse files
Files changed (2) hide show
  1. app.py +118 -118
  2. calculate_elo.py +3 -2
app.py CHANGED
@@ -123,6 +123,7 @@ AVAILABLE_MODELS = {
123
  "lmsys-vicuna-13b": ("2nlb32ydkaz6yd", prompt_chat),
124
  "supercot-13b": ("0be7865dwxpwqk", prompt_instruct, ["Instruction:"]),
125
  "mpt-7b-instruct": ("jpqbvnyluj18b0", prompt_instruct),
 
126
  }
127
 
128
  _memoized_models = defaultdict()
@@ -275,15 +276,15 @@ with gr.Blocks() as arena:
275
  - [💵 Consider Donating on our Patreon](http://patreon.com/OpenAccessAICollective)
276
  - Join us on [Discord](https://discord.gg/PugNNHAF5r)
277
  """)
278
- with gr.Tab("Chatbot"):
279
  with gr.Row():
280
  with gr.Column():
281
- chatbot1 = gr.Chatbot()
282
  with gr.Column():
283
- chatbot2 = gr.Chatbot()
284
  with gr.Row():
285
- choose1 = gr.Button(value="👈 Prefer left", variant="secondary", visible=False).style(full_width=True)
286
- choose2 = gr.Button(value="👉 Prefer right", variant="secondary", visible=False).style(full_width=True)
287
  choose3 = gr.Button(value="🤝 Tie", variant="secondary", visible=False).style(full_width=True)
288
  choose4 = gr.Button(value="👉 Both are bad", variant="secondary", visible=False).style(full_width=True)
289
  with gr.Row():
@@ -293,133 +294,132 @@ with gr.Blocks() as arena:
293
  dismiss_reveal = gr.Button(value="Dismiss & Continue", variant="secondary", visible=False).style(full_width=True)
294
  with gr.Row():
295
  with gr.Column():
296
- message = gr.Textbox(
297
  label="What do you want to ask?",
298
  placeholder="Ask me anything.",
299
  lines=3,
300
  )
301
  with gr.Column():
302
- rlhf_persona = gr.Textbox(
303
  "", label="Persona Tags", interactive=True, visible=True, placeholder="Tell us about how you are judging the quality. ex: #CoT #SFW #NSFW #helpful #ethical #creativity", lines=2)
304
- system_msg = gr.Textbox(
305
  start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt", lines=8)
306
 
307
- nudge_msg = gr.Textbox(
308
  "", label="Assistant Nudge", interactive=True, visible=True, placeholder="the first words of the assistant response to nudge them in the right direction.", lines=2)
309
  with gr.Row():
310
- submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
311
- clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  with gr.Tab("Leaderboard"):
313
  with gr.Column():
314
  leaderboard_markdown = gr.Markdown(f"""{leaderboard_intro}
315
  {dataset_to_markdown()}
316
  """)
317
- refresh = gr.Button(value="Refresh Leaderboard", variant="secondary").style(full_width=True)
318
- state = gr.State({})
319
-
320
- refresh.click(fn=refresh_md, inputs=[], outputs=[leaderboard_markdown])
321
-
322
- clear.click(lambda: None, None, chatbot1, queue=False)
323
- clear.click(lambda: None, None, chatbot2, queue=False)
324
- clear.click(lambda: None, None, message, queue=False)
325
- clear.click(lambda: None, None, nudge_msg, queue=False)
326
-
327
- submit_click_event = submit.click(
328
- lambda *args: (
329
- gr.update(visible=False, interactive=False),
330
- gr.update(visible=False),
331
- gr.update(visible=False),
332
- ),
333
- inputs=[], outputs=[message, clear, submit], queue=True
334
- ).then(
335
- fn=user, inputs=[message, nudge_msg, chatbot1, chatbot2], outputs=[message, nudge_msg, chatbot1, chatbot2], queue=True
336
- ).then(
337
- fn=chat, inputs=[chatbot1, chatbot2, system_msg], outputs=[chatbot1, chatbot2, message, reveal1, reveal2, state], queue=True
338
- ).then(
339
- lambda *args: (
340
- gr.update(visible=False, interactive=False),
341
- gr.update(visible=True),
342
- gr.update(visible=True),
343
- gr.update(visible=True),
344
- gr.update(visible=True),
345
- gr.update(visible=False),
346
- gr.update(visible=False),
347
- ),
348
- inputs=[message, nudge_msg, system_msg], outputs=[message, choose1, choose2, choose3, choose4, clear, submit], queue=True
349
- )
350
-
351
- choose1_click_event = choose1.click(
352
- fn=chosen_one_first, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
353
- ).then(
354
- lambda *args: (
355
- gr.update(visible=False),
356
- gr.update(visible=False),
357
- gr.update(visible=False),
358
- gr.update(visible=False),
359
- gr.update(visible=True),
360
- gr.update(visible=True),
361
- gr.update(visible=True),
362
- ),
363
- inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
364
- )
365
-
366
- choose2_click_event = choose2.click(
367
- fn=chosen_one_second, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
368
- ).then(
369
- lambda *args: (
370
- gr.update(visible=False),
371
- gr.update(visible=False),
372
- gr.update(visible=False),
373
- gr.update(visible=False),
374
- gr.update(visible=True),
375
- gr.update(visible=True),
376
- gr.update(visible=True),
377
- ),
378
- inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
379
- )
380
-
381
- choose3_click_event = choose3.click(
382
- fn=chosen_one_tie, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
383
- ).then(
384
- lambda *args: (
385
- gr.update(visible=False),
386
- gr.update(visible=False),
387
- gr.update(visible=False),
388
- gr.update(visible=False),
389
- gr.update(visible=True),
390
- gr.update(visible=True),
391
- gr.update(visible=True),
392
- ),
393
- inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
394
- )
395
-
396
- choose4_click_event = choose4.click(
397
- fn=chosen_one_suck, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
398
- ).then(
399
- lambda *args: (
400
- gr.update(visible=False),
401
- gr.update(visible=False),
402
- gr.update(visible=False),
403
- gr.update(visible=False),
404
- gr.update(visible=True),
405
- gr.update(visible=True),
406
- gr.update(visible=True),
407
- ),
408
- inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
409
- )
410
-
411
- dismiss_click_event = dismiss_reveal.click(
412
- lambda *args: (
413
- gr.update(visible=True, interactive=True),
414
- gr.update(visible=False),
415
- gr.update(visible=True),
416
- gr.update(visible=True),
417
- gr.update(visible=False),
418
- gr.update(visible=False),
419
- None,
420
- None,
421
- ),
422
- inputs=[], outputs=[message, dismiss_reveal, clear, submit, reveal1, reveal2, chatbot1, chatbot2], queue=True
423
- )
424
 
425
  arena.queue(concurrency_count=5, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
 
123
  "lmsys-vicuna-13b": ("2nlb32ydkaz6yd", prompt_chat),
124
  "supercot-13b": ("0be7865dwxpwqk", prompt_instruct, ["Instruction:"]),
125
  "mpt-7b-instruct": ("jpqbvnyluj18b0", prompt_instruct),
126
+ "guanaco-13b": ("yxl8w98z017mw2", prompt_instruct),
127
  }
128
 
129
  _memoized_models = defaultdict()
 
276
  - [💵 Consider Donating on our Patreon](http://patreon.com/OpenAccessAICollective)
277
  - Join us on [Discord](https://discord.gg/PugNNHAF5r)
278
  """)
279
+ with gr.Tab("Chatbot Arena"):
280
  with gr.Row():
281
  with gr.Column():
282
+ arena_chatbot1 = gr.Chatbot(label="Chatbot A")
283
  with gr.Column():
284
+ arena_chatbot2 = gr.Chatbot(label="Chatbot B")
285
  with gr.Row():
286
+ choose1 = gr.Button(value="👈 Prefer left (A)", variant="secondary", visible=False).style(full_width=True)
287
+ choose2 = gr.Button(value="👉 Prefer right (B)", variant="secondary", visible=False).style(full_width=True)
288
  choose3 = gr.Button(value="🤝 Tie", variant="secondary", visible=False).style(full_width=True)
289
  choose4 = gr.Button(value="👉 Both are bad", variant="secondary", visible=False).style(full_width=True)
290
  with gr.Row():
 
294
  dismiss_reveal = gr.Button(value="Dismiss & Continue", variant="secondary", visible=False).style(full_width=True)
295
  with gr.Row():
296
  with gr.Column():
297
+ arena_message = gr.Textbox(
298
  label="What do you want to ask?",
299
  placeholder="Ask me anything.",
300
  lines=3,
301
  )
302
  with gr.Column():
303
+ arena_rlhf_persona = gr.Textbox(
304
  "", label="Persona Tags", interactive=True, visible=True, placeholder="Tell us about how you are judging the quality. ex: #CoT #SFW #NSFW #helpful #ethical #creativity", lines=2)
305
+ arena_system_msg = gr.Textbox(
306
  start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt", lines=8)
307
 
308
+ arena_nudge_msg = gr.Textbox(
309
  "", label="Assistant Nudge", interactive=True, visible=True, placeholder="the first words of the assistant response to nudge them in the right direction.", lines=2)
310
  with gr.Row():
311
+ arena_submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
312
+ arena_clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
313
+ state = gr.State({})
314
+
315
+ arena_clear.click(lambda: None, None, arena_chatbot1, queue=False)
316
+ arena_clear.click(lambda: None, None, arena_chatbot2, queue=False)
317
+ arena_clear.click(lambda: None, None, arena_message, queue=False)
318
+ arena_clear.click(lambda: None, None, arena_nudge_msg, queue=False)
319
+
320
+ submit_click_event = arena_submit.click(
321
+ lambda *args: (
322
+ gr.update(visible=False, interactive=False),
323
+ gr.update(visible=False),
324
+ gr.update(visible=False),
325
+ ),
326
+ inputs=[], outputs=[arena_message, arena_clear, arena_submit], queue=True
327
+ ).then(
328
+ fn=user, inputs=[arena_message, arena_nudge_msg, arena_chatbot1, arena_chatbot2], outputs=[arena_message, arena_nudge_msg, arena_chatbot1, arena_chatbot2], queue=True
329
+ ).then(
330
+ fn=chat, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg], outputs=[arena_chatbot1, arena_chatbot2, arena_message, reveal1, reveal2, state], queue=True
331
+ ).then(
332
+ lambda *args: (
333
+ gr.update(visible=False, interactive=False),
334
+ gr.update(visible=True),
335
+ gr.update(visible=True),
336
+ gr.update(visible=True),
337
+ gr.update(visible=True),
338
+ gr.update(visible=False),
339
+ gr.update(visible=False),
340
+ ),
341
+ inputs=[arena_message, arena_nudge_msg, arena_system_msg], outputs=[arena_message, choose1, choose2, choose3, choose4, arena_clear, arena_submit], queue=True
342
+ )
343
+
344
+ choose1_click_event = choose1.click(
345
+ fn=chosen_one_first, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
346
+ ).then(
347
+ lambda *args: (
348
+ gr.update(visible=False),
349
+ gr.update(visible=False),
350
+ gr.update(visible=False),
351
+ gr.update(visible=False),
352
+ gr.update(visible=True),
353
+ gr.update(visible=True),
354
+ gr.update(visible=True),
355
+ ),
356
+ inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
357
+ )
358
+
359
+ choose2_click_event = choose2.click(
360
+ fn=chosen_one_second, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
361
+ ).then(
362
+ lambda *args: (
363
+ gr.update(visible=False),
364
+ gr.update(visible=False),
365
+ gr.update(visible=False),
366
+ gr.update(visible=False),
367
+ gr.update(visible=True),
368
+ gr.update(visible=True),
369
+ gr.update(visible=True),
370
+ ),
371
+ inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
372
+ )
373
+
374
+ choose3_click_event = choose3.click(
375
+ fn=chosen_one_tie, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
376
+ ).then(
377
+ lambda *args: (
378
+ gr.update(visible=False),
379
+ gr.update(visible=False),
380
+ gr.update(visible=False),
381
+ gr.update(visible=False),
382
+ gr.update(visible=True),
383
+ gr.update(visible=True),
384
+ gr.update(visible=True),
385
+ ),
386
+ inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
387
+ )
388
+
389
+ choose4_click_event = choose4.click(
390
+ fn=chosen_one_suck, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
391
+ ).then(
392
+ lambda *args: (
393
+ gr.update(visible=False),
394
+ gr.update(visible=False),
395
+ gr.update(visible=False),
396
+ gr.update(visible=False),
397
+ gr.update(visible=True),
398
+ gr.update(visible=True),
399
+ gr.update(visible=True),
400
+ ),
401
+ inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
402
+ )
403
+
404
+ dismiss_click_event = dismiss_reveal.click(
405
+ lambda *args: (
406
+ gr.update(visible=True, interactive=True),
407
+ gr.update(visible=False),
408
+ gr.update(visible=True),
409
+ gr.update(visible=True),
410
+ gr.update(visible=False),
411
+ gr.update(visible=False),
412
+ None,
413
+ None,
414
+ ),
415
+ inputs=[], outputs=[arena_message, dismiss_reveal, arena_clear, arena_submit, reveal1, reveal2, arena_chatbot1, arena_chatbot2], queue=True
416
+ )
417
  with gr.Tab("Leaderboard"):
418
  with gr.Column():
419
  leaderboard_markdown = gr.Markdown(f"""{leaderboard_intro}
420
  {dataset_to_markdown()}
421
  """)
422
+ leaderboad_refresh = gr.Button(value="Refresh Leaderboard", variant="secondary").style(full_width=True)
423
+ leaderboad_refresh.click(fn=refresh_md, inputs=[], outputs=[leaderboard_markdown])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
  arena.queue(concurrency_count=5, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
calculate_elo.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
  import os
3
  from datetime import datetime
4
  from decimal import Decimal
 
5
 
6
  import boto3
7
  from boto3.dynamodb.conditions import Attr, Key
@@ -261,8 +262,8 @@ def _backfill_logs():
261
 
262
  def main():
263
  last_processed_timestamp = get_last_processed_timestamp()
264
- battles = get_unprocessed_battles(last_processed_timestamp)
265
-
266
  elo_scores = {}
267
 
268
  for battle in battles:
 
2
  import os
3
  from datetime import datetime
4
  from decimal import Decimal
5
+ from typing import List
6
 
7
  import boto3
8
  from boto3.dynamodb.conditions import Attr, Key
 
262
 
263
  def main():
264
  last_processed_timestamp = get_last_processed_timestamp()
265
+ battles: List[dict] = get_unprocessed_battles(last_processed_timestamp)
266
+ battles = sorted(battles, key=lambda x: x['timestamp'])
267
  elo_scores = {}
268
 
269
  for battle in battles: