taesiri committed on
Commit
05e4334
·
1 Parent(s): c8b7025
.gitattributes CHANGED
@@ -231,3 +231,35 @@ results_qwen/gpt-4-turbo-2024-04-09.pkl filter=lfs diff=lfs merge=lfs -text
231
  results_qwen/CodeLlama-70b-Instruct-hf.png filter=lfs diff=lfs merge=lfs -text
232
  results_qwen/claude-3-haiku-20240307.jpg filter=lfs diff=lfs merge=lfs -text
233
  results_qwen/deepseek-llm-67b-chat.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  results_qwen/CodeLlama-70b-Instruct-hf.png filter=lfs diff=lfs merge=lfs -text
232
  results_qwen/claude-3-haiku-20240307.jpg filter=lfs diff=lfs merge=lfs -text
233
  results_qwen/deepseek-llm-67b-chat.png filter=lfs diff=lfs merge=lfs -text
234
+ results_qwen/dbrx-instruct.png filter=lfs diff=lfs merge=lfs -text
235
+ results_qwen/gpt-35-turbo.jpg filter=lfs diff=lfs merge=lfs -text
236
+ results_qwen/gpt-35-turbo.pkl filter=lfs diff=lfs merge=lfs -text
237
+ results_qwen/Qwen1.5-72B-Chat.png filter=lfs diff=lfs merge=lfs -text
238
+ results_qwen/dbrx-instruct.csv filter=lfs diff=lfs merge=lfs -text
239
+ results_qwen/dbrx-instruct.pkl filter=lfs diff=lfs merge=lfs -text
240
+ results_qwen/Yi-34B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
241
+ results_qwen/gpt-35-turbo.csv filter=lfs diff=lfs merge=lfs -text
242
+ results_qwen/Llama-2-70b-chat-hf.jpg filter=lfs diff=lfs merge=lfs -text
243
+ results_qwen/Llama-2-70b-chat-hf.png filter=lfs diff=lfs merge=lfs -text
244
+ results_qwen/Mistral-7B-Instruct-v0.2.csv filter=lfs diff=lfs merge=lfs -text
245
+ results_qwen/Mistral-7B-Instruct-v0.2.png filter=lfs diff=lfs merge=lfs -text
246
+ results_qwen/Qwen1.5-72B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
247
+ results_qwen/Mistral-7B-Instruct-v0.2.jpg filter=lfs diff=lfs merge=lfs -text
248
+ results_qwen/Mistral-7B-Instruct-v0.2.pkl filter=lfs diff=lfs merge=lfs -text
249
+ results_qwen/Qwen1.5-72B-Chat.csv filter=lfs diff=lfs merge=lfs -text
250
+ results_qwen/Yi-34B-Chat.png filter=lfs diff=lfs merge=lfs -text
251
+ results_qwen/Llama-2-70b-chat-hf.csv filter=lfs diff=lfs merge=lfs -text
252
+ results_qwen/StripedHyena-Nous-7B.csv filter=lfs diff=lfs merge=lfs -text
253
+ results_qwen/StripedHyena-Nous-7B.pkl filter=lfs diff=lfs merge=lfs -text
254
+ results_qwen/StripedHyena-Nous-7B.jpg filter=lfs diff=lfs merge=lfs -text
255
+ results_qwen/Yi-34B-Chat.csv filter=lfs diff=lfs merge=lfs -text
256
+ results_qwen/gpt-35-turbo.png filter=lfs diff=lfs merge=lfs -text
257
+ results_qwen/Llama-2-70b-chat-hf.pkl filter=lfs diff=lfs merge=lfs -text
258
+ results_qwen/claude-3-sonnet-20240229.csv filter=lfs diff=lfs merge=lfs -text
259
+ results_qwen/claude-3-sonnet-20240229.png filter=lfs diff=lfs merge=lfs -text
260
+ results_qwen/dbrx-instruct.jpg filter=lfs diff=lfs merge=lfs -text
261
+ results_qwen/Qwen1.5-72B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
262
+ results_qwen/StripedHyena-Nous-7B.png filter=lfs diff=lfs merge=lfs -text
263
+ results_qwen/claude-3-sonnet-20240229.jpg filter=lfs diff=lfs merge=lfs -text
264
+ results_qwen/Yi-34B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
265
+ results_qwen/claude-3-sonnet-20240229.pkl filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -435,14 +435,8 @@ def show_intersection_heatmap(evt: gr.SelectData):
435
 
436
  with gr.Blocks() as demo:
437
  gr.Markdown("# FSM Benchmark Leaderboard")
438
- with gr.Tab("Text-only Benchmark"):
439
- gr.Markdown("# Text-only Leaderboard")
440
- leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
441
- gr.Markdown("## Heatmap")
442
- heatmap_image = gr.Image(label="", show_label=False)
443
- leader_board.select(fn=load_heatmap, outputs=[heatmap_image])
444
 
445
- with gr.Tab("Text-only Benchmark (Judged by Qwen)"):
446
  gr.Markdown("# Text-only Leaderboard (Judged by Qwen)")
447
  leader_board = gr.Dataframe(accuracy_df_qwen, headers=headers_with_icons)
448
  gr.Markdown("## Heatmap")
@@ -527,6 +521,15 @@ with gr.Blocks() as demo:
527
  )
528
  heatmap_image = gr.Plot(label="Model Heatmap")
529
 
 
 
 
 
 
 
 
 
 
530
  included_models_cot.select(
531
  fn=calculate_order_by_first_substring_cot,
532
  inputs=[included_models_cot],
 
435
 
436
  with gr.Blocks() as demo:
437
  gr.Markdown("# FSM Benchmark Leaderboard")
 
 
 
 
 
 
438
 
439
+ with gr.Tab("Text-only Benchmark"):
440
  gr.Markdown("# Text-only Leaderboard (Judged by Qwen)")
441
  leader_board = gr.Dataframe(accuracy_df_qwen, headers=headers_with_icons)
442
  gr.Markdown("## Heatmap")
 
521
  )
522
  heatmap_image = gr.Plot(label="Model Heatmap")
523
 
524
+ with gr.Tab("Text-only Benchmark (deprecated)"):
525
+ gr.Markdown("# Text-only Leaderboard")
526
+ leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
527
+ gr.Markdown("## Heatmap")
528
+ heatmap_image = gr.Image(label="", show_label=False)
529
+ leader_board.select(fn=load_heatmap, outputs=[heatmap_image])
530
+
531
+ # ============ Callbacks ============
532
+
533
  included_models_cot.select(
534
  fn=calculate_order_by_first_substring_cot,
535
  inputs=[included_models_cot],
results_qwen/Llama-2-70b-chat-hf.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e5a2bcd63b330efb3c92c9d2bfc3a708cb14348dec1bf4e7eb34e604348efa
3
+ size 18452553
results_qwen/Llama-2-70b-chat-hf.jpg ADDED

Git LFS Details

  • SHA256: 0730118c903a7e8a0eed56186c3b5aab3978ad6b1a51990413b3086cd8be726c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results_qwen/Llama-2-70b-chat-hf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74e63ba62f3f074e16b0731f5d3f53ecd6f6d431ca6579a46fb95e8e0fc0494
3
+ size 18434995
results_qwen/Llama-2-70b-chat-hf.png ADDED

Git LFS Details

  • SHA256: ccf3f5cf4b62f2e6eddd9b1e34991e92c8a503ca71201c5ef209d4e97d69df08
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/Mistral-7B-Instruct-v0.2.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fc1fd3720541d6da41e0b3a8ba222576cf9deddc09483adeff44233c43e52b0
3
+ size 25120060
results_qwen/Mistral-7B-Instruct-v0.2.jpg ADDED

Git LFS Details

  • SHA256: a90102f516fba692b21fda394db6ffa35c625230e7e22d36022bfd850e17f8ec
  • Pointer size: 132 Bytes
  • Size of remote file: 1.32 MB
results_qwen/Mistral-7B-Instruct-v0.2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5c9386da642fcba5d2d83da27bcb9c43324ade40332fe1f0d449391c49e95bd
3
+ size 25132544
results_qwen/Mistral-7B-Instruct-v0.2.png ADDED

Git LFS Details

  • SHA256: 8f62d7b511052a8b79f53251bc719eb90b5dcc9b6cab3848b1d532ddef0c3665
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/Qwen1.5-72B-Chat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7d69ce42103b008ad375df143d73d9022725be435bb1585a392df01d588d4d
3
+ size 12095649
results_qwen/Qwen1.5-72B-Chat.jpg ADDED

Git LFS Details

  • SHA256: 40dc0aa6e910a124457a65f8d7a936704959b86313029798a59d1293d637b3ef
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB
results_qwen/Qwen1.5-72B-Chat.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6716ad73c760159b278364b9c67e1171cf44075148e306575cf57a4e14faf9d7
3
+ size 12128493
results_qwen/Qwen1.5-72B-Chat.png ADDED

Git LFS Details

  • SHA256: 42da492bed81b2ab155365a33dc7fc29e3c20a79994653bfaab00b183013548d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/StripedHyena-Nous-7B.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8699ac2a760851df8b3ce3b8531f5185e28cbad084494b71f758d6ede787f365
3
+ size 33824580
results_qwen/StripedHyena-Nous-7B.jpg ADDED

Git LFS Details

  • SHA256: 3b24d9cb0d193eba3fdb203d7aedb491772910a41b09a7eef19b53d24fa62b26
  • Pointer size: 132 Bytes
  • Size of remote file: 1.32 MB
results_qwen/StripedHyena-Nous-7B.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e17907d0209ed8478772675566defd213ecc6cb96106225225b842466fad986
3
+ size 33818513
results_qwen/StripedHyena-Nous-7B.png ADDED

Git LFS Details

  • SHA256: cfda7aa4aa09815ce8cc4e39cd2045b70b36e383d7281af270f1bdd039c0a229
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/Yi-34B-Chat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e65a08895fd3a369c29db7ed8e4a58399bc579689f33a6845594632d4d16346
3
+ size 18312597
results_qwen/Yi-34B-Chat.jpg ADDED

Git LFS Details

  • SHA256: 00a49b5e6920db0fddea904bcc5c34aab5f2ebf1e5e47f875f4a861481b54b9b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results_qwen/Yi-34B-Chat.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5468af21ae979604dfadd5de9f3c85c550b520deb17bb03130564b47b21334a9
3
+ size 18366214
results_qwen/Yi-34B-Chat.png ADDED

Git LFS Details

  • SHA256: 6de0b5f7cf86bbc26eb3ee89c52845b134239090fb1ca2f4f36ceb491b2c741b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
results_qwen/claude-3-sonnet-20240229.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d09742be7bb2f08133917006a0b1df70233566b4e2eb87393965beaedf37c5
3
+ size 20960824
results_qwen/claude-3-sonnet-20240229.jpg ADDED

Git LFS Details

  • SHA256: 11c153b8dacfa82a38ba9f9ebe1734f2e704889c8ae17975da79795a0e68e578
  • Pointer size: 132 Bytes
  • Size of remote file: 1.28 MB
results_qwen/claude-3-sonnet-20240229.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a75b7af6e5dede497165284750fffb7acf9bf287d53b447f75e734c0e69c306
3
+ size 20960376
results_qwen/claude-3-sonnet-20240229.png ADDED

Git LFS Details

  • SHA256: 54079ec77137dd645d491681bf7e01d25b946048e4a2cbf4e9a439cdc0649881
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/dbrx-instruct.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56ee4a0903ab2b5c5d1c478ccaee9063c93a6e82602aead01aa0c83ea75ab17a
3
+ size 15793228
results_qwen/dbrx-instruct.jpg ADDED

Git LFS Details

  • SHA256: 8f8fa78756565fcba4f50ccce07c9f190457def26e8e47bae5818470c1e398c8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB
results_qwen/dbrx-instruct.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ce392c07fc7c1c7c64d941f7b77a06614f1b51f76ce5d5947dafa0191ddf8ee
3
+ size 15820291
results_qwen/dbrx-instruct.png ADDED

Git LFS Details

  • SHA256: d9215a754055c9dd8c6f6eaf9efb9c9365defcc7cfaad33be68deb5df223f34e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results_qwen/gpt-35-turbo.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3789603ef21192c8267df2bfc434e536c06bb36387ee753cfc079ca5ca062367
3
+ size 8664643
results_qwen/gpt-35-turbo.jpg ADDED

Git LFS Details

  • SHA256: 8e112d9e8a5b755c4c001a3c57cf9a3d7b46fb99582ddfcf779c92220d37fb44
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results_qwen/gpt-35-turbo.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c505f8733712a8076c79a5d0e7c78773eb558364ba3baa08c540673bb4de3bdc
3
+ size 8672346
results_qwen/gpt-35-turbo.png ADDED

Git LFS Details

  • SHA256: b171b8c2cb964b6714feee9121eeb077269fdafe40e66401a6e0e700cef26273
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB