djstrong committed on
Commit
d6018f5
1 Parent(s): 492a075
Files changed (2) hide show
  1. benchmark_results.csv +1 -0
  2. src/about.py +1 -1
benchmark_results.csv CHANGED
@@ -148,3 +148,4 @@ Qwen/Qwen1.5-72B-Chat,2024-06-20 18:06:58,,Qwen/Qwen1.5-72B-Chat,,,68.03,eq-benc
148
  Qwen/Qwen2-72B,2024-06-20 18:36:22,,Qwen/Qwen2-72B,,,69.75,eq-bench_v2_pl,169.0,1,transformers, ,,
149
  Qwen/Qwen2-72B-Instruct,2024-06-20 18:55:02,,Qwen/Qwen2-72B-Instruct,,,72.07,eq-bench_v2_pl,169.0,1,transformers, ,,
150
  mistralai/Mixtral-8x22B-v0.1,2024-06-21 20:20:37,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,34.0 questions were parseable (min is 83%)
 
 
148
  Qwen/Qwen2-72B,2024-06-20 18:36:22,,Qwen/Qwen2-72B,,,69.75,eq-bench_v2_pl,169.0,1,transformers, ,,
149
  Qwen/Qwen2-72B-Instruct,2024-06-20 18:55:02,,Qwen/Qwen2-72B-Instruct,,,72.07,eq-bench_v2_pl,169.0,1,transformers, ,,
150
  mistralai/Mixtral-8x22B-v0.1,2024-06-21 20:20:37,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,34.0 questions were parseable (min is 83%)
151
+ mistralai/Mixtral-8x22B-Instruct-v0.1,2024-06-26 23:40:01,,mistralai/Mixtral-8x22B-Instruct-v0.1,,,67.63,eq-bench_v2_pl,171.0,1,transformers, ,,
src/about.py CHANGED
@@ -20,7 +20,7 @@ AUTHORS = """Authors:
20
  * Automatic translation: [Remigiusz Kinas](https://www.linkedin.com/in/remigiusz-kinas/)
21
  * Translation proofreading and localization: [Maria Filipkowska](https://www.linkedin.com/in/maria-filipkowska/), [Zuzanna Dabić](https://www.linkedin.com/in/zuzanna-dabic/)
22
  * Preparing dataset: [Kacper Milan](https://www.linkedin.com/in/kacper-milan/)
23
- * Running benchmark: [Krzysztof Wróbel](https://www.linkedin.com/in/wrobelkrzysztof/)
24
 
25
  Based on: EQ-Bench: An Emotional Intelligence Benchmark for Large Language Models, Samuel J. Paech, 2023"""
26
 
 
20
  * Automatic translation: [Remigiusz Kinas](https://www.linkedin.com/in/remigiusz-kinas/)
21
  * Translation proofreading and localization: [Maria Filipkowska](https://www.linkedin.com/in/maria-filipkowska/), [Zuzanna Dabić](https://www.linkedin.com/in/zuzanna-dabic/)
22
  * Preparing dataset: [Kacper Milan](https://www.linkedin.com/in/kacper-milan/)
23
+ * Running benchmark and leaderboard: [Krzysztof Wróbel](https://www.linkedin.com/in/wrobelkrzysztof/)
24
 
25
  Based on: EQ-Bench: An Emotional Intelligence Benchmark for Large Language Models, Samuel J. Paech, 2023"""
26