Adding new state-of-the-art results from ThinkDepth.ai
#4
by
jimlinfeeling
- opened
data/leaderboard.csv
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
model,overall_score,comprehensiveness,insight,instruction_following,readability,citation_accuracy,effective_citations
|
|
|
|
| 2 |
gemini-2.5-pro-deepresearch,49.71,49.51,49.45,50.12,50.00,78.30,165.34
|
| 3 |
openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
|
| 4 |
claude-research,45.00,45.34,42.79,47.58,44.66,-,-
|
|
|
|
| 1 |
model,overall_score,comprehensiveness,insight,instruction_following,readability,citation_accuracy,effective_citations
|
| 2 |
+
thinkdepthai-deepresearch,52.49,52.03,53.94,52.07,50.44,-,-
|
| 3 |
gemini-2.5-pro-deepresearch,49.71,49.51,49.45,50.12,50.00,78.30,165.34
|
| 4 |
openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
|
| 5 |
claude-research,45.00,45.34,42.79,47.58,44.66,-,-
|
data/raw_data/thinkdepthai-deepresearch.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:578ce210b003f68a2dd24ba2960c7cee8a63fe5fa826849e27dcf460baba4a8e
|
| 3 |
+
size 2694297
|
data/raw_results/thinkdepthai-deepresearch/race_result.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Comprehensiveness: 0.5203
|
| 2 |
+
Insight: 0.5394
|
| 3 |
+
Instruction Following: 0.5207
|
| 4 |
+
Readability: 0.5044
|
| 5 |
+
Overall Score: 0.5249
|
data/raw_results/thinkdepthai-deepresearch/raw_results.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:543c963bc060c157bfc032955673ac0c7d99ed2fee0543c27fa615c601952d0a
|
| 3 |
+
size 52129
|
tabs/leaderboard_tab.py
CHANGED
|
@@ -26,6 +26,7 @@ COLUMN_RENAME_MAP = {
|
|
| 26 |
# 模型分类映射
|
| 27 |
MODEL_CATEGORIES = {
|
| 28 |
"Deep Research Agent": [
|
|
|
|
| 29 |
"gemini-2.5-pro-deepresearch",
|
| 30 |
"grok-deeper-search",
|
| 31 |
"openai-deepresearch",
|
|
@@ -55,6 +56,7 @@ MODEL_CATEGORIES = {
|
|
| 55 |
# 模型链接映射(目前都设置为空,可以后续添加具体链接)
|
| 56 |
MODEL_LINKS = {
|
| 57 |
# Deep Research Agent
|
|
|
|
| 58 |
"gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
|
| 59 |
"grok-deeper-search": "https://x.ai/news/grok-3",
|
| 60 |
"openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
|
|
@@ -83,6 +85,7 @@ MODEL_LINKS = {
|
|
| 83 |
# 模型许可证类型映射
|
| 84 |
MODEL_LICENSE_TYPE = {
|
| 85 |
# Deep Research Agent
|
|
|
|
| 86 |
"gemini-2.5-pro-deepresearch": "Proprietary",
|
| 87 |
"grok-deeper-search": "Proprietary",
|
| 88 |
"openai-deepresearch": "Proprietary",
|
|
|
|
| 26 |
# 模型分类映射
|
| 27 |
MODEL_CATEGORIES = {
|
| 28 |
"Deep Research Agent": [
|
| 29 |
+
"thinkdepthai-deepresearch",
|
| 30 |
"gemini-2.5-pro-deepresearch",
|
| 31 |
"grok-deeper-search",
|
| 32 |
"openai-deepresearch",
|
|
|
|
| 56 |
# 模型链接映射(目前都设置为空,可以后续添加具体链接)
|
| 57 |
MODEL_LINKS = {
|
| 58 |
# Deep Research Agent
|
| 59 |
+
"thinkdepthai-deepresearch": "https://www.thinkdepth.ai/",
|
| 60 |
"gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
|
| 61 |
"grok-deeper-search": "https://x.ai/news/grok-3",
|
| 62 |
"openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
|
|
|
|
| 85 |
# 模型许可证类型映射
|
| 86 |
MODEL_LICENSE_TYPE = {
|
| 87 |
# Deep Research Agent
|
| 88 |
+
"thinkdepthai-deepresearch": "Proprietary",
|
| 89 |
"gemini-2.5-pro-deepresearch": "Proprietary",
|
| 90 |
"grok-deeper-search": "Proprietary",
|
| 91 |
"openai-deepresearch": "Proprietary",
|