Spaces:
Runtime error
Runtime error
richardblythman
committed on
Commit
•
b871994
1
Parent(s):
c4e55cd
fix bug
Browse files
- app.py +2 -9
- tabs/run_benchmark.py +4 -0
app.py
CHANGED
@@ -87,13 +87,6 @@ with demo:
|
|
87 |
with gr.TabItem("π Contribute"):
|
88 |
gr.Markdown(how_to_run)
|
89 |
|
90 |
-
def update_dropdown(tool):
|
91 |
-
if "claude" in tool:
|
92 |
-
return ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229"]
|
93 |
-
else:
|
94 |
-
return ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"]
|
95 |
-
|
96 |
-
|
97 |
# fourth tab - run the benchmark
|
98 |
with gr.TabItem("🔥 Run the Benchmark"):
|
99 |
with gr.Row():
|
@@ -106,14 +99,14 @@ with demo:
|
|
106 |
# "prediction-online-sme",
|
107 |
'prediction-request-rag',
|
108 |
'prediction-request-reasoning',
|
109 |
-
"prediction-url-cot-claude",
|
110 |
# "prediction-request-rag-cohere",
|
111 |
# "prediction-with-research-conservative",
|
112 |
# "prediction-with-research-bold",
|
113 |
], label="Tool Name", info="Choose the tool to run")
|
114 |
model_name = gr.Dropdown([
|
115 |
"gpt-3.5-turbo-0125",
|
116 |
-
"gpt-4-0125-preview"
|
117 |
"claude-3-haiku-20240307",
|
118 |
"claude-3-sonnet-20240229",
|
119 |
"claude-3-opus-20240229",
|
|
|
87 |
with gr.TabItem("π Contribute"):
|
88 |
gr.Markdown(how_to_run)
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
# fourth tab - run the benchmark
|
91 |
with gr.TabItem("🔥 Run the Benchmark"):
|
92 |
with gr.Row():
|
|
|
99 |
# "prediction-online-sme",
|
100 |
'prediction-request-rag',
|
101 |
'prediction-request-reasoning',
|
102 |
+
# "prediction-url-cot-claude",
|
103 |
# "prediction-request-rag-cohere",
|
104 |
# "prediction-with-research-conservative",
|
105 |
# "prediction-with-research-bold",
|
106 |
], label="Tool Name", info="Choose the tool to run")
|
107 |
model_name = gr.Dropdown([
|
108 |
"gpt-3.5-turbo-0125",
|
109 |
+
"gpt-4-0125-preview",
|
110 |
"claude-3-haiku-20240307",
|
111 |
"claude-3-sonnet-20240229",
|
112 |
"claude-3-opus-20240229",
|
tabs/run_benchmark.py
CHANGED
@@ -30,6 +30,10 @@ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, ant
|
|
30 |
else:
|
31 |
kwargs["llm_provider"] = "openrouter"
|
32 |
|
|
|
|
|
|
|
|
|
33 |
kwargs["num_urls"] = 3
|
34 |
kwargs["num_words"] = 300
|
35 |
kwargs["provide_source_links"] = True
|
|
|
30 |
else:
|
31 |
kwargs["llm_provider"] = "openrouter"
|
32 |
|
33 |
+
if tool_name == "prediction-request-reasoning" or tool_name == "prediction-request-rag":
|
34 |
+
if not openai_api_key:
|
35 |
+
return f"Error: Tools that use RAG also require an OpenAI API Key"
|
36 |
+
|
37 |
kwargs["num_urls"] = 3
|
38 |
kwargs["num_words"] = 300
|
39 |
kwargs["provide_source_links"] = True
|