richardblythman committed on
Commit
b871994
•
1 Parent(s): c4e55cd
Files changed (2) hide show
  1. app.py +2 -9
  2. tabs/run_benchmark.py +4 -0
app.py CHANGED
@@ -87,13 +87,6 @@ with demo:
87
  with gr.TabItem("🚀 Contribute"):
88
  gr.Markdown(how_to_run)
89
 
90
- def update_dropdown(tool):
91
- if "claude" in tool:
92
- return ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229"]
93
- else:
94
- return ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"]
95
-
96
-
97
  # fourth tab - run the benchmark
98
  with gr.TabItem("🔥 Run the Benchmark"):
99
  with gr.Row():
@@ -106,14 +99,14 @@ with demo:
106
  # "prediction-online-sme",
107
  'prediction-request-rag',
108
  'prediction-request-reasoning',
109
- "prediction-url-cot-claude",
110
  # "prediction-request-rag-cohere",
111
  # "prediction-with-research-conservative",
112
  # "prediction-with-research-bold",
113
  ], label="Tool Name", info="Choose the tool to run")
114
  model_name = gr.Dropdown([
115
  "gpt-3.5-turbo-0125",
116
- "gpt-4-0125-preview"
117
  "claude-3-haiku-20240307",
118
  "claude-3-sonnet-20240229",
119
  "claude-3-opus-20240229",
 
87
  with gr.TabItem("🚀 Contribute"):
88
  gr.Markdown(how_to_run)
89
 
 
 
 
 
 
 
 
90
  # fourth tab - run the benchmark
91
  with gr.TabItem("🔥 Run the Benchmark"):
92
  with gr.Row():
 
99
  # "prediction-online-sme",
100
  'prediction-request-rag',
101
  'prediction-request-reasoning',
102
+ # "prediction-url-cot-claude",
103
  # "prediction-request-rag-cohere",
104
  # "prediction-with-research-conservative",
105
  # "prediction-with-research-bold",
106
  ], label="Tool Name", info="Choose the tool to run")
107
  model_name = gr.Dropdown([
108
  "gpt-3.5-turbo-0125",
109
+ "gpt-4-0125-preview",
110
  "claude-3-haiku-20240307",
111
  "claude-3-sonnet-20240229",
112
  "claude-3-opus-20240229",
tabs/run_benchmark.py CHANGED
@@ -30,6 +30,10 @@ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, ant
30
  else:
31
  kwargs["llm_provider"] = "openrouter"
32
 
 
 
 
 
33
  kwargs["num_urls"] = 3
34
  kwargs["num_words"] = 300
35
  kwargs["provide_source_links"] = True
 
30
  else:
31
  kwargs["llm_provider"] = "openrouter"
32
 
33
+ if tool_name == "prediction-request-reasoning" or tool_name == "prediction-request-rag":
34
+ if not openai_api_key:
35
+ return f"Error: Tools that use RAG also require an OpenAI API Key"
36
+
37
  kwargs["num_urls"] = 3
38
  kwargs["num_words"] = 300
39
  kwargs["provide_source_links"] = True