arshy committed on
Commit 5348813
1 Parent(s): 1bea577
Files changed (2)
  1. app.py +12 -5
  2. tabs/run_benchmark.py +4 -4
app.py CHANGED
@@ -17,14 +17,14 @@ from tabs.run_benchmark import run_benchmark_main
 demo = gr.Blocks()
 
 
-def run_benchmark_gradio(tool_name, model_name, openai_api_key, anthropic_api_key):
+def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
     """Run the benchmark using inputs."""
     if tool_name is None:
         return "Please enter the name of your tool."
     if openai_api_key is None and anthropic_api_key is None:
         return "Please enter either OpenAI or Anthropic API key."
 
-    result = run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key)
+    result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key)
     if result == 'completed':
         # get the results file in the results directory
         fns = glob('results/*.csv')
@@ -106,8 +106,8 @@ with demo:
                 "claude-prediction-offline",
                 "claude-prediction-online",
                 'prediction-request-rag',
-                # "prediction-with-research-conservative",
-                # "prediction-with-research-bold",
+                "prediction-with-research-conservative",
+                "prediction-with-research-bold",
                 "prediction-request-reasoning-claude",
                 "prediction-request-rag-claude",
                 "prediction-url-cot-claude",
@@ -122,6 +122,13 @@ with demo:
         with gr.Row():
             openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
             anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
+        with gr.Row():
+            num_questions = gr.Slider(
+                minimum=1,
+                maximum=340,
+                value=10,
+                label="Number of questions to run the benchmark on",
+            )
         with gr.Row():
             run_button = gr.Button("Run Benchmark")
         with gr.Row():
@@ -132,7 +139,7 @@ with demo:
         summary = gr.Dataframe()
 
         run_button.click(run_benchmark_gradio,
-                         inputs=[tool_name, model_name, openai_api_key, anthropic_api_key],
+                         inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key],
                          outputs=[result, summary])
 
 demo.queue(default_concurrency_limit=40).launch()
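Note on the wiring above: a gr.Slider listed in `inputs` passes its current numeric value to the callback on each click, so value=10 only sets the UI default. A minimal standalone sketch of the same slider-to-callback pattern (the echo_run callback and its labels are illustrative, not part of the commit):

    import gradio as gr

    # Sketch of the slider-to-callback wiring added in app.py above.
    # echo_run is a stand-in for run_benchmark_gradio.
    def echo_run(num_questions):
        return f"Would run the benchmark on {int(num_questions)} questions."

    with gr.Blocks() as sketch:
        num_questions = gr.Slider(
            minimum=1,
            maximum=340,
            value=10,
            label="Number of questions to run the benchmark on",
        )
        run_button = gr.Button("Run Benchmark")
        result = gr.Textbox(label="Result")
        # As in app.py, the component is listed in `inputs`, so its
        # current value is forwarded to the callback on each click.
        run_button.click(echo_run, inputs=[num_questions], outputs=[result])

    if __name__ == "__main__":
        sketch.launch()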
tabs/run_benchmark.py CHANGED
@@ -2,16 +2,16 @@ import os
 from benchmark.run_benchmark import run_benchmark
 
 
-def run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key):
+def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
     """Run the benchmark using the provided function and API key."""
     # Empyt the results directory
     os.system("rm -rf results/*")
 
-    print(f"Running benchmark with the following parameters: {tool_name}, {model_name}, {openai_api_key}, {anthropic_api_key}")
-
     # Set the benchmark parameters
     kwargs = {}
-    kwargs["num_questions"] = 2
+    if not num_questions:
+        num_questions = 10
+    kwargs["num_questions"] = num_questions
     kwargs["tools"] = [tool_name]
     if model_name:
         kwargs["model"] = model_name