richardblythman committed on
Commit
7d57619
1 Parent(s): 6bec1f5

add open source LMs

Browse files
Files changed (2) hide show
  1. app.py +16 -13
  2. tabs/run_benchmark.py +4 -2
app.py CHANGED
@@ -17,14 +17,14 @@ from tabs.run_benchmark import run_benchmark_main
17
  demo = gr.Blocks()
18
 
19
 
20
- def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
21
  """Run the benchmark using inputs."""
22
  if tool_name is None:
23
  return "Please enter the name of your tool."
24
- if openai_api_key is None and anthropic_api_key is None:
25
- return "Please enter either OpenAI or Anthropic API key."
26
 
27
- result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key)
28
  if result == 'completed':
29
  # get the results file in the results directory
30
  fns = glob('results/*.csv')
@@ -101,16 +101,15 @@ with demo:
101
  [
102
  "prediction-offline",
103
  "prediction-online",
104
- "prediction-offline-sme",
105
- "prediction-online-sme",
106
- "claude-prediction-offline",
107
- "claude-prediction-online",
108
  'prediction-request-rag',
109
- "prediction-with-research-conservative",
110
- "prediction-with-research-bold",
111
- "prediction-request-reasoning-claude",
112
- "prediction-request-rag-claude",
113
  "prediction-url-cot-claude",
 
 
 
114
  ], label="Tool Name", info="Choose the tool to run")
115
  model_name = gr.Dropdown([
116
  "gpt-3.5-turbo-0125",
@@ -118,10 +117,14 @@ with demo:
118
  "claude-3-haiku-20240307",
119
  "claude-3-sonnet-20240229",
120
  "claude-3-opus-20240229",
 
 
 
121
  ], label="Model Name", info="Choose the model to use")
122
  with gr.Row():
123
  openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
124
  anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
 
125
  with gr.Row():
126
  num_questions = gr.Slider(
127
  minimum=1,
@@ -139,7 +142,7 @@ with demo:
139
  summary = gr.Dataframe()
140
 
141
  run_button.click(run_benchmark_gradio,
142
- inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key],
143
  outputs=[result, summary])
144
 
145
  demo.queue(default_concurrency_limit=40).launch()
 
17
  demo = gr.Blocks()
18
 
19
 
20
+ def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key):
21
  """Run the benchmark using inputs."""
22
  if tool_name is None:
23
  return "Please enter the name of your tool."
24
+ if openai_api_key is None and anthropic_api_key is None and openrouter_api_key is None:
25
+ return "Please enter either OpenAI or Anthropic or OpenRouter API key."
26
 
27
+ result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key)
28
  if result == 'completed':
29
  # get the results file in the results directory
30
  fns = glob('results/*.csv')
 
101
  [
102
  "prediction-offline",
103
  "prediction-online",
104
+ # "prediction-online-summarized-info",
105
+ # "prediction-offline-sme",
106
+ # "prediction-online-sme",
 
107
  'prediction-request-rag',
108
+ 'prediction-request-reasoning',
 
 
 
109
  "prediction-url-cot-claude",
110
+ # "prediction-request-rag-cohere",
111
+ # "prediction-with-research-conservative",
112
+ # "prediction-with-research-bold",
113
  ], label="Tool Name", info="Choose the tool to run")
114
  model_name = gr.Dropdown([
115
  "gpt-3.5-turbo-0125",
 
117
  "claude-3-haiku-20240307",
118
  "claude-3-sonnet-20240229",
119
  "claude-3-opus-20240229",
120
+ "databricks/dbrx-instruct:nitro",
121
+ "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
122
+ # "cohere/command-r-plus",
123
  ], label="Model Name", info="Choose the model to use")
124
  with gr.Row():
125
  openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
126
  anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
127
+ openrouter_api_key = gr.Textbox(label="OpenRouter API Key", placeholder="Enter your OpenRouter API key here", type="password")
128
  with gr.Row():
129
  num_questions = gr.Slider(
130
  minimum=1,
 
142
  summary = gr.Dataframe()
143
 
144
  run_button.click(run_benchmark_gradio,
145
+ inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key],
146
  outputs=[result, summary])
147
 
148
  demo.queue(default_concurrency_limit=40).launch()
tabs/run_benchmark.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from benchmark.run_benchmark import run_benchmark
3
 
4
 
5
- def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
6
  """Run the benchmark using the provided function and API key."""
7
  # Empty the results directory
8
  os.system("rm -rf results/*")
@@ -20,7 +20,9 @@ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, ant
20
  kwargs["api_keys"]["openai"] = openai_api_key
21
  if anthropic_api_key:
22
  kwargs["api_keys"]["anthropic"] = anthropic_api_key
23
-
 
 
24
  kwargs["num_urls"] = 3
25
  kwargs["num_words"] = 300
26
  kwargs["provide_source_links"] = True
 
2
  from benchmark.run_benchmark import run_benchmark
3
 
4
 
5
+ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key):
6
  """Run the benchmark using the provided function and API key."""
7
  # Empty the results directory
8
  os.system("rm -rf results/*")
 
20
  kwargs["api_keys"]["openai"] = openai_api_key
21
  if anthropic_api_key:
22
  kwargs["api_keys"]["anthropic"] = anthropic_api_key
23
+ if openrouter_api_key:
24
+ kwargs["api_keys"]["openrouter"] = openrouter_api_key
25
+
26
  kwargs["num_urls"] = 3
27
  kwargs["num_words"] = 300
28
  kwargs["provide_source_links"] = True