cyberosa committed
Commit c6ae410 · 1 Parent(s): 634798b

activating run benchmark again. Using poetry instead of pip

Files changed (3):
  1. app.py +78 -78
  2. start.py +16 -15
  3. tabs/run_benchmark.py +2 -2
app.py CHANGED
@@ -13,7 +13,7 @@ from tabs.faq import (
 from tabs.howto_benchmark import how_to_run
 
 # disabling temporarily
-# from tabs.run_benchmark import run_benchmark_main
+from tabs.run_benchmark import run_benchmark_main
 
 demo = gr.Blocks()
 
@@ -111,83 +111,83 @@ with demo:
         gr.Markdown(how_to_run)
 
     # fourth tab - run the benchmark
-    # with gr.TabItem("🔥 Run the Benchmark"):
-    #     with gr.Row():
-    #         tool_name = gr.Dropdown(
-    #             [
-    #                 "prediction-offline",
-    #                 "prediction-online",
-    #                 # "prediction-online-summarized-info",
-    #                 # "prediction-offline-sme",
-    #                 # "prediction-online-sme",
-    #                 "prediction-request-rag",
-    #                 "prediction-request-reasoning",
-    #                 # "prediction-url-cot-claude",
-    #                 # "prediction-request-rag-cohere",
-    #                 # "prediction-with-research-conservative",
-    #                 # "prediction-with-research-bold",
-    #             ],
-    #             label="Tool Name",
-    #             info="Choose the tool to run",
-    #         )
-    #         model_name = gr.Dropdown(
-    #             [
-    #                 "gpt-3.5-turbo-0125",
-    #                 "gpt-4-0125-preview",
-    #                 "claude-3-haiku-20240307",
-    #                 "claude-3-sonnet-20240229",
-    #                 "claude-3-opus-20240229",
-    #                 "databricks/dbrx-instruct:nitro",
-    #                 "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
-    #                 # "cohere/command-r-plus",
-    #             ],
-    #             label="Model Name",
-    #             info="Choose the model to use",
-    #         )
-    #     with gr.Row():
-    #         openai_api_key = gr.Textbox(
-    #             label="OpenAI API Key",
-    #             placeholder="Enter your OpenAI API key here",
-    #             type="password",
-    #         )
-    #         anthropic_api_key = gr.Textbox(
-    #             label="Anthropic API Key",
-    #             placeholder="Enter your Anthropic API key here",
-    #             type="password",
-    #         )
-    #         openrouter_api_key = gr.Textbox(
-    #             label="OpenRouter API Key",
-    #             placeholder="Enter your OpenRouter API key here",
-    #             type="password",
-    #         )
-    #     with gr.Row():
-    #         num_questions = gr.Slider(
-    #             minimum=1,
-    #             maximum=340,
-    #             value=10,
-    #             label="Number of questions to run the benchmark on",
-    #         )
-    #     with gr.Row():
-    #         run_button = gr.Button("Run Benchmark")
-    #     with gr.Row():
-    #         with gr.Accordion("Results", open=True):
-    #             result = gr.Dataframe()
-    #     with gr.Row():
-    #         with gr.Accordion("Summary", open=False):
-    #             summary = gr.Dataframe()
-
-    #     run_button.click(
-    #         run_benchmark_gradio,
-    #         inputs=[
-    #             tool_name,
-    #             model_name,
-    #             num_questions,
-    #             openai_api_key,
-    #             anthropic_api_key,
-    #             openrouter_api_key,
-    #         ],
-    #         outputs=[result, summary],
-    #     )
+    with gr.TabItem("🔥 Run the Benchmark"):
+        with gr.Row():
+            tool_name = gr.Dropdown(
+                [
+                    "prediction-offline",
+                    "prediction-online",
+                    # "prediction-online-summarized-info",
+                    # "prediction-offline-sme",
+                    # "prediction-online-sme",
+                    "prediction-request-rag",
+                    "prediction-request-reasoning",
+                    # "prediction-url-cot-claude",
+                    # "prediction-request-rag-cohere",
+                    # "prediction-with-research-conservative",
+                    # "prediction-with-research-bold",
+                ],
+                label="Tool Name",
+                info="Choose the tool to run",
+            )
+            model_name = gr.Dropdown(
+                [
+                    "gpt-3.5-turbo-0125",
+                    "gpt-4-0125-preview",
+                    "claude-3-haiku-20240307",
+                    "claude-3-sonnet-20240229",
+                    "claude-3-opus-20240229",
+                    "databricks/dbrx-instruct:nitro",
+                    "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
+                    # "cohere/command-r-plus",
+                ],
+                label="Model Name",
+                info="Choose the model to use",
+            )
+        with gr.Row():
+            openai_api_key = gr.Textbox(
+                label="OpenAI API Key",
+                placeholder="Enter your OpenAI API key here",
+                type="password",
+            )
+            anthropic_api_key = gr.Textbox(
+                label="Anthropic API Key",
+                placeholder="Enter your Anthropic API key here",
+                type="password",
+            )
+            openrouter_api_key = gr.Textbox(
+                label="OpenRouter API Key",
+                placeholder="Enter your OpenRouter API key here",
+                type="password",
+            )
+        with gr.Row():
+            num_questions = gr.Slider(
+                minimum=1,
+                maximum=340,
+                value=10,
+                label="Number of questions to run the benchmark on",
+            )
+        with gr.Row():
+            run_button = gr.Button("Run Benchmark")
+        with gr.Row():
+            with gr.Accordion("Results", open=True):
+                result = gr.Dataframe()
+        with gr.Row():
+            with gr.Accordion("Summary", open=False):
+                summary = gr.Dataframe()
+
+        run_button.click(
+            run_benchmark_gradio,
+            inputs=[
+                tool_name,
+                model_name,
+                num_questions,
+                openai_api_key,
+                anthropic_api_key,
+                openrouter_api_key,
+            ],
+            outputs=[result, summary],
+        )
 
 
 demo.queue(default_concurrency_limit=40).launch()
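The click wiring above passes the six form inputs to a callable named run_benchmark_gradio and expects two dataframes back (result and summary). That callable's definition is not part of this commit; below is a minimal sketch of what such an adapter could look like, assuming it delegates to run_benchmark_main from tabs/run_benchmark.py. The names, argument order, and return handling here are assumptions for illustration, not the space's actual code.

import pandas as pd

from tabs.run_benchmark import run_benchmark_main


def run_benchmark_gradio(
    tool_name: str,
    model_name: str,
    num_questions: int,
    openai_api_key: str,
    anthropic_api_key: str,
    openrouter_api_key: str,
):
    """Hypothetical adapter between the Gradio click event and the benchmark runner.

    Returns two dataframes matching outputs=[result, summary] in the click wiring.
    """
    # Assumption: run_benchmark_main accepts the same six values positionally.
    status = run_benchmark_main(
        tool_name,
        model_name,
        num_questions,
        openai_api_key,
        anthropic_api_key,
        openrouter_api_key,
    )
    if status != "completed":
        # Surface the error message in the results table instead of failing silently.
        return pd.DataFrame({"error": [status]}), pd.DataFrame()
    # Placeholder: a real adapter would load the result/summary files the benchmark writes.
    return pd.DataFrame(), pd.DataFrame()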
start.py CHANGED
@@ -45,25 +45,26 @@ def start():
     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
-    # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
-    # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
+    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
+    benchmark_dir = os.path.join(olas_dir, "benchmark")
+    mech_dir = os.path.join(olas_dir, "benchmark", "mech")
 
     commands = [
         ("git submodule init", base_dir),
         ("git submodule update --init --recursive", base_dir),
         ("git submodule update --remote --recursive", base_dir),
-        # (
-        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-        #     olas_dir,
-        # ),
-        # ("git remote update", olas_dir),
-        # ("git fetch --all", olas_dir),
-        # ("git checkout main", olas_dir),
-        # ("git pull origin main", olas_dir),
-        # ("git checkout main", mech_dir),
-        # ("git pull origin main", mech_dir),
-        # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
-        # ("pip install -e .", mech_dir),
+        (
+            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+            olas_dir,
+        ),
+        ("git remote update", olas_dir),
+        ("git fetch --all", olas_dir),
+        ("git checkout main", olas_dir),
+        ("git pull origin main", olas_dir),
+        ("git checkout main", mech_dir),
+        ("git pull origin main", mech_dir),
+        ("poetry install", benchmark_dir),
+        ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
@@ -72,7 +73,7 @@ def start():
         run_command(command, cwd=cwd)
 
     # add benchmark to the path
-    # sys.path.append(os.path.join(olas_dir, "benchmark"))
+    sys.path.append(os.path.join(olas_dir, "benchmark"))
 
     # Download the dataset
     download_dataset()
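Each (command, cwd) pair in the list above is executed by run_command, whose definition sits outside this diff. A minimal sketch of such a helper, assuming it simply shells out through subprocess and fails loudly on a non-zero exit code; the actual implementation in start.py may differ.

import subprocess


def run_command(command: str, cwd: str) -> None:
    """Hypothetical sketch: run a shell command in the given working directory.

    Illustrates the (command, cwd) contract used by the commands list above;
    check=True raises CalledProcessError if the command exits non-zero.
    """
    print(f"Running: {command} (cwd={cwd})")
    subprocess.run(command, shell=True, cwd=cwd, check=True)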
tabs/run_benchmark.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-# from benchmark.run_benchmark import run_benchmark
+from benchmark.run_benchmark import run_benchmark
 
 
 def run_benchmark_main(
@@ -55,7 +55,7 @@ def run_benchmark_main(
 
     # Run the benchmark
     try:
-        # run_benchmark(kwargs=kwargs)
+        run_benchmark(kwargs=kwargs)
         return "completed"
     except Exception as e:
         return f"Error running benchmark: {e}"