cyberosa committed on
Commit
dbaa2bd
·
1 Parent(s): 9b36cb7

trying to activate run_benchmark tab again

Browse files
Files changed (4) hide show
  1. .gitmodules +3 -0
  2. app.py +125 -125
  3. olas-predict-benchmark +1 -0
  4. start.py +18 -19
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "olas-predict-benchmark"]
2
+ path = olas-predict-benchmark
3
+ url = https://github.com/valory-xyz/olas-predict-benchmark.git
app.py CHANGED
@@ -13,69 +13,69 @@ from tabs.faq import (
13
  from tabs.howto_benchmark import how_to_run
14
 
15
  # Feature temporarily disabled til HF support helps us with the Space Error
16
- # from tabs.run_benchmark import run_benchmark_main
17
 
18
 
19
  demo = gr.Blocks()
20
 
21
 
22
- # def run_benchmark_gradio(
23
- # tool_name,
24
- # model_name,
25
- # num_questions,
26
- # openai_api_key,
27
- # anthropic_api_key,
28
- # openrouter_api_key,
29
- # ):
30
- # """Run the benchmark using inputs."""
31
- # if tool_name is None:
32
- # return "Please enter the name of your tool."
33
- # if (
34
- # openai_api_key is None
35
- # and anthropic_api_key is None
36
- # and openrouter_api_key is None
37
- # ):
38
- # return "Please enter either OpenAI or Anthropic or OpenRouter API key."
39
-
40
- # result = run_benchmark_main(
41
- # tool_name,
42
- # model_name,
43
- # num_questions,
44
- # openai_api_key,
45
- # anthropic_api_key,
46
- # openrouter_api_key,
47
- # )
48
 
49
- # if result == "completed":
50
- # # get the results file in the results directory
51
- # fns = glob("results/*.csv")
52
 
53
- # print(f"Number of files in results directory: {len(fns)}")
54
 
55
- # # convert to Path
56
- # files = [Path(file) for file in fns]
57
 
58
- # # get results and summary files
59
- # results_files = [file for file in files if "results" in file.name]
60
 
61
- # # the other file is the summary file
62
- # summary_files = [file for file in files if "summary" in file.name]
63
 
64
- # print(results_files, summary_files)
65
 
66
- # # get the path with results
67
- # results_df = pd.read_csv(results_files[0])
68
- # summary_df = pd.read_csv(summary_files[0])
69
 
70
- # # make sure all df float values are rounded to 4 decimal places
71
- # results_df = results_df.round(4)
72
- # summary_df = summary_df.round(4)
73
 
74
- # return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
75
 
76
- # return gr.Textbox(
77
- # label="Benchmark Result", value=result, interactive=False
78
- # ), gr.Textbox(label="Summary", value="")
79
 
80
 
81
  with demo:
@@ -112,83 +112,83 @@ with demo:
112
  gr.Markdown(how_to_run)
113
 
114
  # fourth tab - run the benchmark
115
- # with gr.TabItem("🔥 Run the Benchmark"):
116
- # with gr.Row():
117
- # tool_name = gr.Dropdown(
118
- # [
119
- # "prediction-offline",
120
- # "prediction-online",
121
- # # "prediction-online-summarized-info",
122
- # # "prediction-offline-sme",
123
- # # "prediction-online-sme",
124
- # "prediction-request-rag",
125
- # "prediction-request-reasoning",
126
- # # "prediction-url-cot-claude",
127
- # # "prediction-request-rag-cohere",
128
- # # "prediction-with-research-conservative",
129
- # # "prediction-with-research-bold",
130
- # ],
131
- # label="Tool Name",
132
- # info="Choose the tool to run",
133
- # )
134
- # model_name = gr.Dropdown(
135
- # [
136
- # "gpt-3.5-turbo-0125",
137
- # "gpt-4-0125-preview",
138
- # "claude-3-haiku-20240307",
139
- # "claude-3-sonnet-20240229",
140
- # "claude-3-opus-20240229",
141
- # "databricks/dbrx-instruct:nitro",
142
- # "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
143
- # # "cohere/command-r-plus",
144
- # ],
145
- # label="Model Name",
146
- # info="Choose the model to use",
147
- # )
148
- # with gr.Row():
149
- # openai_api_key = gr.Textbox(
150
- # label="OpenAI API Key",
151
- # placeholder="Enter your OpenAI API key here",
152
- # type="password",
153
- # )
154
- # anthropic_api_key = gr.Textbox(
155
- # label="Anthropic API Key",
156
- # placeholder="Enter your Anthropic API key here",
157
- # type="password",
158
- # )
159
- # openrouter_api_key = gr.Textbox(
160
- # label="OpenRouter API Key",
161
- # placeholder="Enter your OpenRouter API key here",
162
- # type="password",
163
- # )
164
- # with gr.Row():
165
- # num_questions = gr.Slider(
166
- # minimum=1,
167
- # maximum=340,
168
- # value=10,
169
- # label="Number of questions to run the benchmark on",
170
- # )
171
- # with gr.Row():
172
- # run_button = gr.Button("Run Benchmark")
173
- # with gr.Row():
174
- # with gr.Accordion("Results", open=True):
175
- # result = gr.Dataframe()
176
- # with gr.Row():
177
- # with gr.Accordion("Summary", open=False):
178
- # summary = gr.Dataframe()
179
-
180
- # run_button.click(
181
- # run_benchmark_gradio,
182
- # inputs=[
183
- # tool_name,
184
- # model_name,
185
- # num_questions,
186
- # openai_api_key,
187
- # anthropic_api_key,
188
- # openrouter_api_key,
189
- # ],
190
- # outputs=[result, summary],
191
- # )
192
 
193
 
194
  demo.queue(default_concurrency_limit=40).launch()
 
13
  from tabs.howto_benchmark import how_to_run
14
 
15
  # Feature temporarily disabled til HF support helps us with the Space Error
16
+ from tabs.run_benchmark import run_benchmark_main
17
 
18
 
19
  demo = gr.Blocks()
20
 
21
 
22
+ def run_benchmark_gradio(
23
+ tool_name,
24
+ model_name,
25
+ num_questions,
26
+ openai_api_key,
27
+ anthropic_api_key,
28
+ openrouter_api_key,
29
+ ):
30
+ """Run the benchmark using inputs."""
31
+ if tool_name is None:
32
+ return "Please enter the name of your tool."
33
+ if (
34
+ openai_api_key is None
35
+ and anthropic_api_key is None
36
+ and openrouter_api_key is None
37
+ ):
38
+ return "Please enter either OpenAI or Anthropic or OpenRouter API key."
39
+
40
+ result = run_benchmark_main(
41
+ tool_name,
42
+ model_name,
43
+ num_questions,
44
+ openai_api_key,
45
+ anthropic_api_key,
46
+ openrouter_api_key,
47
+ )
48
 
49
+ if result == "completed":
50
+ # get the results file in the results directory
51
+ fns = glob("results/*.csv")
52
 
53
+ print(f"Number of files in results directory: {len(fns)}")
54
 
55
+ # convert to Path
56
+ files = [Path(file) for file in fns]
57
 
58
+ # get results and summary files
59
+ results_files = [file for file in files if "results" in file.name]
60
 
61
+ # the other file is the summary file
62
+ summary_files = [file for file in files if "summary" in file.name]
63
 
64
+ print(results_files, summary_files)
65
 
66
+ # get the path with results
67
+ results_df = pd.read_csv(results_files[0])
68
+ summary_df = pd.read_csv(summary_files[0])
69
 
70
+ # make sure all df float values are rounded to 4 decimal places
71
+ results_df = results_df.round(4)
72
+ summary_df = summary_df.round(4)
73
 
74
+ return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
75
 
76
+ return gr.Textbox(
77
+ label="Benchmark Result", value=result, interactive=False
78
+ ), gr.Textbox(label="Summary", value="")
79
 
80
 
81
  with demo:
 
112
  gr.Markdown(how_to_run)
113
 
114
  # fourth tab - run the benchmark
115
+ with gr.TabItem("🔥 Run the Benchmark"):
116
+ with gr.Row():
117
+ tool_name = gr.Dropdown(
118
+ [
119
+ "prediction-offline",
120
+ "prediction-online",
121
+ # "prediction-online-summarized-info",
122
+ # "prediction-offline-sme",
123
+ # "prediction-online-sme",
124
+ "prediction-request-rag",
125
+ "prediction-request-reasoning",
126
+ # "prediction-url-cot-claude",
127
+ # "prediction-request-rag-cohere",
128
+ # "prediction-with-research-conservative",
129
+ # "prediction-with-research-bold",
130
+ ],
131
+ label="Tool Name",
132
+ info="Choose the tool to run",
133
+ )
134
+ model_name = gr.Dropdown(
135
+ [
136
+ "gpt-3.5-turbo-0125",
137
+ "gpt-4-0125-preview",
138
+ "claude-3-haiku-20240307",
139
+ "claude-3-sonnet-20240229",
140
+ "claude-3-opus-20240229",
141
+ "databricks/dbrx-instruct:nitro",
142
+ "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
143
+ # "cohere/command-r-plus",
144
+ ],
145
+ label="Model Name",
146
+ info="Choose the model to use",
147
+ )
148
+ with gr.Row():
149
+ openai_api_key = gr.Textbox(
150
+ label="OpenAI API Key",
151
+ placeholder="Enter your OpenAI API key here",
152
+ type="password",
153
+ )
154
+ anthropic_api_key = gr.Textbox(
155
+ label="Anthropic API Key",
156
+ placeholder="Enter your Anthropic API key here",
157
+ type="password",
158
+ )
159
+ openrouter_api_key = gr.Textbox(
160
+ label="OpenRouter API Key",
161
+ placeholder="Enter your OpenRouter API key here",
162
+ type="password",
163
+ )
164
+ with gr.Row():
165
+ num_questions = gr.Slider(
166
+ minimum=1,
167
+ maximum=340,
168
+ value=10,
169
+ label="Number of questions to run the benchmark on",
170
+ )
171
+ with gr.Row():
172
+ run_button = gr.Button("Run Benchmark")
173
+ with gr.Row():
174
+ with gr.Accordion("Results", open=True):
175
+ result = gr.Dataframe()
176
+ with gr.Row():
177
+ with gr.Accordion("Summary", open=False):
178
+ summary = gr.Dataframe()
179
+
180
+ run_button.click(
181
+ run_benchmark_gradio,
182
+ inputs=[
183
+ tool_name,
184
+ model_name,
185
+ num_questions,
186
+ openai_api_key,
187
+ anthropic_api_key,
188
+ openrouter_api_key,
189
+ ],
190
+ outputs=[result, summary],
191
+ )
192
 
193
 
194
  demo.queue(default_concurrency_limit=40).launch()
olas-predict-benchmark ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit eb1b1276bd576e38af86ee334ebf1922da0ac035
start.py CHANGED
@@ -45,26 +45,25 @@ def start():
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
- # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
- # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
- # ("git submodule init", base_dir),
53
- # no updates
54
- # ("git submodule update --init --recursive", base_dir),
55
- # ("git submodule update --remote --recursive", base_dir),
56
- # (
57
- # 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
58
- # olas_dir,
59
- # ),
60
- # ("git remote update", olas_dir),
61
- # ("git fetch --all", olas_dir),
62
- # ("git checkout main", olas_dir),
63
- # ("git pull origin main", olas_dir),
64
- # ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
65
- # # ("git pull origin main", mech_dir),
66
- # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
67
- # ("pip install -e .", mech_dir),
68
  ("pip install lxml[html_clean]", base_dir),
69
  ("pip install --upgrade huggingface_hub", base_dir),
70
  ]
@@ -73,7 +72,7 @@ def start():
73
  run_command(command, cwd=cwd)
74
 
75
  # add benchmark to the path
76
- # sys.path.append(os.path.join(olas_dir, "benchmark"))
77
 
78
  # Download the dataset
79
  download_dataset()
 
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
+ olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
+ mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
+ ("git submodule init", base_dir),
53
+ ("git submodule update --init --recursive", base_dir),
54
+ ("git submodule update --remote --recursive", base_dir),
55
+ (
56
+ 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
57
+ olas_dir,
58
+ ),
59
+ ("git remote update", olas_dir),
60
+ ("git fetch --all", olas_dir),
61
+ ("git checkout main", olas_dir),
62
+ ("git pull origin main", olas_dir),
63
+ ("git checkout main", mech_dir),
64
+ ("git pull origin main", mech_dir),
65
+ ("pip install -e .", os.path.join(olas_dir, "benchmark")),
66
+ ("pip install -e .", mech_dir),
 
67
  ("pip install lxml[html_clean]", base_dir),
68
  ("pip install --upgrade huggingface_hub", base_dir),
69
  ]
 
72
  run_command(command, cwd=cwd)
73
 
74
  # add benchmark to the path
75
+ sys.path.append(os.path.join(olas_dir, "benchmark"))
76
 
77
  # Download the dataset
78
  download_dataset()