Spaces:

valory
/

olas-prediction-leaderboard

Running

cyberosa committed on Jul 5, 2024

Commit

d269dc6

•

1 Parent(s): d599f4f

clean up and add main repo of benchmark

Files changed (2) hide show

automate/run_benchmark.py CHANGED Viewed

@@ -36,10 +36,15 @@ def tool_map(tool):
 def prepare_questions(kwargs):
     test_questions = json.load(
-        open(this_dir / "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.json")
     )
     with open(
-        this_dir / "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.pkl", "rb"
     ) as f:
         url_to_content = pickle.load(f)
     num_questions = kwargs.pop("num_questions", len(test_questions))
@@ -73,7 +78,7 @@ def parse_response(response, test_q):
         test_q["p_no"] = float(result["p_no"])
     else:
         test_q["p_no"] = None
     if "confidence" in result.keys():
         test_q["confidence"] = float(result["confidence"])
     else:
@@ -277,6 +282,7 @@ if __name__ == "__main__":
     kwargs["model"] = [
         "gpt-3.5-turbo-0125",
     ]
     kwargs["api_keys"] = {}
     kwargs["api_keys"]["openai"] = os.getenv("OPENAI_API_KEY")
     kwargs["api_keys"]["anthropic"] = os.getenv("ANTHROPIC_API_KEY")
@@ -285,4 +291,4 @@ if __name__ == "__main__":
     kwargs["num_urls"] = 3
     kwargs["num_words"] = 300
     kwargs["provide_source_links"] = True
-    run_benchmark(kwargs)

 def prepare_questions(kwargs):
     test_questions = json.load(
+        open(
+            this_dir
+            / "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.json"
+        )
     )
     with open(
+        this_dir
+        / "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.pkl",
+        "rb",
     ) as f:
         url_to_content = pickle.load(f)
     num_questions = kwargs.pop("num_questions", len(test_questions))
         test_q["p_no"] = float(result["p_no"])
     else:
         test_q["p_no"] = None
     if "confidence" in result.keys():
         test_q["confidence"] = float(result["confidence"])
     else:
     kwargs["model"] = [
         "gpt-3.5-turbo-0125",
     ]
     kwargs["api_keys"] = {}
     kwargs["api_keys"]["openai"] = os.getenv("OPENAI_API_KEY")
     kwargs["api_keys"]["anthropic"] = os.getenv("ANTHROPIC_API_KEY")
     kwargs["num_urls"] = 3
     kwargs["num_words"] = 300
     kwargs["provide_source_links"] = True
+    run_benchmark(kwargs)

start.py CHANGED Viewed

@@ -53,15 +53,15 @@ def start():
         # no updates
         # ("git submodule update --init --recursive", base_dir),
         # ("git submodule update --remote --recursive", base_dir),
-        # (
-        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-        #     olas_dir,
-        # ),
         # no updates
         ("git remote update", olas_dir),
         ("git fetch --all", olas_dir),
-        ("git checkout bac77acc64ed129608e6f428d40e86c0eb2cb4d1", olas_dir),
-        # ("git pull origin main", olas_dir),
         ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
         # ("git pull origin main", mech_dir),
         ("pip install -e .", os.path.join(olas_dir, "benchmark")),

         # no updates
         # ("git submodule update --init --recursive", base_dir),
         # ("git submodule update --remote --recursive", base_dir),
+        (
+            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+            olas_dir,
+        ),
         # no updates
         ("git remote update", olas_dir),
         ("git fetch --all", olas_dir),
+        ("git checkout main", olas_dir),
+        ("git pull origin main", olas_dir),
         ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
         # ("git pull origin main", mech_dir),
         ("pip install -e .", os.path.join(olas_dir, "benchmark")),