import os
import json
from dotenv import load_dotenv
from typing import Any, Tuple

from evoagentx.benchmark import MATH
from evoagentx.core.logging import logger
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers import MiproOptimizer
from evoagentx.core.callbacks import suppress_logger_info
from evoagentx.utils.mipro_utils.register_utils import MiproRegistry

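# load environment variables (including OPENAI_API_KEY) from a local .env file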
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


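# Benchmark wrapper that carves fixed train/test splits out of the MATH test set.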
class MathSplits(MATH):

    def _load_data(self):
        # load the full MATH test split
        super()._load_data()
        # shuffle it with a fixed seed so the splits are reproducible
        import numpy as np
        np.random.seed(42)
        permutation = np.random.permutation(len(self._test_data))
        full_test_data = self._test_data

        # first 100 shuffled samples for training, next 100 for testing
        self._train_data = [full_test_data[idx] for idx in permutation[:100]]
        self._test_data = [full_test_data[idx] for idx in permutation[100:200]]
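
    # name(s) of the benchmark fields used as program inputs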
    def get_input_keys(self):
        return ["problem"]

    def evaluate(self, prediction: Any, label: Any) -> dict:
        # scoring is delegated to MATH's built-in evaluator
        return super().evaluate(prediction, label)


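# A minimal custom program: a single tunable prompt template wrapped around one LLM call.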
class CustomProgram:

    def __init__(self, model: OpenAILLM):
        self.model = model
        # the prompt template to optimize; {problem} is filled in per call
        self.prompt = "Let's think step by step to answer the math question: {problem}"

    def save(self, path: str):
        # persist the tunable prompt as JSON
        params = {"prompt": self.prompt}
        with open(path, "w") as f:
            json.dump(params, f)

    def load(self, path: str):
        # restore a previously saved prompt
        with open(path, "r") as f:
            params = json.load(f)
        self.prompt = params["prompt"]

    def __call__(self, problem: str) -> Tuple[str, dict]:
        # fill the prompt template, query the LLM, and return the solution text
        # together with a dict of the call's inputs and outputs
        prompt = self.prompt.format(problem=problem)
        response = self.model.generate(prompt=prompt)
        solution = response.content
        return solution, {"problem": problem, "solution": solution}


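# End-to-end example: build the LLMs, register the prompt, optimize, and evaluate.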
def main():
    # gpt-4o-mini executes the program; gpt-4o drives the prompt optimization
    openai_config = OpenAILLMConfig(model="gpt-4o-mini", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    executor_llm = OpenAILLM(config=openai_config)
    optimizer_config = OpenAILLMConfig(model="gpt-4o", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    optimizer_llm = OpenAILLM(config=optimizer_config)

    benchmark = MathSplits()
    program = CustomProgram(model=executor_llm)

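    # register the tunable attribute (CustomProgram.prompt) together with the
    # program's input and output names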
    registry = MiproRegistry()
    registry.track(program, "prompt", input_names=["problem"], output_names=["solution"])

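    # configure the optimizer: demo limits, thread count, evaluation rounds,
    # the "medium" auto setting, and where to save results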
    optimizer = MiproOptimizer(
        registry=registry,
        program=program,
        optimizer_llm=optimizer_llm,
        max_bootstrapped_demos=4,
        max_labeled_demos=4,
        num_threads=20,
        eval_rounds=1,
        auto="medium",
        save_path="examples/output/mipro/math_plug_and_play"
    )

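    # run the MIPRO search, then reload the best-scoring program it found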
    logger.info("Optimizing program...")
    optimizer.optimize(dataset=benchmark)
    optimizer.restore_best_program()

    logger.info("Evaluating program on test set...")
    with suppress_logger_info():
        results = optimizer.evaluate(dataset=benchmark, eval_mode="test")
    logger.info(f"Evaluation metrics (after optimization): {results}")


if __name__ == "__main__":
    main()