yeeef committed
Commit
5a59abd
1 Parent(s): 7361bee

remove useless code

Files changed (1)
  1. OpenAIChatAtomicFlow.py +29 -107
OpenAIChatAtomicFlow.py CHANGED
@@ -48,15 +48,15 @@ class OpenAIChatAtomicFlow(AtomicFlow):
     response_annotators: Optional[Dict[str, MessageAnnotator]] = {}
 
     default_search_space = {
-        "model": tune.choice(
-            [
-                # "text-ada-001",
-                # "text-babbage-001",
-                # "text-davinci-003",
-                "gpt-3.5-turbo",
-                # "gpt-4",
-            ]
-        ),
+        # "model": tune.choice(
+        #     [
+        #         # "text-ada-001",
+        #         # "text-babbage-001",
+        #         # "text-davinci-003",
+        #         "gpt-3.5-turbo",
+        #         # "gpt-4",
+        #     ]
+        # ),
         "temperature_or_top_p": tune.choice(
             [
                 {"temperature": tune.uniform(0, 2)},
@@ -133,7 +133,10 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         response_annotators = config.get("response_annotators", {})
         if len(response_annotators) > 0:
             for key, config in response_annotators.items():
-                response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
+                if isinstance(config, MessageAnnotator):
+                    response_annotators[key] = config
+                else:
+                    response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
         return {"response_annotators": response_annotators}
 
     @classmethod
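The new `isinstance` guard means the mapping may now mix ready-made annotator objects with hydra config dicts. A hedged usage sketch: `ScoreAnnotator` and its module path are made-up placeholders; only the `_target_`-style instantiation is real hydra behavior.

    config = {
        "response_annotators": {
            # already an object: kept as-is by the new isinstance() branch
            "code": my_prebuilt_annotator,  # any MessageAnnotator instance
            # hydra-style dict: handed to hydra.utils.instantiate as before
            "score": {"_target_": "mypkg.annotators.ScoreAnnotator", "key": "score"},
        }
    }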
@@ -361,6 +364,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         optimization_budget: Optional[float] = None,
         num_samples: Optional[int] = 1,
         logging_level: Optional[int] = logging.WARN,  # TODO(yeeef)
+        initial_flow_config: Optional[Dict] = None,  # if not supplied will use default flow config of the class (xxx.yaml)
         **config,
     ) -> Tuple[Dict, Any]:  # tune.ExperimentAnalysis
         """
@@ -396,6 +400,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             - tune.ExperimentAnalysis: The tuning results.
         """
 
+        initial_flow_config = initial_flow_config or cls.get_config()
         space = cls.default_search_space.copy()
 
         if config is not None:
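One subtlety of the `or` fallback, shown below with illustrative values: any falsy argument, including an explicitly passed empty dict, is replaced by the class default, so an `is None` check would be needed to distinguish the two cases.

    default = {"model_name": "from-yaml"}

    cfg = None
    cfg = cfg or default  # -> {"model_name": "from-yaml"}, as intended

    cfg = {}              # caller explicitly passed an empty config...
    cfg = cfg or default  # -> still the class default, since {} is falsy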
@@ -413,100 +418,16 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             space["temperature"] = temperature
             space["top_p"] = top_p
             log.warning("temperature and top_p are not recommended to vary together.")
-
-        # TODO: shall we use cls method?
-        cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
-        cls.optimization_budget = optimization_budget
-        cls.inference_budget = inference_budget
-        cls._prune_hp = "best_of" if space.get("best_of", 1) != 1 else "n"
-        cls._prompts = space.get("prompt")
-
-        # if cls._prompts is None:
-        #     cls._messages = space.get("messages")
-        #     assert isinstance(cls._messages, list) and isinstance(
-        #         cls._messages[0], (dict, list)
-        #     ), "messages must be a list of dicts or a list of lists."
-        #     if isinstance(cls._messages[0], dict):
-        #         cls._messages = [cls._messages]
-        #     space["messages"] = tune.choice(list(range(len(cls._messages))))
-        # else:
-        #     assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
-        #     assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
-        #     if isinstance(cls._prompts, str):
-        #         cls._prompts = [cls._prompts]
-        #     space["prompt"] = tune.choice(list(range(len(cls._prompts))))
-        # cls._stops = space.get("stop")
-        # if cls._stops:
-        #     assert isinstance(
-        #         cls._stops, (str, list)
-        #     ), "stop must be a string, a list of strings, or a list of lists of strings."
-        #     if not (isinstance(cls._stops, list) and isinstance(cls._stops[0], list)):
-        #         cls._stops = [cls._stops]
-        #     space["stop"] = tune.choice(list(range(len(cls._stops))))
-
-        # cls._config_list = space.get("config_list")
-        # if cls._config_list is not None:
-        #     is_const = is_constant(cls._config_list)
-        #     if is_const:
-        #         space.pop("config_list")
-        # cls._metric, cls._mode = metric, mode
-        # cls._total_cost = 0  # total optimization cost
-        # cls._eval_func = eval_func
-        # cls.data = data
-        # cls.avg_input_tokens = None
-
-        space_model = space["model"]
 
-        if not isinstance(space_model, str) and len(space_model) > 1:
-            # make a hierarchical search space
-            subspace = {}
-            if "max_tokens" in space:
-                subspace["max_tokens"] = space.pop("max_tokens")
-            if "temperature_or_top_p" in space:
-                subspace["temperature_or_top_p"] = space.pop("temperature_or_top_p")
-            if "best_of" in space:
-                subspace["best_of"] = space.pop("best_of")
-            if "n" in space:
-                subspace["n"] = space.pop("n")
-            choices = []
-            for model in space["model"]:
-                choices.append({"model": model, **subspace})
-            space["subspace"] = tune.choice(choices)
-            space.pop("model")
-            # start all the models with the same hp config
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-            config0 = search_alg.suggest("t0")
-            points_to_evaluate = [config0]
-            for model in space_model:
-                if model != config0["subspace"]["model"]:
-                    point = config0.copy()
-                    point["subspace"] = point["subspace"].copy()
-                    point["subspace"]["model"] = model
-                    points_to_evaluate.append(point)
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-                points_to_evaluate=points_to_evaluate,
-            )
-        else:
-            # TODO: currently we always falls in this branch
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-
+        # Note: currently we fix the model rather than make it tunable
+        search_alg = BlendSearch(
+            cost_attr="cost",
+            cost_budget=optimization_budget,
+            metric=metric,
+            mode=mode,
+            space=space,
+        )
+
         # Args:
         # evaluation_function: A user-defined evaluation function.
         # It takes a configuration as input, outputs a evaluation
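For context on how the single `BlendSearch` instance built above is consumed downstream, here is a sketch assuming FLAML's `tune.run` API; `evaluate_config` stands in for the evaluation function defined later in this method.

    from flaml import tune

    # evaluate_config must report the metric plus a "cost" field, e.g.
    # tune.report(success_rate=..., cost=...), so BlendSearch can meter
    # spending against cost_budget=optimization_budget via cost_attr="cost".
    analysis = tune.run(
        evaluate_config,
        search_alg=search_alg,
        num_samples=num_samples,
        verbose=3,
    )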
@@ -527,7 +448,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         if temperature_or_top_p is not None:
             search_config.update(temperature_or_top_p)
 
-        flow_config["model_name"] = search_config["model"]
+        flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
         generation_parameters = flow_config["generation_parameters"]
         for generation_parameter in generation_parameters:
             if generation_parameter == "model_kwargs":
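The switch from indexing to `.get` matters because "model" was dropped from the search space; with illustrative values:

    flow_config = {"model_name": "gpt-3.5-turbo"}
    search_config = {"temperature": 0.7}  # no "model" key anymore

    # old: search_config["model"] would raise KeyError here
    flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
    assert flow_config["model_name"] == "gpt-3.5-turbo"  # base value preserved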
@@ -558,11 +479,12 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         log.info(f"Tunning with config: {search_config}")
         # TODO: the code currently only works when there is no subspace, i.e. there is only one model to tune with
         # align search_config with flow_config
-        updated_flow_config = updated_flow_config_with_search_config(flow_config=cls.get_config(), search_config=search_config)
+        updated_flow_config = updated_flow_config_with_search_config(flow_config=initial_flow_config, search_config=search_config)
         log.info(f"Updated flow_config: {updated_flow_config}")
         # flow_launcher = FlowAPILauncher(flow, 1, False, 3, 0, ["code"]) TODO: maybe refactor with flow_launcher
 
-        # TODO: limitations: langchain api call does not give us the cost of the api call
+        # TODO: limitations: langchain api call does not give us the cost of the api call, and only give us
+        # one result no matter the n
         final_metrics = {}
         for sample in tune_dps:
             sample["api_key"] = api_key
@@ -593,6 +515,6 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             verbose=3,
         )
         best_search_config = analysis.best_config
-        flow_config = updated_flow_config_with_search_config(cls.get_config(), best_search_config)
+        flow_config = updated_flow_config_with_search_config(initial_flow_config, best_search_config)
         log.info(f"best search config found: {best_search_config}, analysis: {analysis.best_result}")
         return flow_config, analysis
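A hypothetical downstream use of the `(flow_config, analysis)` pair returned here; `instantiate_from_config` is an assumed Flow constructor, not something this diff shows.

    # flow_config, analysis were returned by OpenAIChatAtomicFlow.tune(...)
    print(analysis.best_result)  # metrics of the best trial (FLAML ExperimentAnalysis)
    best_flow = OpenAIChatAtomicFlow.instantiate_from_config(flow_config)  # assumed API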