yeeef committed
Commit
5a59abd
1 Parent(s): 7361bee

remove useless code

Files changed (1)
  1. OpenAIChatAtomicFlow.py +29 -107
OpenAIChatAtomicFlow.py CHANGED
@@ -48,15 +48,15 @@ class OpenAIChatAtomicFlow(AtomicFlow):
     response_annotators: Optional[Dict[str, MessageAnnotator]] = {}
 
     default_search_space = {
-        "model": tune.choice(
-            [
-                # "text-ada-001",
-                # "text-babbage-001",
-                # "text-davinci-003",
-                "gpt-3.5-turbo",
-                # "gpt-4",
-            ]
-        ),
+        # "model": tune.choice(
+        #     [
+        #         # "text-ada-001",
+        #         # "text-babbage-001",
+        #         # "text-davinci-003",
+        #         "gpt-3.5-turbo",
+        #         # "gpt-4",
+        #     ]
+        # ),
         "temperature_or_top_p": tune.choice(
             [
                 {"temperature": tune.uniform(0, 2)},
@@ -133,7 +133,10 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         response_annotators = config.get("response_annotators", {})
         if len(response_annotators) > 0:
             for key, config in response_annotators.items():
-                response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
+                if isinstance(config, MessageAnnotator):
+                    response_annotators[key] = config
+                else:
+                    response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
         return {"response_annotators": response_annotators}
 
     @classmethod
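The new `isinstance` guard means the mapping may now mix ready-made annotator objects with hydra config dicts. A hedged usage sketch: `ScoreAnnotator` and its module path are made-up placeholders; only the `_target_`-style instantiation is real hydra behavior.

    config = {
        "response_annotators": {
            # already an object: kept as-is by the new isinstance() branch
            "code": my_prebuilt_annotator,  # any MessageAnnotator instance
            # hydra-style dict: handed to hydra.utils.instantiate as before
            "score": {"_target_": "mypkg.annotators.ScoreAnnotator", "key": "score"},
        }
    }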
@@ -361,6 +364,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         optimization_budget: Optional[float] = None,
         num_samples: Optional[int] = 1,
         logging_level: Optional[int] = logging.WARN,  # TODO(yeeef)
+        initial_flow_config: Optional[Dict] = None,  # if not supplied will use default flow config of the class (xxx.yaml)
         **config,
     ) -> Tuple[Dict, Any]:  # tune.ExperimentAnalysis
         """
@@ -396,6 +400,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             - tune.ExperimentAnalysis: The tuning results.
         """
 
+        initial_flow_config = initial_flow_config or cls.get_config()
         space = cls.default_search_space.copy()
 
         if config is not None:
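One subtlety of the `or` fallback, shown below with illustrative values: any falsy argument, including an explicitly passed empty dict, is replaced by the class default, so an `is None` check would be needed to distinguish the two cases.

    default = {"model_name": "from-yaml"}

    cfg = None
    cfg = cfg or default  # -> {"model_name": "from-yaml"}, as intended

    cfg = {}              # caller explicitly passed an empty config...
    cfg = cfg or default  # -> still the class default, since {} is falsy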
@@ -413,100 +418,16 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             space["temperature"] = temperature
             space["top_p"] = top_p
             log.warning("temperature and top_p are not recommended to vary together.")
-
-        # TODO: shall we use cls method?
-        cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
-        cls.optimization_budget = optimization_budget
-        cls.inference_budget = inference_budget
-        cls._prune_hp = "best_of" if space.get("best_of", 1) != 1 else "n"
-        cls._prompts = space.get("prompt")
-
-        # if cls._prompts is None:
-        #     cls._messages = space.get("messages")
-        #     assert isinstance(cls._messages, list) and isinstance(
-        #         cls._messages[0], (dict, list)
-        #     ), "messages must be a list of dicts or a list of lists."
-        #     if isinstance(cls._messages[0], dict):
-        #         cls._messages = [cls._messages]
-        #     space["messages"] = tune.choice(list(range(len(cls._messages))))
-        # else:
-        #     assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
-        #     assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
-        #     if isinstance(cls._prompts, str):
-        #         cls._prompts = [cls._prompts]
-        #     space["prompt"] = tune.choice(list(range(len(cls._prompts))))
-        # cls._stops = space.get("stop")
-        # if cls._stops:
-        #     assert isinstance(
-        #         cls._stops, (str, list)
-        #     ), "stop must be a string, a list of strings, or a list of lists of strings."
-        #     if not (isinstance(cls._stops, list) and isinstance(cls._stops[0], list)):
-        #         cls._stops = [cls._stops]
-        #     space["stop"] = tune.choice(list(range(len(cls._stops))))
-
-        # cls._config_list = space.get("config_list")
-        # if cls._config_list is not None:
-        #     is_const = is_constant(cls._config_list)
-        #     if is_const:
-        #         space.pop("config_list")
-        # cls._metric, cls._mode = metric, mode
-        # cls._total_cost = 0  # total optimization cost
-        # cls._eval_func = eval_func
-        # cls.data = data
-        # cls.avg_input_tokens = None
-
-        space_model = space["model"]
 
-        if not isinstance(space_model, str) and len(space_model) > 1:
-            # make a hierarchical search space
-            subspace = {}
-            if "max_tokens" in space:
-                subspace["max_tokens"] = space.pop("max_tokens")
-            if "temperature_or_top_p" in space:
-                subspace["temperature_or_top_p"] = space.pop("temperature_or_top_p")
-            if "best_of" in space:
-                subspace["best_of"] = space.pop("best_of")
-            if "n" in space:
-                subspace["n"] = space.pop("n")
-            choices = []
-            for model in space["model"]:
-                choices.append({"model": model, **subspace})
-            space["subspace"] = tune.choice(choices)
-            space.pop("model")
-            # start all the models with the same hp config
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-            config0 = search_alg.suggest("t0")
-            points_to_evaluate = [config0]
-            for model in space_model:
-                if model != config0["subspace"]["model"]:
-                    point = config0.copy()
-                    point["subspace"] = point["subspace"].copy()
-                    point["subspace"]["model"] = model
-                    points_to_evaluate.append(point)
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-                points_to_evaluate=points_to_evaluate,
-            )
-        else:
-            # TODO: currently we always falls in this branch
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-
+        # Note: currently we fix the model rather than make it tunable
+        search_alg = BlendSearch(
+            cost_attr="cost",
+            cost_budget=optimization_budget,
+            metric=metric,
+            mode=mode,
+            space=space,
+        )
+
         # Args:
         # evaluation_function: A user-defined evaluation function.
         # It takes a configuration as input, outputs a evaluation
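For context on how the single `BlendSearch` instance built above is consumed downstream, here is a sketch assuming FLAML's `tune.run` API; `evaluate_config` stands in for the evaluation function defined later in this method.

    from flaml import tune

    # evaluate_config must report the metric plus a "cost" field, e.g.
    # tune.report(success_rate=..., cost=...), so BlendSearch can meter
    # spending against cost_budget=optimization_budget via cost_attr="cost".
    analysis = tune.run(
        evaluate_config,
        search_alg=search_alg,
        num_samples=num_samples,
        verbose=3,
    )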
@@ -527,7 +448,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         if temperature_or_top_p is not None:
             search_config.update(temperature_or_top_p)
 
-        flow_config["model_name"] = search_config["model"]
+        flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
         generation_parameters = flow_config["generation_parameters"]
         for generation_parameter in generation_parameters:
             if generation_parameter == "model_kwargs":
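The switch from indexing to `.get` matters because "model" was dropped from the search space; with illustrative values:

    flow_config = {"model_name": "gpt-3.5-turbo"}
    search_config = {"temperature": 0.7}  # no "model" key anymore

    # old: search_config["model"] would raise KeyError here
    flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
    assert flow_config["model_name"] == "gpt-3.5-turbo"  # base value preserved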
@@ -558,11 +479,12 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         log.info(f"Tunning with config: {search_config}")
         # TODO: the code currently only works when there is no subspace, i.e. there is only one model to tune with
         # align search_config with flow_config
-        updated_flow_config = updated_flow_config_with_search_config(flow_config=cls.get_config(), search_config=search_config)
+        updated_flow_config = updated_flow_config_with_search_config(flow_config=initial_flow_config, search_config=search_config)
         log.info(f"Updated flow_config: {updated_flow_config}")
         # flow_launcher = FlowAPILauncher(flow, 1, False, 3, 0, ["code"]) TODO: maybe refactor with flow_launcher
 
-        # TODO: limitations: langchain api call does not give us the cost of the api call
+        # TODO: limitations: langchain api call does not give us the cost of the api call, and only give us
+        # one result no matter the n
         final_metrics = {}
         for sample in tune_dps:
             sample["api_key"] = api_key
@@ -593,6 +515,6 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             verbose=3,
         )
         best_search_config = analysis.best_config
-        flow_config = updated_flow_config_with_search_config(cls.get_config(), best_search_config)
+        flow_config = updated_flow_config_with_search_config(initial_flow_config, best_search_config)
         log.info(f"best search config found: {best_search_config}, analysis: {analysis.best_result}")
         return flow_config, analysis
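A hypothetical downstream use of the `(flow_config, analysis)` pair returned here; `instantiate_from_config` is an assumed Flow constructor, not something this diff shows.

    # flow_config, analysis were returned by OpenAIChatAtomicFlow.tune(...)
    print(analysis.best_result)  # metrics of the best trial (FLAML ExperimentAnalysis)
    best_flow = OpenAIChatAtomicFlow.instantiate_from_config(flow_config)  # assumed API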