Alina Lozovskaya
Simplify Setup tab
7ccf9d4
raw
history blame
3.4 kB
import yaml
from loguru import logger
from yourbench_space.utils import CONFIG_PATH
def generate_base_config(
    hf_org,
    hf_prefix,
    *,
    model_name="meta-llama/Llama-3.3-70B-Instruct",
    base_url="https://jsq69lxgkhvpnliw.us-east-1.aws.endpoints.huggingface.cloud",
):
    """Create the base config dictionary.

    Args:
        hf_org: Value stored under ``hf_configuration.hf_organization``.
        hf_prefix: Value stored under ``hf_configuration.hf_dataset_name``.
        model_name: Identifier of the one model that is registered in
            ``model_list``, assigned to every entry of ``model_roles`` and
            used by both answer-generation strategies. Keyword-only; the
            default is the model that used to be hard-coded here.
        base_url: Endpoint stored as that model's ``base_url``. Keyword-only;
            the default is the endpoint that used to be hard-coded here.

    Returns:
        A newly built nested ``dict`` on every call. Keys are inserted in a
        fixed order (``hf_configuration``, ``local_dataset_dir``,
        ``model_list``, ``model_roles``, ``pipeline``), which matters because
        ``save_yaml_file`` dumps with ``sort_keys=False``.
    """
    # Every pipeline role is served by the same model; listing the role names
    # once keeps ``model_roles`` in sync with ``model_list`` by construction.
    role_names = (
        "ingestion",
        "summarization",
        "single_shot_question_generation",
        "multi_hop_question_generation",
        "answer_generation",
        "judge_answers",
    )

    return {
        "hf_configuration": {
            # "$HF_TOKEN" is written literally; nothing in this function
            # substitutes it.
            "token": "$HF_TOKEN",
            "private": True,
            "hf_organization": hf_org,
            "hf_dataset_name": hf_prefix,
        },
        "local_dataset_dir": "results/",
        "model_list": [
            {
                "model_name": model_name,
                "provider": "huggingface",
                "base_url": base_url,
                "api_key": "$HF_TOKEN",
                "max_concurrent_requests": 16,
            }
        ],
        # A distinct list object per role: PyYAML emits anchors/aliases when
        # the same mutable object is referenced more than once.
        "model_roles": {role: [model_name] for role in role_names},
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/ingested",
                "run": True,
            },
            "summarization": {"run": True},
            "chunking": {
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "diversification_seed": "24 year old adult",
                "run": True,
            },
            "multi_hop_question_generation": {"run": True},
            "answer_generation": {
                "question_type": "single_shot",
                "run": True,
                "strategies": [
                    {
                        "name": "zeroshot",
                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                    {
                        "name": "gold",
                        "prompt": "GOLD_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                ],
            },
            "judge_answers": {
                "run": True,
                # The names here refer to the two strategies defined under
                # ``answer_generation`` above.
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
            },
        },
    }
def save_yaml_file(config):
    """Write the given config dictionary to ``CONFIG_PATH`` as YAML.

    Args:
        config: The configuration mapping to serialize.

    Returns:
        ``CONFIG_PATH``, the location the file was written to.
    """
    # Pin the encoding so the file's bytes do not depend on the host locale.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        # Block style, with keys kept in insertion order rather than sorted.
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
    return CONFIG_PATH
def generate_and_save_config(hf_org, hf_prefix):
    """Build the base configuration and persist it as a YAML file.

    Args:
        hf_org: Organization name forwarded to ``generate_base_config``.
        hf_prefix: Dataset-name prefix forwarded to ``generate_base_config``.

    Returns:
        Whatever ``save_yaml_file`` returns for the written config.
    """
    logger.debug(f"Generating config with org: {hf_org}, prefix: {hf_prefix}")
    # Build and write in one step; only the resulting path is needed afterwards.
    saved_path = save_yaml_file(generate_base_config(hf_org, hf_prefix))
    logger.success(f"Config saved at: {saved_path}")
    return saved_path