import json
import os
import shutil

import torch
from peft import PeftModel
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
| |
|
# Distributed launcher handle: build an Accelerator with a generous process-group
# timeout (3000 s) when `accelerate` is installed, otherwise leave it as None so
# the script still works single-process.
accelerator = None
if is_package_available("accelerate"):
    from datetime import timedelta

    from accelerate import Accelerator, InitProcessGroupKwargs

    _pg_kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=3000))
    accelerator = Accelerator(kwargs_handlers=[_pg_kwargs])
| |
|
def _existing_merge_is_valid(config_path):
    """Return True if the merged checkpoint's config.json already has a
    rope_scaling entry, printing the same diagnostics as before."""
    with open(config_path, 'r') as f:
        config = json.load(f)
    if 'rope_scaling' in config:
        print(f"✓ Existing merged model has RoPE scaling: {config['rope_scaling']}")
        print(f"✓ Max position embeddings: {config.get('max_position_embeddings', 'N/A')}")
        return True
    print("⚠ Warning: Existing merged model does NOT have RoPE scaling config!")
    print(" Deleting and re-creating with RoPE scaling...")
    return False


def _add_rope_scaling(merged_path, factor):
    """Patch the merged checkpoint's config.json with linear RoPE scaling and
    scale max_position_embeddings accordingly; return the updated config."""
    config_path = os.path.join(merged_path, "config.json")
    with open(config_path, 'r') as f:
        config = json.load(f)

    config['rope_scaling'] = {
        "type": "linear",
        "factor": factor
    }
    print(f"✓ Added RoPE scaling: {config['rope_scaling']}")

    # Linear scaling multiplies the usable context window by `factor`.
    original_max_pos = config.get('max_position_embeddings', 4096)
    new_max_pos = int(original_max_pos * factor)
    config['max_position_embeddings'] = new_max_pos
    print(f"✓ Updated max_position_embeddings: {original_max_pos} -> {new_max_pos}")

    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2, ensure_ascii=False)
    return config


def merge_lora_if_needed(
    merged_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora-Merged",
    lora_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora",
    base_model_name="Qwen/Qwen2.5-Math-1.5B",
    rope_factor=2.0,
):
    """Merge LoRA model and preserve RoPE scaling configuration.

    If a merged checkpoint with a rope_scaling config already exists it is
    reused as-is. An existing checkpoint WITHOUT rope_scaling is deleted and
    rebuilt. (The previous implementation recursed with no arguments here,
    which would have silently dropped any non-default arguments.)

    Args:
        merged_path: Directory where the merged checkpoint is / will be stored.
        lora_path: Directory containing the trained LoRA adapter.
        base_model_name: Hub id or local path of the base model to merge onto.
        rope_factor: Linear RoPE scaling factor to write into config.json.

    Returns:
        The path to the merged checkpoint directory.
    """
    config_json = os.path.join(merged_path, "config.json")
    if os.path.exists(config_json):
        print(f"Merged model already exists at {merged_path}")
        if _existing_merge_is_valid(config_json):
            return merged_path
        # Stale/invalid merge (no RoPE scaling): discard and rebuild below.
        shutil.rmtree(merged_path)

    print("="*100)
    print("Merged model not found. Starting merge process...")
    print("="*100)

    print("\n[1/5] Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto"
    )

    print("\n[2/5] Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, lora_path)

    print("\n[3/5] Merging LoRA weights with base model...")
    merged_model = model.merge_and_unload()

    print(f"\n[4/5] Saving merged model to {merged_path}...")
    os.makedirs(merged_path, exist_ok=True)
    merged_model.save_pretrained(merged_path, safe_serialization=True)

    print("\n[5/5] Adding RoPE scaling configuration...")
    merged_config = _add_rope_scaling(merged_path, rope_factor)

    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.save_pretrained(merged_path)

    # Release the (large) model objects and cached CUDA memory before the
    # evaluation stage spins up vLLM.
    del base_model, model, merged_model
    torch.cuda.empty_cache()

    print("\n" + "="*100)
    print("✓ Merge completed successfully!")
    print(f"✓ Merged model saved to: {merged_path}")
    print(f"✓ RoPE scaling config: {merged_config['rope_scaling']}")
    print(f"✓ Max position embeddings: {merged_config['max_position_embeddings']}")
    print("="*100 + "\n")

    return merged_path
| |
|
def main():
    """Evaluate the merged LoRA checkpoint on MATH-500 with LightEval + vLLM."""
    # Pin the run to one physical GPU and let vLLM exceed the model's declared
    # max length. NOTE(review): env vars must be set before any CUDA context is
    # created; torch is imported already, but CUDA is first touched below —
    # confirm nothing initializes CUDA earlier.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

    print("Checking for merged model...")
    merged_model_path = merge_lora_if_needed()

    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s)")
    for i in range(num_gpus):
        print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")

    # Surface the RoPE configuration that merge_lora_if_needed wrote.
    config_path = os.path.join(merged_model_path, "config.json")
    with open(config_path, 'r') as f:
        model_config_dict = json.load(f)
    max_position_embeddings = model_config_dict.get('max_position_embeddings', 4096)
    rope_scaling = model_config_dict.get('rope_scaling', None)

    print(f"Model max_position_embeddings: {max_position_embeddings}")
    print(f"Model RoPE scaling config: {rope_scaling}")

    # Fixed evaluation context length; VLLM_ALLOW_LONG_MAX_MODEL_LEN above
    # permits it even if it exceeds the model's configured maximum.
    max_model_length = 8192
    print(f"Using max_model_length: {max_model_length}\n")

    print("Setting up evaluation pipeline...")

    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500
    )

    model_config = VLLMModelConfig(
        model_name=merged_model_path,
        dtype="bfloat16",
        max_model_length=max_model_length,
        trust_remote_code=True,
        # Guard against a zero GPU count: vLLM rejects tensor_parallel_size=0.
        tensor_parallel_size=max(1, num_gpus),
    )

    task = "lighteval|math_500|0"

    print(f"Using {num_gpus} GPU(s) with tensor parallelism")
    print(f"Task: {task}")
    print(f"Max model length: {max_model_length}\n")

    print("Creating pipeline...")
    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # NOTE(review): `_docs` is a private lighteval attribute — this may break
    # across lighteval versions; check for a public generation-size knob.
    print("Configuring generation parameters...")
    for task_obj in pipeline.tasks_dict.values():
        for doc in task_obj._docs:
            doc.generation_size = 2048

    print("\nStarting evaluation...")
    print("="*100)
    pipeline.evaluate()

    print("\nSaving results...")
    pipeline.save_and_push_results()

    print("\nShowing results...")
    pipeline.show_results()

    print("\n" + "="*100)
    print("✓ Evaluation completed!")
    print("="*100)
|
# Script entry point: run the merge + evaluation only when executed directly.
if __name__ == "__main__":
    main()