import argparse
import os
import time
import logging
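# ROCm/Hub environment overrides, set before torch is imported:
#   HSA_OVERRIDE_GFX_VERSION=9.4.2 makes the ROCm runtime treat the GPU as gfx942 (MI300-class).
#   HF_HUB_DISABLE_XET=1 disables the Hugging Face Xet download backend in favour of plain HTTP.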
| os.environ["HSA_OVERRIDE_GFX_VERSION"] = "9.4.2" | |
| os.environ["HF_HUB_DISABLE_XET"] = "1" | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
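# unsloth is imported before trl so its model/trainer patches take effect first.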
import torch
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig
from train_specialist import TIER_CONFIGS, load_jsonl_dataset, EVAL_FILE, SEED

logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"), format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)


def evaluate(tier: int):
    """
    Evaluate the fine-tuned OncoAgent model (Tier 1 or 2) on the evaluation dataset.
    Reports cross-entropy loss and perplexity.
    """
    config = TIER_CONFIGS.get(tier)
    if not config:
        raise ValueError(f"Invalid tier: {tier}")

    adapter_path = os.path.join("models", "oncoagent_adapters", f"tier{tier}", "final")
    if not os.path.exists(adapter_path):
        logger.error(f"Adapter path not found: {adapter_path}. Please run training first.")
        return

    logger.info("=" * 60)
    logger.info(f"🔍 Starting Post-Training Evaluation for Tier {tier}")
    logger.info(f" Adapter path: {adapter_path}")
    logger.info("=" * 60)
    # Load the model with Unsloth's optimizations
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=adapter_path,
        max_seq_length=config.max_seq_length,
        load_in_4bit=True,
    )
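    # model_name points at the saved LoRA adapter directory; Unsloth resolves the
    # base checkpoint from the adapter config and loads it in 4-bit with the
    # adapter weights attached.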
    try:
        eval_dataset = load_jsonl_dataset(EVAL_FILE, "evaluation")
    except FileNotFoundError:
        logger.error(f"Eval file not found at {EVAL_FILE}. Cannot perform evaluation.")
        return

    logger.info("Running quantitative evaluation (Loss & Perplexity)...")
    actual_tokenizer = tokenizer.tokenizer if hasattr(tokenizer, "tokenizer") else tokenizer
    if actual_tokenizer.pad_token is None:
        actual_tokenizer.pad_token = actual_tokenizer.eos_token
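    # Evaluation-only SFT configuration: reuse the tier's sequence length and batch
    # size; packing concatenates samples up to max_length so eval loss is computed
    # under the same conditions as training.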
    sft_config = SFTConfig(
        output_dir=os.path.join("models", "oncoagent_adapters", f"tier{tier}", "eval_results"),
        per_device_eval_batch_size=config.batch_size,
        max_length=config.max_seq_length,
        packing=True,
        dataset_text_field="text",
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        report_to="none",
        eos_token=None,
    )
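    # Recent TRL versions take the tokenizer via processing_class; no train_dataset
    # is supplied since the trainer is only used to call evaluate().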
    trainer = SFTTrainer(
        model=model,
        processing_class=actual_tokenizer,
        eval_dataset=eval_dataset,
        args=sft_config,
    )
    t0 = time.time()
    metrics = trainer.evaluate()
    duration = time.time() - t0

    logger.info("=" * 60)
    logger.info(f"✅ EVALUATION COMPLETE FOR TIER {tier}")
    logger.info(f" Eval duration: {time.strftime('%Hh %Mm %Ss', time.gmtime(duration))}")
    for k, v in metrics.items():
        if isinstance(v, float):
            logger.info(f" {k}: {v:.4f}")
        else:
            logger.info(f" {k}: {v}")
    try:
        perplexity = torch.exp(torch.tensor(metrics["eval_loss"])).item()
        logger.info(f" Perplexity: {perplexity:.4f}")
    except KeyError:
        logger.warning("eval_loss missing from metrics; skipping perplexity.")
    logger.info("=" * 60)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Evaluate Fine-Tuned OncoAgent Models")
    parser.add_argument("--tier", type=int, choices=[1, 2], required=True,
                        help="Select the architectural tier to evaluate (1 = 9B, 2 = 27B)")
    args = parser.parse_args()
    evaluate(args.tier)
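# Example invocation (script name assumed): python evaluate_oncoagent.py --tier 1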