# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "lighteval>=0.6.0",
#     "torch>=2.0.0",
#     "transformers>=4.40.0",
#     "accelerate>=0.30.0",
#     "peft>=0.7.0",
# ]
# ///
"""Evaluate fine-tuned wheattoast11/agent-zero-lfm-1.2b-v1 on standard benchmarks.

Launches ``python -m lighteval accelerate`` as a child process with the model
arguments and task list defined below, writes results under
``./eval_results_finetuned``, and exits with the child's return code.
"""
import shlex
import subprocess
import sys

# Comma-separated key=value pairs handed to lighteval as its model argument.
model_args = "model_name=wheattoast11/agent-zero-lfm-1.2b-v1,trust_remote_code=True"

# Comma-separated task specs.
# NOTE(review): each spec presumably follows lighteval's
# "suite|task|few_shot|truncate_few_shot" format -- confirm against the
# installed lighteval version, which also decides whether these names resolve.
tasks = "leaderboard|mmlu|5|0,leaderboard|arc:challenge|0|0,leaderboard|truthfulqa:mc|0|0"

# Use the current interpreter so lighteval runs in the same environment
# that resolved the dependencies declared in the header above.
cmd = [
    sys.executable, "-m", "lighteval", "accelerate",
    model_args,
    tasks,
    "--output-dir", "./eval_results_finetuned",
]


def main() -> int:
    """Echo the lighteval command, run it, and return its exit code.

    Returns:
        The return code of the lighteval child process.
    """
    # shlex.join quotes arguments containing shell metacharacters (the task
    # spec is full of "|"), so the echoed line can be pasted into a shell.
    print(f"Running: {shlex.join(cmd)}")
    # check=False: a failing run is reported through the return code, which
    # the caller forwards to sys.exit, rather than through an exception.
    result = subprocess.run(cmd, check=False)
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())