"""Collect experiment result JSON files and print markdown report tables.

Walks ./experiment, parses each result file into a ModelOutput, and prints one
sft/eval table and one export table per experiment group.
"""
import dataclasses
import json
import os
from dataclasses import dataclass
from typing import Any, Dict, List

import numpy as np

from swift.llm.template import split_str_parts_by


@dataclass
class ModelOutput:
    """A single experiment record parsed from a result JSON file (see parse_output)."""

    group: str = None
    name: str = None
    cmd: str = None
    requirements: Dict[str, str] = dataclasses.field(default_factory=dict)
    args: Dict[str, Any] = dataclasses.field(default_factory=dict)
    memory: str = None
    train_time: float = None
    train_samples: int = None
    train_samples_per_second: float = None
    last_model_checkpoint: str = None
    best_model_checkpoint: str = None
    best_metric: Any = None
    global_step: int = None
    num_total_parameters: float = None
    num_trainable_parameters: float = None
    num_buffers: float = None
    trainable_parameters_percentage: float = None
    train_dataset_info: str = None
    val_dataset_info: str = None
    train_create_time: float = None
    eval_tokens: int = None
    eval_time: float = None
    reports: Dict[str, Any] = None
    train_loss: float = None

    @property
    def tuner_hyper_params(self):
        hyper_params = ''
        args = self.args
        if 'sft_type' not in args:
            return ''
        if args['sft_type'] in ('lora', 'adalora', 'longlora'):
            if 'lora_rank' in args:
                hyper_params += f'rank={args["lora_rank"]}/' \
                                f'target={args["lora_target_modules"]}/' \
                                f'alpha={args["lora_alpha"]}/' \
                                f'lr_ratio={args.get("lora_lr_ratio", None)}/' \
                                f'use_rslora={args.get("use_rslora", False)}/' \
                                f'use_dora={args.get("use_dora", False)}'
            else:
                hyper_params = ''
        if args['sft_type'] == 'full':
            if 'use_galore' in args and args['use_galore'] == 'true':
                hyper_params += f'galore_rank={args["galore_rank"]}/' \
                                f'galore_per_parameter={args["galore_optim_per_parameter"]}/' \
                                f'galore_with_embedding={args["galore_with_embedding"]}/'
        if args['sft_type'] == 'llamapro':
            hyper_params += f'num_blocks={args["llamapro_num_new_blocks"]}/'
        if 'neftune_noise_alpha' in args and args['neftune_noise_alpha']:
            hyper_params += f'neftune_noise_alpha={args["neftune_noise_alpha"]}/'

        if hyper_params.endswith('/'):
            hyper_params = hyper_params[:-1]
        return hyper_params

    @property
    def hyper_parameters(self):
        if 'learning_rate' not in self.args:
            return ''
        return f'lr={self.args["learning_rate"]}/' \
               f'epoch={self.args["num_train_epochs"]}'

    @property
    def train_speed(self):
        if self.train_samples_per_second:
            return f'{self.train_samples_per_second:.2f}({self.train_samples} samples/{self.train_time:.2f} seconds)'
        else:
            return ''

    @property
    def infer_speed(self):
        if self.eval_tokens:
            return f'{self.eval_tokens / self.eval_time:.2f}({self.eval_tokens} tokens/{self.eval_time:.2f} seconds)'
        return ''


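# Illustration only: what the properties above render for a hypothetical LoRA run
# (the args dict below is made up, not taken from a real experiment file):
#
#   out = ModelOutput(args={'sft_type': 'lora', 'lora_rank': 8, 'lora_target_modules': 'ALL',
#                           'lora_alpha': 32, 'learning_rate': 1e-4, 'num_train_epochs': 1})
#   out.tuner_hyper_params  # 'rank=8/target=ALL/alpha=32/lr_ratio=None/use_rslora=False/use_dora=False'
#   out.hyper_parameters    # 'lr=0.0001/epoch=1'

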
def generate_sft_report(outputs: List[ModelOutput]):
    gsm8k_accs = []
    arc_accs = []
    ceval_accs = []
    for output in outputs:
        gsm8k_acc = None
        arc_acc = None
        ceval_acc = None
        for report in (output.reports or []):
            if report['name'] == 'gsm8k':
                gsm8k_acc = report['score']
            if report['name'] == 'arc':
                arc_acc = report['score']
            if report['name'] == 'ceval':
                ceval_acc = report['score']
        gsm8k_accs.append(gsm8k_acc)
        arc_accs.append(arc_acc)
        ceval_accs.append(ceval_acc)

    tab = '| exp_name | model_type | dataset | ms-bench mix ratio | tuner | tuner_params | trainable params(M) | flash_attn | gradient_checkpointing | hypers | memory | train speed(samples/s) | infer speed(tokens/s) | train_loss | eval_loss | gsm8k weighted acc | arc weighted acc | ceval weighted acc |\n' \
          '| -------- | ---------- | ------- | ------------------ | ----- | ------------ | ------------------- | ---------- | ---------------------- | ------ | ------ | ---------------------- | --------------------- | ---------- | --------- | ------------------ | ---------------- | ------------------ |\n'
    min_best_metric = 999.
    min_train_loss = 999.
    if outputs:
        min_best_metric = min([output.best_metric or 999. for output in outputs])
        min_train_loss = min([output.train_loss or 999. for output in outputs])

    max_gsm8k = 0.0
    if gsm8k_accs:
        max_gsm8k = max([gsm8k or 0. for gsm8k in gsm8k_accs])

    max_arc = 0.0
    if arc_accs:
        max_arc = max([arc or 0. for arc in arc_accs])

    max_ceval = 0.0
    if ceval_accs:
        max_ceval = max([ceval or 0. for ceval in ceval_accs])

    for output, gsm8k_acc, arc_acc, ceval_acc in zip(outputs, gsm8k_accs, arc_accs, ceval_accs):
        use_flash_attn = output.args.get('use_flash_attn', '')
        use_gc = output.args.get('gradient_checkpointing', '')
        memory = output.memory
        train_speed = output.train_speed
        infer_speed = output.infer_speed

        is_best_metric = np.isclose(min_best_metric, output.best_metric or 999.0)
        is_best_loss = np.isclose(min_train_loss, output.train_loss or 999.0)
        is_best_gsm8k = np.isclose(max_gsm8k, gsm8k_acc or 0.0)
        is_best_arc = np.isclose(max_arc, arc_acc or 0.0)
        is_best_ceval = np.isclose(max_ceval, ceval_acc or 0.0)

        # The best value in each column is wrapped in ** so it renders bold in markdown.
        if not is_best_metric:
            best_metric = '' if not output.best_metric else f'{output.best_metric:.2f}'
        else:
            best_metric = '' if not output.best_metric else f'**{output.best_metric:.2f}**'

        if not is_best_loss:
            train_loss = '' if not output.train_loss else f'{output.train_loss:.2f}'
        else:
            train_loss = '' if not output.train_loss else f'**{output.train_loss:.2f}**'

        if not is_best_gsm8k:
            gsm8k_acc = '' if not gsm8k_acc else f'{gsm8k_acc:.3f}'
        else:
            gsm8k_acc = '' if not gsm8k_acc else f'**{gsm8k_acc:.3f}**'

        if not is_best_arc:
            arc_acc = '' if not arc_acc else f'{arc_acc:.3f}'
        else:
            arc_acc = '' if not arc_acc else f'**{arc_acc:.3f}**'

        if not is_best_ceval:
            ceval_acc = '' if not ceval_acc else f'{ceval_acc:.3f}'
        else:
            ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

        # Column order must match the header above; best_metric fills the eval_loss column.
        line = f'|{output.name}|' \
               f'{output.args["model_type"]}|' \
               f'{output.args.get("dataset")}|' \
               f'{output.args.get("train_dataset_mix_ratio", 0.)}|' \
               f'{output.args.get("sft_type")}|' \
               f'{output.tuner_hyper_params}|' \
               f'{output.num_trainable_parameters}({output.trainable_parameters_percentage})|' \
               f'{use_flash_attn}|' \
               f'{use_gc}|' \
               f'{output.hyper_parameters}|' \
               f'{memory}|' \
               f'{train_speed}|' \
               f'{infer_speed}|' \
               f'{train_loss}|' \
               f'{best_metric}|' \
               f'{gsm8k_acc}|' \
               f'{arc_acc}|' \
               f'{ceval_acc}|\n'
        tab += line
    return tab


def generate_export_report(outputs: List[ModelOutput]):
    tab = '| exp_name | model_type | calibration dataset | quantization method | quantization bits | infer speed(tokens/s) | gsm8k weighted acc | arc weighted acc | ceval weighted acc |\n' \
          '| -------- | ---------- | ------------------- | ------------------- | ----------------- | --------------------- | ------------------ | ---------------- | ------------------ |\n'

    gsm8k_accs = []
    arc_accs = []
    ceval_accs = []
    for output in outputs:
        gsm8k_acc = None
        arc_acc = None
        ceval_acc = None
        for report in (output.reports or []):
            if report['name'] == 'gsm8k':
                gsm8k_acc = report['score']
            if report['name'] == 'arc':
                arc_acc = report['score']
            if report['name'] == 'ceval':
                ceval_acc = report['score']
        gsm8k_accs.append(gsm8k_acc)
        arc_accs.append(arc_acc)
        ceval_accs.append(ceval_acc)

    max_gsm8k = 0.0
    if gsm8k_accs:
        max_gsm8k = max([gsm8k or 0. for gsm8k in gsm8k_accs])

    max_arc = 0.0
    if arc_accs:
        max_arc = max([arc or 0. for arc in arc_accs])

    max_ceval = 0.0
    if ceval_accs:
        max_ceval = max([ceval or 0. for ceval in ceval_accs])

    for output, gsm8k_acc, arc_acc, ceval_acc in zip(outputs, gsm8k_accs, arc_accs, ceval_accs):
        infer_speed = output.infer_speed
        is_best_gsm8k = np.isclose(max_gsm8k, gsm8k_acc or 0.0)
        is_best_arc = np.isclose(max_arc, arc_acc or 0.0)
        is_best_ceval = np.isclose(max_ceval, ceval_acc or 0.0)

        if not is_best_gsm8k:
            gsm8k_acc = '' if not gsm8k_acc else f'{gsm8k_acc:.3f}'
        else:
            gsm8k_acc = '' if not gsm8k_acc else f'**{gsm8k_acc:.3f}**'

        if not is_best_arc:
            arc_acc = '' if not arc_acc else f'{arc_acc:.3f}'
        else:
            arc_acc = '' if not arc_acc else f'**{arc_acc:.3f}**'

        if not is_best_ceval:
            ceval_acc = '' if not ceval_acc else f'{ceval_acc:.3f}'
        else:
            ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

        if output.train_dataset_info:
            dataset_info = f'{output.args["dataset"]}/{output.train_dataset_info}'
        else:
            dataset_info = f'{output.args["dataset"]}'
        line = f'|{output.name}|' \
               f'{output.args["model_type"]}|' \
               f'{dataset_info}|' \
               f'{output.args["quant_method"]}|' \
               f'{output.args["quant_bits"]}|' \
               f'{infer_speed}|' \
               f'{gsm8k_acc}|' \
               f'{arc_acc}|' \
               f'{ceval_acc}|\n'
        tab += line
    return tab


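# Sketch of the result-file layout that parse_output below expects, inferred from its
# parsing logic (the keys are the ones actually read; the values shown are illustrative
# placeholders, not real data):
#
#   {
#       "name": "...", "group": "...", "cmd": "sft" | "eval" | "export",
#       "requirements": {...}, "args": {...}, "create_time": 1700000000.0,
#       "record": {
#           "memory": {"cuda:0": "10.00GiB"},
#           "train_time": {"train_runtime": ..., "n_train_samples": ..., "train_samples_per_second": ...},
#           "last_model_checkpoint": "...", "best_model_checkpoint": "...", "best_metric": ...,
#           "global_step": ..., "log_history": [..., {"train_loss": ...}],
#           "dataset_info": {"train_dataset": "...", "val_dataset": "..."},
#           "model_info": "...",
#           "eval_result": {"generation_info": {"tokens": ..., "time": ...}, "report": [...]}
#       }
#   }

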
def parse_output(file):
    with open(file, 'r', encoding='utf-8') as f:
        content = json.load(f)

    name = content['name']
    group = content['group']
    cmd = content['cmd']
    requirements = content['requirements']
    args = content['args']
    create_time = float(content.get('create_time') or 0)
    content = content['record']
    if cmd == 'export':
        best_model_checkpoint = content['best_model_checkpoint']
        eval_tokens = 0
        eval_time = 0.0
        eval_result = None
        if 'eval_result' in content:
            eval_result = content['eval_result']
            eval_tokens = eval_result['generation_info']['tokens']
            eval_time = eval_result['generation_info']['time']
            eval_result = eval_result['report']
        return ModelOutput(
            group=group,
            name=name,
            cmd=cmd,
            requirements=requirements,
            args=args,
            best_model_checkpoint=best_model_checkpoint,
            eval_time=eval_time,
            eval_tokens=eval_tokens,
            reports=eval_result,
        )
    else:
        memory = None
        train_time = None
        train_samples = None
        train_samples_per_second = None
        last_model_checkpoint = None
        best_model_checkpoint = None
        best_metric = None
        global_step = None
        train_dataset_info = None
        val_dataset_info = None
        num_trainable_parameters = None
        num_buffers = None
        trainable_parameters_percentage = None
        num_total_parameters = None
        train_loss = None
        if 'memory' in content:
            memory = content['memory']
            memory = '/'.join(memory.values())
        if 'train_time' in content:
            train_time = content['train_time']['train_runtime']
            train_samples = content['train_time']['n_train_samples']
            train_samples_per_second = content['train_time']['train_samples_per_second']
        if 'last_model_checkpoint' in content:
            last_model_checkpoint = content['last_model_checkpoint']
        if 'best_model_checkpoint' in content:
            best_model_checkpoint = content['best_model_checkpoint']
        if 'best_metric' in content:
            best_metric = content['best_metric']
        if 'log_history' in content:
            train_loss = content['log_history'][-1]['train_loss']
        if 'global_step' in content:
            global_step = content['global_step']
        if 'dataset_info' in content:
            train_dataset_info = content['dataset_info'].get('train_dataset')
            val_dataset_info = content['dataset_info'].get('val_dataset')
        if 'model_info' in content:
            # model_info is a log string, roughly of the form:
            # 'SwiftModel: 7721.32M Params (19.99M Trainable [0.26%]), 16.78M Buffers.'
            str_dict = split_str_parts_by(content['model_info'], [
                'SwiftModel:', 'CausalLM:', 'Seq2SeqLM:', 'LMHeadModel:', 'M Params (', 'M Trainable [', ']), ',
                'M Buffers.'
            ])
            str_dict = {c['key']: c['content'] for c in str_dict}
            if 'SwiftModel:' in str_dict:
                num_total_parameters = float(str_dict['SwiftModel:'])
            elif 'CausalLM:' in str_dict:
                num_total_parameters = float(str_dict['CausalLM:'])
            elif 'Seq2SeqLM:' in str_dict:
                num_total_parameters = float(str_dict['Seq2SeqLM:'])
            elif 'LMHeadModel:' in str_dict:
                num_total_parameters = float(str_dict['LMHeadModel:'])
            num_trainable_parameters = float(str_dict['M Params ('])
            num_buffers = float(str_dict[']), '])
            trainable_parameters_percentage = str_dict['M Trainable [']

        eval_tokens = 0
        eval_time = 0.0
        eval_result = None
        if 'eval_result' in content:
            eval_result = content['eval_result']
            eval_tokens = eval_result['generation_info']['tokens']
            eval_time = eval_result['generation_info']['time']
            eval_result = eval_result['report']

        return ModelOutput(
            group=group,
            name=name,
            cmd=cmd,
            requirements=requirements,
            args=args,
            memory=memory,
            train_time=train_time,
            train_samples=train_samples,
            train_samples_per_second=train_samples_per_second,
            last_model_checkpoint=last_model_checkpoint,
            best_model_checkpoint=best_model_checkpoint,
            best_metric=best_metric,
            global_step=global_step,
            train_dataset_info=train_dataset_info,
            val_dataset_info=val_dataset_info,
            train_create_time=create_time,
            num_total_parameters=num_total_parameters,
            num_trainable_parameters=num_trainable_parameters,
            num_buffers=num_buffers,
            trainable_parameters_percentage=trainable_parameters_percentage,
            eval_time=eval_time,
            eval_tokens=eval_tokens,
            reports=eval_result,
            train_loss=train_loss,
        )


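# Typical invocation (the script file name and output redirection are just an example):
#   python generate_report.py > report.md
# generate_reports below expects the experiment result JSONs under ./experiment relative
# to the current working directory.

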
def generate_reports():
    outputs = []
    for root, _, files in os.walk('./experiment'):
        for file in files:
            abs_file = os.path.join(root, file)
            if not abs_file.endswith('.json') or 'ipynb' in abs_file:
                continue

            outputs.append(parse_output(abs_file))

    all_groups = {output.group for output in outputs}
    for group in all_groups:
        group_outputs = [output for output in outputs if output.group == group]
        print(f'=================Printing the sft cmd result of exp {group}==================\n\n')
        print(generate_sft_report([output for output in group_outputs if output.cmd in ('sft', 'eval')]))
        print(f'=================Printing the export cmd result of exp {group}==================\n\n')
        print(generate_export_report([output for output in group_outputs if output.cmd == 'export']))
        print('=================Printing done==================\n\n')


if __name__ == '__main__':
    generate_reports()