import os
import gc
import time
import json
import math
import collections
from datetime import datetime
from typing import Optional, List, Dict, Tuple, Callable, Any, Union

import torch
import numpy as np

from transformers import (
    Trainer,
    is_datasets_available,
    is_torch_tpu_available,
)
from transformers.trainer_utils import (
    PredictionOutput,
    EvalPrediction,
    EvalLoopOutput,
    denumpify_detensorize,
    speed_metrics,
)
from transformers.utils import logging
from transformers.debug_utils import DebugOption

if is_datasets_available():
    import datasets

if is_torch_tpu_available():
    import torch_xla.core.xla_model as xm
    import torch_xla.debug.metrics as met

logger = logging.get_logger(__name__)

class ToMixin:
    """Mixin that moves optimizer and LR-scheduler state between devices."""

    def _optimizer_to(self, device: str = "cpu"):
        # https://github.com/pytorch/pytorch/issues/8741
        for param in self.optimizer.state.values():
            # Not sure there are any global tensors in the state dict
            if isinstance(param, torch.Tensor):
                param.data = param.data.to(device)
                if param._grad is not None:
                    param._grad.data = param._grad.data.to(device)
            elif isinstance(param, dict):
                for subparam in param.values():
                    if isinstance(subparam, torch.Tensor):
                        subparam.data = subparam.data.to(device)
                        if subparam._grad is not None:
                            subparam._grad.data = subparam._grad.data.to(device)

    def _scheduler_to(self, device: str = "cpu"):
        # https://github.com/pytorch/pytorch/issues/8741
        for param in self.lr_scheduler.__dict__.values():
            if isinstance(param, torch.Tensor):
                param.data = param.data.to(device)
                if param._grad is not None:
                    param._grad.data = param._grad.data.to(device)

class BaseReader(Trainer, ToMixin):
    name: Optional[str] = None

    def __init__(
        self,
        *args,
        data_args=None,
        eval_examples: Optional[datasets.Dataset] = None,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self.data_args = data_args if data_args is not None else {}
        self.eval_examples = eval_examples

    def free_memory(self):
        """Move model, optimizer, and scheduler state to CPU and release cached GPU memory."""
        self.model.to("cpu")
        self._optimizer_to("cpu")
        self._scheduler_to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

    def postprocess(
        self,
        output: EvalLoopOutput,
        eval_examples: Optional[datasets.Dataset] = None,
        eval_dataset: Optional[datasets.Dataset] = None,
        mode: str = "evaluate",
    ) -> Union[Any, EvalPrediction]:
        # Subclasses override this to turn raw loop outputs into predictions.
        return output

    def evaluate(
        self,
        eval_dataset: Optional[datasets.Dataset] = None,
        eval_examples: Optional[datasets.Dataset] = None,
        ignore_keys: Optional[List[str]] = None,
        metric_key_prefix: str = "eval",
    ) -> Dict[str, float]:
        # memory metrics - must set up as early as possible
        self._memory_tracker.start()

        eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset
        eval_dataloader = self.get_eval_dataloader(eval_dataset)
        start_time = time.time()
        eval_examples = self.eval_examples if eval_examples is None else eval_examples

        compute_metrics = self.compute_metrics
        self.compute_metrics = None
        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        try:
            output = eval_loop(
                eval_dataloader,
                description="Evaluation",
                prediction_loss_only=True if compute_metrics is None else None,
                ignore_keys=ignore_keys,
                metric_key_prefix=metric_key_prefix,
            )
        finally:
            self.compute_metrics = compute_metrics

        if isinstance(eval_dataset, datasets.Dataset):
            eval_dataset.set_format(
                type=eval_dataset.format["type"],
                columns=list(eval_dataset.features.keys()),
            )

        eval_preds = self.postprocess(output, eval_examples, eval_dataset, mode="evaluate")

        metrics = {}
        if self.compute_metrics is not None:
            metrics = self.compute_metrics(eval_preds)

        # To be JSON-serializable, we need to remove numpy types or zero-d tensors
        metrics = denumpify_detensorize(metrics)

        # Prefix all keys with metric_key_prefix + '_'
        for key in list(metrics.keys()):
            if not key.startswith(f"{metric_key_prefix}_"):
                metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

        total_batch_size = self.args.eval_batch_size * self.args.world_size
        metrics.update(
            speed_metrics(
                metric_key_prefix,
                start_time,
                num_samples=output.num_samples,
                num_steps=math.ceil(output.num_samples / total_batch_size),
            )
        )
        self.log(metrics)

        # Log and save evaluation results
        filename = "eval_results.txt"
        eval_result_file = self.name + "_" + filename if self.name else filename
        with open(os.path.join(self.args.output_dir, eval_result_file), "a") as writer:
            logger.info("***** Eval results *****")
            writer.write("***** Eval results *****\n")
            writer.write(f"{datetime.now()}\n")
            for key in sorted(metrics.keys()):
                logger.info(" %s = %s", key, str(metrics[key]))
                writer.write("%s = %s\n" % (key, str(metrics[key])))
            writer.write("\n")

        if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
            # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
            xm.master_print(met.metrics_report())

        self.control = self.callback_handler.on_evaluate(
            self.args, self.state, self.control, metrics
        )
        self._memory_tracker.stop_and_update_metrics(metrics)

        return metrics

    def predict(
        self,
        test_dataset: datasets.Dataset,
        test_examples: datasets.Dataset,
        ignore_keys: Optional[List[str]] = None,
        metric_key_prefix: str = "test",
        mode: str = "predict",
    ) -> PredictionOutput:
        # memory metrics - must set up as early as possible
        self._memory_tracker.start()

        test_dataloader = self.get_test_dataloader(test_dataset)
        start_time = time.time()

        compute_metrics = self.compute_metrics
        self.compute_metrics = None
        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        try:
            output = eval_loop(
                test_dataloader,
                description="Prediction",
                ignore_keys=ignore_keys,
                metric_key_prefix=metric_key_prefix,
            )
        finally:
            self.compute_metrics = compute_metrics

        if isinstance(test_dataset, datasets.Dataset):
            test_dataset.set_format(
                type=test_dataset.format["type"],
                columns=list(test_dataset.features.keys()),
            )

        predictions = self.postprocess(output, test_examples, test_dataset, mode=mode)

        self._memory_tracker.stop_and_update_metrics(output.metrics)
        return predictions
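

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal subclass
# showing how `postprocess` is meant to be overridden. `evaluate`/`predict`
# call it to turn the raw `EvalLoopOutput` into something `compute_metrics`
# can consume. The argmax "decoding" below is a placeholder assumption; a
# real extractive reader would map start/end logits back to answer spans
# using `eval_examples` and `eval_dataset`.
# ---------------------------------------------------------------------------
class DummyReader(BaseReader):
    name = "dummy"

    def postprocess(
        self,
        output: EvalLoopOutput,
        eval_examples=None,
        eval_dataset=None,
        mode: str = "evaluate",
    ) -> Union[Any, EvalPrediction]:
        # The loop may return a single array or a tuple of arrays; take the
        # first element as the "logits" in the tuple case.
        logits = output.predictions[0] if isinstance(output.predictions, tuple) else output.predictions
        preds = np.argmax(logits, axis=-1)
        if mode == "evaluate":
            # Wrap predictions and labels so self.compute_metrics(eval_preds) works.
            return EvalPrediction(predictions=preds, label_ids=output.label_ids)
        return preds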