# Spaces: Runtime error  (apparently residue from the web page this file was copied from; not part of the module)
# Copyright 2020 The HuggingFace Team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import itertools | |
import os | |
import subprocess | |
from os.path import dirname | |
from parameterized import parameterized | |
from tests.trainer.test_trainer import TrainerIntegrationCommon # noqa | |
from transformers import is_torch_available | |
from transformers.testing_utils import ( | |
TestCasePlus, | |
execute_subprocess_async, | |
get_gpu_count, | |
get_tests_dir, | |
require_deepspeed, | |
require_torch_gpu, | |
slow, | |
) | |
from transformers.trainer_utils import set_seed | |
if is_torch_available(): | |
from tests.trainer.test_trainer import ( # noqa | |
RegressionModelConfig, | |
RegressionPreTrainedModel, | |
get_regression_trainer, | |
) | |
# Seed the RNGs once at import time, before any command lines are built.
set_seed(42)

FIXTURE_DIRECTORY = get_tests_dir("fixtures")
# Parent of whatever get_tests_dir() returns; used below to locate examples/pytorch.
# (A single-argument os.path.join() is a no-op, so dirname() is taken directly.)
ROOT_DIRECTORY = dirname(get_tests_dir())
DS_TESTS_DIRECTORY = dirname(os.path.abspath(__file__))

# default torch.distributed port
DEFAULT_MASTER_PORT = "10999"

T5_SMALL = "google-t5/t5-small"

# *** Working Models ***
# make_task_cmds() looks these up by name as "{MODEL_TYPE}_TINY" (upper-cased, "-" -> "_").
ALBERT_TINY = "hf-internal-testing/tiny-albert"
BART_TINY = "sshleifer/bart-tiny-random"
BERT_TINY = "hf-internal-testing/tiny-bert"
BIGBIRD_PEGASUS_TINY = "hf-internal-testing/tiny-random-bigbird_pegasus"
BIG_BIRD_TINY = "hf-internal-testing/tiny-random-big_bird"
BLENDERBOT_TINY = "hf-internal-testing/tiny-random-blenderbot"
BLOOM_TINY = "bigscience/bigscience-small-testing"
DEBERTA_TINY = "hf-internal-testing/tiny-random-deberta"
DEBERTA_V2_TINY = "hf-internal-testing/tiny-random-deberta-v2"
DISTILBERT_TINY = "sshleifer/tiny-distilbert-base-cased"
ELECTRA_TINY = "hf-internal-testing/tiny-electra"
FLAUBERT_TINY = "hf-internal-testing/tiny-random-flaubert"
FSMT_TINY = "stas/tiny-wmt19-en-de"
FUNNEL_TINY = "hf-internal-testing/tiny-random-funnel"
GPT2_TINY = "sshleifer/tiny-gpt2"
GPTJ_TINY = "hf-internal-testing/tiny-random-gptj"
GPT_NEO_TINY = "hf-internal-testing/tiny-random-gpt_neo"
LAYOUTLM_TINY = "hf-internal-testing/tiny-layoutlm"
LED_TINY = "hf-internal-testing/tiny-random-led"
LONGFORMER_TINY = "hf-internal-testing/tiny-random-longformer"
M2M_100_TINY = "stas/tiny-m2m_100"  # hf tiny model is unsuitable
MARIAN_TINY = "sshleifer/tiny-marian-en-de"
MBART_TINY = "sshleifer/tiny-mbart"
MOBILEBERT_TINY = "hf-internal-testing/tiny-random-mobilebert"
MPNET_TINY = "hf-internal-testing/tiny-random-mpnet"
PEGASUS_TINY = "stas/pegasus-cnn_dailymail-tiny-random"
PROPHETNET_TINY = "hf-internal-testing/tiny-random-prophetnet"
ROBERTA_TINY = "sshleifer/tiny-distilroberta-base"
SQUEEZEBERT_TINY = "hf-internal-testing/tiny-random-squeezebert"
T5_TINY = "patrickvonplaten/t5-tiny-random"
T5_V1_TINY = "hf-internal-testing/tiny-random-t5-v1.1"
VIT_TINY = "hf-internal-testing/tiny-random-vit"
XLM_ROBERTA_TINY = "hf-internal-testing/tiny-xlm-roberta"
XLNET_TINY = "sshleifer/tiny-xlnet-base-cased"

# *** To Fix ***

# *** tiny model issues ***
# missing model files:
MT5_TINY = "hf-internal-testing/tiny-random-mt5"
CAMEMBERT_TINY = "hf-internal-testing/tiny-random-camembert"
OPENAI_GPT_TINY = "hf-internal-testing/tiny-random-openai-gpt"

# missing tokenizer files
CONVBERT_TINY = "hf-internal-testing/tiny-random-convbert"
LAYOUTLMV2_TINY = "hf-internal-testing/tiny-random-layoutlmv2"
HUBERT_TINY = "hf-internal-testing/tiny-random-hubert"

# issues with tokenizer
CTRL_TINY = "hf-internal-testing/tiny-random-ctrl"
TRANSFO_XL_TINY = "hf-internal-testing/tiny-random-transfo-xl"  # same as Salesforce/ctrl

# other issues with tiny models
IBERT_TINY = "hf-internal-testing/tiny-random-ibert"  # multiple issues with either mlm/qa/clas
REFORMER_TINY = "hf-internal-testing/tiny-random-reformer"  # multiple issues with either mlm/qa/clas

# *** Lacking official examples to test with ***
# or not working with examples
DPR_TINY = "hf-internal-testing/tiny-random-dpr"
# - "dpr" examples/research_projects/rag-end2end-retriever/
RAG_TINY = "hf-internal-testing/tiny-random-rag"
# - "rag" research_projects
LUKE_TINY = ""
# - "luke" Entities classes - no plan to make such example
LXMERT_TINY = "hf-internal-testing/tiny-random-lxmert"
# - "lxmert" doesn't work with run_qa.py
CLIP_TINY = "hf-internal-testing/tiny-random-clip"
# - "clip" nothing under pytorch examples - XXX: Suraj is working on adding some - check by end of Sep
SPEECH_TO_TEXT_TINY = "hf-internal-testing/tiny-random-speech_to_text"
# - "speech_to_text", nothing under pytorch examples

# *** Reactive mode ***
# models with low usage, unstable API, things about to change - do nothing about the following until someone runs into a problem
TAPAS_TINY = "hf-internal-testing/tiny-random-tapas"
# additional notes on tapas
# 1. "Table must be of type pd.DataFrame" failure

# TODO: new models to add:
#
def get_launcher(distributed=False):
    """Return the ``deepspeed`` launcher prefix as a list of argv tokens.

    Args:
        distributed: when true, request up to 2 GPUs (as many as ``get_gpu_count()`` reports,
            capped at 2); otherwise request exactly 1.

    Returns:
        ``["deepspeed", "--num_nodes", "1", "--num_gpus", <n>, "--master_port", <port>]`` where the
        port comes from the ``DS_TEST_PORT`` environment variable, falling back to
        ``DEFAULT_MASTER_PORT``.
    """
    # 1. --num_nodes is pinned to 1 in case these tests end up running on a multi-node setup,
    #    which they cannot handle.
    # 2. At most 2 gpus are used for now: some quality tests may give different results with
    #    more gpus because very little data is used.
    if distributed:
        num_gpus = min(2, get_gpu_count())
    else:
        num_gpus = 1

    master_port = os.environ.get("DS_TEST_PORT", DEFAULT_MASTER_PORT)

    launcher_line = f"deepspeed --num_nodes 1 --num_gpus {num_gpus} --master_port {master_port}"
    return launcher_line.split()
def make_task_cmds():
    """Assemble one launcher command line per (task, model) pair.

    Returns:
        dict mapping ``"<task>_<model>"`` to an argv list made of: the distributed deepspeed
        launcher, the example script with its task-specific arguments,
        ``--model_name_or_path <tiny model>`` and the training arguments shared by all tasks.

    Raises:
        KeyError: if a model listed below has no matching ``{MODEL_TYPE}_TINY`` module-level
            variable, or a task has no entry in the task -> models table.
    """
    samples_dir = f"{FIXTURE_DIRECTORY}/tests_samples"
    wmt_dir = f"{samples_dir}/wmt_en_ro"
    xsum_dir = f"{samples_dir}/xsum"
    examples_dir = f"{ROOT_DIRECTORY}/examples/pytorch"

    # training arguments appended to every command
    args_main = """
        --do_train
        --max_train_samples 4
        --per_device_train_batch_size 2
        --num_train_epochs 1
        --fp16
        --report_to none
        --overwrite_output_dir
        """.split()

    # try to cover as many models as possible once (it's enough to run on one task per model)
    # but need a tiny model for each
    #
    # should have "{model_type.upper()}_TINY" corresponding vars defined, e.g., T5_TINY, etc.
    tasks2models = {
        "trans": [
            "bart",
            "fsmt",
            "m2m_100",
            "marian",
            "mbart",
            "t5",
            "t5_v1",
            # "mt5", missing model files
        ],
        "sum": [
            "pegasus",
        ],
        "clm": [
            "big_bird",
            "bigbird_pegasus",
            "blenderbot",
            "bloom",
            "gpt2",
            "gpt_neo",
            "gptj",
            "xlm-roberta",
            "prophetnet",
            # "camembert", missing model files
        ],
        "mlm": [
            "albert",
            "deberta",
            "deberta-v2",
            "distilbert",
            "electra",
            "flaubert",
            "funnel",
            "layoutlm",
            # "reformer", # multiple issues with either mlm/qa/clas
        ],
        "qa": [
            "led",
            "longformer",
            "mobilebert",
            "mpnet",
            "roberta",
            "squeezebert",
            # "convbert", # missing tokenizer files
            # "layoutlmv2", missing model files
        ],
        "clas": [
            "bert",
            "xlnet",
            # "hubert", # missing tokenizer files
            # "ibert", # multiple issues with either mlm/qa/clas
            # "transfo-xl", # tokenizer issues as Salesforce/ctrl
            # "Salesforce/ctrl", # tokenizer issues
            # "openai-community/openai-gpt", missing model files
            # "tapas", multiple issues
        ],
        "img_clas": [
            "vit",
        ],
    }

    # example script + task-specific arguments, whitespace-split into argv tokens further down
    tasks = {
        "trans": f"""
        {examples_dir}/translation/run_translation.py
        --train_file {wmt_dir}/train.json
        --source_lang en
        --target_lang ro
        --max_source_length 12
        --max_target_length 12
        """,
        "sum": f"""
        {examples_dir}/summarization/run_summarization.py
        --train_file {xsum_dir}/sample.json
        --max_source_length 12
        --max_target_length 12
        --lang en
        """,
        "clm": f"""
        {examples_dir}/language-modeling/run_clm.py
        --train_file {FIXTURE_DIRECTORY}/sample_text.txt
        --block_size 8
        """,
        "mlm": f"""
        {examples_dir}/language-modeling/run_mlm.py
        --train_file {FIXTURE_DIRECTORY}/sample_text.txt
        """,
        "qa": f"""
        {examples_dir}/question-answering/run_qa.py
        --train_file {samples_dir}/SQUAD/sample.json
        """,
        "clas": f"""
        {examples_dir}/text-classification/run_glue.py
        --train_file {samples_dir}/MRPC/train.csv
        --max_seq_length 12
        --task_name MRPC
        """,
        "img_clas": f"""
        {examples_dir}/image-classification/run_image_classification.py
        --dataset_name hf-internal-testing/cats_vs_dogs_sample
        --remove_unused_columns False
        --max_steps 10
        --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
        --label_column_name labels
        """,
    }

    launcher = get_launcher(distributed=True)
    module_vars = globals()

    def model_args(model):
        # e.g. "xlm-roberta" -> XLM_ROBERTA_TINY, "t5_v1" -> T5_V1_TINY
        model_name = module_vars[f"{model.upper().replace('-', '_')}_TINY"]
        return f"--model_name_or_path {model_name}".split()

    # NOTE: a former "gen" (generation) special case, which used a single-gpu launcher and passed
    # --model_type instead of the shared training args, is currently disabled.
    return {
        f"{task}_{model}": launcher + task_args.split() + model_args(model) + args_main
        for task, task_args in tasks.items()
        for model in tasks2models[task]
    }
# Build every "<task>_<model>" command line once, at import time.
task_cmds = make_task_cmds()

# ZeRO stage names under test; each one selects a ds_config_{stage}.json file in the test class.
ZERO2, ZERO3 = "zero2", "zero3"
stages = [ZERO2, ZERO3]

# Future preparation (dtypes):
# these tests are quite slow, so for now only fp16 is tested and --fp16 is simply hardcoded in
# the shared training args. The sketch for parameterizing over dtypes later on:
#
#   FP16 = "fp16"
#   BF16 = "bf16"
#
#   dtypes = [FP16]
#   if is_torch_bf16_gpu_available():
#       dtypes += [BF16]
def parameterized_custom_name_func(func, param_num, param):
    """Name a parameterized sub-test after *all* of its positional params.

    The default name generator shows only the first param; here every entry of ``param.args`` is
    stringified, joined with "_" and passed through ``parameterized.to_safe_name``.

    Args:
        func: the test function being parameterized (its ``__name__`` is the prefix).
        param_num: index of the parameter set (unused).
        param: the parameter set; only ``param.args`` is read.

    Returns:
        ``"<func name>_<safe joined params>"``.
    """
    joined_params = "_".join(map(str, param.args))
    safe_suffix = parameterized.to_safe_name(joined_params)
    return f"{func.__name__}_{safe_suffix}"
# Every (zero stage, "<task>_<model>") combination to test; iterating the dict yields its keys.
params = list(itertools.product(stages, task_cmds))
# The gating decorators are the ones imported at the top of this file; as their names indicate,
# they restrict this suite to slow-test runs where DeepSpeed and a GPU are available.
@slow
@require_deepspeed
@require_torch_gpu
class TestDeepSpeedModelZoo(TestCasePlus):
    """This class is for testing via an external script - can do multiple gpus"""

    def get_task_cmd(self, task, stage):
        """Return a ready-to-run train command for ``task`` under ZeRO ``stage``.

        Args:
            task: a key of ``task_cmds`` (``"<task>_<model>"``).
            stage: ZeRO stage name; selects ``ds_config_{stage}.json`` next to this test file.

        Returns:
            ``(cmd, output_dir)``: a *new* argv list (safe for the caller to extend) and the
            directory passed to the script via ``--output_dir``.

        Raises:
            ValueError: if ``task`` is not a key of ``task_cmds``.
        """
        if task not in task_cmds:
            raise ValueError(f"don't know of task {task}, have {task_cmds.keys()}")

        args_ds = f"--deepspeed {self.test_file_dir_str}/ds_config_{stage}.json".split()

        # presumably cleaned up automatically by TestCasePlus, going by the helper's name
        output_dir = self.get_auto_remove_tmp_dir()
        args_out = f"--output_dir {output_dir}".split()

        # Concatenate into a fresh list. Using `+=` on task_cmds[task] would extend the shared
        # module-level list in place, so the args of one (stage, task) run would leak into
        # every later run of the same task.
        cmd = task_cmds[task] + args_ds + args_out

        return cmd, output_dir

    # One generated sub-test per (stage, task) pair in `params`, named after both params.
    @parameterized.expand(params, name_func=parameterized_custom_name_func)
    def test_zero_to_fp32(self, stage, task):
        """Train briefly, then check that full fp32 weights can be recovered from the checkpoint."""
        # testing the ability to do a run followed by recovery of full fp32 weights

        cmd, output_dir = self.get_task_cmd(task, stage)

        # 1. generate the checkpoint (save at every step, so checkpoint-1 exists)
        cmd += "--save_steps 1".split()
        # keep for quick debug
        # print(" ".join([f"\nPYTHONPATH={self.src_dir_str}"] + cmd)); die
        execute_subprocess_async(cmd, env=self.get_env())

        # 2. test that the fp32 weights get reconsolidated
        chkpt_dir = f"{output_dir}/checkpoint-1"
        recovered_model_path = f"{chkpt_dir}/out.bin"
        # zero_to_fp32.py is expected to have been placed inside the checkpoint dir by the run above
        recover_cmd = f"{chkpt_dir}/zero_to_fp32.py {chkpt_dir} {recovered_model_path}"
        # keep for quick debug
        # print(f"\nPYTHONPATH={self.src_dir_str} {recover_cmd}"); die
        subprocess.check_call(recover_cmd, shell=True)
        assert os.path.exists(recovered_model_path), f"{recovered_model_path} was not found"

        # possibly could also test that the resulting saved model is usable but given that we use
        # random models we won't know if it's any good