#!/usr/bin/env python
# coding: utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script creates a smallish random model with a few layers, to test things like MP/PP, where
# the tiny and tinier models are too small.
#
# It will then be used as "stas/t5-very-small-random"

from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer, T5TokenizerFast

mname_from = "patrickvonplaten/t5-tiny-random"
mname_very_small = "t5-very-small-random"

tokenizer = T5Tokenizer.from_pretrained(mname_from)
tokenizer_fast = T5TokenizerFast.from_pretrained(mname_from)
config = T5Config.from_pretrained(mname_from)

# shrink the architecture while keeping the full vocab
config.update(
    dict(
        vocab_size=32128,
        d_model=64,
        d_ff=256,
        d_kv=8,
        num_layers=8,
        num_decoder_layers=8,
        num_heads=4,
        relative_attention_num_buckets=32,
    )
)

very_small_model = T5ForConditionalGeneration(config)
print(f"num of params {very_small_model.num_parameters()}")

# Test
src_texts = ["A long paragraph for summarization.", "Another paragraph for summarization."]
tgt_texts = ["Summary of the text.", "Another summary."]
# prepare_seq2seq_batch has been removed from transformers; calling the tokenizer
# directly with text_target is its replacement
batch = tokenizer(src_texts, text_target=tgt_texts, padding=True, return_tensors="pt")
outputs = very_small_model(**batch)
print("test output:", len(outputs.logits[0]))

# Save
very_small_model.half()  # makes it smaller
very_small_model.save_pretrained(mname_very_small)
config.save_pretrained(mname_very_small)
tokenizer.save_pretrained(mname_very_small)
tokenizer_fast.save_pretrained(mname_very_small)

print(f"Generated {mname_very_small}")

# Upload
# transformers-cli repo create t5-very-small-random
# clone and add files
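#
# Alternatively, a minimal sketch of a programmatic upload (an assumption, not part of
# the original flow): push_to_hub creates the repo under your namespace and uploads the
# files in one step, provided you are authenticated, e.g. via `huggingface-cli login`:
#
#   very_small_model.push_to_hub(mname_very_small)
#   tokenizer.push_to_hub(mname_very_small)
#   tokenizer_fast.push_to_hub(mname_very_small)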