{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "'4.10.0'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import transformers\n", "transformers.__version__" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from transformers import T5Config, T5Model, load_tf_weights_in_t5" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "checkpoint\t\t\t\tmodel.ckpt-1000000.index\r\n", "model.ckpt-1000000.data-00000-of-00002\tmodel.ckpt-1000000.meta\r\n", "model.ckpt-1000000.data-00001-of-00002\toperative_config.gin\r\n" ] } ], "source": [ "# !wget https://f000.backblazeb2.com/file/malaya-model/pretrained/t5-super-super-tiny-2021-07-28.tar.gz\n", "# !tar -zxf t5-super-super-tiny-2021-07-28.tar.gz\n", "# !rm t5-super-super-tiny-2021-07-28.tar.gz\n", "!ls t5-super-super-tiny-v2" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "T5Config {\n", " \"d_ff\": 512,\n", " \"d_kv\": 64,\n", " \"d_model\": 128,\n", " \"decoder_start_token_id\": 0,\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"relu\",\n", " \"gradient_checkpointing\": false,\n", " \"initializer_factor\": 1.0,\n", " \"inputs_length\": 512,\n", " \"is_encoder_decoder\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"n_positions\": 512,\n", " \"num_decoder_layers\": 2,\n", " \"num_heads\": 6,\n", " \"num_layers\": 2,\n", " \"pad_token_id\": 0,\n", " \"relative_attention_num_buckets\": 32,\n", " \"transformers_version\": \"4.10.0\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n" ] } ], "source": [ "config = T5Config(\n", " vocab_size = 32128,\n", " n_positions=512,\n", " d_ff = 512,\n", " d_kv = 64,\n", " d_model = 128,\n", " dropout_rate = 0.1,\n", " inputs_length = 512,\n", " num_heads = 6,\n", " num_layers = 2,\n", " decoder_start_token_id = 0,\n", " eos_token_id = 1,\n", " pad_token_id = 0)\n", "print(config)\n", "config.save_pretrained('./')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "T5Model(\n", " (shared): Embedding(32128, 128)\n", " (encoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 128)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=128, out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " (relative_attention_bias): Embedding(32, 6)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseReluDense(\n", " (wi): Linear(in_features=128, out_features=512, bias=False)\n", " (wo): Linear(in_features=512, out_features=128, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=128, 
out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseReluDense(\n", " (wi): Linear(in_features=128, out_features=512, bias=False)\n", " (wo): Linear(in_features=512, out_features=128, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (decoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 128)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=128, out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " (relative_attention_bias): Embedding(32, 6)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=128, out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseReluDense(\n", " (wi): Linear(in_features=128, out_features=512, bias=False)\n", " (wo): Linear(in_features=512, out_features=128, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=128, out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=128, out_features=384, bias=False)\n", " (k): Linear(in_features=128, out_features=384, bias=False)\n", " (v): Linear(in_features=128, out_features=384, bias=False)\n", " (o): Linear(in_features=384, out_features=128, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseReluDense(\n", " (wi): Linear(in_features=128, out_features=512, bias=False)\n", " (wo): Linear(in_features=512, out_features=128, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", ")" 
] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = T5Model(config)\n", "load_tf_weights_in_t5(model, config, 't5-super-super-tiny-v2/model.ckpt-1000000')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('config.json', 'pytorch_model.bin')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import CONFIG_NAME, WEIGHTS_NAME\n", "CONFIG_NAME, WEIGHTS_NAME" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "torch.save(model.state_dict(), './' + WEIGHTS_NAME)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from transformers import T5Config, T5Model, T5Tokenizer" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# !wget https://f000.backblazeb2.com/file/malaya-model/bpe/sp10m.cased.ms-en.model" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('./tokenizer_config.json',\n", " './special_tokens_map.json',\n", " './spiece.model',\n", " './added_tokens.json')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer = T5Tokenizer('sp10m.cased.ms-en.model')\n", "tokenizer.save_pretrained('./')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "tokenizer = T5Tokenizer.from_pretrained('./', lower = False)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "config = T5Config.from_pretrained('./')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "model = T5Model.from_pretrained('./pytorch_model.bin', config = config)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "model.save_pretrained('./')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from transformers import T5Tokenizer, T5ForConditionalGeneration" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "model = T5ForConditionalGeneration.from_pretrained('./')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' Narendra Modi'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' PETALING JAYA: Bekas perdana menteri Najib Razak mempersoalkan sama ada kerajaan tahu bagaimana menguruskan pandemik'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_ids = tokenizer.encode('terjemah Inggeris ke Melayu: PETALING JAYA: Former prime minister Najib Razak has questioned whether the government knows how to manage the Covid-19 pandemic, outlining several seemingly contradictory announcements it has made.', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\" PETALING JAYA: Former Prime 
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from transformers import T5Tokenizer, T5ForConditionalGeneration" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "model = T5ForConditionalGeneration.from_pretrained('./')" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' Narendra Modi'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# question-answering probe ('soalan' = 'question')\n", "input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' PETALING JAYA: Bekas perdana menteri Najib Razak mempersoalkan sama ada kerajaan tahu bagaimana menguruskan pandemik'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# English -> Malay translation probe\n", "input_ids = tokenizer.encode('terjemah Inggeris ke Melayu: PETALING JAYA: Former prime minister Najib Razak has questioned whether the government knows how to manage the Covid-19 pandemic, outlining several seemingly contradictory announcements it has made.', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\" PETALING JAYA: Former Prime Minister Najib Tun Razak's meeting and Deputy Prime Minister Datuk Seri\"" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Malay -> English translation probe\n", "input_ids = tokenizer.encode('terjemah Melayu ke Inggeris: PETALING JAYA: Pertemuan bekas Perdana Menteri, Datuk Seri Najib Tun Razak dan Timbalan Perdana Menteri, Datuk Seri Ismail Sabri Yaakob hari ini adalah bagi membincangkan isu berkaitan hala tuju dan dasar negara.', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' Roman Catholic Archdiocese of Maracaibo shares border with Roman Catholic Diocese'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# knowledge-graph generation probe ('grafik pengetahuan' = 'knowledge graph')\n", "input_ids = tokenizer.encode('grafik pengetahuan: Keuskupan Agung Katolik Rom Maracaibo terletak di barat daya Keuskupan Katolik Rom Machiques.', return_tensors = 'pt')\n", "outputs = model.generate(input_ids)\n", "tokenizer.decode(outputs[0])" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# remove the extracted TF checkpoint now that conversion is done\n", "!rm -rf t5-super-super-tiny-v2" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }