add model

Browse files

Files changed (8) hide show

.gitignore +1 -0
config.json +29 -0
convert-from-malaya.ipynb +1077 -0
pytorch_model.bin +3 -0
sp10m.cased.ms-en.model +3 -0
special_tokens_map.json +1 -0
spiece.model +3 -0
tokenizer_config.json +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.ipynb_checkpoints

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "./pytorch_model.bin",
+  "architectures": [
+    "T5Model"
+  ],
+  "d_ff": 3072,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "gradient_checkpointing": false,
+  "initializer_factor": 1.0,
+  "inputs_length": 1024,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 1024,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

convert-from-malaya.ipynb ADDED Viewed

	@@ -0,0 +1,1077 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'4.10.0'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import transformers\n",
+    "transformers.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Config, T5Model, load_tf_weights_in_t5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "checkpoint                            model.ckpt-759900.index\r\n",
+      "model.ckpt-759900.data-00000-of-00002 model.ckpt-759900.meta\r\n",
+      "model.ckpt-759900.data-00001-of-00002 operative_config.gin\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# !wget https://f000.backblazeb2.com/file/malaya-model/pretrained/t5-base-2021-07-28.tar.gz\n",
+    "# !tar -zxf t5-base-2021-07-28.tar.gz\n",
+    "# !rm t5-base-2021-07-28.tar.gz\n",
+    "!ls t5-base-v2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T5Config {\n",
+      "  \"d_ff\": 3072,\n",
+      "  \"d_kv\": 64,\n",
+      "  \"d_model\": 768,\n",
+      "  \"decoder_start_token_id\": 0,\n",
+      "  \"dropout_rate\": 0.1,\n",
+      "  \"eos_token_id\": 1,\n",
+      "  \"feed_forward_proj\": \"relu\",\n",
+      "  \"gradient_checkpointing\": false,\n",
+      "  \"initializer_factor\": 1.0,\n",
+      "  \"inputs_length\": 1024,\n",
+      "  \"is_encoder_decoder\": true,\n",
+      "  \"layer_norm_epsilon\": 1e-06,\n",
+      "  \"model_type\": \"t5\",\n",
+      "  \"n_positions\": 1024,\n",
+      "  \"num_decoder_layers\": 12,\n",
+      "  \"num_heads\": 12,\n",
+      "  \"num_layers\": 12,\n",
+      "  \"pad_token_id\": 0,\n",
+      "  \"relative_attention_num_buckets\": 32,\n",
+      "  \"transformers_version\": \"4.10.0\",\n",
+      "  \"use_cache\": true,\n",
+      "  \"vocab_size\": 32128\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "config = T5Config(\n",
+    "    vocab_size = 32128,\n",
+    "    n_positions=1024,\n",
+    "    d_ff = 3072,\n",
+    "    d_kv = 64,\n",
+    "    d_model = 768,\n",
+    "    dropout_rate = 0.1,\n",
+    "    inputs_length = 1024,\n",
+    "    num_heads = 12,\n",
+    "    num_layers = 12,\n",
+    "    decoder_start_token_id = 0,\n",
+    "    eos_token_id = 1,\n",
+    "    pad_token_id = 0)\n",
+    "print(config)\n",
+    "config.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "T5Model(\n",
+       "  (shared): Embedding(32128, 768)\n",
+       "  (encoder): T5Stack(\n",
+       "    (embed_tokens): Embedding(32128, 768)\n",
+       "    (block): ModuleList(\n",
+       "      (0): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (relative_attention_bias): Embedding(32, 12)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (1): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (2): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (3): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (4): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (5): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (6): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (7): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (8): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (9): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (10): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (11): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "    (final_layer_norm): T5LayerNorm()\n",
+       "    (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  )\n",
+       "  (decoder): T5Stack(\n",
+       "    (embed_tokens): Embedding(32128, 768)\n",
+       "    (block): ModuleList(\n",
+       "      (0): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (relative_attention_bias): Embedding(32, 12)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (1): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (2): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (3): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (4): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (5): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (6): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (7): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (8): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (9): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (10): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (11): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
+       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
+       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "    (final_layer_norm): T5LayerNorm()\n",
+       "    (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = T5Model(config)\n",
+    "load_tf_weights_in_t5(model, config, 't5-base-v2/model.ckpt-759900')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('config.json', 'pytorch_model.bin')"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from transformers import CONFIG_NAME, WEIGHTS_NAME\n",
+    "CONFIG_NAME, WEIGHTS_NAME"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "torch.save(model.state_dict(), './' + WEIGHTS_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Config, T5Model, T5Tokenizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !wget https://f000.backblazeb2.com/file/malaya-model/bpe/sp10m.cased.ms-en.model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('./tokenizer_config.json',\n",
+       " './special_tokens_map.json',\n",
+       " './spiece.model',\n",
+       " './added_tokens.json')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer = T5Tokenizer('sp10m.cased.ms-en.model')\n",
+    "tokenizer.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = T5Tokenizer.from_pretrained('./', lower = False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = T5Config.from_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = T5Model.from_pretrained('./pytorch_model.bin', config = config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Tokenizer, T5ForConditionalGeneration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = T5ForConditionalGeneration.from_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> Mahathir Mohamad</s>'"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> PETALING JAYA: Bekas perdana menteri, Najib Razak, mempersoalkan sama ada kerajaan tahu bagaimana menguruskan wabak'"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('terjemah Inggeris ke Melayu: PETALING JAYA: Former prime minister Najib Razak has questioned whether the government knows how to manage the Covid-19 pandemic, outlining several seemingly contradictory announcements it has made.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> PETALING JAYA: Former Prime Minister Najib Tun Razak and Deputy Prime Minister Ismail Sabri Yaakob today discussed'"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('terjemah Melayu ke Inggeris: PETALING JAYA: Pertemuan bekas Perdana Menteri, Datuk Seri Najib Tun Razak dan Timbalan Perdana Menteri, Datuk Seri Ismail Sabri Yaakob hari ini adalah bagi membincangkan isu berkaitan hala tuju dan dasar negara.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> Roman Catholic Archdiocese of Maracaibo shares border with Roman Catholic Diocese'"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('grafik pengetahuan: Keuskupan Agung Katolik Rom Maracaibo terletak di barat daya Keuskupan Katolik Rom Machiques.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!rm -rf t5-base-v2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b38ca8853a3cc69ec8295b0bbdaf5187944a83c269d6c44279496a3b714c743
+size 891734137

sp10m.cased.ms-en.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26de51154cccc9db6e65e5d466bdb0b1fff9fab1d80f4689711de943448addd6
+size 803030

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26de51154cccc9db6e65e5d466bdb0b1fff9fab1d80f4689711de943448addd6
+size 803030

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}