add model

Browse files

Files changed (9) hide show

.gitignore +1 -0
README.md +69 -0
config.json +29 -0
convert-from-malaya.ipynb +734 -0
pytorch_model.bin +3 -0
sp10m.cased.ms-en.model +3 -0
special_tokens_map.json +1 -0
spiece.model +3 -0
tokenizer_config.json +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.ipynb_checkpoints

README.md ADDED Viewed

	@@ -0,0 +1,69 @@

+---
+language: ms
+---
+# t5-small-bahasa-cased
+Pretrained T5 small language model for Malay.
+## Pretraining Corpus
+The `t5-small-bahasa-cased` model was pretrained on multiple tasks. Below is the list of tasks it was trained on:
+1. Language masking task on bahasa news, bahasa Wikipedia, bahasa Academia.edu, bahasa parliament and translated The Pile.
+2. News title prediction on bahasa news.
+3. Next sentence prediction on bahasa news, bahasa Wikipedia, bahasa Academia.edu, bahasa parliament and translated The Pile.
+4. Question answering on translated Natural QA.
+5. Text Similarity task on translated SNLI and translated MNLI.
+6. EN-MS translation.
+7. MS-EN translation.
+8. Abstractive Summarization.
+9. Knowledge Graph triples generation.
+10. Paraphrase.
+The data preparation steps can be reproduced from https://github.com/huseinzol05/malaya/tree/master/pretrained-model/t5/prepare
+## Pretraining details
+- This model was trained using the Google T5 repository, https://github.com/google-research/text-to-text-transfer-transformer, on a v3-8 TPU.
+- All steps can be reproduced from here: https://github.com/huseinzol05/Malaya/tree/master/pretrained-model/t5
+## Load Pretrained Model
+You can use this model by installing `torch` or `tensorflow` and the Hugging Face `transformers` library. You can then load it directly like this:
+```python
+from transformers import T5Tokenizer, T5Model
+model = T5Model.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
+tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
+```
+## Example using T5ForConditionalGeneration
+```python
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
+model = T5ForConditionalGeneration.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
+input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')
+outputs = model.generate(input_ids)
+print(tokenizer.decode(outputs[0]))
+```
+The output is:
+```
+<pad> Mahathir Mohamad</s>
+```
+## Supported prefix
+1. `soalan: {string}`, trained using Natural QA.
+2. `ringkasan: {string}`, for abstractive summarization.
+3. `tajuk: {string}`, for abstractive title generation.
+4. `parafrasa: {string}`, for abstractive paraphrasing.
+5. `terjemah Inggeris ke Melayu: {string}`, for EN-MS translation.
+6. `terjemah Melayu ke Inggeris: {string}`, for MS-EN translation.
+7. `grafik pengetahuan: {string}`, for converting MS text to EN knowledge graph triples.
+8. `ayat1: {string1} ayat2: {string2}`, for semantic similarity.

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "./pytorch_model.bin",
+  "architectures": [
+    "T5Model"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "gradient_checkpointing": false,
+  "initializer_factor": 1.0,
+  "inputs_length": 1024,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 1024,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

convert-from-malaya.ipynb ADDED Viewed

	@@ -0,0 +1,734 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'4.10.0'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import transformers\n",
+    "transformers.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Config, T5Model, load_tf_weights_in_t5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "checkpoint                             model.ckpt-1000000.index\r\n",
+      "model.ckpt-1000000.data-00000-of-00002 model.ckpt-1000000.meta\r\n",
+      "model.ckpt-1000000.data-00001-of-00002 operative_config.gin\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# !wget https://f000.backblazeb2.com/file/malaya-model/pretrained/t5-small-2021-07-28.tar.gz\n",
+    "# !tar -zxf t5-small-2021-07-28.tar.gz\n",
+    "# !rm t5-small-2021-07-28.tar.gz\n",
+    "!ls t5-small-v2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T5Config {\n",
+      "  \"d_ff\": 2048,\n",
+      "  \"d_kv\": 64,\n",
+      "  \"d_model\": 512,\n",
+      "  \"decoder_start_token_id\": 0,\n",
+      "  \"dropout_rate\": 0.1,\n",
+      "  \"eos_token_id\": 1,\n",
+      "  \"feed_forward_proj\": \"relu\",\n",
+      "  \"gradient_checkpointing\": false,\n",
+      "  \"initializer_factor\": 1.0,\n",
+      "  \"inputs_length\": 1024,\n",
+      "  \"is_encoder_decoder\": true,\n",
+      "  \"layer_norm_epsilon\": 1e-06,\n",
+      "  \"model_type\": \"t5\",\n",
+      "  \"n_positions\": 1024,\n",
+      "  \"num_decoder_layers\": 6,\n",
+      "  \"num_heads\": 8,\n",
+      "  \"num_layers\": 6,\n",
+      "  \"pad_token_id\": 0,\n",
+      "  \"relative_attention_num_buckets\": 32,\n",
+      "  \"transformers_version\": \"4.10.0\",\n",
+      "  \"use_cache\": true,\n",
+      "  \"vocab_size\": 32128\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "config = T5Config(\n",
+    "    vocab_size = 32128,\n",
+    "    n_positions=1024,\n",
+    "    d_ff = 2048,\n",
+    "    d_kv = 64,\n",
+    "    d_model = 512,\n",
+    "    dropout_rate = 0.1,\n",
+    "    inputs_length = 1024,\n",
+    "    num_heads = 8,\n",
+    "    num_layers = 6,\n",
+    "    decoder_start_token_id = 0,\n",
+    "    eos_token_id = 1,\n",
+    "    pad_token_id = 0)\n",
+    "print(config)\n",
+    "config.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "T5Model(\n",
+       "  (shared): Embedding(32128, 512)\n",
+       "  (encoder): T5Stack(\n",
+       "    (embed_tokens): Embedding(32128, 512)\n",
+       "    (block): ModuleList(\n",
+       "      (0): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (relative_attention_bias): Embedding(32, 8)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (1): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (2): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (3): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (4): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (5): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "    (final_layer_norm): T5LayerNorm()\n",
+       "    (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  )\n",
+       "  (decoder): T5Stack(\n",
+       "    (embed_tokens): Embedding(32128, 512)\n",
+       "    (block): ModuleList(\n",
+       "      (0): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (relative_attention_bias): Embedding(32, 8)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (1): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (2): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (3): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (4): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (5): T5Block(\n",
+       "        (layer): ModuleList(\n",
+       "          (0): T5LayerSelfAttention(\n",
+       "            (SelfAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (1): T5LayerCrossAttention(\n",
+       "            (EncDecAttention): T5Attention(\n",
+       "              (q): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (k): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (v): Linear(in_features=512, out_features=512, bias=False)\n",
+       "              (o): Linear(in_features=512, out_features=512, bias=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "          (2): T5LayerFF(\n",
+       "            (DenseReluDense): T5DenseReluDense(\n",
+       "              (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
+       "              (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (layer_norm): T5LayerNorm()\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "    (final_layer_norm): T5LayerNorm()\n",
+       "    (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = T5Model(config)\n",
+    "load_tf_weights_in_t5(model, config, 't5-small-v2/model.ckpt-1000000')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('config.json', 'pytorch_model.bin')"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from transformers import CONFIG_NAME, WEIGHTS_NAME\n",
+    "CONFIG_NAME, WEIGHTS_NAME"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "torch.save(model.state_dict(), './' + WEIGHTS_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Config, T5Model, T5Tokenizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !wget https://f000.backblazeb2.com/file/malaya-model/bpe/sp10m.cased.ms-en.model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('./tokenizer_config.json',\n",
+       " './special_tokens_map.json',\n",
+       " './spiece.model',\n",
+       " './added_tokens.json')"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer = T5Tokenizer('sp10m.cased.ms-en.model')\n",
+    "tokenizer.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = T5Tokenizer.from_pretrained('./', lower = False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = T5Config.from_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = T5Model.from_pretrained('./pytorch_model.bin', config = config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.save_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import T5Tokenizer, T5ForConditionalGeneration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = T5ForConditionalGeneration.from_pretrained('./')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> Mahathir Mohamad</s>'"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> PETALING JAYA: Bekas perdana menteri Najib Razak mempersoalkan sama ada kerajaan tahu bagaimana menguruskan wabak Covid'"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('terjemah Inggeris ke Melayu: PETALING JAYA: Former prime minister Najib Razak has questioned whether the government knows how to manage the Covid-19 pandemic, outlining several seemingly contradictory announcements it has made.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> PETALING JAYA: The meeting of former Prime Minister Datuk Seri Najib Tun Razak and Deputy Prime Minister'"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('terjemah Melayu ke Inggeris: PETALING JAYA: Pertemuan bekas Perdana Menteri, Datuk Seri Najib Tun Razak dan Timbalan Perdana Menteri, Datuk Seri Ismail Sabri Yaakob hari ini adalah bagi membincangkan isu berkaitan hala tuju dan dasar negara.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad> Roman Catholic Archdiocese of Maracaibo shares border with Roman Catholic Diocese'"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids = tokenizer.encode('grafik pengetahuan: Keuskupan Agung Katolik Rom Maracaibo terletak di barat daya Keuskupan Katolik Rom Machiques.', return_tensors = 'pt')\n",
+    "outputs = model.generate(input_ids)\n",
+    "tokenizer.decode(outputs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!rm -rf t5-small-v2"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b447c69d9920811200c7322e2c594eef1099e7df6f7cc8fdf311e2ea8ef670e
+size 242087629

sp10m.cased.ms-en.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26de51154cccc9db6e65e5d466bdb0b1fff9fab1d80f4689711de943448addd6
+size 803030

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26de51154cccc9db6e65e5d466bdb0b1fff9fab1d80f4689711de943448addd6
+size 803030

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}