sonebu committed on Aug 19, 2023

Commit

2f6628d

1 Parent(s): b4c89d1

moving from github

Files changed (23) hide show

.gitattributes +8 -0
.gitignore +2 -0
LICENSE +2 -0
README.md +48 -3
assets/ai8x-nlp-demo.gif +3 -0
assets/demo.elf +3 -0
assets/en.json +3 -0
assets/es.json +3 -0
assets/es2en_hw_cp6.pt +3 -0
dataloader.py +33 -0
demo.ipynb +620 -0
evaluation.ipynb +257 -0
functions.py +70 -0
layers.py +169 -0
models.py +240 -0
news-comm-v15/news-comm-v15-all-test.en +3 -0
news-comm-v15/news-comm-v15-all-test.es +3 -0
news-comm-v15/news-comm-v15-all-valid.en +3 -0
news-comm-v15/news-comm-v15-all-valid.es +3 -0
news-comm-v15/news-comm-v15-all.en +3 -0
news-comm-v15/news-comm-v15-all.es +3 -0
news-comm-v15/readme +2 -0
utils.py +99 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pth.tar filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.es filter=lfs diff=lfs merge=lfs -text
+*.en filter=lfs diff=lfs merge=lfs -text
+*.elf filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ **/.ipynb_checkpoints/
2	+ **/__pycache__/

LICENSE ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ NLP demo software by HyperbeeAI
2	+ Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai

README.md CHANGED Viewed

@@ -1,3 +1,48 @@
----
-license: other
----

+# NLP demo software by HyperbeeAI
+Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai
+This repository contains evaluation tools for the ai85 Spanish-to-English translation project.
+To run the demo, see explanations in "demo.ipynb", which acts as the serial terminal to communicate with the ai85 from the host PC. Further explanations are provided below as well as in the notebooks.
+![Demo](./assets/ai8x-nlp-demo.gif)
+### Contents:
+- **.py files:** Python modules used by the Jupyter notebooks. These files define a simulation environment for the MAX78000 CNN accelerator hardware, plus some peripheral tools that help with evaluation. Note that the simulator only includes the chip features that are relevant to this project (e.g., pooling is not implemented because this project does not need it).
+- **evaluation.ipynb:** this Jupyter notebook provides an interface to try out different sentences from the test set on the model in the simulation environment, and compute the BLEU score of the model over the test set.
+- **demo.ipynb:** This Jupyter notebook acts as the serial interface with the chip. A sentence in the source language is sent over to the chip for translation via the serial port, the implementation on the chip translates this and sends it back via the same serial port in the target language, and the result is displayed in the notebook cell. This needs to be run together with the "assets/demo.elf" program on the chip, which does the actual translation job on the ai85. There is a specific cell in the notebook that needs to be run before the ai85 demo.elf is started. Check the notebook for further info.
+- **assets/demo.elf:** C program running the actual translation application. Run this together with the demo.ipynb notebook for the translation demo. See further explanations inside demo.ipynb.
+### Extras/Notes:
+- The demo C program does not require any extra modules/libraries; it can be run directly, the same way as the Maxim SDK examples (i.e., using the arm gdb, defining the target as "remote localhost:3333", doing "load", etc.). However, note that the Jupyter notebook demo.ipynb needs to be run together with the C program for meaningful output. There is a specific cell in the notebook that needs to be run before the ai85 demo.elf is started. Check the notebook for further info.
+- The demo.ipynb notebook needs to run on the same host PC that programs the ai85 since it uses the on-board (USB) serial port (that programs the ai85) to communicate with the chip while the translation application is running.
+- Although the program should run on both the EVKit and the FeatherBoard without errors (since it uses common functionality), it was only explicitly tested with the FeatherBoard for now.
+### Setup:
+This demo has been tested with the following configuration:
+    Python              3.8.10
+    datasets            1.8.0
+    huggingface-hub     0.0.10
+    ipykernel           5.5.3
+    ipython             7.22.0
+    notebook            6.3.0
+    numpy               1.20.2
+    pyserial            3.5
+    sacrebleu           1.5.1
+    tokenizers          0.10.3
+    torch               1.8.1
+    torchtext           0.9.1
+    tqdm                4.49.0
+Note1: torchtext might default to older versions (e.g., v0.8) on some containers (typically those provided by AWS, which use older versions of Python that don't align well with the newer torchtext versions). In that case, the .legacy submodule path needs to be removed from the import directives in the .py files and Jupyter notebooks.
+Note2: There are multiple Python packages on pip that provide serial port implementations, with conflicting function/object names too. Although the package used here gets imported with "import serial", it needs to be installed via "pip install pyserial", not "pip install serial". Make sure you get the correct version.

assets/ai8x-nlp-demo.gif ADDED Viewed

Git LFS Details

SHA256: c7100951ce0b1aa5809782f5a27f1586c6a20f991844914755dca8f20cf6e32a
Pointer size: 132 Bytes
Size of remote file: 2.53 MB

assets/demo.elf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:504440cab7269b333570f11888979dd63e610bcfe9e84466a0f3dca79b49ebda
+size 2483932

assets/en.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f774c53ea142a16a7e507a67e46d882755e0b052604ea9f8afb4e51ccd48f894
+size 394357

assets/es.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee2fab6b130bffdc8748cd8ce8330fba8406eb61a83cdb0128972067bdc0a82
+size 407380

assets/es2en_hw_cp6.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f30f0e64f114594c83761887ecc9dd6edac9433d6efa9b25929f767423302fc8
+size 9953564

dataloader.py ADDED Viewed

	@@ -0,0 +1,33 @@

+###########################################################################
+# NLP demo software by HyperbeeAI.                                        #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai"
+print("imported dataloader.py")
+print(license_statement)
+print("")
+from torchtext.legacy.datasets import TranslationDataset
+from torchtext.legacy.data import Field, BucketIterator
+import os
+class NewsDataset(TranslationDataset):
+    name = 'news-comm-v15'
+    @staticmethod
+    def sort_key(ex):
+        return len(ex.src)
+    @classmethod
+    def splits(cls, exts, fields, root='./',
+               train='news-comm-v15-all', validation='news-comm-v15-all-valid', test='news-comm-v15-all-test', **kwargs):
+        if 'path' not in kwargs:
+            expected_folder = os.path.join(root, cls.name)
+            path = expected_folder if os.path.exists(expected_folder) else None
+        else:
+            path = kwargs['path']
+            del kwargs['path']
+        return super(NewsDataset, cls).splits(exts, fields, path, root, train, validation, test, **kwargs)

demo.ipynb ADDED Viewed

	@@ -0,0 +1,620 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d3092ed4",
+   "metadata": {},
+   "source": [
+    "# NLP demo software by HyperbeeAI\n",
+    "\n",
+    "Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai \n",
+    "\n",
+    "### Deployment\n",
+    "\n",
+    "This notebook acts as the serial terminal that we use in the ai85 translation demo.\n",
+    "\n",
+    "- load parameter set\n",
+    "- run a test on the PC to determine what to expect from the chip\n",
+    "- run test on the chip via serial terminal on PC"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e6208384",
+   "metadata": {},
+   "source": [
+    "### Initialization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6c10cb53",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "imported utils.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported layers.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported functions.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported models.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported dataloader.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch, random\n",
+    "import numpy as np\n",
+    "import torch.nn as nn\n",
+    "from torchtext.legacy.datasets import TranslationDataset\n",
+    "from torchtext.legacy.data     import Field, BucketIterator\n",
+    "from utils      import tokenize_es, tokenize_en, tokenizer_es, tokenizer_en, TRG_PAD_IDX, \\\n",
+    "                       translate_sentence, calculate_bleu, license_statement\n",
+    "from models     import encoder, decoder, seq2seq\n",
+    "from dataloader import NewsDataset\n",
+    "\n",
+    "import serial"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "9966ccad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SEED = 1234\n",
+    "random.seed(SEED)\n",
+    "torch.manual_seed(SEED)\n",
+    "torch.cuda.manual_seed(SEED)\n",
+    "torch.backends.cudnn.deterministic = True\n",
+    "BATCH_SIZE  = 48"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6d864c26",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Working with device: cuda\n"
+     ]
+    }
+   ],
+   "source": [
+    "SRC = Field(tokenize = tokenize_es, \n",
+    "            init_token = tokenizer_es.token_to_id(\"<BOS>\"), \n",
+    "            eos_token = tokenizer_es.token_to_id(\"<EOS>\"), \n",
+    "            pad_token = tokenizer_es.token_to_id(\"<PAD>\"),\n",
+    "            unk_token = tokenizer_es.token_to_id(\"<UNK>\"),\n",
+    "            use_vocab = False,\n",
+    "            batch_first = True)\n",
+    "\n",
+    "TRG = Field(tokenize = tokenize_en, \n",
+    "            init_token = tokenizer_en.token_to_id(\"<BOS>\"), \n",
+    "            eos_token = tokenizer_en.token_to_id(\"<EOS>\"), \n",
+    "            pad_token = tokenizer_en.token_to_id(\"<PAD>\"),\n",
+    "            unk_token = tokenizer_en.token_to_id(\"<UNK>\"),\n",
+    "            use_vocab = False,\n",
+    "            batch_first = True)\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "#device = 'cpu'\n",
+    "print(\"Working with device:\", device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "7f1f2efb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data, valid_data, test_data = NewsDataset.splits(exts=('.es', '.en'), fields=(SRC, TRG))\n",
+    "train_iterator, valid_iterator, test_iterator = BucketIterator.splits(\n",
+    "    (train_data, valid_data, test_data),\n",
+    "    batch_size = BATCH_SIZE,\n",
+    "    device = device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ccd6c1fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enc   = encoder(device)\n",
+    "dec   = decoder(device, TRG_PAD_IDX)\n",
+    "model = seq2seq(enc, dec)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6ae348e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trained_checkpoint = \"assets/es2en_hw_cp6.pt\"\n",
+    "model.load_state_dict(torch.load(trained_checkpoint, map_location=device), strict=False);\n",
+    "model.to(device);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ddb1a23b",
+   "metadata": {},
+   "source": [
+    "### serial conversion functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "534e72f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def singlepass64_tensor2serial(seq_length, tensor):\n",
+    "    data       = tensor.cpu().detach().numpy();\n",
+    "    char_array = '';\n",
+    "\n",
+    "    i=0;\n",
+    "    while i < 64:\n",
+    "        for j in range(0,seq_length):\n",
+    "            ch3 = data[0,i+3,j].astype('int8')\n",
+    "            ch2 = data[0,i+2,j].astype('int8')\n",
+    "            ch1 = data[0,i+1,j].astype('int8')\n",
+    "            ch0 = data[0,i+0,j].astype('int8')\n",
+    "\n",
+    "            # 2s complements\n",
+    "            val3 = \"{0:#0{1}x}\".format(int(np.binary_repr(ch3, width=8), 2),4)\n",
+    "            val2 = \"{0:#0{1}x}\".format(int(np.binary_repr(ch2, width=8), 2),4)\n",
+    "            val1 = \"{0:#0{1}x}\".format(int(np.binary_repr(ch1, width=8), 2),4)\n",
+    "            val0 = \"{0:#0{1}x}\".format(int(np.binary_repr(ch0, width=8), 2),4)\n",
+    "\n",
+    "            char_array += val3[2:] + val2[2:] + val1[2:] + val0[2:]\n",
+    "\n",
+    "        i=i+4\n",
+    "        \n",
+    "    return char_array\n",
+    "\n",
+    "def twos_comp(val, bits):\n",
+    "    if (val & (1 << (bits - 1))) != 0:\n",
+    "        val = val - (1 << bits)\n",
+    "    return val\n",
+    "\n",
+    "def tensor_fromserial_singlepass64(char_array, seq_length, typetensor):\n",
+    "    out_tensor = torch.zeros_like(typetensor)\n",
+    "    i=0;\n",
+    "    while i < 64:\n",
+    "        for j in range(0, seq_length):\n",
+    "            cursor = (i*seq_length*2 + j*8); # seq_length*2 because we use 2 characters per element due to pyserial \\CR \\LF issue\n",
+    "            word   = char_array[cursor : cursor+8];\n",
+    "            \n",
+    "            # 2s complements\n",
+    "            val3 = twos_comp(int(word[0:2],16), 8)\n",
+    "            val2 = twos_comp(int(word[2:4],16), 8)\n",
+    "            val1 = twos_comp(int(word[4:6],16), 8)\n",
+    "            val0 = twos_comp(int(word[6:8],16), 8)\n",
+    "            \n",
+    "            out_tensor[0,i+3,j] = val3;\n",
+    "            out_tensor[0,i+2,j] = val2;\n",
+    "            out_tensor[0,i+1,j] = val1;\n",
+    "            out_tensor[0,i+0,j] = val0;\n",
+    "            \n",
+    "        i=i+4\n",
+    "\n",
+    "    return out_tensor\n",
+    "\n",
+    "def widemode_twos_comp(val, bits):\n",
+    "    if (val & (1 << (bits - 1))) != 0:\n",
+    "        val = ((val - (1 << bits)) >> 5) + 1\n",
+    "    return (val >> 5)\n",
+    "\n",
+    "def tensor_fromserial_widemode64(char_array, seq_length, typetensor):\n",
+    "    out_tensor = torch.zeros_like(typetensor)\n",
+    "    i=0;\n",
+    "    while i < 64:\n",
+    "        for j in range(0, seq_length):\n",
+    "            cursor = (i*seq_length*8 + j*32); # seq_length*8 now because we use 8 characters per element, same pyserial issue\n",
+    "            word   = char_array[cursor : cursor+32];\n",
+    "            \n",
+    "            # 2s complements\n",
+    "            val0 = twos_comp(int(word[0:8],16),   32)\n",
+    "            val1 = twos_comp(int(word[8:16],16),  32)\n",
+    "            val2 = twos_comp(int(word[16:24],16), 32)\n",
+    "            val3 = twos_comp(int(word[24:32],16), 32)\n",
+    "            \n",
+    "            out_tensor[0,i+0,j] = val0;\n",
+    "            out_tensor[0,i+1,j] = val1;\n",
+    "            out_tensor[0,i+2,j] = val2;\n",
+    "            out_tensor[0,i+3,j] = val3;\n",
+    "            \n",
+    "        i=i+4\n",
+    "\n",
+    "    return out_tensor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f248bc1d",
+   "metadata": {},
+   "source": [
+    "## Test"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "76d11d80",
+   "metadata": {},
+   "source": [
+    "### choose id of example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "cdbfd418",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "example_idx = 120"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26e82b50",
+   "metadata": {},
+   "source": [
+    "### on PC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "250dcc52",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "trg = but this won ’ t be the last answer , although for the time being it will drive corporate restructuring and the managerial mind .\n",
+      "\n",
+      "predicted trg = but this will not be the latest response , though it will now be the central force of corporate restructuring and managerial thinking .\n",
+      "\n",
+      "src = pero esto no será la última respuesta , aunque por ahora será la fuerza central de la reestructuración corporativa y el pensamiento gerencial .\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.to(device)\n",
+    "src = vars(test_data.examples[example_idx])['src']\n",
+    "trg = tokenizer_en.decode(vars(test_data.examples[example_idx])['trg'], skip_special_tokens=False)\n",
+    "print(f'trg = {trg}')\n",
+    "print(\"\")\n",
+    "translation = translate_sentence(src, SRC, TRG, model, device)\n",
+    "print(f'predicted trg = {translation}')\n",
+    "print(\"\")\n",
+    "src = tokenizer_es.decode(src, skip_special_tokens=False)\n",
+    "print(f'src = {src}')\n",
+    "print(\"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10e43fe8",
+   "metadata": {},
+   "source": [
+    "### on chip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "b7aa9adc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enc_pre = model.encoder.pre.to(device)\n",
+    "dec_pre = model.decoder.pre.to(device)\n",
+    "dec_i2w = model.decoder.fff.to(device)\n",
+    "\n",
+    "src = vars(test_data.examples[example_idx])['src']\n",
+    "trg = tokenizer_en.decode(vars(test_data.examples[example_idx])['trg'], skip_special_tokens=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "738e668a",
+   "metadata": {},
+   "source": [
+    "**MARK**\n",
+    "\n",
+    "The below cell starts running a serial terminal on this notebook. First run this cell, and when it says \"waiting for ai85\", load the \"assets/demo.elf\" program onto the ai85 chip, and start running it (type c in gdb). This should trigger the terminal here, and operation should resume normally.\n",
+    "\n",
+    "The cell is designed to translate a single sentence."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "0f5a5628",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ai85_demo_function():\n",
+    "    \n",
+    "    print(\"Please enter a Spanish sentence\")\n",
+    "    textinput = input()\n",
+    "    print(\"\")\n",
+    "    print(\"\")\n",
+    "\n",
+    "    src = (tokenizer_es.encode(textinput)).ids\n",
+    "    trg = tokenizer_en.decode(vars(test_data.examples[example_idx])['trg'], skip_special_tokens=False)\n",
+    "    with serial.Serial('/dev/ttyACM0', 115200) as ser: # , timeout=5 (not necessary, just for info)\n",
+    "        tokens     = src\n",
+    "        tokens     = [SRC.init_token] + tokens + [SRC.eos_token] + [SRC.pad_token] * (48 - 2 - len(tokens)) \n",
+    "        src_tensor = torch.LongTensor(tokens).unsqueeze(0).to(device)\n",
+    "\n",
+    "        batch_size = src_tensor.shape[0];\n",
+    "        src_len    = src_tensor.shape[1];\n",
+    "        enc_pre_d  = enc_pre(src_tensor, 0, src_len, batch_size);\n",
+    "        encarray   = singlepass64_tensor2serial(48, enc_pre_d);\n",
+    "\n",
+    "        #### to chip\n",
+    "        print(\"** shallow.AI ai85 demo **\")\n",
+    "        print(\"** loading demo to ai85 **\")\n",
+    "        line = ser.readline()\n",
+    "        while(line != b''):\n",
+    "            line = ser.readline()\n",
+    "            if(line == b'GJcav7Wf2kmhaXJdsO0QVzX3slsv96Ck\\r\\n'):\n",
+    "                ser.write(encarray.encode(encoding=\"ascii\"))\n",
+    "                line = ser.readline()\n",
+    "                break\n",
+    "\n",
+    "        trg_indexes = [TRG.init_token, ] + [TRG.pad_token] * (48 - 1) \n",
+    "\n",
+    "        done_decoding_flag = False\n",
+    "        for i in range(47):\n",
+    "            start_idx = max(0, i - 7)\n",
+    "            trg_tensor = torch.LongTensor(trg_indexes[start_idx:start_idx + 8]).unsqueeze(0).to(device)\n",
+    "            batch_size   = trg_tensor.shape[0]\n",
+    "            trg_len      = trg_tensor.shape[1]\n",
+    "            pos_start    = max(0, i - 7)\n",
+    "            dec_pre_d    = dec_pre(trg_tensor, pos_start, trg_len + pos_start, batch_size)\n",
+    "            decarray     = singlepass64_tensor2serial(8, dec_pre_d);\n",
+    "            while(line != b''):\n",
+    "                line = ser.readline()\n",
+    "                if(line == b'gZMFxLf6muLVf9P6Iyea56VbA4qktpUR\\r\\n'):\n",
+    "                    if(done_decoding_flag):\n",
+    "                        print(\"****** ai85 is done ******\")\n",
+    "                        decarray = \"done\" + decarray[4:]\n",
+    "                    ser.write(decarray.encode(encoding=\"ascii\"))\n",
+    "                    line = ser.readline()\n",
+    "                    break\n",
+    "\n",
+    "            if(done_decoding_flag):\n",
+    "                break\n",
+    "\n",
+    "            line = ser.readline()\n",
+    "            h2e_out = tensor_fromserial_widemode64(line, 1, dec_pre_d[:,:,0:1]) / (128.0 * 2**(5+1))\n",
+    "            output  = dec_i2w(h2e_out.permute(0, 2, 1))\n",
+    "            pred_token = output.argmax(2)\n",
+    "            trg_indexes[i + 1] = pred_token\n",
+    "            if pred_token == TRG.eos_token:\n",
+    "                done_decoding_flag = True\n",
+    "                \n",
+    "    try:\n",
+    "        trg_indexes = trg_indexes[1:trg_indexes.index(TRG.eos_token)]\n",
+    "    except ValueError: \n",
+    "        trg_indexes = trg_indexes[1:]\n",
+    "\n",
+    "    trg_tokens = tokenizer_en.decode(trg_indexes, skip_special_tokens=False)\n",
+    "    \n",
+    "    print(\"\")\n",
+    "    print(\"\")\n",
+    "    print(\"English translation on ai85:\")\n",
+    "    print(f'{trg_tokens}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "af1aa370",
+   "metadata": {},
+   "source": [
+    "# NLP demo software by HyperbeeAI\n",
+    "\n",
+    "Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "7df357a0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Please enter a Spanish sentence\n",
+      "La vinculación entre el crecimiento económico y el bienestar humano parece evidente.\n",
+      "\n",
+      "\n",
+      "** shallow.AI ai85 demo **\n",
+      "** loading demo to ai85 **\n",
+      "****** ai85 is done ******\n",
+      "\n",
+      "\n",
+      "English translation on ai85:\n",
+      "the link between economic growth and human welfare seems clear .\n"
+     ]
+    }
+   ],
+   "source": [
+    "ai85_demo_function()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e7577a0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52a397de",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96f7b68e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3fae6816",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a92e88d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e60ac632",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f982aec",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bfbc6cfc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b59b5243",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61b8c8d3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "459a0550",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82cc8933",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9e43f05",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04c6aee2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "de644855",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

evaluation.ipynb ADDED Viewed

	@@ -0,0 +1,257 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "acb67391",
+   "metadata": {},
+   "source": [
+    "# NLP demo software by HyperbeeAI\n",
+    "\n",
+    "Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai \n",
+    "\n",
+    "### Evaluation\n",
+    "\n",
+    "This notebook evaluates the model on the test set with chosen examples, and calculates the BLEU score. A simulation of the ai85 chip implemented in pytorch is used for this purpose. See imported .py modules for further info."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3899e26e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "imported utils.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported layers.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported functions.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported models.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n",
+      "imported dataloader.py\n",
+      "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch, random\n",
+    "import torch.nn as nn\n",
+    "from torchtext.legacy.datasets import TranslationDataset\n",
+    "from torchtext.legacy.data     import Field, BucketIterator\n",
+    "from utils      import tokenize_es, tokenize_en, tokenizer_es, tokenizer_en, TRG_PAD_IDX, \\\n",
+    "                       translate_sentence, calculate_bleu\n",
+    "from models     import encoder, decoder, seq2seq\n",
+    "from dataloader import NewsDataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "812af6e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SEED = 1234\n",
+    "random.seed(SEED)\n",
+    "torch.manual_seed(SEED)\n",
+    "torch.cuda.manual_seed(SEED)\n",
+    "torch.backends.cudnn.deterministic = True\n",
+    "BATCH_SIZE  = 48"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b5717979",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Working with device: cuda\n"
+     ]
+    }
+   ],
+   "source": [
+    "SRC = Field(tokenize = tokenize_es, \n",
+    "            init_token = tokenizer_es.token_to_id(\"<BOS>\"), \n",
+    "            eos_token = tokenizer_es.token_to_id(\"<EOS>\"), \n",
+    "            pad_token = tokenizer_es.token_to_id(\"<PAD>\"),\n",
+    "            unk_token = tokenizer_es.token_to_id(\"<UNK>\"),\n",
+    "            use_vocab = False,\n",
+    "            batch_first = True)\n",
+    "\n",
+    "TRG = Field(tokenize = tokenize_en, \n",
+    "            init_token = tokenizer_en.token_to_id(\"<BOS>\"), \n",
+    "            eos_token = tokenizer_en.token_to_id(\"<EOS>\"), \n",
+    "            pad_token = tokenizer_en.token_to_id(\"<PAD>\"),\n",
+    "            unk_token = tokenizer_en.token_to_id(\"<UNK>\"),\n",
+    "            use_vocab = False,\n",
+    "            batch_first = True)\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "#device = 'cpu'\n",
+    "print(\"Working with device:\", device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5819e256",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data, valid_data, test_data = NewsDataset.splits(exts=('.es', '.en'), fields=(SRC, TRG))\n",
+    "_, _, test_iterator = BucketIterator.splits(\n",
+    "    (train_data, valid_data, test_data),\n",
+    "    batch_size = BATCH_SIZE,\n",
+    "    device = device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a2cbdf99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enc   = encoder(device)\n",
+    "dec   = decoder(device, TRG_PAD_IDX)\n",
+    "model = seq2seq(enc, dec)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "516e80e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trained_checkpoint = \"assets/es2en_hw_cp6.pt\"\n",
+    "res = model.load_state_dict(torch.load(trained_checkpoint, map_location=device), strict=False);\n",
+    "model.to(device);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "14a2a9ef",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Example from test data:\n",
+      "trg = for a relatively poor country like china , real unions could help balance employers ’ power , bringing quality - of - life benefits that outweigh the growth costs .\n",
+      "\n",
+      "predicted trg = for a relatively poor country as china , the existence of real unions could help balance employers ’ power , generating higher life benefits than the costs for growth .\n",
+      "\n",
+      "src = para un país relativamente pobre como es china , la existencia de sindicatos reales podría ayudar a equilibrar el poder de los empleadores , generando beneficios de calidad de vida mayores que los costes para el crecimiento .\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Example from test data:\")\n",
+    "example_idx = 800\n",
+    "src = vars(test_data.examples[example_idx])['src']\n",
+    "trg = tokenizer_en.decode(vars(test_data.examples[example_idx])['trg'], skip_special_tokens=False)\n",
+    "print(f'trg = {trg}')\n",
+    "print(\"\")\n",
+    "translation = translate_sentence(src, SRC, TRG, model, device)\n",
+    "print(f'predicted trg = {translation}')\n",
+    "print(\"\")\n",
+    "src = tokenizer_es.decode(src, skip_special_tokens=False)\n",
+    "print(f'src = {src}')\n",
+    "print(\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7e64577f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "1it [00:00,  5.08it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Evaluate on bleu:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "3998it [14:55,  4.47it/s]\n",
+      "That's 100 lines that end in a tokenized period ('.')\n",
+      "It looks like you forgot to detokenize your test data, which may hurt your score.\n",
+      "If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BLEU score:\n",
+      "{'score': 28.35048236992193, 'counts': [57540, 32851, 20648, 13309], 'totals': [100210, 96590, 92970, 89354], 'precisions': [57.41941921963876, 34.01076716016151, 22.209314832741743, 14.894688542202923], 'bp': 1.0, 'sys_len': 100210, 'ref_len': 91115}\n"
+     ]
+    }
+   ],
+   "source": [
+    "b_score    = calculate_bleu(test_data, SRC, TRG, model, device)\n",
+    "print('BLEU score:')\n",
+    "print(b_score)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd6ae971",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

functions.py ADDED Viewed

	@@ -0,0 +1,70 @@

+###########################################################################
+# NLP demo software by HyperbeeAI.                                        #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai"
+print("imported functions.py")
+print(license_statement)
+print("")
+import torch, sys
+import torch.nn as nn
+from torch.autograd import Function
+class Q_ud(Function):
+    @staticmethod
+    def forward(_, x, xb):
+        factor = 2**(xb-1)
+        return x.mul(factor).add(.5).floor().div(factor)
+class Q_u(Function):
+    @staticmethod
+    def forward(_, x, xb):
+        factor = 2**(8-xb)
+        return x.mul(factor).add(.5).floor()
+class Q_d(Function):
+    @staticmethod
+    def forward(_, x, xb):
+        factor = 2**(xb-1)
+        return x.div(factor).add(.5).floor()
+class quantization(nn.Module):
+    def __init__(self, xb = 8, mode='updown', wide=False):
+        super().__init__()
+        self.xb   = xb
+        self.mode = mode
+        self.wide = wide
+    def forward(self, x):
+        if(self.mode=='updown'):
+            return Q_ud.apply(x, self.xb)
+        elif(self.mode=='down'):
+            if(self.wide):
+                return Q_d.apply(x, self.xb - 5)
+            else:
+                return Q_d.apply(x, self.xb)
+        elif(self.mode=='up'):
+            return Q_u.apply(x, self.xb)
+        else:
+        	print('wrong quantization mode. exiting')
+        	sys.exit()
+class clamping_hw(nn.Module):
+    def __init__(self, xb = 8, wide=False):
+        super().__init__()
+        if(wide):
+            self.min_val = -2**(30-1)
+            self.max_val =  2**(30-1)-1
+        else:
+            self.min_val = -2**(xb-1)
+            self.max_val =  2**(xb-1)-1
+    def forward(self, x):
+        return x.clamp(min=self.min_val, max=self.max_val)
+###################################################
+### Linear layer functional
+def linear_functional(x, weight, bias, _stride, _padding):
+    # dummy linear function that has same arguments as conv
+    return nn.functional.linear(x, weight, bias)

layers.py ADDED Viewed

	@@ -0,0 +1,169 @@

+###########################################################################
+# NLP demo software by HyperbeeAI.                                        #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai"
+print("imported layers.py")
+print(license_statement)
+print("")
+import torch, sys
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function
+from functions import quantization, clamping_hw, linear_functional
+class ai85_base(nn.Module):
+    """Common base for the quantized ai85 layers in this file.
+    Wraps an operation module (which supplies ``weight``, ``bias``, ``stride``
+    and ``padding``), the matching functional, and an optional activation.
+    forward() quantizes weight and bias, applies the functional, scales by
+    ``2**output_shift``, applies the activation, then quantizes and clamps the
+    result through either the 8-bit or the wide output path.
+    """
+    def __init__(
+            self,
+            operation_module  = None,
+            operation_fcnl    = None,
+            activation_module = None,
+            output_width_30b  = False
+    ):
+        """Store the wrapped pieces and build the default quantizers/clamps.
+        Args:
+            operation_module:  module holding weight/bias/stride/padding.
+            operation_fcnl:    callable invoked as f(x, w, b, stride, padding).
+            activation_module: activation applied after scaling, or None.
+            output_width_30b:  request the wide output path (used by forward()
+                               only when there is no activation).
+        """
+        super().__init__()
+        self.op               = operation_module
+        self.op_fcn           = operation_fcnl
+        self.act              = activation_module
+        self.wide             = output_width_30b
+        # Placeholders; the real modules are created in configure_layer_base().
+        self.quantize_Q_d_8b    = None
+        self.quantize_Q_u_wb    = None
+        self.quantize_Q_d_wide  = None
+        self.clamp_C_hw_8b      = None
+        self.clamp_C_hw_wide    = None
+        # Layer settings are kept as non-trainable Parameters (requires_grad=False).
+        self.output_shift        = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False)
+        self.weight_bits         = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False)
+        self.bias_bits           = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False)
+        self.quantize_activation = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False)
+        self.adjust_output_shift = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False)
+        self.shift_quantile      = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False)
+        weight_bits      = self.weight_bits
+        bias_bits        = self.bias_bits
+        shift_quantile   = self.shift_quantile
+        self.configure_layer_base( weight_bits, bias_bits, shift_quantile )
+    def configure_layer_base(self, weight_bits, bias_bits, shift_quantile):
+        """(Re)build the quantizers and clamps and re-wrap the bit settings.
+        ``weight_bits`` is handed to the 'up' quantizer as its bit-width; the
+        three arguments are then stored again as non-trainable Parameters.
+        """
+        self.quantize_Q_d_8b    = quantization(xb = 8,           mode ='down'   , wide=False) # 8 here is activation bits
+        self.quantize_Q_u_wb    = quantization(xb = weight_bits, mode ='up'     , wide=False)
+        self.quantize_Q_d_wide  = quantization(xb = 8,           mode ='down'   , wide=True)  # 8 here is activation bits, but it is wide, so check inside quantization
+        self.clamp_C_hw_8b    = clamping_hw(xb = 8,           wide=False) # 8 here is activation bits
+        self.clamp_C_hw_wide  = clamping_hw(xb = None,        wide=True)  # None to avoid misleading info on the # of bits, check inside clamping_hw
+        self.weight_bits     = nn.Parameter(torch.Tensor([ weight_bits    ]), requires_grad=False)
+        self.bias_bits       = nn.Parameter(torch.Tensor([ bias_bits      ]), requires_grad=False)
+        self.shift_quantile  = nn.Parameter(torch.Tensor([ shift_quantile ]), requires_grad=False)
+    def forward(self, x):
+        """Run the quantized operation on ``x`` and return the clamped output."""
+        w = self.op.weight
+        b = self.op.bias
+        los  = self.output_shift
+        s_o  = 2**(los)
+        # Weight and bias both go through the same 'up' quantizer (weight_bits).
+        w_q = self.quantize_Q_u_wb(w);
+        b_q = self.quantize_Q_u_wb(b);
+        x = self.op_fcn(x, w_q, b_q, self.op.stride, self.op.padding) # convolution / linear
+        x = x*s_o
+        if(self.act is not None):
+            x = self.act(x)
+        # The wide path is taken only when wide is set AND there is no activation;
+        # a wide layer with an activation falls through to the 8-bit path.
+        if((self.wide) and (self.act is None)):
+            x = self.quantize_Q_d_wide(x)
+            x = self.clamp_C_hw_wide(x)
+            ### Original author's note: the chip adds 5 fractional bits to the number in wide mode,
+            ### so we divide by 2**5 here to bring it back into range. ai8x-training does not do this
+            ### until the synthesis/deployment phase, and they use +1 bit (reason unknown).
+            x = x / (2**(5)); # this is simulation of chip behavior
+            x = x / 128.0     # this is ours, for convenience + this part is done outside the chip since it's the step before table lookup
+            x = x / 2.0;      # this is ours, for convenience + this part is done outside the chip since it's the step before table lookup
+        else:
+            x = self.quantize_Q_d_8b(x)
+            x = self.clamp_C_hw_8b(x)
+        return x
+class ai85_conv1d(ai85_base):
+    def __init__(
+            self,
+            C_in_channels      = None,
+            D_out_channels     = None,
+            K_kernel_dimension = None,
+            padding            = 0,
+            activation         = None,
+            output_width_30b   = False,
+    ):
+        if(activation is None):
+            activation_fcn = None;
+        elif(activation == 'relu'):
+            activation_fcn = nn.ReLU(inplace=True);
+        else:
+            print('wrong activation type in model. only {relu} is acceptable. exiting')
+            sys.exit()
+        operation_mdl  = nn.Conv1d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension, stride=1, padding=padding, bias=True);
+        operation_fcn  = nn.functional.conv1d
+        super().__init__(
+            activation_module  = activation_fcn,
+            operation_module   = operation_mdl,
+            operation_fcnl     = operation_fcn,
+            output_width_30b   = output_width_30b,
+        )
+class ai85_add(nn.Module):
+    def __init__(self ):
+        super().__init__()
+        self.clamp_C_hw_8b    = clamping_hw( xb = 8, wide=False) # 8 here is activation bits
+    def forward(self, x, res):
+        x = self.clamp_C_hw_8b(x+res)
+        return x
+class ai85_fullyconnected(ai85_base):
+    def __init__(
+            self,
+            in_features        = None,
+            out_features       = None,
+            activation         = None,
+            output_width_30b   = False):
+        if(activation is None):
+            activation_fcn = None;
+        elif(activation == 'relu'):
+            activation_fcn = nn.ReLU(inplace=True);
+        else:
+            print('wrong activation type in model. only {relu} is acceptable. exiting')
+            sys.exit()
+        operation_mdl  = nn.Linear(in_features, out_features, bias=True);
+        operation_fcn  = linear_functional
+        super().__init__(
+            activation_module  = activation_fcn,
+            operation_module   = operation_mdl,
+            operation_fcnl     = operation_fcn,
+            output_width_30b   = output_width_30b
+        )
+        # Define dummy arguments to make Linear and conv compatible in ai85_base.
+        # the name "op" here refers to op in super, i.e., in base_layer
+        self.op.stride = None
+        self.op.padding = None
+class lpre(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.ee1 = nn.Embedding(16384, 64)
+        self.ee2 = nn.Embedding(48, 64)
+        self.quantize  = quantization(xb = 8, mode ='updown', wide=False)
+    def forward(self, x, sp1, sp2, sb):
+        pp= torch.arange(sp1, sp2).unsqueeze(0).repeat(sb, 1).to(x.device)
+        ee2_d = self.ee2(pp)
+        ee1_d = self.ee1(x)
+        ed = ee1_d + ee2_d
+        min_w = self.ee2.weight.data.min() + self.ee1.weight.data.min()
+        max_w = self.ee2.weight.data.max() + self.ee1.weight.data.max()
+        t = (ed - min_w) / (max_w - min_w)
+        t = t.add(-0.5).mul(2.0)
+        t = self.quantize(t)
+        t = t.clamp(min= -1.0, max=1.0-(1.0/128.0))
+        t = t.mul(2**(8-1)).add(0.5).floor().clamp(min=-128, max=127)
+        return t.permute(0, 2, 1)

models.py ADDED Viewed

	@@ -0,0 +1,240 @@

+###########################################################################
+# NLP demo software by HyperbeeAI.                                        #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai"
+print("imported models.py")
+print(license_statement)
+print("")
+import torch
+import torch.nn as nn
+import layers
+class encoder_ai85cnn(nn.Module):
+    def __init__(
+            self,
+            device,
+            **kwargs
+    ):
+        super().__init__()
+        self.cc0     = layers.ai85_conv1d(  64,      112,       1,           0, activation=None)
+        self.cc1     = layers.ai85_conv1d( 112,      112,       3,           1, activation='relu')
+        self.res1    = layers.ai85_add()
+        self.cc2     = layers.ai85_conv1d( 112,      112,       3,           1, activation='relu')
+        self.res2    = layers.ai85_add()
+        self.cc3     = layers.ai85_conv1d( 112,      112,       3,           1, activation='relu')
+        self.res3    = layers.ai85_add()
+        self.cc4     = layers.ai85_conv1d( 112,      112,       3,           1, activation='relu')
+        self.res4    = layers.ai85_add()
+        self.cc5     = layers.ai85_conv1d( 112,      64 ,       1,           0, activation=None)
+        self.resg    = layers.ai85_add()
+        self.device  = device
+    def forward(self, x):
+        r = self.cc0(x)
+        t = self.cc1( r )
+        r = self.res1(t, r)
+        t = self.cc2( r )
+        r = self.res2(t, r)
+        t = self.cc3( r )
+        r = self.res3(t, r)
+        t = self.cc4( r )
+        r = self.res4(t, r)
+        t = self.cc5(r)
+        y = self.resg(t, x)
+        return y
+class encoder(nn.Module):
+    def __init__(
+            self,
+            device,
+            **kwargs
+    ):
+        super().__init__()
+        self.pre       = layers.lpre()
+        self.cnn       = encoder_ai85cnn(device = device);
+        self.device    = device
+    def forward(self, x):
+        ssb   = x.shape[0]
+        sl    = x.shape[1]
+        pre_d = self.pre(x, 0, sl, ssb)
+        out   = self.cnn(pre_d)
+        return out, pre_d
+class decoder_ai85cnn_ccf(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op = layers.ai85_conv1d(   112,    64 ,       1,           0, activation=None, output_width_30b=True)
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_cpr(nn.Module):
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.layer1   = layers.ai85_conv1d( 64*2,  64,       1,           0,       activation='relu')
+        self.layer2   = layers.ai85_conv1d( 64,    64,       1,           0,       activation='relu')
+    def forward(self, x):
+        x = self.layer1(x)
+        y = self.layer2(x)
+        return y
+class decoder_ai85cnn_cl1(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op    = layers.ai85_conv1d( 112,     112,       3,           0, activation='relu')
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_cma(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op = layers.ai85_conv1d(   64,     112,       1,           0, activation=None)
+        self.res= layers.ai85_add()
+    def forward(self, x, res):
+        t = self.op(x)
+        y = self.res(t, res)
+        return y
+class decoder_ai85cnn_claa(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op    = layers.ai85_conv1d( 112,     112,       3,           0, activation='relu')
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_cl0(nn.Module):
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.op = layers.ai85_conv1d(   64,     112,       1,           0, activation=None)
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_clfa(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op    = layers.ai85_conv1d( 112,     112,       3,           0, activation='relu')
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_ccac(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op    = layers.ai85_conv1d( 112,     112,       3,           0, activation='relu')
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder_ai85cnn_cib(nn.Module):
+    def __init__(self,  **kwargs):
+        super().__init__()
+        self.op = layers.ai85_conv1d(   112,    64 ,       1,           0, activation=None)
+    def forward(self, x):
+        y = self.op(x)
+        return y
+class decoder(nn.Module):
+    def __init__(
+            self,
+            device,
+            tpi,
+            **kwargs
+    ):
+        super().__init__()
+        self.device       = device
+        self.tpi          = tpi
+        self.pre          = layers.lpre()
+        self.fff          = nn.Linear(64, 16384)
+        self.fff.weight   = self.pre.ee1.weight    # i.e., fff is not a layer, this is just an easy way of doing reverse embedding on pytorch
+        self.cl0          = decoder_ai85cnn_cl0();
+        self.ccf          = decoder_ai85cnn_ccf();
+        self.cib          = decoder_ai85cnn_cib();
+        self.cma          = decoder_ai85cnn_cma();
+        self.cpr          = decoder_ai85cnn_cpr();
+        self.cl1          = decoder_ai85cnn_cl1();
+        self.claa         = decoder_ai85cnn_claa();
+        self.clfa         = decoder_ai85cnn_clfa();
+        self.ccac         = decoder_ai85cnn_ccac();
+    def forward(self, x, ees , pss=0):
+        ssb = x.shape[0]
+        sst = x.shape[1]
+        sl  = ees.shape[2]
+        pre_d          = self.pre(x, pss, sst + pss, ssb)
+        t              = self.cl0(pre_d)
+        cl0_out        = t
+        ssb, ts1, _    = t.shape
+        tp             = torch.zeros(ssb, ts1, 2).fill_(self.tpi).to(t.device)
+        t              = torch.cat((tp, t), dim = 2)
+        xconv          = self.cl1(t)
+        t              = self.cib(xconv)
+        ssb, ss_p, sst = t.shape
+        x2             = ees.unsqueeze(3).repeat(1, 1, 1, sst).view(ssb, ss_p, -1)
+        t              = t.unsqueeze(2).repeat(1, 1, sl, 1).view(ssb, ss_p, -1)
+        t              = torch.cat([t, x2], dim=1)
+        t              = self.cpr(t)
+        t              = t.view(ssb, ss_p, sl, sst)
+        t              = torch.max(t, dim=2).values
+        t              = self.cma(t, xconv)
+        t              = torch.cat((tp, t), dim = 2)
+        xconv          = self.claa(t)
+        t              = self.cib(xconv)
+        t              = t.unsqueeze(2).repeat(1, 1, sl, 1).view(ssb, ss_p, -1)
+        t              = torch.cat([t, x2], dim=1)
+        t              = self.cpr(t)
+        t              = t.view(ssb, ss_p, sl, sst)
+        t              = torch.max(t, dim=2).values
+        t              = self.cma(t, xconv)
+        t              = torch.cat((tp, t), dim = 2)
+        xconv          = self.clfa(t)
+        t              = self.cib(xconv)
+        t              = t.unsqueeze(2).repeat(1, 1, sl, 1).view(ssb, ss_p, -1)
+        t              = torch.cat([t, x2], dim=1)
+        t              = self.cpr(t)
+        t              = t.view(ssb, ss_p, sl, sst)
+        t              = torch.max(t, dim=2).values
+        t              = self.cma(t, xconv)
+        t              = torch.cat((tp, t), dim = 2)
+        xconv          = self.ccac(t)
+        t              = self.cib(xconv)
+        t              = t.unsqueeze(2).repeat(1, 1, sl, 1).view(ssb, ss_p, -1)
+        t              = torch.cat([t, x2], dim=1)
+        t              = self.cpr(t)
+        t              = t.view(ssb, ss_p, sl, sst)
+        t              = torch.max(t, dim=2).values
+        t              = self.cma(t, xconv)
+        pss            = t + sst
+        ccf_out        = self.ccf(t)
+        output         = self.fff(ccf_out.permute(0, 2, 1))
+        return output, pre_d, ccf_out
+class seq2seq(nn.Module):
+    def __init__(self, encoder, decoder):
+        super().__init__()
+        self.encoder = encoder
+        self.decoder = decoder
+    def forward(self, src, trg):
+        enc_out, _ = self.encoder(src)
+        output, _, _  = self.decoder(trg, enc_out)
+        return output

news-comm-v15/news-comm-v15-all-test.en ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:757cea85bddca13bdbb0d4dbc187f748d3b97a4e04a5360b6ce7235c38b85261
+size 562915

news-comm-v15/news-comm-v15-all-test.es ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f459d6c1333abd7c545e0fd140e248dcfd05135562f25e14ffc6a98d3bccaa5
+size 654959

news-comm-v15/news-comm-v15-all-valid.en ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112
+size 1

news-comm-v15/news-comm-v15-all-valid.es ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112
+size 1

news-comm-v15/news-comm-v15-all.en ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e0bfde74c1665f5b44edfe370780d9cffc413768a6ad2e1530e1e42d0b77ae2
+size 201

news-comm-v15/news-comm-v15-all.es ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0cbc37784a40546152cd146c8f4468e44bb4a23921c51d19b1309fbd0e63200
+size 259

news-comm-v15/readme ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Test data sampled from:
2	+ https://data.statmt.org/news-commentary/v15/training/news-commentary-v15.en-es.tsv.gz

utils.py ADDED Viewed

	@@ -0,0 +1,99 @@

+###########################################################################
+# NLP demo software by HyperbeeAI.                                        #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai"
+print("imported utils.py")
+print(license_statement)
+print("")
+import torch
+import layers
+from tokenizers import Tokenizer
+import time, torch, datasets
+from tqdm import tqdm
+tokenizer_en = None
+tokenizer_es = None
+def tokenize_es(text):
+    return tokenizer_es.encode(text).ids[:48 - 2]
+def tokenize_en(text):
+    return tokenizer_en.encode(text).ids[:48 - 1]
+def translate_sentence(sentence, src_field, trg_field, model, device):
+    model.eval()
+    if isinstance(sentence, str):
+        tokens = tokenize_es(sentence)
+    else:
+        tokens = sentence
+    tokens = [src_field.init_token] + tokens + [src_field.eos_token] + [src_field.pad_token] * (48 - 2 - len(tokens))
+    src_tensor = torch.LongTensor(tokens).unsqueeze(0).to(device)
+    with torch.no_grad():
+        enc_out, _ = model.encoder(src_tensor)
+    trg_indexes = [trg_field.init_token, ] + [trg_field.pad_token] * (48 - 1)
+    for i in range(48 - 1):
+        start_idx = max(0, i - 7)
+        trg_tensor = torch.LongTensor(trg_indexes[start_idx:start_idx + 8]).unsqueeze(0).to(device)
+        with torch.no_grad():
+            output, _, _ = model.decoder(trg_tensor, enc_out, max(0, i - 7))
+        pred_token = output.argmax(2)[:, min(i, 7)].item()
+        trg_indexes[i + 1] = pred_token
+        if pred_token == trg_field.eos_token:
+            break
+    try:
+        trg_indexes = trg_indexes[1:trg_indexes.index(trg_field.eos_token)]
+    except ValueError:
+        trg_indexes = trg_indexes[1:]
+    trg_tokens = tokenizer_en.decode(trg_indexes, skip_special_tokens=False)
+    return trg_tokens
+def calculate_bleu(data, src_field, trg_field, model, device, spiece=False, output_file = f"test.{time.time()}.out"):
+    if spiece:
+        from tokenizers import pre_tokenizers
+        pre_tokenizer = pre_tokenizers.Digits(individual_digits=True)
+    else:
+        pre_tokenizer = tokenizer_en.pre_tokenizer
+    trgs = []
+    pred_trgs = []
+    print('Evaluate on bleu:')
+    for src, trg in tqdm(zip(open("news-comm-v15/news-comm-v15-all-test.es"), open("news-comm-v15/news-comm-v15-all-test.en"))):
+        if len(src) < 3 or len(trg) < 3:
+            continue
+        normalized = pre_tokenizer.pre_tokenize_str(tokenizer_en.normalizer.normalize_str(trg))
+        if len(normalized) > 48:
+            continue
+        trgs.append([ " ".join(map(lambda x: x[0], normalized)) ])
+        pred_trg = translate_sentence(src, src_field, trg_field, model, device)
+        pred_trgs.append(pred_trg)
+    with open(output_file, "w") as fo:
+        fo.write("\n".join(pred_trgs))
+    sacrebleu = datasets.load_metric('sacrebleu')
+    return sacrebleu.compute(predictions=pred_trgs, references=trgs)
+tokenizer_es = Tokenizer.from_file(f"assets/es.json")
+tokenizer_en = Tokenizer.from_file(f"assets/en.json")
+TRG_PAD_IDX  = tokenizer_en.token_to_id("<PAD>")