{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "axhNf7ZHqblZ"
      },
      "source": [
        "# Model set-up\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 338,
          "referenced_widgets": [
            "577cf34de0614f64a76c68fab94eb968",
            "55daffcad1da49a3bf39a0f25f964c16",
            "6d1da3c2efcb4f238cf1734e172d7ea8",
            "b3249294ef9349a484a3b2f6199c1807",
            "a1f4fcb471da44878b42cc16f9791034",
            "9af1074a7ccc43f893c0a0b216cf1c0b",
            "1ad78a56bf4b4555b58676c6849b548b",
            "e8a023b93abb4efd9a664a8e270efedf",
            "779dd02a5f7f49dcbb631e900b4dea30",
            "6da91cce06ed496abfd08c19e99178db",
            "283a21c83451452d8969f6411397bb16",
            "fefcb0aae6e940c6b70fafbf4788ea0b",
            "9c9d71b94fac443fac8d17599474f819",
            "150019c3216543c6b4f489e30312e576",
            "9631f2bc95724c28bcdb2393c91aced7",
            "e4b7fd9d52df46328c6b6029409825b6",
            "4d92351da2e946eb891e768dd6e7f3c6",
            "32020436e6d04da693d120e5fc1a3ed3",
            "477c0d00dbfd45c3be98a6ff8978548d",
            "f1954f797f724e9383a5bc204185915e",
            "f285c11341cc4b1bb52b26016521c33e",
            "2f1cff6ac11a49088b3b5082617b24e4",
            "a0045505bdc04c4c96c4c11889a15b91",
            "caec94d8946b4de2a198bbbc529b636f",
            "ea3f1e329a614f50995713e654272d09",
            "d040060ce8ee425691eafeaa30a463c8",
            "169719f8fb5b415694731802bfd13eff",
            "087e6131e2204e85976c0b7d9814c066",
            "0961d49693c64270951f7c845cacc374",
            "b36a1c036916422fb093fe0403428e2e",
            "ce11e3708d9d4f5e8916d38d136dcba8",
            "5a9ce9ccbae1461384d06d5ee1bbce85",
            "de9e97e95fc84ae3a665ea7f35465ec0",
            "b7a90f8a364b4ac9b60c4ec6abef89c3",
            "1b19f33407cd44e4a4329e8cc1144d73",
            "56e1282076b143aa95a62c70a99d7373",
            "e196b029379d497c8cae678d23ee2ce6",
            "f6dbb7249c1f4ecd8a29f3d21238eef1",
            "7006f450d55b4e59ab2a834ef016c11a",
            "ba712b4d81924176881021e95484ced9",
            "ed73d5ddb3864006b175fe52ef795fb4",
            "435a2cc7ffb14827a90ec445fb8d9977",
            "e1c71907bb7c40ffb79b8cf52888052a",
            "4427805fbf7540388c2cee3e5b6123fb",
            "f5f38f0582b34c8599dabed07199ee9a",
            "80930655f9254c66b4a676fca773465b",
            "0991cf83ad504b89a03b1e4021688b78",
            "6e7269dd900b43899cd2ca4afb0cc23e",
            "ac385be2db5144e8a591837aaf403874",
            "d1ed211f2ad24d80b782946fcf366f67",
            "0788436212f24f98b90ff780463ff6cb",
            "9bdd6ab3ea8d4a2d947e7111dc29654b",
            "2f22a44b0c6f4e1e93e32a6f64caee85",
            "b9bf6fdd7ec94776ac565c5824aad89c",
            "f9bd1bfd216941a69c4200096648d208",
            "9ff030ff167945d289853c74190c8cda",
            "c467413a1af041ef8f2942d4d84bb50a",
            "9c8f7f4b6d6a4b0a98a2a86d17ab10f7",
            "f1376e789ccd4c35877f295384cf9c40",
            "e7eab4e2d8c448cfa962a9e2540c5270",
            "ca448d6c9ae0451ab3b3b8bec84b6200",
            "176633a381ce4caf90de05cf34dfadc9",
            "01596c886f3949f9858b6d9d1f6f7c79",
            "354537c460fd476f830d632ae8579e60",
            "8fed23da4e754ff2b340d00c0cc79250",
            "cfc7ccfab053430485b94b320cc08cb4"
          ]
        },
        "id": "az8A_hSIlJcw",
        "outputId": "0a8e2918-0201-4dce-bedf-c2e72cc8495b"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/mnt/e/TOM/Learning/Projects/pronunciation-error-detector/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
            "  from .autonotebook import tqdm as notebook_tqdm\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "Wav2Vec2ForCTC(\n",
              "  (wav2vec2): Wav2Vec2Model(\n",
              "    (feature_extractor): Wav2Vec2FeatureEncoder(\n",
              "      (conv_layers): ModuleList(\n",
              "        (0): Wav2Vec2LayerNormConvLayer(\n",
              "          (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))\n",
              "          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n",
              "          (activation): GELUActivation()\n",
              "        )\n",
              "        (1-4): 4 x Wav2Vec2LayerNormConvLayer(\n",
              "          (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))\n",
              "          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n",
              "          (activation): GELUActivation()\n",
              "        )\n",
              "        (5-6): 2 x Wav2Vec2LayerNormConvLayer(\n",
              "          (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))\n",
              "          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n",
              "          (activation): GELUActivation()\n",
              "        )\n",
              "      )\n",
              "    )\n",
              "    (feature_projection): Wav2Vec2FeatureProjection(\n",
              "      (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n",
              "      (projection): Linear(in_features=512, out_features=1024, bias=True)\n",
              "      (dropout): Dropout(p=0.0, inplace=False)\n",
              "    )\n",
              "    (encoder): Wav2Vec2EncoderStableLayerNorm(\n",
              "      (pos_conv_embed): Wav2Vec2PositionalConvEmbedding(\n",
              "        (conv): ParametrizedConv1d(\n",
              "          1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16\n",
              "          (parametrizations): ModuleDict(\n",
              "            (weight): ParametrizationList(\n",
              "              (0): _WeightNorm()\n",
              "            )\n",
              "          )\n",
              "        )\n",
              "        (padding): Wav2Vec2SamePadLayer()\n",
              "        (activation): GELUActivation()\n",
              "      )\n",
              "      (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
              "      (dropout): Dropout(p=0.1, inplace=False)\n",
              "      (layers): ModuleList(\n",
              "        (0-23): 24 x Wav2Vec2EncoderLayerStableLayerNorm(\n",
              "          (attention): Wav2Vec2SdpaAttention(\n",
              "            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
              "            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
              "            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
              "            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
              "          )\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "          (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
              "          (feed_forward): Wav2Vec2FeedForward(\n",
              "            (intermediate_dropout): Dropout(p=0.0, inplace=False)\n",
              "            (intermediate_dense): Linear(in_features=1024, out_features=4096, bias=True)\n",
              "            (intermediate_act_fn): GELUActivation()\n",
              "            (output_dense): Linear(in_features=4096, out_features=1024, bias=True)\n",
              "            (output_dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
              "        )\n",
              "      )\n",
              "    )\n",
              "  )\n",
              "  (dropout): Dropout(p=0.0, inplace=False)\n",
              "  (lm_head): Linear(in_features=1024, out_features=40, bias=True)\n",
              ")"
            ]
          },
          "execution_count": 1,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import torch\n",
        "import librosa\n",
        "import soundfile as sf\n",
        "from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC\n",
        "\n",
        "# Load the wav2vec2 CTC phoneme recogniser and its matching processor.\n",
        "MODEL_NAME = \"mrrubino/wav2vec2-large-xlsr-53-l2-arctic-phoneme\" # wav2vec2-based phoneme transcriber trained on L2-ARCTIC\n",
        "processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)\n",
        "model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)\n",
        "model.eval()  # evaluation mode: this notebook only runs inference\n",
        "\n",
        "# Inference device. It is hard-coded to CPU (no availability check is performed);\n",
        "# the Whisper model loaded in a later cell is moved to this same device.\n",
        "device = \"cpu\"\n",
        "model.to(device)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load model directly\n",
        "from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq\n",
        "\n",
        "# Whisper checkpoint used for plain-text (orthographic) transcription.\n",
        "# The model is placed on the same device as the phoneme model.\n",
        "whisper_processor = AutoProcessor.from_pretrained(\"openai/whisper-tiny.en\")\n",
        "whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(\"openai/whisper-tiny.en\").to(device)\n",
        "\n",
        "# Disabled: forcing the decoder language/task.\n",
        "# NOTE(review): presumably unnecessary for the English-only `.en` checkpoint -- confirm before deleting.\n",
        "# # Set language to English\n",
        "# forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language=\"en\", task=\"transcribe\")\n",
        "# whisper_model.config.forced_decoder_ids = forced_decoder_ids\n",
        "\n",
        "def transcribe_into_English(audio_input):\n",
        "    \"\"\"Transcribe a waveform into lower-cased English text using Whisper.\n",
        "\n",
        "    Args:\n",
        "        audio_input: Raw audio samples. They are handed to the Whisper processor\n",
        "            with a declared sampling rate of 16 kHz.\n",
        "\n",
        "    Returns:\n",
        "        str: The first decoded sequence, lower-cased and stripped of surrounding\n",
        "        whitespace.\n",
        "    \"\"\"\n",
        "    # Turn the samples into model-ready features on the shared device\n",
        "    features = whisper_processor(audio_input, sampling_rate=16000, return_tensors=\"pt\").to(device)\n",
        "\n",
        "    # Inference only, so gradient tracking is switched off\n",
        "    with torch.no_grad():\n",
        "        token_ids = whisper_model.generate(features.input_features)\n",
        "\n",
        "    # Special tokens are dropped while decoding; only the first sequence is used\n",
        "    decoded = whisper_processor.batch_decode(token_ids, skip_special_tokens=True)\n",
        "    return decoded[0].lower().strip()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load and preprocess the audio file\n",
        "def load_audio(audio_path, target_sr=16000):\n",
        "    \"\"\"Load an audio file, asking librosa to resample it to ``target_sr``.\n",
        "\n",
        "    Args:\n",
        "        audio_path: Location of the audio file; passed straight to ``librosa.load``.\n",
        "        target_sr (int): Sampling rate requested from librosa (16 kHz by default).\n",
        "\n",
        "    Returns:\n",
        "        The audio samples returned by ``librosa.load``. The sampling rate that\n",
        "        librosa reports alongside them is discarded.\n",
        "    \"\"\"\n",
        "    samples, _ = librosa.load(audio_path, sr=target_sr)\n",
        "    return samples"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "False"
            ]
          },
          "execution_count": 2,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# helper\n",
        "# NOTE(review): this looks like a scratch check -- a membership test on an empty dict,\n",
        "# which evaluates to False. Confirm nothing later relies on `temp` before removing the cell.\n",
        "temp = {}\n",
        "'name' in temp"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5BDwRwcsbmBg"
      },
      "source": [
        "# Utils\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {},
      "outputs": [],
      "source": [
        "import re \n",
        "\n",
        "def get_nested_position(nested_list, flat_index):\n",
        "    \"\"\"\n",
        "    Map an index into the flattened sequence back to (sublist, offset) coordinates.\n",
        "\n",
        "    Args:\n",
        "        nested_list (list of lists): The sublists, in flattening order.\n",
        "        flat_index (int): Position in the concatenation of all sublists.\n",
        "\n",
        "    Returns:\n",
        "        tuple: (index of the sublist, index of the element inside that sublist)\n",
        "\n",
        "    Raises:\n",
        "        IndexError: If flat_index is at or beyond the total number of elements.\n",
        "\n",
        "    Note:\n",
        "        Negative values are not validated: the first sublist is reported together\n",
        "        with the (negative) flat_index itself.\n",
        "    \"\"\"\n",
        "    # Offset still to be consumed; shrinks by the size of every sublist skipped\n",
        "    remaining = flat_index\n",
        "\n",
        "    for position, chunk in enumerate(nested_list):\n",
        "        size = len(chunk)\n",
        "        if remaining < size:\n",
        "            # The target lies inside this sublist\n",
        "            return position, remaining\n",
        "        remaining -= size\n",
        "\n",
        "    raise IndexError(\"Index out of range for the flattened list.\")\n",
        "\n",
        "def label_specific_elements_in_reference(reference, start_word_idx, start_element_idx, end_word_idx, end_element_idx, label):\n",
        "    \"\"\"\n",
        "    Labels elements in a nested list between specified start and end indices (inclusive).\n",
        "\n",
        "    Args:\n",
        "        reference (list of lists): The original list of lists. It is not modified.\n",
        "        start_word_idx (int): Index of the starting nested list.\n",
        "        start_element_idx (int): Index of the starting element in the start list.\n",
        "        end_word_idx (int): Index of the ending nested list.\n",
        "        end_element_idx (int): Index of the ending element in the end list.\n",
        "        label: The label to attach to the elements.\n",
        "\n",
        "    Returns:\n",
        "        list of lists: A new list of lists in which every element inside the range is\n",
        "        replaced by the tuple (element, label). Elements outside the range are kept\n",
        "        unchanged. If the start position lies after the end position the range is\n",
        "        empty, so every element is kept unchanged.\n",
        "    \"\"\"\n",
        "    range_start = (start_word_idx, start_element_idx)\n",
        "    range_end = (end_word_idx, end_element_idx)\n",
        "\n",
        "    labeled_reference = []\n",
        "    for word_idx, sublist in enumerate(reference):\n",
        "        labeled_sublist = []\n",
        "\n",
        "        for element_idx, element in enumerate(sublist):\n",
        "            # Tuples compare lexicographically (word first, then element), which covers\n",
        "            # the same-word case, the multi-word case and the empty (reversed) range\n",
        "            # with a single test.\n",
        "            if range_start <= (word_idx, element_idx) <= range_end:\n",
        "                if isinstance(element, tuple):\n",
        "                    # A tuple means an earlier pass already labelled this element\n",
        "                    print(f\"There is already a label at index ({word_idx}, {element_idx})\")\n",
        "                labeled_sublist.append((element, label))\n",
        "            else:\n",
        "                # Keep elements outside the range unchanged\n",
        "                labeled_sublist.append(element)\n",
        "\n",
        "        labeled_reference.append(labeled_sublist)\n",
        "\n",
        "    return labeled_reference\n",
        "\n",
        "def clean_text(text: str) -> str:\n",
        "    \"\"\"\n",
        "    Normalise a sentence: drop punctuation, squeeze whitespace, lower-case, trim.\n",
        "\n",
        "    Word characters, whitespace, apostrophes and hyphens are kept (so I'm and\n",
        "    hard-working survive intact); every other character is removed.\n",
        "\n",
        "    Parameters:\n",
        "        text (str): Input string to clean.\n",
        "\n",
        "    Returns:\n",
        "        str: Lower-cased string with single spaces and no leading/trailing whitespace.\n",
        "    \"\"\"\n",
        "    substitutions = (\n",
        "        (r'[^\\w\\s\\'-]', ''),  # strip punctuation except ' and -\n",
        "        (r'\\s+', ' '),  # collapse whitespace runs into one space\n",
        "    )\n",
        "    cleaned = text\n",
        "    for pattern, replacement in substitutions:\n",
        "        cleaned = re.sub(pattern, replacement, cleaned)\n",
        "    return cleaned.lower().strip()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {},
      "outputs": [],
      "source": [
        "import cmudict\n",
        "# Pronunciation dictionary, built once here and shared by the cells below.\n",
        "# NOTE(review): presumably maps a lower-case word to its list of ARPAbet pronunciations -- confirm.\n",
        "cmu_dict = cmudict.dict()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {},
      "outputs": [],
      "source": [
        "# WORKING: converting functions to class, currently done with the last function in the class\n",
        "import re\n",
        "from difflib import SequenceMatcher\n",
        "from IPython.display import HTML, display\n",
        "import copy   \n",
        "from IPython.display import HTML, display\n",
        "from Bio import pairwise2\n",
        "from Bio.pairwise2 import format_alignment\n",
        "\n",
        "class PronunciationAssessment:\n",
        "    def __init__(self, transcript, uttered_phonemes):\n",
        "        \"\"\"\n",
        "        Build the phoneme lookup tables and store the inputs of one assessment.\n",
        "\n",
        "        Args:\n",
        "            transcript (str): Reference text; stored lower-cased and stripped.\n",
        "            uttered_phonemes: Phonemes recognised from the utterance, stored as given\n",
        "                (presumably an IPA string produced by the phoneme model -- confirm with caller).\n",
        "        \"\"\"\n",
        "        # NOTE: removed all long signals ('ː') for compatibility with L2-ARCTIC's phoneme set (ssl model training set). American English.\n",
        "        # Ground-truth phonemes are converted into ARPAbet first, and then into IPA using the arpabet_to_ipa dict, meaning the arpabet_to_ipa dict contains\n",
        "        # the core IPA phoneme set.\n",
        "\n",
        "        # NOTE: modifications to the list in https://www.dyslexia-reading-well.com/44-phonemes-in-english.html:\n",
        "        # removed 'sc', 'ps', and 'st' in ipa_to_orthography of 's', because the extra letter is assumed to be silent.\n",
        "        self.ipa_to_orthography = {\n",
        "            'b': ['b', 'bb'],  # Examples: bug, bubble\n",
        "            'd': ['d', 'dd', 'ed'],  # Examples: dad, add, milled\n",
        "            'f': ['f', 'ff', 'ph', 'gh', 'lf', 'ft'],  # Examples: fat, cliff, phone, enough, half, often\n",
        "            'ɡ': ['g', 'gg', 'gh', 'gu', 'gue'],  # Examples: gun, egg, ghost, guest, prologue\n",
        "            'h': ['h', 'wh'],  # Examples: hop, who\n",
        "            'dʒ': ['j', 'ge', 'g', 'dge', 'di', 'gg'],  # Examples: jam, wage, giraffe, edge, soldier, exaggerate\n",
        "            'k': ['k', 'c', 'ch', 'cc', 'lk', 'qu', 'q(u)', 'ck', 'x'],  # Examples: kit, cat, chris, accent, folk, bouquet, queen, rack, box\n",
        "            'l': ['l', 'll'],  # Examples: live, well\n",
        "            'm': ['m', 'mm', 'mb', 'mn', 'lm'],  # Examples: man, summer, comb, column, palm\n",
        "            'n': ['n', 'nn', 'kn', 'gn', 'pn', 'mn'],  # Examples: net, funny, know, gnat, pneumonic, mnemonic\n",
        "            'p': ['p', 'pp'],  # Examples: pin, dippy\n",
        "            'r': ['r', 'rr', 'wr', 'rh'],  # Examples: run, carrot, wrench, rhyme\n",
        "            'ɹ': ['r', 'rr', 'wr', 'rh'],  # Examples: run, carrot, wrench, rhyme\n",
        "            's': ['s', 'ss', 'c', 'ce', 'se'],  # Examples: sit, less, circle, scene, psycho, listen, pace, course\n",
        "            't': ['t', 'tt', 'th', 'ed'],  # Examples: tip, matter, thomas, ripped\n",
        "            'v': ['v', 'f', 'ph', 've'],  # Examples: vine, of, stephen, five\n",
        "            'w': ['w', 'wh', 'u', 'o'],  # Examples: wit, why, quick, choir\n",
        "            'z': ['z', 'zz', 's', 'ss', 'x', 'ze', 'se'],  # Examples: zed, buzz, his, scissors, xylophone, craze\n",
        "            'ʒ': ['s', 'si', 'z'],  # Examples: treasure, division, azure\n",
        "            'tʃ': ['ch', 'tch', 'tu', 'te'],  # Examples: chip, watch, future, righteous\n",
        "            'ʃ': ['sh', 'ce', 's', 'ci', 'si', 'ch', 'sci', 'ti'],  # Examples: sham, ocean, sure, special, pension, machine, conscience, station\n",
        "            'θ': ['th'],  # Example: thongs\n",
        "            'ð': ['th'],  # Example: leather\n",
        "            'ŋ': ['ng', 'n', 'ngue'],  # Examples: ring, pink, tongue\n",
        "            'j': ['y', 'i', 'j'],  # Examples: you, onion, hallelujah\n",
        "            'æ': ['a', 'ai', 'au'],  # Examples: cat, plaid, laugh\n",
        "            'eɪ': ['a', 'ai', 'eigh', 'aigh', 'ay', 'er', 'et', 'ei', 'au', 'a_e', 'ea', 'ey'],  # Examples: bay, maid, weigh, straight, pay, foyer, filet, eight, gauge, mate, break, they\n",
        "            'ɛ': ['e', 'ea', 'u', 'ie', 'ai', 'a', 'eo', 'ei', 'ae'],  # Examples: end, bread, bury, friend, said, many, leopard, heifer, aesthetic\n",
        "            'i': ['e', 'ee', 'ea', 'y', 'ey', 'oe', 'ie', 'i', 'ei', 'eo', 'ay'],  # Examples: be, bee, meat, lady, key, phoenix, grief, ski, deceive, people, quay\n",
        "            'ɪ': ['i', 'e', 'o', 'u', 'ui', 'y', 'ie'],  # Examples: it, england, women, busy, guild, gym, sieve\n",
        "            'aɪ': ['i', 'y', 'igh', 'ie', 'uy', 'ye', 'ai', 'is', 'eigh', 'i_e'],  # Examples: spider, sky, night, pie, guy, stye, aisle, island, height, kite\n",
        "            'ɒ': ['a', 'ho', 'au', 'aw', 'ough'],  # Examples: swan, honest, maul, slaw, fought\n",
        "            'oʊ': ['o', 'oa', 'o_e', 'oe', 'ow', 'ough', 'eau', 'oo', 'ew'],  # Examples: open, moat, bone, toe, sow, dough, beau, brooch, sew\n",
        "            'ʊ': ['o', 'oo', 'u', 'ou'],  # Examples: wolf, look, bush, would\n",
        "            'ʌ': ['u', 'o', 'oo', 'ou'],  # Examples: lug, monkey, blood, double\n",
        "            'u': ['o', 'oo', 'ew', 'ue', 'u_e', 'oe', 'ough', 'ui', 'oew', 'ou'],  # Examples: who, loon, dew, blue, flute, shoe, through, fruit, manoeuvre, group\n",
        "            'ɔɪ': ['oi', 'oy', 'uoy'],  # Examples: join, boy, buoy\n",
        "            'aʊ': ['ow', 'ou', 'ough'],  # Examples: now, shout, bough\n",
        "            'ə': ['o', 'a', 'er', 'i', 'ar', 'our', 'ur', 'e'],  # Examples: about, ladder, pencil, dollar, honour, augur\n",
        "            'eəʳ': ['air', 'are', 'ear', 'ere', 'eir', 'ayer'],  # Examples: chair, dare, pear, where, their, prayer\n",
        "            'a': ['a'],  # Example: arm\n",
        "            'ɜʳ': ['ir', 'er', 'ur', 'ear', 'or', 'our', 'yr'],  # Examples: bird, term, burn, pearl, word, journey, myrtle\n",
        "            'ɔ': ['aw', 'a', 'au', 'or', 'ore', 'oar', 'our', 'augh', 'ar', 'ough'],  # Examples: law, ball, haul,\n",
        "            'ɪəʳ': ['ear', 'eer', 'ere', 'ier'], # Examples: beer, fear, here, tier\n",
        "            'ʊəʳ': ['ure', 'our'], # Examples: sure, tour\n",
        "\n",
        "            # Dialectal Variations\n",
        "            'ɚ': ['er', 'ir', 'ur', 'ar', 'or'],  # Examples: butter, bird, dollar\n",
        "            'ɝ': ['er', 'ir', 'ur'],  # Examples: herd, third, turn\n",
        "            'ʍ': ['wh'],  # Examples: where, which, whale\n",
        "            'ɑ': ['a', 'ah'],  # Examples: father, spa\n",
        "            # 'oʊ' must not be listed a second time here: a repeated key in a dict literal\n",
        "            # silently replaces the earlier entry, which used to drop 'oa', 'o_e', 'eau' and 'oo'.\n",
        "        }\n",
        "\n",
        "        self.arpabet_to_ipa = {\n",
        "            \"AA\": \"a\",    # odd\n",
        "            \"AE\": \"æ\",    # at\n",
        "            # \"AH\": \"ə\",    # hut\n",
        "            \"AO\": \"ɔ\",    # ought\n",
        "            \"AW\": \"aʊ\",   # cow \n",
        "            \"AX\": \"ə\",    # discus\n",
        "            \"AY\": \"aɪ\",   # hide\n",
        "            \"B\": \"b\",     # be\n",
        "            \"CH\": \"tʃ\",   # cheese\n",
        "            \"D\": \"d\",     # dee\n",
        "            \"DH\": \"ð\",    # thee\n",
        "            \"EH\": \"ɛ\",    # Ed\n",
        "            # \"ER\": \"ɝ\",    # hurt\n",
        "            \"EY\": \"eɪ\",   # ate\n",
        "            \"F\": \"f\",     # fee\n",
        "            \"G\": \"ɡ\",     # green\n",
        "            \"HH\": \"h\",    # he\n",
        "            \"IH\": \"ɪ\",    # it\n",
        "            \"IY\": \"i\",    # eat\n",
        "            \"JH\": \"dʒ\",   # gee\n",
        "            \"K\": \"k\",     # key\n",
        "            \"L\": \"l\",     # lee\n",
        "            \"M\": \"m\",     # me\n",
        "            \"N\": \"n\",     # knee\n",
        "            \"NG\": \"ŋ\",    # ping\n",
        "            \"OW\": \"oʊ\",   # oat\n",
        "            \"OY\": \"ɔɪ\",   # toy\n",
        "            \"P\": \"p\",     # pee\n",
        "            \"R\": \"ɹ\",     # read\n",
        "            \"S\": \"s\",     # sea\n",
        "            \"SH\": \"ʃ\",    # she\n",
        "            \"T\": \"t\",     # tea\n",
        "            \"TH\": \"θ\",    # theta\n",
        "            \"UH\": \"ʊ\",    # hood\n",
        "            \"UW\": \"u\",    # two\n",
        "            \"V\": \"v\",     # vee\n",
        "            \"W\": \"w\",     # we\n",
        "            \"Y\": \"j\",     # yield\n",
        "            \"Z\": \"z\",     # zee\n",
        "            \"ZH\": \"ʒ\",     # seizure\n",
        "\n",
        "            # Vowels with stress affecting IPA\n",
        "            \"AH0\": \"ə\",    # unstressed (about)\n",
        "            \"AH1\": \"ʌ\",    # stressed (hut)\n",
        "            \"AH2\": \"ʌ\",    # secondary stress (hut)\n",
        "            \"ER0\": \"ɚ\",    # unstressed (runner)\n",
        "            \"ER1\": \"ɝ\",    # stressed (bird)\n",
        "            \"ER2\": \"ɝ\",    # secondary stress (bird)\n",
        "            \"EY0\": \"e\",    # unstressed (obey)\n",
        "            \"EY1\": \"eɪ\",   # stressed (day)\n",
        "            \"EY2\": \"eɪ\",   # secondary stress (day)\n",
        "            \"IH0\": \"ɨ\",    # unstressed (possible centralization)\n",
        "            \"IH1\": \"ɪ\",    # stressed (bit)\n",
        "            \"IH2\": \"ɪ\",    # secondary stress (bit)\n",
        "            \"UW0\": \"ʉ\",    # unstressed (possible centralization)\n",
        "            \"UW1\": \"u\",    # stressed (food)\n",
        "            \"UW2\": \"u\",    # secondary stress (food)\n",
        "            \"AO0\": \"ə\",    # unstressed (centralized in some accents)\n",
        "            \"AO1\": \"ɔ\",    # stressed (thought)\n",
        "            \"AO2\": \"ɔ\",    # secondary stress (thought)\n",
        "            \"AE0\": \"ə\",    # unstressed (centralized in some accents)\n",
        "            \"AE1\": \"æ\",    # stressed (cat)\n",
        "            \"AE2\": \"æ\",    # secondary stress (cat)\n",
        "            \"OW0\": \"o\",    # unstressed (less diphthongized)\n",
        "            \"OW1\": \"oʊ\",   # stressed (go)\n",
        "            \"OW2\": \"oʊ\",   # secondary stress (go)\n",
        "            \"UH0\": \"ɨ\",    # unstressed (centralized or reduced)\n",
        "            \"UH1\": \"ʊ\",    # stressed (put)\n",
        "            \"UH2\": \"ʊ\",    # secondary stress (put)\n",
        "\n",
        "            # unknown phoneme\n",
        "            \"unk\": \"unk\"\n",
        "        }\n",
        "\n",
        "        # Phonemes for which a pair label is defined. Both the ASCII 'g' and the IPA 'ɡ' are listed.\n",
        "        scored_phonemes = [\n",
        "            'b', 'd', 'f', 'g', 'h', 'dʒ', 'k', 'l', 'm', 'n', 'p', 'r', 'ɹ', 's', 't', 'v', 'w', 'z', 'ʒ', 'tʃ',\n",
        "            'ʃ', 'θ', 'ð', 'ŋ', 'j', 'æ', 'eɪ', 'ɛ', 'i', 'ɪ', 'aɪ', 'ɒ', 'oʊ', 'ʊ', 'ʌ', 'u', 'ɔɪ', 'aʊ', 'ə',\n",
        "            'eəʳ', 'a', 'ɜʳ', 'ɔ', 'ɪəʳ', 'ʊəʳ', 'ɚ', 'ɝ', 'ʍ', 'ɑ', 'ɡ'\n",
        "        ]\n",
        "\n",
        "        # Substitutions that are tolerated, written in one direction only\n",
        "        acceptable_pairs = [\n",
        "            ('b', 'p'), ('d', 't'), ('g', 'k'), ('ɡ', 'k'), ('v', 'f'), ('z', 's'), ('ʒ', 'ʃ'), ('ð', 'θ'),\n",
        "            ('m', 'n'), ('m', 'ŋ'), ('n', 'ŋ'), ('r', 'ɹ'), ('l', 'r'), ('l', 'ɹ'), ('w', 'ʍ'),\n",
        "            ('j', 'ɹ'), ('f', 'θ'), ('v', 'ð'), ('s', 'ʃ'), ('z', 'ʒ'), ('tʃ', 'dʒ'), ('tʃ', 'ʃ'),\n",
        "            ('dʒ', 'ʒ'), ('i', 'ɪ'), ('ɪ', 'ɛ'), ('ɛ', 'æ'), ('ə', 'ʌ'), ('ə', 'ɜʳ'), ('ʌ', 'ɜʳ'),\n",
        "            ('ə', 'ɚ'), ('u', 'ʊ'), ('ʊ', 'oʊ'), ('oʊ', 'ɔ'), ('ɔ', 'ɒ'), ('ɑ', 'ɒ'), ('eɪ', 'ɛ'),\n",
        "            ('eɪ', 'æ'), ('aɪ', 'ɪ'), ('aʊ', 'ʊ'), ('ɔɪ', 'ɔ'), ('ɝ', 'ɚ'), ('ɪəʳ', 'ɜʳ'), ('ʊəʳ', 'ɔ'),\n",
        "            ('ð', 'd'), ('ɑ', 'a')\n",
        "        ]\n",
        "        # Acceptability is symmetric, so every pair is also stored reversed.\n",
        "        symmetric_pairs = acceptable_pairs + [(b, a) for (a, b) in acceptable_pairs]\n",
        "        # Set for constant-time membership tests while filling in the 'wrong' entries\n",
        "        acceptable_lookup = set(symmetric_pairs)\n",
        "\n",
        "        # whether the two phonemes are considered correct (value = 1), acceptable (value = 2), or wrong (value = 0)\n",
        "        self.phoneme_pair_label = {\n",
        "            # Completely correct pairs (self-similarity)\n",
        "            **{(p, p): 1 for p in scored_phonemes},\n",
        "\n",
        "            # Acceptable substitutions (value = 2)\n",
        "            **{pair: 2 for pair in symmetric_pairs},\n",
        "\n",
        "            # Completely wrong pairs (default value = 0)\n",
        "            **{\n",
        "                (p1, p2): 0\n",
        "                for p1 in scored_phonemes\n",
        "                for p2 in scored_phonemes\n",
        "                if p1 != p2 and (p1, p2) not in acceptable_lookup\n",
        "            }\n",
        "        }\n",
        "\n",
        "        # Phoneme inventory: the keys of ipa_to_orthography plus the 'unk' placeholder\n",
        "        self.ipa_phonemes = list(self.ipa_to_orthography.keys())\n",
        "        self.ipa_phonemes.append('unk')\n",
        "\n",
        "        # instance-specific variables\n",
        "        self.transcript = transcript.lower().strip()\n",
        "        self.uttered_ipa_phonemes = uttered_phonemes\n",
        "        self.ground_truth_arpabet_phonemes = \"\"\n",
        "        self.ground_truth_ipa_phonemes = \"\"\n",
        "\n",
        "        self.segmented_uttered_ipa_phonemes = []\n",
        "        self.segmented_ground_truth_arpabet_phonemes = []\n",
        "        self.segmented_ground_truth_ipa_phonemes = []\n",
        "\n",
        "    def get_phoneme_count(self):\n",
        "        return len(self.ipa_phonemes)\n",
        "\n",
        "    def has_phoneme(self, phoneme): \n",
        "        return phoneme in self.ipa_phonemes\n",
        "\n",
        "    def convert_transcript_into_phonemes(self, get_all_versions=True):\n",
        "        \"\"\"\n",
        "        Convert the transcript into IPA phonemes by way of ARPABET phonemes.\n",
        "\n",
        "        Every whitespace-separated word of self.transcript is looked up in cmu_dict. Two attributes are written:\n",
        "            self.segmented_ground_truth_arpabet_phonemes: the cleaned ARPABET phonemes, one entry per word.\n",
        "            self.segmented_ground_truth_ipa_phonemes: the same structure mapped through self.arpabet_to_ipa.\n",
        "\n",
        "        Parameters:\n",
        "            get_all_versions (bool): Default to True. When True each word entry is a list of pronunciations\n",
        "                (each one a list of phonemes); when False each word entry is the phoneme list of the first\n",
        "                pronunciation only.\n",
        "\n",
        "        Returns:\n",
        "            bool: False if the transcript is empty, True once both attributes have been stored.\n",
        "        \"\"\"\n",
        "        if len(self.transcript) == 0:\n",
        "            return False\n",
        "\n",
        "        arpabet_words = []\n",
        "        for word in self.transcript.split():\n",
        "            # A plain dict raises KeyError on a miss while a defaultdict hands back an empty list;\n",
        "            # treat both as an unknown word.\n",
        "            try:\n",
        "                known_versions = cmu_dict[word]\n",
        "            except KeyError:\n",
        "                known_versions = []\n",
        "\n",
        "            if len(known_versions) == 0:\n",
        "                # Unknown word: a single pronunciation made of the 'unk' placeholder\n",
        "                word_versions = [['unk']]\n",
        "            elif get_all_versions:\n",
        "                word_versions = [self.clean_single_arpabet_phoneme_list(phons) for phons in known_versions]\n",
        "            else:\n",
        "                # Only the first listed pronunciation is needed\n",
        "                word_versions = [self.clean_single_arpabet_phoneme_list(known_versions[0])]\n",
        "\n",
        "            # Keep the nesting depth uniform for every word, unknown ones included:\n",
        "            # word -> versions -> phonemes (all versions) or word -> phonemes (first version only)\n",
        "            arpabet_words.append(word_versions if get_all_versions else word_versions[0])\n",
        "\n",
        "        self.segmented_ground_truth_arpabet_phonemes = arpabet_words\n",
        "\n",
        "        def to_ipa(arpabet_phons):\n",
        "            # assumes self.arpabet_to_ipa also maps the 'unk' placeholder - TODO confirm\n",
        "            return [self.arpabet_to_ipa[phon] for phon in arpabet_phons]\n",
        "\n",
        "        if get_all_versions:\n",
        "            ipa_phonemes = [[to_ipa(version) for version in word] for word in arpabet_words]\n",
        "        else:\n",
        "            ipa_phonemes = [to_ipa(word) for word in arpabet_words]\n",
        "\n",
        "        self.segmented_ground_truth_ipa_phonemes = ipa_phonemes\n",
        "        return True\n",
        "        \n",
        "    def remove_ipa_stress_markers(self, phonemes):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return re.sub(r\"[ˈˌ]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_ipa_length_markers(self, phonemes):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return re.sub(r\"[ːˑ]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_ipa_break_markers(self, phonemes):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return re.sub(r\"[.‖|]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_ipa_tone_markers(self, phonemes):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return re.sub(r\"[˥˦˧˨˩]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_ipa_global_markers(self, phonemes):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return re.sub(r\"[↗↘]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_ipa_diacritics(self, phonemes):\n",
        "        \"\"\"\n",
        "        Delete every character listed in the diacritic character class below from a phoneme string.\n",
        "\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "\n",
        "        Returns:\n",
        "            str: The input with each listed diacritic removed; all other characters are kept.\n",
        "        \"\"\"\n",
        "        # The class is a list of combining marks; edit it with care, they render on top of neighbouring characters\n",
        "        return re.sub(r\"[̩̯̪̠̟̹̜̬̥̤̰̼̩̯̝̞̊̃̚]\", \"\", phonemes)\n",
        "    \n",
        "    def remove_tie_bars(self, phonemes):\n",
        "        \"\"\"\n",
        "        Removes all tie bars (͡) from a string of phonemes.\n",
        "\n",
        "        Parameters:\n",
        "            phonemes (str): A string of phonemes (e.g. \"ˈɪŋɡlɪʃ\")\n",
        "        \"\"\"\n",
        "        return phonemes.replace('͡', '')\n",
        "    \n",
        "    def correct_shenanigans(self, ipa_phonemes):\n",
        "        \"\"\"\n",
        "        Manually correct phoneme-related problems, mostly arising from converstion from arpabet to ipa or from ssl's inferred ipa\n",
        "        Parameters:\n",
        "            ipa_phonemes (list of lists): Nested list of phonemes.\n",
        "        \"\"\"\n",
        "        new_ipa_phonemes = \"\"\n",
        "        for word in ipa_phonemes.split():\n",
        "            if len(new_ipa_phonemes) > 0:\n",
        "                new_ipa_phonemes += \" \" \n",
        "            cur_word = \"\"\n",
        "            for i, phoneme in enumerate(list(word)):\n",
        "                if phoneme == \"ʌ\":\n",
        "                    if i == 0 or i == len(word) - 1 or len(word) > 4:\n",
        "                        cur_word += \"ə\"\n",
        "                    else:\n",
        "                        cur_word += phoneme\n",
        "                else:\n",
        "                    cur_word += phoneme\n",
        "            new_ipa_phonemes += cur_word\n",
        "        return new_ipa_phonemes\n",
        "    \n",
        "    def clean_ipa_phonemes(self):\n",
        "        \"\"\"\n",
        "        Clean uttered phonemes by removing stress, length, break, tone, global, and diacritic markers, as well as tie bars.\n",
        "        \"\"\"\n",
        "        phonemes = self.uttered_ipa_phonemes\n",
        "        phonemes = self.remove_ipa_stress_markers(phonemes)\n",
        "        phonemes = self.remove_ipa_length_markers(phonemes)\n",
        "        phonemes = self.remove_ipa_break_markers(phonemes)\n",
        "        phonemes = self.remove_ipa_tone_markers(phonemes)\n",
        "        phonemes = self.remove_ipa_global_markers(phonemes)\n",
        "        phonemes = self.remove_ipa_diacritics(phonemes)\n",
        "        phonemes = self.remove_tie_bars(phonemes)\n",
        "        phonemes = self.correct_shenanigans(phonemes)\n",
        "        self.uttered_ipa_phonemes = phonemes\n",
        "        \n",
        "        return True\n",
        "    \n",
        "    def remove_stress_indicator_from_arpabet_phonemes(self, arpabet_phoneme_list):\n",
        "        \"\"\"\n",
        "        Remove all stress markers (trailing numbers), excluding AH and ER (due to their nuances, refer to the arpa_to_ipa dict for detail)\n",
        "\n",
        "        Parameters:\n",
        "        arpabet_phoneme_list (list of lists): Nested list of phonemes.\n",
        "\n",
        "        Returns:\n",
        "            list of lists: Updated nested list with numbers removed from phonemes.\n",
        "        \"\"\"\n",
        "        cleaned_phon_list = []\n",
        "        for word_phonemes in arpabet_phoneme_list:\n",
        "            cleaned_phon_list = []\n",
        "            for phoneme in word_phonemes:\n",
        "                if not phoneme.startswith(('AH', 'ER')):\n",
        "                    cleaned_phon_list.append(re.sub(r'\\d', '', phoneme))\n",
        "                else:\n",
        "                    cleaned_phon_list.append(phoneme)\n",
        "            cleaned_phon_list.append(cleaned_phon_list)\n",
        "\n",
        "        return cleaned_phon_list\n",
        "    \n",
        "    def remove_stress_indicator_from_single_arpabet_phoneme_list(self, phon_list):\n",
        "        \"\"\"\n",
        "        Remove all stress markers (trailing numbers), excluding AH and ER (due to their nuances, refer to the arpa_to_ipa dict for detail)\n",
        "\n",
        "        Parameters:\n",
        "        phon_list (list(str)): The list of arpabet phoneme\n",
        "\n",
        "        Returns:\n",
        "            str: Updated phoneme with numbers removed.\n",
        "        \"\"\"\n",
        "        cleaned_phon_list = []\n",
        "        for phoneme in phon_list:\n",
        "            if not phoneme.startswith(('AH', 'ER')):\n",
        "                cleaned_phon_list.append(re.sub(r'\\d', '', phoneme))\n",
        "            else:\n",
        "                cleaned_phon_list.append(phoneme)\n",
        "\n",
        "        return cleaned_phon_list\n",
        "            \n",
        "    def clean_arpabet_phonemes(self, arpabet_phoneme_list):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            arpabet_phoneme_list (list of lists): Nested list of phonemes.\n",
        "        \"\"\"\n",
        "        cleaned_phonemes = self.remove_stress_indicator_from_arpabet_phonemes(arpabet_phoneme_list)\n",
        "        return cleaned_phonemes\n",
        "    \n",
        "    def clean_single_arpabet_phoneme_list(self, phon_list):\n",
        "        \"\"\"\n",
        "        Parameters:\n",
        "            phon_list (list(str)): The list of arpabet phoneme\n",
        "        \"\"\"\n",
        "        cleaned_phon = self.remove_stress_indicator_from_single_arpabet_phoneme_list(phon_list)\n",
        "        return cleaned_phon\n",
        "    \n",
        "    def split_phoneme_sequence(self):\n",
        "        \"\"\"\n",
        "        Splits a the uttered phoneme sequence (of a string of phoneme with each word separated by a space) into individual phonemes based on the IPA dictionary keys.\n",
        "        \"\"\"\n",
        "        sequence = self.uttered_ipa_phonemes.strip()\n",
        "        i = 0\n",
        "        keys = sorted(self.ipa_phonemes, key=len, reverse=True)  # Prioritize longer matches\n",
        "        \n",
        "        sequence_phonemes = []\n",
        "        word_phonemes = []\n",
        "        while i < len(sequence):\n",
        "            # if reaches the end of a word\n",
        "            if sequence[i] == ' ':\n",
        "                if word_phonemes:\n",
        "                    sequence_phonemes.append(word_phonemes)\n",
        "                    word_phonemes = []\n",
        "                i += 1\n",
        "                continue\n",
        "            match = None\n",
        "\n",
        "            # otherwise\n",
        "            for key in keys:\n",
        "                if sequence[i:i+len(key)] == key:\n",
        "                    match = key\n",
        "                    word_phonemes.append(match)\n",
        "                    i += len(key)\n",
        "                    break\n",
        "            if not match:  # No phoneme matched\n",
        "                word_phonemes.append('unk')\n",
        "                i += 1\n",
        "\n",
        "        if word_phonemes:\n",
        "            sequence_phonemes.append(word_phonemes)\n",
        "        self.segmented_uttered_ipa_phonemes = sequence_phonemes\n",
        "    \n",
        "    def evaluate_pronunciation(self, reference: list, pronunciation: list):\n",
        "        \"\"\"\n",
        "        Evaluate the pronunciation of a word or sentence by comparing it to a reference.\n",
        "\n",
        "        Both inputs are flattened to one phoneme list each, aligned with SequenceMatcher, and every\n",
        "        alignment operation is turned into error records and into labels on a deep copy of the reference.\n",
        "        \n",
        "        Args:\n",
        "            reference (list(list(str))): List of words, each word is a list of phonemes representing the correct pronunciation.\n",
        "            pronunciation (list(list(str))): List of words, each word is a list of phonemes representing the pronunciation to be evaluated.\n",
        "\n",
        "        Returns:\n",
        "            tuple: (errors, labels).\n",
        "                errors (dict): keys 'matches', 'substitutions', 'insertions', 'deletions', each holding a list.\n",
        "                    Substitutions are (reference_phoneme, uttered_phoneme) tuples, the others are phonemes.\n",
        "                labels: deep copy of `reference` as updated by label_specific_elements_in_reference\n",
        "                    (called with 1 for accepted phonemes and 0 for wrong or missing ones).\n",
        "        \"\"\"\n",
        "        smushed_ref = []\n",
        "        smushed_pron = []\n",
        "\n",
        "        # Flatten word -> phoneme nesting; the alignment works on flat indices\n",
        "        smushed_ref = [item for word in reference for item in word]\n",
        "        smushed_pron = [item for word in pronunciation for item in word]\n",
        "\n",
        "        matcher = SequenceMatcher(None, smushed_ref, smushed_pron)\n",
        "        alignment = matcher.get_opcodes()\n",
        "        \n",
        "        # Initialize results for errors and labels\n",
        "        errors = {\"matches\": [], \"substitutions\": [], \"insertions\": [], \"deletions\": []}\n",
        "        labels = copy.deepcopy(reference)\n",
        "        processed_indices = set()  # Track indices in the reference that are processed\n",
        "        \n",
        "        # Process each alignment operation\n",
        "        # (i1, i2) index the flat reference, (j1, j2) the flat pronunciation\n",
        "        for tag, i1, i2, j1, j2 in alignment:\n",
        "            if tag == \"equal\":\n",
        "                # Matches: Add to errors and label as 1\n",
        "                errors[\"matches\"].extend(smushed_ref[i1:i2])\n",
        "                # presumably get_nested_position maps a flat index back to (word index, phoneme index) - defined elsewhere, verify\n",
        "                start_word_idx, start_element_idx = get_nested_position(reference, i1)\n",
        "                end_word_idx, end_element_idx = get_nested_position(reference, i2 - 1)\n",
        "\n",
        "                labels = label_specific_elements_in_reference(labels, start_word_idx, start_element_idx, end_word_idx, end_element_idx, 1)\n",
        "                # labels.extend([(phoneme, 1) for phoneme in reference[i1:i2]])\n",
        "                processed_indices.update(range(i1, i2))\n",
        "            elif tag == \"replace\":\n",
        "                # Substitutions: Check phoneme-by-phoneme\n",
        "                ref_segment = smushed_ref[i1:i2]\n",
        "                pron_segment = smushed_pron[j1:j2]\n",
        "                # go through each pair of phoneme in ref and pron segment, if they are labeled 2 or 1 in the phoneme_pair_label, remove them as mistakes\n",
        "                original_i1 = i1\n",
        "                original_i2 = i2\n",
        "                # NOTE(review): i1 advances once per acceptable pair wherever that pair sits in the segment,\n",
        "                # yet the result is then treated as a prefix of accepted phonemes - confirm this is intended\n",
        "                for ref_phoneme, pron_phoneme in zip(ref_segment, pron_segment):\n",
        "                    if (ref_phoneme, pron_phoneme) in self.phoneme_pair_label:\n",
        "                        if self.phoneme_pair_label[(ref_phoneme, pron_phoneme)] in [1, 2]:\n",
        "                            processed_indices.add(i1)\n",
        "                            i1 += 1  # Move to the next index in the reference\n",
        "                            j1 += 1  # Move to the next index in the pronunciation\n",
        "                    \n",
        "                # Label the accepted leading part of the segment as 1\n",
        "                if i1 > original_i1:\n",
        "                    start_word_idx, start_element_idx = get_nested_position(reference, original_i1)\n",
        "                    end_word_idx, end_element_idx = get_nested_position(reference, i1 - 1)\n",
        "                    labels = label_specific_elements_in_reference(labels, start_word_idx, start_element_idx, end_word_idx, end_element_idx, 1)\n",
        "                \n",
        "                if i1 >= original_i2: # if no more phoneme in reference left to process\n",
        "                    continue\n",
        "\n",
        "                # Everything left in the segment is labelled 0\n",
        "                start_word_idx, start_element_idx = get_nested_position(reference, i1)\n",
        "                end_word_idx, end_element_idx = get_nested_position(reference, i2 - 1)\n",
        "\n",
        "                labels = label_specific_elements_in_reference(labels, start_word_idx, start_element_idx, end_word_idx, end_element_idx, 0)\n",
        "                processed_indices.update(range(i1, i2))        \n",
        "\n",
        "                # Record every differing pair of the whole segment as a substitution\n",
        "                for ref_phoneme, pron_phoneme in zip(ref_segment, pron_segment):\n",
        "                    if ref_phoneme != pron_phoneme:\n",
        "                        errors[\"substitutions\"].append((ref_phoneme, pron_phoneme))\n",
        "                        # labels.append((ref_phoneme, 0))\n",
        "                        processed_indices.add(i1)\n",
        "                        i1 += 1  # Move to the next index in the reference\n",
        "                \n",
        "                # Handle leftover phonemes in reference (deletions)\n",
        "                if len(ref_segment) > len(pron_segment):\n",
        "                    for leftover in ref_segment[len(pron_segment):]:\n",
        "                        errors[\"deletions\"].append(leftover)\n",
        "                        # labels.append((leftover, 0))\n",
        "                        processed_indices.add(i1)\n",
        "                        i1 += 1\n",
        "                \n",
        "                # Handle leftover phonemes in pronunciation (insertions)\n",
        "                if len(pron_segment) > len(ref_segment):\n",
        "                    for leftover in pron_segment[len(ref_segment):]:\n",
        "                        errors[\"insertions\"].append(leftover)\n",
        "            elif tag == \"insert\":\n",
        "                # Insertions: Add to errors, no effect on reference labels\n",
        "                errors[\"insertions\"].extend(smushed_pron[j1:j2])\n",
        "            elif tag == \"delete\":\n",
        "                # Deletions: Add to errors and label as 0\n",
        "                errors[\"deletions\"].extend(smushed_ref[i1:i2])\n",
        "                start_word_idx, start_element_idx = get_nested_position(reference, i1)\n",
        "                end_word_idx, end_element_idx = get_nested_position(reference, i2 - 1)\n",
        "\n",
        "                labels = label_specific_elements_in_reference(labels, start_word_idx, start_element_idx, end_word_idx, end_element_idx, 0)\n",
        "                # labels.extend([(phoneme, 0) for phoneme in reference[i1:i2]])\n",
        "                processed_indices.update(range(i1, i2))\n",
        "                \n",
        "        # Post-check: Ensure all phonemes in the reference are processed\n",
        "        # Any reference index never touched above is reported as a deletion and labelled 0\n",
        "        for i, phoneme in enumerate(smushed_ref):\n",
        "            if i not in processed_indices:\n",
        "                errors[\"deletions\"].append(phoneme)\n",
        "                start_word_idx, start_element_idx = get_nested_position(reference, i)\n",
        "                end_word_idx, end_element_idx = get_nested_position(reference, i)\n",
        "\n",
        "                labels = label_specific_elements_in_reference(labels, start_word_idx, start_element_idx, end_word_idx, end_element_idx, 0)\n",
        "                # labels.append((phoneme, 0))\n",
        "        \n",
        "        return errors, labels\n",
        "    \n",
        "    def map_boundary(self, segmented_ground_truth_list, segmented_uttered_list):\n",
        "        \"\"\"\n",
        "        Re-segment the uttered phonemes so that their word boundaries follow the ground truth.\n",
        "\n",
        "        The two flat lists are globally aligned; the uttered symbols are then grouped by the \" \"\n",
        "        separators of the aligned ground truth. Positions where the ground truth has a gap ('-') are\n",
        "        dropped. The grouping is written to self.segmented_uttered_ipa_phonemes (nothing is returned).\n",
        "\n",
        "        Args:\n",
        "            segmented_ground_truth_list (list): A single list of phonemes, words are separated with \" \"\n",
        "            segmented_uttered_list (list): A single list of phonemes, words are separated with \" \"\n",
        "        \"\"\"\n",
        "        top_alignment = pairwise2.align.globalms(\n",
        "            segmented_ground_truth_list,\n",
        "            segmented_uttered_list,\n",
        "            match=1,       # Score for match\n",
        "            mismatch=-1,   # Penalty for mismatch\n",
        "            open=-2,       # Penalty for opening a gap\n",
        "            extend=-1,     # Penalty for extending a gap\n",
        "            gap_char=['-'],\n",
        "        )[0]\n",
        "\n",
        "        words = []\n",
        "        pending = []\n",
        "        for truth_symbol, uttered_symbol in zip(top_alignment.seqA, top_alignment.seqB):\n",
        "            if truth_symbol == \" \":\n",
        "                # Word boundary in the ground truth: close the running group if it holds anything\n",
        "                if pending:\n",
        "                    words.append(pending)\n",
        "                    pending = []\n",
        "            elif truth_symbol != \"-\":\n",
        "                # Position backed by a ground-truth phoneme: keep whatever the utterance has there\n",
        "                pending.append(uttered_symbol)\n",
        "\n",
        "        # Close the last group, if any\n",
        "        if pending:\n",
        "            words.append(pending)\n",
        "\n",
        "        self.segmented_uttered_ipa_phonemes = words\n",
        "        \n",
        "    def evaluate_full_pronunciation(self):\n",
        "        \"\"\"\n",
        "        Evaluates the full pronunciation of the utterance against the ground truth. \n",
        "        self.segmented_ground_truth_ipa_phonemes and self.segmented_uttered_ipa_phonemes need to be available.\n",
        "        \"\"\"\n",
        "        if len(self.segmented_ground_truth_ipa_phonemes) == 0 or len(self.segmented_uttered_ipa_phonemes) == 0:\n",
        "            raise ValueError(\"Segmented ground truth and uttered phonemes must be non-empty.\")\n",
        "        \n",
        "        one_ground_truth = []\n",
        "        one_uttered = []\n",
        "        for word in self.segmented_ground_truth_ipa_phonemes:\n",
        "            one_ground_truth.extend(word[0])\n",
        "            one_ground_truth.append(\" \")\n",
        "        for word in self.segmented_uttered_ipa_phonemes:\n",
        "            one_uttered.extend(word)\n",
        "            one_uttered.append(\" \")\n",
        "\n",
        "        # correctly add spaces to uttered phonemes\n",
        "        self.map_boundary(one_ground_truth, one_uttered)\n",
        "\n",
        "        final_label_list = []\n",
        "        for reference, uttered in zip(self.segmented_ground_truth_ipa_phonemes, self.segmented_uttered_ipa_phonemes):\n",
        "            final_label_list.append(self.evaluate_pronunciation_for_word(uttered, reference))\n",
        "        return final_label_list\n",
        "    \n",
        "    def evaluate_pronunciation_for_word(self, uttered: list, reference: list):\n",
        "        \"\"\"\n",
        "        Evaluates pronunciation for a word.\n",
        "        \n",
        "        Args:\n",
        "            uttered (list): A list of phonemes representing the uttered phonemes for this word.\n",
        "            reference (list): A list of list, each nested list being a possible pronunciation (ground truth) of the word.\n",
        "        \n",
        "        Returns:\n",
        "            list(tuple): Each tuple is (phoneme_label)\n",
        "        \"\"\"\n",
        "        max_score = None\n",
        "        final_label_list = []\n",
        "        for ground_truth in reference:\n",
        "            score = 0\n",
        "            label_list = []\n",
        "\n",
        "            alignments = pairwise2.align.globalms(\n",
        "                ground_truth, uttered, \n",
        "                match=1,  # Score for match\n",
        "                mismatch=-1,  # Penalty for mismatch\n",
        "                open=-2,  # Penalty for opening a gap\n",
        "                extend=-1,  # Penalty for extending a gap,\n",
        "                gap_char=['-']\n",
        "            )\n",
        "\n",
        "            # Extract the aligned sequences\n",
        "            aligned_ground_truth, aligned_uttered, _, _, _ = alignments[0]\n",
        "\n",
        "            # Iterate through the characters in the aligned sequences\n",
        "            for gt_char, utt_char in zip(aligned_ground_truth, aligned_uttered):\n",
        "                # Skip gaps in the ground truth\n",
        "                if gt_char == '-' or gt_char == ' ':\n",
        "                    continue\n",
        "\n",
        "                # Assign a label based on the tuple (gt_char, utt_char)\n",
        "                if utt_char != '-':  # Only consider matched characters, not gaps in uttered\n",
        "                    key = (gt_char, utt_char)\n",
        "                    if key in self.phoneme_pair_label and self.phoneme_pair_label[key] in [1, 2]:\n",
        "                        label = 1\n",
        "                        score += 1\n",
        "                    else:\n",
        "                        label = 0\n",
        "                        score -= 1\n",
        "                else:\n",
        "                    label = 0  # Default label for unmatched characters\n",
        "                    score -= 1\n",
        "                # Append the result as a tuple (ground_truth_char, label)\n",
        "                label_list.append((gt_char, label))\n",
        "\n",
        "            if max_score is None or score > max_score:\n",
        "                max_score = score\n",
        "                final_label_list = label_list\n",
        "        # Return the label list\n",
        "        return final_label_list\n",
        "\n",
        "    def map_phonemes_to_segments(self, phoneme_labels, word):\n",
        "        \"\"\"\n",
        "        Maps each phoneme in the phoneme set to its corresponding segment (orthography) in the word.\n",
        "        \n",
        "        Args:\n",
        "            phoneme_labels (list): List of phoneme labels in order.\n",
        "            word (str): The word to map the phonemes to.\n",
        "\n",
        "        Returns:\n",
        "            list: List of tuples, each containing a phoneme and its corresponding segment.\n",
        "        \"\"\"\n",
        "        result = []\n",
        "        remaining_word = word\n",
        "\n",
        "        for phoneme_tup in phoneme_labels:\n",
        "            phoneme = phoneme_tup[0]\n",
        "        \n",
        "            if phoneme not in self.ipa_to_orthography:\n",
        "                # Skip the phoneme if not found in the map\n",
        "                continue\n",
        "\n",
        "            possible_spellings = self.ipa_to_orthography[phoneme]\n",
        "            # Sort spellings by length in descending order to prioritize the longest match\n",
        "            possible_spellings.sort(key=len, reverse=True)\n",
        "\n",
        "            matched_spelling = None\n",
        "            skipped_characters = []\n",
        "\n",
        "            while remaining_word: # WORKING: if possible_spellings are not exhaustive, will consider the rest a silient grapheme\n",
        "                for spelling in possible_spellings:\n",
        "                    if remaining_word.startswith(spelling):\n",
        "                        matched_spelling = spelling\n",
        "                        break\n",
        "\n",
        "                if matched_spelling:\n",
        "                    break\n",
        "\n",
        "                # If no match, treat the current character as part of a silent grapheme\n",
        "                skipped_characters.append(remaining_word[0])\n",
        "                remaining_word = remaining_word[1:]\n",
        "\n",
        "            if not matched_spelling: # reach the end of word but no match, possibly meaning the possible_spellings are not exhaustive\n",
        "                matched_spelling = \"\" \n",
        "\n",
        "            # Add skipped characters to the result as silent graphemes\n",
        "            for char in skipped_characters:\n",
        "                result.append((('', 1), char))\n",
        "\n",
        "            # Add the phoneme and matched spelling to the result\n",
        "            result.append((phoneme_tup, matched_spelling))\n",
        "\n",
        "            # Update the remaining word by removing the matched spelling\n",
        "            if matched_spelling:\n",
        "                remaining_word = remaining_word[len(matched_spelling):]\n",
        "\n",
        "        if remaining_word:\n",
        "            result.append((('', 1), remaining_word))\n",
        "            print(f\"Unmapped segment of the word remains: '{remaining_word}'\")\n",
        "\n",
        "        return result\n",
        "    \n",
        "    def map_phonemes_to_segments_for_api(self, phoneme_labels, word):\n",
        "        \"\"\"\n",
        "        Maps each phoneme in the phoneme set to its corresponding segment (orthography) in the word.\n",
        "        Same as above, but different format to return the API call\n",
        "        Args:\n",
        "            phoneme_labels (list): List of phoneme labels in order.\n",
        "            word (str): The word to map the phonemes to.\n",
        "\n",
        "        Returns:\n",
        "            list: List of tuples, each containing a phoneme and its corresponding segment.\n",
        "        \"\"\"\n",
        "        result = {\"word\": word, \"details\": []}\n",
        "        remaining_word = word\n",
        "\n",
        "        for phoneme_tup in phoneme_labels:\n",
        "            phoneme = phoneme_tup[0]\n",
        "        \n",
        "            if phoneme not in self.ipa_to_orthography:\n",
        "                # Skip the phoneme if not found in the map\n",
        "                continue\n",
        "\n",
        "            possible_spellings = self.ipa_to_orthography[phoneme]\n",
        "            # Sort spellings by length in descending order to prioritize the longest match\n",
        "            possible_spellings.sort(key=len, reverse=True)\n",
        "\n",
        "            matched_spelling = None\n",
        "            skipped_characters = []\n",
        "\n",
        "            while remaining_word: # WORKING: if possible_spellings are not exhaustive, will consider the rest a silient grapheme\n",
        "                for spelling in possible_spellings:\n",
        "                    if remaining_word.startswith(spelling):\n",
        "                        matched_spelling = spelling\n",
        "                        break\n",
        "\n",
        "                if matched_spelling:\n",
        "                    break\n",
        "\n",
        "                # If no match, treat the current character as part of a silent grapheme\n",
        "                skipped_characters.append(remaining_word[0])\n",
        "                remaining_word = remaining_word[1:]\n",
        "\n",
        "            if not matched_spelling: # reach the end of word but no match, possibly meaning the possible_spellings are not exhaustive\n",
        "                matched_spelling = \"\" \n",
        "\n",
        "            # Add skipped characters to the result as silent graphemes\n",
        "            for char in skipped_characters:\n",
        "                result[\"details\"].append({\n",
        "                    \"phoneme\": \"\",  # No phoneme\n",
        "                    \"word_segment\": char,\n",
        "                    \"label\": 1  # Assuming label for silent graphemes is 1\n",
        "                })\n",
        "\n",
        "            # Add the phoneme and matched spelling to the result\n",
        "            result[\"details\"].append({\n",
        "                \"phoneme\": phoneme_tup[0],\n",
        "                \"word_segment\": matched_spelling,\n",
        "                \"label\": phoneme_tup[1]  # Assuming `phoneme_tup[1]` is the label\n",
        "            })\n",
        "\n",
        "            # Update the remaining word by removing the matched spelling\n",
        "            if matched_spelling:\n",
        "                remaining_word = remaining_word[len(matched_spelling):]\n",
        "\n",
        "        if remaining_word: # WORKING: if possible_spellings are not exhaustive, will consider the rest a silient grapheme\n",
        "            result[\"details\"].append({\n",
        "                \"phoneme\": \"\",  # No phoneme\n",
        "                \"word_segment\": remaining_word,\n",
        "                \"label\": 1  \n",
        "            })\n",
        "            print(f\"Unmapped segment of the word remains: '{remaining_word}'\")\n",
        "\n",
        "        return result\n",
        "    \n",
        "    def generate_labels(self, display=True):\n",
        "        results = []\n",
        "        labels = self.evaluate_full_pronunciation()\n",
        "        for label, word in zip(labels, self.transcript.split()):\n",
        "            results.append(self.map_phonemes_to_segments(label, word))\n",
        "\n",
        "        if display:\n",
        "            self.display_ipa_phonemes_with_labels_and_segments(results, self.transcript)\n",
        "        return results    \n",
        "    \n",
        "    def generate_labels_for_api(self):\n",
        "        results = []\n",
        "        labels = self.evaluate_full_pronunciation()\n",
        "        for label, word in zip(labels, self.transcript.split()):\n",
        "            results.append(self.map_phonemes_to_segments_for_api(label, word))\n",
        "\n",
        "        return results    \n",
        "    \n",
        "    def handle_label_shenanigans(self, labels):\n",
        "        \"\"\"\n",
        "        Handle label shenanigans manually.\n",
        "        - if θ is the last phoneme in a word, and it's labelled 0, change it to 1\n",
        "        \"\"\"\n",
        "        for word in labels:\n",
        "            if word[-1][0] == \"θ\" and word[-1][1] == 0:\n",
        "                word[-1] = (\"θ\", 1)\n",
        "        return labels\n",
        "        \n",
        "    def display_ipa_phonemes_with_labels_and_segments(self, data, words):\n",
        "        \"\"\"\n",
        "        Display phonemes and their corresponding segments with labels.\n",
        "        Incorrect phonemes and segments are displayed in red.\n",
        "\n",
        "        Parameters:\n",
        "        data (list of lists): Each sublist represents a word, and each element is ((phoneme, label), corresponding_segment).\n",
        "        words (list of str): List of corresponding words for the data.\n",
        "        \"\"\"\n",
        "        # Initialize containers for styled phonemes and styled words\n",
        "        styled_phonemes = []\n",
        "        styled_words = []\n",
        "\n",
        "        for word_data, word in zip(data, words):\n",
        "            # Process phonemes and segments for each word\n",
        "            styled_phoneme_word = []\n",
        "            styled_word = []\n",
        "            for ((phoneme, label), segment) in word_data:\n",
        "                if label == 0:\n",
        "                    # Incorrect phoneme or segment\n",
        "                    styled_phoneme_word.append(f\"<span style='color:red;'>{phoneme}</span>\")\n",
        "                    styled_word.append(f\"<span style='color:red;'>{segment}</span>\")\n",
        "                else:\n",
        "                    # Correct phoneme and segment\n",
        "                    styled_phoneme_word.append(f\"<span>{phoneme}</span>\")\n",
        "                    styled_word.append(f\"<span>{segment}</span>\")\n",
        "\n",
        "            # Join phonemes for the current word and add to the phoneme container\n",
        "            styled_phonemes.append(\"\".join(styled_phoneme_word))\n",
        "            styled_words.append(\"\".join(styled_word))\n",
        "        # Combine phonemes and words for display\n",
        "        phoneme_content = \" \".join(styled_phonemes)\n",
        "        word_content = \" \".join(styled_words)\n",
        "\n",
        "        # Construct complete HTML\n",
        "        html_content = f\"<div style='font-size:20px;'>{phoneme_content} - <b>{word_content}</b></div>\"\n",
        "\n",
        "        # Display\n",
        "        display(HTML(html_content))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LhWeaxMV1CMW"
      },
      "source": [
        "# Run\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "NOTE: load audio in notebook and app.py is different, since app.py requires a different processing"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [],
      "source": [
        "# get uttered and ground truth phonemes\n",
        "import eng_to_ipa as ipa_conv\n",
        "import time\n",
        "import cmudict\n",
        "cmu = cmudict.dict()\n",
        "\n",
        "def run_full_pipeline(audio_path):\n",
        "    # Load audio and normalize\n",
        "    start_time = time.time()\n",
        "    audio_input = load_audio(audio_path)\n",
        "    input_values = processor(audio_input, return_tensors=\"pt\", sampling_rate=16000).input_values\n",
        "    input_values = input_values.to(device)\n",
        "\n",
        "    # Get transcript\n",
        "    transcript = transcribe_into_English(audio_input)\n",
        "    transcript = clean_text(transcript)\n",
        "    end_time = time.time()\n",
        "    print(f\"Time from call to finish transcription: {end_time - start_time} \")\n",
        "    print(transcript)\n",
        "    start_time = time.time()\n",
        "    # Step 3: Perform inference\n",
        "    with torch.no_grad():\n",
        "        logits = model(input_values).logits\n",
        "\n",
        "    # Step 4: Decode the phonemes\n",
        "    predicted_ids = torch.argmax(logits, dim=-1)\n",
        "    uttered_phonemes = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0] \n",
        "    end_time = time.time()\n",
        "    print(\"Time taken for inference:\", end_time - start_time)\n",
        "\n",
        "    start_time = time.time()\n",
        "    # init PronunciationAssessment instance\n",
        "    cur = PronunciationAssessment(transcript, uttered_phonemes)\n",
        "    end_time = time.time()\n",
        "    print(\"Time for PronunciationAssessment:\", end_time - start_time)\n",
        "\n",
        "    start_time = time.time()\n",
        "    #for each of the processed below, show me the time it took\n",
        "    cur.convert_transcript_into_phonemes()\n",
        "    end_time = time.time()\n",
        "    print(\"Time for transcript conversion:\", end_time - start_time)\n",
        "\n",
        "    start_time = time.time()\n",
        "    cur.clean_ipa_phonemes()\n",
        "    end_time = time.time()\n",
        "    print(\"Time for cleaning:\", end_time - start_time)\n",
        "    \n",
        "    start_time = time.time()\n",
        "    cur.split_phoneme_sequence()\n",
        "    end_time = time.time()\n",
        "    print(\"Time for splitting:\", end_time - start_time)\n",
        "\n",
        "    # generate the final labels\n",
        "    start_time = time.time()\n",
        "    labels = cur.generate_labels(display=False)\n",
        "    end_time = time.time()\n",
        "    print(\"Time for labels:\", end_time - start_time)\n",
        "    return labels"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Time from call to finish transcription: 0.36937952041625977 \n",
            "the proposal was rejected with your lack of supporting evidence\n",
            "Time taken for inference: 1.0722057819366455\n",
            "Time for PronunciationAssessment: 0.015699148178100586\n",
            "Time for transcript conversion: 0.0006413459777832031\n",
            "Time for cleaning: 3.266334533691406e-05\n",
            "Time for splitting: 0.00012755393981933594\n",
            "Time for labels: 0.002108335494995117\n",
            "\n",
            "Time from call to finish transcription: 0.281353235244751 \n",
            "voice recorder vy regal\n",
            "Time taken for inference: 0.6068234443664551\n",
            "Time for PronunciationAssessment: 0.015409231185913086\n",
            "Time for transcript conversion: 0.00011730194091796875\n",
            "Time for cleaning: 2.4557113647460938e-05\n",
            "Time for splitting: 4.9114227294921875e-05\n",
            "Time for labels: 0.00042057037353515625\n",
            "\n",
            "Time from call to finish transcription: 0.2739439010620117 \n",
            "this knife is the best knife\n",
            "Time taken for inference: 0.5824999809265137\n",
            "Time for PronunciationAssessment: 0.013269662857055664\n",
            "Time for transcript conversion: 0.00011086463928222656\n",
            "Time for cleaning: 2.3603439331054688e-05\n",
            "Time for splitting: 4.8160552978515625e-05\n",
            "Unmapped segment of the word remains: 'e'\n",
            "Unmapped segment of the word remains: 'e'\n",
            "Time for labels: 0.000988006591796875\n",
            "\n",
            "Time from call to finish transcription: 0.28787732124328613 \n",
            "book appeals me book appeal me\n",
            "Time taken for inference: 0.6887168884277344\n",
            "Time for PronunciationAssessment: 0.013826847076416016\n",
            "Time for transcript conversion: 9.5367431640625e-05\n",
            "Time for cleaning: 2.288818359375e-05\n",
            "Time for splitting: 5.602836608886719e-05\n",
            "Time for labels: 0.00047516822814941406\n",
            "\n",
            "Time from call to finish transcription: 0.3140981197357178 \n",
            "the person who loved football in my brother\n",
            "Time taken for inference: 0.7233633995056152\n",
            "Time for PronunciationAssessment: 0.01448965072631836\n",
            "Time for transcript conversion: 0.00012874603271484375\n",
            "Time for cleaning: 2.6941299438476562e-05\n",
            "Time for splitting: 7.367134094238281e-05\n",
            "Time for labels: 0.0011677742004394531\n",
            "\n",
            "Time from call to finish transcription: 0.32087230682373047 \n",
            "the person that sat on the floor he puns\n",
            "Time taken for inference: 1.1759374141693115\n",
            "Time for PronunciationAssessment: 0.015174150466918945\n",
            "Time for transcript conversion: 0.0001361370086669922\n",
            "Time for cleaning: 2.7179718017578125e-05\n",
            "Time for splitting: 7.772445678710938e-05\n",
            "Time for labels: 0.0011637210845947266\n",
            "\n"
          ]
        }
      ],
      "source": [
        "test_data = {'audio_paths': ['Audios/test-luat.wav', 'Audios/test1.wav', 'Audios/test2.wav', 'Audios/test3.wav', 'Audios/test4-bad.mp3', 'Audios/test5-bad.wav']}\n",
        "\n",
        "for audio_path in test_data['audio_paths']:\n",
        "    run_full_pipeline(audio_path)\n",
        "    print()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 32,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Time from call to finish transcription: 0.3182995319366455 \n",
            "the person who love football is my brother\n",
            "Time taken for inference: 0.5652985572814941\n",
            "Time for PronunciationAssessment: 0.015761852264404297\n",
            "Time for transcript conversion: 0.00013518333435058594\n",
            "Time for cleaning: 4.982948303222656e-05\n",
            "Time for splitting: 0.00010395050048828125\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div style='font-size:20px;'><span>ð</span><span>ə</span> <span>p</span><span>ɝ</span><span>s</span><span>ə</span><span>n</span> <span>h</span><span>u</span> <span>l</span><span>ʌ</span><span>v</span> <span>f</span><span>ʊ</span><span style='color:red;'>t</span><span>b</span><span>ɔ</span><span>l</span> <span>ɪ</span><span>z</span> <span>m</span><span>aɪ</span> <span>b</span><span>ɹ</span><span>ʌ</span><span>ð</span><span>ɚ</span> - <b><span>th</span><span>e</span> <span>p</span><span>er</span><span>s</span><span>o</span><span>n</span> <span>wh</span><span>o</span> <span>l</span><span>o</span><span>ve</span> <span>f</span><span>oo</span><span style='color:red;'>t</span><span>b</span><span>a</span><span>ll</span> <span>i</span><span>s</span> <span>m</span><span>y</span> <span>b</span><span>r</span><span>o</span><span>th</span><span>er</span></b></div>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Time for labels: 0.002454042434692383\n"
          ]
        }
      ],
      "source": [
        "audio_path = 'Audios/test4-machine-voice.mp3'\n",
        "# Load audio and normalize\n",
        "start_time = time.time()\n",
        "audio_input = load_audio(audio_path)\n",
        "input_values = processor(audio_input, return_tensors=\"pt\", sampling_rate=16000).input_values\n",
        "input_values = input_values.to(device)\n",
        "\n",
        "# Get transcript\n",
        "transcript = transcribe_into_English(audio_input)\n",
        "transcript = clean_text(transcript)\n",
        "end_time = time.time()\n",
        "print(f\"Time from call to finish transcription: {end_time - start_time} \")\n",
        "print(transcript)\n",
        "start_time = time.time()\n",
        "# Step 3: Perform inference\n",
        "with torch.no_grad():\n",
        "    logits = model(input_values).logits\n",
        "\n",
        "# Step 4: Decode the phonemes\n",
        "predicted_ids = torch.argmax(logits, dim=-1)\n",
        "uttered_phonemes = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0] \n",
        "end_time = time.time()\n",
        "print(\"Time taken for inference:\", end_time - start_time)\n",
        "\n",
        "start_time = time.time()\n",
        "# init PronunciationAssessment instance\n",
        "cur = PronunciationAssessment(transcript, uttered_phonemes)\n",
        "end_time = time.time()\n",
        "print(\"Time for PronunciationAssessment:\", end_time - start_time)\n",
        "\n",
        "start_time = time.time()\n",
        "#for each of the processed below, show me the time it took\n",
        "cur.convert_transcript_into_phonemes()\n",
        "end_time = time.time()\n",
        "print(\"Time for transcript conversion:\", end_time - start_time)\n",
        "\n",
        "start_time = time.time()\n",
        "cur.clean_ipa_phonemes()\n",
        "end_time = time.time()\n",
        "print(\"Time for cleaning:\", end_time - start_time)\n",
        "\n",
        "start_time = time.time()\n",
        "cur.split_phoneme_sequence()\n",
        "end_time = time.time()\n",
        "print(\"Time for splitting:\", end_time - start_time)\n",
        "\n",
        "# generate the final labels\n",
        "start_time = time.time()\n",
        "labels = cur.generate_labels(display=True)\n",
        "end_time = time.time()\n",
        "print(\"Time for labels:\", end_time - start_time)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": ".venv",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.21"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "01596c886f3949f9858b6d9d1f6f7c79": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0788436212f24f98b90ff780463ff6cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "087e6131e2204e85976c0b7d9814c066": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0961d49693c64270951f7c845cacc374": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "0991cf83ad504b89a03b1e4021688b78": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9bdd6ab3ea8d4a2d947e7111dc29654b",
            "max": 2272,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2f22a44b0c6f4e1e93e32a6f64caee85",
            "value": 2272
          }
        },
        "150019c3216543c6b4f489e30312e576": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_477c0d00dbfd45c3be98a6ff8978548d",
            "max": 354,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f1954f797f724e9383a5bc204185915e",
            "value": 354
          }
        },
        "169719f8fb5b415694731802bfd13eff": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "176633a381ce4caf90de05cf34dfadc9": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1ad78a56bf4b4555b58676c6849b548b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1b19f33407cd44e4a4329e8cc1144d73": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7006f450d55b4e59ab2a834ef016c11a",
            "placeholder": "​",
            "style": "IPY_MODEL_ba712b4d81924176881021e95484ced9",
            "value": "special_tokens_map.json: 100%"
          }
        },
        "283a21c83451452d8969f6411397bb16": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2f1cff6ac11a49088b3b5082617b24e4": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2f22a44b0c6f4e1e93e32a6f64caee85": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "32020436e6d04da693d120e5fc1a3ed3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "354537c460fd476f830d632ae8579e60": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "435a2cc7ffb14827a90ec445fb8d9977": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "4427805fbf7540388c2cee3e5b6123fb": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "477c0d00dbfd45c3be98a6ff8978548d": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4d92351da2e946eb891e768dd6e7f3c6": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "55daffcad1da49a3bf39a0f25f964c16": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9af1074a7ccc43f893c0a0b216cf1c0b",
            "placeholder": "​",
            "style": "IPY_MODEL_1ad78a56bf4b4555b58676c6849b548b",
            "value": "preprocessor_config.json: 100%"
          }
        },
        "56e1282076b143aa95a62c70a99d7373": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ed73d5ddb3864006b175fe52ef795fb4",
            "max": 96,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_435a2cc7ffb14827a90ec445fb8d9977",
            "value": 96
          }
        },
        "577cf34de0614f64a76c68fab94eb968": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_55daffcad1da49a3bf39a0f25f964c16",
              "IPY_MODEL_6d1da3c2efcb4f238cf1734e172d7ea8",
              "IPY_MODEL_b3249294ef9349a484a3b2f6199c1807"
            ],
            "layout": "IPY_MODEL_a1f4fcb471da44878b42cc16f9791034"
          }
        },
        "5a9ce9ccbae1461384d06d5ee1bbce85": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6d1da3c2efcb4f238cf1734e172d7ea8": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e8a023b93abb4efd9a664a8e270efedf",
            "max": 215,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_779dd02a5f7f49dcbb631e900b4dea30",
            "value": 215
          }
        },
        "6da91cce06ed496abfd08c19e99178db": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6e7269dd900b43899cd2ca4afb0cc23e": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b9bf6fdd7ec94776ac565c5824aad89c",
            "placeholder": "​",
            "style": "IPY_MODEL_f9bd1bfd216941a69c4200096648d208",
            "value": " 2.27k/2.27k [00:00&lt;00:00, 137kB/s]"
          }
        },
        "7006f450d55b4e59ab2a834ef016c11a": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "779dd02a5f7f49dcbb631e900b4dea30": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "80930655f9254c66b4a676fca773465b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d1ed211f2ad24d80b782946fcf366f67",
            "placeholder": "​",
            "style": "IPY_MODEL_0788436212f24f98b90ff780463ff6cb",
            "value": "config.json: 100%"
          }
        },
        "8fed23da4e754ff2b340d00c0cc79250": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9631f2bc95724c28bcdb2393c91aced7": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f285c11341cc4b1bb52b26016521c33e",
            "placeholder": "​",
            "style": "IPY_MODEL_2f1cff6ac11a49088b3b5082617b24e4",
            "value": " 354/354 [00:00&lt;00:00, 13.5kB/s]"
          }
        },
        "9af1074a7ccc43f893c0a0b216cf1c0b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9bdd6ab3ea8d4a2d947e7111dc29654b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9c8f7f4b6d6a4b0a98a2a86d17ab10f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_01596c886f3949f9858b6d9d1f6f7c79",
            "max": 1262059949,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_354537c460fd476f830d632ae8579e60",
            "value": 1262059949
          }
        },
        "9c9d71b94fac443fac8d17599474f819": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4d92351da2e946eb891e768dd6e7f3c6",
            "placeholder": "​",
            "style": "IPY_MODEL_32020436e6d04da693d120e5fc1a3ed3",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "9ff030ff167945d289853c74190c8cda": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c467413a1af041ef8f2942d4d84bb50a",
              "IPY_MODEL_9c8f7f4b6d6a4b0a98a2a86d17ab10f7",
              "IPY_MODEL_f1376e789ccd4c35877f295384cf9c40"
            ],
            "layout": "IPY_MODEL_e7eab4e2d8c448cfa962a9e2540c5270"
          }
        },
        "a0045505bdc04c4c96c4c11889a15b91": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_caec94d8946b4de2a198bbbc529b636f",
              "IPY_MODEL_ea3f1e329a614f50995713e654272d09",
              "IPY_MODEL_d040060ce8ee425691eafeaa30a463c8"
            ],
            "layout": "IPY_MODEL_169719f8fb5b415694731802bfd13eff"
          }
        },
        "a1f4fcb471da44878b42cc16f9791034": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ac385be2db5144e8a591837aaf403874": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b3249294ef9349a484a3b2f6199c1807": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6da91cce06ed496abfd08c19e99178db",
            "placeholder": "​",
            "style": "IPY_MODEL_283a21c83451452d8969f6411397bb16",
            "value": " 215/215 [00:00&lt;00:00, 11.4kB/s]"
          }
        },
        "b36a1c036916422fb093fe0403428e2e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b7a90f8a364b4ac9b60c4ec6abef89c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_1b19f33407cd44e4a4329e8cc1144d73",
              "IPY_MODEL_56e1282076b143aa95a62c70a99d7373",
              "IPY_MODEL_e196b029379d497c8cae678d23ee2ce6"
            ],
            "layout": "IPY_MODEL_f6dbb7249c1f4ecd8a29f3d21238eef1"
          }
        },
        "b9bf6fdd7ec94776ac565c5824aad89c": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ba712b4d81924176881021e95484ced9": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c467413a1af041ef8f2942d4d84bb50a": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ca448d6c9ae0451ab3b3b8bec84b6200",
            "placeholder": "​",
            "style": "IPY_MODEL_176633a381ce4caf90de05cf34dfadc9",
            "value": "pytorch_model.bin: 100%"
          }
        },
        "ca448d6c9ae0451ab3b3b8bec84b6200": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "caec94d8946b4de2a198bbbc529b636f": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_087e6131e2204e85976c0b7d9814c066",
            "placeholder": "​",
            "style": "IPY_MODEL_0961d49693c64270951f7c845cacc374",
            "value": "vocab.json: 100%"
          }
        },
        "ce11e3708d9d4f5e8916d38d136dcba8": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "cfc7ccfab053430485b94b320cc08cb4": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d040060ce8ee425691eafeaa30a463c8": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5a9ce9ccbae1461384d06d5ee1bbce85",
            "placeholder": "​",
            "style": "IPY_MODEL_de9e97e95fc84ae3a665ea7f35465ec0",
            "value": " 458/458 [00:00&lt;00:00, 16.9kB/s]"
          }
        },
        "d1ed211f2ad24d80b782946fcf366f67": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "de9e97e95fc84ae3a665ea7f35465ec0": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e196b029379d497c8cae678d23ee2ce6": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e1c71907bb7c40ffb79b8cf52888052a",
            "placeholder": "​",
            "style": "IPY_MODEL_4427805fbf7540388c2cee3e5b6123fb",
            "value": " 96.0/96.0 [00:00&lt;00:00, 3.44kB/s]"
          }
        },
        "e1c71907bb7c40ffb79b8cf52888052a": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e4b7fd9d52df46328c6b6029409825b6": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e7eab4e2d8c448cfa962a9e2540c5270": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e8a023b93abb4efd9a664a8e270efedf": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ea3f1e329a614f50995713e654272d09": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b36a1c036916422fb093fe0403428e2e",
            "max": 458,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_ce11e3708d9d4f5e8916d38d136dcba8",
            "value": 458
          }
        },
        "ed73d5ddb3864006b175fe52ef795fb4": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f1376e789ccd4c35877f295384cf9c40": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8fed23da4e754ff2b340d00c0cc79250",
            "placeholder": "​",
            "style": "IPY_MODEL_cfc7ccfab053430485b94b320cc08cb4",
            "value": " 1.26G/1.26G [00:30&lt;00:00, 42.8MB/s]"
          }
        },
        "f1954f797f724e9383a5bc204185915e": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f285c11341cc4b1bb52b26016521c33e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f5f38f0582b34c8599dabed07199ee9a": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_80930655f9254c66b4a676fca773465b",
              "IPY_MODEL_0991cf83ad504b89a03b1e4021688b78",
              "IPY_MODEL_6e7269dd900b43899cd2ca4afb0cc23e"
            ],
            "layout": "IPY_MODEL_ac385be2db5144e8a591837aaf403874"
          }
        },
        "f6dbb7249c1f4ecd8a29f3d21238eef1": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f9bd1bfd216941a69c4200096648d208": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fefcb0aae6e940c6b70fafbf4788ea0b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9c9d71b94fac443fac8d17599474f819",
              "IPY_MODEL_150019c3216543c6b4f489e30312e576",
              "IPY_MODEL_9631f2bc95724c28bcdb2393c91aced7"
            ],
            "layout": "IPY_MODEL_e4b7fd9d52df46328c6b6029409825b6"
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}