Spaces:

szhang99
/

fire-coml-summer-2022

Runtime error

App Files Files Community

Steven Zhang committed on Jul 12, 2022

Commit

a210e7f

•

0 Parent(s):

reset commit

Browse files

Files changed (16) hide show

.gitattributes +0 -0
.gitignore +129 -0
.idea/.gitignore +3 -0
.idea/2022-summer-speech-translation.iml +8 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +7 -0
AudioToText/audiotospeech.py +178 -0
AudioToText/testWav.wav +0 -0
Autocorrect/autocorrectreal.ipynb +131 -0
README.md +5 -0
TestTranslation/translation.py +280 -0
TestTranslation/translation_test.py +14 -0
TestTranslation/translation_train.py +14 -0
Video/Wav2Lip_TenDeepfake_eng.ipynb +0 -0

.gitattributes ADDED Viewed

File without changes

.gitignore ADDED Viewed

	@@ -0,0 +1,129 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/2022-summer-speech-translation.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/2022-summer-speech-translation.iml" filepath="$PROJECT_DIR$/.idea/2022-summer-speech-translation.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,7 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

AudioToText/audiotospeech.py ADDED Viewed

	@@ -0,0 +1,178 @@

+# -*- coding: utf-8 -*-
+# IMPORTS
+import os
+import numpy as np
+import requests
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+# MODEL STUFF
+# The set of characters accepted in the transcription.
+characters = [x for x in "abcdefghijklmnopqrstuvwxyz'?! "]
+# Mapping characters to integers (the out-of-vocabulary token is the empty string)
+char_to_num = keras.layers.StringLookup(vocabulary=characters, oov_token="")
+# Mapping integers back to original characters; built from char_to_num's own
+# vocabulary so both lookups agree on every index.
+num_to_char = keras.layers.StringLookup(
+    vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True
+)
+# STFT parameters used by AudioToText when computing the spectrogram.
+# The window length in samples.
+frame_length = 256
+# The number of samples to step between successive windows.
+frame_step = 160
+# The size of the FFT to apply. The model's input_dim is derived from this
+# value (fft_length // 2 + 1) where the model is instantiated.
+fft_length = 384
+# MODEL LOSS
+def CTCLoss(y_true, y_pred):
+    # Compute the training-time loss value
+    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
+    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
+    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
+    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
+    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
+    loss = keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
+    return loss
+# BUILD MODEL
+def build_model(input_dim, output_dim, rnn_layers=5, rnn_units=128):
+    """Build and compile a speech-recognition model similar to DeepSpeech2.
+
+    The network is two Conv2D blocks (each followed by batch normalisation and
+    ReLU), a stack of bidirectional GRU layers with dropout between them, a
+    dense ReLU layer with dropout, and a softmax classification layer.
+
+    Args:
+        input_dim: Size of the last axis of each input spectrogram frame.
+        output_dim: Number of output classes; the final layer has
+            output_dim + 1 units.
+        rnn_layers: Number of bidirectional GRU layers.
+        rnn_units: Units per GRU direction; the dense layer uses twice this.
+
+    Returns:
+        A keras.Model compiled with Adam (learning rate 1e-4) and CTCLoss.
+    """
+    # Model's input
+    input_spectrogram = layers.Input((None, input_dim), name="input")
+    # Expand the dimension to use 2D CNN.
+    x = layers.Reshape((-1, input_dim, 1), name="expand_dim")(input_spectrogram)
+    # Convolution layer 1
+    x = layers.Conv2D(
+        filters=32,
+        kernel_size=[11, 41],
+        strides=[2, 2],
+        padding="same",
+        use_bias=False,
+        name="conv_1",
+    )(x)
+    x = layers.BatchNormalization(name="conv_1_bn")(x)
+    x = layers.ReLU(name="conv_1_relu")(x)
+    # Convolution layer 2
+    x = layers.Conv2D(
+        filters=32,
+        kernel_size=[11, 21],
+        strides=[1, 2],
+        padding="same",
+        use_bias=False,
+        name="conv_2",
+    )(x)
+    x = layers.BatchNormalization(name="conv_2_bn")(x)
+    x = layers.ReLU(name="conv_2_relu")(x)
+    # Reshape the resulted volume to feed the RNNs layers:
+    # the last two axes are merged into a single feature axis.
+    x = layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x)
+    # RNN layers
+    for i in range(1, rnn_layers + 1):
+        recurrent = layers.GRU(
+            units=rnn_units,
+            activation="tanh",
+            recurrent_activation="sigmoid",
+            use_bias=True,
+            return_sequences=True,
+            reset_after=True,
+            name=f"gru_{i}",
+        )
+        x = layers.Bidirectional(
+            recurrent, name=f"bidirectional_{i}", merge_mode="concat"
+        )(x)
+        # Dropout goes between recurrent layers only, not after the last one.
+        if i < rnn_layers:
+            x = layers.Dropout(rate=0.5)(x)
+    # Dense layer
+    x = layers.Dense(units=rnn_units * 2, name="dense_1")(x)
+    x = layers.ReLU(name="dense_1_relu")(x)
+    x = layers.Dropout(rate=0.5)(x)
+    # Classification layer; one unit more than output_dim
+    # (presumably reserved for the CTC blank label -- confirm).
+    output = layers.Dense(units=output_dim + 1, activation="softmax")(x)
+    # Model
+    model = keras.Model(input_spectrogram, output, name="DeepSpeech_2")
+    # Optimizer
+    opt = keras.optimizers.Adam(learning_rate=1e-4)
+    # Compile the model and return
+    model.compile(optimizer=opt, loss=CTCLoss)
+    return model
+# GET AND INSTANTIATE MODEL
+# input_dim is derived from fft_length, output_dim from the character
+# vocabulary; rnn_units=512 overrides build_model's default of 128.
+model = build_model(
+    input_dim = fft_length // 2 + 1,
+    output_dim = char_to_num.vocabulary_size(),
+    rnn_units = 512,
+)
+# GET TEXT FROM MODEL PREDICTION
+# A utility function to decode the output of the network
+def decode_batch_predictions(pred):
+    input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    # Use greedy search. For complex tasks, you can use beam search
+    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
+    # Iterate over the results and get back the text
+    output_text = []
+    for result in results:
+        result = tf.strings.reduce_join(num_to_char(result)).numpy().decode("utf-8")
+        output_text.append(result)
+    return output_text
+# PATH TO CKPT
+# google share link
+ckpt_link = 'https://drive.google.com/file/d/14mT_wJMuIqUEJSS12aAc6bnPCjYuLWGf/view?usp=sharing'
+# Define the local filename to save data
+local_file = 'AudioToTextCKPT.hdf5'
+# Make http request for remote file data
+data = requests.get(ckpt_link)
+# Save file data to local copy
+with open(local_file, 'wb')as file:
+    file.write(data.content)
+ckpt = local_file
+# LOAD CKPT TO MODEL
+model.load_weights(ckpt)
+# CONVERT AUDIO TO TEXT
+def AudioToText(wav_file):
+    ###########################################
+    ##  Process the Audio
+    ##########################################
+    # 1. Read wav file
+    file = tf.io.read_file(wav_file)
+    # 2. Decode the wav file
+    audio, _ = tf.audio.decode_wav(file)
+    audio = tf.squeeze(audio, axis=-1)
+    # 3. Change type to float
+    audio = tf.cast(audio, tf.float32)
+    # 4. Get the spectrogram
+    spectrogram = tf.signal.stft(
+        audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length
+    )
+    # 5. We only need the magnitude, which can be derived by applying tf.abs
+    spectrogram = tf.abs(spectrogram)
+    spectrogram = tf.math.pow(spectrogram, 0.5)
+    # 6. normalisation
+    means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
+    stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
+    spectrogram = (spectrogram - means) / (stddevs + 1e-10)
+    pred = model.predict(spectrogram)
+    output_text = decode_batch_predictions(pred)
+    return output_text
+# testing model
+print(AudioToText('testWav.wav'))

AudioToText/testWav.wav ADDED Viewed

Binary file (288 kB). View file

Autocorrect/autocorrectreal.ipynb ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wOvxbAShg-_s",
+        "outputId": "0e9a0f9a-fd6e-4ce0-81f6-8da736bd06be"
+      },
+      "outputs": [],
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "THLGsHmchJ9g",
+        "outputId": "d590fb47-7b15-4176-9b6e-719090ed2cbd"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install textdistance"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eFxAvy03hPCX"
+      },
+      "outputs": [],
+      "source": [
+        "import re\n",
+        "from collections import Counter\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import textdistance\n",
+        "\n",
+        "w = []\n",
+        "with open('/content/drive/MyDrive/words.txt', 'r') as f:\n",
+        "  file_name_data = f.read()\n",
+        "  file_name_data = file_name_data.lower()\n",
+        "  w = re.findall('\\w+', file_name_data)\n",
+        "\n",
+        "v = set(w)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RPON8Pm7h9Dx",
+        "outputId": "dd1309fd-3362-41c9-8f19-affe4739df3e"
+      },
+      "outputs": [],
+      "source": [
+        "print(f\"First 10 words: \\n{w[0:10]}\")\n",
+        "print(f\"{len(v)} total words \")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "U4s_UDWKig11"
+      },
+      "outputs": [],
+      "source": [
+        "from nltk.metrics.distance import edit_distance\n",
+        "def edit(input_sentence):\n",
+        "  sentence = input_sentence.split()\n",
+        "  \n",
+        "  for i in sentence:\n",
+        "    if i.lower() in w:\n",
+        "      continue\n",
+        "    else:\n",
+        "      distances = ((edit_distance(i,\n",
+        "                                    word), word)\n",
+        "                     for word in w)\n",
+        "      closest = min(distances)\n",
+        "      sentence[sentence.index(i)] = closest[1]\n",
+        "  output_sentence = ' '.join(sentence)\n",
+        "\n",
+        "  return output_sentence"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "c0af01o_i5X0",
+        "outputId": "fff4600b-163d-40c8-ce3b-c0b735ec286e"
+      },
+      "outputs": [],
+      "source": [
+        "print(edit(\"My namee is  uncele Steven\"))\n",
+        "print(edit(\"moneeyeh is greeat\"))"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "autocorrectreal.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

README.md ADDED Viewed

	@@ -0,0 +1,5 @@

+# 2022-summer-speech-translation
+To Run:
+- TODO: add instructions for running the project.

TestTranslation/translation.py ADDED Viewed

	@@ -0,0 +1,280 @@

+# -*- coding: utf-8 -*-
+"""translation.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1PADMvkToYgpdhvQYlZw4q8O-gLvsvGmK
+"""
+import pathlib
+import random
+import string
+import re
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+# googled fix to "cannot find TextVectorization"
+from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
+import os
+import gdown
+# Download and extract the English-Spanish sentence-pair archive.
+text_file = keras.utils.get_file(
+    fname = "spa-eng.zip",
+    origin = "http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip",
+    extract = True,
+)
+text_file = pathlib.Path(text_file).parent / "spa-eng" / "spa.txt"
+# change: added utf-8 encoding
+with open(text_file, encoding="utf-8") as f:
+    # Drop the last element, left over after the final newline.
+    lines = f.read().split("\n")[:-1]
+# Each line holds an English sentence and its Spanish translation separated by
+# a tab; the Spanish side is wrapped in "[start]" / "[end]" markers.
+text_pairs = []
+for line in lines:
+    eng, spa = line.split("\t")
+    spa = "[start] " + spa + " [end]"
+    text_pairs.append((eng, spa))
+# Print a few random pairs as a sanity check.
+for _ in range(5):
+    print(random.choice(text_pairs))
+# NOTE(review): the shuffle is unseeded, so the train/val/test split -- and the
+# vocabularies adapted from train_pairs -- can differ from run to run. Confirm
+# this is compatible with the pre-trained checkpoint loaded later in this file.
+random.shuffle(text_pairs)
+# 15% validation, 15% test, the remainder for training.
+num_val_samples = int(0.15 * len(text_pairs))
+num_train_samples = len(text_pairs) - 2 * num_val_samples
+train_pairs = text_pairs[:num_train_samples]
+val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
+test_pairs = text_pairs[num_train_samples + num_val_samples :]
+print(f"{len(text_pairs)} total pairs")
+print(f"{len(train_pairs)} training pairs")
+print(f"{len(val_pairs)} validation pairs")
+print(f"{len(test_pairs)} test pairs")
+# Characters stripped from Spanish text: ASCII punctuation plus "¿", except
+# the square brackets, which are kept so "[start]" and "[end]" survive.
+strip_chars = string.punctuation + "¿"
+strip_chars = strip_chars.replace("[", "")
+strip_chars = strip_chars.replace("]", "")
+# Vocabulary cap, token sequence length and batch size shared by the
+# vectorizers, datasets and model below.
+vocab_size = 15000
+sequence_length = 20
+batch_size = 64
+def custom_standardization(input_string):
+    lowercase = tf.strings.lower(input_string)
+    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")
+# English vectorizer: default standardization, sequence_length token ids per text.
+eng_vectorization = TextVectorization(
+    max_tokens=vocab_size,
+    output_mode="int",
+    output_sequence_length=sequence_length,
+)
+# Spanish vectorizer: custom standardization and one extra token per text,
+# because format_dataset slices the result into decoder inputs ([:, :-1]) and
+# targets ([:, 1:]).
+spa_vectorization = TextVectorization(
+    max_tokens=vocab_size,
+    output_mode="int",
+    output_sequence_length=sequence_length + 1,
+    standardize=custom_standardization,
+)
+# Both vocabularies are learned from the training split only.
+train_eng_texts = [pair[0] for pair in train_pairs]
+train_spa_texts = [pair[1] for pair in train_pairs]
+eng_vectorization.adapt(train_eng_texts)
+spa_vectorization.adapt(train_spa_texts)
+def format_dataset(eng, spa):
+    eng = eng_vectorization(eng)
+    spa = spa_vectorization(spa)
+    return (
+        {
+            "encoder_inputs": eng,
+            "decoder_inputs": spa[:, :-1],
+        },
+        spa[:, 1:],
+    )
+def make_dataset(pairs):
+    eng_texts, spa_texts = zip(*pairs)
+    eng_texts = list(eng_texts)
+    spa_texts = list(spa_texts)
+    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, spa_texts))
+    dataset = dataset.batch(batch_size)
+    dataset = dataset.map(format_dataset)
+    return dataset.shuffle(2048).prefetch(16).cache()
+# Training and validation datasets (test_pairs is left as raw text).
+train_ds = make_dataset(train_pairs)
+val_ds = make_dataset(val_pairs)
+# Print the shapes of one batch as a sanity check.
+for inputs, targets in train_ds.take(1):
+    print(f'inputs["encoder_inputs"].shape: {inputs["encoder_inputs"].shape}')
+    print(f'inputs["decoder_inputs"].shape: {inputs["decoder_inputs"].shape}')
+    print(f"targets.shape: {targets.shape}")
+class TransformerEncoder(layers.Layer):
+    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
+        super(TransformerEncoder, self).__init__(**kwargs)
+        self.embed_dim = embed_dim
+        self.dense_dim = dense_dim
+        self.num_heads = num_heads
+        self.attention = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim
+        )
+        self.dense_proj = keras.Sequential(
+            [
+                layers.Dense(dense_dim, activation="relu"),
+                layers.Dense(embed_dim),
+            ]
+        )
+        self.layernorm_1 = layers.LayerNormalization()
+        self.layernorm_2 = layers.LayerNormalization()
+        self.supports_masking = True
+    def call(self, inputs, mask=None):
+        if mask is not None:
+            padding_mask = tf.cast(mask[:, tf.newaxis, tf.newaxis, :], dtype="int32")
+        attention_output = self.attention(
+            query=inputs, value=inputs, key=inputs, attention_mask=padding_mask
+        )
+        proj_input = self.layernorm_1(inputs + attention_output)
+        proj_output = self.dense_proj(proj_input)
+        return self.layernorm_2(proj_input + proj_output)
+class PositionalEmbedding(layers.Layer):
+    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
+        super(PositionalEmbedding, self).__init__(**kwargs)
+        self.token_embeddings = layers.Embedding(
+            input_dim=vocab_size, output_dim=embed_dim
+        )
+        self.position_embeddings = layers.Embedding(
+            input_dim=sequence_length, output_dim=embed_dim
+        )
+        self.sequence_length = sequence_length
+        self.vocab_size = vocab_size
+        self.embed_dim = embed_dim
+    def call(self, inputs):
+        length = tf.shape(inputs)[-1]
+        positions = tf.range(start=0, limit=length, delta=1)
+        embedded_tokens = self.token_embeddings(inputs)
+        embedded_positions = self.position_embeddings(positions)
+        return embedded_tokens + embedded_positions
+    def compute_mask(self, inputs, mask=None):
+        return tf.math.not_equal(inputs, 0)
+class TransformerDecoder(layers.Layer):
+    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
+        super(TransformerDecoder, self).__init__(**kwargs)
+        self.embed_dim = embed_dim
+        self.latent_dim = latent_dim
+        self.num_heads = num_heads
+        self.attention_1 = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim
+        )
+        self.attention_2 = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim
+        )
+        self.dense_proj = keras.Sequential(
+            [
+                layers.Dense(latent_dim, activation="relu"),
+                layers.Dense(embed_dim),
+            ]
+        )
+        self.layernorm_1 = layers.LayerNormalization()
+        self.layernorm_2 = layers.LayerNormalization()
+        self.layernorm_3 = layers.LayerNormalization()
+        self.supports_masking = True
+    def call(self, inputs, encoder_outputs, mask=None):
+        causal_mask = self.get_causal_attention_mask(inputs)
+        if mask is not None:
+            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
+            padding_mask = tf.minimum(padding_mask, causal_mask)
+        attention_output_1 = self.attention_1(
+            query=inputs, value=inputs, key=inputs, attention_mask=causal_mask
+        )
+        out_1 = self.layernorm_1(inputs + attention_output_1)
+        attention_output_2 = self.attention_2(
+            query=out_1,
+            value=encoder_outputs,
+            key=encoder_outputs,
+            attention_mask=padding_mask,
+        )
+        out_2 = self.layernorm_2(out_1 + attention_output_2)
+        proj_output = self.dense_proj(out_2)
+        return self.layernorm_3(out_2 + proj_output)
+    def get_causal_attention_mask(self, inputs):
+        input_shape = tf.shape(inputs)
+        batch_size, sequence_length = input_shape[0], input_shape[1]
+        i = tf.range(sequence_length)[:, tf.newaxis]
+        j = tf.range(sequence_length)
+        mask = tf.cast(i >= j, dtype="int32")
+        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
+        mult = tf.concat(
+            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
+            axis=0,
+        )
+        return tf.tile(mask, mult)
+# Model hyper-parameters.
+embed_dim = 256
+latent_dim = 2048
+num_heads = 8
+# Encoder: token ids -> positional embedding -> one TransformerEncoder block.
+encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")
+x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
+encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x)
+encoder = keras.Model(encoder_inputs, encoder_outputs)
+# Decoder: built as its own model that takes target token ids plus the encoded
+# source sequence and outputs a softmax over the vocabulary at each position.
+decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
+encoded_seq_inputs = keras.Input(shape=(None, embed_dim), name="decoder_state_inputs")
+x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
+x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, encoded_seq_inputs)
+x = layers.Dropout(0.5)(x)
+decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)
+decoder = keras.Model([decoder_inputs, encoded_seq_inputs], decoder_outputs)
+# Wire the decoder to the encoder's output to form the end-to-end model.
+decoder_outputs = decoder([decoder_inputs, encoder_outputs])
+transformer = keras.Model(
+    [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer"
+)
+transformer.summary()
+#load weights using gdown
+# NOTE(review): this download and weight load run on every import of this
+# module, including from the training script. The load path presumably matches
+# the name of the downloaded Drive folder -- verify.
+gdown.download_folder("https://drive.google.com/drive/folders/1DwN-MlL6MMh7qVJbwoLrWBSMVBN5zbBi")
+transformer.load_weights("./EngToSpanishckpts/cp.ckpt").expect_partial()
+# Lookup table from Spanish token index to token string, used when decoding.
+spa_vocab = spa_vectorization.get_vocabulary()
+spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))
+# Upper bound on the number of tokens generated by decode_sequence.
+max_decoded_sentence_length = 20
+def decode_sequence(input_sentence):
+    tokenized_input_sentence = eng_vectorization([input_sentence])
+    decoded_sentence = "[start]"
+    for i in range(max_decoded_sentence_length):
+        tokenized_target_sentence = spa_vectorization([decoded_sentence])[:, :-1]
+        predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])
+        sampled_token_index = np.argmax(predictions[0, i, :])
+        sampled_token = spa_index_lookup[sampled_token_index]
+        decoded_sentence += " " + sampled_token
+        if sampled_token == "[end]":
+            break
+    return decoded_sentence

TestTranslation/translation_test.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from translation import *
+test_eng_texts = [pair[0] for pair in test_pairs]
+input_sentence = "This is a test."
+translated = decode_sequence(input_sentence)
+print(input_sentence)
+print(translated)
+for _ in range(30):
+    input_sentence = random.choice(test_eng_texts)
+    translated = decode_sequence(input_sentence)
+    print(input_sentence)
+    print(translated)

TestTranslation/translation_train.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from translation import *
+# steven's addition: saving checkpoints
+checkpoint_path = "ckpts-translator/cp.ckpt"
+checkpoint_dir = os.path.dirname(checkpoint_path)
+cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
+                                                 save_weights_only=True,
+                                                 verbose=1)
+epochs = 20  # This should be at least 30 for convergence
+transformer.compile(
+    "rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
+)
+transformer.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=[cp_callback])

Video/Wav2Lip_TenDeepfake_eng.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff