camilosegura
commited on
Commit
•
7d873e2
1
Parent(s):
d1c8358
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- .gitignore +194 -0
- Lenguajes soportados Deep Translator.txt +135 -0
- README.md +38 -8
- Tutorial meta-mms.txt +72 -0
- app.py +354 -0
- assets/images/favico.ico +0 -0
- assets/images/icono.png +0 -0
- assets/images/logo.png +0 -0
- assets/styles/css.css +3 -0
- audio_output.mp3 +3 -0
- data/eng/D_100000.pth +3 -0
- data/eng/G_100000.pth +3 -0
- data/eng/config.json +87 -0
- data/eng/vocab.txt +38 -0
- data/gum/D_100000.pth +3 -0
- data/gum/G_100000.pth +3 -0
- data/gum/config.json +87 -0
- data/gum/vocab.txt +43 -0
- data/quz/D_100000.pth +3 -0
- data/quz/G_100000.pth +3 -0
- data/quz/config.json +87 -0
- data/quz/vocab.txt +37 -0
- data/spa/D_100000.pth +3 -0
- data/spa/G_100000.pth +3 -0
- data/spa/config.json +87 -0
- data/spa/vocab.txt +45 -0
- logs.txt +0 -0
- output.wav +0 -0
- requirements.txt +4 -0
- video.mp4 +3 -0
- video_subtitles.srt +117 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
audio_output.mp3 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
video.mp4 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Created by .ignore support plugin (hsz.mobi)
|
2 |
+
### Python template
|
3 |
+
# Byte-compiled / optimized / DLL files
|
4 |
+
__pycache__/
|
5 |
+
*.py[cod]
|
6 |
+
*$py.class
|
7 |
+
|
8 |
+
# C extensions
|
9 |
+
*.so
|
10 |
+
|
11 |
+
# Distribution / packaging
|
12 |
+
.Python
|
13 |
+
data/
|
14 |
+
flagged/
|
15 |
+
env/
|
16 |
+
venv/
|
17 |
+
build/
|
18 |
+
develop-eggs/
|
19 |
+
dist/
|
20 |
+
downloads/
|
21 |
+
eggs/
|
22 |
+
.eggs/
|
23 |
+
lib/
|
24 |
+
lib64/
|
25 |
+
parts/
|
26 |
+
sdist/
|
27 |
+
var/
|
28 |
+
*.egg-info/
|
29 |
+
.installed.cfg
|
30 |
+
*.egg
|
31 |
+
|
32 |
+
# PyInstaller
|
33 |
+
# Usually these files are written by a python script from a template
|
34 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
35 |
+
*.manifest
|
36 |
+
*.spec
|
37 |
+
|
38 |
+
# Installer logs
|
39 |
+
pip-log.txt
|
40 |
+
pip-delete-this-directory.txt
|
41 |
+
|
42 |
+
# Unit test / coverage reports
|
43 |
+
htmlcov/
|
44 |
+
.tox/
|
45 |
+
.coverage
|
46 |
+
.coverage.*
|
47 |
+
.cache
|
48 |
+
nosetests.xml
|
49 |
+
coverage.xml
|
50 |
+
*,cover
|
51 |
+
.hypothesis/
|
52 |
+
|
53 |
+
# Translations
|
54 |
+
*.mo
|
55 |
+
*.pot
|
56 |
+
|
57 |
+
# Django stuff:
|
58 |
+
*.log
|
59 |
+
local_settings.py
|
60 |
+
|
61 |
+
# Flask stuff:
|
62 |
+
instance/
|
63 |
+
.webassets-cache
|
64 |
+
|
65 |
+
# Scrapy stuff:
|
66 |
+
.scrapy
|
67 |
+
|
68 |
+
# Sphinx documentation
|
69 |
+
docs/_build/
|
70 |
+
|
71 |
+
# PyBuilder
|
72 |
+
target/
|
73 |
+
|
74 |
+
# IPython Notebook
|
75 |
+
.ipynb_checkpoints
|
76 |
+
|
77 |
+
# pyenv
|
78 |
+
.python-version
|
79 |
+
|
80 |
+
# celery beat schedule file
|
81 |
+
celerybeat-schedule
|
82 |
+
|
83 |
+
# dotenv
|
84 |
+
.env
|
85 |
+
|
86 |
+
# virtualenv
|
87 |
+
venv/
|
88 |
+
ENV/
|
89 |
+
|
90 |
+
# Spyder project settings
|
91 |
+
.spyderproject
|
92 |
+
|
93 |
+
# Rope project settings
|
94 |
+
.ropeproject
|
95 |
+
### VirtualEnv template
|
96 |
+
# Virtualenv
|
97 |
+
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
|
98 |
+
[Bb]in
|
99 |
+
[Ii]nclude
|
100 |
+
[Ll]ib
|
101 |
+
[Ll]ib64
|
102 |
+
[Ll]ocal
|
103 |
+
[Ss]cripts
|
104 |
+
pyvenv.cfg
|
105 |
+
.venv
|
106 |
+
pip-selfcheck.json
|
107 |
+
|
108 |
+
### JetBrains template
|
109 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
110 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
111 |
+
|
112 |
+
# User-specific stuff
|
113 |
+
.idea/**/workspace.xml
|
114 |
+
.idea/**/tasks.xml
|
115 |
+
.idea/**/usage.statistics.xml
|
116 |
+
.idea/**/dictionaries
|
117 |
+
.idea/**/shelf
|
118 |
+
|
119 |
+
# AWS User-specific
|
120 |
+
.idea/**/aws.xml
|
121 |
+
|
122 |
+
# Generated files
|
123 |
+
.idea/**/contentModel.xml
|
124 |
+
|
125 |
+
# Sensitive or high-churn files
|
126 |
+
.idea/**/dataSources/
|
127 |
+
.idea/**/dataSources.ids
|
128 |
+
.idea/**/dataSources.local.xml
|
129 |
+
.idea/**/sqlDataSources.xml
|
130 |
+
.idea/**/dynamic.xml
|
131 |
+
.idea/**/uiDesigner.xml
|
132 |
+
.idea/**/dbnavigator.xml
|
133 |
+
|
134 |
+
# Gradle
|
135 |
+
.idea/**/gradle.xml
|
136 |
+
.idea/**/libraries
|
137 |
+
|
138 |
+
# Gradle and Maven with auto-import
|
139 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
140 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
141 |
+
# auto-import.
|
142 |
+
# .idea/artifacts
|
143 |
+
# .idea/compiler.xml
|
144 |
+
# .idea/jarRepositories.xml
|
145 |
+
# .idea/modules.xml
|
146 |
+
# .idea/*.iml
|
147 |
+
# .idea/modules
|
148 |
+
# *.iml
|
149 |
+
# *.ipr
|
150 |
+
|
151 |
+
# CMake
|
152 |
+
cmake-build-*/
|
153 |
+
|
154 |
+
# Mongo Explorer plugin
|
155 |
+
.idea/**/mongoSettings.xml
|
156 |
+
|
157 |
+
# File-based project format
|
158 |
+
*.iws
|
159 |
+
|
160 |
+
# IntelliJ
|
161 |
+
out/
|
162 |
+
|
163 |
+
# mpeltonen/sbt-idea plugin
|
164 |
+
.idea_modules/
|
165 |
+
|
166 |
+
# JIRA plugin
|
167 |
+
atlassian-ide-plugin.xml
|
168 |
+
|
169 |
+
# Cursive Clojure plugin
|
170 |
+
.idea/replstate.xml
|
171 |
+
|
172 |
+
# SonarLint plugin
|
173 |
+
.idea/sonarlint/
|
174 |
+
|
175 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
176 |
+
com_crashlytics_export_strings.xml
|
177 |
+
crashlytics.properties
|
178 |
+
crashlytics-build.properties
|
179 |
+
fabric.properties
|
180 |
+
|
181 |
+
# Editor-based Rest Client
|
182 |
+
.idea/httpRequests
|
183 |
+
|
184 |
+
# Android studio 3.1+ serialized cache file
|
185 |
+
.idea/caches/build_file_checksums.ser
|
186 |
+
|
187 |
+
# idea folder, uncomment if you don't need it
|
188 |
+
.idea
|
189 |
+
*.gz
|
190 |
+
*.wav
|
191 |
+
|
192 |
+
main.py
|
193 |
+
mms_ars.py
|
194 |
+
.DS_Store
|
Lenguajes soportados Deep Translator.txt
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
'afrikaans': 'af',
|
3 |
+
'albanian': 'sq',
|
4 |
+
'amharic': 'am',
|
5 |
+
'arabic': 'ar',
|
6 |
+
'armenian': 'hy',
|
7 |
+
'assamese': 'as',
|
8 |
+
'aymara': 'ay',
|
9 |
+
'azerbaijani': 'az',
|
10 |
+
'bambara': 'bm',
|
11 |
+
'basque': 'eu',
|
12 |
+
'belarusian': 'be',
|
13 |
+
'bengali': 'bn',
|
14 |
+
'bhojpuri': 'bho',
|
15 |
+
'bosnian': 'bs',
|
16 |
+
'bulgarian': 'bg',
|
17 |
+
'catalan': 'ca',
|
18 |
+
'cebuano': 'ceb',
|
19 |
+
'chichewa': 'ny',
|
20 |
+
'chinese (simplified)': 'zh-CN',
|
21 |
+
'chinese (traditional)': 'zh-TW',
|
22 |
+
'corsican': 'co',
|
23 |
+
'croatian': 'hr',
|
24 |
+
'czech': 'cs',
|
25 |
+
'danish': 'da',
|
26 |
+
'dhivehi': 'dv',
|
27 |
+
'dogri': 'doi',
|
28 |
+
'dutch': 'nl',
|
29 |
+
'english': 'en',
|
30 |
+
'esperanto': 'eo',
|
31 |
+
'estonian': 'et',
|
32 |
+
'ewe': 'ee',
|
33 |
+
'filipino': 'tl',
|
34 |
+
'finnish': 'fi',
|
35 |
+
'french': 'fr',
|
36 |
+
'frisian': 'fy',
|
37 |
+
'galician': 'gl',
|
38 |
+
'georgian': 'ka',
|
39 |
+
'german': 'de',
|
40 |
+
'greek': 'el',
|
41 |
+
'guarani': 'gn',
|
42 |
+
'gujarati': 'gu',
|
43 |
+
'haitian creole': 'ht',
|
44 |
+
'hausa': 'ha',
|
45 |
+
'hawaiian': 'haw',
|
46 |
+
'hebrew': 'iw',
|
47 |
+
'hindi': 'hi',
|
48 |
+
'hmong': 'hmn',
|
49 |
+
'hungarian': 'hu',
|
50 |
+
'icelandic': 'is',
|
51 |
+
'igbo': 'ig',
|
52 |
+
'ilocano': 'ilo',
|
53 |
+
'indonesian': 'id',
|
54 |
+
'irish': 'ga',
|
55 |
+
'italian': 'it',
|
56 |
+
'japanese': 'ja',
|
57 |
+
'javanese': 'jw',
|
58 |
+
'kannada': 'kn',
|
59 |
+
'kazakh': 'kk',
|
60 |
+
'khmer': 'km',
|
61 |
+
'kinyarwanda': 'rw',
|
62 |
+
'konkani': 'gom',
|
63 |
+
'korean': 'ko',
|
64 |
+
'krio': 'kri',
|
65 |
+
'kurdish (kurmanji)': 'ku',
|
66 |
+
'kurdish (sorani)': 'ckb',
|
67 |
+
'kyrgyz': 'ky',
|
68 |
+
'lao': 'lo',
|
69 |
+
'latin': 'la',
|
70 |
+
'latvian': 'lv',
|
71 |
+
'lingala': 'ln',
|
72 |
+
'lithuanian': 'lt',
|
73 |
+
'luganda': 'lg',
|
74 |
+
'luxembourgish': 'lb',
|
75 |
+
'macedonian': 'mk',
|
76 |
+
'maithili': 'mai',
|
77 |
+
'malagasy': 'mg',
|
78 |
+
'malay': 'ms',
|
79 |
+
'malayalam': 'ml',
|
80 |
+
'maltese': 'mt',
|
81 |
+
'maori': 'mi',
|
82 |
+
'marathi': 'mr',
|
83 |
+
'meiteilon (manipuri)': 'mni-Mtei',
|
84 |
+
'mizo': 'lus',
|
85 |
+
'mongolian': 'mn',
|
86 |
+
'myanmar': 'my',
|
87 |
+
'nepali': 'ne',
|
88 |
+
'norwegian': 'no',
|
89 |
+
'odia (oriya)': 'or',
|
90 |
+
'oromo': 'om',
|
91 |
+
'pashto': 'ps',
|
92 |
+
'persian': 'fa',
|
93 |
+
'polish': 'pl',
|
94 |
+
'portuguese': 'pt',
|
95 |
+
'punjabi': 'pa',
|
96 |
+
'quechua': 'qu',
|
97 |
+
'romanian': 'ro',
|
98 |
+
'russian': 'ru',
|
99 |
+
'samoan': 'sm',
|
100 |
+
'sanskrit': 'sa',
|
101 |
+
'scots gaelic': 'gd',
|
102 |
+
'sepedi': 'nso',
|
103 |
+
'serbian': 'sr',
|
104 |
+
'sesotho': 'st',
|
105 |
+
'shona': 'sn',
|
106 |
+
'sindhi': 'sd',
|
107 |
+
'sinhala': 'si',
|
108 |
+
'slovak': 'sk',
|
109 |
+
'slovenian': 'sl',
|
110 |
+
'somali': 'so',
|
111 |
+
'spanish': 'es',
|
112 |
+
'sundanese': 'su',
|
113 |
+
'swahili': 'sw',
|
114 |
+
'swedish': 'sv',
|
115 |
+
'tajik': 'tg',
|
116 |
+
'tamil': 'ta',
|
117 |
+
'tatar': 'tt',
|
118 |
+
'telugu': 'te',
|
119 |
+
'thai': 'th',
|
120 |
+
'tigrinya': 'ti',
|
121 |
+
'tsonga': 'ts',
|
122 |
+
'turkish': 'tr',
|
123 |
+
'turkmen': 'tk',
|
124 |
+
'twi': 'ak',
|
125 |
+
'ukrainian': 'uk',
|
126 |
+
'urdu': 'ur',
|
127 |
+
'uyghur': 'ug',
|
128 |
+
'uzbek': 'uz',
|
129 |
+
'vietnamese': 'vi',
|
130 |
+
'welsh': 'cy',
|
131 |
+
'xhosa': 'xh',
|
132 |
+
'yiddish': 'yi',
|
133 |
+
'yoruba': 'yo',
|
134 |
+
'zulu': 'zu'
|
135 |
+
}
|
README.md
CHANGED
@@ -1,12 +1,42 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: 🔥
|
4 |
-
colorFrom: yellow
|
5 |
-
colorTo: gray
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 3.38.0
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: traductor-multilenguaje
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 3.35.2
|
6 |
---
|
7 |
+
# Web App of Meta's Massively Multilingual Speech (MMS)
|
8 |
+
|
9 |
+
This repository contains a Python code that implements a [META's Massively Multilingual Speech (MMS)](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) using the Gradio library. The application allows users to record audio and convert it to text, or enter text and generate corresponding local speech output.
|
10 |
+
|
11 |
+
## Step 1: Clone repo
|
12 |
+
```shell
|
13 |
+
git clone https://github.com/ikequan/meta-mms.git
|
14 |
+
cd meta-mms
|
15 |
+
```
|
16 |
+
## Step 2: Prerequisites
|
17 |
+
|
18 |
+
Before running the code, make sure you have the following requirements installed:
|
19 |
+
|
20 |
+
- Python 3.x
|
21 |
+
- gradio
|
22 |
+
- speech_recognition
|
23 |
+
- ttsmms
|
24 |
+
- deep_translator
|
25 |
+
|
26 |
+
You can install the required packages using the following command:
|
27 |
+
|
28 |
+
```shell
|
29 |
+
pip install gradio SpeechRecognition ttsmms deep_translator
|
30 |
+
```
|
31 |
+
|
32 |
+
## Step 3: Download language model
|
33 |
+
Check [here](https://github.com/wannaphong/ttsmms/blob/main/support_list.txt) for supported languages and their iso code for this step.
|
34 |
+
```shell
|
35 |
+
curl https://dl.fbaipublicfiles.com/mms/tts/{put your language iso code here}.tar.gz --output {put your language iso code here}.tar.gz # Update lang
|
36 |
+
mkdir -p data && tar -xzf {put your language iso code here}.tar.gz -C data/ # Update langcode
|
37 |
+
```
|
38 |
|
39 |
+
## Step 4: Run code
|
40 |
+
```shell
|
41 |
+
python app.py
|
42 |
+
```
|
Tutorial meta-mms.txt
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
https://www.youtube.com/watch?v=7K4b2S7X99w
|
2 |
+
https://github.com/ikequan/meta-mms
|
3 |
+
|
4 |
+
#Github proyecto
|
5 |
+
https://github.com/AYTECOL/traductor-multilenguaje.git
|
6 |
+
|
7 |
+
#Las librerias se instalan en:
|
8 |
+
C:\Users\jorge\AppData\Local\Programs\Python\Python311\Scripts
|
9 |
+
|
10 |
+
# Salida de Audio de video
|
11 |
+
C:\Users\jorge\AppData\Local\Temp\gradio\04300dd9108b391bd8a7984ab530b47d54bfec91\
|
12 |
+
|
13 |
+
#Lenguajes soportados:
|
14 |
+
https://github.com/wannaphong/ttsmms/blob/main/support_list.txt
|
15 |
+
|
16 |
+
# Descargar e instalar Python si no está instalado
|
17 |
+
Instalar en C:\Users\jorge\AppData\Local\Programs\Python\Python311\
|
18 |
+
Pasar al directorio \Scripts para que tome el comando .\pip
|
19 |
+
|
20 |
+
# Comprobar la instalacion de PIP:
|
21 |
+
.\pip help
|
22 |
+
|
23 |
+
# Si PIP no está instalado descargarlo de:
|
24 |
+
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
|
25 |
+
# Instalar PIP, pasar a la carpeta donde se descargó el archivo get-pip.py:
|
26 |
+
py get-pip.py
|
27 |
+
# Agregar el PATH de la instalación en las variables de entorno y colocarlo al inicio de la lista
|
28 |
+
|
29 |
+
# Si no está instalado Microsoft Visual C++ 14.0 or greater instalarlo de:
|
30 |
+
https://visualstudio.microsoft.com/visual-cpp-build-tools/
|
31 |
+
seleccionando las utilidades para desktop windows
|
32 |
+
|
33 |
+
# Instalar transformers:
|
34 |
+
.\pip install torch datasets[audio]
|
35 |
+
.\pip install --upgrade transformers
|
36 |
+
|
37 |
+
# Instalar los complementos necesarios para la aplicación:
|
38 |
+
.\pip install gradio SpeechRecognition ttsmms deep_translator
|
39 |
+
|
40 |
+
# instalar ffmpeg mediante cmd como administrador:
|
41 |
+
choco install ffmpeg
|
42 |
+
|
43 |
+
# Comprobar ISO de idiomas disponibles:
|
44 |
+
https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html
|
45 |
+
|
46 |
+
# descargar los idiomas a soportar cambiando el ISO del idioma correspondiente:
|
47 |
+
https://dl.fbaipublicfiles.com/mms/tts/full_model/eng.tar.gz #Inglés (eng)
|
48 |
+
https://dl.fbaipublicfiles.com/mms/tts/full_model/spa.tar.gz #Español (spa)
|
49 |
+
https://dl.fbaipublicfiles.com/mms/tts/full_model/gum.tar.gz #Misak (gum)
|
50 |
+
https://dl.fbaipublicfiles.com/mms/tts/full_model/quz.tar.gz #Quechua Cuzco (quz)
|
51 |
+
|
52 |
+
# crear carpeta "data" y descomprimir ahi los lenguajes dentro del proyecto:
|
53 |
+
/meta-mms/data/spa/
|
54 |
+
/meta-mms/data/eng/
|
55 |
+
|
56 |
+
# Comandos procesamiento de Audio y Video
|
57 |
+
# extraer audio de un video
|
58 |
+
ffmpeg -y -i input.mp4 -ar 16000 -ac 1 output_audio.wav
|
59 |
+
|
60 |
+
# dejar un video sin audio
|
61 |
+
ffmpeg -y -i input.mp4 -t 43 output_muted.webm
|
62 |
+
ffmpeg -y -i input.mp4 -shortest output_muted.webm
|
63 |
+
|
64 |
+
# unir audio con video
|
65 |
+
ffmpeg -y -i input.mp4 -i audio.wav -an output_muted.webm
|
66 |
+
|
67 |
+
# subtitulos
|
68 |
+
ffmpeg -y -copyts -i input.webm -vf subtitles=subtitle.srt output_srt.webm
|
69 |
+
ffmpeg -y -copyts -i noticias_caracol_tv.mp4 -vf subtitles=noticias_caracol_tv_subtitles.srt output_srt.webm
|
70 |
+
|
71 |
+
# Para ejecutar la aplicación:
|
72 |
+
py app.py
|
app.py
ADDED
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import speech_recognition as sr # Libreria de Audio
|
3 |
+
from ttsmms import TTS
|
4 |
+
from deep_translator import GoogleTranslator
|
5 |
+
|
6 |
+
import subprocess # Libreria para procesamiento de comandos cmd para video
|
7 |
+
import os
|
8 |
+
import time # Libreria para manejo de tiempos
|
9 |
+
import math # Libreria matemática, usada para redondeo de cifras
|
10 |
+
from threading import Thread # Librería para manejo de Hilos de procesamiento
|
11 |
+
|
12 |
+
# Input language locale used by Google speech recognition (Spanish, Spain).
input_language = 'es-ES'
# Container formats for the generated audio / video artifacts.
output_audio_format = 'mp3'
output_video_format = 'mp4'

# Initialize one TTS model per supported output language.
# Each model is loaded from its downloaded MMS checkpoint directory under data/.
spanish = TTS("data/spa")  # Spanish
english = TTS("data/eng")  # English
misak = TTS("data/gum")    # Misak
quechua = TTS("data/quz")  # Quechua (Cusco)

# Supported target languages and their matching TTS model.
# The "lang" value must match a deep_translator language name (see text_to_text).
#langs = [{"lang": 'spanish', "tts": spanish}, {"lang": 'english', "tts": english}, {"lang": 'guarani', "tts": misak}, {"lang": 'quechua', "tts": quechua}]
langs = [{"lang": 'english', "tts": english}, {"lang": 'quechua', "tts": quechua}, {"lang": 'spanish', "tts": spanish}]
|
26 |
+
|
27 |
+
# *************************** METHODS ***************************
# TEXT TO TEXT: translate plain text into the selected target language.
def text_to_text(text, lang, logs_file):
    """Translate *text* into *lang* with Google Translator.

    Progress is echoed to stdout and appended to *logs_file*.
    Raises ValueError when *lang* is not one of the configured languages.
    """
    stamp = time.ctime().split()[3]
    print(stamp + " - Traduciendo el texto a texto en el idioma seleccionado...")
    logs_file.write(stamp + " - Traduciendo el texto a texto en el idioma seleccionado...\n")
    # Guard: the requested language must be one of the configured entries.
    if not any(entry["lang"] == lang for entry in langs):
        raise ValueError(f"Lenguaje '{lang}' no disponible.")
    # deep_translator accepts the lowercase language name directly as target.
    translator = GoogleTranslator(source='auto', target=lang)
    text_translated = translator.translate(text)
    stamp = time.ctime().split()[3]
    print(stamp + " - Texto traducido: ", text_translated)
    logs_file.write(stamp + " - Texto traducido: " + text_translated + "\n")
    return text_translated
|
41 |
+
|
42 |
+
# TEXT TO AUDIO: translate text and synthesize it as speech.
def text_to_audio(text, lang, logs_file):
    """Translate *text* into *lang* and synthesize the translation to audio.

    Returns a tuple ``(wav_path, text_translated)`` where *wav_path* is the
    generated audio file path. Raises ValueError when *lang* is unsupported.
    """
    tiempo = time.ctime().split()
    print(tiempo[3] + " - Convirtiendo el texto extraido a audio en el idioma seleccionado...")
    logs_file.write(tiempo[3] + " - Convirtiendo el texto extraido a audio en el idioma seleccionado...\n")
    # Find the TTS model configured for the requested language.
    selected_lang = next((lang_item for lang_item in langs if lang_item["lang"] == lang), None)
    if selected_lang is None:
        raise ValueError(f"Lenguaje '{lang}' no disponible.")
    selected_tts = selected_lang["tts"]
    # Translate first, then synthesize the translated text.
    text_translated = text_to_text(text, lang, logs_file)
    wav_path = "audio_output." + output_audio_format
    # Generate the speech audio and write it to wav_path.
    # (Removed leftover debug print of wav_path.)
    selected_tts.synthesis(text_translated, wav_path=wav_path)
    tiempo = time.ctime().split()
    print(tiempo[3] + " - Audio traducido generado: ", wav_path)
    logs_file.write(tiempo[3] + " - Audio traducido generado: " + wav_path + "\n")
    return wav_path, text_translated
|
59 |
+
|
60 |
+
# AUDIO TO TEXT: transcribe audio using Google's speech recognition API.
def audio_to_text(audio_file, logs_file):
    """Transcribe *audio_file* to text via Google speech recognition.

    Uses the module-level ``input_language`` locale. Returns the recognized
    text, or None when the audio cannot be transcribed or the service is
    unreachable. Progress and errors are echoed to stdout and *logs_file*.
    """
    stamp = time.ctime().split()[3]
    print(stamp + " - Convirtiendo el audio a texto...")
    logs_file.write(stamp + " - Convirtiendo el audio a texto...\n")
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        recorded = recognizer.record(source)
    try:
        text = recognizer.recognize_google(recorded, language=input_language)
    except sr.UnknownValueError:
        # The API could not make sense of the audio.
        print("Google Speech Recognition no pudo transcribir el audio.")
        logs_file.write("Google Speech Recognition no pudo transcribir el audio.\n")
        return None
    except sr.RequestError:
        # Network / service failure.
        print("Reconocimiento de audio no disponible.")
        logs_file.write("Reconocimiento de audio no disponible.\n")
        return None
    stamp = time.ctime().split()[3]
    print(stamp + " - Reconocimiento de texto obtenido del audio: ", text)
    logs_file.write(stamp + " - Reconocimiento de texto obtenido del audio: " + text + "\n")
    return text
|
82 |
+
|
83 |
+
# VIDEO TO AUDIO: extract the audio track from a video file.
def video_to_audio(video_file, output_audio_ext, logs_file):
    """Extract the audio of *video_file* as a mono 16 kHz file via ffmpeg.

    Returns the path of the extracted audio file
    (``<video basename>_audio.<output_audio_ext>``).
    """
    stamp = time.ctime().split()[3]
    print(stamp + " - Extrayendo el audio del video...")
    logs_file.write(stamp + " - Extrayendo el audio del video...\n")
    base_name, _ = os.path.splitext(video_file)
    audio_video = base_name + "_audio." + output_audio_ext
    # -ar 16000 -ac 1: mono 16 kHz, the rate speech recognition expects.
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_file, "-ar", "16000", "-ac", "1", audio_video],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
    stamp = time.ctime().split()[3]
    print(stamp + " - Audio extraido: ", audio_video)
    logs_file.write(stamp + " - Audio extraido: " + audio_video + "\n")
    return audio_video
|
97 |
+
|
98 |
+
# VIDEO TO VIDEO: dub a video with the translated audio track.
def video_to_video(video_file, audio_file_traslated, output_video_ext, logs_file):
    """Replace the audio of *video_file* with *audio_file_traslated*.

    Strips the original audio into an intermediate muted file, then muxes
    it with the translated audio. Returns the path of the dubbed video
    (``<video basename>_traslated.<output_video_ext>``).
    """
    stamp = time.ctime().split()[3]
    print(stamp + " - Procesando el video para obtenerlo sin audio...")
    logs_file.write(stamp + " - Procesando el video para obtenerlo sin audio...\n")
    base_name, _ = os.path.splitext(video_file)
    video_mute = base_name + "_muted." + output_video_ext
    # -an drops the audio stream, leaving video only.
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_file, "-an", video_mute],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )

    stamp = time.ctime().split()[3]
    print(stamp + " - Doblando el video con el audio traducido...")
    logs_file.write(stamp + " - Doblando el video con el audio traducido...\n")
    video_traslated = base_name + "_traslated." + output_video_ext
    # Mux muted video + translated audio; -shortest trims to the shorter stream.
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_mute, "-i", audio_file_traslated, "-shortest", video_traslated],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
    stamp = time.ctime().split()[3]
    print(stamp + " - Video traducido: ", video_traslated)
    logs_file.write(stamp + " - Video traducido: " + video_traslated + "\n")
    return video_traslated
|
120 |
+
|
121 |
+
# VIDEO TO VIDEO SUBTITLED: burn translated subtitles into a video.
def video_to_video_subtitled(video_file, text_traslated, output_video_ext, logs_file):
    """Render *text_traslated* as burned-in subtitles on *video_file*.

    The translated text is split into word groups sized so the whole text
    spreads across the video duration (one subtitle line per second), a
    well-formed ``video_subtitles.srt`` file is written, and ffmpeg renders
    it onto the video. Returns the subtitled video path
    (``<video basename>_subtitled.<output_video_ext>``).

    Fixes over the previous version: SRT timestamp lines no longer carry a
    stray trailing ``--> ``; the redundant outer timing loop is gone (the
    inner loop already consumed every word on its first pass); the .srt
    file handle is closed via a context manager; a zero probed duration no
    longer raises ZeroDivisionError.
    """
    tiempo = time.ctime().split()
    print(tiempo[3] + " - Procesando el video subtitulado...")
    logs_file.write(tiempo[3] + " - Procesando el video subtitulado...\n")
    subtitles = text_traslated.split()
    filename, ext = os.path.splitext(video_file)
    length_video = get_length_video(video_file)
    # Words per one-second subtitle line; clamp to >= 1 so the slicing step
    # below is valid even for very short texts or a zero-length probe.
    if length_video > 0:
        length_line_subtitle = max(1, math.ceil(len(subtitles) / length_video))
    else:
        length_line_subtitle = max(1, len(subtitles))
    # Group the words into subtitle lines of length_line_subtitle words each.
    subtitles_line = [
        ' '.join(subtitles[j:j + length_line_subtitle])
        for j in range(0, len(subtitles), length_line_subtitle)
    ]

    # Write a well-formed SRT file: index, "start --> end" timestamp line,
    # then the (bold) subtitle text, separated by a blank line.
    with open("video_subtitles.srt", "w+") as subtitles_file:
        for i, line in enumerate(subtitles_line):
            start = time.strftime('%H:%M:%S', time.gmtime(i))
            end = time.strftime('%H:%M:%S', time.gmtime(i + 1))
            subtitles_file.write(
                f"{i + 1}\n{start},001 --> {end},000\n<b>{line}</b>\n\n"
            )

    # Burn the subtitles onto the video, keeping the original audio.
    subprocess.call(["ffmpeg", "-y", "-copyts", "-i", video_file, "-vf", "subtitles=video_subtitles.srt:force_style='Fontname=Futura,Fontsize=20,MarginV=50,Shadow=1'", f"{filename+'_subtitled'}.{output_video_ext}"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)

    video_subtitled = filename + "_subtitled." + output_video_ext
    tiempo = time.ctime().split()
    print(tiempo[3] + " - Video subtitulado: ", video_subtitled)
    logs_file.write(tiempo[3] + " - Video subtitulado: " + video_subtitled + "\n")
    return video_subtitled
|
165 |
+
|
166 |
+
def get_length_video(filename):
    """Return the duration of *filename* in seconds, probed with ffprobe.

    Raises ValueError if ffprobe produces no parseable duration (e.g. the
    file does not exist or is not a media file).
    """
    # Discard stderr instead of merging it into stdout: with the previous
    # stderr=subprocess.STDOUT, any ffprobe warning was mixed into the
    # captured output and broke the float() parse below.
    result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                             "format=duration", "-of",
                             "default=noprint_wrappers=1:nokey=1", filename],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.DEVNULL)
    return float(result.stdout)
|
173 |
+
|
174 |
+
# *************************** MAIN ***************************
|
175 |
+
# ************************** ROUTER **************************
|
176 |
+
# ROUTER: Función para transcribir video, audio y texto al lenguaje seleccionado
|
177 |
+
def multimedia_to_multimedia_app(lang_input, video_file_upload, audio_file_upload, video_file_webcam, audio_file_microphone, text_input):
|
178 |
+
tiempo = time.ctime().split()
|
179 |
+
logs_file = open("logs.txt","w+")
|
180 |
+
logs_file.write("LOGS TRADUCTOR MULTILENGUAJE\n")
|
181 |
+
if video_file_webcam and lang_input:
|
182 |
+
print("PROCESANDO GRABACIÓN VIDEO DE LA WEBCAM")
|
183 |
+
logs_file.write("PROCESANDO GRABACIÓN VIDEO DE LA WEBCAM\n")
|
184 |
+
print(tiempo[3] + " - Traduciendo el video grabado: " + video_file_webcam + " al idioma " + lang_input)
|
185 |
+
logs_file.write(tiempo[3] + " - Traduciendo el video grabado: " + video_file_webcam + " al idioma " + lang_input + "\n")
|
186 |
+
text_transcribed = convert_video_to_text_app(lang_input, video_file_webcam, logs_file)
|
187 |
+
audio_traslated, text_translated = text_to_audio(text_transcribed, lang_input, logs_file)
|
188 |
+
#video_subtitled = convert_video_to_video_subtitled_app(video_file_webcam, text_translated, logs_file)
|
189 |
+
#video_traslated = convert_video_to_video_app(video_file_webcam, audio_traslated, logs_file)
|
190 |
+
return_video_subtitled = [None]*1
|
191 |
+
return_video_traslated = [None]*1
|
192 |
+
hilo_video_subtitled = Thread(target=convert_video_to_video_subtitled_app, args=(video_file_webcam, text_translated,logs_file,return_video_subtitled,))
|
193 |
+
hilo_video_traslated = Thread(target=convert_video_to_video_app, args=(video_file_webcam, audio_traslated,logs_file,return_video_traslated,))
|
194 |
+
hilo_video_subtitled.start()
|
195 |
+
hilo_video_traslated.start()
|
196 |
+
hilo_video_subtitled.join()
|
197 |
+
hilo_video_traslated.join()
|
198 |
+
video_subtitled = return_video_subtitled[0]
|
199 |
+
video_traslated = return_video_traslated[0]
|
200 |
+
print("FIN PROCESO GRABACIÓN VIDEO DE LA WEBCAM")
|
201 |
+
logs_file.write("FIN PROCESO GRABACIÓN VIDEO DE LA WEBCAM\n")
|
202 |
+
logs_file.close()
|
203 |
+
return text_transcribed, text_translated, audio_traslated, video_subtitled, video_traslated
|
204 |
+
if audio_file_microphone and lang_input:
|
205 |
+
print("PROCESANDO GRABACIÓN AUDIO DEL MICRÓFONO")
|
206 |
+
logs_file.write("PROCESANDO GRABACIÓN AUDIO DEL MICRÓFONO\n")
|
207 |
+
print(tiempo[3] + " - Traduciendo el audio grabado " + audio_file_microphone + " al idioma " + lang_input)
|
208 |
+
logs_file.write(tiempo[3] + " - Traduciendo el audio grabado " + audio_file_microphone + " al idioma " + lang_input + "\n")
|
209 |
+
text_translated, text_transcribed, audio_traslated = convert_audio_to_audio_app(lang_input,audio_file_microphone,logs_file)
|
210 |
+
video_subtitled = None
|
211 |
+
video_traslated = None
|
212 |
+
print("FIN PROCESO GRABACIÓN AUDIO DEL MICRÓFONO")
|
213 |
+
logs_file.write("FIN PROCESO GRABACIÓN AUDIO DEL MICRÓFONO\n")
|
214 |
+
logs_file.close()
|
215 |
+
return text_transcribed, text_translated, audio_traslated, video_subtitled, video_traslated
|
216 |
+
if video_file_upload and lang_input:
|
217 |
+
print("PROCESANDO ARCHIVO DE VIDEO")
|
218 |
+
logs_file.write("PROCESANDO ARCHIVO DE VIDEO\n")
|
219 |
+
print(tiempo[3] + " - Traduciendo el video ingresado " + video_file_upload + " al idioma " + lang_input)
|
220 |
+
logs_file.write(tiempo[3] + " - Traduciendo el video ingresado " + video_file_upload + " al idioma " + lang_input + "\n")
|
221 |
+
text_transcribed = convert_video_to_text_app(lang_input,video_file_upload,logs_file)
|
222 |
+
audio_traslated, text_translated = text_to_audio(text_transcribed, lang_input,logs_file)
|
223 |
+
#video_subtitled = convert_video_to_video_subtitled_app(video_file_upload, text_translated,logs_file)
|
224 |
+
#video_traslated = convert_video_to_video_app(video_file_upload, audio_traslated,logs_file)
|
225 |
+
return_video_subtitled = [None]*1
|
226 |
+
return_video_traslated = [None]*1
|
227 |
+
hilo_video_subtitled = Thread(target=convert_video_to_video_subtitled_app, args=(video_file_upload, text_translated,logs_file,return_video_subtitled,))
|
228 |
+
hilo_video_traslated = Thread(target=convert_video_to_video_app, args=(video_file_upload, audio_traslated,logs_file,return_video_traslated,))
|
229 |
+
hilo_video_subtitled.start()
|
230 |
+
hilo_video_traslated.start()
|
231 |
+
hilo_video_subtitled.join()
|
232 |
+
hilo_video_traslated.join()
|
233 |
+
video_subtitled = return_video_subtitled[0]
|
234 |
+
video_traslated = return_video_traslated[0]
|
235 |
+
|
236 |
+
print("FIN PROCESO ARCHIVO DE VIDEO")
|
237 |
+
logs_file.write("FIN PROCESO ARCHIVO DE VIDEO\n")
|
238 |
+
logs_file.close()
|
239 |
+
return text_transcribed, text_translated, audio_traslated, video_subtitled, video_traslated
|
240 |
+
if audio_file_upload and lang_input:
|
241 |
+
print("PROCESANDO ARCHIVO DE AUDIO")
|
242 |
+
logs_file.write("PROCESANDO ARCHIVO DE AUDIO\n")
|
243 |
+
print(tiempo[3] + " - Traduciendo el audio ingresado " + audio_file_upload + " al idioma " + lang_input)
|
244 |
+
logs_file.write(tiempo[3] + " - Traduciendo el audio ingresado " + audio_file_upload + " al idioma " + lang_input + "\n")
|
245 |
+
text_translated, text_transcribed, audio_traslated = convert_audio_to_audio_app(lang_input,audio_file_upload,logs_file)
|
246 |
+
video_subtitled = None
|
247 |
+
video_traslated = None
|
248 |
+
print("FIN PROCESO ARCHIVO DE AUDIO")
|
249 |
+
logs_file.write("FIN PROCESO ARCHIVO DE AUDIO\n")
|
250 |
+
logs_file.close()
|
251 |
+
return text_transcribed, text_translated, audio_traslated, video_subtitled, video_traslated
|
252 |
+
if text_input and lang_input:
|
253 |
+
print("PROCESANDO TEXTO INGRESADO")
|
254 |
+
logs_file.write("PROCESANDO TEXTO INGRESADO\n")
|
255 |
+
print(tiempo[3] + " - Traduciendo el texto ingresado " + text_input + " al idioma " + lang_input)
|
256 |
+
logs_file.write(tiempo[3] + " - Traduciendo el texto ingresado " + text_input + " al idioma " + lang_input + "\n")
|
257 |
+
audio_traslated, text_translated = text_to_audio(text_input, lang_input, logs_file)
|
258 |
+
video_subtitled = None
|
259 |
+
video_traslated = None
|
260 |
+
print("FIN PROCESO TEXTO INGRESADO")
|
261 |
+
logs_file.write("FIN PROCESO TEXTO INGRESADO\n")
|
262 |
+
logs_file.close()
|
263 |
+
return text_input, text_translated, audio_traslated, video_subtitled, video_traslated
|
264 |
+
if not lang_input:
|
265 |
+
print("Error - Lenguaje no ingresado")
|
266 |
+
raise gr.Error("Debes ingresar el idioma a traducir") # Muestra la alerta si no se ingresa un idioma a traducir
|
267 |
+
|
268 |
+
# *************************** SERVICIOS ***************************
|
269 |
+
# t2t: Traducir el texto a texto en el idioma deseado
|
270 |
+
def convert_text_to_text_app(lang_input, text_to_translate, logs_file):
|
271 |
+
if text_to_translate:
|
272 |
+
print("Traduciendo texto " + text_to_translate + " al idioma " + lang_input)
|
273 |
+
logs_file.write("Traduciendo texto " + text_to_translate + " al idioma " + lang_input + "\n")
|
274 |
+
text_translated = text_to_text(text_to_translate, lang_input, logs_file)
|
275 |
+
return text_translated
|
276 |
+
|
277 |
+
# a2t: Transcribir el audio a texto
|
278 |
+
def convert_audio_to_text_app(lang_input, audio_file, logs_file):
|
279 |
+
if audio_file:
|
280 |
+
print("Convirtiendo audio " + audio_file + " al idioma " + lang_input)
|
281 |
+
logs_file.write("Convirtiendo audio " + audio_file + " al idioma " + lang_input + "\n")
|
282 |
+
text_translated = audio_to_text(audio_file, logs_file)
|
283 |
+
return text_translated
|
284 |
+
|
285 |
+
# a2a: Transcribir el audio a texto y de texto al audio traducido
|
286 |
+
def convert_audio_to_audio_app(lang_input, audio_file, logs_file):
|
287 |
+
if audio_file:
|
288 |
+
print("Traduciendo audio " + audio_file + " al idioma deseado...")
|
289 |
+
logs_file.write("Traduciendo audio " + audio_file + " al idioma deseado...\n")
|
290 |
+
text_transcribed = audio_to_text(audio_file, logs_file)
|
291 |
+
audio_traslated, text_translated = text_to_audio(text_transcribed, lang_input, logs_file)
|
292 |
+
return text_translated, text_transcribed, audio_traslated
|
293 |
+
|
294 |
+
# v2t: Convertir video a audio usando 'ffmpeg' con módulo 'subprocess'
|
295 |
+
def convert_video_to_text_app(lang_input,video_file, logs_file, output_audio_ext="wav"):
|
296 |
+
print("Procesando video " + video_file + " para convertirlo a texto...")
|
297 |
+
logs_file.write("Procesando video " + video_file + " para convertirlo a texto...\n")
|
298 |
+
audio_video = video_to_audio(video_file, output_audio_ext, logs_file)
|
299 |
+
text_translated = convert_audio_to_text_app(lang_input,audio_video, logs_file)
|
300 |
+
return text_translated
|
301 |
+
|
302 |
+
# v2v: Convertir video a video
|
303 |
+
def convert_video_to_video_app(video_file, audio_file_traslated, logs_file, return_video_traslated, output_video_ext=output_video_format):
|
304 |
+
print("Procesando video " + video_file + " para traducirlo...")
|
305 |
+
logs_file.write("Procesando video " + video_file + " para traducirlo...\n")
|
306 |
+
video_traslated = video_to_video(video_file, audio_file_traslated, output_video_ext,logs_file)
|
307 |
+
return_video_traslated[0] = video_traslated
|
308 |
+
#return video_traslated
|
309 |
+
|
310 |
+
# v2vs: Convertir video a video subtitulado
|
311 |
+
def convert_video_to_video_subtitled_app(video_file, text_translated, logs_file, return_video_subtitled, output_video_ext=output_video_format):
|
312 |
+
print("Procesando video " + video_file + " para subtitularlo...")
|
313 |
+
logs_file.write("Procesando video " + video_file + " para subtitularlo...\n")
|
314 |
+
video_subtitled = video_to_video_subtitled(video_file, text_translated, output_video_ext, logs_file)
|
315 |
+
return_video_subtitled[0] = video_subtitled
|
316 |
+
#return video_subtitled
|
317 |
+
|
318 |
+
# *************************** INTERFAZ ***************************
|
319 |
+
# Entradas y salidas en la interfaz Gradio
|
320 |
+
lang_input = gr.components.Dropdown(choices=[lang["lang"] for lang in langs], label="Selecciona el idioma al cual deseas traducir:*")
|
321 |
+
|
322 |
+
#video_input_file = gr.Video(label= "Noticias Caracol", value="D:/Noticias/noticias_caracol_long.mp4")
|
323 |
+
video_input_file = gr.Video()
|
324 |
+
video_input_file = gr.Video(label= "Noticias Caracol", source="upload")
|
325 |
+
video_input_webcam = gr.Video(label= "Noticias Caracol en vivo", source="webcam", include_audio=1)
|
326 |
+
#audio_input_file = gr.Audio(label="Blue Radio", value="D:/Noticias/caracol_radio.mp3")
|
327 |
+
audio_input_file = gr.Audio(label="Blue Radio", source="upload", type="filepath")
|
328 |
+
audio_input_microphone = gr.Audio(label="Blue Radio en vivo", source="microphone", type="filepath")
|
329 |
+
text_input = gr.components.Textbox(label="Noticia a traducir:")
|
330 |
+
output_text_transcribed = gr.components.Textbox(label="Transcripción")
|
331 |
+
output_text_traslated = gr.components.Textbox(label="Traducción")
|
332 |
+
output_audio = gr.components.Audio(label="Audio traducido", format=output_audio_format)
|
333 |
+
output_video_subtitled = gr.components.Video(label="Noticia subtitulada", format=output_video_format)
|
334 |
+
output_video_traslated = gr.components.Video(label="Noticia traducida", format=output_video_format)
|
335 |
+
|
336 |
+
"""""""""
|
337 |
+
embed_html = '<iframe width="560" height="315" src="https://www.youtube.com/embed/EngW7tLk6R8" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
|
338 |
+
with gr.Blocks() as interface:
|
339 |
+
gr.HTML(embed_html)
|
340 |
+
"""""""""
|
341 |
+
|
342 |
+
# Crea la interfaz Gradio para multimedia_to_multimedia_app
|
343 |
+
interface = gr.Interface(
|
344 |
+
fn=multimedia_to_multimedia_app,
|
345 |
+
inputs=[lang_input, video_input_file, audio_input_file, video_input_webcam, audio_input_microphone, text_input],
|
346 |
+
outputs=[output_text_transcribed, output_text_traslated, output_audio, output_video_subtitled, output_video_traslated],
|
347 |
+
title="TRADUCTOR MULTILENGUA DE NOTICIAS | AYTÉ - CARACOL",
|
348 |
+
description="Ingresa la noticia que deseas traducir:",
|
349 |
+
#theme = gr.themes.Soft()
|
350 |
+
theme=gr.themes.Default(primary_hue="blue")
|
351 |
+
)
|
352 |
+
#interface.launch() # Lanza la interfaz
|
353 |
+
#interface.launch(share=True, auth=("caracol", "caracol"), server_name=("127.0.0.1"), server_port=(7860), favicon_path=())
|
354 |
+
interface.launch(share=True, auth=("caracol", "caracol"), server_name=("127.0.0.1"), server_port=(7860))
|
assets/images/favico.ico
ADDED
assets/images/icono.png
ADDED
assets/images/logo.png
ADDED
assets/styles/css.css
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
h1 {
|
2 |
+
color: orange;
|
3 |
+
}
|
audio_output.mp3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46d0e34a258a63efae8713b1b054d0875819a225531f988aac50e3751c6a394a
|
3 |
+
size 1763386
|
data/eng/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04b1d7a2726b3cb27c18604ace828556d9c17c09f65eb041f690a89c99d7aea4
|
3 |
+
size 561110135
|
data/eng/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d276cee0f8291de23c8ed4f4a2ed15e3e4cff7b2d6af43660cd6b5e6e1149110
|
3 |
+
size 436618116
|
data/eng/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
data/eng/vocab.txt
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
k
|
2 |
+
'
|
3 |
+
z
|
4 |
+
y
|
5 |
+
u
|
6 |
+
d
|
7 |
+
h
|
8 |
+
e
|
9 |
+
s
|
10 |
+
w
|
11 |
+
–
|
12 |
+
3
|
13 |
+
c
|
14 |
+
p
|
15 |
+
-
|
16 |
+
1
|
17 |
+
j
|
18 |
+
m
|
19 |
+
i
|
20 |
+
|
21 |
+
f
|
22 |
+
l
|
23 |
+
o
|
24 |
+
0
|
25 |
+
b
|
26 |
+
r
|
27 |
+
a
|
28 |
+
4
|
29 |
+
2
|
30 |
+
n
|
31 |
+
_
|
32 |
+
x
|
33 |
+
v
|
34 |
+
t
|
35 |
+
q
|
36 |
+
5
|
37 |
+
6
|
38 |
+
g
|
data/gum/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0755e583b0b39fe2cc3cf7dfd5c4c9d184de3c83bf562281c7fa23a272bcf9d2
|
3 |
+
size 561109839
|
data/gum/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea406973d9699d994463477d4adfeada83625459e1fa606b7cc7e0593f4c31c2
|
3 |
+
size 436625202
|
data/gum/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
data/gum/vocab.txt
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
i
|
4 |
+
r
|
5 |
+
n
|
6 |
+
e
|
7 |
+
ø
|
8 |
+
u
|
9 |
+
g
|
10 |
+
m
|
11 |
+
b
|
12 |
+
t
|
13 |
+
s
|
14 |
+
k
|
15 |
+
h
|
16 |
+
c
|
17 |
+
l
|
18 |
+
w
|
19 |
+
p
|
20 |
+
y
|
21 |
+
d
|
22 |
+
o
|
23 |
+
ñ
|
24 |
+
ú
|
25 |
+
j
|
26 |
+
—
|
27 |
+
í
|
28 |
+
z
|
29 |
+
é
|
30 |
+
á
|
31 |
+
'
|
32 |
+
f
|
33 |
+
v
|
34 |
+
-
|
35 |
+
ó
|
36 |
+
q
|
37 |
+
0
|
38 |
+
x
|
39 |
+
1
|
40 |
+
2
|
41 |
+
4
|
42 |
+
3
|
43 |
+
|
data/quz/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22fd86a89725af83c7faf37d3824db296563871f8d357e07578f6183a992ffb0
|
3 |
+
size 561078748
|
data/quz/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1723774e696a2f11c58ce5e89a2ee2b47aad65955b0abae3d8865af28adf9364
|
3 |
+
size 436378676
|
data/quz/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
data/quz/vocab.txt
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
|
|
3 |
+
n
|
4 |
+
i
|
5 |
+
u
|
6 |
+
q
|
7 |
+
k
|
8 |
+
s
|
9 |
+
h
|
10 |
+
p
|
11 |
+
y
|
12 |
+
c
|
13 |
+
t
|
14 |
+
m
|
15 |
+
r
|
16 |
+
l
|
17 |
+
o
|
18 |
+
w
|
19 |
+
e
|
20 |
+
ñ
|
21 |
+
'
|
22 |
+
d
|
23 |
+
j
|
24 |
+
g
|
25 |
+
b
|
26 |
+
-
|
27 |
+
–
|
28 |
+
v
|
29 |
+
f
|
30 |
+
í
|
31 |
+
z
|
32 |
+
é
|
33 |
+
á
|
34 |
+
ó
|
35 |
+
ú
|
36 |
+
x
|
37 |
+
|
data/spa/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07ab22829d36992fc47d7fde4d9e1313f2a8108d2442d489a0953b1910628d7a
|
3 |
+
size 561110151
|
data/spa/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8feb91089b706e231efb18d0038f5827f1a9d1e45c57c61fba7ebe2198a7c1e6
|
3 |
+
size 436635085
|
data/spa/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
data/spa/vocab.txt
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
7
|
2 |
+
a
|
3 |
+
v
|
4 |
+
c
|
5 |
+
—
|
6 |
+
0
|
7 |
+
5
|
8 |
+
ó
|
9 |
+
8
|
10 |
+
p
|
11 |
+
y
|
12 |
+
z
|
13 |
+
4
|
14 |
+
m
|
15 |
+
ü
|
16 |
+
k
|
17 |
+
s
|
18 |
+
á
|
19 |
+
q
|
20 |
+
h
|
21 |
+
n
|
22 |
+
é
|
23 |
+
_
|
24 |
+
9
|
25 |
+
1
|
26 |
+
f
|
27 |
+
t
|
28 |
+
|
29 |
+
x
|
30 |
+
d
|
31 |
+
í
|
32 |
+
b
|
33 |
+
3
|
34 |
+
j
|
35 |
+
g
|
36 |
+
l
|
37 |
+
2
|
38 |
+
i
|
39 |
+
u
|
40 |
+
e
|
41 |
+
ú
|
42 |
+
o
|
43 |
+
ñ
|
44 |
+
r
|
45 |
+
6
|
logs.txt
ADDED
File without changes
|
output.wav
ADDED
Binary file (359 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
SpeechRecognition
|
3 |
+
ttsmms
|
4 |
+
deep_translator
|
video.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5508d3536f55aa02a9cde9a8326799e72f0b148003d50936e735dd23c40cd3ba
|
3 |
+
size 2476504
|
video_subtitles.srt
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1
|
2 |
+
00:00:00,001 --> 00:00:01,000 -->
|
3 |
+
<b>Can you</b>
|
4 |
+
2
|
5 |
+
00:00:01,001 --> 00:00:02,000 -->
|
6 |
+
<b>imagine leaving</b>
|
7 |
+
3
|
8 |
+
00:00:02,001 --> 00:00:03,000 -->
|
9 |
+
<b>home and</b>
|
10 |
+
4
|
11 |
+
00:00:03,001 --> 00:00:04,000 -->
|
12 |
+
<b>finding pieces</b>
|
13 |
+
5
|
14 |
+
00:00:04,001 --> 00:00:05,000 -->
|
15 |
+
<b>of a</b>
|
16 |
+
6
|
17 |
+
00:00:05,001 --> 00:00:06,000 -->
|
18 |
+
<b>plane? Well,</b>
|
19 |
+
7
|
20 |
+
00:00:06,001 --> 00:00:07,000 -->
|
21 |
+
<b>that happened</b>
|
22 |
+
8
|
23 |
+
00:00:07,001 --> 00:00:08,000 -->
|
24 |
+
<b>in a</b>
|
25 |
+
9
|
26 |
+
00:00:08,001 --> 00:00:09,000 -->
|
27 |
+
<b>Chicago neighborhood,</b>
|
28 |
+
10
|
29 |
+
00:00:09,001 --> 00:00:10,000 -->
|
30 |
+
<b>where an</b>
|
31 |
+
11
|
32 |
+
00:00:10,001 --> 00:00:11,000 -->
|
33 |
+
<b>emergency evacuation</b>
|
34 |
+
12
|
35 |
+
00:00:11,001 --> 00:00:12,000 -->
|
36 |
+
<b>slide was</b>
|
37 |
+
13
|
38 |
+
00:00:12,001 --> 00:00:13,000 -->
|
39 |
+
<b>found that</b>
|
40 |
+
14
|
41 |
+
00:00:13,001 --> 00:00:14,000 -->
|
42 |
+
<b>had detached</b>
|
43 |
+
15
|
44 |
+
00:00:14,001 --> 00:00:15,000 -->
|
45 |
+
<b>from an</b>
|
46 |
+
16
|
47 |
+
00:00:15,001 --> 00:00:16,000 -->
|
48 |
+
<b>aircraft that</b>
|
49 |
+
17
|
50 |
+
00:00:16,001 --> 00:00:17,000 -->
|
51 |
+
<b>hit you</b>
|
52 |
+
18
|
53 |
+
00:00:17,001 --> 00:00:18,000 -->
|
54 |
+
<b>at the</b>
|
55 |
+
19
|
56 |
+
00:00:18,001 --> 00:00:19,000 -->
|
57 |
+
<b>International Airport.</b>
|
58 |
+
20
|
59 |
+
00:00:19,001 --> 00:00:20,000 -->
|
60 |
+
<b>Authorities confirmed</b>
|
61 |
+
21
|
62 |
+
00:00:20,001 --> 00:00:21,000 -->
|
63 |
+
<b>that there</b>
|
64 |
+
22
|
65 |
+
00:00:21,001 --> 00:00:22,000 -->
|
66 |
+
<b>were no</b>
|
67 |
+
23
|
68 |
+
00:00:22,001 --> 00:00:23,000 -->
|
69 |
+
<b>injuries. The</b>
|
70 |
+
24
|
71 |
+
00:00:23,001 --> 00:00:24,000 -->
|
72 |
+
<b>large piece</b>
|
73 |
+
25
|
74 |
+
00:00:24,001 --> 00:00:25,000 -->
|
75 |
+
<b>of plastic</b>
|
76 |
+
26
|
77 |
+
00:00:25,001 --> 00:00:26,000 -->
|
78 |
+
<b>was removed</b>
|
79 |
+
27
|
80 |
+
00:00:26,001 --> 00:00:27,000 -->
|
81 |
+
<b>and later</b>
|
82 |
+
28
|
83 |
+
00:00:27,001 --> 00:00:28,000 -->
|
84 |
+
<b>it was</b>
|
85 |
+
29
|
86 |
+
00:00:28,001 --> 00:00:29,000 -->
|
87 |
+
<b>determined that</b>
|
88 |
+
30
|
89 |
+
00:00:29,001 --> 00:00:30,000 -->
|
90 |
+
<b>it belonged</b>
|
91 |
+
31
|
92 |
+
00:00:30,001 --> 00:00:31,000 -->
|
93 |
+
<b>to a</b>
|
94 |
+
32
|
95 |
+
00:00:31,001 --> 00:00:32,000 -->
|
96 |
+
<b>United Airlines</b>
|
97 |
+
33
|
98 |
+
00:00:32,001 --> 00:00:33,000 -->
|
99 |
+
<b>plane from</b>
|
100 |
+
34
|
101 |
+
00:00:33,001 --> 00:00:34,000 -->
|
102 |
+
<b>Switzerland that</b>
|
103 |
+
35
|
104 |
+
00:00:34,001 --> 00:00:35,000 -->
|
105 |
+
<b>landed safely</b>
|
106 |
+
36
|
107 |
+
00:00:35,001 --> 00:00:36,000 -->
|
108 |
+
<b>with 155</b>
|
109 |
+
37
|
110 |
+
00:00:36,001 --> 00:00:37,000 -->
|
111 |
+
<b>passengers and</b>
|
112 |
+
38
|
113 |
+
00:00:37,001 --> 00:00:38,000 -->
|
114 |
+
<b>10 crew</b>
|
115 |
+
39
|
116 |
+
00:00:38,001 --> 00:00:39,000 -->
|
117 |
+
<b>members.</b>
|