{ "cells": [ { "cell_type": "code", "execution_count": 55, "id": "23e98a8a-7128-4f35-ba1c-ff514ed462e0", "metadata": {}, "outputs": [], "source": [ "#Install Dependencies\n", "#!pip3 install torch torchvision torchaudio\n", "#!pip install transformers ipywidgets gradio --upgrade\n", "#!pip install --upgrade gradio\n", "#!pip install nltk\n", "#!pip install jiwer\n", "#!pip install sentencepiece\n", "#!pip install sacremoses\n", "#!pip install soundfile" ] }, { "cell_type": "code", "execution_count": 56, "id": "29275fa9-1b88-4e37-a278-7118bfca860a", "metadata": {}, "outputs": [], "source": [ "\n", "##translation_pipeline = pipeline('translation_en_to_fr')\n", "##Evaluation Metric = BLEU score\n", "##Exp1\n", "#model_name = \"Davlan/byt5-base-eng-yor-mt\"\n", "##Exp2\n", "#model_name = \"Davlan/m2m100_418M-eng-yor-mt\" \n", "##Exp3\n", "#model_name = \"Davlan/mbart50-large-eng-yor-mt\"\n", "##Exp4\n", "#model_name = \"Davlan/mt5_base_eng_yor_mt\"\n", "##Exp5\n", "#model_name = \"omoekan/opus-tatoeba-eng-yor\"\n", "##Exp6\n", "#model_name = \"masakhane/afrimt5_en_yor_news\"\n", "##Exp7\n", "#model_name = \"masakhane/afrimbart_en_yor_news\"\n", "##Exp8\n", "#model_name = \"masakhane/afribyt5_en_yor_news\"\n", "##Exp9\n", "#model_name = \"masakhane/byt5_en_yor_news\"\n", "##Exp10\n", "#model_name = \"masakhane/mt5_en_yor_news\"\n", "#translation_pipeline = pipeline('translation_en_to_yo', model = model_name, max_length=50)" ] }, { "cell_type": "code", "execution_count": 57, "id": "1ea4a2eb-6cbf-497a-a080-2db3dd64be36", "metadata": {}, "outputs": [], "source": [ "#results = translation_pipeline('My Name is Ayo, I love books')\n", "#results[0]['translation_text']" ] }, { "cell_type": "code", "execution_count": 58, "id": "f92487b5-158a-47ef-ab12-a361ea8d0a48", "metadata": {}, "outputs": [], "source": [ "#results = translation_pipeline('The wages of sin is death')\n", "#results[0]['translation_text']" ] }, { "cell_type": "code", "execution_count": 59, "id": "69d64db9-b083-46ae-80ce-9616ba99183d", "metadata": {}, "outputs": [], "source": [ "from transformers import pipeline\n", "import nltk\n", "import jiwer\n", "from nltk.translate.bleu_score import corpus_bleu\n", "from transformers import VitsModel, AutoTokenizer\n", "import torch\n", "import soundfile as sf\n", "\n", "\n", "WerScore = 0\n", "bleuScore = 0\n", "def translate_transformers(modelName, sourceLangText):\n", " #results = translation_pipeline(input_text)\n", " translation_pipeline = pipeline('translation_en_to_yo', model = modelName, max_length=500)\n", " translated_text = translation_pipeline(sourceLangText) #translator(text)[0][\"translation_text\"]\n", " translated_text_target = translated_text[0]['translation_text']\n", " hypothesis_translations = \"My name is Joy, I love reading\"\n", " \n", " #TTS for the translated_text_target\n", " #TTS Exp1\n", " ttsModel = VitsModel.from_pretrained(\"facebook/mms-tts-yor\")\n", " tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-yor\")\n", " ttsInputs = tokenizer(translated_text_target, return_tensors=\"pt\")\n", " \n", " with torch.no_grad():\n", " ttsOutput = ttsModel(**ttsInputs).waveform\n", " #onvert the tensor to a numpy array\n", " ttsWaveform = ttsOutput.numpy()[0] \n", " #Save the waveform to an audio file\n", " #sf.write('output.wav', waveform, 22050)\n", " sf.write('ttsOutput.wav', ttsWaveform, 16000)\n", " \n", " #Calculate WerScore\n", " WerScore = jiwer.wer(translated_text_target, hypothesis_translations)\n", " #bleuScore = corpus_bleu(translated_text_target,hypothesis_translations)\n", " \n", " return translated_text_target,WerScore,'ttsOutput.wav'" ] }, { "cell_type": "code", "execution_count": 60, "id": "5d9ed5a2-0d28-4078-923d-c8c27196292a", "metadata": {}, "outputs": [], "source": [ "#text1 = \"Oruko mi ni Ayo, mo feran iwe kika gan\"\n", "#text2 = \"Agbaninímọ̀ràn kan lórí ọ̀ràn radiation and Clinical Oncologist, tórúkọ rẹ̀ ń jẹ́ Temitope Olatunji-Agunbiade ti kìlọ̀ fáwọn obìnrin pé kí wọ́n má ṣe lo oògùn máàjóyúndúró tàbí kí wọ́n lo oògùn máàjóyúndúró, ó sọ pé ìwádìí ti fi hàn pé lílò tí wọ́n ń lò ó ń mú kí ewu àrùn jẹjẹrẹ ọmú pọ̀ sí i.\"\n", "\n", "#with torch.no_grad():\n", " #output = ttsModel(**inputs).waveform" ] }, { "cell_type": "code", "execution_count": 61, "id": "54138308-b423-4e7c-9469-2002bfeb7918", "metadata": {}, "outputs": [], "source": [ "#from IPython.display import Audio\n", "#Audio(output, rate=ttsModel.config.sampling_rate)" ] }, { "cell_type": "code", "execution_count": 62, "id": "bbf259d6-922d-4f5c-9af1-cbd57158a814", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7879\n", "Running on public URL: https://ccee705195aed67b23.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "