{ "cells": [ { "cell_type": "code", "execution_count": 55, "id": "23e98a8a-7128-4f35-ba1c-ff514ed462e0", "metadata": {}, "outputs": [], "source": [ "#Install Dependencies\n", "#!pip3 install torch torchvision torchaudio\n", "#!pip install transformers ipywidgets gradio --upgrade\n", "#!pip install --upgrade gradio\n", "#!pip install nltk\n", "#!pip install jiwer\n", "#!pip install sentencepiece\n", "#!pip install sacremoses\n", "#!pip install soundfile" ] },
{ "cell_type": "code", "execution_count": 56, "id": "29275fa9-1b88-4e37-a278-7118bfca860a", "metadata": {}, "outputs": [], "source": [ "\n", "##translation_pipeline = pipeline('translation_en_to_fr')\n", "##Evaluation Metric = BLEU score\n", "##Exp1\n", "#model_name = \"Davlan/byt5-base-eng-yor-mt\"\n", "##Exp2\n", "#model_name = \"Davlan/m2m100_418M-eng-yor-mt\"\n", "##Exp3\n", "#model_name = \"Davlan/mbart50-large-eng-yor-mt\"\n", "##Exp4\n", "#model_name = \"Davlan/mt5_base_eng_yor_mt\"\n", "##Exp5\n", "#model_name = \"omoekan/opus-tatoeba-eng-yor\"\n", "##Exp6\n", "#model_name = \"masakhane/afrimt5_en_yor_news\"\n", "##Exp7\n", "#model_name = \"masakhane/afrimbart_en_yor_news\"\n", "##Exp8\n", "#model_name = \"masakhane/afribyt5_en_yor_news\"\n", "##Exp9\n", "#model_name = \"masakhane/byt5_en_yor_news\"\n", "##Exp10\n", "#model_name = \"masakhane/mt5_en_yor_news\"\n", "#translation_pipeline = pipeline('translation_en_to_yo', model = model_name, max_length=50)" ] },
{ "cell_type": "code", "execution_count": 57, "id": "1ea4a2eb-6cbf-497a-a080-2db3dd64be36", "metadata": {}, "outputs": [], "source": [ "#results = translation_pipeline('My Name is Ayo, I love books')\n", "#results[0]['translation_text']" ] },
{ "cell_type": "code", "execution_count": 58, "id": "f92487b5-158a-47ef-ab12-a361ea8d0a48", "metadata": {}, "outputs": [], "source": [ "#results = translation_pipeline('The wages of sin is death')\n", "#results[0]['translation_text']" ] },
{ "cell_type": "code", "execution_count": 59, "id": "69d64db9-b083-46ae-80ce-9616ba99183d", "metadata": {}, "outputs": [], "source": [ "from transformers import pipeline\n", "import nltk\n", "import jiwer\n", "from nltk.translate.bleu_score import corpus_bleu\n", "from transformers import VitsModel, AutoTokenizer\n", "import torch\n", "import soundfile as sf\n", "\n", "\n", "WerScore = 0\n", "bleuScore = 0\n", "def translate_transformers(modelName, sourceLangText):\n", "    #Translate the English input with the selected finetuned Eng2Yor model\n", "    translation_pipeline = pipeline('translation_en_to_yo', model = modelName, max_length=500)\n", "    translated_text = translation_pipeline(sourceLangText) #translator(text)[0][\"translation_text\"]\n", "    translated_text_target = translated_text[0]['translation_text']\n", "    #Fixed placeholder hypothesis, used only to illustrate the WER computation\n", "    hypothesis_translations = \"My name is Joy, I love reading\"\n", "\n", "    #TTS for the translated_text_target\n", "    #TTS Exp1\n", "    ttsModel = VitsModel.from_pretrained(\"facebook/mms-tts-yor\")\n", "    tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-yor\")\n", "    ttsInputs = tokenizer(translated_text_target, return_tensors=\"pt\")\n", "\n", "    with torch.no_grad():\n", "        ttsOutput = ttsModel(**ttsInputs).waveform\n", "    #Convert the tensor to a numpy array\n", "    ttsWaveform = ttsOutput.numpy()[0]\n", "    #Save the waveform to an audio file\n", "    #sf.write('output.wav', waveform, 22050)\n", "    sf.write('ttsOutput.wav', ttsWaveform, 16000)\n", "\n", "    #Calculate WerScore\n", "    WerScore = jiwer.wer(translated_text_target, hypothesis_translations)\n", "    #bleuScore = corpus_bleu(translated_text_target,hypothesis_translations)\n", "\n", "    return translated_text_target, WerScore, 'ttsOutput.wav'" ] },
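{ "cell_type": "code", "execution_count": null, "id": "added-bleu-sketch", "metadata": {}, "outputs": [], "source": [ "#Added sketch (not part of the original notebook): the commented-out corpus_bleu call in\n", "#translate_transformers above passes raw strings, but nltk's corpus_bleu expects a list of\n", "#tokenized reference lists per hypothesis and a list of tokenized hypotheses. The reference and\n", "#candidate sentences below are illustrative placeholders (the reference reuses the sample Yoruba\n", "#sentence from a later cell), not actual model outputs.\n", "from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction\n", "\n", "reference_text = \"Oruko mi ni Ayo, mo feran iwe kika gan\"   #assumed reference translation\n", "candidate_text = \"Oruko mi ni Ayo, mo feran kika iwe\"       #assumed model output\n", "references = [[reference_text.split()]]   #one list of tokenized references per hypothesis\n", "hypotheses = [candidate_text.split()]     #tokenized hypotheses\n", "bleuScore = corpus_bleu(references, hypotheses, smoothing_function=SmoothingFunction().method1)\n", "print(bleuScore)" ] },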
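{ "cell_type": "code", "execution_count": null, "id": "added-direct-call-sketch", "metadata": {}, "outputs": [], "source": [ "#Added sketch (not part of the original notebook): a direct call to translate_transformers defined\n", "#above, using one model from the Gradio dropdown below and the sample sentence from the earlier\n", "#pipeline test. Running it downloads the selected translation model and facebook/mms-tts-yor.\n", "from IPython.display import Audio\n", "\n", "yorubaText, werScore, audioPath = translate_transformers(\n", "    \"Davlan/m2m100_418M-eng-yor-mt\",\n", "    \"My Name is Ayo, I love books\"\n", ")\n", "print(yorubaText)\n", "print(werScore)\n", "Audio(audioPath)" ] },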
{ "cell_type": "code", "execution_count": 60, "id": "5d9ed5a2-0d28-4078-923d-c8c27196292a", "metadata": {}, "outputs": [], "source": [ "#text1 = \"Oruko mi ni Ayo, mo feran iwe kika gan\"\n", "#text2 = \"Agbaninímọ̀ràn kan lórí ọ̀ràn radiation and Clinical Oncologist, tórúkọ rẹ̀ ń jẹ́ Temitope Olatunji-Agunbiade ti kìlọ̀ fáwọn obìnrin pé kí wọ́n má ṣe lo oògùn máàjóyúndúró tàbí kí wọ́n lo oògùn máàjóyúndúró, ó sọ pé ìwádìí ti fi hàn pé lílò tí wọ́n ń lò ó ń mú kí ewu àrùn jẹjẹrẹ ọmú pọ̀ sí i.\"\n", "\n", "#with torch.no_grad():\n", "    #output = ttsModel(**inputs).waveform" ] },
{ "cell_type": "code", "execution_count": 61, "id": "54138308-b423-4e7c-9469-2002bfeb7918", "metadata": {}, "outputs": [], "source": [ "#from IPython.display import Audio\n", "#Audio(output, rate=ttsModel.config.sampling_rate)" ] },
{ "cell_type": "code", "execution_count": 62, "id": "bbf259d6-922d-4f5c-9af1-cbd57158a814", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7879\n", "Running on public URL: https://ccee705195aed67b23.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] } ], "source": [ "#Gradio Function and Interface\n", "import gradio as gr\n", "from IPython.display import Audio\n", "interface = gr.Interface(\n", "    fn=translate_transformers,\n", "    inputs=[\n", "        gr.Dropdown([\"Davlan/byt5-base-eng-yor-mt\", #Exp1\n", "                     \"Davlan/m2m100_418M-eng-yor-mt\", #Exp2\n", "                     \"Davlan/mbart50-large-eng-yor-mt\", #Exp3\n", "                     \"Davlan/mt5_base_eng_yor_mt\", #Exp4\n", "                     \"omoekan/opus-tatoeba-eng-yor\", #Exp5\n", "                     \"masakhane/afrimt5_en_yor_news\", #Exp6\n", "                     \"masakhane/afrimbart_en_yor_news\", #Exp7\n", "                     \"masakhane/afribyt5_en_yor_news\", #Exp8\n", "                     \"masakhane/byt5_en_yor_news\", #Exp9\n", "                     \"masakhane/mt5_en_yor_news\", #Exp10\n", "                     \"masakhane/mbart50_en_yor_news\", #Exp11\n", "                     \"masakhane/m2m100_418M_en_yor_news\", #Exp12\n", "                     \"masakhane/m2m100_418M_en_yor_rel_news\", #Exp13\n", "                     \"masakhane/m2m100_418M_en_yor_rel_news_ft\", #Exp14\n", "                     \"masakhane/m2m100_418M_en_yor_rel\", #Exp15\n", "                     #\"facebook/nllb-200-distilled-600M\", #Exp16\n", "                     #\"facebook/nllb-200-3.3B\", #Exp17\n", "                     #\"facebook/nllb-200-1.3B\", #Exp18\n", "                     #\"facebook/nllb-200-distilled-1.3B\", #Exp19\n", "                     #\"keithhon/nllb-200-3.3B\" #Exp20\n", "                     #\"CohereForAI/aya-101\" #Exp21\n", "                    ],\n", "                    label=\"Select Finetuned Eng2Yor Translation Model\"),\n", "        gr.Textbox(lines=2, placeholder=\"Enter English Text Here...\", label=\"English Text\")\n", "    ],\n", "    #outputs = \"text\",\n", "    #outputs=[\"text\", \"text\"],\n", "    #outputs= gr.Textbox(value=\"text\", label=\"Translated Text\"),\n", "    outputs=[\n", "        gr.Textbox(label=\"Translated Yoruba Text\"),\n", "        #gr.Textbox(value=\"text\", label=translated_text_actual),\n", "        gr.Textbox(label=\"WER (Word Error Rate) Score - The Lower the Better\"),\n", "        #gr.Textbox(value=\"number\", label=\"Bleu Score\")\n", "        gr.Audio(type=\"filepath\", label=\"Generated Yoruba Text2Speech\")\n", "    ],\n", "    title=\"ASPMIR NEURAL MACHINE TRANSLATION (NMT) TESTBED FOR LOW-RESOURCED AFRICAN LANGUAGES\",\n", "    description=\"This Tool Allows Developers and Researchers to Carry Out Experiments on Low-Resourced African Languages with State-of-the-Art Finetuned NMT Models.\"\n", ")\n", "\n", "interface.launch(share=True)" ] },
{ "cell_type": "code", "execution_count": null, "id": "c3baee0f-fd85-4209-9d54-14451abd372a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" } }, "nbformat": 4, "nbformat_minor": 5 }