diff --git "a/Voice2VoiceTranslation.ipynb" "b/Voice2VoiceTranslation.ipynb" --- "a/Voice2VoiceTranslation.ipynb" +++ "b/Voice2VoiceTranslation.ipynb" @@ -151,7 +151,7 @@ ], "source": [ "# Transcribe audio\n", - "from transcribe import transcribe_audio_locally\n", + "from my_transcribe import transcribe_audio_locally\n", "audio_file = \"Input Audio Sample.wav\" # Supports many audio formats\n", "result = transcribe_audio_locally(audio_file, \"base\") # Using base model" ] @@ -218,38 +218,20 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "NNkRgTgkjwoG" - }, - "outputs": [], - "source": [ - "import argostranslate.package\n", - "import argostranslate.translate\n", - "\n", - "# Download language packs (e.g., English to Hindi)\n", - "argostranslate.package.update_package_index()\n", - "available_packages = argostranslate.package.get_available_packages()\n", - "package = next(filter(lambda x: x.from_code == \"en\" and x.to_code == \"hi\", available_packages))\n", - "argostranslate.package.install_from_path(package.download())\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "id": "fR_Q7Bb8w2ho" }, "outputs": [], "source": [ + "# Read the transcribed text from the file\n", "with open(\"transcribed_text.txt\", \"r\", encoding=\"utf-8\") as f:\n", " text = f.read()" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -268,9 +250,10 @@ } ], "source": [ - "# Translate offline\n", - "hindi_translation = argostranslate.translate.translate(text, \"en\", \"hi\")\n", + "# Translate the text to Hindi\n", + "from my_translate import translate_text\n", "\n", + "hindi_translation = translate_text(text, \"en\", \"hi\")\n", "print(\"English:\", text)\n", "print(\"Hindi:\", hindi_translation)" ] @@ -313,47 +296,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "id": 
"E2VsTRWLMAqX" - }, - "outputs": [], - "source": [ - "from transformers import BarkModel\n", - "\n", - "model = BarkModel.from_pretrained(\"suno/bark-small\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "b8itQckZJqm6" - }, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", - "model = model.to(device)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UG5dU7LpL6rv" - }, - "outputs": [], - "source": [ - "from transformers import AutoProcessor\n", - "\n", - "processor = AutoProcessor.from_pretrained(\"suno/bark\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -372,6 +315,7 @@ } ], "source": [ + "# Read the Hindi translation from the file\n", "with open(\"hindi_translation.txt\", \"r\", encoding=\"utf-8\") as f:\n", " hindi_translation = f.read()\n", " print(hindi_translation)" @@ -379,23 +323,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "9WnxxN8oOGk0" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--suno--bark-small. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. 
For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", + "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", + " warnings.warn(message)\n", + "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--suno--bark. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", + "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", + " warnings.warn(message)\n", + "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--ylacombe--bark-large. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. 
For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", + "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", + " warnings.warn(message)\n", + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.\n", + "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n" + ] + } + ], "source": [ - "voice_preset = \"v2/hi_speaker_2\"\n", - "# prepare the inputs\n", - "text_prompt = hindi_translation\n", - "inputs = processor(text_prompt, voice_preset=voice_preset)\n", - "for key, value in inputs.items():\n", - " inputs[key] = inputs[key].to(device)" + "# Convert the Hindi text to speech\n", + "from my_tts import text_to_speech\n", + "speech_output, sampling_rate = text_to_speech(hindi_translation, \"v2/hi_speaker_2\")" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -405,20 +365,12 @@ "outputId": "e0476c8e-9513-433d-82b7-2372d936a168" }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results.\n", - "Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.\n" - ] - }, { "data": { "text/html": [ "\n", " \n", " " @@ -427,18 +379,14 @@ "" ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from IPython.display import Audio\n", - "sampling_rate = model.generation_config.sample_rate\n", - "# generate speech\n", - "speech_output = model.generate(**inputs)\n", - "\n", "# let's hear it\n", + "from IPython.display import Audio\n", "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)" ] }, @@ -450,124 +398,15 @@ }, "outputs": [], "source": [ - "import scipy\n", - "\n", - "sample_rate = model.generation_config.sample_rate\n", - "scipy.io.wavfile.write(\"output_audio.wav\", rate=sample_rate, data=audio_array)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GZDcQfEFxznp" - }, - "source": [ - "# Git" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Be0dEZo0fnrX" - }, - "outputs": [], - "source": [ - "!git config --global user.email \"rohitptnk03@gmail.com\"\n", - "!git config --global user.name \"Rohit Patnaik\"" + "# Save the generated audio to a file\n", + "import scipy \n", + "scipy.io.wavfile.write(\"output_audio.wav\", rate=sampling_rate, data=speech_output[0].cpu().numpy())" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "y_LWxXcR6jik" - }, - "outputs": [], - "source": [ - "!git add ." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GJQBBcsMfZlK", - "outputId": "395d67da-bb71-46b4-d68a-b5e656ec53f3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "On branch main\n", - "Your branch is up to date with 'origin/main'.\n", - "\n", - "nothing to commit, working tree clean\n" - ] - } - ], - "source": [ - "!git status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-CIUFNSsfatq", - "outputId": "cdd48a15-cd1c-4648-ad64-550853e60fa9" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[main 7ec7d5d] save output_audio from suno_bark\n", - " 1 file changed, 0 insertions(+), 0 deletions(-)\n", - " rewrite output_audio.wav (82%)\n" - ] - } - ], - "source": [ - "!git commit -m\"save output_audio from suno_bark\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cvivcfrxflSu", - "outputId": "e1aba536-b30c-429e-a7c4-b5cc8152fd8f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fatal: could not read Username for 'https://github.com': No such device or address\n" - ] - } - ], - "source": [ - "!git push origin main" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gwN-bzaSgnJX" - }, + "metadata": {}, "outputs": [], "source": [] } @@ -580,11 +419,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.9" } }, "nbformat": 4,