diff --git "a/Voice2VoiceTranslation.ipynb" "b/Voice2VoiceTranslation.ipynb"
--- "a/Voice2VoiceTranslation.ipynb"
+++ "b/Voice2VoiceTranslation.ipynb"
@@ -151,7 +151,7 @@
],
"source": [
"# Transcribe audio\n",
- "from transcribe import transcribe_audio_locally\n",
+ "from my_transcribe import transcribe_audio_locally\n",
"audio_file = \"Input Audio Sample.wav\" # Supports many audio formats\n",
"result = transcribe_audio_locally(audio_file, \"base\") # Using base model"
]
@@ -218,38 +218,20 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "NNkRgTgkjwoG"
- },
- "outputs": [],
- "source": [
- "import argostranslate.package\n",
- "import argostranslate.translate\n",
- "\n",
- "# Download language packs (e.g., English to Hindi)\n",
- "argostranslate.package.update_package_index()\n",
- "available_packages = argostranslate.package.get_available_packages()\n",
- "package = next(filter(lambda x: x.from_code == \"en\" and x.to_code == \"hi\", available_packages))\n",
- "argostranslate.package.install_from_path(package.download())\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {
"id": "fR_Q7Bb8w2ho"
},
"outputs": [],
"source": [
+ "# Read the transcribed text from the file\n",
"with open(\"transcribed_text.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" text = f.read()"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -268,9 +250,10 @@
}
],
"source": [
- "# Translate offline\n",
- "hindi_translation = argostranslate.translate.translate(text, \"en\", \"hi\")\n",
+ "# Translate the text to Hindi\n",
+ "from my_translate import translate_text\n",
"\n",
+ "hindi_translation = translate_text(text, \"en\", \"hi\")\n",
"print(\"English:\", text)\n",
"print(\"Hindi:\", hindi_translation)"
]
@@ -313,47 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "E2VsTRWLMAqX"
- },
- "outputs": [],
- "source": [
- "from transformers import BarkModel\n",
- "\n",
- "model = BarkModel.from_pretrained(\"suno/bark-small\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "b8itQckZJqm6"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
- "model = model.to(device)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "UG5dU7LpL6rv"
- },
- "outputs": [],
- "source": [
- "from transformers import AutoProcessor\n",
- "\n",
- "processor = AutoProcessor.from_pretrained(\"suno/bark\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -372,6 +315,7 @@
}
],
"source": [
+ "# Read the Hindi translation from the file\n",
"with open(\"hindi_translation.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" hindi_translation = f.read()\n",
" print(hindi_translation)"
@@ -379,23 +323,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
"id": "9WnxxN8oOGk0"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--suno--bark-small. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
+ " warnings.warn(message)\n",
+ "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--suno--bark. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
+ " warnings.warn(message)\n",
+ "c:\\Users\\rohit\\OneDrive\\Desktop\\Files\\Vaanee\\Audio-Video Data\\Individual Audio\\Prepared\\V2V\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\rohit\\.cache\\huggingface\\hub\\models--ylacombe--bark-large. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
+ " warnings.warn(message)\n",
+ "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+ "Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.\n",
+ "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
+ ]
+ }
+ ],
"source": [
- "voice_preset = \"v2/hi_speaker_2\"\n",
- "# prepare the inputs\n",
- "text_prompt = hindi_translation\n",
- "inputs = processor(text_prompt, voice_preset=voice_preset)\n",
- "for key, value in inputs.items():\n",
- " inputs[key] = inputs[key].to(device)"
+ "# Convert the Hindi text to speech\n",
+ "from my_tts import text_to_speech\n",
+ "speech_output, sampling_rate = text_to_speech(hindi_translation, \"v2/hi_speaker_2\")"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -405,20 +365,12 @@
"outputId": "e0476c8e-9513-433d-82b7-2372d936a168"
},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
- "Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.\n"
- ]
- },
{
"data": {
"text/html": [
"\n",
" \n",
" "
@@ -427,18 +379,14 @@
""
]
},
- "execution_count": 11,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "from IPython.display import Audio\n",
- "sampling_rate = model.generation_config.sample_rate\n",
- "# generate speech\n",
- "speech_output = model.generate(**inputs)\n",
- "\n",
"# let's hear it\n",
+ "from IPython.display import Audio\n",
"Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
]
},
@@ -450,124 +398,15 @@
},
"outputs": [],
"source": [
- "import scipy\n",
- "\n",
- "sample_rate = model.generation_config.sample_rate\n",
- "scipy.io.wavfile.write(\"output_audio.wav\", rate=sample_rate, data=audio_array)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "GZDcQfEFxznp"
- },
- "source": [
- "# Git"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Be0dEZo0fnrX"
- },
- "outputs": [],
- "source": [
- "!git config --global user.email \"rohitptnk03@gmail.com\"\n",
- "!git config --global user.name \"Rohit Patnaik\""
+ "# Save the generated audio to a file\n",
+ "import scipy \n",
+ "scipy.io.wavfile.write(\"output_audio.wav\", rate=sampling_rate, data=speech_output[0].cpu().numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "id": "y_LWxXcR6jik"
- },
- "outputs": [],
- "source": [
- "!git add ."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "GJQBBcsMfZlK",
- "outputId": "395d67da-bb71-46b4-d68a-b5e656ec53f3"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "On branch main\n",
- "Your branch is up to date with 'origin/main'.\n",
- "\n",
- "nothing to commit, working tree clean\n"
- ]
- }
- ],
- "source": [
- "!git status"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "-CIUFNSsfatq",
- "outputId": "cdd48a15-cd1c-4648-ad64-550853e60fa9"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[main 7ec7d5d] save output_audio from suno_bark\n",
- " 1 file changed, 0 insertions(+), 0 deletions(-)\n",
- " rewrite output_audio.wav (82%)\n"
- ]
- }
- ],
- "source": [
- "!git commit -m\"save output_audio from suno_bark\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "cvivcfrxflSu",
- "outputId": "e1aba536-b30c-429e-a7c4-b5cc8152fd8f"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "fatal: could not read Username for 'https://github.com': No such device or address\n"
- ]
- }
- ],
- "source": [
- "!git push origin main"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "gwN-bzaSgnJX"
- },
+ "metadata": {},
"outputs": [],
"source": []
}
@@ -580,11 +419,21 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": ".venv",
+ "language": "python",
"name": "python3"
},
"language_info": {
- "name": "python"
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
}
},
"nbformat": 4,