Add GPU device support to dataset
- __pycache__/dataset.cpython-39.pyc +0 -0
- dataset.py +11 -2
- notebooks/playground.ipynb +41 -38
__pycache__/dataset.cpython-39.pyc
CHANGED
Binary files a/__pycache__/dataset.cpython-39.pyc and b/__pycache__/dataset.cpython-39.pyc differ
dataset.py
CHANGED
```diff
@@ -7,13 +7,21 @@ import torchaudio
 
 class VoiceDataset(Dataset):
 
-    def __init__(self, data_directory, transformation, target_sample_rate, time_limit_in_secs=5):
+    def __init__(
+        self,
+        data_directory,
+        transformation,
+        target_sample_rate,
+        device,
+        time_limit_in_secs=5,
+    ):
         # file processing
         self._data_path = os.path.join(data_directory)
         self._labels = os.listdir(self._data_path)
-
         self.audio_files_labels = self._join_audio_files()
 
+        self.device = device
+
         # audio processing
         self.transformation = transformation
         self.target_sample_rate = target_sample_rate
@@ -35,6 +43,7 @@ class VoiceDataset(Dataset):
         wav, sr = torchaudio.load(filepath, normalize=True)
 
         # modify wav file, if necessary
+        wav = wav.to(self.device)
         wav = self._resample(wav, sr)
         wav = self._mix_down(wav)
         wav = self._cut_or_pad(wav)
```
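The change itself is small: `__init__` gains a `device` parameter and stores it, and the load path in the second hunk moves each waveform onto that device right after `torchaudio.load`, so the `_resample` / `_mix_down` / `_cut_or_pad` helpers and the transformation all run there. Below is a minimal standalone sketch of that flow; the function name and the inlined resample/mix-down bodies are illustrative stand-ins for the helpers named in the hunk, not the repo's actual implementations:

```python
import torch
import torchaudio

# Illustrative stand-in for VoiceDataset's per-item load path after this commit.
def load_example(filepath, transformation, device, target_sample_rate=16000):
    wav, sr = torchaudio.load(filepath, normalize=True)
    wav = wav.to(device)  # the line this commit adds

    # stand-in for self._resample(wav, sr)
    if sr != target_sample_rate:
        resample = torchaudio.transforms.Resample(sr, target_sample_rate).to(device)
        wav = resample(wav)

    # stand-in for self._mix_down(wav): average channels down to mono
    if wav.shape[0] > 1:
        wav = torch.mean(wav, dim=0, keepdim=True)

    # cut/pad to the time limit omitted for brevity
    return transformation(wav)
```

One caveat: torchaudio transforms such as `Resample` and `MelSpectrogram` are `nn.Module`s with registered buffers, so once the waveform lives on `"cuda"` the transformation handed to the dataset must be moved there too (`transformation.to(device)`), or applying it raises a device-mismatch error. The notebook run below is on the CPU, where the mismatch cannot occur.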
notebooks/playground.ipynb
CHANGED
```diff
@@ -3,7 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "id": "…",
+   "id": "7f11e761",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,7 +14,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "id": "…",
+   "id": "f3deb79d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -24,18 +24,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 76,
+   "id": "eb9888a5",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os"
+    "import os\n",
+    "\n",
+    "import torch"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 77,
+   "id": "75440e63",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,29 +46,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 78,
+   "id": "5b51f712",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "…"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using device cpu\n"
+     ]
     }
    ],
    "source": [
-    "…"
+    "if torch.cuda.is_available():\n",
+    "    device = \"cuda\"\n",
+    "else:\n",
+    "    device = \"cpu\"\n",
+    "print(f\"Using device {device}\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 80,
+   "id": "253f87d6",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -76,13 +79,13 @@
     "    hop_length=512,\n",
     "    n_mels=64\n",
     "    )\n",
-    "dataset = VoiceDataset('../data', mel_spectrogram, 16000,)"
+    "dataset = VoiceDataset('../data', mel_spectrogram, 16000, device)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 81,
+   "id": "3d5c127a",
    "metadata": {},
    "outputs": [
     {
@@ -91,7 +94,7 @@
       "5718"
      ]
     },
-    "execution_count": …,
+    "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -102,24 +105,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 82,
+   "id": "cbac184f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(tensor([[[0.2647, 0.0247, 0.0324, ..., 0.…",
-       "          [0.0812, 0.0178, 0.0890, ..., 0.…",
-       "          [0.0052, 0.0212, 0.1341, ..., 0.…",
+       "(tensor([[[0.2647, 0.0247, 0.0324, ..., 0.0230, 0.1026, 0.5454],\n",
+       "          [0.0812, 0.0178, 0.0890, ..., 0.2376, 0.5061, 0.5292],\n",
+       "          [0.0052, 0.0212, 0.1341, ..., 0.9336, 0.2778, 0.1372],\n",
        "          ...,\n",
-       "          [0.5154, 0.3950, 0.4497, ..., 0.…",
-       "          [0.1919, 0.4804, 0.5144, ..., 0.…",
-       "          [0.1208, 0.4357, 0.4016, ..., 0.…",
+       "          [0.5154, 0.3950, 0.4497, ..., 0.4916, 0.4505, 0.7709],\n",
+       "          [0.1919, 0.4804, 0.5144, ..., 0.5931, 0.4466, 0.4706],\n",
+       "          [0.1208, 0.4357, 0.4016, ..., 0.5168, 0.7007, 0.3696]]]),\n",
        " 'aman')"
      ]
     },
-    "execution_count": …,
+    "execution_count": 82,
    "metadata": {},
    "output_type": "execute_result"
    }
@@ -130,17 +133,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": …,
-   "id": "…",
+   "execution_count": 83,
+   "id": "2bd8c582",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "torch.Size([1, 64, …",
+       "torch.Size([1, 64, 157])"
      ]
     },
-    "execution_count": …,
+    "execution_count": 83,
    "metadata": {},
    "output_type": "execute_result"
    }
@@ -152,7 +155,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "…",
+   "id": "c3c7b1d4",
    "metadata": {},
    "outputs": [],
    "source": []
```
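Stripped of the JSON wrapping, the notebook now does roughly the following. Only `hop_length=512` and `n_mels=64` are visible in the diff context, so the remaining `MelSpectrogram` arguments (and the import of `VoiceDataset`) are assumed typical values, and the trailing `DataLoader` line is a hypothetical next step rather than part of this commit:

```python
import torch
import torchaudio
from torch.utils.data import DataLoader

from dataset import VoiceDataset  # assumed import path

# pick a device, as in the notebook cell above
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,  # assumed; not visible in the diff context
    n_fft=1024,         # assumed; not visible in the diff context
    hop_length=512,
    n_mels=64,
).to(device)  # keep the transform's buffers on the same device as the waveforms

dataset = VoiceDataset('../data', mel_spectrogram, 16000, device)
print(len(dataset))       # 5718 in the captured run
spec, label = dataset[0]
print(spec.shape, label)  # torch.Size([1, 64, 157]) and 'aman' above

# hypothetical consumption step, not part of this commit
loader = DataLoader(dataset, batch_size=32, shuffle=True)
```

As a sanity check on the captured outputs: with the constructor's `time_limit_in_secs=5` default, 5 s at 16 kHz is 80,000 samples, and with `hop_length=512` that gives ⌊80000 / 512⌋ + 1 = 157 frames, matching the `torch.Size([1, 64, 157])` shown above.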