baakaani committed on
Commit
36ad38b
1 Parent(s): 5b4f8ce

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DataSet/
2
+ **/Data Processing/pretrained_models
3
+ .ipynb_checkpoints
4
+ **/Data Processing/.ipynb_checkpoints
5
+ node_modules/
6
+ Retraining Pipeline/models
7
+ Retraining Pipeline/audio-diffusion
8
+ Enhancement/
9
+ ui/*.wav
10
+ ui/*.npy
11
+
12
+ input_songs/
13
+ uploads/
14
+ audio/
15
+ __pycache__/
README.md CHANGED
@@ -1,12 +1,52 @@
1
- ---
2
- title: Orpheus
3
- emoji: 🦀
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: streamlit
7
- sdk_version: 1.34.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Orpheus_ai - User Study
2
+
3
+ ## Introduction
4
+
5
+ This repository hosts the code for conducting the user study for Orpheus_ai, a system designed for music generation. The user study aims to assess the efficacy of Orpheus_ai in creating music compositions.
6
+
7
+ ## Requirements
8
+
9
+ Before initiating the user study, ensure the following prerequisites are met:
10
+
11
+ 1. **Install Required Packages:**
12
+
13
+ Set up a virtual environment and install the necessary packages using the provided `requirements.txt` file:
14
+ ```
15
+ pip install -r requirements.txt
16
+ ```
17
+
18
+ Alternatively, if you opt not to use a virtual environment, manually install the required versions of the `diffusers` and `audiodiffusion` packages:
19
+ ```
20
+ pip install diffusers==0.17.1 audiodiffusion==1.5.6
21
+ ```
22
+
23
+ Note: A GPU is required to run the code, so make sure the installed PyTorch build is compatible with the CUDA version on your system.
24
+
25
+ 2. **Create Folders for Data Storage:**
26
+
27
+ Create the following directory structure within the root directory for storing songs and spectrograms:
28
+ ```
29
+ DataSet
30
+ ├───Song
31
+ └───Spec
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ To execute the user study and generate songs along with spectrograms, follow these steps:
37
+
38
+ 1. Navigate to the `ui` directory:
39
+ ```
40
+ cd ui
41
+ ```
42
+
43
+ 2. Launch the user study interface using Streamlit:
44
+ ```
45
+ streamlit run main.py
46
+ ```
47
+
48
+ Note: It's advisable to select only one song from the provided list during the initial phase. After you generate the song and submit your ratings, the resulting song and spectrogram are saved in the `DataSet/Song` and `DataSet/Spec` folders, respectively. The files follow this naming convention:
49
+ ```
50
+ <model_name>_<song_name>_<similarity>_<rating>.wav
51
+ <model_name>_<song_name>_<similarity>_<rating>.npy
52
+ ```
Retraining Pipeline/collect_valid_samples.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Retraining Pipeline/retrain.ipynb ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Cloning into 'audio-diffusion'...\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "!git clone https://github.com/teticio/audio-diffusion\n"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {},
23
+ "source": [
24
+ "### Creating Dataset"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "python audio-diffusion/scripts/audio_to_images.py --resolution 256 --input_dir D:/Projects/Orpheus_ai/DataSet/Generated_music_withratings/training_samples/ --output_dir D:/Projects/Orpheus_ai/DataSet/Generated_music_withratings/train/ --push_to_hub SAint7579/orpheus_samples"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "markdown",
38
+ "metadata": {},
39
+ "source": [
40
+ "### Retraining the model"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 23,
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "ename": "SyntaxError",
50
+ "evalue": "invalid syntax (<ipython-input-23-e665de61672a>, line 1)",
51
+ "output_type": "error",
52
+ "traceback": [
53
+ "\u001b[1;36m File \u001b[1;32m\"<ipython-input-23-e665de61672a>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m accelerate launch --config_file audio-diffusion/config/accelerate_local.yaml audio-diffusion/scripts/train_unet.py --dataset_name SAint7579/orpheus_samples --output_dir models/v1-0 --num_epochs 10 --train_batch_size 2 --eval_batch_size 2 --gradient_accumulation_steps 8 --learning_rate 1e-4 --mixed_precision no --push_to_hub True --hub_model_id SAint7579/orpheus_ldm_model_v1-0\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "accelerate launch --config_file audio-diffusion/config/accelerate_local.yaml audio-diffusion/scripts/train_unet.py --dataset_name SAint7579/orpheus_samples --output_dir models/v1-0 --num_epochs 1 --train_batch_size 2 --eval_batch_size 2 --gradient_accumulation_steps 8 --learning_rate 1e-4 --mixed_precision no --push_to_hub True --hub_model_id SAint7579/orpheus_ldm_model_v1-0 --vae teticio/latent-audio-diffusion-256 --from_pretrained teticio/latent-audio-diffusion-256"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "--num_epochs 100 \\\n",
68
+ "--train_batch_size 2 \\\n",
69
+ "--eval_batch_size 2 \\\n",
70
+ "--gradient_accumulation_steps 8 \\\n",
71
+ "--learning_rate 1e-4 \\\n",
72
+ "--lr_warmup_steps 500 \\\n",
73
+ "--mixed_precision no \\\n",
74
+ "--push_to_hub True \\\n",
75
+ "--hub_model_id audio-diffusion-256 \\\n",
76
+ "--hub_token $(cat $HOME/.huggingface/token)"
77
+ ]
78
+ }
79
+ ],
80
+ "metadata": {
81
+ "kernelspec": {
82
+ "display_name": "base",
83
+ "language": "python",
84
+ "name": "python3"
85
+ },
86
+ "language_info": {
87
+ "codemirror_mode": {
88
+ "name": "ipython",
89
+ "version": 3
90
+ },
91
+ "file_extension": ".py",
92
+ "mimetype": "text/x-python",
93
+ "name": "python",
94
+ "nbconvert_exporter": "python",
95
+ "pygments_lexer": "ipython3",
96
+ "version": "3.8.8"
97
+ },
98
+ "orig_nbformat": 4
99
+ },
100
+ "nbformat": 4,
101
+ "nbformat_minor": 2
102
+ }
Retraining Pipeline/sample_collection.ipynb ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import sys\n",
10
+ "sys.path.append('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 3,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "unet\\diffusion_pytorch_model.safetensors not found\n",
23
+ "unet\\diffusion_pytorch_model.safetensors not found\n"
24
+ ]
25
+ }
26
+ ],
27
+ "source": [
28
+ "import generation_utilities\n",
29
+ "import numpy as np\n",
30
+ "import librosa\n",
31
+ "from glob import glob\n",
32
+ "import random\n",
33
+ "import IPython.display as ipd\n",
34
+ "import soundfile as sf\n",
35
+ "import importlib\n",
36
+ "import ipywidgets as widgets\n",
37
+ "import numpy as np\n",
38
+ "importlib.reload(generation_utilities)\n",
39
+ "import os\n",
40
+ "os.environ[\"KMP_DUPLICATE_LIB_OK\"]=\"TRUE\""
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 6,
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "data": {
50
+ "text/plain": [
51
+ "{'22': '../input_songs\\\\22.mp3',\n",
52
+ " 'Anti-Hero': '../input_songs\\\\Anti-Hero.mp3',\n",
53
+ " 'Back-To-December': '../input_songs\\\\Back-To-December.mp3',\n",
54
+ " 'Blank-Space': '../input_songs\\\\Blank-Space.mp3',\n",
55
+ " 'Cardigan': '../input_songs\\\\Cardigan.mp3',\n",
56
+ " 'Delicate': '../input_songs\\\\Delicate.mp3',\n",
57
+ " 'Love-Story': '../input_songs\\\\Love-Story.mp3',\n",
58
+ " 'Lover': '../input_songs\\\\Lover.mp3',\n",
59
+ " 'uploaded_song': '../input_songs\\\\uploaded_song.mp3',\n",
60
+ " 'Willow': '../input_songs\\\\Willow.mp3',\n",
61
+ " 'You-Belong-With-Me': '../input_songs\\\\You-Belong-With-Me.mp3'}"
62
+ ]
63
+ },
64
+ "execution_count": 6,
65
+ "metadata": {},
66
+ "output_type": "execute_result"
67
+ }
68
+ ],
69
+ "source": [
70
+ "music = glob(\"../input_songs/*.mp3\")\n",
71
+ "\n",
72
+ "## Create music dictionary with keys as song names and values as path\n",
73
+ "music_dict = {}\n",
74
+ "for song in music:\n",
75
+ " song_name = song.split(\"\\\\\")[-1].split(\".\")[0]\n",
76
+ " music_dict[song_name] = song\n",
77
+ "\n",
78
+ "music_dict"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 8,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "def load_songs_as_numpy_array(song_dict):\n",
88
+ " song_names = list(song_dict.keys())\n",
89
+ " song_paths = list(song_dict.values())\n",
90
+ "\n",
91
+ " song_selector = widgets.SelectMultiple(\n",
92
+ " options=song_names,\n",
93
+ " description='Select songs:',\n",
94
+ " rows=len(song_names),\n",
95
+ " layout=widgets.Layout(width='400px')\n",
96
+ " )\n",
97
+ "\n",
98
+ " ## Add a slider between 0 and 1 to select the similarity\n",
99
+ " similarity_slider = widgets.FloatSlider(\n",
100
+ " value=0.75,\n",
101
+ " min=0,\n",
102
+ " max=1.0,\n",
103
+ " step=0.05,\n",
104
+ " description='Similarity:',\n",
105
+ " disabled=False,\n",
106
+ " continuous_update=False,\n",
107
+ " orientation='horizontal',\n",
108
+ " readout=True,\n",
109
+ " readout_format='.2f',\n",
110
+ " )\n",
111
+ "\n",
112
+ "\n",
113
+ " select_button = widgets.Button(description='Select')\n",
114
+ " output = widgets.Output()\n",
115
+ "\n",
116
+ " def on_select_button_clicked(_):\n",
117
+ " selected_songs = [librosa.load(song_paths[i], sr=22050)[0] for i in song_selector.index] \n",
118
+ " name = '+'.join([song_names[i] for i in song_selector.index])\n",
119
+ "\n",
120
+ " with output:\n",
121
+ " output.clear_output()\n",
122
+ " print(f'Selected songs: {name}')\n",
123
+ " ## Get similarity from the slider\n",
124
+ " similarity = similarity_slider.value\n",
125
+ " spec, song = generation_utilities.generate_songs(selected_songs, similarity=similarity, quality=500, merging_quality=100, device='cuda')\n",
126
+ "\n",
127
+ " ## Play all the selected songs\n",
128
+ " print(\"Selected songs: \")\n",
129
+ " for s in selected_songs:\n",
130
+ " ipd.display(ipd.Audio(s, rate=22050))\n",
131
+ "\n",
132
+ " # Play song\n",
133
+ " print(\"\\n\\nGenerated song: \")\n",
134
+ " ipd.display(ipd.Audio(song, rate=22050))\n",
135
+ "\n",
136
+ " ## Get a rating from the user with ipywidgets\n",
137
+ " rating = widgets.Text(\n",
138
+ " value='',\n",
139
+ " placeholder='Type something',\n",
140
+ " description='Rating:',\n",
141
+ " disabled=False\n",
142
+ " )\n",
143
+ " display(rating)\n",
144
+ " ## Make a submit button\n",
145
+ " submit_button = widgets.Button(description='Submit')\n",
146
+ " display(submit_button)\n",
147
+ "\n",
148
+ " def on_submit_button_clicked(_):\n",
149
+ " ## Convert rating to float\n",
150
+ " rating_value = float(rating.value)\n",
151
+ "\n",
152
+ " ## Save the song\n",
153
+ " total_files = glob('../DataSet/Song/*')\n",
154
+ " sf.write(f'../DataSet/Song/{len(total_files)}_{name}_{similarity}_{rating_value}.wav', song, 22050, 'PCM_24')\n",
155
+ "\n",
156
+ " ## Save the spectrogram\n",
157
+ " total_files = glob('../DataSet/Spec/*')\n",
158
+ " np.save(f'../DataSet/Spec/{len(total_files)}_{name}_{similarity}_{rating_value}.npy', spec)\n",
159
+ "\n",
160
+ " with output:\n",
161
+ " output.clear_output()\n",
162
+ " print(\"Saved\")\n",
163
+ "\n",
164
+ "\n",
165
+ " submit_button.on_click(on_submit_button_clicked)\n",
166
+ " \n",
167
+ " select_button.on_click(on_select_button_clicked)\n",
168
+ "\n",
169
+ " display(song_selector)\n",
170
+ " display(select_button)\n",
171
+ " display(similarity_slider)\n",
172
+ " display(output)\n",
173
+ "\n",
174
+ "\n",
175
+ " # return selected_songs, name\n"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 9,
181
+ "metadata": {},
182
+ "outputs": [
183
+ {
184
+ "data": {
185
+ "application/vnd.jupyter.widget-view+json": {
186
+ "model_id": "c2beb314b7784af097f04ad2bfd70046",
187
+ "version_major": 2,
188
+ "version_minor": 0
189
+ },
190
+ "text/plain": [
191
+ "SelectMultiple(description='Select songs:', layout=Layout(width='400px'), options=('22', 'Anti-Hero', 'Back-To…"
192
+ ]
193
+ },
194
+ "metadata": {},
195
+ "output_type": "display_data"
196
+ },
197
+ {
198
+ "data": {
199
+ "application/vnd.jupyter.widget-view+json": {
200
+ "model_id": "2edb699552dd4196bec2683395fc0f11",
201
+ "version_major": 2,
202
+ "version_minor": 0
203
+ },
204
+ "text/plain": [
205
+ "Button(description='Select', style=ButtonStyle())"
206
+ ]
207
+ },
208
+ "metadata": {},
209
+ "output_type": "display_data"
210
+ },
211
+ {
212
+ "data": {
213
+ "application/vnd.jupyter.widget-view+json": {
214
+ "model_id": "80b12515c92242ffa9483eb3982464a2",
215
+ "version_major": 2,
216
+ "version_minor": 0
217
+ },
218
+ "text/plain": [
219
+ "FloatSlider(value=0.75, continuous_update=False, description='Similarity:', max=1.0, step=0.05)"
220
+ ]
221
+ },
222
+ "metadata": {},
223
+ "output_type": "display_data"
224
+ },
225
+ {
226
+ "data": {
227
+ "application/vnd.jupyter.widget-view+json": {
228
+ "model_id": "ef000bfed4b94491931224b0cb0ce823",
229
+ "version_major": 2,
230
+ "version_minor": 0
231
+ },
232
+ "text/plain": [
233
+ "Output()"
234
+ ]
235
+ },
236
+ "metadata": {},
237
+ "output_type": "display_data"
238
+ }
239
+ ],
240
+ "source": [
241
+ "load_songs_as_numpy_array(music_dict)"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": 4,
247
+ "metadata": {},
248
+ "outputs": [],
249
+ "source": [
250
+ "import os\n",
251
+ "os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'"
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "code",
256
+ "execution_count": 5,
257
+ "metadata": {},
258
+ "outputs": [
259
+ {
260
+ "name": "stdout",
261
+ "output_type": "stream",
262
+ "text": [
263
+ "Merging songs...\n",
264
+ "Generating song...\n"
265
+ ]
266
+ },
267
+ {
268
+ "data": {
269
+ "application/vnd.jupyter.widget-view+json": {
270
+ "model_id": "7ea21aa7cb7249d4ba35c63506cf0031",
271
+ "version_major": 2,
272
+ "version_minor": 0
273
+ },
274
+ "text/plain": [
275
+ " 0%| | 0/100 [00:00<?, ?it/s]"
276
+ ]
277
+ },
278
+ "metadata": {},
279
+ "output_type": "display_data"
280
+ },
281
+ {
282
+ "data": {
283
+ "text/plain": [
284
+ "(<PIL.Image.Image image mode=L size=256x256>,\n",
285
+ " array([-2.02207055e-04, -9.85335573e-05, -1.04858875e-04, ...,\n",
286
+ " -2.07161275e-03, -6.43183384e-03, -6.42244611e-03], dtype=float32))"
287
+ ]
288
+ },
289
+ "execution_count": 5,
290
+ "metadata": {},
291
+ "output_type": "execute_result"
292
+ }
293
+ ],
294
+ "source": [
295
+ "generation_utilities.generate_songs([librosa.load(music_dict[\"22\"], sr=22050)[0]] ,)"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 14,
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "from streamlit_extras.switch_page_button import switch_page\n",
305
+ "\n",
306
+ "switch_page(\"New page name\")"
307
+ ]
308
+ }
309
+ ],
310
+ "metadata": {
311
+ "kernelspec": {
312
+ "display_name": "base",
313
+ "language": "python",
314
+ "name": "python3"
315
+ },
316
+ "language_info": {
317
+ "codemirror_mode": {
318
+ "name": "ipython",
319
+ "version": 3
320
+ },
321
+ "file_extension": ".py",
322
+ "mimetype": "text/x-python",
323
+ "name": "python",
324
+ "nbconvert_exporter": "python",
325
+ "pygments_lexer": "ipython3",
326
+ "version": "3.11.4"
327
+ },
328
+ "orig_nbformat": 4
329
+ },
330
+ "nbformat": 4,
331
+ "nbformat_minor": 2
332
+ }
change_song.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ import psycopg2
3
+ from io import BytesIO
4
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
5
+ from diffusers.schedulers.scheduling_utils import SchedulerMixin
6
+ from scipy.io import wavfile
7
+ import soundfile as sf
8
+ import os
9
+ warnings.filterwarnings("ignore")
10
+ import sys
11
+ from generation_utilities import *
12
+
13
+ import numpy as np # noqa: E402
14
+ from PIL import Image # noqa: E402
15
+
16
+ import shutil
17
+ import torch
18
+ from IPython.display import Audio
19
+ from audiodiffusion import AudioDiffusion, AudioDiffusionPipeline
20
+ from audiodiffusion.audio_encoder import AudioEncoder
21
+ import librosa
22
+ import librosa.display
23
+
24
+ import IPython.display as ipd
25
+
26
+ device = "cuda" if torch.cuda.is_available() else "cpu"
27
+
28
+ # audio_diffusion = AudioDiffusionPipeline.from_pretrained("teticio/latent-audio-diffusion-256").to(device)
29
+ audio_diffusion = AudioDiffusionPipeline.from_pretrained("SAint7579/orpheus_ldm_model_v1-0").to(device)
30
+ ddim = AudioDiffusionPipeline.from_pretrained("teticio/audio-diffusion-ddim-256").to(device)
31
+
32
+
33
+
34
+
35
+
36
+ try:
37
+ import librosa # noqa: E402
38
+
39
+ _librosa_can_be_imported = True
40
+ _import_error = ""
41
+ except Exception as e:
42
+ _librosa_can_be_imported = False
43
+ _import_error = (
44
+ f"Cannot import librosa because {e}. Make sure to correctly install librosa to be able to install it."
45
+ )
46
+
47
+
48
+ class Mel(ConfigMixin, SchedulerMixin):
49
+ """
50
+ Parameters:
51
+ x_res (`int`): x resolution of spectrogram (time)
52
+ y_res (`int`): y resolution of spectrogram (frequency bins)
53
+ sample_rate (`int`): sample rate of audio
54
+ n_fft (`int`): length of the FFT window, in samples
55
+ hop_length (`int`): hop length (a higher number is recommended for lower than 256 y_res)
56
+ top_db (`int`): dynamic range in decibels; values more than `top_db` below the reference level are clipped
57
+ n_iter (`int`): number of iterations for Griffin-Lim mel inversion
58
+ """
59
+
60
+ config_name = "mel_config.json"
61
+
62
+ @register_to_config
63
+ def __init__(
64
+ self,
65
+ x_res: int = 256,
66
+ y_res: int = 256,
67
+ sample_rate: int = 22050,
68
+ n_fft: int = 2048,
69
+ hop_length: int = 512,
70
+ top_db: int = 80,
71
+ n_iter: int = 32,
72
+ ):
73
+ self.hop_length = hop_length
74
+ self.sr = sample_rate
75
+ self.n_fft = n_fft
76
+ self.top_db = top_db
77
+ self.n_iter = n_iter
78
+ self.set_resolution(x_res, y_res)
79
+ self.audio = None
80
+
81
+ if not _librosa_can_be_imported:
82
+ raise ValueError(_import_error)
83
+
84
+ def set_resolution(self, x_res: int, y_res: int):
85
+ """Set resolution.
86
+
87
+ Args:
88
+ x_res (`int`): x resolution of spectrogram (time)
89
+ y_res (`int`): y resolution of spectrogram (frequency bins)
90
+ """
91
+ self.x_res = x_res
92
+ self.y_res = y_res
93
+ self.n_mels = self.y_res
94
+ self.slice_size = self.x_res * self.hop_length - 1
95
+
96
+ def load_audio(self, audio_file: str = None, raw_audio: np.ndarray = None):
97
+ """Load audio.
98
+
99
+ Args:
100
+ audio_file (`str`): must be a file on disk due to Librosa limitation or
101
+ raw_audio (`np.ndarray`): audio as numpy array
102
+ """
103
+ if audio_file is not None:
104
+ self.audio, _ = librosa.load(audio_file, mono=True, sr=self.sr)
105
+ else:
106
+ self.audio = raw_audio
107
+
108
+ # Pad with silence if necessary.
109
+ if len(self.audio) < self.x_res * self.hop_length:
110
+ self.audio = np.concatenate([self.audio, np.zeros((self.x_res * self.hop_length - len(self.audio),))])
111
+
112
+ def get_number_of_slices(self) -> int:
113
+ """Get number of slices in audio.
114
+
115
+ Returns:
116
+ `int`: number of spectrograms audio can be sliced into
117
+ """
118
+ return len(self.audio) // self.slice_size
119
+
120
+ def get_audio_slice(self, slice: int = 0) -> np.ndarray:
121
+ """Get slice of audio.
122
+
123
+ Args:
124
+ slice (`int`): slice number of audio (out of get_number_of_slices())
125
+
126
+ Returns:
127
+ `np.ndarray`: audio as numpy array
128
+ """
129
+ return self.audio[self.slice_size * slice : self.slice_size * (slice + 1)]
130
+
131
+ def get_sample_rate(self) -> int:
132
+ """Get sample rate:
133
+
134
+ Returns:
135
+ `int`: sample rate of audio
136
+ """
137
+ return self.sr
138
+
139
+ def audio_slice_to_image(self, slice: int, ref=np.max) -> Image.Image:
140
+ """Convert slice of audio to spectrogram.
141
+
142
+ Args:
143
+ slice (`int`): slice number of audio to convert (out of get_number_of_slices())
144
+
145
+ Returns:
146
+ `PIL Image`: grayscale image of x_res x y_res
147
+ """
148
+ S = librosa.feature.melspectrogram(
149
+ y=self.get_audio_slice(slice), sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, n_mels=self.n_mels
150
+ )
151
+ log_S = librosa.power_to_db(S, ref=ref, top_db=self.top_db)
152
+ bytedata = (((log_S + self.top_db) * 255 / self.top_db).clip(0, 255) + 0.5).astype(np.uint8)
153
+ image = Image.fromarray(bytedata)
154
+ return image
155
+
156
+ def image_to_audio(self, image: Image.Image) -> np.ndarray:
157
+ """Converts spectrogram to audio.
158
+
159
+ Args:
160
+ image (`PIL Image`): x_res x y_res grayscale image
161
+
162
+ Returns:
163
+ audio (`np.ndarray`): raw audio
164
+ """
165
+ bytedata = np.frombuffer(image.tobytes(), dtype="uint8").reshape((image.height, image.width))
166
+ log_S = bytedata.astype("float") * self.top_db / 255 - self.top_db
167
+ S = librosa.db_to_power(log_S)
168
+ audio = librosa.feature.inverse.mel_to_audio(
169
+ S, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, n_iter=self.n_iter
170
+ )
171
+ return audio
172
+
173
+ def audioarray_to_mp3(audioarray, file_path):
174
+ sample_rate = 22050
175
+ mel = Mel()
176
+ # Save the audio array as a temporary WAV file
177
+ temp_wav_file = "temp.wav"
178
+ wavfile.write(temp_wav_file, sample_rate, audioarray)
179
+
180
+ # Set the output MP3 file path
181
+ os.remove(file_path)
182
+ output_mp3_file = file_path
183
+
184
+ # Load the temporary WAV file
185
+ wav_data, sr = sf.read(temp_wav_file)
186
+
187
+ # Convert the WAV data to MP3 format
188
+ sf.write(output_mp3_file, wav_data, sample_rate, format="MP3")
189
+
190
+ return None
191
+
192
+ def audioarray_to_mp3_highdb(audioarray, file_path):
193
+ sample_rate = 22050
194
+ # Save the audio array as a temporary WAV file
195
+ temp_wav_file = "temp.wav"
196
+
197
+ audio = ipd.Audio(audioarray, rate=sample_rate)
198
+
199
+ ## Write file into a wav file with open
200
+ with open(file_path, 'wb') as f:
201
+ f.write(audio.data)
202
+
203
+ return None
204
+
205
+
206
+ def main():
207
+ # # Connect to the PostgreSQL database
208
+ # conn = psycopg2.connect(database="orpheus", user="postgres", password="1234", host="localhost", port="5432")
209
+ # cur = conn.cursor()
210
+
211
+ # # Assuming you have a table named 'images' with columns 'id' (serial primary key) and 'image_data' (bytea)
212
+ # table_name = "songs"
213
+
214
+ # image_id = np.random.randint(1, 10) # Replace with the actual ID of the image you want to retrieve
215
+ # print(image_id)
216
+ # # Retrieve the image data from the database
217
+ # cur.execute(f"SELECT song FROM {table_name} WHERE id = %s", (image_id,))
218
+ # result = cur.fetchone()
219
+
220
+ # # Convert the bytea data to PIL.Image.Image object
221
+ # image_bytes = BytesIO(result[0])
222
+ # image = Image.open(image_bytes)
223
+ # # image.save("C:/VS code projects/Orpheus-2/audio/thumbnail.png")
224
+ # # Close the database connection
225
+ # cur.close()
226
+ # conn.close()
227
+
228
+ # getting the song names
229
+ song1_name=sys.argv[1]
230
+ song2_name=sys.argv[2]
231
+ song3_name=sys.argv[3]
232
+ similarity_index=float(sys.argv[4])/100
233
+ print("Similarity index is",similarity_index)
234
+ print("Song1 name is",song1_name)
235
+ print("Song2 name is",song2_name)
236
+ print("Song3 name is",song3_name)
237
+ print("Similarity index is",similarity_index)
238
+ #
239
+ mel = Mel()
240
+ # audioarray_to_mp3(mel.image_to_audio(image), "audio/output.mp3")
241
+ # song_array_1, sr = librosa.load("audio\output.mp3", sr=22050)
242
+ # song_array_1 = song_array_1[:sr*5]
243
+
244
+ input_songs_array = []
245
+
246
+ if song1_name != "None":
247
+ song_array_1, sr = librosa.load(f"input_songs/{song1_name}.mp3", sr=22050)
248
+ input_songs_array.append(song_array_1)
249
+ # song_array_1, sr = librosa.load(f"input_songs/{song1_name}.mp3", sr=22050)
250
+
251
+ # song_array_1 = song_array_1[:sr*5]
252
+ if song2_name != "None":
253
+ song_array_2, sr = librosa.load(f"input_songs/{song2_name}.mp3", sr=22050)
254
+ input_songs_array.append(song_array_2)
255
+
256
+ # song_array_2, sr = librosa.load(f"input_songs/{song2_name}.mp3", sr=22050)
257
+ # song_array_2 = song_array_2[:sr*5]
258
+
259
+ if song3_name != "None":
260
+ song_array_3, sr = librosa.load(f"input_songs/{song3_name}.mp3", sr=22050)
261
+ input_songs_array.append(song_array_3)
262
+ # song_array_3, sr = librosa.load(f"input_songs/{song3_name}.mp3", sr=22050)
263
+ # song_array_3 = song_array_3[:sr*5]
264
+ mage, audio = generate_songs(input_songs_array, similarity=similarity_index, quality=200)
265
+ mage.save("audio/thumbnail.png")
266
+ audioarray_to_mp3_highdb(audio,"audio/generated_song.mp3")
267
+ # if i==3:
268
+ # shutil.copy2("aidio/nvg.mp3", "audio/generated_song.mp3")
269
+
270
+ print("Python script executed successfully")
271
+
272
+ if __name__ == "__main__":
273
+ main()
exp.ipynb ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import numpy as np"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import glob\n",
19
+ "import matplotlib.pyplot as plt "
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 15,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "songs = glob.glob(\"DataSet/srija/*.wav\")\n",
29
+ "ratings = [s.split(\"_\")[-1].split(\".\")[0] for s in songs]\n",
30
+ "ratings = np.array(ratings, dtype=int)\n",
31
+ "version = [s.split(\"_\")[1][-2:] for s in songs]"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 16,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "import pandas as pd\n",
41
+ "rr =pd.DataFrame({\"rating\": ratings, \"version\": version})\n",
42
+ "\n",
43
+ "# plt = rr.groupby([\"version\"])[\"ratings\"].mean().plot(kind='bar', stacked=True)\n"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 18,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/html": [
54
+ "<div>\n",
55
+ "<style scoped>\n",
56
+ " .dataframe tbody tr th:only-of-type {\n",
57
+ " vertical-align: middle;\n",
58
+ " }\n",
59
+ "\n",
60
+ " .dataframe tbody tr th {\n",
61
+ " vertical-align: top;\n",
62
+ " }\n",
63
+ "\n",
64
+ " .dataframe thead th {\n",
65
+ " text-align: right;\n",
66
+ " }\n",
67
+ "</style>\n",
68
+ "<table border=\"1\" class=\"dataframe\">\n",
69
+ " <thead>\n",
70
+ " <tr style=\"text-align: right;\">\n",
71
+ " <th></th>\n",
72
+ " <th>rating</th>\n",
73
+ " </tr>\n",
74
+ " <tr>\n",
75
+ " <th>version</th>\n",
76
+ " <th></th>\n",
77
+ " </tr>\n",
78
+ " </thead>\n",
79
+ " <tbody>\n",
80
+ " <tr>\n",
81
+ " <th>v0</th>\n",
82
+ " <td>4.740741</td>\n",
83
+ " </tr>\n",
84
+ " <tr>\n",
85
+ " <th>v1</th>\n",
86
+ " <td>4.703704</td>\n",
87
+ " </tr>\n",
88
+ " </tbody>\n",
89
+ "</table>\n",
90
+ "</div>"
91
+ ],
92
+ "text/plain": [
93
+ " rating\n",
94
+ "version \n",
95
+ "v0 4.740741\n",
96
+ "v1 4.703704"
97
+ ]
98
+ },
99
+ "execution_count": 18,
100
+ "metadata": {},
101
+ "output_type": "execute_result"
102
+ }
103
+ ],
104
+ "source": [
105
+ "rr.groupby([\"version\"]).mean()"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 5,
111
+ "metadata": {},
112
+ "outputs": [
113
+ {
114
+ "data": {
115
+ "text/plain": [
116
+ "<BarContainer object of 54 artists>"
117
+ ]
118
+ },
119
+ "execution_count": 5,
120
+ "metadata": {},
121
+ "output_type": "execute_result"
122
+ },
123
+ {
124
+ "data": {
125
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWHUlEQVR4nO3df6zd8/3A8ddtO0fZ7Z02+uPGpTXUML9lKUYN3ao6i0RmauvIRKdDNTEu25RFL7JIlzWpH5Hq0BIxZhE/l2kna5Ne1KwWUitO0HUyO6ft5Fjb8/3jGzcuVWOf8zr31OORvP84n/O+n/f7n089fc7n3tNWr9frAQCQZFCzNwAAfLaIDwAglfgAAFKJDwAglfgAAFKJDwAglfgAAFKJDwAg1ZBmb+CDtm7dGm+88Ua0t7dHW1tbs7cDAPwX6vV6bNiwITo7O2PQoO3f2xhw8fHGG29EV1dXs7cBAHwK5XI59thjj+3OGXDx0d7eHhH/v/lhw4Y1eTcAwH+jWq1GV1dX33/Ht2fAxcd7H7UMGzZMfABAi/lvHpnwwCkAkEp8AACpxAcAkEp8AACpxAcAkEp8AACpxAcAkEp8AACpxAcAkEp8AACpGh4fPT090dbWFrNmzWr0UgBAC2hofKxcuTJuueWWOPjggxu5DADQQhoWHxs3boxp06bFrbfeGrvttlujlgEAWkzD4mPmzJkxZcqUOOmkk7Y7r1arRbVa7TcAgB3XkEac9O67745nnnkmVq5c+bFze3p64uqrr27ENoDPsLGXP9TsLcCA9cp1U5q6fuF3Psrlclx88cVx5513xs477/yx87u7u6NSqfSNcrlc9JYAgAGk8DsfTz/9dKxfvz6OOOKIvmNbtmyJZcuWxfz586NWq8XgwYP73iuVSlEqlYreBgAwQBUeHyeeeGI8//zz/Y6dc845sf/++8dll13WLzwAgM+ewuOjvb09DjrooH7Hdt111xgxYsSHjgMAnz3+wikAkKohv+3yQU8++WTGMgBAC3DnAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBIJT4AgFTiAwBI9YnjY9myZTF16tTo7OyMtra2eOCBB/q9X6/XY86cOdHZ2RlDhw6NiRMnxurVq4vaLwDQ4j5xfGzatCkOOeSQmD9//jbfv+GGG+LGG2+M+fPnx8qVK2P06NFx8sknx4YNG/7nzQIArW/IJ/2ByZMnx+TJk7f5Xr1ej3nz5sWVV14Zp59+ekRELFq0KEaNGhWLFy+O888//3/bLQDQ8gp95mPt2rWxbt26mDRpUt+xUqkUxx9/fPzpT3/a5s/UarWoVqv9BgCw4/rEdz62Z926dRERMWrUqH7HR40aFa+++uo2f6anpyeuvvrqIrexXWMvfyhtLWg1r1w3pdlbAD4DGvLbLm1tbf1e1+v1Dx17T3d3d1Qqlb5RLpcbsSUAYIAo9M7H6NGjI+L/74CMGTOm7/j69es/dDfkPaVSKUqlUpHbAAAGsELvfIwbNy5Gjx4djz/+eN+xd999N5YuXRpHH310kUsBAC3qE9/52LhxY6xZs6bv9dq1a2PVqlUxfPjw2HPPPWPWrFkxd+7c2HfffWPfffeNuXPnxi677BJnnXVWoRsHAFrTJ46P3t7eOOGEE/pez549OyIipk+fHrfffnv8+Mc/jnfeeScuuOCCePvtt+MrX/lKPPbYY9He3l7crgGAlvWJ42PixIlRr9c/8v22traYM2dOzJkz53/ZFwCwg/LdLgBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEB
AKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAqsLjY/PmzfGTn/wkxo0bF0OHDo299947rrnmmti6dWvRSwEALWhI0Se8/vrr46abbopFixbFgQceGL29vXHOOedER0dHXHzxxUUvBwC0mMLjY/ny5XHaaafFlClTIiJi7NixsWTJkujt7S16KQCgBRX+scuxxx4bv//97+Oll16KiIjnnnsunnrqqTjllFO2Ob9Wq0W1Wu03AIAdV+F3Pi677LKoVCqx//77x+DBg2PLli1x7bXXxne+851tzu/p6Ymrr7666G0AAANU4Xc+7rnnnrjzzjtj8eLF8cwzz8SiRYviF7/4RSxatGib87u7u6NSqfSNcrlc9JYAgAGk8Dsfl156aVx++eVx5plnRkTEl7/85Xj11Vejp6cnpk+f/qH5pVIpSqVS0dsAAAaowu98/Pvf/45Bg/qfdvDgwX7VFgCIiAbc+Zg6dWpce+21seeee8aBBx4Yzz77bNx4441x7rnnFr0UANCCCo+PX/3qV/HTn/40Lrjggli/fn10dnbG+eefHz/72c+KXgoAaEGFx0d7e3vMmzcv5s2bV/SpAYAdgO92AQBSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSFR4fPT09cdRRR0V7e3uMHDkyvvWtb8WLL75Y9DIAQIsqPD6WLl0aM2fOjBUrVsTjjz8emzdvjkmTJsWmTZuKXgoAaEFDij7hI4880u/1woULY+TIkfH000/HcccdV/RyAECLKTw+PqhSqURExPDhw7f5fq1Wi1qt1ve6Wq02eksAQBM19IHTer0es2fPjmOPPTYOOuigbc7p6emJjo6OvtHV1dXILQEATdbQ+PjRj34Uf/7zn2PJkiUfOae7uzsqlUrfKJfLjdwSANBkDfvY5cILL4wHH3wwli1bFnvsscdHziuVSlEqlRq1DQBggCk8Pur1elx44YVx//33x5NPPhnjxo0regkAoIUVHh8zZ86MxYsXx29/+9tob2+PdevWRURER0dHDB06tOjlAIAWU/gzHwsWLIhKpRITJ06MMWPG9I177rmn6KUAgBbUkI9dAAA+iu92AQBSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSiQ8AIJX4AABSFR4fy5Yti6lTp0ZnZ2e0tbXFAw88UPQSAEALKzw+Nm3aFIccckjMnz+/6FMDADuAIUWfcPLkyTF58uSiTwsA7CAKj49PqlarRa1W63tdrVabuBsAoNGa/sBpT09PdHR09I2urq5mbwkAaKCmx0d3d3dUKpW+US6Xm70lAKCBmv6xS6lUilKp1OxtAABJmn7nAwD4bCn8zsfGjRtjzZo1fa/Xrl0bq1atiuHDh8eee+5Z9HIAQIspPD56e3vjhBNO6Hs9e/bsiIiYPn163H777UUvBwC0mMLjY+LEiVGv14s+LQCwg/DMBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQSnwA
AKnEBwCQSnwAAKnEBwCQSnwAAKnEBwCQqvD4GDt2bLS1tX1ozJw5s+ilAIAWNKToE65cuTK2bNnS9/ovf/lLnHzyyXHGGWcUvRQA0IIKj4/dd9+93+vrrrsuvvjFL8bxxx9f9FIAQAsqPD7e7913340777wzZs+eHW1tbducU6vVolar9b2uVquN3BIA0GQNfeD0gQceiH/961/x/e9//yPn9PT0REdHR9/o6upq5JYAgCZraHzcdtttMXny5Ojs7PzIOd3d3VGpVPpGuVxu5JYAgCZr2Mcur776ajzxxBPxm9/8ZrvzSqVSlEqlRm0DABhgGnbnY+HChTFy5MiYMmVKo5YAAFpQQ+Jj69atsXDhwpg+fXoMGdLQZ1oBgBbTkPh44okn4rXXXotzzz23EacHAFpYQ25LTJo0Ker1eiNODQC0ON/tAgCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkakh8vP7663H22WfHiBEjYpdddolDDz00nn766UYsBQC0mCFFn/Dtt9+OY445Jk444YR4+OGHY+TIkfHyyy/HF77whaKXAgBaUOHxcf3110dXV1csXLiw79jYsWOLXgYAaFGFf+zy4IMPxpFHHhlnnHFGjBw5Mg477LC49dZbP3J+rVaLarXabwAAO67C4+Nvf/tbLFiwIPbdd9949NFHY8aMGXHRRRfFr3/9623O7+npiY6Ojr7R1dVV9JYAgAGk8PjYunVrHH744TF37tw47LDD4vzzz4/zzjsvFixYsM353d3dUalU+ka5XC56SwDAAFJ4fIwZMyYOOOCAfse+9KUvxWuvvbbN+aVSKYYNG9ZvAAA7rsLj45hjjokXX3yx37GXXnop9tprr6KXAgBaUOHxcckll8SKFSti7ty5sWbNmli8eHHccsstMXPmzKKXAgBaUOHxcdRRR8X9998fS5YsiYMOOih+/vOfx7x582LatGlFLwUAtKDC/85HRMSpp54ap556aiNODQC0ON/tAgCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkEh8AQCrxAQCkKjw+FixYEAcffHAMGzYshg0bFhMmTIiHH3646GUAgBZVeHzssccecd1110Vvb2/09vbG1772tTjttNNi9erVRS8FALSgIUWfcOrUqf1eX3vttbFgwYJYsWJFHHjggUUvBwC0mMLj4/22bNkS9957b2zatCkmTJiwzTm1Wi1qtVrf62q12sgtAQBN1pAHTp9//vn4/Oc/H6VSKWbMmBH3339/HHDAAduc29PTEx0dHX2jq6urEVsCAAaIhsTH+PHjY9WqVbFixYr44Q9/GNOnT48XXnhhm3O7u7ujUqn0jXK53IgtAQADREM+dtlpp51in332iYiII488MlauXBm//OUv4+abb/7Q3FKpFKVSqRHbAAAGoJS/81Gv1/s91wEAfHYVfufjiiuuiMmTJ0dXV1ds2LAh7r777njyySfjkUceKXopAKAFFR4ff//73+O73/1uvPnmm9HR0REHH3xwPPLII3HyyScXvRQA0IIKj4/bbrut6FMCADsQ3+0CAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEB
AKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKRqSHxs2LAhZs2aFXvttVcMHTo0jj766Fi5cmUjlgIAWkxD4uMHP/hBPP7443HHHXfE888/H5MmTYqTTjopXn/99UYsBwC0kMLj45133on77rsvbrjhhjjuuONin332iTlz5sS4ceNiwYIFRS8HALSYIUWfcPPmzbFly5bYeeed+x0fOnRoPPXUUx+aX6vVolar9b2uVCoREVGtVoveWkREbK39uyHnhR1Bo667ZnCtw0drxLX+3jnr9frHT643wIQJE+rHH398/fXXX69v3ry5fscdd9Tb2trq++2334fmXnXVVfWIMAzDMAxjBxjlcvljO6GtXv9vEuWTefnll+Pcc8+NZcuWxeDBg+Pwww+P/fbbL5555pl44YUX+s394J2PrVu3xj//+c8YMWJEtLW1Fb01BpBqtRpdXV1RLpdj2LBhzd4O0CCu9c+Ger0eGzZsiM7Ozhg0aPtPdTQkPt6zadOmqFarMWbMmPj2t78dGzdujIceeqhRy9FiqtVqdHR0RKVS8Q8S7MBc63xQQ//Ox6677hpjxoyJt99+Ox599NE47bTTGrkcANACCn/gNCLi0UcfjXq9HuPHj481a9bEpZdeGuPHj49zzjmnEcsBAC2kIXc+KpVKzJw5M/bff//43ve+F8cee2w89thj8bnPfa4Ry9GiSqVSXHXVVVEqlZq9FaCBXOt8UEOf+QAA+CDf7QIApBIfAEAq8QEApBIfAEAq8UHTLV26NI444ojYeeedY++9946bbrqp2VsCCvbmm2/GWWedFePHj49BgwbFrFmzmr0lmkh80FRr166NU045Jb761a/Gs88+G1dccUVcdNFFcd999zV7a0CBarVa7L777nHllVfGIYcc0uzt0GR+1ZaGuvnmm+Oaa66Jcrnc72/9f/Ob34zddtstRo8eHQ8++GD89a9/7XtvxowZ8dxzz8Xy5cubsWXgU/i4a33RokV9xyZOnBiHHnpozJs3rwk7ZSBw54OGOuOMM+Ktt96KP/zhD33H3vtz+9OmTYvly5fHpEmT+v3M17/+9ejt7Y3//Oc/2dsFPqWPu9bh/cQHDTV8+PD4xje+EYsXL+47du+998bw4cPjxBNPjHXr1sWoUaP6/cyoUaNi8+bN8dZbb2VvF/iUPu5ah/cTHzTctGnT4r777otarRYREXfddVeceeaZMXjw4IiIaGtr6zf/vU8CP3gcGNg+7lqH94gPGm7q1KmxdevWeOihh6JcLscf//jHOPvssyMiYvTo0bFu3bp+89evXx9DhgyJESNGNGO7wKe0vWsd3q8h32oL7zd06NA4/fTT46677oo1a9bEfvvtF0cccUREREyYMCF+97vf9Zv/2GOPxZFHHumLCKHFbO9ah/cTH6SYNm1aTJ06NVavXt3v/4RmzJgR8+fPj9mzZ8d5550Xy5cvj9tuuy2WLFnSxN0Cn9ZHXesREatWrYqIiI0bN8Y//vGPWLVqVey0005xwAEHNGGnNJNftSXFli1boqurK9588814+eWXY++99+57b+nSpXHJJZfE6tWro7OzMy677LKYMWNGE3cLfFrbu9a39RzXXnvtFa+88kriDhkIxAcAkMoDpwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKQSHwBAKvEBAKT6Py+UoIoQmD0VAAAAAElFTkSuQmCC",
126
+ "text/plain": [
127
+ "<Figure size 640x480 with 1 Axes>"
128
+ ]
129
+ },
130
+ "metadata": {},
131
+ "output_type": "display_data"
132
+ }
133
+ ],
134
+ "source": [
135
+ "plt.bar(rr[\"version\"], rr[\"rating\"])"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 4,
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "avg_ratings = {}"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 14,
150
+ "metadata": {},
151
+ "outputs": [
152
+ {
153
+ "data": {
154
+ "image/png": "",
155
+ "text/plain": [
156
+ "<Figure size 640x480 with 1 Axes>"
157
+ ]
158
+ },
159
+ "metadata": {},
160
+ "output_type": "display_data"
161
+ }
162
+ ],
163
+ "source": [
164
+ "import glob\n",
165
+ "import matplotlib.pyplot as plt\n",
166
+ "\n",
167
+ "# Get the list of song files\n",
168
+ "songs = glob.glob(\"DataSet/srija/*.wav\")\n",
169
+ "\n",
170
+ "# Extract ratings and versions\n",
171
+ "ratings = [int(s.split(\"_\")[-1].split(\".\")[0]) for s in songs]\n",
172
+ "version = [s.split(\"_\")[1][-2:] for s in songs]\n",
173
+ "\n",
174
+ "# Create a dictionary to store ratings for each version\n",
175
+ "ratings_by_version = {}\n",
176
+ "\n",
177
+ "# Iterate over songs and populate ratings by version dictionary\n",
178
+ "for v, r in zip(version, ratings):\n",
179
+ " if v not in ratings_by_version:\n",
180
+ " ratings_by_version[v] = []\n",
181
+ " ratings_by_version[v].append(r)\n",
182
+ "\n",
183
+ "# Calculate average ratings for each version\n",
184
+ "avg_ratings = {v: sum(ratings) / len(ratings) for v, ratings in ratings_by_version.items()}\n",
185
+ "\n",
186
+ "# Plotting\n",
187
+ "plt.bar(avg_ratings.keys(), avg_ratings.values(), color='skyblue')\n",
188
+ "plt.xlabel('Version')\n",
189
+ "plt.ylabel('Average Rating')\n",
190
+ "plt.title('Average Rating by Version')\n",
191
+ "plt.xticks(list(avg_ratings.keys()))\n",
192
+ "plt.show()\n"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "metadata": {},
199
+ "outputs": [],
200
+ "source": []
201
+ }
202
+ ],
203
+ "metadata": {
204
+ "kernelspec": {
205
+ "display_name": "lmu310",
206
+ "language": "python",
207
+ "name": "python3"
208
+ },
209
+ "language_info": {
210
+ "codemirror_mode": {
211
+ "name": "ipython",
212
+ "version": 3
213
+ },
214
+ "file_extension": ".py",
215
+ "mimetype": "text/x-python",
216
+ "name": "python",
217
+ "nbconvert_exporter": "python",
218
+ "pygments_lexer": "ipython3",
219
+ "version": "3.10.9"
220
+ }
221
+ },
222
+ "nbformat": 4,
223
+ "nbformat_minor": 2
224
+ }
generation_utilities.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from IPython.display import Audio
3
+ from audiodiffusion import AudioDiffusion, AudioDiffusionPipeline
4
+ from audiodiffusion.audio_encoder import AudioEncoder
5
+ import librosa
6
+ import librosa.display
7
+ import numpy as np
8
+ import random
9
+
10
# Use the GPU when PyTorch reports one, otherwise run everything on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face Hub ids reused below: [0] is loaded as the "v1" generation
# model, [1] as the DDIM pipeline handed to the merging helpers.
model_name = ["SAint7579/orpheus_ldm_model_v1-0", "teticio/audio-diffusion-ddim-256"]

# Loading happens at import time; each call fetches/loads a full pipeline.
audio_diffusion_v0 = AudioDiffusionPipeline.from_pretrained(
    "teticio/latent-audio-diffusion-256"
).to(device)
audio_diffusion_v1 = AudioDiffusionPipeline.from_pretrained(model_name[0]).to(device)
ddim = AudioDiffusionPipeline.from_pretrained(model_name[1]).to(device)
16
+
17
def generate_from_music(song_array, diffuser, start_step, total_steps=100, device="cuda"):
    """
    Re-generate audio from an existing clip with a diffusion pipeline.

    Parameters
    ----------
    song_array : numpy.ndarray
        Audio samples handed to the pipeline as ``raw_audio``.
    diffuser : AudioDiffusionPipeline
        The pipeline that is called to produce the output.
    start_step : int
        Forwarded to the pipeline as ``start_step``.
    total_steps : int, optional
        Forwarded to the pipeline as ``steps``.
    device : str, optional
        Device of the random generator. A CUDA device is replaced by
        ``"cpu"`` when CUDA is not available, so the default also works on
        CPU-only hosts.

    Returns
    -------
    image
        ``output.images[0]`` of the pipeline result (the spectrogram image).
    audio
        ``output.audios[0, 0]`` of the pipeline result.
    """
    # The module picks "cpu" automatically when no GPU is present; do the same
    # here instead of failing while creating a CUDA generator.
    if torch.device(device).type == "cuda" and not torch.cuda.is_available():
        device = "cpu"

    generator = torch.Generator(device=device)
    generator.seed()  # fresh non-deterministic seed on every call
    output = diffuser(
        raw_audio=song_array,
        generator=generator,
        start_step=start_step,
        steps=total_steps,
    )
    return output.images[0], output.audios[0, 0]
42
+
43
def generate_from_music_long(song_array, diffuser, start_step, total_steps=100, device="cuda"):
    """
    Generate audio from a clip and extend it with one continuation segment.

    The pipeline is run once on ``song_array``; the last two seconds of that
    result are then fed back in (masked via ``mask_start_secs``) and the newly
    generated part is appended to the track.

    Parameters
    ----------
    song_array : numpy.ndarray
        Audio samples handed to the pipeline as ``raw_audio``.
    diffuser : AudioDiffusionPipeline
        The pipeline used for both passes; its ``mel.get_sample_rate()``
        provides the sample rate.
    start_step : int
        Forwarded to the pipeline as ``start_step`` in both passes.
    total_steps : int, optional
        Forwarded to the pipeline as ``steps`` in both passes.
    device : str, optional
        Device of the random generator. A CUDA device is replaced by
        ``"cpu"`` when CUDA is not available.

    Returns
    -------
    image
        ``output.images[0]`` of the FIRST pass only (the continuation's
        spectrogram is not returned).
    track : numpy.ndarray
        First-pass audio followed by the continuation without its overlap.
    """
    if torch.device(device).type == "cuda" and not torch.cuda.is_available():
        device = "cpu"

    generator = torch.Generator(device=device)
    generator.seed()  # fresh non-deterministic seed on every call
    output = diffuser(
        raw_audio=song_array,
        generator=generator,
        start_step=start_step,
        steps=total_steps,
    )

    track = output.audios[0, 0]
    sample_rate = diffuser.mel.get_sample_rate()
    overlap_secs = 2
    overlap_samples = overlap_secs * sample_rate

    # The continuation must use the same schedule length as the first pass:
    # start_step is only meaningful relative to `steps`, so omitting it would
    # silently reinterpret start_step against the pipeline's default.
    continue_output = diffuser(
        raw_audio=track[-overlap_samples:],
        generator=generator,
        start_step=start_step,
        steps=total_steps,
        mask_start_secs=overlap_secs,
    )
    continuation = continue_output.audios[0, 0]

    # Drop the overlapping head of the continuation; it repeats the track's tail.
    track = np.concatenate([track, continuation[overlap_samples:]])

    return output.images[0], track
82
+
83
+
84
def iterative_slerp(song_arrays, ddim, steps=10):
    """Encode each song with ``ddim`` and fold the encodings together.

    Parameters
    ----------
    song_arrays : list
        Audio arrays; each one is assigned to ``ddim.mel.audio`` in turn.
    ddim : AudioDiffusionPipeline
        Pipeline providing ``mel``, ``encode`` and ``slerp``.
    steps : int, optional
        Forwarded to ``ddim.encode`` as ``steps``.

    Returns
    -------
    slerp
        The folded encoding, of whatever type ``ddim.encode`` / ``ddim.slerp``
        return (presumably a ``torch.Tensor`` - not verifiable from here).

    Notes
    -----
    Every fold step calls ``ddim.slerp(acc, next, 0.5)``, so the songs are not
    combined with one shared weight: the accumulated result of all earlier
    songs enters each step as a single operand.
    """
    encoded = []
    for samples in song_arrays:
        # Only slice 0 of each song is encoded.
        ddim.mel.audio = samples
        first_slice = ddim.mel.audio_slice_to_image(0)
        encoded.append(ddim.encode([first_slice], steps=steps))

    blended = encoded[0]
    for latent in encoded[1:]:
        blended = ddim.slerp(blended, latent, 0.5)

    return blended
110
+
111
def merge_songs(song_arrays, ddim, slerp_steps=10, diffusion_steps=100, device="cuda"):
    """Merge several songs into one clip via encoding, slerp and decoding.

    Parameters
    ----------
    song_arrays : list
        Audio arrays to merge; passed to ``iterative_slerp``.
    ddim : AudioDiffusionPipeline
        Pipeline used both for encoding (inside ``iterative_slerp``) and for
        generating from the merged noise.
    slerp_steps : int, optional
        Encoding steps forwarded to ``iterative_slerp``.
    diffusion_steps : int, optional
        Forwarded to the pipeline as ``steps`` when decoding.
    device : str, optional
        Device of the random generator. A CUDA device is replaced by
        ``"cpu"`` when CUDA is not available.

    Returns
    -------
    spectrogram
        ``merged.images[0]`` of the pipeline result.
    audio
        ``merged.audios[0, 0]`` of the pipeline result.
    """
    if torch.device(device).type == "cuda" and not torch.cuda.is_available():
        device = "cpu"

    generator = torch.Generator(device=device)
    # Fixed seed: the generator state is identical on every call.
    generator.manual_seed(7579)

    slerp = iterative_slerp(song_arrays, ddim, slerp_steps)
    merged = ddim(noise=slerp, generator=generator, steps=diffusion_steps)
    return merged.images[0], merged.audios[0, 0]
133
+
134
def generate_songs(conditioning_songs, similarity=0.9, quality=500, merging_quality=100, device='cuda'):
    """Generate a new song conditioned on one or more input songs.

    Several songs are first merged with ``merge_songs``; the (merged) clip is
    then re-generated by one of the two latent pipelines, picked at random
    with equal probability.

    Parameters
    ----------
    conditioning_songs : list
        Audio arrays to condition on. Must contain at least one entry.
    similarity : float
        Value in ``[0, 1]``; the fraction of the diffusion schedule that is
        skipped (``start_step = int(total_steps * similarity)``).
    quality : int
        Target number of denoising steps that remain after ``start_step``
        (``total_steps`` is chosen as ``quality / (1 - similarity)``, capped
        at 1000).
    merging_quality : int
        Used as both ``slerp_steps`` and ``diffusion_steps`` when merging.
    device : str
        Device for the random generators. A CUDA device is replaced by
        ``"cpu"`` when CUDA is not available.

    Returns
    -------
    spec_generated
        Spectrogram image of the generated song.
    generated
        Generated audio.
    model_version : str
        ``"v0"`` or ``"v1"``, naming the pipeline that was picked.

    Raises
    ------
    ValueError
        If ``conditioning_songs`` is empty or ``similarity`` is outside
        ``[0, 1]``.
    """
    # Validate before doing any expensive work.
    if len(conditioning_songs) == 0:
        raise ValueError("conditioning_songs must contain at least one song")
    if not 0.0 <= similarity <= 1.0:
        raise ValueError("similarity must be between 0 and 1 (inclusive)")

    if torch.device(device).type == "cuda" and not torch.cuda.is_available():
        device = "cpu"

    ## Merging songs
    print("Merging songs...")
    if len(conditioning_songs) > 1:
        _, merged = merge_songs(
            conditioning_songs,
            ddim,
            slerp_steps=merging_quality,
            diffusion_steps=merging_quality,
            device=device,
        )
    else:
        merged = conditioning_songs[0]

    # Named model_version so the module-level `model_name` list is not shadowed.
    if random.random() < 0.5:
        diffuser, model_version = audio_diffusion_v0, "v0"
    else:
        diffuser, model_version = audio_diffusion_v1, "v1"

    print("Generating song...")
    # quality = total_steps - similarity * total_steps, solved for total_steps.
    # similarity == 1 would divide by zero, so it maps straight to the cap.
    max_steps = 1000
    if similarity < 1.0:
        total_steps = min(max_steps, int(quality / (1 - similarity)))
    else:
        total_steps = max_steps
    start_step = int(total_steps * similarity)

    spec_generated, generated = generate_from_music(
        merged, diffuser, start_step=start_step, total_steps=total_steps, device=device
    )

    return spec_generated, generated, model_version
182
+
183
+ # if __name__ == '__main__':
184
+ # song1 = "D:/Projects/Orpheus_ai/DataSet/Sep_Dataset/accompaniment/000010.mp3"
185
+ # song2 = "D:/Projects/Orpheus_ai/DataSet/Sep_Dataset/accompaniment/000002.mp3"
186
+ # song3 = "D:/Projects/Orpheus_ai/DataSet/Sep_Dataset/accompaniment/000003.mp3"
187
+
188
+ # song_array_1, sr = librosa.load(song1, sr=22050)
189
+ # song_array_1 = song_array_1[:sr*5]
190
+
191
+ # song_array_2, sr = librosa.load(song2, sr=22050)
192
+ # song_array_2 = song_array_2[:sr*10]
193
+
194
+ # song_array_3, sr = librosa.load(song3, sr=22050)
195
+ # song_array_3 = song_array_3[:sr*10]
196
+
197
+ # generator = torch.Generator(device=device)
198
+
199
+ # # seed = 239150437427
200
+ # image, audio = generate_songs([song_array_2, song_array_3], similarity=0.5, quality=10, device=device)
201
+ # Audio(audio, rate=sr)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ audiodiffusion==1.5.6
2
+ diffusers==0.17.1
3
+ librosa==0.10.1
4
+ numpy==1.26.2
5
+ torch==2.2.1+cu118
6
+ torchaudio==2.2.1+cu118
7
+ torchmetrics==1.3.1
8
+ torchvision==0.17.1
9
+ streamlit==1.32.2
10
+ st-star-rating==0.0.6
ui/app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit entry page: pick library songs and a similarity, then generate."""
import os

# Set before the numeric libraries are imported: the flag is meant to tolerate
# a duplicate OpenMP runtime, which is loaded during those imports.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import sys

import streamlit as st

# Must run before any other Streamlit command on the page.
st.set_page_config(initial_sidebar_state="collapsed")

# generation_utilities lives one directory above the ui/ folder.
sys.path.append('../')

from glob import glob

import librosa
import numpy as np
import soundfile as sf

# Imported once and kept in sys.modules: reloading it on every Streamlit rerun
# would re-load all diffusion pipelines each time the script executes.
import generation_utilities

SAMPLE_RATE = 22050
SONG_LIBRARY = ["22", "Anti-Hero", "Back-to-december", "Blank-Space", "Cardigan", "Delicate", "Lover", "Love-Story", "Willow", "You-Belong-With-Me"]

# Keys shared with pages/rating.py.
for key in ('song_name', 'similarity', 'model_name', 'song_list'):
    if key not in st.session_state:
        st.session_state[key] = None


form1 = st.form(key="form1")
# Random pre-selection so participants do not all start from the same setup.
song_default = str(np.random.choice(SONG_LIBRARY))
similarity_default = round(float(np.random.uniform(0.8, 0.99)), 2)
song_options = form1.multiselect("Select songs from library", SONG_LIBRARY, default=[song_default])
similarity = form1.slider("Similarity", 0.0, 1.0, similarity_default)
submit = form1.form_submit_button("Submit")

if submit and not song_options:
    # Every song can be deselected in the multiselect; generation needs one.
    st.warning("Please select at least one song.")
elif submit:
    song_paths = [f"../input_songs/{song}.mp3" for song in song_options]
    song_list = [librosa.load(path, sr=SAMPLE_RATE)[0] for path in song_paths]

    spectrogram, generated_song, model_name = generation_utilities.generate_songs(
        song_list,
        similarity=similarity,
        quality=500,
        merging_quality=100,
        # Use the device the pipelines were actually loaded on.
        device=generation_utilities.device,
    )
    st.session_state['song_list'] = song_paths
    st.session_state['song_name'] = '_'.join(song_options)
    st.session_state['similarity'] = similarity
    st.session_state['model_name'] = model_name

    # Temp files picked up by pages/rating.py via glob("temp*.wav" / "temp*.npy").
    sf.write("temp.wav", generated_song, SAMPLE_RATE)
    np.save("temp.npy", spectrogram)
    st.switch_page("pages/rating.py")
ui/pages/rating.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit rating page: play originals and the generated song, store a rating."""
import glob
import os
import shutil
import sys

import streamlit as st
from streamlit_star_rating import st_star_rating

st.set_page_config(initial_sidebar_state="collapsed")

sys.path.append('../../')

# Temp files written by app.py right before it switches to this page.
spectrograms = glob.glob("temp*.npy")
generated_songs = glob.glob("temp*.wav")

# Opening this page directly leaves the session keys unset and possibly no
# temp files on disk; send the user back to the form instead of crashing.
if not st.session_state.get('song_list') or not generated_songs or not spectrograms:
    st.switch_page("app.py")

st.markdown("# Original Song")
for s in st.session_state['song_list']:
    st.markdown(f"### {os.path.splitext(os.path.basename(s))[0]}")
    # The originals are .mp3 files, so serve them with the MP3 MIME type.
    st.audio(s, format='audio/mpeg')
st.markdown("# Generated Song")
st.audio(generated_songs[0], format='audio/wav')
rating = st_star_rating(label="rating", maxValue=10, defaultValue=3)


submit_rating = st.button("Submit Rating")

if submit_rating:
    # File name layout: srija_<model>_<songs>_<similarity>_<rating>
    stem = (
        f"srija_{st.session_state['model_name']}_{st.session_state['song_name']}"
        f"_{st.session_state['similarity']}_{rating}"
    )
    # Create the target folders on first use so the copy cannot fail on a
    # fresh checkout.
    os.makedirs("../DataSet/Song", exist_ok=True)
    os.makedirs("../DataSet/Spec", exist_ok=True)
    shutil.copy(generated_songs[0], f"../DataSet/Song/{stem}.wav")
    shutil.copy(spectrograms[0], f"../DataSet/Spec/{stem}.npy")
    st.switch_page("app.py")
+ st.switch_page("app.py")