Luis committed on
Commit
8726ca7
1 Parent(s): 3716f01

add lib pydub

Browse files
Files changed (2) hide show
  1. app.py +48 -24
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,37 +12,40 @@ from scipy.io import wavfile
12
  import scipy
13
 
14
  # import soundfile as sf
15
- import audio2numpy as a2n
16
  import os
17
 
18
  import gradio as gr
19
 
20
- import audio2numpy
21
  # import numpy as np
22
 
23
-
24
- # https://stackoverflow.com/questions/53633177/how-to-read-a-mp3-audio-file-into-a-numpy-array-save-a-numpy-array-to-mp3
25
- def read(f, normalized=False):
26
- """MP3 to numpy array"""
27
- a = pydub.AudioSegment.from_mp3(f)
28
- y = np.array(a.get_array_of_samples())
29
- if a.channels == 2:
30
- y = y.reshape((-1, 2))
31
- if normalized:
32
- return a.frame_rate, np.float32(y) / 2**15
33
- else:
34
- return a.frame_rate, y
35
 
36
 
37
- def write(f, sr, x, normalized=False):
38
- """numpy array to MP3"""
39
- channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
40
- if normalized: # normalized array - each item should be a float in [-1, 1)
41
- y = np.int16(x * 2 ** 15)
42
- else:
43
- y = np.int16(x)
44
- song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
45
- song.export(f, format="mp3", bitrate="320k")
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  # Load the model.
@@ -86,7 +89,28 @@ def inference(audio):
86
  if debug: print(f'read, wav_file_name: {wav_file_name}')
87
 
88
  if wav_file_name.endswith('.mp3'):
89
- wav_data, sample_rate = a2n.audio_from_file(wav_file_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  else:
91
  sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')
92
 
 
12
  import scipy
13
 
14
  # import soundfile as sf
15
+ # import audio2numpy as a2n
16
  import os
17
 
18
  import gradio as gr
19
 
20
+ # import audio2numpy
21
  # import numpy as np
22
 
23
+ from pydub import AudioSegment
24
+ from matplotlib import pyplot as plt
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
+ # https://stackoverflow.com/questions/53633177/how-to-read-a-mp3-audio-file-into-a-numpy-array-save-a-numpy-array-to-mp3
28
+ # def read(f, normalized=False):
29
+ # """MP3 to numpy array"""
30
+ # a = pydub.AudioSegment.from_mp3(f)
31
+ # y = np.array(a.get_array_of_samples())
32
+ # if a.channels == 2:
33
+ # y = y.reshape((-1, 2))
34
+ # if normalized:
35
+ # return a.frame_rate, np.float32(y) / 2**15
36
+ # else:
37
+ # return a.frame_rate, y
38
+ #
39
+ #
40
+ # def write(f, sr, x, normalized=False):
41
+ # """numpy array to MP3"""
42
+ # channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
43
+ # if normalized: # normalized array - each item should be a float in [-1, 1)
44
+ # y = np.int16(x * 2 ** 15)
45
+ # else:
46
+ # y = np.int16(x)
47
+ # song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
48
+ # song.export(f, format="mp3", bitrate="320k")
49
 
50
 
51
  # Load the model.
 
89
  if debug: print(f'read, wav_file_name: {wav_file_name}')
90
 
91
  if wav_file_name.endswith('.mp3'):
92
+ # https://stackoverflow.com/questions/16634128/how-to-extract-the-raw-data-from-a-mp3-file-using-python
93
+ # This will open and read the audio file with pydub. Replace the file path with
94
+ # your own file.
95
+ audio_file = AudioSegment.from_file(wav_file_name)
96
+
97
+ # Set up a list for us to dump PCM samples into, and create a 'data' variable
98
+ # so we don't need to type audio_file._data again
99
+ data = audio_file._data
100
+ pcm16_signed_integers = []
101
+
102
+ # This loop decodes the bytestring into PCM samples.
103
+ # The bytestring is a stream of little-endian encoded signed integers.
104
+ # This basically just cuts each two-byte sample out of the bytestring, converts
105
+ # it to an integer, and appends it to the list of samples.
106
+ for sample_index in range(len(data) // 2):
107
+ sample = int.from_bytes(data[sample_index * 2:sample_index * 2 + 2], 'little', signed=True)
108
+ pcm16_signed_integers.append(sample)
109
+ if debug: print(f'pcm16_signed_integers: {pcm16_signed_integers}')
110
+
111
+ # Now plot the samples!
112
+ # plt.plot(pcm16_signed_integers)
113
+ # plt.show()
114
  else:
115
  sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')
116
 
requirements.txt CHANGED
@@ -3,4 +3,4 @@ tensorflow_hub
3
  numpy
4
  scipy
5
  gradio
6
- audio2numpy
 
3
  numpy
4
  scipy
5
  gradio
6
+ pydub