sohojoe commited on
Commit
d0639dc
1 Parent(s): 32ed25f

debug_app.py, which has choppy audio

Browse files
Files changed (2) hide show
  1. debug_001.py +3 -3
  2. debug_app.py +167 -0
debug_001.py CHANGED
@@ -54,9 +54,9 @@ with open("chunks.pkl", "rb") as f:
54
  bytes_io.seek(0, io.SEEK_END)
55
  continue
56
 
57
- with open("frames.pkl", "wb") as f:
58
- import pickle
59
- pickle.dump(chunks, f)
60
 
61
 
62
  if mpv_process.stdin:
 
54
  bytes_io.seek(0, io.SEEK_END)
55
  continue
56
 
57
+ # with open("frames.pkl", "wb") as f:
58
+ # import pickle
59
+ # pickle.dump(audio_frames, f)
60
 
61
 
62
  if mpv_process.stdin:
debug_app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import io
3
+ import logging
4
+ import traceback
5
+ from typing import List
6
+
7
+ import av
8
+ import numpy as np
9
+ import streamlit as st
10
+ from streamlit_webrtc import WebRtcMode, webrtc_streamer
11
+ import pydub
12
+
13
+ from dotenv import load_dotenv
14
+ load_dotenv()
15
+ from sample_utils.turn import get_ice_servers
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class StreamingMP3ToFrames:
    """Incrementally decode a streamed MP3 byte sequence into av.AudioFrame objects.

    Chunks may end in the middle of an MP3 frame; when decoding fails, the
    leftover bytes are kept and the next chunk is appended before retrying.
    """

    def __init__(self):
        # When True, the next chunk is appended to the leftover bytes of a
        # previously failed decode instead of starting a fresh buffer.
        self.append = False

    def process_chunk(self, chunk):
        """Decode one chunk of MP3 bytes; return the av.AudioFrame list decoded so far."""
        decoded = []
        try:
            if self.append:
                self.bytes_io.write(chunk)
                self.append = False
                self.bytes_io.seek(0)
            else:
                self.bytes_io = io.BytesIO(chunk)
            container = av.open(self.bytes_io, 'r', format='mp3')
            audio_stream = next(s for s in container.streams if s.type == 'audio')
            for raw_frame in container.decode(audio_stream):
                # Round-trip through a NumPy array so we can rebuild the
                # frame as planar float mono at a fixed sample rate.
                samples = raw_frame.to_ndarray()
                rebuilt = av.AudioFrame.from_ndarray(samples, format='fltp', layout='mono')
                rebuilt.sample_rate = 44100
                decoded.append(rebuilt)
            return decoded
        except Exception as e:
            # Decode failed (likely a truncated MP3 frame): remember to
            # append the next chunk and park the cursor at the end.
            print(e)
            self.append = True
            self.bytes_io.seek(0, io.SEEK_END)
            return decoded
54
+
55
+
56
def video_frame_callback(
    frame: av.VideoFrame,
) -> av.VideoFrame:
    """Pass-through callback: the incoming video frame is echoed unmodified."""
    return frame
60
+
61
+
62
+
63
# Replay setup: reload the raw MP3 chunks captured earlier ("chunks.pkl")
# and pre-decode them all into a list of audio frames for the callbacks.
streaming_mp3_to_frames = StreamingMP3ToFrames()

with open("chunks.pkl", "rb") as f:
    import pickle
    debug_chunks = pickle.load(f)

debug_frames = []
debug_frame_idx = 0
for chunk in debug_chunks:
    debug_frames.extend(streaming_mp3_to_frames.process_chunk(chunk))
75
+
76
def dequeue_frame():
    """Return the next pre-decoded debug frame, wrapping around at the end.

    Returns:
        The frame at the current replay index, or None when no frames were
        decoded at all. (Previously an empty `debug_frames` list raised
        IndexError here, even though process_frame explicitly checks for a
        None return to stop pulling frames.)
    """
    global debug_frame_idx, debug_frames
    if not debug_frames:
        return None
    enqueued_frame = debug_frames[debug_frame_idx]
    # Advance and wrap so the captured audio loops forever.
    debug_frame_idx += 1
    if debug_frame_idx >= len(debug_frames):
        debug_frame_idx = 0
    return enqueued_frame
83
+
84
# Rolling buffer of resampled int16 samples carried over between
# audio_frame_callback invocations; starts empty.
sample_buffer = np.zeros((0), dtype=np.int16)
86
+
87
def process_frame(old_frame):
    """Build a replacement audio frame from the replayed debug frames.

    Pulls pre-decoded frames via dequeue_frame(), converts them to int16 PCM,
    resamples them to old_frame's sample rate, and returns a new stereo s16
    av.AudioFrame containing exactly old_frame.samples samples (padded with
    silence if the debug frames run out). Timing metadata (pts, sample_rate)
    is copied from old_frame so the outgoing track's clock is preserved.

    Raises:
        Exception: any failure is logged (print + traceback) and re-raised.
    """
    try:
        output_channels = 2
        # NOTE(review): assumes the decoded MP3 frames are 44100 Hz mono,
        # matching the sample_rate set in StreamingMP3ToFrames — confirm.
        output_sample_rate = 44100
        required_samples = old_frame.samples

        global sample_buffer
        # Pull decoded frames until enough samples are queued for this slot.
        while sample_buffer.shape[0] < required_samples:
            dequeued_frame = dequeue_frame()
            if dequeued_frame is None:
                break

            # Convert the float frame to int16 PCM (same width as old_frame).
            float_samples = dequeued_frame.to_ndarray()
            max_sample = np.max(np.abs(float_samples))
            min_sample = np.min(np.abs(float_samples))
            if max_sample > 1.0 or min_sample > 1.0:
                # Out-of-range floats would clip after the int16 scale below.
                print(f"WARNING: max_sample: {max_sample}, min_sample: {min_sample}")
            int_samples = np.int16(float_samples * 32767)
            sound = pydub.AudioSegment(
                data=int_samples.tobytes(),
                sample_width=2,
                frame_rate=output_sample_rate,
                channels=len(dequeued_frame.layout.channels),
            )
            # Resample to the rate the outgoing WebRTC track expects.
            sound = sound.set_frame_rate(old_frame.sample_rate)

            samples = np.array(sound.get_array_of_samples(), dtype=np.int16)
            sample_buffer = np.append(sample_buffer, samples)

        # Ran out of decoded frames: pad the remainder with silence.
        if sample_buffer.shape[0] < required_samples:
            empty_samples = np.zeros((required_samples - sample_buffer.shape[0]), dtype=np.int16)
            sample_buffer = np.append(sample_buffer, empty_samples)

        # Consume exactly required_samples from the front of the buffer;
        # anything left over is kept for the next callback.
        samples = sample_buffer[:required_samples]
        sample_buffer = sample_buffer[required_samples:]

        # Duplicate the mono channel into interleaved stereo (LRLR...).
        if output_channels == 2:
            samples = np.vstack((samples, samples)).reshape((-1,), order='F')

        samples = samples.reshape(1, -1)

        layout = 'stereo' if output_channels == 2 else 'mono'
        new_frame = av.AudioFrame.from_ndarray(samples, format='s16', layout=layout)
        new_frame.sample_rate = old_frame.sample_rate
        new_frame.pts = old_frame.pts
        return new_frame
    except Exception as e:
        print(e)
        traceback.print_exc()
        # Bare raise re-raises with the original traceback intact;
        # `raise(e)` appended an extra frame to the chain.
        raise
142
+
143
+
144
+
145
def audio_frame_callback(old_frame: av.AudioFrame) -> av.AudioFrame:
    """Swap each incoming audio frame for one built from the replayed debug audio."""
    global debug_frame_idx, debug_frames

    replacement = process_frame(old_frame)

    # Log pts values to help diagnose the choppy-audio timing issue.
    print(f"frame: {old_frame}, pts: {old_frame.pts}")
    print(f"new_frame: {replacement}, pts: {replacement.pts}")

    return replacement
157
+
158
+
159
# Start the Streamlit WebRTC widget: video is echoed unchanged while audio
# is replaced with the replayed debug frames via the callbacks above.
webrtc_streamer(
    key="delay",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": get_ice_servers()},

    video_frame_callback=video_frame_callback,
    audio_frame_callback=audio_frame_callback,

)