whitphx HF staff committed on
Commit
ae6b0ae
·
1 Parent(s): ce881eb

Upgrade streamlit-webrtc to 0.40.0 and update app.py to use class-less callbacks

Browse files
Files changed (2) hide show
  1. app.py +244 -292
  2. requirements.txt +1 -1
app.py CHANGED
@@ -4,12 +4,7 @@ import queue
4
  import threading
5
  import urllib.request
6
  from pathlib import Path
7
- from typing import List, NamedTuple
8
-
9
- try:
10
- from typing import Literal
11
- except ImportError:
12
- from typing_extensions import Literal # type: ignore
13
 
14
  import av
15
  import cv2
@@ -20,12 +15,12 @@ import streamlit as st
20
  from aiortc.contrib.media import MediaPlayer
21
 
22
  from streamlit_webrtc import (
23
- AudioProcessorBase,
24
  RTCConfiguration,
25
- VideoProcessorBase,
26
  WebRtcMode,
 
27
  webrtc_streamer,
28
  )
 
29
 
30
  HERE = Path(__file__).parent
31
 
@@ -86,63 +81,29 @@ RTC_CONFIGURATION = RTCConfiguration(
86
  def main():
87
  st.header("WebRTC demo")
88
 
89
- object_detection_page = "Real time object detection (sendrecv)"
90
- video_filters_page = (
91
- "Real time video transform with simple OpenCV filters (sendrecv)"
92
- )
93
- audio_filter_page = "Real time audio filter (sendrecv)"
94
- delayed_echo_page = "Delayed echo (sendrecv)"
95
- streaming_page = (
96
- "Consuming media files on server-side and streaming it to browser (recvonly)"
97
- )
98
- video_sendonly_page = (
99
- "WebRTC is sendonly and images are shown via st.image() (sendonly)"
100
- )
101
- audio_sendonly_page = (
102
- "WebRTC is sendonly and audio frames are visualized with matplotlib (sendonly)"
103
- )
104
- loopback_page = "Simple video and audio loopback (sendrecv)"
105
- media_constraints_page = (
106
- "Configure media constraints and HTML element styles with loopback (sendrecv)"
107
- )
108
- programatically_control_page = "Control the playing state programatically"
109
- app_mode = st.sidebar.selectbox(
110
  "Choose the app mode",
111
- [
112
- object_detection_page,
113
- video_filters_page,
114
- audio_filter_page,
115
- delayed_echo_page,
116
- streaming_page,
117
- video_sendonly_page,
118
- audio_sendonly_page,
119
- loopback_page,
120
- media_constraints_page,
121
- programatically_control_page,
122
- ],
123
  )
124
- st.subheader(app_mode)
125
-
126
- if app_mode == video_filters_page:
127
- app_video_filters()
128
- elif app_mode == object_detection_page:
129
- app_object_detection()
130
- elif app_mode == audio_filter_page:
131
- app_audio_filter()
132
- elif app_mode == delayed_echo_page:
133
- app_delayed_echo()
134
- elif app_mode == streaming_page:
135
- app_streaming()
136
- elif app_mode == video_sendonly_page:
137
- app_sendonly_video()
138
- elif app_mode == audio_sendonly_page:
139
- app_sendonly_audio()
140
- elif app_mode == loopback_page:
141
- app_loopback()
142
- elif app_mode == media_constraints_page:
143
- app_media_constraints()
144
- elif app_mode == programatically_control_page:
145
- app_programatically_play()
146
 
147
  st.sidebar.markdown(
148
  """
@@ -159,70 +120,61 @@ def main():
159
 
160
 
161
  def app_loopback():
162
- """ Simple video loopback """
163
  webrtc_streamer(key="loopback")
164
 
165
 
166
  def app_video_filters():
167
- """ Video transforms with OpenCV """
168
-
169
- class OpenCVVideoProcessor(VideoProcessorBase):
170
- type: Literal["noop", "cartoon", "edges", "rotate"]
171
-
172
- def __init__(self) -> None:
173
- self.type = "noop"
174
-
175
- def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
176
- img = frame.to_ndarray(format="bgr24")
177
-
178
- if self.type == "noop":
179
- pass
180
- elif self.type == "cartoon":
181
- # prepare color
182
- img_color = cv2.pyrDown(cv2.pyrDown(img))
183
- for _ in range(6):
184
- img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
185
- img_color = cv2.pyrUp(cv2.pyrUp(img_color))
186
-
187
- # prepare edges
188
- img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
189
- img_edges = cv2.adaptiveThreshold(
190
- cv2.medianBlur(img_edges, 7),
191
- 255,
192
- cv2.ADAPTIVE_THRESH_MEAN_C,
193
- cv2.THRESH_BINARY,
194
- 9,
195
- 2,
196
- )
197
- img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
198
 
199
- # combine color and edges
200
- img = cv2.bitwise_and(img_color, img_edges)
201
- elif self.type == "edges":
202
- # perform edge detection
203
- img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR)
204
- elif self.type == "rotate":
205
- # rotate image
206
- rows, cols, _ = img.shape
207
- M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
208
- img = cv2.warpAffine(img, M, (cols, rows))
209
 
210
- return av.VideoFrame.from_ndarray(img, format="bgr24")
211
 
212
- webrtc_ctx = webrtc_streamer(
213
  key="opencv-filter",
214
  mode=WebRtcMode.SENDRECV,
215
  rtc_configuration=RTC_CONFIGURATION,
216
- video_processor_factory=OpenCVVideoProcessor,
217
  media_stream_constraints={"video": True, "audio": False},
218
  async_processing=True,
219
  )
220
 
221
- if webrtc_ctx.video_processor:
222
- webrtc_ctx.video_processor.type = st.radio(
223
- "Select transform type", ("noop", "cartoon", "edges", "rotate")
224
- )
225
-
226
  st.markdown(
227
  "This demo is based on "
228
  "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501
@@ -231,80 +183,67 @@ def app_video_filters():
231
 
232
 
233
  def app_audio_filter():
234
- DEFAULT_GAIN = 1.0
235
-
236
- class AudioProcessor(AudioProcessorBase):
237
- gain = DEFAULT_GAIN
238
-
239
- def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
240
- raw_samples = frame.to_ndarray()
241
- sound = pydub.AudioSegment(
242
- data=raw_samples.tobytes(),
243
- sample_width=frame.format.bytes,
244
- frame_rate=frame.sample_rate,
245
- channels=len(frame.layout.channels),
246
- )
247
 
248
- sound = sound.apply_gain(self.gain)
249
 
250
- # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples # noqa
251
- channel_sounds = sound.split_to_mono()
252
- channel_samples = [s.get_array_of_samples() for s in channel_sounds]
253
- new_samples: np.ndarray = np.array(channel_samples).T
254
- new_samples = new_samples.reshape(raw_samples.shape)
255
 
256
- new_frame = av.AudioFrame.from_ndarray(
257
- new_samples, layout=frame.layout.name
258
- )
259
- new_frame.sample_rate = frame.sample_rate
260
- return new_frame
261
 
262
- webrtc_ctx = webrtc_streamer(
263
  key="audio-filter",
264
  mode=WebRtcMode.SENDRECV,
265
  rtc_configuration=RTC_CONFIGURATION,
266
- audio_processor_factory=AudioProcessor,
267
  async_processing=True,
268
  )
269
 
270
- if webrtc_ctx.audio_processor:
271
- webrtc_ctx.audio_processor.gain = st.slider(
272
- "Gain", -10.0, +20.0, DEFAULT_GAIN, 0.05
273
- )
274
-
275
 
276
  def app_delayed_echo():
277
- DEFAULT_DELAY = 1.0
278
-
279
- class VideoProcessor(VideoProcessorBase):
280
- delay = DEFAULT_DELAY
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
- async def recv_queued(self, frames: List[av.VideoFrame]) -> List[av.VideoFrame]:
283
- logger.debug("Delay:", self.delay)
284
- await asyncio.sleep(self.delay)
285
- return frames
286
-
287
- class AudioProcessor(AudioProcessorBase):
288
- delay = DEFAULT_DELAY
289
-
290
- async def recv_queued(self, frames: List[av.AudioFrame]) -> List[av.AudioFrame]:
291
- await asyncio.sleep(self.delay)
292
- return frames
293
-
294
- webrtc_ctx = webrtc_streamer(
295
  key="delay",
296
  mode=WebRtcMode.SENDRECV,
297
  rtc_configuration=RTC_CONFIGURATION,
298
- video_processor_factory=VideoProcessor,
299
- audio_processor_factory=AudioProcessor,
300
  async_processing=True,
301
  )
302
 
303
- if webrtc_ctx.video_processor and webrtc_ctx.audio_processor:
304
- delay = st.slider("Delay", 0.0, 5.0, DEFAULT_DELAY, 0.05)
305
- webrtc_ctx.video_processor.delay = delay
306
- webrtc_ctx.audio_processor.delay = delay
307
-
308
 
309
  def app_object_detection():
310
  """Object detection demo with MobileNet SSD.
@@ -339,7 +278,12 @@ def app_object_detection():
339
  "train",
340
  "tvmonitor",
341
  ]
342
- COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
 
 
 
 
 
343
 
344
  download_file(MODEL_URL, MODEL_LOCAL_PATH, expected_size=23147564)
345
  download_file(PROTOTXT_URL, PROTOTXT_LOCAL_PATH, expected_size=29353)
@@ -350,80 +294,79 @@ def app_object_detection():
350
  name: str
351
  prob: float
352
 
353
- class MobileNetSSDVideoProcessor(VideoProcessorBase):
354
- confidence_threshold: float
355
- result_queue: "queue.Queue[List[Detection]]"
 
 
356
 
357
- def __init__(self) -> None:
358
- self._net = cv2.dnn.readNetFromCaffe(
359
- str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH)
360
- )
361
- self.confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD
362
- self.result_queue = queue.Queue()
363
-
364
- def _annotate_image(self, image, detections):
365
- # loop over the detections
366
- (h, w) = image.shape[:2]
367
- result: List[Detection] = []
368
- for i in np.arange(0, detections.shape[2]):
369
- confidence = detections[0, 0, i, 2]
370
-
371
- if confidence > self.confidence_threshold:
372
- # extract the index of the class label from the `detections`,
373
- # then compute the (x, y)-coordinates of the bounding box for
374
- # the object
375
- idx = int(detections[0, 0, i, 1])
376
- box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
377
- (startX, startY, endX, endY) = box.astype("int")
378
-
379
- name = CLASSES[idx]
380
- result.append(Detection(name=name, prob=float(confidence)))
381
-
382
- # display the prediction
383
- label = f"{name}: {round(confidence * 100, 2)}%"
384
- cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
385
- y = startY - 15 if startY - 15 > 15 else startY + 15
386
- cv2.putText(
387
- image,
388
- label,
389
- (startX, y),
390
- cv2.FONT_HERSHEY_SIMPLEX,
391
- 0.5,
392
- COLORS[idx],
393
- 2,
394
- )
395
- return image, result
396
 
397
- def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
398
- image = frame.to_ndarray(format="bgr24")
399
- blob = cv2.dnn.blobFromImage(
400
- cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
401
- )
402
- self._net.setInput(blob)
403
- detections = self._net.forward()
404
- annotated_image, result = self._annotate_image(image, detections)
405
 
406
- # NOTE: This `recv` method is called in another thread,
407
- # so it must be thread-safe.
408
- self.result_queue.put(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
- return av.VideoFrame.from_ndarray(annotated_image, format="bgr24")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
 
412
  webrtc_ctx = webrtc_streamer(
413
  key="object-detection",
414
  mode=WebRtcMode.SENDRECV,
415
  rtc_configuration=RTC_CONFIGURATION,
416
- video_processor_factory=MobileNetSSDVideoProcessor,
417
  media_stream_constraints={"video": True, "audio": False},
418
  async_processing=True,
419
  )
420
 
421
- confidence_threshold = st.slider(
422
- "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05
423
- )
424
- if webrtc_ctx.video_processor:
425
- webrtc_ctx.video_processor.confidence_threshold = confidence_threshold
426
-
427
  if st.checkbox("Show the detected labels", value=True):
428
  if webrtc_ctx.state.playing:
429
  labels_placeholder = st.empty()
@@ -433,16 +376,11 @@ def app_object_detection():
433
  # Then the rendered video frames and the labels displayed here
434
  # are not strictly synchronized.
435
  while True:
436
- if webrtc_ctx.video_processor:
437
- try:
438
- result = webrtc_ctx.video_processor.result_queue.get(
439
- timeout=1.0
440
- )
441
- except queue.Empty:
442
- result = None
443
- labels_placeholder.table(result)
444
- else:
445
- break
446
 
447
  st.markdown(
448
  "This demo uses a model and code from "
@@ -452,7 +390,7 @@ def app_object_detection():
452
 
453
 
454
  def app_streaming():
455
- """ Media streamings """
456
  MEDIAFILES = {
457
  "big_buck_bunny_720p_2mb.mp4 (local)": {
458
  "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_2mb.mp4", # noqa: E501
@@ -499,51 +437,54 @@ def app_streaming():
499
  # options={"framerate": "30", "video_size": "1280x720"},
500
  # )
501
 
502
- class OpenCVVideoProcessor(VideoProcessorBase):
503
- type: Literal["noop", "cartoon", "edges", "rotate"]
504
-
505
- def __init__(self) -> None:
506
- self.type = "noop"
507
-
508
- def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
509
- img = frame.to_ndarray(format="bgr24")
510
-
511
- if self.type == "noop":
512
- pass
513
- elif self.type == "cartoon":
514
- # prepare color
515
- img_color = cv2.pyrDown(cv2.pyrDown(img))
516
- for _ in range(6):
517
- img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
518
- img_color = cv2.pyrUp(cv2.pyrUp(img_color))
519
-
520
- # prepare edges
521
- img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
522
- img_edges = cv2.adaptiveThreshold(
523
- cv2.medianBlur(img_edges, 7),
524
- 255,
525
- cv2.ADAPTIVE_THRESH_MEAN_C,
526
- cv2.THRESH_BINARY,
527
- 9,
528
- 2,
529
- )
530
- img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
 
 
 
531
 
532
- # combine color and edges
533
- img = cv2.bitwise_and(img_color, img_edges)
534
- elif self.type == "edges":
535
- # perform edge detection
536
- img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR)
537
- elif self.type == "rotate":
538
- # rotate image
539
- rows, cols, _ = img.shape
540
- M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
541
- img = cv2.warpAffine(img, M, (cols, rows))
542
 
543
- return av.VideoFrame.from_ndarray(img, format="bgr24")
544
 
545
- webrtc_ctx = webrtc_streamer(
546
- key=f"media-streaming-{media_file_label}",
547
  mode=WebRtcMode.RECVONLY,
548
  rtc_configuration=RTC_CONFIGURATION,
549
  media_stream_constraints={
@@ -551,14 +492,9 @@ def app_streaming():
551
  "audio": media_file_info["type"] == "audio",
552
  },
553
  player_factory=create_player,
554
- video_processor_factory=OpenCVVideoProcessor,
555
  )
556
 
557
- if media_file_info["type"] == "video" and webrtc_ctx.video_processor:
558
- webrtc_ctx.video_processor.type = st.radio(
559
- "Select transform type", ("noop", "cartoon", "edges", "rotate")
560
- )
561
-
562
  st.markdown(
563
  "The video filter in this demo is based on "
564
  "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501
@@ -673,7 +609,7 @@ def app_sendonly_audio():
673
 
674
 
675
  def app_media_constraints():
676
- """ A sample to configure MediaStreamConstraints object """
677
  frame_rate = 5
678
  webrtc_streamer(
679
  key="media-constraints",
@@ -692,17 +628,33 @@ def app_media_constraints():
692
 
693
 
694
  def app_programatically_play():
695
- """ A sample of controlling the playing state from Python. """
696
  playing = st.checkbox("Playing", value=True)
697
 
698
  webrtc_streamer(
699
- key="media-constraints",
700
  desired_playing_state=playing,
701
  mode=WebRtcMode.SENDRECV,
702
  rtc_configuration=RTC_CONFIGURATION,
703
  )
704
 
705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  if __name__ == "__main__":
707
  import os
708
 
 
4
  import threading
5
  import urllib.request
6
  from pathlib import Path
7
+ from typing import List, NamedTuple, Optional
 
 
 
 
 
8
 
9
  import av
10
  import cv2
 
15
  from aiortc.contrib.media import MediaPlayer
16
 
17
  from streamlit_webrtc import (
 
18
  RTCConfiguration,
 
19
  WebRtcMode,
20
+ WebRtcStreamerContext,
21
  webrtc_streamer,
22
  )
23
+ from streamlit_webrtc.session_info import get_session_id
24
 
25
  HERE = Path(__file__).parent
26
 
 
81
  def main():
82
  st.header("WebRTC demo")
83
 
84
+ pages = {
85
+ "Real time object detection (sendrecv)": app_object_detection,
86
+ "Real time video transform with simple OpenCV filters (sendrecv)": app_video_filters, # noqa: E501
87
+ "Real time audio filter (sendrecv)": app_audio_filter,
88
+ "Delayed echo (sendrecv)": app_delayed_echo,
89
+ "Consuming media files on server-side and streaming it to browser (recvonly)": app_streaming, # noqa: E501
90
+ "WebRTC is sendonly and images are shown via st.image() (sendonly)": app_sendonly_video, # noqa: E501
91
+ "WebRTC is sendonly and audio frames are visualized with matplotlib (sendonly)": app_sendonly_audio, # noqa: E501
92
+ "Simple video and audio loopback (sendrecv)": app_loopback,
93
+ "Configure media constraints and HTML element styles with loopback (sendrecv)": app_media_constraints, # noqa: E501
94
+ "Control the playing state programatically": app_programatically_play,
95
+ "Customize UI texts": app_customize_ui_texts,
96
+ }
97
+ page_titles = pages.keys()
98
+
99
+ page_title = st.sidebar.selectbox(
 
 
 
 
 
100
  "Choose the app mode",
101
+ page_titles,
 
 
 
 
 
 
 
 
 
 
 
102
  )
103
+ st.subheader(page_title)
104
+
105
+ page_func = pages[page_title]
106
+ page_func()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  st.sidebar.markdown(
109
  """
 
120
 
121
 
122
  def app_loopback():
123
+ """Simple video loopback"""
124
  webrtc_streamer(key="loopback")
125
 
126
 
127
  def app_video_filters():
128
+ """Video transforms with OpenCV"""
129
+
130
+ _type = st.radio("Select transform type", ("noop", "cartoon", "edges", "rotate"))
131
+
132
+ def callback(frame: av.VideoFrame) -> av.VideoFrame:
133
+ img = frame.to_ndarray(format="bgr24")
134
+
135
+ if _type == "noop":
136
+ pass
137
+ elif _type == "cartoon":
138
+ # prepare color
139
+ img_color = cv2.pyrDown(cv2.pyrDown(img))
140
+ for _ in range(6):
141
+ img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
142
+ img_color = cv2.pyrUp(cv2.pyrUp(img_color))
143
+
144
+ # prepare edges
145
+ img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
146
+ img_edges = cv2.adaptiveThreshold(
147
+ cv2.medianBlur(img_edges, 7),
148
+ 255,
149
+ cv2.ADAPTIVE_THRESH_MEAN_C,
150
+ cv2.THRESH_BINARY,
151
+ 9,
152
+ 2,
153
+ )
154
+ img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
 
 
 
 
155
 
156
+ # combine color and edges
157
+ img = cv2.bitwise_and(img_color, img_edges)
158
+ elif _type == "edges":
159
+ # perform edge detection
160
+ img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR)
161
+ elif _type == "rotate":
162
+ # rotate image
163
+ rows, cols, _ = img.shape
164
+ M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
165
+ img = cv2.warpAffine(img, M, (cols, rows))
166
 
167
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
168
 
169
+ webrtc_streamer(
170
  key="opencv-filter",
171
  mode=WebRtcMode.SENDRECV,
172
  rtc_configuration=RTC_CONFIGURATION,
173
+ video_frame_callback=callback,
174
  media_stream_constraints={"video": True, "audio": False},
175
  async_processing=True,
176
  )
177
 
 
 
 
 
 
178
  st.markdown(
179
  "This demo is based on "
180
  "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501
 
183
 
184
 
185
  def app_audio_filter():
186
+ gain = st.slider("Gain", -10.0, +20.0, 1.0, 0.05)
187
+
188
+ def process_audio(frame: av.AudioFrame) -> av.AudioFrame:
189
+ raw_samples = frame.to_ndarray()
190
+ sound = pydub.AudioSegment(
191
+ data=raw_samples.tobytes(),
192
+ sample_width=frame.format.bytes,
193
+ frame_rate=frame.sample_rate,
194
+ channels=len(frame.layout.channels),
195
+ )
 
 
 
196
 
197
+ sound = sound.apply_gain(gain)
198
 
199
+ # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples # noqa
200
+ channel_sounds = sound.split_to_mono()
201
+ channel_samples = [s.get_array_of_samples() for s in channel_sounds]
202
+ new_samples: np.ndarray = np.array(channel_samples).T
203
+ new_samples = new_samples.reshape(raw_samples.shape)
204
 
205
+ new_frame = av.AudioFrame.from_ndarray(new_samples, layout=frame.layout.name)
206
+ new_frame.sample_rate = frame.sample_rate
207
+ return new_frame
 
 
208
 
209
+ webrtc_streamer(
210
  key="audio-filter",
211
  mode=WebRtcMode.SENDRECV,
212
  rtc_configuration=RTC_CONFIGURATION,
213
+ audio_frame_callback=process_audio,
214
  async_processing=True,
215
  )
216
 
 
 
 
 
 
217
 
218
  def app_delayed_echo():
219
+ delay = st.slider("Delay", 0.0, 5.0, 1.0, 0.05)
220
+
221
+ async def queued_video_frames_callback(
222
+ frames: List[av.VideoFrame],
223
+ ) -> List[av.VideoFrame]:
224
+ logger.debug("Delay: %f", delay)
225
+ # A standalone `await ...` is interpreted as an expression and
226
+ # the Streamlit magic's target, which leads to implicit calls of `st.write`.
227
+ # To prevent it, fix it as `_ = await ...`, a statement.
228
+ # See https://discuss.streamlit.io/t/issue-with-asyncio-run-in-streamlit/7745/15
229
+ _ = await asyncio.sleep(delay)
230
+ return frames
231
+
232
+ async def queued_audio_frames_callback(
233
+ frames: List[av.AudioFrame],
234
+ ) -> List[av.AudioFrame]:
235
+ _ = await asyncio.sleep(delay)
236
+ return frames
237
 
238
+ webrtc_streamer(
 
 
 
 
 
 
 
 
 
 
 
 
239
  key="delay",
240
  mode=WebRtcMode.SENDRECV,
241
  rtc_configuration=RTC_CONFIGURATION,
242
+ queued_video_frames_callback=queued_video_frames_callback,
243
+ queued_audio_frames_callback=queued_audio_frames_callback,
244
  async_processing=True,
245
  )
246
 
 
 
 
 
 
247
 
248
  def app_object_detection():
249
  """Object detection demo with MobileNet SSD.
 
278
  "train",
279
  "tvmonitor",
280
  ]
281
+
282
+ @st.experimental_singleton
283
+ def generate_label_colors():
284
+ return np.random.uniform(0, 255, size=(len(CLASSES), 3))
285
+
286
+ COLORS = generate_label_colors()
287
 
288
  download_file(MODEL_URL, MODEL_LOCAL_PATH, expected_size=23147564)
289
  download_file(PROTOTXT_URL, PROTOTXT_LOCAL_PATH, expected_size=29353)
 
294
  name: str
295
  prob: float
296
 
297
+ @st.cache
298
+ def get_model(
299
+ session_id,
300
+ ): # HACK: Pass session_id as an arg to make the cache session-specific
301
+ return cv2.dnn.readNetFromCaffe(str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH))
302
 
303
+ net = get_model(get_session_id())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
+ confidence_threshold = st.slider(
306
+ "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05
307
+ )
 
 
 
 
 
308
 
309
+ def _annotate_image(image, detections):
310
+ # loop over the detections
311
+ (h, w) = image.shape[:2]
312
+ result: List[Detection] = []
313
+ for i in np.arange(0, detections.shape[2]):
314
+ confidence = detections[0, 0, i, 2]
315
+
316
+ if confidence > confidence_threshold:
317
+ # extract the index of the class label from the `detections`,
318
+ # then compute the (x, y)-coordinates of the bounding box for
319
+ # the object
320
+ idx = int(detections[0, 0, i, 1])
321
+ box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
322
+ (startX, startY, endX, endY) = box.astype("int")
323
+
324
+ name = CLASSES[idx]
325
+ result.append(Detection(name=name, prob=float(confidence)))
326
+
327
+ # display the prediction
328
+ label = f"{name}: {round(confidence * 100, 2)}%"
329
+ cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
330
+ y = startY - 15 if startY - 15 > 15 else startY + 15
331
+ cv2.putText(
332
+ image,
333
+ label,
334
+ (startX, y),
335
+ cv2.FONT_HERSHEY_SIMPLEX,
336
+ 0.5,
337
+ COLORS[idx],
338
+ 2,
339
+ )
340
+ return image, result
341
 
342
+ result_queue = (
343
+ queue.Queue()
344
+ ) # TODO: A general-purpose shared state object may be more useful.
345
+
346
+ def callback(frame: av.VideoFrame) -> av.VideoFrame:
347
+ image = frame.to_ndarray(format="bgr24")
348
+ blob = cv2.dnn.blobFromImage(
349
+ cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
350
+ )
351
+ net.setInput(blob)
352
+ detections = net.forward()
353
+ annotated_image, result = _annotate_image(image, detections)
354
+
355
+ # NOTE: This callback is called in another thread,
356
+ # so it must be thread-safe.
357
+ result_queue.put(result) # TODO:
358
+
359
+ return av.VideoFrame.from_ndarray(annotated_image, format="bgr24")
360
 
361
  webrtc_ctx = webrtc_streamer(
362
  key="object-detection",
363
  mode=WebRtcMode.SENDRECV,
364
  rtc_configuration=RTC_CONFIGURATION,
365
+ video_frame_callback=callback,
366
  media_stream_constraints={"video": True, "audio": False},
367
  async_processing=True,
368
  )
369
 
 
 
 
 
 
 
370
  if st.checkbox("Show the detected labels", value=True):
371
  if webrtc_ctx.state.playing:
372
  labels_placeholder = st.empty()
 
376
  # Then the rendered video frames and the labels displayed here
377
  # are not strictly synchronized.
378
  while True:
379
+ try:
380
+ result = result_queue.get(timeout=1.0)
381
+ except queue.Empty:
382
+ result = None
383
+ labels_placeholder.table(result)
 
 
 
 
 
384
 
385
  st.markdown(
386
  "This demo uses a model and code from "
 
390
 
391
 
392
  def app_streaming():
393
+ """Media streamings"""
394
  MEDIAFILES = {
395
  "big_buck_bunny_720p_2mb.mp4 (local)": {
396
  "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_2mb.mp4", # noqa: E501
 
437
  # options={"framerate": "30", "video_size": "1280x720"},
438
  # )
439
 
440
+ key = f"media-streaming-{media_file_label}"
441
+ ctx: Optional[WebRtcStreamerContext] = st.session_state.get(key)
442
+ if media_file_info["type"] == "video" and ctx and ctx.state.playing:
443
+ _type = st.radio(
444
+ "Select transform type", ("noop", "cartoon", "edges", "rotate")
445
+ )
446
+ else:
447
+ _type = "noop"
448
+
449
+ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
450
+ img = frame.to_ndarray(format="bgr24")
451
+
452
+ if _type == "noop":
453
+ pass
454
+ elif _type == "cartoon":
455
+ # prepare color
456
+ img_color = cv2.pyrDown(cv2.pyrDown(img))
457
+ for _ in range(6):
458
+ img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
459
+ img_color = cv2.pyrUp(cv2.pyrUp(img_color))
460
+
461
+ # prepare edges
462
+ img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
463
+ img_edges = cv2.adaptiveThreshold(
464
+ cv2.medianBlur(img_edges, 7),
465
+ 255,
466
+ cv2.ADAPTIVE_THRESH_MEAN_C,
467
+ cv2.THRESH_BINARY,
468
+ 9,
469
+ 2,
470
+ )
471
+ img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
472
 
473
+ # combine color and edges
474
+ img = cv2.bitwise_and(img_color, img_edges)
475
+ elif _type == "edges":
476
+ # perform edge detection
477
+ img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR)
478
+ elif _type == "rotate":
479
+ # rotate image
480
+ rows, cols, _ = img.shape
481
+ M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
482
+ img = cv2.warpAffine(img, M, (cols, rows))
483
 
484
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
485
 
486
+ webrtc_streamer(
487
+ key=key,
488
  mode=WebRtcMode.RECVONLY,
489
  rtc_configuration=RTC_CONFIGURATION,
490
  media_stream_constraints={
 
492
  "audio": media_file_info["type"] == "audio",
493
  },
494
  player_factory=create_player,
495
+ video_frame_callback=video_frame_callback,
496
  )
497
 
 
 
 
 
 
498
  st.markdown(
499
  "The video filter in this demo is based on "
500
  "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501
 
609
 
610
 
611
  def app_media_constraints():
612
+ """A sample to configure MediaStreamConstraints object"""
613
  frame_rate = 5
614
  webrtc_streamer(
615
  key="media-constraints",
 
628
 
629
 
630
  def app_programatically_play():
631
+ """A sample of controlling the playing state from Python."""
632
  playing = st.checkbox("Playing", value=True)
633
 
634
  webrtc_streamer(
635
+ key="programatic_control",
636
  desired_playing_state=playing,
637
  mode=WebRtcMode.SENDRECV,
638
  rtc_configuration=RTC_CONFIGURATION,
639
  )
640
 
641
 
642
+ def app_customize_ui_texts():
643
+ webrtc_streamer(
644
+ key="custom_ui_texts",
645
+ rtc_configuration=RTC_CONFIGURATION,
646
+ translations={
647
+ "start": "開始",
648
+ "stop": "停止",
649
+ "select_device": "デバイス選択",
650
+ "media_api_not_available": "Media APIが利用できない環境です",
651
+ "device_ask_permission": "メディアデバイスへのアクセスを許可してください",
652
+ "device_not_available": "メディアデバイスを利用できません",
653
+ "device_access_denied": "メディアデバイスへのアクセスが拒否されました",
654
+ },
655
+ )
656
+
657
+
658
  if __name__ == "__main__":
659
  import os
660
 
requirements.txt CHANGED
@@ -4,6 +4,6 @@ numpy==1.22.3
4
  opencv-python-headless==4.5.5.64
5
  pydub==0.25.1
6
  streamlit==1.9.0
7
- streamlit_webrtc==0.37.0
8
  typing_extensions==4.1.1
9
  protobuf~=3.19.0
 
4
  opencv-python-headless==4.5.5.64
5
  pydub==0.25.1
6
  streamlit==1.9.0
7
+ streamlit_webrtc==0.40.0
8
  typing_extensions==4.1.1
9
  protobuf~=3.19.0