Spaces:
Runtime error
Runtime error
Create nx-vad.livemd
Browse files- public-apps/nx-vad.livemd +85 -0
public-apps/nx-vad.livemd
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- livebook:{"app_settings":{"access_type":"public","auto_shutdown_ms":60000,"multi_session":true,"output_type":"rich","show_existing_sessions":false,"show_source":true,"slug":"vad"}} -->
|
2 |
+
|
3 |
+
# Nx Voice-Activity Detection
|
4 |
+
|
5 |
+
```elixir
|
6 |
+
Mix.install([
|
7 |
+
{:ortex, "~> 0.1.9"},
|
8 |
+
{:kino_vega_lite, "~> 0.1.10"},
|
9 |
+
{:kino_live_audio, "~> 0.1"},
|
10 |
+
{:req, "~> 0.4"}
|
11 |
+
])
|
12 |
+
```
|
13 |
+
|
14 |
+
## Setup Model & Plot
|
15 |
+
|
16 |
+
```elixir
|
17 |
+
# Location of the Silero VAD model in ONNX format.
url = "https://raw.githubusercontent.com/snakers4/silero-vad/master/files/silero_vad.onnx"

# The local file name is the last path segment of the URL.
filename = Path.basename(url)

# Stream the response body straight to disk instead of holding it in memory.
resp = Req.get!(url, decode_body: false, into: File.stream!(filename))

# Req.get!/2 raises only on transport errors, not on HTTP error statuses, so
# check the status here; otherwise a failed download would only surface later
# as a confusing model-loading error.
if resp.status != 200 do
  raise "failed to download #{url}: HTTP status #{resp.status}"
end

model = Ortex.load(filename)

# Line chart that the streaming cell pushes points into. The x axis is kept
# bare (no ticks, domain line, grid or labels); the y axis is fixed to 0..1.
chart =
  VegaLite.new(title: "Voice-Activity Detection", width: 800, height: 400)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "x",
    type: :quantitative,
    title: "Time",
    axis: [ticks: false, domain: false, grid: false, labels: false]
  )
  |> VegaLite.encode_field(:y, "y",
    type: :quantitative,
    title: "Voice",
    scale: [domain_max: 1, domain_min: 0]
  )
  |> Kino.VegaLite.new()
|
43 |
+
```
|
44 |
+
|
45 |
+
```elixir
|
46 |
+
# Text inputs: their values are strings and are parsed to integers where read.
chunk_size = Kino.Input.text("Chunk Size", default: "1")
sample_rate = Kino.Input.text("Sample Rate", default: "16000")

# Unit in which the chunk size is expressed.
unit =
  Kino.Input.select(
    "Unit",
    [samples: "Samples", s: "Seconds", ms: "Milliseconds", mu: "Microseconds"],
    default: :s
  )

# Button that wipes all points from the chart.
clear = Kino.Control.button("Clear Plot")
Kino.listen(clear, fn _event -> Kino.VegaLite.clear(chart) end)

# Lay out the three inputs in one row, with the clear button underneath.
top_row = Kino.Layout.grid([sample_rate, chunk_size, unit], columns: 3)
Kino.Layout.grid([top_row, clear])
|
60 |
+
```
|
61 |
+
|
62 |
+
```elixir
|
63 |
+
# Build the live-audio source from the current values of the inputs.
# Integer.parse/1 returns {integer, rest}; only the integer part is kept, and
# non-numeric input makes elem/2 raise because Integer.parse/1 returns :error.
liveAudio =
  KinoLiveAudio.new(
    chunk_size: elem(Integer.parse(Kino.Input.read(chunk_size)), 0),
    unit: Kino.Input.read(unit),
    sample_rate: elem(Integer.parse(Kino.Input.read(sample_rate)), 0)
  )
|
69 |
+
```
|
70 |
+
|
71 |
+
```elixir
|
72 |
+
# Feed every audio chunk through the ONNX model and plot the result.
# The listener state is a pair of tensors {hn, cn}, both starting as zeros of
# shape {2, 1, 64}; the model returns updated versions that are carried over
# to the next chunk.
liveAudio
|> Kino.Control.stream()
|> Kino.listen({Nx.broadcast(0.0, {2, 1, 64}), Nx.broadcast(0.0, {2, 1, 64})}, fn
  %{event: :audio_chunk, chunk: data}, {hn, cn} ->
    input = Nx.tensor(data) |> Nx.stack()

    # The sample rate is handed to the model as a signed 64-bit integer tensor.
    sr = Nx.tensor(Kino.Input.read(sample_rate) |> Integer.parse() |> elem(0), type: :s64)

    {output, hn, cn} = Ortex.run(model, {input, sr, hn, cn})

    # Exactly one value is expected in the output; the match fails otherwise.
    [output] = Nx.to_list(output |> Nx.flatten())

    # Use the OS clock as the x coordinate and keep only the latest 1000 points.
    row = %{x: :os.system_time(), y: output}
    Kino.VegaLite.push(chart, row, window: 1000)

    {:cont, {hn, cn}}
end)
|
85 |
+
```
|