File size: 2,151 Bytes
9fb11ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b21c366
9fb11ce
b21c366
 
9fb11ce
 
 
 
 
b21c366
 
 
 
 
 
9fb11ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b21c366
 
 
 
 
 
 
9fb11ce
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<!-- livebook:{"app_settings":{"access_type":"public","slug":"whisper-chat"}} -->

# Whisper chat

```elixir
# Install the notebook dependencies and configure Nx before any other cell runs.
Mix.install(
  [
    {:kino_bumblebee, "~> 0.2.1"},
    {:exla, "~> 0.5.1"}
  ],
  # Application config passed to Mix.install: sets :nx's :default_backend to EXLA.Backend.
  config: [nx: [default_backend: EXLA.Backend]]
)
```

## Chat

```elixir
# Usage notice for the chat, rendered as Markdown as this cell's output.
notice = """
This chat is open to anyone, be polite and act responsibly. :) Note chat history has been disabled.
"""

Kino.Markdown.new(notice)
```

```elixir
# The model, featurizer and tokenizer are all loaded from the same repository.
repo = {:hf, "openai/whisper-small"}

{:ok, model_info} = Bumblebee.load_model(repo, log_params_diff: false)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)

# Special tokens forced at generation positions 1, 2 and 3, in this order.
# NOTE(review): "<|pt|>" presumably selects Portuguese as the spoken language - confirm.
forced_token_ids =
  ["<|pt|>", "<|transcribe|>", "<|notimestamps|>"]
  |> Enum.with_index(1)
  |> Enum.map(fn {token, position} ->
    {position, Bumblebee.Tokenizer.token_to_id(tokenizer, token)}
  end)

serving_opts = [
  max_new_tokens: 100,
  compile: [batch_size: 8],
  defn_options: [compiler: EXLA],
  forced_token_ids: forced_token_ids
]

serving = Bumblebee.Audio.speech_to_text(model_info, featurizer, tokenizer, serving_opts)

# Start the serving as a named process so other cells can call it as WhisperChat.
Kino.start_child({Nx.Serving, serving: serving, name: WhisperChat})
```

```elixir
# Form inputs: an audio recording (captured at the featurizer's sampling rate)
# and the sender's display name.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
name_input = Kino.Input.text("Name")
form = Kino.Control.form([name: name_input, audio: audio_input], submit: "Send")

# Frame that receives both the transcribed messages and validation errors.
frame = Kino.Frame.new()

# Handle each form submission: validate it, transcribe the recording with the
# WhisperChat serving and append the resulting message to the frame.
Kino.async_listen(form, fn %{data: %{audio: audio, name: name}, origin: origin} ->
  # Trim once up front: a whitespace-only name must count as missing, and
  # leading/trailing spaces would otherwise break the `**name**` emphasis
  # (Markdown does not treat `** name **` as bold).
  name = String.trim(name)

  if audio && name != "" do
    # Read the raw bytes as f32 samples, lay them out as {samples, channels}
    # and average across the channel axis to get a single mono signal.
    samples =
      audio.data
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, audio.num_channels})
      |> Nx.mean(axes: [1])

    # :timer.tc/2 returns the elapsed time in microseconds alongside the result.
    {elapsed_us, %{results: [%{text: generated_text}]}} =
      :timer.tc(&Nx.Serving.batched_run/2, [WhisperChat, samples])

    content =
      Kino.Markdown.new(
        "**#{name}** (in #{elapsed_us / 1_000_000} seconds) : #{generated_text}"
      )

    # NOTE(review): per the Kino docs, `temporary: true` sends the output to the
    # currently connected clients without keeping it in the frame - confirm
    # against the installed Kino version.
    Kino.Frame.append(frame, content, temporary: true)
  else
    content = Kino.Markdown.new("*Error! Name and Audio are required*")
    # Target the error at the client that submitted the form.
    Kino.Frame.append(frame, content, to: origin)
  end
end)

Kino.Layout.grid([frame, form], boxed: true, gap: 16)
```