File size: 2,535 Bytes
408b86f
 
4d87f7f
408b86f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"slug":"medical-code-transcriber"}} -->

# MediCode

```elixir
# Install the packages this notebook depends on and configure Nx.
Mix.install(
  [
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:explorer, "~> 0.7.0"},
    {:kino_explorer, "~> 0.1.11"}
  ],
  # Make EXLA.Backend the default Nx backend for the cells below.
  config: [nx: [default_backend: EXLA.Backend]]
)
```

## Transcribe Audio to Text

### Step 1: Select your audio to transcribe

* First, upload (or record) your audio below.
* Then, run the cell that follows the audio input to transcribe the audio to text.

```elixir
# Every Whisper artifact below is loaded from the same Hugging Face repository.
whisper_repo = {:hf, "openai/whisper-tiny"}

{:ok, model_info} = Bumblebee.load_model(whisper_repo)
{:ok, featurizer} = Bumblebee.load_featurizer(whisper_repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(whisper_repo)

# Start from the repository's generation config and cap output at 100 new tokens.
{:ok, base_generation_config} = Bumblebee.load_generation_config(whisper_repo)
generation_config = Bumblebee.configure(base_generation_config, max_new_tokens: 100)

# Options for the speech-to-text serving: audio is processed in 30-second
# chunks, results are streamed, and each result carries segment timestamps.
serving_options = [
  compile: [batch_size: 4],
  chunk_num_seconds: 30,
  timestamps: :segments,
  stream: true,
  defn_options: [compiler: EXLA]
]

serving =
  Bumblebee.Audio.speech_to_text_whisper(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    serving_options
  )

# The audio input is created at the featurizer's sampling rate; it is the last
# expression so that it becomes this cell's output.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
```

```elixir
# Read whatever audio is currently held by the input created in the previous cell.
chosen_audio = Kino.Input.read(audio_input)

# The input's value is nil until something is uploaded or recorded. Stop here
# with a readable message instead of failing on `chosen_audio.file_ref` below.
if is_nil(chosen_audio) do
  Kino.interrupt!(
    :error,
    "No audio selected. Upload or record audio above, then re-evaluate this cell."
  )
end

# Load the file as 32-bit floats, arrange the samples as {frames, channels}
# and average across the channel axis to obtain a single-channel signal.
audio =
  chosen_audio.file_ref
  |> Kino.Input.file_path()
  |> File.read!()
  |> Nx.from_binary(:f32)
  |> Nx.reshape({:auto, chosen_audio.num_channels})
  |> Nx.mean(axes: [1])

# Formats a timestamp in seconds as "HH:MM:SS". A missing timestamp is passed
# through as nil rather than crashing in `round/1`.
# NOTE(review): Bumblebee types chunk timestamps as `number() | nil` — confirm
# nil is the desired cell value for an open-ended segment.
format_mark = fn
  nil -> nil
  seconds -> seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
end

# One row per transcribed chunk, in the order the serving emits them.
dataframe =
  serving
  |> Nx.Serving.run(audio)
  |> Enum.map(fn chunk ->
    %{
      start_mark: format_mark.(chunk.start_timestamp_seconds),
      end_mark: format_mark.(chunk.end_timestamp_seconds),
      text: chunk.text
    }
  end)
  |> Explorer.DataFrame.new()
```

```elixir
# Each entry pairs a lowercase phrase to look for in the transcript with the
# procedure code to report when that phrase is present.
procedure_code_mapping = [
  ["followup visit", "FOLLOWUP"],
  ["cipher drug", "CIPHER"],
  ["catheterization", "CATH"],
  ["ventricularography", "VTR"],
  ["ejection fraction", "FR"]
]

# Collects, in mapping order, the codes whose phrase occurs in `text`.
codes_for = fn text ->
  for [phrase, code] <- procedure_code_mapping, String.contains?(text, phrase), do: code
end

# Matching is done against the lowercased transcript text.
lowercased_text =
  dataframe
  |> Explorer.DataFrame.pull("text")
  |> Explorer.Series.downcase()

codes_series = Explorer.Series.transform(lowercased_text, codes_for)

# Cell output: the transcript table with the matched codes added as "codes".
Explorer.DataFrame.put(dataframe, "codes", codes_series)
```