File size: 2,535 Bytes
408b86f 4d87f7f 408b86f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"slug":"medical-code-transcriber"}} -->
# MediCode
```elixir
# Installs the notebook's dependencies: kino_bumblebee (pinned to ~> 0.4.0),
# EXLA (left unpinned via ">= 0.0.0"), Explorer and kino_explorer.
# The `config:` option sets EXLA.Backend as the default Nx backend.
Mix.install(
[
{:kino_bumblebee, "~> 0.4.0"},
{:exla, ">= 0.0.0"},
{:explorer, "~> 0.7.0"},
{:kino_explorer, "~> 0.1.11"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
```
## Transcribe Audio to Text
### Step 1: Select your audio to transcribe
* First, upload (or record) your audio below.
* Then, run the cell directly below the audio input to transcribe the audio to text.
```elixir
# The model, featurizer, tokenizer and generation config are all loaded from
# the same Hugging Face repository.
whisper_repo = {:hf, "openai/whisper-tiny"}

{:ok, model_info} = Bumblebee.load_model(whisper_repo)
{:ok, featurizer} = Bumblebee.load_featurizer(whisper_repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(whisper_repo)
{:ok, generation_config} = Bumblebee.load_generation_config(whisper_repo)

# Limit generation to at most 100 new tokens.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

# Speech-to-text serving compiled with EXLA. It is configured to process the
# audio in 30-second chunks, report segment-level timestamps and stream its
# results.
serving =
  Bumblebee.Audio.speech_to_text_whisper(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    compile: [batch_size: 4],
    chunk_num_seconds: 30,
    timestamps: :segments,
    stream: true,
    defn_options: [compiler: EXLA]
  )

# Audio input widget, sampled at the rate the featurizer expects. It is the
# last expression so that the cell renders it.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
```
```elixir
# Read the uploaded/recorded clip. `Kino.Input.read/1` yields `nil` until the
# user has provided audio, so fail with an actionable message instead of an
# opaque error when accessing `file_ref` on `nil`.
chosen_audio =
  Kino.Input.read(audio_input) ||
    raise "No audio selected: upload or record a clip in the \"Audio\" input, then re-run this cell."

# Decode the file as raw f32 samples, lay them out as {samples, channels} and
# average across the channel axis to obtain a single mono signal.
audio =
  chosen_audio.file_ref
  |> Kino.Input.file_path()
  |> File.read!()
  |> Nx.from_binary(:f32)
  |> Nx.reshape({:auto, chosen_audio.num_channels})
  |> Nx.mean(axes: [1])

# Formats a chunk boundary, given in seconds, as an "HH:MM:SS" string.
# NOTE(review): Bumblebee's chunk type allows a timestamp to be `nil` (e.g. when
# generation is cut off before a closing timestamp); `round/1` would raise on
# it, so a missing boundary is kept as a missing value instead.
format_mark = fn
  nil -> nil
  seconds -> seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
end

# Build one row per transcribed chunk, preserving the order in which the
# serving emits the chunks.
dataframe =
  serving
  |> Nx.Serving.run(audio)
  |> Enum.map(fn chunk ->
    %{
      start_mark: format_mark.(chunk.start_timestamp_seconds),
      end_mark: format_mark.(chunk.end_timestamp_seconds),
      text: chunk.text
    }
  end)
  |> Explorer.DataFrame.new()
```
```elixir
# Lookup table of [phrase, procedure code] pairs. Phrases are lower-case
# because they are matched against lower-cased transcript text.
# NOTE(review): "ventricularography" is not the usual spelling of
# "ventriculography" - confirm it matches what the transcriber actually emits.
procedure_code_mapping = [
  ["followup visit", "FOLLOWUP"],
  ["cipher drug", "CIPHER"],
  ["catheterization", "CATH"],
  ["ventricularography", "VTR"],
  ["ejection fraction", "FR"]
]

# For every transcript segment, collect the codes of all phrases it contains,
# in the order the phrases appear in the lookup table.
codes_series =
  dataframe
  |> Explorer.DataFrame.pull("text")
  |> Explorer.Series.downcase()
  |> Explorer.Series.transform(fn segment_text ->
    for [phrase, code] <- procedure_code_mapping,
        String.contains?(segment_text, phrase),
        do: code
  end)

# Attach the matched codes as a new "codes" column; this is the cell's output.
Explorer.DataFrame.put(dataframe, "codes", codes_series)
```
|