# Medical Code Transcriber

```elixir
Mix.install(
  [
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:explorer, "~> 0.7.0"},
    {:kino_explorer, "~> 0.1.11"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
```

## Transcribe Audio to Text

### Step 1: Select your audio to transcribe

* First, upload (or record) your audio below.
* Then, run the second cell after the input to transcribe the audio to text.

```elixir
# All Whisper artifacts are loaded from the same Hugging Face repository.
whisper_repo = {:hf, "openai/whisper-tiny"}

{:ok, model_info} = Bumblebee.load_model(whisper_repo)
{:ok, featurizer} = Bumblebee.load_featurizer(whisper_repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(whisper_repo)
{:ok, generation_config} = Bumblebee.load_generation_config(whisper_repo)

# NOTE(review): this caps generation at 100 new tokens per 30-second chunk
# (see chunk_num_seconds below). Dense speech may be cut short; raise the
# limit if transcripts look truncated.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

# Streaming speech-to-text serving that emits one chunk per recognized segment,
# each carrying its text plus start/end timestamps (timestamps: :segments).
serving =
  Bumblebee.Audio.speech_to_text_whisper(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    compile: [batch_size: 4],
    chunk_num_seconds: 30,
    timestamps: :segments,
    stream: true,
    defn_options: [compiler: EXLA]
  )

# Record/upload widget; audio is captured at the sampling rate the featurizer expects.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
```

```elixir
# Kino.Input.read/1 returns nil until audio has been uploaded or recorded;
# fail with an actionable message instead of an opaque error on `.file_ref`.
chosen_audio =
  Kino.Input.read(audio_input) ||
    raise "No audio selected: upload or record audio in the input above, then re-run this cell."

# Decode the raw file as f32 samples, reshape to {samples, channels}, and
# average across the channel axis to obtain a mono signal.
audio =
  chosen_audio.file_ref
  |> Kino.Input.file_path()
  |> File.read!()
  |> Nx.from_binary(:f32)
  |> Nx.reshape({:auto, chosen_audio.num_channels})
  |> Nx.mean(axes: [1])

# Formats a timestamp in seconds as "HH:MM:SS". Whisper may omit a timestamp
# (Bumblebee types them as `number() | nil`), in which case nil is passed
# through so the dataframe shows a missing value instead of crashing in round/1.
format_mark = fn
  nil ->
    nil

  seconds ->
    seconds
    |> round()
    |> Time.from_seconds_after_midnight()
    |> Time.to_string()
end

# One dataframe row per transcribed segment, in the order the serving emits them.
dataframe =
  serving
  |> Nx.Serving.run(audio)
  |> Enum.map(fn chunk ->
    %{
      start_mark: format_mark.(chunk.start_timestamp_seconds),
      end_mark: format_mark.(chunk.end_timestamp_seconds),
      text: chunk.text
    }
  end)
  |> Explorer.DataFrame.new()
```

```elixir
# [search term, procedure code] pairs. Terms are lowercase because they are
# matched as substrings of the downcased transcript text.
# NOTE(review): "ventricularography" is not the usual spelling
# ("ventriculography"); confirm it matches what the model actually transcribes.
procedure_code_mapping = [
  ["followup visit", "FOLLOWUP"],
  ["cipher drug", "CIPHER"],
  ["catheterization", "CATH"],
  ["ventricularography", "VTR"],
  ["ejection fraction", "FR"]
]

# For every transcript segment, collect the codes of all terms it mentions
# (an empty list when none match), preserving the mapping order.
codes_series =
  dataframe
  |> Explorer.DataFrame.pull("text")
  |> Explorer.Series.downcase()
  |> Explorer.Series.transform(fn text ->
    for [term, code] <- procedure_code_mapping, String.contains?(text, term), do: code
  end)

Explorer.DataFrame.put(dataframe, "codes", codes_series)
```