Commit
•
408b86f
1
Parent(s):
0a2902e
feat: Add sample livebooks
Browse files. Brought in from the Hugging Face space at https://huggingface.co/spaces/headway/medical-code-transcriber/tree/main/public-apps
livebooks/sample_implementation.livemd
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"slug":"medical-code-transcriber"}} -->
|
2 |
+
|
3 |
+
# Medical Code Transcriber
|
4 |
+
|
5 |
+
```elixir
# Install notebook dependencies. :kino_bumblebee pulls in Bumblebee/Nx for
# model inference; EXLA is set as the default Nx backend so tensors run on
# the compiled backend instead of pure-Elixir Nx.
Mix.install(
  [
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:explorer, "~> 0.7.0"},
    {:kino_explorer, "~> 0.1.11"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
```
|
16 |
+
|
17 |
+
## Transcribe Audio to Text
|
18 |
+
|
19 |
+
### Step 1: Select your audio to transcribe
|
20 |
+
|
21 |
+
* First, upload (or record) your audio below.
|
22 |
+
* Then, run the second cell after the input to transcribe the audio to text.
|
23 |
+
|
24 |
+
```elixir
# Load OpenAI's Whisper (tiny) checkpoint and assemble a streaming
# speech-to-text serving plus an audio-input widget.
whisper_repo = {:hf, "openai/whisper-tiny"}

{:ok, model_info} = Bumblebee.load_model(whisper_repo)
{:ok, featurizer} = Bumblebee.load_featurizer(whisper_repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(whisper_repo)
{:ok, generation_config} = Bumblebee.load_generation_config(whisper_repo)

# Cap the number of tokens generated per chunk.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

serving =
  Bumblebee.Audio.speech_to_text_whisper(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    # Compile for a fixed batch size; split long audio into 30s chunks and
    # emit segment-level timestamps as a stream of results.
    compile: [batch_size: 4],
    chunk_num_seconds: 30,
    timestamps: :segments,
    stream: true,
    defn_options: [compiler: EXLA]
  )

# The widget resamples uploads/recordings to the rate the featurizer expects.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
```
|
46 |
+
|
47 |
+
```elixir
# Read the chosen audio, average all channels down to mono, then run the
# streaming serving and collect each transcription chunk into a dataframe row.
chosen_audio = Kino.Input.read(audio_input)

audio =
  chosen_audio.file_ref
  |> Kino.Input.file_path()
  |> File.read!()
  |> Nx.from_binary(:f32)
  |> Nx.reshape({:auto, chosen_audio.num_channels})
  |> Nx.mean(axes: [1])

# Render a chunk timestamp (seconds) as an "HH:MM:SS" clock string.
format_mark = fn seconds ->
  seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
end

dataframe =
  Nx.Serving.run(serving, audio)
  |> Enum.map(fn chunk ->
    %{
      start_mark: format_mark.(chunk.start_timestamp_seconds),
      end_mark: format_mark.(chunk.end_timestamp_seconds),
      text: chunk.text
    }
  end)
  |> Explorer.DataFrame.new()
```
|
71 |
+
|
72 |
+
```elixir
# Tag each transcribed segment with procedure codes by substring matching
# against a small hand-written term -> code table.
# NOTE(review): "ventricularography" looks like a misspelling of
# "ventriculography" — left as-is because it is a runtime match string;
# confirm against the expected transcripts before changing it.
procedure_code_mapping = [
  ["followup visit", "FOLLOWUP"],
  ["cipher drug", "CIPHER"],
  ["catheterization", "CATH"],
  ["ventricularography", "VTR"],
  ["ejection fraction", "FR"]
]

codes_series =
  dataframe
  |> Explorer.DataFrame.pull("text")
  |> Explorer.Series.downcase()
  |> Explorer.Series.transform(fn element ->
    # Collect the code for every term contained in this (lowercased) segment.
    for [term, code] <- procedure_code_mapping,
        String.contains?(element, term),
        do: code
  end)

dataframe
|> Explorer.DataFrame.put("codes", codes_series)
```
|
livebooks/using_audio_tagger_library.livemd
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"show_source":true,"slug":"transcriber"}} -->
|
2 |
+
|
3 |
+
# Tag Audio
|
4 |
+
|
5 |
+
```elixir
# Install notebook dependencies. :audio_tagger is loaded from a local path,
# so this livebook must run from a directory that contains
# ./development/ml/audio_tagger.
Mix.install(
  [
    {:audio_tagger, path: "./development/ml/audio_tagger"},
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:explorer, "~> 0.7.0"},
    {:kino_explorer, "~> 0.1.11"}
  ],
  config: [
    nx: [default_backend: EXLA.Backend]
    # Uncomment to run EXLA on CUDA GPUs:
    # exla: [
    #   clients: [
    #     cuda: [
    #       platform: :cuda,
    #       lazy_transfers: :never
    #     ]
    #   ]
    # ]
  ]
)
```
|
27 |
+
|
28 |
+
## Step 1: Create Vector Embeddings for ICD-9 Codes
|
29 |
+
|
30 |
+
```elixir
# Use sentence-transformers/all-MiniLM-L6-v2 to create vectors for each medical code description
# The base path lives in the system temp dir; later cells read
# "#{tmpfile}.csv" and "#{tmpfile}.bin", which this step presumably
# produces — confirm against AudioTagger.Vectors.precalculate/1.
tmpfile = Path.join(System.tmp_dir(), "CMS32_DESC_LONG_SHORT_DX")

AudioTagger.Vectors.precalculate(tmpfile)
```
|
36 |
+
|
37 |
+
## Step 2: Transcribe Audio Recording
|
38 |
+
|
39 |
+
```elixir
# 1 - Prepare model and choose audio file
featurizer = AudioTagger.Transcriber.prepare_featurizer()

# The input widget resamples uploads/recordings to the featurizer's rate.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
```
|
45 |
+
|
46 |
+
```elixir
# 2 - Transcribe audio recording to text using OpenAI's Whisper model (takes approximately a minute on an M1 Max)
chosen_audio = Kino.Input.read(audio_input)

if is_nil(chosen_audio) do
  raise "No file chosen. Please select a file in the widget above."
end

file = chosen_audio.file_ref |> Kino.Input.file_path() |> File.read!()
options = [model_name: "openai/whisper-tiny", num_channels: chosen_audio.num_channels]

# Materialize the (possibly lazy) transcription chunks, then build a dataframe.
transcription_df =
  AudioTagger.Transcriber.transcribe_audio(featurizer, file, options)
  |> Enum.to_list()
  |> Explorer.DataFrame.new()
```
|
62 |
+
|
63 |
+
## Step 3: Tag Transcribed Audio
|
64 |
+
|
65 |
+
```elixir
# Load the ICD-9 code descriptions written in Step 1 and tag each transcribed
# segment via semantic search over the precalculated embedding vectors.
labels_df =
  Explorer.DataFrame.from_csv!(
    "#{tmpfile}.csv",
    # Force string dtypes so codes like "0010" keep their leading zeros.
    dtypes: [
      {"DIAGNOSIS CODE", :string},
      {"LONG DESCRIPTION", :string},
      {"SHORT DESCRIPTION", :string}
    ]
  )
  |> Explorer.DataFrame.select([0, 1, 2])
  |> Explorer.DataFrame.rename(["code", "long_description", "short_description"])

tagged_audio =
  AudioTagger.Classifier.SemanticSearch.tag(
    transcription_df,
    labels_df,
    "#{tmpfile}.bin"
  )
```
|