Spaces:

headway
/

medicode

Runtime error

App Files Files Community

medicode / livebooks /sample_implementation.livemd

timgremore

fix: Replace name with MediCode

4d87f7f 9 months ago

raw

history blame

2.54 kB

	<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"slug":"medical-code-transcriber"}} -->

	# MediCode

	```elixir
	# Notebook setup: install the packages used by the cells below and make
	# EXLA the default Nx backend for every tensor operation.
	Mix.install(
	  [
	    # NOTE(review): Bumblebee, Kino and Nx are not listed explicitly; they
	    # are presumably pulled in transitively by kino_bumblebee - confirm.
	    {:kino_bumblebee, "~> 0.4.0"},
	    # Compiler/backend referenced by the serving's `defn_options` and by the
	    # `config:` entry below.
	    {:exla, ">= 0.0.0"},
	    # Dataframes used to hold the transcription and the derived codes.
	    {:explorer, "~> 0.7.0"},
	    {:kino_explorer, "~> 0.1.11"}
	  ],
	  config: [nx: [default_backend: EXLA.Backend]]
	)
	```

	## Transcribe Audio to Text

	### Step 1: Select your audio to transcribe

	* First, upload (or record) your audio below.
	* Then, run the second cell after the input to transcribe the audio to text.

	```elixir
	# Model, featurizer, tokenizer and generation config must all come from the
	# same repository, so the repository is named once and reused.
	repo = {:hf, "openai/whisper-tiny"}

	{:ok, model_info} = Bumblebee.load_model(repo)
	{:ok, featurizer} = Bumblebee.load_featurizer(repo)
	{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
	{:ok, generation_config} = Bumblebee.load_generation_config(repo)

	# Cap the number of generated tokens at 100.
	# NOTE(review): presumably this limit applies per 30-second chunk; dense
	# speech could be truncated - confirm against the Bumblebee docs.
	generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

	# Speech-to-text serving consumed by the transcription cell.
	serving =
	  Bumblebee.Audio.speech_to_text_whisper(
	    model_info,
	    featurizer,
	    tokenizer,
	    generation_config,
	    compile: [batch_size: 4],
	    chunk_num_seconds: 30,
	    # The transcription cell reads start/end timestamps from every chunk.
	    timestamps: :segments,
	    # The transcription cell enumerates the result chunk by chunk.
	    stream: true,
	    defn_options: [compiler: EXLA]
	  )

	# Audio input widget; it records at the sampling rate the featurizer expects.
	audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
	```

	```elixir
	# Read the current value of the audio input. The value is nil until audio
	# has been uploaded or recorded, so fail with an actionable message instead
	# of an opaque error on `nil.file_ref`.
	chosen_audio =
	  Kino.Input.read(audio_input) ||
	    raise "No audio selected: upload or record audio in the input above, then re-run this cell"

	# Decode the raw f32 samples, reshape them to {samples, channels} and average
	# across the channel axis to obtain a single mono signal.
	audio =
	  chosen_audio.file_ref
	  |> Kino.Input.file_path()
	  |> File.read!()
	  |> Nx.from_binary(:f32)
	  |> Nx.reshape({:auto, chosen_audio.num_channels})
	  |> Nx.mean(axes: [1])

	# Formats a timestamp in seconds as "HH:MM:SS". A chunk timestamp may be nil
	# (Bumblebee types it as `number() | nil`); nil is passed through so a
	# missing mark becomes an empty cell rather than a crash in `round/1`.
	format_mark = fn
	  nil -> nil
	  seconds -> seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
	end

	# Run the transcription and build one dataframe row per chunk, in the order
	# the chunks are produced.
	dataframe =
	  serving
	  |> Nx.Serving.run(audio)
	  |> Enum.map(fn chunk ->
	    %{
	      start_mark: format_mark.(chunk.start_timestamp_seconds),
	      end_mark: format_mark.(chunk.end_timestamp_seconds),
	      text: chunk.text
	    }
	  end)
	  |> Explorer.DataFrame.new()
	```

	```elixir
	# Pairs of [search term, procedure code]. Terms are written in lowercase
	# because the transcribed text is downcased before matching.
	# NOTE(review): "ventricularography" may be a misspelling of
	# "ventriculography"; left unchanged because it is matched against the
	# transcription output - confirm against real transcripts.
	procedure_code_mapping = [
	  ["followup visit", "FOLLOWUP"],
	  ["cipher drug", "CIPHER"],
	  ["catheterization", "CATH"],
	  ["ventricularography", "VTR"],
	  ["ejection fraction", "FR"]
	]

	# For every transcribed chunk, collect the codes of all terms that occur in
	# its lowercased text (an empty list when nothing matches). Codes keep the
	# order in which they appear in `procedure_code_mapping`.
	codes_series =
	  dataframe
	  |> Explorer.DataFrame.pull("text")
	  |> Explorer.Series.downcase()
	  |> Explorer.Series.transform(fn element ->
	    for [term, code] <- procedure_code_mapping,
	        String.contains?(element, term),
	        do: code
	  end)

	# Last expression of the cell: the dataframe with the added "codes" column.
	dataframe
	|> Explorer.DataFrame.put("codes", codes_series)
	```