Spaces:

headway
/

medicode

Runtime error

App Files Files Community

medicode / livebooks /using_audio_tagger_library.livemd

noahsettersten

feat: Add sample livebooks

408b86f 7 months ago

raw

history blame

No virus

2.15 kB

	<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"show_source":true,"slug":"transcriber"}} -->

	# Tag Audio

	```elixir
	# Notebook dependencies. `audio_tagger` is a path dependency on a local checkout.
	# NOTE(review): the relative path is presumably resolved against the directory
	# Livebook was started from — confirm before running elsewhere.
	deps = [
	  {:audio_tagger, path: "./development/ml/audio_tagger"},
	  {:kino_bumblebee, "~> 0.4.0"},
	  {:exla, ">= 0.0.0"},
	  {:explorer, "~> 0.7.0"},
	  {:kino_explorer, "~> 0.1.11"}
	]

	# Use EXLA as the default Nx backend.
	# Optional CUDA client settings (disabled); to enable, add this entry after `nx:`:
	#   exla: [clients: [cuda: [platform: :cuda, lazy_transfers: :never]]]
	config = [nx: [default_backend: EXLA.Backend]]

	Mix.install(deps, config: config)
	```

	## Step 1: Create Vector Embeddings for ICD-9 Codes

	```elixir
	# Use sentence-transformers/all-MiniLM-L6-v2 to create vectors for each medical code description.
	#
	# `tmpfile` is a path prefix inside the system temp directory; the tagging step
	# reads "#{tmpfile}.csv" and "#{tmpfile}.bin". `System.tmp_dir!/0` is used instead
	# of `System.tmp_dir/0` so that a missing temp directory raises a clear error here
	# rather than passing `nil` into `Path.join/2`.
	tmpfile = Path.join(System.tmp_dir!(), "CMS32_DESC_LONG_SHORT_DX")

	AudioTagger.Vectors.precalculate(tmpfile)
	```

	## Step 2: Transcribe Audio Recording

	```elixir
	# 1 - Build the featurizer, then render an audio input widget that records or
	# uploads audio at the sampling rate the featurizer reports.
	featurizer = AudioTagger.Transcriber.prepare_featurizer()
	sampling_rate = featurizer.sampling_rate

	audio_input = Kino.Input.audio("Audio", sampling_rate: sampling_rate)
	```

	```elixir
	# 2 - Transcribe audio recording to text using OpenAI's Whisper model (takes approximately a minute on an M1 Max)
	chosen_audio = Kino.Input.read(audio_input)

	# The input reads as nil until a recording or file has been provided.
	if is_nil(chosen_audio) do
	  raise "No file chosen. Please select a file in the widget above."
	end

	# Resolve the uploaded file reference to a path on disk and load its raw bytes.
	file = chosen_audio.file_ref |> Kino.Input.file_path() |> File.read!()
	options = [model_name: "openai/whisper-tiny", num_channels: chosen_audio.num_channels]

	# Materialize the transcription results into a list before building the data frame.
	transcription_df =
	  AudioTagger.Transcriber.transcribe_audio(featurizer, file, options)
	  |> Enum.to_list()
	  |> Explorer.DataFrame.new()
	```

	## Step 3: Tag Transcribed Audio

	```elixir
	# Load the code/description table written next to `tmpfile`, forcing all three
	# columns to strings, then keep the first three columns under short names.
	labels_df =
	  "#{tmpfile}.csv"
	  |> Explorer.DataFrame.from_csv!(
	    dtypes: [
	      {"DIAGNOSIS CODE", :string},
	      {"LONG DESCRIPTION", :string},
	      {"SHORT DESCRIPTION", :string}
	    ]
	  )
	  |> Explorer.DataFrame.select([0, 1, 2])
	  |> Explorer.DataFrame.rename(["code", "long_description", "short_description"])

	# Tag the transcription using the labels and the "#{tmpfile}.bin" file.
	# NOTE(review): the .bin file presumably holds the vectors precalculated in Step 1 — confirm.
	tagged_audio =
	  transcription_df
	  |> AudioTagger.Classifier.SemanticSearch.tag(
	    labels_df,
	    "#{tmpfile}.bin"
	  )
	```