<!-- livebook:{"app_settings":{"auto_shutdown_ms":5000,"multi_session":true,"show_source":true,"slug":"transcriber"}} -->

```elixir
# Notebook setup: install the dependencies used by the cells below.
# `audio_tagger` is loaded from a local path; the other packages are fetched
# by version requirement.
Mix.install(
  [
    {:audio_tagger, path: "./development/ml/audio_tagger"},
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:explorer, "~> 0.7.0"},
    {:kino_explorer, "~> 0.1.11"}
  ],
  # Make EXLA the default Nx backend.
  config: [nx: [default_backend: EXLA.Backend]]
)
```

```elixir
# Extension-less base path in the system temp directory; later cells append
# ".csv" and ".bin" to it.
# `System.tmp_dir!/0` raises a descriptive error when no writable temp
# directory can be found, instead of returning `nil` and failing inside
# `Path.join/2`.
tmpfile = Path.join(System.tmp_dir!(), "CMS32_DESC_LONG_SHORT_DX")

# NOTE(review): presumably generates the "#{tmpfile}.csv" and "#{tmpfile}.bin"
# files that are read further down - confirm against AudioTagger.Vectors.
AudioTagger.Vectors.precalculate(tmpfile)
```

```elixir
# Prepare the featurizer once; it is reused when transcribing.
featurizer = AudioTagger.Transcriber.prepare_featurizer()

# Audio input widget, configured with the featurizer's sampling rate.
audio_input =
  Kino.Input.audio(
    "Audio",
    sampling_rate: featurizer.sampling_rate
  )
```

```elixir
# Read the current value of the audio widget; `nil` is treated as
# "no file chosen" and aborts the cell with an explanatory error.
chosen_audio = Kino.Input.read(audio_input)

if is_nil(chosen_audio) do
  raise "No file chosen. Please select a file in the widget above."
end

# Raw contents of the selected audio file.
file =
  chosen_audio.file_ref
  |> Kino.Input.file_path()
  |> File.read!()

options = [model_name: "openai/whisper-tiny", num_channels: chosen_audio.num_channels]

# `Enum.to_list/1` materializes the transcription result into a list before it
# is handed to `Explorer.DataFrame.new/1`; it yields the same list as mapping
# every element through the identity function, but states the intent directly.
transcription_df =
  featurizer
  |> AudioTagger.Transcriber.transcribe_audio(file, options)
  |> Enum.to_list()
  |> Explorer.DataFrame.new()
```

```elixir
# Load the label CSV, forcing the three named columns to be read as strings,
# then keep the first three columns and give them short snake_case names.
labels_df =
  "#{tmpfile}.csv"
  |> Explorer.DataFrame.from_csv!(
    dtypes: [
      {"DIAGNOSIS CODE", :string},
      {"LONG DESCRIPTION", :string},
      {"SHORT DESCRIPTION", :string}
    ]
  )
  |> Explorer.DataFrame.select([0, 1, 2])
  |> Explorer.DataFrame.rename(~w(code long_description short_description))

# Tag the transcription using the labels and the "#{tmpfile}.bin" file.
tagged_audio =
  AudioTagger.Classifier.SemanticSearch.tag(
    transcription_df,
    labels_df,
    "#{tmpfile}.bin"
  )
```