noahsettersten's picture
chore: Rename Transcriber to prepare for a transcription context
4553650
raw
history blame
2.09 kB
defmodule MedicalTranscription.Transcription do
@moduledoc """
Takes a path to an audio file and transcribes it to text. As each chunk is available, it passes it to the `Coding`
context to look for possible matching codes.
"""
alias MedicalTranscription.Coding
# Looks up codes for the chunk's text with `Coding.process_chunk/1`, builds the row map for
# it, and delivers that row to `live_view_pid` as a `{:transcription_row, row}` message.
# Evaluates to the message that was sent (the return value of `send/2`).
defp get_tags_and_send_result(chunk, index, live_view_pid) do
  row = build_result(index, chunk, Coding.process_chunk(chunk.text))
  message = {:transcription_row, row}

  send(live_view_pid, message)
end
# Ideas for future exploration:
# - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
# (such as pgvector or Pinecone.io)
# - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
# complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
# well
@doc """
Transcribes the audio file at `audio_file_path` and reports each result to `live_view_pid`.

Every transcribed chunk is coded and sent to `live_view_pid` as a
`{:transcription_row, row}` message as soon as it is available. Chunk rows are numbered
starting at 1. After the last chunk, the text of all chunks is concatenated (in order, with
no separator added) and sent as one additional row with id `0` and `nil` timestamps, so the
whole recording is coded as a single piece as well.

Returns the result of the final `send/2`, i.e. the summary message.
"""
def stream_transcription_and_search(live_view_pid, audio_file_path) do
  # Audio transcription + semantic search, one chunk at a time.
  summary_text =
    audio_file_path
    |> stream_transcription()
    |> Enum.reduce("", fn {chunk, index}, acc ->
      # The stream indexes chunks from 0, but id 0 is used for the summary row below.
      # Shift chunk ids by one so the first chunk and the summary never share an id.
      get_tags_and_send_result(chunk, index + 1, live_view_pid)
      acc <> chunk.text
    end)

  # The summary covers the whole recording, so it has no start/end timestamps.
  summary_chunk = %{
    text: summary_text,
    start_timestamp_seconds: nil,
    end_timestamp_seconds: nil
  }

  get_tags_and_send_result(summary_chunk, 0, live_view_pid)
end
# Hands the audio file to `MedicalTranscription.TranscriptionServing` through
# `Nx.Serving.batched_run/2` and lazily pairs every element of the result with its
# zero-based position, yielding `{chunk, index}` tuples.
defp stream_transcription(audio_file_path) do
  serving_input = {:file, audio_file_path}
  chunks = Nx.Serving.batched_run(MedicalTranscription.TranscriptionServing, serving_input)

  Stream.with_index(chunks)
end
# Assembles the row map that is sent for a chunk: the given `index` as `:id`, the chunk's
# start/end timestamps formatted by `format_timestamp/1`, the chunk text, and the `tags`
# found for it.
defp build_result(index, chunk, tags) do
  start_mark = format_timestamp(chunk.start_timestamp_seconds)
  end_mark = format_timestamp(chunk.end_timestamp_seconds)

  %{id: index, start_mark: start_mark, end_mark: end_mark, text: chunk.text, tags: tags}
end
# Formats a number of seconds as an "HH:MM:SS" string. The value is rounded to the nearest
# whole second and interpreted as seconds after midnight. A `nil` timestamp stays `nil`.
defp format_timestamp(nil), do: nil

defp format_timestamp(seconds) do
  whole_seconds = round(seconds)
  time_of_day = Time.from_seconds_after_midnight(whole_seconds)

  Time.to_string(time_of_day)
end
end