defmodule MedicalTranscription.Transcription do
  @moduledoc """
  Takes a path to an audio file and transcribes it to text. As each chunk is available, it passes it to the `Coding`
  context to look for possible matching codes.
  """

  alias MedicalTranscription.Coding

  # Row id reserved for the summary row that covers the whole transcription.
  @summary_index 0

  # Transcribed chunks are numbered from here so that no chunk shares an id with the summary row.
  @first_chunk_index 1

  # Ideas for future exploration:
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
  #   (such as pgvector or Pinecone.io)
  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
  #   well

  @doc """
  Transcribes the audio file at `audio_file_path` and sends one message per result to `live_view_pid`.

  Every transcribed chunk is passed to `Coding.process_chunk/1` and the outcome is sent as
  `{:transcription_row, row}`, where `row` is a map with the keys `:id`, `:start_mark`, `:end_mark`, `:text` and
  `:tags`. Chunk rows are numbered from #{@first_chunk_index} in the order they are produced.

  Once all chunks are consumed, the concatenation of every chunk's text is coded as a whole and sent as a final row
  with `id: #{@summary_index}` and `nil` start/end marks.

  Returns the message sent for the summary row.
  """
  @spec stream_transcription_and_search(Process.dest(), Path.t()) :: {:transcription_row, map()}
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
    # Audio transcription + semantic search: each chunk is coded and sent as soon as it is consumed, while its text
    # is accumulated for the summary row.
    summary_text =
      audio_file_path
      |> stream_transcription()
      |> Enum.reduce("", fn {chunk, index}, acc ->
        get_tags_and_send_result(chunk, index, live_view_pid)
        acc <> chunk.text
      end)

    summary_chunk = %{
      text: summary_text,
      start_timestamp_seconds: nil,
      end_timestamp_seconds: nil
    }

    get_tags_and_send_result(summary_chunk, @summary_index, live_view_pid)
  end

  # Codes the chunk's text, builds the row for it and sends that row to `live_view_pid`.
  defp get_tags_and_send_result(chunk, index, live_view_pid) do
    tags = Coding.process_chunk(chunk.text)
    result = build_result(index, chunk, tags)

    send(live_view_pid, {:transcription_row, result})
  end

  # Runs the transcription serving on the file and pairs every chunk with its row index.
  # NOTE(review): assumes the serving yields an enumerable of chunks exposing `:text`, `:start_timestamp_seconds` and
  # `:end_timestamp_seconds` - confirm against the `MedicalTranscription.TranscriptionServing` configuration.
  defp stream_transcription(audio_file_path) do
    MedicalTranscription.TranscriptionServing
    |> Nx.Serving.batched_run({:file, audio_file_path})
    |> Stream.with_index(@first_chunk_index)
  end

  # Builds the row map that is sent to the receiving process.
  defp build_result(index, chunk, tags) do
    %{
      id: index,
      start_mark: format_timestamp(chunk.start_timestamp_seconds),
      end_mark: format_timestamp(chunk.end_timestamp_seconds),
      text: chunk.text,
      tags: tags
    }
  end

  # Formats a number of seconds as a time-of-day string (via `Time.to_string/1`); `nil` is passed through so rows
  # without timestamps, such as the summary row, carry no marks.
  defp format_timestamp(nil), do: nil

  defp format_timestamp(seconds) do
    seconds
    |> round()
    |> Time.from_seconds_after_midnight()
    |> Time.to_string()
  end
end