defmodule MedicalTranscription.Transcription do
  @moduledoc """
  Takes a path to an audio file and transcribes it to text.

  Each transcribed chunk is forwarded to a receiving process as a
  `{:transcription_row, row}` message, followed by one summary row that holds
  the concatenated text of every chunk.
  """

  # Ideas for future exploration:
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
  #   (such as pgvector or Pinecone.io)
  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
  #   well

  @typedoc """
  Row delivered to the receiving process inside a `{:transcription_row, row}` message.
  """
  @type row :: %{
          id: non_neg_integer(),
          start_mark: String.t() | nil,
          end_mark: String.t() | nil,
          text: String.t()
        }

  @doc """
  Transcribes the audio file at `audio_file_path` and reports the results to `live_view_pid`.

  For every chunk produced by the transcription serving, a
  `{:transcription_row, row}` message is sent to `live_view_pid` while the
  chunks are being enumerated. Chunk rows get ids starting at `1`, in the order
  the chunks are produced, and carry `start_mark` / `end_mark` strings formatted
  as `HH:MM:SS`.

  Once all chunks have been consumed, a final summary row is sent with id `0`,
  `nil` for both marks, and the text of all chunks concatenated in order
  (no separator is inserted between chunks).

  Returns the summary message, i.e. the value returned by the last `send/2`.
  """
  @spec stream_transcription_and_search(pid(), Path.t()) :: {:transcription_row, row()}
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
    # audio transcription + semantic search
    summary_text =
      audio_file_path
      |> stream_transcription()
      |> Enum.reduce("", fn {chunk, index}, acc ->
        # Index 0 is reserved for the summary row, so chunk ids are shifted by one.
        send_result(chunk, index + 1, live_view_pid)
        acc <> chunk.text
      end)

    summary_chunk = %{
      text: summary_text,
      start_timestamp_seconds: nil,
      end_timestamp_seconds: nil
    }

    send_result(summary_chunk, 0, live_view_pid)
  end

  # Runs the audio file through `MedicalTranscription.TranscriptionServing` and pairs
  # each resulting chunk with its zero-based position.
  # NOTE(review): presumably the serving yields chunks lazily (streaming mode) - confirm
  # against the serving's configuration.
  defp stream_transcription(audio_file_path) do
    MedicalTranscription.TranscriptionServing
    |> Nx.Serving.batched_run({:file, audio_file_path})
    |> Stream.with_index()
  end

  # Converts a chunk into a display row and sends it to `live_view_pid`.
  # Returns the message that was sent.
  defp send_result(chunk, index, live_view_pid) do
    result = %{
      id: index,
      start_mark: format_timestamp(chunk.start_timestamp_seconds),
      end_mark: format_timestamp(chunk.end_timestamp_seconds),
      text: chunk.text
    }

    send(live_view_pid, {:transcription_row, result})
  end

  # Formats a number of seconds as an `HH:MM:SS` string; `nil` (used by the summary
  # row, which has no timestamps) passes through unchanged.
  #
  # Known limitation: `Time.from_seconds_after_midnight/1` wraps around at 24 hours,
  # so an offset of a day or more would start again from `00:00:00`.
  defp format_timestamp(nil), do: nil

  defp format_timestamp(seconds) do
    seconds
    |> round()
    |> Time.from_seconds_after_midnight()
    |> Time.to_string()
  end
end