noahsettersten's picture
refactor: Move code search into TranscriptionTextComponent
3748deb
raw
history blame
1.77 kB
defmodule MedicalTranscription.Transcription do
  @moduledoc """
  Streams the transcription of an audio file to another process.

  The audio file is handed to `MedicalTranscription.TranscriptionServing` through `Nx.Serving.batched_run/2`.
  Every chunk produced is forwarded to the receiving process as a `{:transcription_row, row}` message, and a
  final row carrying the concatenated text of all chunks is sent once the stream is exhausted.
  """

  # Ideas for future exploration:
  # - The long description vectors currently live in a binary file on disk; a vector DB (such as pgvector or
  #   Pinecone.io) could hold them instead.
  # - Rather than coding each chunk of transcribed audio separately, complete sentences could be gathered based on
  #   punctuation. Suggesting codes for the entire audio as a single piece may be worthwhile as well.

  @doc """
  Transcribes the audio at `audio_file_path` and reports each piece to `live_view_pid`.

  For every chunk yielded by the serving, a `{:transcription_row, row}` message is sent, where `row` is a map with
  the keys `:id`, `:start_mark`, `:end_mark` and `:text`. Chunk rows are numbered from 1 in the order they arrive.

  After the last chunk, one more row is sent with `id: 0`, `nil` start/end marks and the text of all chunks
  concatenated in order.

  Each chunk is read through its `text`, `start_timestamp_seconds` and `end_timestamp_seconds` fields.

  Returns the value of the final `send/2`, i.e. the summary `{:transcription_row, row}` message.

  Note: the code in this module only transcribes and forwards rows; it performs no search itself.
  """
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
    # The comprehension consumes the stream lazily, so each row is pushed as soon as its chunk is available while
    # the running text is accumulated for the summary row.
    full_text =
      for {chunk, row_id} <- numbered_chunks(audio_file_path), reduce: "" do
        text_so_far ->
          push_row(live_view_pid, row_id, chunk)
          text_so_far <> chunk.text
      end

    # Row 0 is reserved for the summary; it spans the whole recording, hence no timestamps.
    push_row(live_view_pid, 0, %{
      text: full_text,
      start_timestamp_seconds: nil,
      end_timestamp_seconds: nil
    })
  end

  # Runs the transcription serving on the file and pairs each resulting chunk with its 1-based position.
  defp numbered_chunks(audio_file_path) do
    chunks =
      Nx.Serving.batched_run(MedicalTranscription.TranscriptionServing, {:file, audio_file_path})

    Stream.with_index(chunks, 1)
  end

  # Builds the row map for `chunk` and sends it to `live_view_pid` as a `{:transcription_row, row}` message.
  defp push_row(live_view_pid, row_id, chunk) do
    start_mark = clock_string(chunk.start_timestamp_seconds)
    end_mark = clock_string(chunk.end_timestamp_seconds)

    row = %{id: row_id, start_mark: start_mark, end_mark: end_mark, text: chunk.text}

    send(live_view_pid, {:transcription_row, row})
  end

  # Renders a number of seconds as a time-of-day string; a missing timestamp stays `nil`.
  defp clock_string(nil), do: nil

  defp clock_string(seconds) do
    whole_seconds = round(seconds)
    time = Time.from_seconds_after_midnight(whole_seconds)

    Time.to_string(time)
  end
end