medicode / lib /medical_transcription /transcription_server.ex
timgremore's picture
wip: feat: Define show screen for transcriptions
deecaf3
raw
history blame
2.91 kB
defmodule MedicalTranscription.TranscriptionServer do
  @moduledoc """
  GenServer responsible for transcribing audio files.

  The server is started with a keyword list that contains a `:transcription`
  whose `filename` is the path of the audio file. Right after `init/1` returns,
  the file is run through `MedicalTranscription.TranscriptionServing`. Each
  transcribed chunk is sent back to this process as `{:chunk, result}`, followed
  by one `{:summary, result}` holding the concatenated text and a final
  `:finished` message, upon which the server stops with reason `:shutdown`.
  """

  # NOTE(review): `use GenServer` generates a child spec with `restart: :permanent`.
  # Because this server stops itself once a transcription is finished, starting it
  # under a supervisor with that default spec would presumably restart it and
  # transcribe the same file again - confirm how callers supervise it.
  use GenServer

  @doc """
  Starts a transcription server linked to the calling process.

  `args` is a keyword list and must contain `:transcription`, a value exposing a
  `filename` field with the path of the audio file to transcribe.
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, [])
  end

  @impl GenServer
  def init(init_arg) do
    # NOTE: The rule is: don't do anything slow or risky in your GenServer's init function.
    # But that isn't always practical. GenServers have a reasonably elegant solution to this: handle_continue/2.
    # We can change our init function to return {:ok, INITIAL_STATE, {:continue, CONTINUE_TYPE}} which will both
    # unblock the initialization and guarantee that handle_continue/2 is called before any other message is processed.
    #
    # Explanation from: https://www.openmymind.net/Elixir-A-Little-Beyond-The-Basics-Part-8-genservers
    {:ok, init_arg, {:continue, :start}}
  end

  # Runs the transcription synchronously. The `{:chunk, _}`, `{:summary, _}` and
  # `:finished` messages the process sends to itself along the way are therefore
  # only handled after the whole file has been processed.
  @impl GenServer
  def handle_continue(:start, state) do
    # Fetch by key instead of matching the exact list so that additional start
    # options do not crash the server; a missing key still raises.
    transcription = Keyword.fetch!(state, :transcription)
    stream_transcription_and_search(transcription.filename)

    {:noreply, state}
  end

  # Results are currently received and dropped; the state is left untouched.
  @impl GenServer
  def handle_info({:chunk, _result}, state) do
    {:noreply, state}
  end

  def handle_info({:summary, _result}, state) do
    {:noreply, state}
  end

  # Stop once the transcription is done, keeping the state in its original
  # shape rather than swapping it for an unrelated term.
  def handle_info(:finished, state) do
    {:stop, :shutdown, state}
  end

  # Ideas for future exploration:
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
  #   (such as pgvector or Pinecone.io)
  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
  #   well

  # Transcribes `audio_file_path`, sending one `{:chunk, result}` message per
  # chunk (ids starting at 1) to the current process, then a `{:summary, result}`
  # message (id 0, no timestamps) with the text of all chunks joined together,
  # and finally `:finished`.
  defp stream_transcription_and_search(audio_file_path) do
    pid = self()

    # audio transcription + semantic search
    summary_text =
      MedicalTranscription.TranscriptionServing
      |> Nx.Serving.batched_run({:file, audio_file_path})
      |> Stream.with_index(1)
      |> Enum.map_join(fn {chunk, index} ->
        send_result(:chunk, chunk, index, pid)
        chunk.text
      end)

    summary_chunk = %{
      text: summary_text,
      start_timestamp_seconds: nil,
      end_timestamp_seconds: nil
    }

    send_result(:summary, summary_chunk, 0, pid)
    send(pid, :finished)
  end

  # Builds the result map for `chunk` and sends it to `pid` tagged with `status`.
  defp send_result(status, chunk, index, pid) when status in [:chunk, :summary] do
    result = %{
      id: index,
      start_mark: format_timestamp(chunk.start_timestamp_seconds),
      end_mark: format_timestamp(chunk.end_timestamp_seconds),
      text: chunk.text
    }

    send(pid, {status, result})
  end

  # Renders a number of seconds as a time-of-day string (rounded to whole
  # seconds); `nil` is passed through for chunks without timestamps.
  defp format_timestamp(nil), do: nil

  defp format_timestamp(seconds) do
    seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
  end
end