defmodule MedicalTranscription.Transcription do
  @moduledoc """
  Takes a path to an audio file and transcribes it to text. As each chunk becomes available, it is passed to the
  `Coding` context to look for possible matching codes, and the resulting row is sent to the requesting process.
  """

  alias MedicalTranscription.Coding

  # Row id reserved for the final row that covers the whole transcript.
  @summary_row_id 0

  # Chunk rows are numbered from here so that no chunk ever shares the summary row's id.
  @first_chunk_row_id 1

  # Ideas for future exploration:
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
  #   (such as pgvector or Pinecone.io)
  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
  #   well

  @doc """
  Transcribes the audio file at `audio_file_path` and sends one `{:transcription_row, row}` message to
  `live_view_pid` per transcribed chunk, followed by a final summary row.

  Each `row` is a map with the keys `:id`, `:start_mark`, `:end_mark`, `:text` and `:tags`:

    * chunk rows get ids `1, 2, 3, ...` in the order the chunks are produced;
    * the summary row has id `0`, `nil` start/end marks, and a `:text` that is the concatenation of every chunk's
      text (an empty string when no chunk was produced);
    * `:tags` is whatever `Coding.process_chunk/1` returns for the row's text.

  Returns the summary message that was sent.
  """
  @spec stream_transcription_and_search(Process.dest(), Path.t()) :: {:transcription_row, map()}
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
    # audio transcription + semantic search
    summary_text =
      audio_file_path
      |> stream_transcription()
      |> Enum.reduce("", fn {chunk, row_id}, acc ->
        # Send each row as soon as its chunk is available rather than waiting for the whole transcript.
        get_tags_and_send_result(chunk, row_id, live_view_pid)

        acc <> chunk.text
      end)

    summary_chunk = %{
      text: summary_text,
      start_timestamp_seconds: nil,
      end_timestamp_seconds: nil
    }

    get_tags_and_send_result(summary_chunk, @summary_row_id, live_view_pid)
  end

  # Looks up the tags for a chunk's text and sends the resulting row to `live_view_pid`.
  defp get_tags_and_send_result(chunk, row_id, live_view_pid) do
    tags = Coding.process_chunk(chunk.text)
    result = build_result(row_id, chunk, tags)

    send(live_view_pid, {:transcription_row, result})
  end

  # Runs the file through the transcription serving and pairs every chunk with its row id. Ids start at
  # `@first_chunk_row_id` (not 0) because 0 is reserved for the summary row.
  defp stream_transcription(audio_file_path) do
    MedicalTranscription.TranscriptionServing
    |> Nx.Serving.batched_run({:file, audio_file_path})
    |> Stream.with_index(@first_chunk_row_id)
  end

  # Builds the row map that is sent to the receiving process.
  defp build_result(row_id, chunk, tags) do
    %{
      id: row_id,
      start_mark: format_timestamp(chunk.start_timestamp_seconds),
      end_mark: format_timestamp(chunk.end_timestamp_seconds),
      text: chunk.text,
      tags: tags
    }
  end

  # Formats a number of seconds as "HH:MM:SS" (rounded to the nearest second); `nil` stays `nil`.
  defp format_timestamp(nil), do: nil

  defp format_timestamp(seconds) do
    seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
  end
end