Spaces:

headway
/

medicode

Runtime error

App Files Files Community

timgremore committed on Feb 6

Commit

34b4725

•

1 Parent(s): 29c5ebd

wip: feat: Separate transcriptions from liveview

Browse files

Files changed (11) hide show

lib/medical_transcription/application.ex +5 -0
lib/medical_transcription/transcription.ex +2 -47
lib/medical_transcription/transcription_server.ex +83 -0
lib/medical_transcription/transcription_supervisor.ex +18 -0
lib/medical_transcription/utilities.ex +8 -0
lib/medical_transcription_web/components/result_list_component.ex +32 -0
lib/medical_transcription_web/components/transcription_text_component.ex +15 -22
lib/medical_transcription_web/live/home_live/index.ex +28 -8
test/medical_transcription/transcription_server_test.exs +31 -0
test/medical_transcription/transcription_supervisor_test.exs +13 -0
test/medical_transcription_web/live/home_live_test.exs +7 -1

lib/medical_transcription/application.ex CHANGED Viewed

@@ -21,6 +21,11 @@ defmodule MedicalTranscription.Application do
       transcription_spec(),
       token_classification_spec(),
       text_embedding_spec(),
       # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
       # {MedicalTranscription.Worker, arg},
       # Start to serve requests, typically the last entry

       transcription_spec(),
       token_classification_spec(),
       text_embedding_spec(),
+      {
+        MedicalTranscription.TranscriptionSupervisor,
+        name: MedicalTranscription.TranscriptionSupervisor,
+        strategy: :one_for_one
+      },
       # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
       # {MedicalTranscription.Worker, arg},
       # Start to serve requests, typically the last entry

lib/medical_transcription/transcription.ex CHANGED Viewed

@@ -3,52 +3,7 @@ defmodule MedicalTranscription.Transcription do
   Takes a path to an audio file and transcribes it to text.
   """
-  # Ideas for future exploration:
-  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
-  #   (such as pgvector or Pinecone.io)
-  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
-  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
-  #   well
-  def stream_transcription_and_search(live_view_pid, audio_file_path) do
-    # audio transcription + semantic search
-    summary_text =
-      audio_file_path
-      |> stream_transcription()
-      |> Enum.reduce("", fn {chunk, index}, acc ->
-        send_result(chunk, index + 1, live_view_pid)
-        acc <> chunk.text
-      end)
-    summary_chunk = %{
-      text: summary_text,
-      start_timestamp_seconds: nil,
-      end_timestamp_seconds: nil
-    }
-    send_result(summary_chunk, 0, live_view_pid)
-  end
-  defp stream_transcription(audio_file_path) do
-    MedicalTranscription.TranscriptionServing
-    |> Nx.Serving.batched_run({:file, audio_file_path})
-    |> Stream.with_index()
-  end
-  defp send_result(chunk, index, live_view_pid) do
-    result = %{
-      id: index,
-      start_mark: format_timestamp(chunk.start_timestamp_seconds),
-      end_mark: format_timestamp(chunk.end_timestamp_seconds),
-      text: chunk.text
-    }
-    send(live_view_pid, {:transcription_row, result})
-  end
-  defp format_timestamp(seconds) when is_nil(seconds), do: nil
-  defp format_timestamp(seconds) do
-    seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
   end
 end

   Takes a path to an audio file and transcribes it to text.
   """
+  @doc """
+  Hands the audio file at `audio_file_path` to the transcription supervisor.
+  Returns whatever `MedicalTranscription.TranscriptionSupervisor.start_transcription/1` returns.
+  """
+  def stream_transcription_and_search(audio_file_path) do
+    alias MedicalTranscription.TranscriptionSupervisor
+    TranscriptionSupervisor.start_transcription(audio_file_path)
   end
 end

lib/medical_transcription/transcription_server.ex ADDED Viewed

	@@ -0,0 +1,83 @@

+defmodule MedicalTranscription.TranscriptionServer do
+  @moduledoc """
+  GenServer responsible for transcribing a single audio file.
+
+  The server is started with `[file_path: path]` and transcribes that file in
+  `handle_continue/2`. While doing so it sends itself one `{:chunk, result}` message per
+  transcribed chunk, then a `{:summary, result}` message holding the concatenated text, and
+  finally `:finished`, upon which it stops.
+  """
+  # `restart: :transient` keeps a supervisor from restarting the server (and transcribing the
+  # same file again) after it stops with `:shutdown`; the default `:permanent` would restart it.
+  use GenServer, restart: :transient
+  @doc """
+  Starts a transcription server. `args` is expected to be `[file_path: path]`.
+  """
+  def start_link(args) do
+    GenServer.start_link(__MODULE__, args, [])
+  end
+  @impl GenServer
+  def init(init_arg) do
+    # Defer the transcription to handle_continue/2 so init/1 returns right away.
+    {:ok, init_arg, {:continue, :start}}
+  end
+  @impl GenServer
+  def handle_continue(:start, [file_path: file_path] = state) do
+    stream_transcription_and_search(file_path)
+    {:noreply, state}
+  end
+  @impl GenServer
+  def handle_info({:chunk, _result}, state) do
+    {:noreply, state}
+  end
+  def handle_info({:summary, _result}, state) do
+    {:noreply, state}
+  end
+  def handle_info(:finished, state) do
+    # The third element is the final state, so pass the real state through.
+    {:stop, :shutdown, state}
+  end
+  # Ideas for future exploration:
+  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
+  #   (such as pgvector or Pinecone.io)
+  # - A potential improvement would be to not code each chunk of transcribed audio separately, but to instead gather
+  #   complete sentences based on punctuation. We may want to suggest codes for the entire audio as a single piece as
+  #   well
+  #
+  # Runs the transcription serving on the file, reporting every chunk (ids starting at 1), then
+  # the summary (id 0), then `:finished`, all as messages to this process.
+  defp stream_transcription_and_search(audio_file_path) do
+    pid = self()
+    # audio transcription + semantic search
+    summary_text =
+      MedicalTranscription.TranscriptionServing
+      |> Nx.Serving.batched_run({:file, audio_file_path})
+      |> Stream.with_index(1)
+      |> Enum.map_join(fn {chunk, index} ->
+        send_result(:chunk, chunk, index, pid)
+        chunk.text
+      end)
+    summary_chunk = %{
+      text: summary_text,
+      start_timestamp_seconds: nil,
+      end_timestamp_seconds: nil
+    }
+    send_result(:summary, summary_chunk, 0, pid)
+    send(pid, :finished)
+  end
+  # Builds the result map for a chunk and sends it to `pid` tagged with `status`.
+  defp send_result(status, chunk, index, pid) when status in [:chunk, :summary] do
+    result = %{
+      id: index,
+      start_mark: format_timestamp(chunk.start_timestamp_seconds),
+      end_mark: format_timestamp(chunk.end_timestamp_seconds),
+      text: chunk.text
+    }
+    send(pid, {status, result})
+  end
+  # Formats a number of seconds as a time-of-day string; `nil` (used by the summary) stays `nil`.
+  defp format_timestamp(nil), do: nil
+  defp format_timestamp(seconds) do
+    seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
+  end
+end

lib/medical_transcription/transcription_supervisor.ex ADDED Viewed

	@@ -0,0 +1,18 @@

+defmodule MedicalTranscription.TranscriptionSupervisor do
+  @moduledoc """
+  Dynamic supervisor under which transcription servers are started on demand.
+  """
+  # `use DynamicSupervisor` generates child_spec/1 for this module.
+  use DynamicSupervisor, restart: :transient
+  alias MedicalTranscription.TranscriptionServer
+  @doc """
+  Starts the supervisor, registered under this module's name.
+  """
+  def start_link(init_arg) do
+    registration = [name: __MODULE__]
+    DynamicSupervisor.start_link(__MODULE__, init_arg, registration)
+  end
+  @impl DynamicSupervisor
+  def init(_init_arg), do: DynamicSupervisor.init(strategy: :one_for_one)
+  @doc """
+  Starts a `MedicalTranscription.TranscriptionServer` child for `uploaded_file`.
+  Returns the result of `DynamicSupervisor.start_child/2`.
+  """
+  def start_transcription(uploaded_file) do
+    DynamicSupervisor.start_child(
+      __MODULE__,
+      {TranscriptionServer, file_path: uploaded_file}
+    )
+  end
+end

lib/medical_transcription/utilities.ex CHANGED Viewed

@@ -19,4 +19,12 @@ defmodule MedicalTranscription.Utilities do
     tallied_enumerable
     |> Enum.map_join(", ", fn {key, value} -> "#{key} (#{value})" end)
   end
 end

     tallied_enumerable
     |> Enum.map_join(", ", fn {key, value} -> "#{key} (#{value})" end)
   end
+  @doc """
+  Toggles `item` in a collection: removes it when present, adds it when absent.
+
+  A `MapSet` yields a `MapSet`. Any other enumerable is treated as a list: every element
+  equal to `item` is removed, or `item` is appended at the end.
+  """
+  def map_set_toggle(%MapSet{} = map_set, item) do
+    # `++` and `Enum.reject/2` would turn the set into a list (or raise), so use the MapSet API.
+    if MapSet.member?(map_set, item) do
+      MapSet.delete(map_set, item)
+    else
+      MapSet.put(map_set, item)
+    end
+  end
+  def map_set_toggle(list, item) do
+    if Enum.any?(list, &(&1 == item)) do
+      Enum.reject(list, &(&1 == item))
+    else
+      list ++ [item]
+    end
+  end
 end

lib/medical_transcription_web/components/result_list_component.ex ADDED Viewed

	@@ -0,0 +1,32 @@

+defmodule MedicalTranscriptionWeb.Components.ResultListComponent do
+  @moduledoc """
+  Transcription chunk results list component.
+  """
+  use MedicalTranscriptionWeb, :live_component
+  alias MedicalTranscriptionWeb.Components.TranscriptionTextComponent
+  @doc """
+  Shows a list of transcribed text from a stream, with actions for each.
+
+  Expects `@rows` to enumerate `{dom_id, row}` pairs, where each row has `start_mark`,
+  `end_mark` and `text`, and passes `@finalized_codes` through to every
+  `TranscriptionTextComponent`.
+  """
+  @impl Phoenix.LiveComponent
+  def render(assigns) do
+    ~H"""
+    <div id="result_list" class="flex flex-col gap-14" phx-update="stream">
+      <%= for {dom_id, row} <- @rows do %>
+        <.live_component
+          module={TranscriptionTextComponent}
+          id={dom_id}
+          start_mark={row.start_mark}
+          end_mark={row.end_mark}
+          text={row.text}
+          finalized_codes={@finalized_codes}
+        />
+      <% end %>
+    </div>
+    """
+  end
+end

lib/medical_transcription_web/components/transcription_text_component.ex CHANGED Viewed

@@ -15,32 +15,21 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
   alias MedicalTranscription.Coding.CodeVectorMatch
   @impl Phoenix.LiveComponent
-  def update(assigns, socket) do
-    socket = assign_initial_state(assigns, socket)
-    {:ok, socket}
-  end
-  defp assign_initial_state(
-         %{id: id, start_mark: start_mark, end_mark: end_mark, text: text},
-         socket
-       ) do
-    self_pid = self()
-    initial_state = %{
-      id: id,
-      start_mark: start_mark,
-      end_mark: end_mark,
-      text: text
-    }
-    socket
-    |> assign(initial_state)
-    |> assign_async(:tags, fn -> classify_text(text) end)
-    |> assign_async(:keywords, fn -> find_keywords(self_pid, text) end)
-  end
-  defp assign_initial_state(_assigns, socket), do: socket
   @impl Phoenix.LiveComponent
   def render(assigns) do
@@ -164,4 +153,8 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
     # 2. A fast process finding the phrase closest in vector space to the whole text.
     KeywordFinder.find_most_similar_label(text, phrases, 2)
   end
 end

   alias MedicalTranscription.Coding.CodeVectorMatch
   @impl Phoenix.LiveComponent
+  def update(assigns, socket) do
+    # The assigns passed by the parent (`text`, `start_mark`, ...) only arrive in update/2;
+    # mount/1 receives a bare socket, so reading `socket.assigns.text` there would raise.
+    previous_text = socket.assigns[:text]
+    socket = assign(socket, assigns)
+    text = socket.assigns[:text]
+    # update/2 runs again whenever the parent re-renders, so the async classification and
+    # keyword lookup are only (re)started when the text is new.
+    if is_nil(text) or text == previous_text do
+      {:ok, socket}
+    else
+      self_pid = self()
+      socket =
+        socket
+        |> assign_async(:tags, fn -> classify_text(text) end)
+        |> assign_async(:keywords, fn -> find_keywords(self_pid, text) end)
+      {:ok, socket}
+    end
+  end
   @impl Phoenix.LiveComponent
   def render(assigns) do
     # 2. A fast process finding the phrase closest in vector space to the whole text.
     KeywordFinder.find_most_similar_label(text, phrases, 2)
   end
+  # Returns true when `code` is one of `finalized_codes`.
+  # Enum.any?/2 takes a predicate function, so the code is compared explicitly.
+  defp code_selected?(code, finalized_codes) do
+    Enum.any?(finalized_codes, &(&1 == code))
+  end
 end

lib/medical_transcription_web/live/home_live/index.ex CHANGED Viewed

@@ -14,7 +14,8 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
       status: :pending,
       audio_pipeline: nil,
       summary_keywords: [],
-      transcriptions: list_transcriptions(session["current_user"])
     }
     socket =
@@ -39,14 +40,14 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
       <main class="flex-1 pl-16 pr-16 pt-[25px]">
         <div class="flex flex-col h-full mx-auto max-w-5xl">
           <div class="flex-1 flex flex-col space-y-6">
-            <.result_heading
-              status={@status}
-              filename={@uploaded_file_name}
-              summary_keywords={@summary_keywords}
-            />
             <%= if @status != :pending do %>
-              <.result_list rows={@streams.transcription_rows} />
             <% end %>
             <%= if @status == :pending do %>
@@ -184,6 +185,25 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
     {:noreply, socket}
   end
   @impl true
   def handle_info({ref, _result}, socket) do
     # See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the

       status: :pending,
       audio_pipeline: nil,
       summary_keywords: [],
+      transcriptions: list_transcriptions(session["current_user"]),
+      finalized_codes: []
     }
     socket =
       <main class="flex-1 pl-16 pr-16 pt-[25px]">
         <div class="flex flex-col h-full mx-auto max-w-5xl">
           <div class="flex-1 flex flex-col space-y-6">
             <%= if @status != :pending do %>
+              <.live_component
+                module={MedicalTranscriptionWeb.Components.ResultListComponent}
+                id="result_list"
+                rows={@streams.transcription_rows}
+                summary_keywords={@summary_keywords}
+                finalized_codes={@finalized_codes}
+              />
             <% end %>
             <%= if @status == :pending do %>
     {:noreply, socket}
   end
+  # Adds `code` to the finalized codes unless it is already selected.
+  @impl Phoenix.LiveView
+  def handle_info({"toggle_user_selected_code", {:add, code}}, socket) do
+    finalized_codes = socket.assigns.finalized_codes
+    # Enum.any?/2 needs a predicate; passing the code itself raises on a non-empty list.
+    new_codes =
+      if Enum.any?(finalized_codes, &(&1 == code)) do
+        finalized_codes
+      else
+        finalized_codes ++ [code]
+      end
+    {:noreply, assign(socket, :finalized_codes, new_codes)}
+  end
+  # Drops every occurrence of `code` from the finalized codes.
+  @impl Phoenix.LiveView
+  def handle_info({"toggle_user_selected_code", {:remove, code}}, socket) do
+    remaining_codes =
+      for selected <- socket.assigns.finalized_codes, selected != code, do: selected
+    {:noreply, assign(socket, :finalized_codes, remaining_codes)}
+  end
   @impl true
   def handle_info({ref, _result}, socket) do
     # See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the

test/medical_transcription/transcription_server_test.exs ADDED Viewed

	@@ -0,0 +1,31 @@

+defmodule MedicalTranscription.TranscriptionServerTest do
+  @moduledoc """
+  Tests for MedicalTranscription.TranscriptionServer
+  """
+  use MedicalTranscription.DataCase
+  alias MedicalTranscription.TranscriptionServer
+  setup do
+    # Path to the sample audio file, two directories above this test file.
+    sample_file = Path.expand("../../medasrdemo-Paul.mp3", __DIR__)
+    %{sample_file: sample_file}
+  end
+  test "transcribe and tag audio", %{sample_file: sample_file} do
+    spec = {TranscriptionServer, file_path: sample_file}
+    {:ok, pid} = start_supervised(spec)
+    # NOTE: Monitoring the async process to ensure it completes
+    # before asserting results. See https://elixirforum.com/t/whats-the-correct-way-to-handle-async-tasks-i-dont-care-about-in-an-exunit-test-error-postgrex-protocol-disconnected/36605/2
+    # for further explanation.
+    ref = Process.monitor(pid)
+    # The server stops itself with `:shutdown` once the transcription is finished; matching on
+    # the reason makes a crash during transcription fail the test instead of passing it.
+    assert_receive({:DOWN, ^ref, :process, ^pid, :shutdown}, 5_000)
+  end
+end

test/medical_transcription/transcription_supervisor_test.exs ADDED Viewed

	@@ -0,0 +1,13 @@

+defmodule MedicalTranscription.TranscriptionSupervisorTest do
+  @moduledoc """
+  Tests for MedicalTranscription.TranscriptionSupervisor
+  """
+  use MedicalTranscription.DataCase
+  alias MedicalTranscription.TranscriptionSupervisor
+  test "start_transcription/1 starts a transcription server" do
+    # Only the start of the child is asserted here; the transcription itself is not awaited.
+    assert {:ok, pid} = TranscriptionSupervisor.start_transcription("my-file.mp3")
+    assert is_pid(pid)
+  end
+end

test/medical_transcription_web/live/home_live_test.exs CHANGED Viewed

@@ -53,11 +53,17 @@ defmodule MedicalTranscriptionWeb.HomeLiveTest do
       assert view
              |> form("#audio-form")
-             |> render_submit() =~ "Summary Keywords"
       # TODO: Test that transcribed text appears
       # TODO: Test that codes appear
       # assert render_async(view, 5_000) =~ "Coronary artery anomaly"
     end
   end
 end

       assert view
              |> form("#audio-form")
+             |> render_submit() =~ "Transcribing and tagging audio file..."
       # TODO: Test that transcribed text appears
       # TODO: Test that codes appear
       # assert render_async(view, 5_000) =~ "Coronary artery anomaly"
     end
+    test "does not render the result list before an upload", %{conn: conn} do
+      # live/2 performs the request itself; `conn` never receives a response, so
+      # html_response/2 cannot be used on it.
+      {:ok, view, _html} = live(conn, "/")
+      # The result list is only rendered once the status leaves :pending.
+      refute has_element?(view, "#result_list")
+      # TODO: Upload a file and assert that the transcription text appears in "#result_list"
+    end
   end
 end