Spaces:

headway
/

medicode

Runtime error

Chris Held committed on Jan 25

Commit

96c379c

•

1 Parent(s): 62edbad

tag full transcription text

Files changed (1) hide show

lib/medical_transcription/transcriber.ex CHANGED Viewed

@@ -6,6 +6,13 @@ defmodule MedicalTranscription.Transcriber do
   alias MedicalTranscription.CodeSearcher
   # Ideas for future exploration:
   # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
   #   (such as pgvector or Pinecone.io)
@@ -17,11 +24,23 @@ defmodule MedicalTranscription.Transcriber do
     # Audio transcription + semantic search
     for {chunk, index} <- stream_transcription(audio_file_path) do
-      tags = CodeSearcher.process_chunk(search_configuration, chunk.text)
-      result = build_result(index, chunk, tags)
-      send(live_view_pid, {:transcription_row, result})
     end
   end
   defp stream_transcription(audio_file_path) do
@@ -30,13 +49,14 @@ defmodule MedicalTranscription.Transcriber do
     |> Stream.with_index()
   end
-  defp build_result(index, chunk, tags) do
     %{
       id: index,
       start_mark: format_timestamp(chunk.start_timestamp_seconds),
       end_mark: format_timestamp(chunk.end_timestamp_seconds),
       text: chunk.text,
-      tags: tags
     }
   end

   alias MedicalTranscription.CodeSearcher
+  defp send_result(search_configuration, chunk, index, live_view_pid, is_summary) do
+    tags = CodeSearcher.process_chunk(search_configuration, chunk.text)
+    result = build_result(index, chunk, tags, is_summary)
+    send(live_view_pid, {:transcription_row, result})
+  end
   # Ideas for future exploration:
   # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
   #   (such as pgvector or Pinecone.io)
     # Audio transcription + semantic search
     for {chunk, index} <- stream_transcription(audio_file_path) do
+      send_result(search_configuration, chunk, index, live_view_pid, false)
     end
+    # Add all text as its own row
+    all_chunk_text =
+      stream_transcription(audio_file_path)
+      |> Enum.reduce("", fn {chunk, _index}, acc ->
+        acc <> chunk.text
+      end)
+    send_result(
+      search_configuration,
+      %{:text => all_chunk_text, :start_timestamp_seconds => 0, :end_timestamp_seconds => 0},
+      0,
+      live_view_pid,
+      true
+    )
   end
   defp stream_transcription(audio_file_path) do
     |> Stream.with_index()
   end
+  defp build_result(index, chunk, tags, is_summary) do
     %{
       id: index,
       start_mark: format_timestamp(chunk.start_timestamp_seconds),
       end_mark: format_timestamp(chunk.end_timestamp_seconds),
       text: chunk.text,
+      tags: tags,
+      is_summary: is_summary
     }
   end