Chris Held committed on
Commit
96c379c
1 Parent(s): 62edbad

tag full transcription text

Browse files
lib/medical_transcription/transcriber.ex CHANGED
@@ -6,6 +6,13 @@ defmodule MedicalTranscription.Transcriber do
6
 
7
  alias MedicalTranscription.CodeSearcher
8
 
 
 
 
 
 
 
 
9
  # Ideas for future exploration:
10
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
11
  # (such as pgvector or Pinecone.io)
@@ -17,11 +24,23 @@ defmodule MedicalTranscription.Transcriber do
17
 
18
  # Audio transcription + semantic search
19
  for {chunk, index} <- stream_transcription(audio_file_path) do
20
- tags = CodeSearcher.process_chunk(search_configuration, chunk.text)
21
- result = build_result(index, chunk, tags)
22
-
23
- send(live_view_pid, {:transcription_row, result})
24
  end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  end
26
 
27
  defp stream_transcription(audio_file_path) do
@@ -30,13 +49,14 @@ defmodule MedicalTranscription.Transcriber do
30
  |> Stream.with_index()
31
  end
32
 
33
- defp build_result(index, chunk, tags) do
34
  %{
35
  id: index,
36
  start_mark: format_timestamp(chunk.start_timestamp_seconds),
37
  end_mark: format_timestamp(chunk.end_timestamp_seconds),
38
  text: chunk.text,
39
- tags: tags
 
40
  }
41
  end
42
 
 
6
 
7
  alias MedicalTranscription.CodeSearcher
8
 
9
+ defp send_result(search_configuration, chunk, index, live_view_pid, is_summary) do
10
+ tags = CodeSearcher.process_chunk(search_configuration, chunk.text)
11
+ result = build_result(index, chunk, tags, is_summary)
12
+
13
+ send(live_view_pid, {:transcription_row, result})
14
+ end
15
+
16
  # Ideas for future exploration:
17
  # - Instead of storing the long description vectors in a binary file on disk, we could store them within a vector DB
18
  # (such as pgvector or Pinecone.io)
 
24
 
25
  # Audio transcription + semantic search
26
  for {chunk, index} <- stream_transcription(audio_file_path) do
27
+ send_result(search_configuration, chunk, index, live_view_pid, false)
 
 
 
28
  end
29
+
30
+ # Add all text as its own row
31
+ all_chunk_text =
32
+ stream_transcription(audio_file_path)
33
+ |> Enum.reduce("", fn {chunk, _index}, acc ->
34
+ acc <> chunk.text
35
+ end)
36
+
37
+ send_result(
38
+ search_configuration,
39
+ %{:text => all_chunk_text, :start_timestamp_seconds => 0, :end_timestamp_seconds => 0},
40
+ 0,
41
+ live_view_pid,
42
+ true
43
+ )
44
  end
45
 
46
  defp stream_transcription(audio_file_path) do
 
49
  |> Stream.with_index()
50
  end
51
 
52
+ defp build_result(index, chunk, tags, is_summary) do
53
  %{
54
  id: index,
55
  start_mark: format_timestamp(chunk.start_timestamp_seconds),
56
  end_mark: format_timestamp(chunk.end_timestamp_seconds),
57
  text: chunk.text,
58
+ tags: tags,
59
+ is_summary: is_summary
60
  }
61
  end
62