noahsettersten committed on
Commit
076da4c
1 Parent(s): 609a959

feat: Retrieve cosine similarity from Postgres query

Browse files
lib/medical_transcription/coding.ex CHANGED
@@ -8,7 +8,7 @@ defmodule MedicalTranscription.Coding do
8
  import Ecto.Query
9
  import Pgvector.Ecto.Query
10
 
11
- alias MedicalTranscription.Coding.CodeVector
12
 
13
  def insert_vector(params) do
14
  changeset = CodeVector.changeset(%CodeVector{}, params)
@@ -37,14 +37,13 @@ defmodule MedicalTranscription.Coding do
37
 
38
  def process_chunk(text, opts \\ []) do
39
  k = Keyword.get(opts, :num_results, 5)
 
40
 
41
  search_vector_for_db = compute_vector_as_list(text)
42
 
43
  find_similar(search_vector_for_db, k)
44
-
45
  # -- Remove matches that don't exceed a given threshold
46
- # TODO: This depends on receiving a similarity score from the Postgres query
47
- # |> Enum.filter(fn {_index, score} -> score >= similarity_threshold end)
48
  end
49
 
50
  @doc "Creates a vector embedding for text using the text embedding serving in the application's supervision tree."
@@ -59,7 +58,8 @@ defmodule MedicalTranscription.Coding do
59
  Repo.all(
60
  from v in CodeVector,
61
  order_by: cosine_distance(v.description_vector, ^search_vector),
62
- limit: ^limit
 
63
  )
64
  end
65
  end
 
8
  import Ecto.Query
9
  import Pgvector.Ecto.Query
10
 
11
+ alias MedicalTranscription.Coding.{CodeVector, CodeVectorMatch}
12
 
13
  def insert_vector(params) do
14
  changeset = CodeVector.changeset(%CodeVector{}, params)
 
37
 
38
  def process_chunk(text, opts \\ []) do
39
  k = Keyword.get(opts, :num_results, 5)
40
+ similarity_threshold = Keyword.get(opts, :similarity_threshold, 0.80)
41
 
42
  search_vector_for_db = compute_vector_as_list(text)
43
 
44
  find_similar(search_vector_for_db, k)
 
45
  # -- Remove matches that don't exceed a given threshold
46
+ |> Enum.filter(fn %CodeVectorMatch{} = score -> score.cosine_similarity >= similarity_threshold end)
 
47
  end
48
 
49
  @doc "Creates a vector embedding for text using the text embedding serving in the application's supervision tree."
 
58
  Repo.all(
59
  from v in CodeVector,
60
  order_by: cosine_distance(v.description_vector, ^search_vector),
61
+ limit: ^limit,
62
+ select: %CodeVectorMatch{id: v.id, code: v.code, description: v.description, cosine_similarity: 1 - cosine_distance(v.description_vector, ^search_vector)}
63
  )
64
  end
65
  end
lib/medical_transcription/coding/code_vector_match.ex ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
defmodule MedicalTranscription.Coding.CodeVectorMatch do
  @moduledoc """
  Represents a vector match found in the database, along with its similarity score.

  Fields:

    * `:id` - identifier of the matched code vector row
    * `:code` - the code stored on the matched row
    * `:description` - the description stored on the matched row
    * `:cosine_similarity` - similarity between the search vector and the row's
      description vector, computed by the query as `1 - cosine_distance`
  """

  # NOTE(review): field types are inferred from how the struct is used in this
  # change (rendered as code/label/score); confirm against the CodeVector schema.
  @type t :: %__MODULE__{
          id: term(),
          code: String.t() | nil,
          description: String.t() | nil,
          cosine_similarity: float() | nil
        }

  defstruct [:id, :code, :description, :cosine_similarity]
end
lib/medical_transcription_web/components/transcription_text_component.ex CHANGED
@@ -6,7 +6,7 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
6
  use MedicalTranscriptionWeb, :live_component
7
  import MedicalTranscriptionWeb.Components
8
  import MedicalTranscriptionWeb.Components.KeywordHighlighter
9
- alias MedicalTranscription.Coding.CodeVector
10
 
11
  @impl Phoenix.LiveComponent
12
  def update(assigns, socket) do
@@ -53,10 +53,8 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
53
  </div>
54
 
55
  <div class="flex-1 flex flex-col items-stretch gap-3">
56
- <%= for %CodeVector{id: id, code: code, description: label} <- @row.tags do %>
57
- <!-- NOTE: We're not currently deriving a similarity score from our Postgres query using cosine similarity.
58
- For now, pass 1.0 until we research whether we can determine that score from the query. -->
59
- <.tag_result code_vector_id={id} code={code} label={label} score={1.0} text={@row.text} />
60
  <% end %>
61
  </div>
62
  </div>
 
6
  use MedicalTranscriptionWeb, :live_component
7
  import MedicalTranscriptionWeb.Components
8
  import MedicalTranscriptionWeb.Components.KeywordHighlighter
9
+ alias MedicalTranscription.Coding.CodeVectorMatch
10
 
11
  @impl Phoenix.LiveComponent
12
  def update(assigns, socket) do
 
53
  </div>
54
 
55
  <div class="flex-1 flex flex-col items-stretch gap-3">
56
+ <%= for %CodeVectorMatch{id: id, code: code, description: label, cosine_similarity: score} <- @row.tags do %>
57
+ <.tag_result code_vector_id={id} code={code} label={label} score={score} text={@row.text} />
 
 
58
  <% end %>
59
  </div>
60
  </div>