Commit
•
076da4c
1
Parent(s):
609a959
feat: Retrieve cosine similarity from Postgres query
Browse files
lib/medical_transcription/coding.ex
CHANGED
@@ -8,7 +8,7 @@ defmodule MedicalTranscription.Coding do
|
|
8 |
import Ecto.Query
|
9 |
import Pgvector.Ecto.Query
|
10 |
|
11 |
-
alias MedicalTranscription.Coding.CodeVector
|
12 |
|
13 |
def insert_vector(params) do
|
14 |
changeset = CodeVector.changeset(%CodeVector{}, params)
|
@@ -37,14 +37,13 @@ defmodule MedicalTranscription.Coding do
|
|
37 |
|
38 |
def process_chunk(text, opts \\ []) do
|
39 |
k = Keyword.get(opts, :num_results, 5)
|
|
|
40 |
|
41 |
search_vector_for_db = compute_vector_as_list(text)
|
42 |
|
43 |
find_similar(search_vector_for_db, k)
|
44 |
-
|
45 |
# -- Remove matches that don't exceed a given threshold
|
46 |
-
|
47 |
-
# |> Enum.filter(fn {_index, score} -> score >= similarity_threshold end)
|
48 |
end
|
49 |
|
50 |
@doc "Creates a vector embedding for text using the text embedding serving in the application's supervision tree."
|
@@ -59,7 +58,8 @@ defmodule MedicalTranscription.Coding do
|
|
59 |
Repo.all(
|
60 |
from v in CodeVector,
|
61 |
order_by: cosine_distance(v.description_vector, ^search_vector),
|
62 |
-
limit: ^limit
|
|
|
63 |
)
|
64 |
end
|
65 |
end
|
|
|
8 |
import Ecto.Query
|
9 |
import Pgvector.Ecto.Query
|
10 |
|
11 |
+
alias MedicalTranscription.Coding.{CodeVector, CodeVectorMatch}
|
12 |
|
13 |
def insert_vector(params) do
|
14 |
changeset = CodeVector.changeset(%CodeVector{}, params)
|
|
|
37 |
|
38 |
# Finds the stored code vectors most similar to `text`.
#
# Options:
#   * `:num_results` - maximum number of rows requested from the similarity query (default 5)
#   * `:similarity_threshold` - minimum cosine similarity a match must have to be kept (default 0.80)
#
# Returns the `%CodeVectorMatch{}` structs from `find_similar/2` whose
# `cosine_similarity` is greater than or equal to the threshold.
def process_chunk(text, opts \\ []) do
  num_results = Keyword.get(opts, :num_results, 5)
  similarity_threshold = Keyword.get(opts, :similarity_threshold, 0.80)

  # Matching on the struct in the clause head keeps the assertion that every
  # element is a %CodeVectorMatch{}; anything else raises instead of being dropped.
  meets_threshold? = fn %CodeVectorMatch{cosine_similarity: similarity} ->
    similarity >= similarity_threshold
  end

  text
  |> compute_vector_as_list()
  |> find_similar(num_results)
  |> Enum.filter(meets_threshold?)
end
|
48 |
|
49 |
@doc "Creates a vector embedding for text using the text embedding serving in the application's supervision tree."
|
|
|
58 |
Repo.all(
|
59 |
from v in CodeVector,
|
60 |
order_by: cosine_distance(v.description_vector, ^search_vector),
|
61 |
+
limit: ^limit,
|
62 |
+
select: %CodeVectorMatch{id: v.id, code: v.code, description: v.description, cosine_similarity: 1 - cosine_distance(v.description_vector, ^search_vector)}
|
63 |
)
|
64 |
end
|
65 |
end
|
lib/medical_transcription/coding/code_vector_match.ex
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.Coding.CodeVectorMatch do
  @moduledoc """
  Represents a vector match found in the database, along with its similarity score.

  ## Fields

    * `:id` - identifier of the matched code vector row
    * `:code` - the code stored on the matched row
    * `:description` - the description stored on the matched row
    * `:cosine_similarity` - similarity between the search vector and the row's
      description vector, computed in the similarity query as `1 - cosine_distance`

  All fields default to `nil` when the struct is built without them.
  """

  # NOTE(review): `id`, `code` and `description` are copied straight from the
  # code vector schema, which is not visible from this module, so they are left
  # as `term()` here; tighten once confirmed against that schema.
  @type t :: %__MODULE__{
          id: term(),
          code: term(),
          description: term(),
          cosine_similarity: float() | nil
        }

  defstruct [:id, :code, :description, :cosine_similarity]
end
|
lib/medical_transcription_web/components/transcription_text_component.ex
CHANGED
@@ -6,7 +6,7 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
|
6 |
use MedicalTranscriptionWeb, :live_component
|
7 |
import MedicalTranscriptionWeb.Components
|
8 |
import MedicalTranscriptionWeb.Components.KeywordHighlighter
|
9 |
-
alias MedicalTranscription.Coding.
|
10 |
|
11 |
@impl Phoenix.LiveComponent
|
12 |
def update(assigns, socket) do
|
@@ -53,10 +53,8 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
|
53 |
</div>
|
54 |
|
55 |
<div class="flex-1 flex flex-col items-stretch gap-3">
|
56 |
-
<%= for %
|
57 |
-
|
58 |
-
For now, pass 1.0 until we research whether we can determine that score from the query. -->
|
59 |
-
<.tag_result code_vector_id={id} code={code} label={label} score={1.0} text={@row.text} />
|
60 |
<% end %>
|
61 |
</div>
|
62 |
</div>
|
|
|
6 |
use MedicalTranscriptionWeb, :live_component
|
7 |
import MedicalTranscriptionWeb.Components
|
8 |
import MedicalTranscriptionWeb.Components.KeywordHighlighter
|
9 |
+
alias MedicalTranscription.Coding.CodeVectorMatch
|
10 |
|
11 |
@impl Phoenix.LiveComponent
|
12 |
def update(assigns, socket) do
|
|
|
53 |
</div>
|
54 |
|
55 |
<div class="flex-1 flex flex-col items-stretch gap-3">
|
56 |
+
<%= for %CodeVectorMatch{id: id, code: code, description: label, cosine_similarity: score} <- @row.tags do %>
|
57 |
+
<.tag_result code_vector_id={id} code={code} label={label} score={score} text={@row.text} />
|
|
|
|
|
58 |
<% end %>
|
59 |
</div>
|
60 |
</div>
|