timgremore
committed on
Commit
•
7625517
1
Parent(s):
cf5a59c
feat: Identify and store chunk keywords
Browse files- lib/medical_transcription/application.ex +5 -3
- lib/medical_transcription/classification_server.ex +108 -0
- lib/medical_transcription/classification_supervisor.ex +25 -0
- lib/medical_transcription/transcriptions.ex +20 -2
- lib/medical_transcription/transcriptions/transcription_chunk.ex +2 -0
- lib/medical_transcription/transcriptions/transcription_chunk_keyword.ex +22 -0
- priv/repo/migrations/20240209165222_create_transcription_chunk_keywords.exs +17 -0
- test/medical_transcription/classification_server_test.exs +42 -0
- test/medical_transcription/classification_supervisor_test.exs +28 -0
lib/medical_transcription/application.ex
CHANGED
@@ -23,9 +23,11 @@ defmodule MedicalTranscription.Application do
|
|
23 |
text_embedding_spec(),
|
24 |
{
|
25 |
MedicalTranscription.TranscriptionSupervisor,
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
29 |
},
|
30 |
# Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
|
31 |
# {MedicalTranscription.Worker, arg},
|
|
|
23 |
text_embedding_spec(),
|
24 |
{
|
25 |
MedicalTranscription.TranscriptionSupervisor,
|
26 |
+
strategy: :one_for_one, max_restarts: 1
|
27 |
+
},
|
28 |
+
{
|
29 |
+
MedicalTranscription.ClassificationSupervisor,
|
30 |
+
strategy: :one_for_one, max_restarts: 1
|
31 |
},
|
32 |
# Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
|
33 |
# {MedicalTranscription.Worker, arg},
|
lib/medical_transcription/classification_server.ex
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.ClassificationServer do
  @moduledoc """
  GenServer responsible for classifying transcription text.

  A server is started with `{:chunk, chunk}` as its state. Once initialised it
  extracts keywords from `chunk.text`, stores them through
  `Transcriptions.create_keyword_for_chunk/1` and then stops normally.

  Progress is published on the `:medicode_pubsub` PubSub under the topic
  `"transcription_chunks:<chunk id>"` as:

    * `{:classification_started, chunk_id}` - before keyword extraction begins
    * `{:chunk_updated, chunk}` - after the keywords have been written; `chunk`
      is the struct held in the server state (it is not reloaded here)
    * `{:classification_finished, reason}` - from `terminate/2`
  """
  use GenServer

  alias AudioTagger.KeywordFinder

  alias MedicalTranscription.Transcriptions
  alias MedicalTranscription.Transcriptions.TranscriptionChunk

  @doc """
  Starts a classification server. `args` becomes the server state and is
  expected to be `{:chunk, chunk}`.
  """
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, [])
  end

  @impl GenServer
  def init(init_arg) do
    # Defer the slow classification work so start_link/1 returns immediately.
    {:ok, init_arg, {:continue, :start}}
  end

  @impl GenServer
  def handle_continue(:start, {:chunk, chunk} = state) do
    # Announce the start *before* running the (potentially slow) extraction so
    # subscribers are not told about it only once the work is already done.
    broadcast(chunk.id, {:classification_started, chunk.id})

    classify_chunk(chunk)

    {:noreply, state}
  end

  @impl GenServer
  def handle_info({:chunk_updated, _keyword_results}, state) do
    {:chunk, chunk} = state

    %TranscriptionChunk{id: id} = chunk

    broadcast(id, {:chunk_updated, chunk})

    {:noreply, state}
  end

  def handle_info(:finished, state) do
    {:stop, :normal, state}
  end

  @impl GenServer
  def terminate(reason, state) do
    {:chunk, chunk} = state

    %TranscriptionChunk{id: id} = chunk

    broadcast(id, {:classification_finished, reason})

    reason
  end

  # Publishes `message` on the per-chunk PubSub topic.
  defp broadcast(chunk_id, message) do
    Phoenix.PubSub.broadcast(
      :medicode_pubsub,
      "transcription_chunks:#{chunk_id}",
      message
    )
  end

  # Entry point for all classification steps; currently only keyword extraction.
  defp classify_chunk(chunk) do
    find_keywords(chunk)
  end

  # Runs token classification over the chunk text, narrows the extracted
  # phrases down to keywords, stores each one, and then queues the
  # `:chunk_updated` and `:finished` messages for this server.
  defp find_keywords(chunk) do
    %{entities: entities} =
      Nx.Serving.batched_run(MedicalTranscription.TokenClassificationServing, chunk.text)

    phrases = KeywordFinder.cleanup_phrases(entities)

    # Then, we use one of two processes to determine which to show as keywords
    keyword_results =
      chunk.text
      |> determine_keywords(phrases)
      |> Enum.map(fn %{label: label, score: score} ->
        # TODO: Replace loop with an insert_all call and check for conflicts
        # so that duplicate keywords are ignored.
        Transcriptions.create_keyword_for_chunk(%{
          transcription_chunk_id: chunk.id,
          keyword: label,
          score: score
        })
      end)

    # The elements are the return values of create_keyword_for_chunk/1.
    send(self(), {:chunk_updated, keyword_results})

    send(self(), :finished)
  end

  # This clause handles cases where there is transcribed text, but no phrases were found.
  defp determine_keywords(_text, []), do: []

  defp determine_keywords(text, phrases) do
    # 1. A slower process that looks to classify the text by the extracted phrases.
    # serving = KeywordFinder.prepare_zero_shot_classification_serving(phrases)
    # %{predictions: predictions} = Nx.Serving.run(serving, text)

    # 2. A fast process finding the phrase closest in vector space to the whole text.
    KeywordFinder.find_most_similar_label(text, phrases, 2)
  end
end
|
lib/medical_transcription/classification_supervisor.ex
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.ClassificationSupervisor do
  @moduledoc """
  Dynamic supervisor, registered under its module name, that starts a
  `MedicalTranscription.ClassificationServer` worker for a transcription chunk.
  """

  # `use DynamicSupervisor` also generates child_spec/1 for this module.
  use DynamicSupervisor

  alias MedicalTranscription.ClassificationServer

  @doc """
  Starts the supervisor and registers it as `#{inspect(__MODULE__)}`.
  """
  def start_link(init_arg),
    do: DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  # The init argument is ignored; the strategy is always :one_for_one.
  @impl DynamicSupervisor
  def init(_init_arg), do: DynamicSupervisor.init(strategy: :one_for_one)

  @doc """
  Starts a classification worker for `transcription_chunk` under this
  supervisor and returns the result of `DynamicSupervisor.start_child/2`.

  The worker is started with `{:chunk, transcription_chunk}` as its argument
  and uses the `:transient` restart strategy.
  """
  def start_classification(transcription_chunk) do
    child_spec = %{
      id: ClassificationServer,
      start: {ClassificationServer, :start_link, [{:chunk, transcription_chunk}]},
      restart: :transient,
      type: :worker
    }

    DynamicSupervisor.start_child(__MODULE__, child_spec)
  end
end
|
lib/medical_transcription/transcriptions.ex
CHANGED
@@ -57,7 +57,7 @@ defmodule MedicalTranscription.Transcriptions do
|
|
57 |
query =
|
58 |
if preload_transcription_chunks do
|
59 |
Transcription
|
60 |
-
|> preload(:
|
61 |
else
|
62 |
Transcription
|
63 |
end
|
@@ -83,7 +83,7 @@ defmodule MedicalTranscription.Transcriptions do
|
|
83 |
query =
|
84 |
if preload_transcription_chunks do
|
85 |
Transcription
|
86 |
-
|> preload(:
|
87 |
else
|
88 |
Transcription
|
89 |
end
|
@@ -127,6 +127,24 @@ defmodule MedicalTranscription.Transcriptions do
|
|
127 |
|> Repo.insert()
|
128 |
end
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
@doc """
|
131 |
Updates a transcription.
|
132 |
|
|
|
57 |
query =
|
58 |
if preload_transcription_chunks do
|
59 |
Transcription
|
60 |
+
|> preload(chunks: :keywords)
|
61 |
else
|
62 |
Transcription
|
63 |
end
|
|
|
83 |
query =
|
84 |
if preload_transcription_chunks do
|
85 |
Transcription
|
86 |
+
|> preload(chunks: :keywords)
|
87 |
else
|
88 |
Transcription
|
89 |
end
|
|
|
127 |
|> Repo.insert()
|
128 |
end
|
129 |
|
130 |
+
@doc """
Inserts a keyword record for a transcription chunk.

`attrs` is run through `TranscriptionChunkKeyword.changeset/2` and the
resulting changeset is handed to `Repo.insert/1`, whose result is returned.

## Examples

    iex> create_keyword_for_chunk(%{transcription_chunk_id: chunk.id, keyword: "healthy", score: 0.9})
    {:ok, %TranscriptionChunkKeyword{}}

    iex> create_keyword_for_chunk(%{field: bad_value})
    {:error, %Ecto.Changeset{}}

"""
def create_keyword_for_chunk(attrs \\ %{}) do
  changeset = TranscriptionChunkKeyword.changeset(%TranscriptionChunkKeyword{}, attrs)

  Repo.insert(changeset)
end
|
147 |
+
|
148 |
@doc """
|
149 |
Updates a transcription.
|
150 |
|
lib/medical_transcription/transcriptions/transcription_chunk.ex
CHANGED
@@ -11,6 +11,8 @@ defmodule MedicalTranscription.Transcriptions.TranscriptionChunk do
|
|
11 |
|
12 |
belongs_to :transcription, MedicalTranscription.Transcriptions.Transcription
|
13 |
|
|
|
|
|
14 |
timestamps(type: :utc_datetime)
|
15 |
end
|
16 |
|
|
|
11 |
|
12 |
belongs_to :transcription, MedicalTranscription.Transcriptions.Transcription
|
13 |
|
14 |
+
has_many :keywords, MedicalTranscription.Transcriptions.TranscriptionChunkKeyword
|
15 |
+
|
16 |
timestamps(type: :utc_datetime)
|
17 |
end
|
18 |
|
lib/medical_transcription/transcriptions/transcription_chunk_keyword.ex
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.Transcriptions.TranscriptionChunkKeyword do
  @moduledoc """
  Ecto schema for a keyword attached to a transcription chunk.

  Each record stores the keyword text, a float score and the owning
  `MedicalTranscription.Transcriptions.TranscriptionChunk`.
  """
  use Ecto.Schema
  import Ecto.Changeset

  @primary_key {:id, :binary_id, autogenerate: true}
  @foreign_key_type :binary_id
  schema "transcription_chunk_keywords" do
    field :keyword, :string
    field :score, :float

    belongs_to :transcription_chunk, MedicalTranscription.Transcriptions.TranscriptionChunk

    timestamps(type: :utc_datetime)
  end

  @doc false
  def changeset(transcription_chunk_keyword, attrs) do
    transcription_chunk_keyword
    |> cast(attrs, [:transcription_chunk_id, :keyword, :score])
    |> validate_required([:transcription_chunk_id, :keyword, :score])
    # Turn a foreign-key violation (e.g. the chunk no longer exists when the
    # keyword is inserted) into a changeset error instead of a raised
    # Ecto.ConstraintError.
    |> foreign_key_constraint(:transcription_chunk_id)
  end
end
|
priv/repo/migrations/20240209165222_create_transcription_chunk_keywords.exs
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.Repo.Migrations.CreateTranscriptionChunkKeywords do
  # Creates the table backing the keywords attached to transcription chunks.
  use Ecto.Migration

  def change do
    create table(:transcription_chunk_keywords, primary_key: false) do
      add :id, :binary_id, primary_key: true
      add :keyword, :string
      add :score, :float

      # Keywords are removed together with the chunk they belong to.
      add :transcription_chunk_id,
          references(:transcription_chunks, type: :binary_id, on_delete: :delete_all),
          null: false

      timestamps(type: :utc_datetime)
    end

    # Keywords are looked up by their chunk (e.g. when preloading), and a
    # foreign-key reference alone does not guarantee an index on that column
    # on every database (PostgreSQL, for one, does not create it).
    create index(:transcription_chunk_keywords, [:transcription_chunk_id])
  end
end
|
test/medical_transcription/classification_server_test.exs
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.ClassificationServerTest do
  @moduledoc """
  Exercises MedicalTranscription.ClassificationServer end to end for a single chunk.
  """

  use MedicalTranscription.DataCase

  import MedicalTranscription.TranscriptionsFixtures
  import MedicalTranscription.TranscriptionChunksFixtures

  alias MedicalTranscription.Transcriptions
  alias MedicalTranscription.ClassificationServer

  setup do
    transcription = transcription_fixture()

    chunk_attrs = %{
      transcription_id: transcription.id,
      text: "patient is suffering from lower back pain"
    }

    %{chunk: transcription_chunk_fixture(chunk_attrs), transcription: transcription}
  end

  test "extract keywords and assign medical codes", %{chunk: chunk, transcription: transcription} do
    {:ok, server} = start_supervised({ClassificationServer, {:chunk, chunk}}, restart: :transient)

    # The server stops itself once classification is done; wait for that exit.
    monitor_ref = Process.monitor(server)
    assert_receive({:DOWN, ^monitor_ref, :process, _pid, _reason}, 5_000)

    reloaded = Transcriptions.get_transcription!(transcription.id, true)
    stored_keywords = Enum.flat_map(reloaded.chunks, fn stored_chunk -> stored_chunk.keywords end)

    assert 2 == Enum.count(stored_keywords)
  end
end
|
test/medical_transcription/classification_supervisor_test.exs
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscription.ClassificationSupervisorTest do
  @moduledoc """
  Tests for MedicalTranscription.ClassificationSupervisor
  """

  use MedicalTranscription.DataCase

  import MedicalTranscription.TranscriptionChunksFixtures

  alias MedicalTranscription.ClassificationSupervisor

  setup do
    chunk = transcription_chunk_fixture(%{text: "patient fell and is experiecing knee pain"})
    %{chunk: chunk}
  end

  test "extract keywords and assign codes", %{chunk: chunk} do
    # Monitor the pid returned by the supervisor directly. Looking the worker
    # up again via which_children/1 races with the transient worker exiting
    # (and could pick up an unrelated child of the shared supervisor).
    assert {:ok, server_pid} = ClassificationSupervisor.start_classification(chunk)
    assert is_pid(server_pid)

    # If the worker has already exited, the monitor delivers :DOWN immediately.
    ref = Process.monitor(server_pid)
    assert_receive({:DOWN, ^ref, :process, ^server_pid, _reason}, 5_000)
  end
end
|