timgremore committed on
Commit
7625517
1 Parent(s): cf5a59c

feat: Identify and store chunk keywords

Browse files
lib/medical_transcription/application.ex CHANGED
@@ -23,9 +23,11 @@ defmodule MedicalTranscription.Application do
23
  text_embedding_spec(),
24
  {
25
  MedicalTranscription.TranscriptionSupervisor,
26
- # name: MedicalTranscription.TranscriptionSupervisor,
27
- strategy: :one_for_one,
28
- max_restarts: 1
 
 
29
  },
30
  # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
31
  # {MedicalTranscription.Worker, arg},
 
23
  text_embedding_spec(),
24
  {
25
  MedicalTranscription.TranscriptionSupervisor,
26
+ strategy: :one_for_one, max_restarts: 1
27
+ },
28
+ {
29
+ MedicalTranscription.ClassificationSupervisor,
30
+ strategy: :one_for_one, max_restarts: 1
31
  },
32
  # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
33
  # {MedicalTranscription.Worker, arg},
lib/medical_transcription/classification_server.ex ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.ClassificationServer do
  @moduledoc """
  GenServer responsible for classifying transcription text.

  A server is started with `{:chunk, chunk}` as its argument. Once `init/1`
  returns, it extracts keywords from `chunk.text`, stores each of them through
  `Transcriptions.create_keyword_for_chunk/1` and then stops with reason
  `:normal`.

  Progress is broadcast on `:medicode_pubsub` under the topic
  `"transcription_chunks:<chunk id>"`:

    * `{:classification_started, chunk_id}` - before the keyword search begins
    * `{:chunk_updated, chunk}` - after the keyword inserts have been attempted;
      `chunk` is the struct the server was started with, it is not reloaded
    * `{:classification_finished, reason}` - from `terminate/2`
  """
  use GenServer

  require Logger

  alias AudioTagger.KeywordFinder

  alias MedicalTranscription.Transcriptions
  alias MedicalTranscription.Transcriptions.TranscriptionChunk

  @pubsub :medicode_pubsub

  @doc """
  Starts an unnamed classification server.

  `args` is expected to be `{:chunk, chunk}`; any other shape makes the server
  crash in `handle_continue/2`.
  """
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, [])
  end

  @impl GenServer
  def init(init_arg) do
    # Defer the slow model call to handle_continue/2 so start_link/1 returns quickly.
    {:ok, init_arg, {:continue, :start}}
  end

  @impl GenServer
  def handle_continue(:start, {:chunk, chunk} = state) do
    # Announce the start *before* running the (blocking) keyword search, so
    # subscribers learn about it while the work is actually in progress.
    broadcast(chunk, {:classification_started, chunk.id})

    find_keywords(chunk)

    {:noreply, state}
  end

  @impl GenServer
  def handle_info({:chunk_updated, _keyword_results}, {:chunk, chunk} = state) do
    broadcast(chunk, {:chunk_updated, chunk})

    {:noreply, state}
  end

  def handle_info(:finished, state) do
    {:stop, :normal, state}
  end

  @impl GenServer
  def terminate(reason, {:chunk, chunk}) do
    broadcast(chunk, {:classification_finished, reason})

    reason
  end

  # Publishes `message` on the topic of the given chunk.
  defp broadcast(%TranscriptionChunk{id: id}, message) do
    Phoenix.PubSub.broadcast(@pubsub, "transcription_chunks:#{id}", message)
  end

  # Extracts candidate phrases from the chunk text, stores the selected
  # keywords and queues the `:chunk_updated` and `:finished` messages for this
  # process. Must be called from within the server process.
  defp find_keywords(chunk) do
    %{entities: entities} =
      Nx.Serving.batched_run(MedicalTranscription.TokenClassificationServing, chunk.text)

    phrases = KeywordFinder.cleanup_phrases(entities)

    # Then, we use one of two processes to determine which to show as keywords
    keyword_results =
      chunk.text
      |> determine_keywords(phrases)
      |> Enum.map(fn %{label: label, score: score} ->
        # TODO: Replace loop with an insert_all call and check for conflicts
        # so that duplicate keywords are ignored.
        create_keyword(chunk, label, score)
      end)

    send(self(), {:chunk_updated, keyword_results})

    send(self(), :finished)
  end

  # Stores one keyword and logs, rather than silently drops, a failed insert.
  # The insert result is returned unchanged.
  defp create_keyword(chunk, label, score) do
    attrs = %{transcription_chunk_id: chunk.id, keyword: label, score: score}

    case Transcriptions.create_keyword_for_chunk(attrs) do
      {:ok, _keyword} = ok ->
        ok

      {:error, changeset} = error ->
        Logger.warning(
          "Could not store keyword #{inspect(label)} for chunk #{chunk.id}: " <>
            inspect(changeset.errors)
        )

        error
    end
  end

  # This clause handles cases where there is transcribed text, but no phrases were found.
  defp determine_keywords(_text, []), do: []

  defp determine_keywords(text, phrases) do
    # 1. A slower process that looks to classify the text by the extracted phrases.
    # serving = KeywordFinder.prepare_zero_shot_classification_serving(phrases)
    # %{predictions: predictions} = Nx.Serving.run(serving, text)

    # 2. A fast process finding the phrase closest in vector space to the whole text.
    KeywordFinder.find_most_similar_label(text, phrases, 2)
  end
end
lib/medical_transcription/classification_supervisor.ex ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.ClassificationSupervisor do
  @moduledoc """
  Dynamic supervisor for `MedicalTranscription.ClassificationServer` workers.

  The supervisor is registered under its module name. One worker is started
  per transcription chunk through `start_classification/1`.
  """

  # Automatically defines child_spec/1
  use DynamicSupervisor

  alias MedicalTranscription.ClassificationServer

  @doc """
  Starts the supervisor, registered as `#{inspect(__MODULE__)}`.
  """
  def start_link(init_arg) do
    DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  # The init argument is ignored; the supervisor always uses `:one_for_one`
  # with the default restart limits.
  @impl true
  def init(_init_arg), do: DynamicSupervisor.init(strategy: :one_for_one)

  @doc """
  Starts a classification worker for `transcription_chunk`.

  The worker receives `{:chunk, transcription_chunk}` as its start argument and
  is restarted only when it exits abnormally (`restart: :transient`).

  Returns the result of `DynamicSupervisor.start_child/2`.
  """
  def start_classification(transcription_chunk) do
    DynamicSupervisor.start_child(__MODULE__, classification_spec(transcription_chunk))
  end

  # Child specification for a single classification worker.
  defp classification_spec(transcription_chunk) do
    %{
      id: ClassificationServer,
      start: {ClassificationServer, :start_link, [{:chunk, transcription_chunk}]},
      restart: :transient,
      type: :worker
    }
  end
end
lib/medical_transcription/transcriptions.ex CHANGED
@@ -57,7 +57,7 @@ defmodule MedicalTranscription.Transcriptions do
57
  query =
58
  if preload_transcription_chunks do
59
  Transcription
60
- |> preload(:chunks)
61
  else
62
  Transcription
63
  end
@@ -83,7 +83,7 @@ defmodule MedicalTranscription.Transcriptions do
83
  query =
84
  if preload_transcription_chunks do
85
  Transcription
86
- |> preload(:chunks)
87
  else
88
  Transcription
89
  end
@@ -127,6 +127,24 @@ defmodule MedicalTranscription.Transcriptions do
127
  |> Repo.insert()
128
  end
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  @doc """
131
  Updates a transcription.
132
 
 
57
  query =
58
  if preload_transcription_chunks do
59
  Transcription
60
+ |> preload(chunks: :keywords)
61
  else
62
  Transcription
63
  end
 
83
  query =
84
  if preload_transcription_chunks do
85
  Transcription
86
+ |> preload(chunks: :keywords)
87
  else
88
  Transcription
89
  end
 
127
  |> Repo.insert()
128
  end
129
 
@doc """
Creates a keyword for a transcription chunk.

`attrs` is cast through `TranscriptionChunkKeyword.changeset/2`, which
requires `:transcription_chunk_id`, `:keyword` and `:score`.

## Examples

    iex> create_keyword_for_chunk(%{transcription_chunk_id: chunk.id, keyword: "healthy", score: 0.9})
    {:ok, %TranscriptionChunkKeyword{}}

    iex> create_keyword_for_chunk(%{field: bad_value})
    {:error, %Ecto.Changeset{}}

"""
def create_keyword_for_chunk(attrs \\ %{}) do
  # Aliased locally so the function does not depend on a module-level alias.
  alias MedicalTranscription.Transcriptions.TranscriptionChunkKeyword

  changeset = TranscriptionChunkKeyword.changeset(%TranscriptionChunkKeyword{}, attrs)

  Repo.insert(changeset)
end
147
+
148
  @doc """
149
  Updates a transcription.
150
 
lib/medical_transcription/transcriptions/transcription_chunk.ex CHANGED
@@ -11,6 +11,8 @@ defmodule MedicalTranscription.Transcriptions.TranscriptionChunk do
11
 
12
  belongs_to :transcription, MedicalTranscription.Transcriptions.Transcription
13
 
 
 
14
  timestamps(type: :utc_datetime)
15
  end
16
 
 
11
 
12
  belongs_to :transcription, MedicalTranscription.Transcriptions.Transcription
13
 
14
+ has_many :keywords, MedicalTranscription.Transcriptions.TranscriptionChunkKeyword
15
+
16
  timestamps(type: :utc_datetime)
17
  end
18
 
lib/medical_transcription/transcriptions/transcription_chunk_keyword.ex ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.Transcriptions.TranscriptionChunkKeyword do
  @moduledoc """
  A keyword identified in the text of a transcription chunk, together with the
  score it was assigned.

  Rows are stored in the `transcription_chunk_keywords` table and belong to a
  `MedicalTranscription.Transcriptions.TranscriptionChunk`.
  """
  use Ecto.Schema
  import Ecto.Changeset

  # Every castable field is also required.
  @fields [:transcription_chunk_id, :keyword, :score]

  @primary_key {:id, :binary_id, autogenerate: true}
  @foreign_key_type :binary_id
  schema "transcription_chunk_keywords" do
    field :keyword, :string
    field :score, :float

    belongs_to :transcription_chunk, MedicalTranscription.Transcriptions.TranscriptionChunk

    timestamps(type: :utc_datetime)
  end

  @doc false
  def changeset(transcription_chunk_keyword, attrs) do
    transcription_chunk_keyword
    |> cast(attrs, @fields)
    |> validate_required(@fields)
    # Without this, inserting a keyword for an unknown or already deleted chunk
    # raises Ecto.ConstraintError instead of returning {:error, changeset}.
    |> foreign_key_constraint(:transcription_chunk_id)
  end
end
priv/repo/migrations/20240209165222_create_transcription_chunk_keywords.exs ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.Repo.Migrations.CreateTranscriptionChunkKeywords do
  use Ecto.Migration

  # Creates the table that stores the keywords identified for each
  # transcription chunk.
  def change do
    create table(:transcription_chunk_keywords, primary_key: false) do
      add :id, :binary_id, primary_key: true
      # The schema changeset requires both fields, so enforce that in the
      # database as well.
      add :keyword, :string, null: false
      add :score, :float, null: false

      # Keywords are removed together with the chunk they belong to.
      add :transcription_chunk_id,
          references(:transcription_chunks, type: :binary_id, on_delete: :delete_all),
          null: false

      timestamps(type: :utc_datetime)
    end

    # Keywords are looked up by chunk (e.g. `preload(chunks: :keywords)`) and
    # deleted by chunk via the cascading foreign key; index the column so those
    # operations do not scan the whole table.
    create index(:transcription_chunk_keywords, [:transcription_chunk_id])
  end
end
test/medical_transcription/classification_server_test.exs ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.ClassificationServerTest do
  @moduledoc """
  Tests for MedicalTranscription.ClassificationServer
  """

  use MedicalTranscription.DataCase

  import MedicalTranscription.TranscriptionsFixtures
  import MedicalTranscription.TranscriptionChunksFixtures

  alias MedicalTranscription.Transcriptions
  alias MedicalTranscription.ClassificationServer

  setup do
    transcription = transcription_fixture()

    chunk =
      transcription_chunk_fixture(%{
        transcription_id: transcription.id,
        text: "patient is suffering from lower back pain"
      })

    %{chunk: chunk, transcription: transcription}
  end

  test "extract keywords and assign medical codes", %{chunk: chunk, transcription: transcription} do
    spec = {ClassificationServer, {:chunk, chunk}}

    {:ok, pid} = start_supervised(spec, restart: :transient)

    # Wait for the server to stop. The last element of the :DOWN message is the
    # exit reason: `:normal` for a clean stop, or `:noproc` when the server had
    # already exited before the monitor was set up. Anything else is a crash
    # and should fail here rather than surface later as a wrong keyword count.
    ref = Process.monitor(pid)
    assert_receive {:DOWN, ^ref, :process, ^pid, reason}, 5_000
    assert reason in [:normal, :noproc]

    keywords =
      transcription.id
      |> Transcriptions.get_transcription!(true)
      |> Map.fetch!(:chunks)
      |> Enum.flat_map(& &1.keywords)

    assert length(keywords) == 2
  end
end
test/medical_transcription/classification_supervisor_test.exs ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defmodule MedicalTranscription.ClassificationSupervisorTest do
  @moduledoc """
  Tests for MedicalTranscription.ClassificationSupervisor
  """

  use MedicalTranscription.DataCase

  import MedicalTranscription.TranscriptionChunksFixtures

  alias MedicalTranscription.ClassificationSupervisor

  setup do
    chunk = transcription_chunk_fixture(%{text: "patient fell and is experiecing knee pain"})
    %{chunk: chunk}
  end

  test "extract keywords and assign codes", %{chunk: chunk} do
    # Use the pid returned by the supervisor instead of looking it up with
    # `which_children/1`: the child list is empty once the worker has already
    # finished, and it may contain workers for other chunks.
    assert {:ok, server_pid} = ClassificationSupervisor.start_classification(chunk)

    # The worker stops on its own once classification is done.
    ref = Process.monitor(server_pid)
    assert_receive {:DOWN, ^ref, :process, ^server_pid, _reason}, 5_000
  end
end
+ end