noahsettersten committed on
Commit
7ae16f7
1 Parent(s): 6fe951d

feat: Gather keywords from transcribed text

Browse files
lib/medical_transcription/application.ex CHANGED
@@ -14,6 +14,9 @@ defmodule MedicalTranscription.Application do
14
  |> AudioTagger.Transcriber.prepare_featurizer()
15
  |> AudioTagger.Transcriber.prepare_serving(@model_name)
16
 
 
 
 
17
  children = [
18
  MedicalTranscriptionWeb.Telemetry,
19
  MedicalTranscription.Repo,
@@ -29,6 +32,13 @@ defmodule MedicalTranscription.Application do
29
  batch_size: 4,
30
  batch_timeout: 100
31
  },
 
 
 
 
 
 
 
32
  # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
33
  # {MedicalTranscription.Worker, arg},
34
  # Start to serve requests, typically the last entry
 
14
  |> AudioTagger.Transcriber.prepare_featurizer()
15
  |> AudioTagger.Transcriber.prepare_serving(@model_name)
16
 
17
+ token_classification_serving =
18
+ AudioTagger.KeywordFinder.prepare_token_classification_serving()
19
+
20
  children = [
21
  MedicalTranscriptionWeb.Telemetry,
22
  MedicalTranscription.Repo,
 
32
  batch_size: 4,
33
  batch_timeout: 100
34
  },
35
+ {
36
+ Nx.Serving,
37
+ serving: token_classification_serving,
38
+ name: TokenClassificationServing,
39
+ batch_size: 1,
40
+ batch_timeout: 100
41
+ },
42
  # Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
43
  # {MedicalTranscription.Worker, arg},
44
  # Start to serve requests, typically the last entry
lib/medical_transcription_web/components/components.ex CHANGED
@@ -131,6 +131,7 @@ defmodule MedicalTranscriptionWeb.Components do
131
  end
132
 
133
  defp feedback_button(assigns) do
 
134
  ~H"""
135
  <button
136
  phx-click="add_feedback"
 
131
  end
132
 
133
  defp feedback_button(assigns) do
134
+ # TODO: Update UI after submitting feedback. E.g. change background color, etc. for confirmation.
135
  ~H"""
136
  <button
137
  phx-click="add_feedback"
lib/medical_transcription_web/components/layouts/app.html.heex CHANGED
@@ -11,6 +11,7 @@
11
 
12
  <div class="px-6">
13
  <p class="text-xs leading-normal tracking-[0.2em] font-semibold uppercase">Today</p>
 
14
  </div>
15
  </header>
16
 
 
11
 
12
  <div class="px-6">
13
  <p class="text-xs leading-normal tracking-[0.2em] font-semibold uppercase">Today</p>
14
+ <!-- TODO: Show a history of uploaded files here. -->
15
  </div>
16
  </header>
17
 
lib/medical_transcription_web/live/home_live/index.ex CHANGED
@@ -11,6 +11,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
11
  |> assign(:uploaded_file_name, nil)
12
  |> stream(:transcription_rows, [])
13
  |> assign(:status, :pending)
 
14
  # To test the success UI, replace the two lines above with these:
15
  # |> stream(:transcription_rows, SampleResults.get_sample_results())
16
  # |> assign(:status, :success)
@@ -25,6 +26,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
25
  # - Fine-tune results based on feedback:
26
  # - Allow users to accept/decline suggested codes.
27
  # - Train model based on user feedback for suggested codes.
 
28
  # - Allow editing the transcription inline to correct mistakes. Then, retag based on the updated transcription.
29
  # - Stream audio recording instead of uploaded audio.
30
 
@@ -32,10 +34,27 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
32
  <div class="flex-1 flex flex-col space-y-6">
33
  <%= if @status == :loading || @status == :success do %>
34
  <.result_heading status={@status} filename={@uploaded_file_name} />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  <% end %>
36
 
37
  <%= if @status != :pending do %>
38
- <.result_list rows={@streams.transcription_rows} />
39
  <% end %>
40
 
41
  <%= if @status == :pending do %>
@@ -77,6 +96,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
77
  socket
78
  |> assign(:status, :loading)
79
  |> assign(:transcription_rows, [])
 
80
  |> assign(:uploaded_file_name, filename)
81
 
82
  {:noreply, socket}
@@ -109,14 +129,26 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
109
  def handle_info({:transcription_row, chunk_result}, socket) do
110
  # The processing sends a message as each chunk of text is coded. See here for some background and potential
111
  # inspiration for this: https://elixirforum.com/t/liveview-asynchronous-task-patterns/44695
 
 
 
 
112
  {:noreply, stream_insert(socket, :transcription_rows, chunk_result)}
113
  end
114
 
 
 
 
 
 
 
 
115
  @impl true
116
  def handle_info({ref, _result}, socket) do
117
  # See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the
118
  # task here: https://fly.io/phoenix-files/liveview-async-task/
119
  Process.demonitor(ref, [:flush])
 
120
  {:noreply, assign(socket, :status, :success)}
121
  end
122
 
@@ -127,6 +159,16 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
127
  )
128
  end
129
 
 
 
 
 
 
 
 
 
 
 
130
  def error_to_string(:too_large), do: "Too large"
131
  def error_to_string(:not_accepted), do: "You have selected an unacceptable file type"
132
  end
 
11
  |> assign(:uploaded_file_name, nil)
12
  |> stream(:transcription_rows, [])
13
  |> assign(:status, :pending)
14
+ |> assign(:keywords_for_rows, [])
15
  # To test the success UI, replace the `stream(:transcription_rows, [])` and `assign(:status, :pending)` lines above with these:
16
  # |> stream(:transcription_rows, SampleResults.get_sample_results())
17
  # |> assign(:status, :success)
 
26
  # - Fine-tune results based on feedback:
27
  # - Allow users to accept/decline suggested codes.
28
  # - Train model based on user feedback for suggested codes.
29
+ # - After uploading an audio file, save the results to the DB (current date & time, transcription, assigned codes, etc.)
30
  # - Allow editing the transcription inline to correct mistakes. Then, retag based on the updated transcription.
31
  # - Stream audio recording instead of uploaded audio.
32
 
 
34
  <div class="flex-1 flex flex-col space-y-6">
35
  <%= if @status == :loading || @status == :success do %>
36
  <.result_heading status={@status} filename={@uploaded_file_name} />
37
+ <!-- TODO: Include this data inline with the transcription text. -->
38
+ <div>
39
+ <p>Summary Keywords:</p>
40
+ <ul>
41
+ <%= for keywords <- @keywords_for_rows do %>
42
+ <li class="list-disc">
43
+ <%= keywords
44
+ |> Enum.filter(fn keyword -> keyword.score > 0.25 end)
45
+ |> Enum.take(3)
46
+ |> Enum.map(fn keyword ->
47
+ "#{keyword.label} (#{keyword.score})"
48
+ end)
49
+ |> Enum.join(", ") %>
50
+ </li>
51
+ <% end %>
52
+ </ul>
53
+ </div>
54
  <% end %>
55
 
56
  <%= if @status != :pending do %>
57
+ <.result_list rows={@streams.transcription_rows} keywords_for_rows={@keywords_for_rows} />
58
  <% end %>
59
 
60
  <%= if @status == :pending do %>
 
96
  socket
97
  |> assign(:status, :loading)
98
  |> assign(:transcription_rows, [])
99
+ |> assign(:keywords_for_rows, [])
100
  |> assign(:uploaded_file_name, filename)
101
 
102
  {:noreply, socket}
 
129
  def handle_info({:transcription_row, chunk_result}, socket) do
130
  # The processing sends a message as each chunk of text is coded. See here for some background and potential
131
  # inspiration for this: https://elixirforum.com/t/liveview-asynchronous-task-patterns/44695
132
+
133
+ live_view_pid = self()
134
+ Task.async(fn -> find_keywords(live_view_pid, chunk_result) end)
135
+
136
  {:noreply, stream_insert(socket, :transcription_rows, chunk_result)}
137
  end
138
 
139
+ @impl true
140
+ def handle_info({:keywords_for_row, output}, socket) do
141
+ keywords = socket.assigns.keywords_for_rows
142
+
143
+ {:noreply, assign(socket, :keywords_for_rows, keywords ++ [output])}
144
+ end
145
+
146
  @impl true
147
  def handle_info({ref, _result}, socket) do
148
  # See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the
149
  # task here: https://fly.io/phoenix-files/liveview-async-task/
150
  Process.demonitor(ref, [:flush])
151
+
152
  {:noreply, assign(socket, :status, :success)}
153
  end
154
 
 
159
  )
160
  end
161
 
162
+ defp find_keywords(live_view_pid, chunk) do
163
+ output = Nx.Serving.batched_run(TokenClassificationServing, chunk.text)
164
+ phrases = AudioTagger.KeywordFinder.cleanup_phrases(output.entities)
165
+
166
+ serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
167
+ output = Nx.Serving.run(serving, chunk.text)
168
+
169
+ send(live_view_pid, {:keywords_for_row, output.predictions})
170
+ end
171
+
172
  def error_to_string(:too_large), do: "Too large"
173
  def error_to_string(:not_accepted), do: "You have selected an unacceptable file type"
174
  end
livebooks/summarization.livemd ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text summarization
2
+
3
+ ```elixir
4
+ Mix.install(
5
+ [
6
+ {:kino_bumblebee, "~> 0.4.0"},
7
+ {:exla, ">= 0.0.0"},
8
+ {:explorer, "~> 0.7.0"},
9
+ {:kino_explorer, "~> 0.1.11"}
10
+ ],
11
+ config: [nx: [default_backend: EXLA.Backend]]
12
+ )
13
+ ```
14
+
15
+ ## Summarize an audio transcription
16
+
17
+ ```elixir
18
+ text = """
19
+ This 55-year-old man with known coronary artery disease comes for a follow-up visit today.
20
+ Last month he was admitted to our hospital with unstable angina.
21
+ He underwent heart catheterization on November 15th, 2007.
22
+ At that time he was found to have a tight 99% proxmost enosis, total occlusion and collateralization
23
+ of the mid-circumflex, right coronary artery was normal.
24
+ Ventricularography was normal and his ejection fraction was 65%.
25
+ He underwants an uncomplicated placement of a cipher drug-eleuting
26
+ stent to his proximal lesion.
27
+ The attempted coronary intervention of the circumflex was unsuccessful,
28
+ as his lesion cannot be crossed. His post procedure was uncomplicated, and he was discharged on the day following his intervention.
29
+ He comes today indicating that he is feeling great.
30
+ His current medications include aspirin,
31
+ 325 milligrams daily, lipatore, 40 milligrams daily,
32
+ and platvic, 75 milligrams daily.
33
+ """
34
+ ```
35
+
36
+ ## Extract key phrases with part-of-speech tagging
37
+
38
+ ```elixir
39
+ {:ok, model_info} =
40
+ Bumblebee.load_model({:hf, "vblagoje/bert-english-uncased-finetuned-pos"})
41
+
42
+ {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "bert-base-uncased"})
43
+
44
+ serving =
45
+ Bumblebee.Text.token_classification(model_info, tokenizer,
46
+ aggregation: :same,
47
+ compile: [batch_size: 1, sequence_length: 100],
48
+ defn_options: [compiler: EXLA]
49
+ )
50
+ ```
51
+
52
+ ```elixir
53
+ text = "This 55-year-old man with known coronary artery disease comes for a follow-up visit today.
54
+ Last month he was admitted to our hospital with unstable angina."
55
+
56
+ text = "Last month he was admitted to our hospital with unstable angina."
57
+
58
+ ignored = ["DET", "PUNCT", "ADP", "NUM", "AUX", "PRON"]
59
+ # ignored = []
60
+
61
+ output = Nx.Serving.run(serving, text)
62
+
63
+ output.entities
64
+ |> Enum.reduce([], fn entity, acc ->
65
+ if Enum.member?(ignored, entity.label) do
66
+ acc
67
+ else
68
+ # "VERB", "NOUN", and "ADJ"
69
+ next_phrase =
70
+ if entity.label == "ADJ" do
71
+ "#{entity.phrase} [CONTINUATION]"
72
+ else
73
+ entity.phrase
74
+ end
75
+
76
+ if Enum.count(acc) > 0 do
77
+ previous = Enum.at(acc, -1)
78
+
79
+ # First, check if the previous phrase ends with a continuation token.
80
+ if String.ends_with?(previous, "[CONTINUATION]") do
81
+ acc_without_last = Enum.take(acc, Enum.count(acc) - 1)
82
+ acc_without_last ++ [String.replace(previous, "[CONTINUATION]", next_phrase)]
83
+ else
84
+ acc ++ [next_phrase]
85
+ end
86
+ else
87
+ acc ++ [next_phrase]
88
+ end
89
+ end
90
+ end)
91
+ ```
92
+
93
+ <!-- livebook:{"attrs":"eyJjb21waWxlciI6ImV4bGEiLCJsYWJlbHMiOiJsYXN0IG1vbnRoLCBhZG1pdHRlZCwgaG9zcGl0YWwsIHVuc3RhYmxlIGFuZ2luYSIsInNlcXVlbmNlX2xlbmd0aCI6MTAwLCJ0YXNrX2lkIjoiemVyb19zaG90X3RleHRfY2xhc3NpZmljYXRpb24iLCJ0b3BfayI6bnVsbCwidmFyaWFudF9pZCI6ImJhcnRfbGFyZ2VfbW5saSJ9","chunks":[[0,396],[398,509]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
94
+
95
+ ```elixir
96
+ {:ok, model_info} = Bumblebee.load_model({:hf, "facebook/bart-large-mnli"})
97
+ {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "facebook/bart-large-mnli"})
98
+ labels = ["last month", "admitted", "hospital", "unstable angina"]
99
+
100
+ serving =
101
+ Bumblebee.Text.zero_shot_classification(model_info, tokenizer, labels,
102
+ compile: [batch_size: 1, sequence_length: 100],
103
+ defn_options: [compiler: EXLA]
104
+ )
105
+
106
+ text_input = Kino.Input.textarea("Text", default: "One day I will see the world.")
107
+ form = Kino.Control.form([text: text_input], submit: "Run")
108
+ frame = Kino.Frame.new()
109
+
110
+ Kino.listen(form, fn %{data: %{text: text}} ->
111
+ Kino.Frame.render(frame, Kino.Text.new("Running..."))
112
+ output = Nx.Serving.run(serving, text)
113
+
114
+ output.predictions
115
+ |> Enum.map(&{&1.label, &1.score})
116
+ |> Kino.Bumblebee.ScoredList.new()
117
+ |> then(&Kino.Frame.render(frame, &1))
118
+ end)
119
+
120
+ Kino.Layout.grid([form, frame], boxed: true, gap: 16)
121
+ ```
122
+
123
+ ## Question answering
124
+
125
+ <!-- livebook:{"attrs":"eyJjb21waWxlciI6ImV4bGEiLCJzZXF1ZW5jZV9sZW5ndGgiOjUwMCwidGFza19pZCI6InF1ZXN0aW9uX2Fuc3dlcmluZyIsInZhcmlhbnRfaWQiOiJkaXN0aWxiZXJ0X2Jhc2VfY2FzZWQifQ","chunks":[[0,344],[346,595]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
126
+
127
+ ```elixir
128
+ {:ok, model_info} = Bumblebee.load_model({:hf, "distilbert-base-cased-distilled-squad"})
129
+
130
+ {:ok, tokenizer} =
131
+ Bumblebee.load_tokenizer({:hf, "distilbert-base-cased-distilled-squad"})
132
+
133
+ serving =
134
+ Bumblebee.Text.question_answering(model_info, tokenizer,
135
+ compile: [batch_size: 1, sequence_length: 500],
136
+ defn_options: [compiler: EXLA]
137
+ )
138
+
139
+ inputs = [
140
+ question: Kino.Input.text("Question", default: "Where do I live?"),
141
+ context: Kino.Input.textarea("Context", default: "My name is Sarah and I live in London.")
142
+ ]
143
+
144
+ form = Kino.Control.form(inputs, submit: "Run")
145
+ frame = Kino.Frame.new()
146
+
147
+ Kino.listen(form, fn %{data: %{question: question, context: context}} ->
148
+ output = Nx.Serving.run(serving, %{question: question, context: context})
149
+
150
+ output.results
151
+ |> Enum.map(&{&1.text, &1.score})
152
+ |> Kino.Bumblebee.ScoredList.new()
153
+ |> then(&Kino.Frame.render(frame, &1))
154
+ end)
155
+
156
+ Kino.Layout.grid([form, frame], boxed: true, gap: 16)
157
+ ```