Commit
•
7ae16f7
1
Parent(s):
6fe951d
feat: Gather keywords from transcribed text
Browse files
lib/medical_transcription/application.ex
CHANGED
@@ -14,6 +14,9 @@ defmodule MedicalTranscription.Application do
|
|
14 |
|> AudioTagger.Transcriber.prepare_featurizer()
|
15 |
|> AudioTagger.Transcriber.prepare_serving(@model_name)
|
16 |
|
|
|
|
|
|
|
17 |
children = [
|
18 |
MedicalTranscriptionWeb.Telemetry,
|
19 |
MedicalTranscription.Repo,
|
@@ -29,6 +32,13 @@ defmodule MedicalTranscription.Application do
|
|
29 |
batch_size: 4,
|
30 |
batch_timeout: 100
|
31 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
# Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
|
33 |
# {MedicalTranscription.Worker, arg},
|
34 |
# Start to serve requests, typically the last entry
|
|
|
14 |
|> AudioTagger.Transcriber.prepare_featurizer()
|
15 |
|> AudioTagger.Transcriber.prepare_serving(@model_name)
|
16 |
|
17 |
+
token_classification_serving =
|
18 |
+
AudioTagger.KeywordFinder.prepare_token_classification_serving()
|
19 |
+
|
20 |
children = [
|
21 |
MedicalTranscriptionWeb.Telemetry,
|
22 |
MedicalTranscription.Repo,
|
|
|
32 |
batch_size: 4,
|
33 |
batch_timeout: 100
|
34 |
},
|
35 |
+
{
|
36 |
+
Nx.Serving,
|
37 |
+
serving: token_classification_serving,
|
38 |
+
name: TokenClassificationServing,
|
39 |
+
batch_size: 1,
|
40 |
+
batch_timeout: 100
|
41 |
+
},
|
42 |
# Start a worker by calling: MedicalTranscription.Worker.start_link(arg)
|
43 |
# {MedicalTranscription.Worker, arg},
|
44 |
# Start to serve requests, typically the last entry
|
lib/medical_transcription_web/components/components.ex
CHANGED
@@ -131,6 +131,7 @@ defmodule MedicalTranscriptionWeb.Components do
|
|
131 |
end
|
132 |
|
133 |
defp feedback_button(assigns) do
|
|
|
134 |
~H"""
|
135 |
<button
|
136 |
phx-click="add_feedback"
|
|
|
131 |
end
|
132 |
|
133 |
defp feedback_button(assigns) do
|
134 |
+
# TODO: Update the UI after feedback is submitted (e.g. change the button's background color) so the user gets confirmation.
|
135 |
~H"""
|
136 |
<button
|
137 |
phx-click="add_feedback"
|
lib/medical_transcription_web/components/layouts/app.html.heex
CHANGED
@@ -11,6 +11,7 @@
|
|
11 |
|
12 |
<div class="px-6">
|
13 |
<p class="text-xs leading-normal tracking-[0.2em] font-semibold uppercase">Today</p>
|
|
|
14 |
</div>
|
15 |
</header>
|
16 |
|
|
|
11 |
|
12 |
<div class="px-6">
|
13 |
<p class="text-xs leading-normal tracking-[0.2em] font-semibold uppercase">Today</p>
|
14 |
+
<!-- TODO: Show a history of uploaded files here. -->
|
15 |
</div>
|
16 |
</header>
|
17 |
|
lib/medical_transcription_web/live/home_live/index.ex
CHANGED
@@ -11,6 +11,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
11 |
|> assign(:uploaded_file_name, nil)
|
12 |
|> stream(:transcription_rows, [])
|
13 |
|> assign(:status, :pending)
|
|
|
14 |
# To test the success UI, replace the two lines above with these:
|
15 |
# |> stream(:transcription_rows, SampleResults.get_sample_results())
|
16 |
# |> assign(:status, :success)
|
@@ -25,6 +26,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
25 |
# - Fine-tune results based on feedback:
|
26 |
# - Allow users to accept/decline suggested codes.
|
27 |
# - Train model based on user feedback for suggested codes.
|
|
|
28 |
# - Allow editing the transcription inline to correct mistakes. Then, retag based on the updated transcription.
|
29 |
# - Stream audio recording instead of uploaded audio.
|
30 |
|
@@ -32,10 +34,27 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
32 |
<div class="flex-1 flex flex-col space-y-6">
|
33 |
<%= if @status == :loading || @status == :success do %>
|
34 |
<.result_heading status={@status} filename={@uploaded_file_name} />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
<% end %>
|
36 |
|
37 |
<%= if @status != :pending do %>
|
38 |
-
<.result_list rows={@streams.transcription_rows} />
|
39 |
<% end %>
|
40 |
|
41 |
<%= if @status == :pending do %>
|
@@ -77,6 +96,7 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
77 |
socket
|
78 |
|> assign(:status, :loading)
|
79 |
|> assign(:transcription_rows, [])
|
|
|
80 |
|> assign(:uploaded_file_name, filename)
|
81 |
|
82 |
{:noreply, socket}
|
@@ -109,14 +129,26 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
109 |
def handle_info({:transcription_row, chunk_result}, socket) do
|
110 |
# The processing sends a message as each chunk of text is coded. See here for some background and potential
|
111 |
# inspiration for this: https://elixirforum.com/t/liveview-asynchronous-task-patterns/44695
|
|
|
|
|
|
|
|
|
112 |
{:noreply, stream_insert(socket, :transcription_rows, chunk_result)}
|
113 |
end
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
@impl true
|
116 |
def handle_info({ref, _result}, socket) do
|
117 |
# See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the
|
118 |
# task here: https://fly.io/phoenix-files/liveview-async-task/
|
119 |
Process.demonitor(ref, [:flush])
|
|
|
120 |
{:noreply, assign(socket, :status, :success)}
|
121 |
end
|
122 |
|
@@ -127,6 +159,16 @@ defmodule MedicalTranscriptionWeb.HomeLive.Index do
|
|
127 |
)
|
128 |
end
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
def error_to_string(:too_large), do: "Too large"
|
131 |
def error_to_string(:not_accepted), do: "You have selected an unacceptable file type"
|
132 |
end
|
|
|
11 |
|> assign(:uploaded_file_name, nil)
|
12 |
|> stream(:transcription_rows, [])
|
13 |
|> assign(:status, :pending)
|
14 |
+
|> assign(:keywords_for_rows, [])
|
15 |
# To test the success UI, replace the two lines above with these:
|
16 |
# |> stream(:transcription_rows, SampleResults.get_sample_results())
|
17 |
# |> assign(:status, :success)
|
|
|
26 |
# - Fine-tune results based on feedback:
|
27 |
# - Allow users to accept/decline suggested codes.
|
28 |
# - Train model based on user feedback for suggested codes.
|
29 |
+
# - After uploading an audio file, save the results to the DB (current date & time, transcription, assigned codes, etc.)
|
30 |
# - Allow editing the transcription inline to correct mistakes. Then, retag based on the updated transcription.
|
31 |
# - Stream audio recording instead of uploaded audio.
|
32 |
|
|
|
34 |
<div class="flex-1 flex flex-col space-y-6">
|
35 |
<%= if @status == :loading || @status == :success do %>
|
36 |
<.result_heading status={@status} filename={@uploaded_file_name} />
|
37 |
+
<!-- TODO: Include this data inline with the transcription text. -->
|
38 |
+
<div>
|
39 |
+
<p>Summary Keywords:</p>
|
40 |
+
<ul>
|
41 |
+
<%= for keywords <- @keywords_for_rows do %>
|
42 |
+
<li class="list-disc">
|
43 |
+
<%= keywords
|
44 |
+
|> Enum.filter(fn keyword -> keyword.score > 0.25 end)
|
45 |
+
|> Enum.take(3)
|
46 |
+
|> Enum.map(fn keyword ->
|
47 |
+
"#{keyword.label} (#{keyword.score})"
|
48 |
+
end)
|
49 |
+
|> Enum.join(", ") %>
|
50 |
+
</li>
|
51 |
+
<% end %>
|
52 |
+
</ul>
|
53 |
+
</div>
|
54 |
<% end %>
|
55 |
|
56 |
<%= if @status != :pending do %>
|
57 |
+
<.result_list rows={@streams.transcription_rows} keywords_for_rows={@keywords_for_rows} />
|
58 |
<% end %>
|
59 |
|
60 |
<%= if @status == :pending do %>
|
|
|
96 |
socket
|
97 |
|> assign(:status, :loading)
|
98 |
|> assign(:transcription_rows, [])
|
99 |
+
|> assign(:keywords_for_rows, [])
|
100 |
|> assign(:uploaded_file_name, filename)
|
101 |
|
102 |
{:noreply, socket}
|
|
|
129 |
def handle_info({:transcription_row, chunk_result}, socket) do
  # The processing sends a message as each chunk of text is coded. See here for some background and potential
  # inspiration for this: https://elixirforum.com/t/liveview-asynchronous-task-patterns/44695

  live_view_pid = self()

  # Keyword extraction reports back with its own `{:keywords_for_row, _}` message, so it is started as a
  # fire-and-forget task. `Task.async/1` would additionally deliver a `{ref, result}` reply to this process,
  # and the `handle_info({ref, _result}, socket)` clause would treat that reply as the end of the transcription
  # task, switching the status to `:success` while later chunks are still being processed.
  # `Task.start/1` is neither linked nor monitored, so a failing keyword lookup does not take the LiveView down.
  {:ok, _pid} = Task.start(fn -> find_keywords(live_view_pid, chunk_result) end)

  {:noreply, stream_insert(socket, :transcription_rows, chunk_result)}
end
|
138 |
|
139 |
+
@impl true
def handle_info({:keywords_for_row, output}, socket) do
  # Keyword results are kept in arrival order: each new set of predictions is appended to the end of the
  # list already held in the `:keywords_for_rows` assign.
  updated_keywords = socket.assigns.keywords_for_rows ++ [output]

  {:noreply, assign(socket, :keywords_for_rows, updated_keywords)}
end
|
145 |
+
|
146 |
@impl true
|
147 |
def handle_info({ref, _result}, socket) do
|
148 |
# See this Fly article for the usage of Task.async to start `transcribe_and_tag_audio/2` and handle the end of the
|
149 |
# task here: https://fly.io/phoenix-files/liveview-async-task/
|
150 |
Process.demonitor(ref, [:flush])
|
151 |
+
|
152 |
{:noreply, assign(socket, :status, :success)}
|
153 |
end
|
154 |
|
|
|
159 |
)
|
160 |
end
|
161 |
|
162 |
+
# Extracts keyword candidates from one transcribed chunk and sends their scores to the LiveView.
#
# Steps:
#   1. Runs `chunk.text` through the `TokenClassificationServing` process.
#   2. Passes the tagged entities to `AudioTagger.KeywordFinder.cleanup_phrases/1`.
#   3. Builds a zero-shot classification serving from those phrases and runs it on the same text.
#   4. Sends `{:keywords_for_row, predictions}` to `live_view_pid`.
#
# Returns the message that was sent (the return value of `send/2`).
defp find_keywords(live_view_pid, chunk) do
  tagged = Nx.Serving.batched_run(TokenClassificationServing, chunk.text)

  predictions =
    case AudioTagger.KeywordFinder.cleanup_phrases(tagged.entities) do
      # With no candidate phrases there is nothing to score, so skip building a serving with zero labels
      # and report an empty result for this row instead.
      # NOTE(review): assumes cleanup_phrases/1 returns a list of phrases - confirm against KeywordFinder.
      [] ->
        []

      phrases ->
        # The serving is rebuilt for every chunk because it is prepared from this chunk's own phrases.
        serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
        Nx.Serving.run(serving, chunk.text).predictions
    end

  send(live_view_pid, {:keywords_for_row, predictions})
end
|
171 |
+
|
172 |
def error_to_string(:too_large), do: "Too large"
|
173 |
def error_to_string(:not_accepted), do: "You have selected an unacceptable file type"
|
174 |
end
|
livebooks/summarization.livemd
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Text summarization
|
2 |
+
|
3 |
+
```elixir
|
4 |
+
Mix.install(
|
5 |
+
[
|
6 |
+
{:kino_bumblebee, "~> 0.4.0"},
|
7 |
+
{:exla, ">= 0.0.0"},
|
8 |
+
{:explorer, "~> 0.7.0"},
|
9 |
+
{:kino_explorer, "~> 0.1.11"}
|
10 |
+
],
|
11 |
+
config: [nx: [default_backend: EXLA.Backend]]
|
12 |
+
)
|
13 |
+
```
|
14 |
+
|
15 |
+
## Summarize an audio transcription
|
16 |
+
|
17 |
+
```elixir
|
18 |
+
text = """
|
19 |
+
This 55-year-old man with known coronary artery disease comes for a follow-up visit today.
|
20 |
+
Last month he was admitted to our hospital with unstable angina.
|
21 |
+
He underwent heart catheterization on November 15th, 2007.
|
22 |
+
At that time he was found to have a tight 99% proxmost enosis, total occlusion and collateralization
|
23 |
+
of the mid-circumflex, right coronary artery was normal.
|
24 |
+
Ventricularography was normal and his ejection fraction was 65%.
|
25 |
+
He underwants an uncomplicated placement of a cipher drug-eleuting
|
26 |
+
stent to his proximal lesion.
|
27 |
+
The attempted coronary intervention of the circumflex was unsuccessful,
|
28 |
+
as his lesion cannot be crossed. His post procedure was uncomplicated, and he was discharged on the day following his intervention.
|
29 |
+
He comes today indicating that he is feeling great.
|
30 |
+
His current medications include aspirin,
|
31 |
+
325 milligrams daily, lipatore, 40 milligrams daily,
|
32 |
+
and platvic, 75 milligrams daily.
|
33 |
+
"""
|
34 |
+
```
|
35 |
+
|
36 |
+
## Extract keyword phrases with part-of-speech tagging
|
37 |
+
|
38 |
+
```elixir
|
39 |
+
{:ok, model_info} =
|
40 |
+
Bumblebee.load_model({:hf, "vblagoje/bert-english-uncased-finetuned-pos"})
|
41 |
+
|
42 |
+
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "bert-base-uncased"})
|
43 |
+
|
44 |
+
serving =
|
45 |
+
Bumblebee.Text.token_classification(model_info, tokenizer,
|
46 |
+
aggregation: :same,
|
47 |
+
compile: [batch_size: 1, sequence_length: 100],
|
48 |
+
defn_options: [compiler: EXLA]
|
49 |
+
)
|
50 |
+
```
|
51 |
+
|
52 |
+
```elixir
|
53 |
+
text = "This 55-year-old man with known coronary artery disease comes for a follow-up visit today.
|
54 |
+
Last month he was admitted to our hospital with unstable angina."
|
55 |
+
|
56 |
+
text = "Last month he was admitted to our hospital with unstable angina."
|
57 |
+
|
58 |
+
# Part-of-speech labels that are dropped before phrases are assembled.
ignored = ["DET", "PUNCT", "ADP", "NUM", "AUX", "PRON"]
# ignored = []

# Sentinel appended to an adjective so that the next kept phrase is merged into it.
continuation = "[CONTINUATION]"

output = Nx.Serving.run(serving, text)

output.entities
|> Enum.reject(&(&1.label in ignored))
|> Enum.reduce([], fn entity, phrases ->
  # What remains after filtering is e.g. "VERB", "NOUN", and "ADJ".
  next_phrase =
    if entity.label == "ADJ" do
      "#{entity.phrase} #{continuation}"
    else
      entity.phrase
    end

  # `phrases` is built newest-first so the most recent phrase can be inspected and replaced at the head
  # of the list; the order is restored once after the reduce.
  case phrases do
    [previous | rest] ->
      # If the previous phrase is waiting for a continuation, merge this phrase into it.
      if String.ends_with?(previous, continuation) do
        [String.replace(previous, continuation, next_phrase) | rest]
      else
        [next_phrase | phrases]
      end

    [] ->
      [next_phrase]
  end
end)
|> Enum.reverse()
# An adjective that was the last kept token never receives a continuation; drop the dangling sentinel so
# it does not leak into the resulting phrases.
|> Enum.map(&String.replace_suffix(&1, " " <> continuation, ""))
|
91 |
+
```
|
92 |
+
|
93 |
+
<!-- livebook:{"attrs":"eyJjb21waWxlciI6ImV4bGEiLCJsYWJlbHMiOiJsYXN0IG1vbnRoLCBhZG1pdHRlZCwgaG9zcGl0YWwsIHVuc3RhYmxlIGFuZ2luYSIsInNlcXVlbmNlX2xlbmd0aCI6MTAwLCJ0YXNrX2lkIjoiemVyb19zaG90X3RleHRfY2xhc3NpZmljYXRpb24iLCJ0b3BfayI6bnVsbCwidmFyaWFudF9pZCI6ImJhcnRfbGFyZ2VfbW5saSJ9","chunks":[[0,396],[398,509]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
|
94 |
+
|
95 |
+
```elixir
|
96 |
+
{:ok, model_info} = Bumblebee.load_model({:hf, "facebook/bart-large-mnli"})
|
97 |
+
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "facebook/bart-large-mnli"})
|
98 |
+
labels = ["last month", "admitted", "hospital", "unstable angina"]
|
99 |
+
|
100 |
+
serving =
|
101 |
+
Bumblebee.Text.zero_shot_classification(model_info, tokenizer, labels,
|
102 |
+
compile: [batch_size: 1, sequence_length: 100],
|
103 |
+
defn_options: [compiler: EXLA]
|
104 |
+
)
|
105 |
+
|
106 |
+
text_input = Kino.Input.textarea("Text", default: "One day I will see the world.")
|
107 |
+
form = Kino.Control.form([text: text_input], submit: "Run")
|
108 |
+
frame = Kino.Frame.new()
|
109 |
+
|
110 |
+
Kino.listen(form, fn %{data: %{text: text}} ->
|
111 |
+
Kino.Frame.render(frame, Kino.Text.new("Running..."))
|
112 |
+
output = Nx.Serving.run(serving, text)
|
113 |
+
|
114 |
+
output.predictions
|
115 |
+
|> Enum.map(&{&1.label, &1.score})
|
116 |
+
|> Kino.Bumblebee.ScoredList.new()
|
117 |
+
|> then(&Kino.Frame.render(frame, &1))
|
118 |
+
end)
|
119 |
+
|
120 |
+
Kino.Layout.grid([form, frame], boxed: true, gap: 16)
|
121 |
+
```
|
122 |
+
|
123 |
+
## Question answering
|
124 |
+
|
125 |
+
<!-- livebook:{"attrs":"eyJjb21waWxlciI6ImV4bGEiLCJzZXF1ZW5jZV9sZW5ndGgiOjUwMCwidGFza19pZCI6InF1ZXN0aW9uX2Fuc3dlcmluZyIsInZhcmlhbnRfaWQiOiJkaXN0aWxiZXJ0X2Jhc2VfY2FzZWQifQ","chunks":[[0,344],[346,595]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
|
126 |
+
|
127 |
+
```elixir
|
128 |
+
{:ok, model_info} = Bumblebee.load_model({:hf, "distilbert-base-cased-distilled-squad"})
|
129 |
+
|
130 |
+
{:ok, tokenizer} =
|
131 |
+
Bumblebee.load_tokenizer({:hf, "distilbert-base-cased-distilled-squad"})
|
132 |
+
|
133 |
+
serving =
|
134 |
+
Bumblebee.Text.question_answering(model_info, tokenizer,
|
135 |
+
compile: [batch_size: 1, sequence_length: 500],
|
136 |
+
defn_options: [compiler: EXLA]
|
137 |
+
)
|
138 |
+
|
139 |
+
inputs = [
|
140 |
+
question: Kino.Input.text("Question", default: "Where do I live?"),
|
141 |
+
context: Kino.Input.textarea("Context", default: "My name is Sarah and I live in London.")
|
142 |
+
]
|
143 |
+
|
144 |
+
form = Kino.Control.form(inputs, submit: "Run")
|
145 |
+
frame = Kino.Frame.new()
|
146 |
+
|
147 |
+
Kino.listen(form, fn %{data: %{question: question, context: context}} ->
|
148 |
+
output = Nx.Serving.run(serving, %{question: question, context: context})
|
149 |
+
|
150 |
+
output.results
|
151 |
+
|> Enum.map(&{&1.text, &1.score})
|
152 |
+
|> Kino.Bumblebee.ScoredList.new()
|
153 |
+
|> then(&Kino.Frame.render(frame, &1))
|
154 |
+
end)
|
155 |
+
|
156 |
+
Kino.Layout.grid([form, frame], boxed: true, gap: 16)
|
157 |
+
```
|