noahsettersten
committed on
Commit
•
09e4e72
1
Parent(s):
9022101
refactor: Move keyword highlighting to separate component
Browse files
lib/medical_transcription_web/components/keyword_highlighter.ex
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule MedicalTranscriptionWeb.Components.KeywordHighlighter do
  @moduledoc """
  Function component that renders a piece of text with keyword occurrences wrapped in a
  `<span class="text-brand">` whose `title` shows the keyword's score.
  """
  use MedicalTranscriptionWeb, :html

  @doc """
  Renders `@text` with every occurrence of each keyword label highlighted.

  ## Assigns

    * `:text` - the string to render.
    * `:keywords` - a list of maps/structs exposing `:label` (string) and `:score`
      (float; it is passed to `Float.round/2`, which raises for non-floats).

  The text and the labels are HTML-escaped before being emitted, so the component no
  longer relies on `raw/1`.
  """
  def highlight(assigns) do
    assigns = assign(assigns, :highlighted, format_text(assigns.text, assigns.keywords))

    ~H"""
    <%= @highlighted %>
    """
  end

  # Builds the highlighted output as safe iodata.
  #
  # The text is kept as a list of tagged segments (`{:text, binary}` or
  # `{:keyword, label, score}`). Each keyword only splits the remaining `:text` segments, so a
  # later keyword can never match inside markup (or inside a label) produced by an earlier one.
  defp format_text(text, keywords) do
    rendered =
      keywords
      |> Enum.reduce([{:text, text}], &apply_keyword/2)
      |> Enum.map(&render_segment/1)

    {:safe, rendered}
  end

  # Splits every plain-text segment around occurrences of the keyword's label, unless the
  # label is one we deliberately ignore.
  defp apply_keyword(%{label: label} = keyword, segments) do
    if skip_label?(label) do
      segments
    else
      Enum.flat_map(segments, &split_segment(&1, label, keyword.score))
    end
  end

  # An empty label would make `String.split/2` split between every grapheme, so it is ignored.
  defp skip_label?(""), do: true

  # NOTE(review): this is a substring check, so labels such as "hand" or "gland" are skipped
  # too. Kept as-is because the intent (skip joined phrases?) can't be confirmed from here.
  defp skip_label?(label), do: String.contains?(label, "and")

  defp split_segment({:text, text}, label, score) do
    text
    |> String.split(label)
    |> Enum.map(&{:text, &1})
    |> Enum.intersperse({:keyword, label, score})
  end

  # Already-highlighted segments are left untouched.
  defp split_segment(segment, _label, _score), do: [segment]

  defp render_segment({:text, text}), do: escape(text)

  defp render_segment({:keyword, label, score}) do
    [
      ~s(<span class="text-brand" title="Score: ),
      escape("#{Float.round(score, 2)}"),
      ~s(">),
      escape(label),
      "</span>"
    ]
  end

  # Returns the HTML-escaped iodata for `value`.
  defp escape(value) do
    {:safe, escaped} = Phoenix.HTML.html_escape(value)
    escaped
  end
end
|
lib/medical_transcription_web/components/transcription_text_component.ex
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
2 |
use MedicalTranscriptionWeb, :live_component
|
3 |
import MedicalTranscriptionWeb.Components
|
|
|
4 |
alias AudioTagger.Structs.TagResult
|
5 |
|
6 |
@impl Phoenix.LiveComponent
|
@@ -30,8 +31,7 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
|
30 |
<p class="text-[28px] leading-normal text-type-black-tertiary">
|
31 |
<.async_result :let={keywords} assign={@keywords}>
|
32 |
<:loading><%= @row.text %></:loading>
|
33 |
-
|
34 |
-
<%= raw(format_text(@row.text, keywords)) %>
|
35 |
</.async_result>
|
36 |
</p>
|
37 |
|
@@ -55,33 +55,17 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
|
55 |
"""
|
56 |
end
|
57 |
|
58 |
-
# To highlight the keywords within the transcription, we find the start and end index of each keyword within `text`,
|
59 |
-
# split the text at those points, and then join it back together, wrapping the keyword portion in a span with the
|
60 |
-
# `text-brand` class.
|
61 |
-
defp format_text(text, [first | rest]) do
|
62 |
-
if String.contains?(first.label, "and") do
|
63 |
-
format_text(text, rest)
|
64 |
-
else
|
65 |
-
with_replaced_keyword =
|
66 |
-
text
|
67 |
-
|> String.split(first.label)
|
68 |
-
|> Enum.join(
|
69 |
-
"<span class=\"text-brand\" title=\"Score: #{Float.round(first.score, 2)}\">#{first.label}</span>"
|
70 |
-
)
|
71 |
-
|
72 |
-
format_text(with_replaced_keyword, rest)
|
73 |
-
end
|
74 |
-
end
|
75 |
-
|
76 |
-
defp format_text(text, []), do: text
|
77 |
-
|
78 |
defp find_keywords(live_view_pid, text) do
|
|
|
79 |
%{entities: entities} = Nx.Serving.batched_run(TokenClassificationServing, text)
|
80 |
phrases = AudioTagger.KeywordFinder.cleanup_phrases(entities)
|
81 |
|
|
|
|
|
82 |
# serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
|
83 |
# %{predictions: predictions} = Nx.Serving.run(serving, text)
|
84 |
|
|
|
85 |
predictions = AudioTagger.KeywordFinder.find_most_similar_label(text, phrases, 2)
|
86 |
|
87 |
# For now, retrieve the top three keywords that have a score of more than 0.25
|
|
|
1 |
defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
2 |
use MedicalTranscriptionWeb, :live_component
|
3 |
import MedicalTranscriptionWeb.Components
|
4 |
+
import MedicalTranscriptionWeb.Components.KeywordHighlighter
|
5 |
alias AudioTagger.Structs.TagResult
|
6 |
|
7 |
@impl Phoenix.LiveComponent
|
|
|
31 |
<p class="text-[28px] leading-normal text-type-black-tertiary">
|
32 |
<.async_result :let={keywords} assign={@keywords}>
|
33 |
<:loading><%= @row.text %></:loading>
|
34 |
+
<.highlight text={@row.text} keywords={keywords} />
|
|
|
35 |
</.async_result>
|
36 |
</p>
|
37 |
|
|
|
55 |
"""
|
56 |
end
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
defp find_keywords(live_view_pid, text) do
|
59 |
+
# First, we use token classification to determine parts of speech and then retrieve the verb and adjective+noun phrases.
|
60 |
%{entities: entities} = Nx.Serving.batched_run(TokenClassificationServing, text)
|
61 |
phrases = AudioTagger.KeywordFinder.cleanup_phrases(entities)
|
62 |
|
63 |
+
# Then, we use one of two processes to determine which to show as keywords:
|
64 |
+
# 1. A slower process that looks to classify the text by the extracted phrases.
|
65 |
# serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
|
66 |
# %{predictions: predictions} = Nx.Serving.run(serving, text)
|
67 |
|
68 |
+
# 2. A fast process finding the phrase closest in vector space to the whole text.
|
69 |
predictions = AudioTagger.KeywordFinder.find_most_similar_label(text, phrases, 2)
|
70 |
|
71 |
# For now, retrieve the top three keywords that have a score of more than 0.25
|