noahsettersten committed on
Commit
09e4e72
1 Parent(s): 9022101

refactor: Move keyword highlighting to separate component

Browse files
lib/medical_transcription_web/components/keyword_highlighter.ex ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defmodule MedicalTranscriptionWeb.Components.KeywordHighlighter do
2
+ use MedicalTranscriptionWeb, :html
3
+ # alias Phoenix.HTML.Tag
4
+
5
+ def highlight(assigns) do
6
+ # TODO: Eliminate the need to use `raw` here.
7
+ ~H"""
8
+ <%= raw(format_text(@text, @keywords)) %>
9
+ """
10
+ end
11
+
12
+ # To highlight the keywords within the transcription, we find the start and end index of each keyword within `text`,
13
+ # split the text at those points, and then join it back together, wrapping the keyword portion in a span with the
14
+ # `text-brand` class.
15
+ defp format_text(text, [first | rest]) do
16
+ if String.contains?(first.label, "and") do
17
+ format_text(text, rest)
18
+ else
19
+ # [one | two] = String.split(text, first.label)
20
+ #
21
+ # keyword =
22
+ # Tag.content_tag(:span, first.label,
23
+ # class: "text-brand",
24
+ # title: "Score: #{Float.round(first.score, 2)}"
25
+ # )
26
+ #
27
+ # with_replaced_keyword =
28
+ # Tag.content_tag :div do
29
+ # "#{one}#{keyword}#{two}"
30
+ # end
31
+
32
+ # with_replaced_keyword = ~E"""
33
+ # #{one}
34
+ # <span class="text-brand" title="Score: #{Float.round(first.score, 2)}">#{first.label}</span>
35
+ # #{two}
36
+ # """
37
+
38
+ # with_replaced_keyword = "#{one}#{keyword}#{two}"
39
+
40
+ with_replaced_keyword =
41
+ text
42
+ |> String.split(first.label)
43
+ |> Enum.join(
44
+ "<span class=\"text-brand\" title=\"Score: #{Float.round(first.score, 2)}\">#{first.label}</span>"
45
+ )
46
+
47
+ format_text(with_replaced_keyword, rest)
48
+ end
49
+ end
50
+
51
+ defp format_text(text, []), do: text
52
+ end
lib/medical_transcription_web/components/transcription_text_component.ex CHANGED
@@ -1,6 +1,7 @@
1
  defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
2
  use MedicalTranscriptionWeb, :live_component
3
  import MedicalTranscriptionWeb.Components
 
4
  alias AudioTagger.Structs.TagResult
5
 
6
  @impl Phoenix.LiveComponent
@@ -30,8 +31,7 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
30
  <p class="text-[28px] leading-normal text-type-black-tertiary">
31
  <.async_result :let={keywords} assign={@keywords}>
32
  <:loading><%= @row.text %></:loading>
33
- <!-- TODO: Replace this with a method that builds HTML tags instead of using `raw` -->
34
- <%= raw(format_text(@row.text, keywords)) %>
35
  </.async_result>
36
  </p>
37
 
@@ -55,33 +55,17 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
55
  """
56
  end
57
 
58
- # To highlight the keywords within the transcription, we find the start and end index of each keyword within `text`,
59
- # split the text at those points, and then join it back together, wrapping the keyword portion in a span with the
60
- # `text-brand` class.
61
- defp format_text(text, [first | rest]) do
62
- if String.contains?(first.label, "and") do
63
- format_text(text, rest)
64
- else
65
- with_replaced_keyword =
66
- text
67
- |> String.split(first.label)
68
- |> Enum.join(
69
- "<span class=\"text-brand\" title=\"Score: #{Float.round(first.score, 2)}\">#{first.label}</span>"
70
- )
71
-
72
- format_text(with_replaced_keyword, rest)
73
- end
74
- end
75
-
76
- defp format_text(text, []), do: text
77
-
78
  defp find_keywords(live_view_pid, text) do
 
79
  %{entities: entities} = Nx.Serving.batched_run(TokenClassificationServing, text)
80
  phrases = AudioTagger.KeywordFinder.cleanup_phrases(entities)
81
 
 
 
82
  # serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
83
  # %{predictions: predictions} = Nx.Serving.run(serving, text)
84
 
 
85
  predictions = AudioTagger.KeywordFinder.find_most_similar_label(text, phrases, 2)
86
 
87
  # For now, retrieve the top three keywords that have a score of more than 0.25
 
1
  defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
2
  use MedicalTranscriptionWeb, :live_component
3
  import MedicalTranscriptionWeb.Components
4
+ import MedicalTranscriptionWeb.Components.KeywordHighlighter
5
  alias AudioTagger.Structs.TagResult
6
 
7
  @impl Phoenix.LiveComponent
 
31
  <p class="text-[28px] leading-normal text-type-black-tertiary">
32
  <.async_result :let={keywords} assign={@keywords}>
33
  <:loading><%= @row.text %></:loading>
34
+ <.highlight text={@row.text} keywords={keywords} />
 
35
  </.async_result>
36
  </p>
37
 
 
55
  """
56
  end
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  defp find_keywords(live_view_pid, text) do
59
+ # First, we use token classification to determine parts of speech and then retrieve the verb and adjective+noun phrases.
60
  %{entities: entities} = Nx.Serving.batched_run(TokenClassificationServing, text)
61
  phrases = AudioTagger.KeywordFinder.cleanup_phrases(entities)
62
 
63
+ # Then, we use one of two processes to determine which to show as keywords:
64
+ # 1. A slower process that looks to classify the text by the extracted phrases.
65
  # serving = AudioTagger.KeywordFinder.prepare_zero_shot_classification_serving(phrases)
66
  # %{predictions: predictions} = Nx.Serving.run(serving, text)
67
 
68
+ # 2. A fast process finding the phrase closest in vector space to the whole text.
69
  predictions = AudioTagger.KeywordFinder.find_most_similar_label(text, phrases, 2)
70
 
71
  # For now, retrieve the top three keywords that have a score of more than 0.25