noahsettersten
committed on
Commit
•
1777102
1
Parent(s):
b1bd8cc
fix: Handle empty list of phrases when finding keywords
Browse files
Although we were handling an empty string in the `find_keywords/2`
function, it was still possible for a non-empty string to produce an
empty list of phrases after the token classification and cleanup
process. This rearranges the logic by extracting a new function named
`determine_keywords/2`, which has a clause that handles an empty list of
phrases and returns an empty list of keywords.
lib/medical_transcription_web/components/transcription_text_component.ex
CHANGED
@@ -130,27 +130,38 @@ defmodule MedicalTranscriptionWeb.Components.TranscriptionTextComponent do
|
|
130 |
|
131 |
defp find_keywords(live_view_pid, text) do
|
132 |
# First, we use token classification to determine parts of speech and then retrieve the verb and adjective+noun phrases.
|
133 |
-
|
134 |
-
Nx.Serving.batched_run(MedicalTranscription.TokenClassificationServing, text)
|
135 |
|
136 |
-
|
|
|
137 |
|
138 |
-
|
139 |
-
# 1. A slower process that looks to classify the text by the extracted phrases.
|
140 |
-
# serving = KeywordFinder.prepare_zero_shot_classification_serving(phrases)
|
141 |
-
# %{predictions: predictions} = Nx.Serving.run(serving, text)
|
142 |
-
|
143 |
-
# 2. A fast process finding the phrase closest in vector space to the whole text.
|
144 |
-
predictions = KeywordFinder.find_most_similar_label(text, phrases, 2)
|
145 |
|
146 |
# For now, retrieve the top three keywords that have a score of more than 0.25
|
147 |
keywords =
|
148 |
-
|
149 |
|> Enum.filter(fn keyword -> keyword.score > 0.25 end)
|
150 |
|> Enum.take(3)
|
151 |
|
152 |
-
send(live_view_pid, {:new_keywords, predictions})
|
153 |
-
|
154 |
{:ok, %{keywords: keywords}}
|
155 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
end
|
|
|
130 |
|
131 |
defp find_keywords(live_view_pid, text) do
|
132 |
# First, we use token classification to determine parts of speech and then retrieve the verb and adjective+noun phrases.
|
133 |
+
phrases = classify_tokens(text)
|
|
|
134 |
|
135 |
+
# Then, we use one of two processes to determine which to show as keywords
|
136 |
+
full_keywords = determine_keywords(text, phrases)
|
137 |
|
138 |
+
send(live_view_pid, {:new_keywords, full_keywords})
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
# For now, retrieve the top three keywords that have a score of more than 0.25
|
141 |
keywords =
|
142 |
+
full_keywords
|
143 |
|> Enum.filter(fn keyword -> keyword.score > 0.25 end)
|
144 |
|> Enum.take(3)
|
145 |
|
|
|
|
|
146 |
{:ok, %{keywords: keywords}}
|
147 |
end
|
148 |
+
|
149 |
+
defp classify_tokens(text) do
|
150 |
+
%{entities: entities} =
|
151 |
+
Nx.Serving.batched_run(MedicalTranscription.TokenClassificationServing, text)
|
152 |
+
|
153 |
+
KeywordFinder.cleanup_phrases(entities)
|
154 |
+
end
|
155 |
+
|
156 |
+
# This clause handles cases where there is transcribed text, but no phrases were found.
|
157 |
+
defp determine_keywords(_text, []), do: []
|
158 |
+
|
159 |
+
defp determine_keywords(text, phrases) do
|
160 |
+
# 1. A slower process that looks to classify the text by the extracted phrases.
|
161 |
+
# serving = KeywordFinder.prepare_zero_shot_classification_serving(phrases)
|
162 |
+
# %{predictions: predictions} = Nx.Serving.run(serving, text)
|
163 |
+
|
164 |
+
# 2. A fast process finding the phrase closest in vector space to the whole text.
|
165 |
+
KeywordFinder.find_most_similar_label(text, phrases, 2)
|
166 |
+
end
|
167 |
end
|