|
defmodule Medicode.ClassificationServer do
  @moduledoc """
  GenServer responsible for classifying transcription text.

  Each server is started with a single transcription chunk. Once initialised it
  broadcasts `{:classification_started, chunk_id}` on the
  `"transcriptions:<transcription_id>"` topic, stores the code-vector matches
  and the keywords found for the chunk, and then stops with reason `:normal`.
  `terminate/2` broadcasts `{:classification_finished, chunk_id}` on the same
  topic.
  """

  use GenServer

  alias AudioTagger.KeywordFinder

  alias Medicode.Coding
  alias Medicode.Coding.CodeVectorMatch
  alias Medicode.Transcriptions

  @registry :transcription_registry

  @doc """
  Starts a classification server for `chunk`.

  The process is registered under `name` in the `:transcription_registry`
  registry.
  """
  @spec start_link(%{chunk: map(), name: term()}) :: GenServer.on_start()
  def start_link(%{chunk: chunk, name: name}) do
    GenServer.start_link(__MODULE__, {:chunk, chunk}, name: via_tuple(name))
  end

  @doc """
  This function will be called by the supervisor to retrieve the specification
  of the child process. The child process is configured to restart only if it
  terminates abnormally.

  Despite its name, `process_name` is handed unchanged to `start_link/1`, so it
  must be a map containing the `:chunk` and `:name` keys.
  """
  @spec child_spec(%{chunk: map(), name: term()}) :: Supervisor.child_spec()
  def child_spec(process_name) do
    %{
      id: __MODULE__,
      start: {__MODULE__, :start_link, [process_name]},
      restart: :transient
    }
  end

  @impl GenServer
  def init({:chunk, _chunk} = init_arg) do
    # Defer the actual work to handle_continue/2 so that start_link/1 returns
    # without waiting for the classification to complete.
    {:ok, init_arg, {:continue, :start}}
  end

  @impl GenServer
  def handle_continue(:start, {:chunk, chunk} = state) do
    broadcast(chunk, :classification_started)

    classify_chunk(chunk)
    find_keywords(chunk)

    # The server exists only for this one run; stop normally when it is done.
    {:stop, :normal, state}
  end

  @impl GenServer
  def terminate(reason, {:chunk, chunk} = _state) do
    broadcast(chunk, :classification_finished)

    reason
  end

  defp via_tuple(name),
    do: {:via, Registry, {@registry, name}}

  # Broadcasts `{event, chunk.id}` on the chunk's transcription topic.
  defp broadcast(chunk, event) do
    Phoenix.PubSub.broadcast(
      :medicode_pubsub,
      "transcriptions:#{chunk.transcription_id}",
      {event, chunk.id}
    )
  end

  # Builds one attribute map per match returned by `Coding.process_chunk/1` and
  # replaces the code vectors stored for the chunk with them.
  defp classify_chunk(chunk) do
    attrs =
      chunk
      |> Coding.process_chunk()
      |> Enum.map(&code_vector_attrs(&1, chunk))

    Transcriptions.replace_all_code_vectors_for_chunk(chunk, attrs)
  end

  # Converts a single `%CodeVectorMatch{}` into the attribute map handed to
  # `Transcriptions.replace_all_code_vectors_for_chunk/2`.
  #
  # NOTE(review): this performs one `Coding.get_code_vector_by_code!/1` lookup
  # per match; consider batching if `Coding` offers a bulk fetch.
  defp code_vector_attrs(
         %CodeVectorMatch{
           code: code,
           cosine_similarity: cosine_similarity,
           weighting: weighting
         },
         chunk
       ) do
    code_vector = Coding.get_code_vector_by_code!(code)

    %{
      transcription_chunk_id: chunk.id,
      code_vector_id: code_vector.id,
      cosine_similarity: cosine_similarity,
      # The weighting atoms are passed on as strings.
      weighting: Enum.map(weighting, &Atom.to_string/1),
      # Presumably resolved to a real timestamp by the bulk insert inside
      # `Transcriptions` -- TODO confirm.
      inserted_at: {:placeholder, :timestamp},
      updated_at: {:placeholder, :timestamp}
    }
  end

  # Runs token classification on the chunk text, cleans up the resulting
  # entities into phrases, and creates a keyword record for each keyword found.
  defp find_keywords(chunk) do
    %{entities: entities} =
      Nx.Serving.batched_run(Medicode.TokenClassificationServing, chunk.text)

    phrases = KeywordFinder.cleanup_phrases(entities)

    # Enum.each/2 rather than Enum.map/2: only the side effect matters and the
    # caller ignores the return value.
    chunk.text
    |> determine_keywords(phrases)
    |> Enum.each(fn %{label: label, score: score} ->
      Transcriptions.create_keyword_for_chunk(%{
        transcription_chunk_id: chunk.id,
        keyword: label,
        score: score
      })
    end)
  end

  # With no candidate phrases there is nothing to compare against.
  defp determine_keywords(_text, []), do: []

  defp determine_keywords(text, phrases) do
    KeywordFinder.find_most_similar_label(text, phrases, 2)
  end
end
|
|