Spaces:

headway
/

medicode

Runtime error

App Files Files Community

medicode / lib /medical_transcription /classification_server.ex

timgremore

chore: Rename app to Medicode

3f219b5 9 months ago

raw

history blame

3.57 kB

	defmodule Medicode.ClassificationServer do
	@moduledoc """
	GenServer responsible for classifying transcription text
	"""
	use GenServer

	alias AudioTagger.KeywordFinder

	alias Medicode.Coding
	alias Medicode.Coding.CodeVectorMatch
	alias Medicode.Transcriptions

	@registry :transcription_registry

	@doc """
	Starts a classification server for a single transcription chunk.

	Takes a map with the `:chunk` to process, which becomes the server state as
	`{:chunk, chunk}`, and the `:name` used to register the process through
	`via_tuple/1`.
	"""
	def start_link(%{chunk: chunk, name: name}) do
	  registered_name = via_tuple(name)

	  GenServer.start_link(__MODULE__, {:chunk, chunk}, name: registered_name)
	end

	@doc """
	Builds the child specification a supervisor uses to start this server.

	`process_name` is passed unchanged to `start_link/1` as its only argument.
	The `:transient` restart value means the child process is restarted only if
	it terminates abnormally.
	"""
	def child_spec(process_name) do
	  start_mfa = {__MODULE__, :start_link, [process_name]}

	  %{id: __MODULE__, start: start_mfa, restart: :transient}
	end

	# Keeps the `{:chunk, chunk}` tuple as the server state and defers the actual
	# work to `handle_continue(:start, state)`.
	@impl GenServer
	def init({:chunk, _chunk} = state),
	  do: {:ok, state, {:continue, :start}}

	# Runs the whole classification job for the chunk held in state, then stops.
	@impl GenServer
	def handle_continue(:start, {:chunk, chunk} = state) do
	  # Announce on the transcription's topic that work on this chunk has begun.
	  topic = "transcriptions:#{chunk.transcription_id}"
	  Phoenix.PubSub.broadcast(:medicode_pubsub, topic, {:classification_started, chunk.id})

	  classify_chunk(chunk)
	  find_keywords(chunk)

	  # The server only ever processes this one chunk, so it stops once done.
	  {:stop, :normal, state}
	end

	# Broadcasts `{:classification_finished, chunk.id}` on the transcription's
	# topic whenever this callback runs; the stop reason is not inspected.
	@impl GenServer
	def terminate(reason, {:chunk, chunk}) do
	  topic = "transcriptions:#{chunk.transcription_id}"
	  finished_message = {:classification_finished, chunk.id}

	  Phoenix.PubSub.broadcast(:medicode_pubsub, topic, finished_message)

	  reason
	end

	# Builds the `:via` tuple that addresses a process by `name` in the registry
	# named by `@registry`.
	defp via_tuple(name) do
	  {:via, Registry, {@registry, name}}
	end

	# Builds one attribute map per code-vector match returned by
	# `Coding.process_chunk/1` and hands the list to
	# `Transcriptions.replace_all_code_vectors_for_chunk/2`.
	#
	# Raises if `Coding.get_code_vector_by_code!/1` raises for a matched code.
	defp classify_chunk(chunk) do
	  attrs =
	    chunk
	    |> Coding.process_chunk()
	    |> Enum.map(fn %CodeVectorMatch{} = match ->
	      # NOTE(review): this looks up one code vector per match; confirm whether
	      # a batched lookup exists in Coding if the number of matches grows.
	      code_vector = Coding.get_code_vector_by_code!(match.code)

	      # The weighting entries are atoms; they are stored as strings.
	      weighting_as_string = Enum.map(match.weighting, &Atom.to_string/1)

	      %{
	        transcription_chunk_id: chunk.id,
	        code_vector_id: code_vector.id,
	        cosine_similarity: match.cosine_similarity,
	        weighting: weighting_as_string,
	        # Presumably resolved to a real timestamp by the bulk insert inside
	        # replace_all_code_vectors_for_chunk/2 - verify against that function.
	        inserted_at: {:placeholder, :timestamp},
	        updated_at: {:placeholder, :timestamp}
	      }
	    end)

	  Transcriptions.replace_all_code_vectors_for_chunk(chunk, attrs)
	end

	# Runs the token-classification serving over the chunk's text, cleans up the
	# returned entities into phrases, and creates a keyword record for each phrase
	# selected by `determine_keywords/2`.
	#
	# Called only for its side effects; returns `:ok`.
	defp find_keywords(chunk) do
	  %{entities: entities} =
	    Nx.Serving.batched_run(Medicode.TokenClassificationServing, chunk.text)

	  phrases = KeywordFinder.cleanup_phrases(entities)

	  # Then, we use one of two processes to determine which to show as keywords
	  chunk.text
	  |> determine_keywords(phrases)
	  |> Enum.each(fn %{label: label, score: score} ->
	    # TODO: Replace loop with an insert_all call and check for conflicts
	    # so that duplicate keywords are ignored.
	    # The result of each create call is intentionally not inspected here.
	    Transcriptions.create_keyword_for_chunk(%{
	      transcription_chunk_id: chunk.id,
	      keyword: label,
	      score: score
	    })
	  end)
	end

	# Chooses which of the extracted phrases to keep as keywords for `text`.
	defp determine_keywords(text, phrases) do
	  case phrases do
	    # There is transcribed text, but no phrases were found.
	    [] ->
	      []

	    _ ->
	      # Two strategies are available:
	      #
	      # 1. A slower process that looks to classify the text by the extracted phrases.
	      # serving = KeywordFinder.prepare_zero_shot_classification_serving(phrases)
	      # %{predictions: predictions} = Nx.Serving.run(serving, text)
	      #
	      # 2. A fast process finding the phrase closest in vector space to the whole text.
	      KeywordFinder.find_most_similar_label(text, phrases, 2)
	  end
	end
	end