Spaces:

headway
/

medicode

Runtime error

App Files Files Community

medicode / lib /medical_transcription /coding.ex

noahsettersten

chore: Address Credo messages

59fddbd 9 months ago

raw

history blame

5.75 kB

	defmodule MedicalTranscription.Coding do
	@moduledoc """
	Takes a portion of text and searches for closely matching code vectors stored in the database, comparing embeddings
	by cosine similarity (via `Pgvector.Ecto.Query`) and weighting the results by any related past user feedback.
	"""

	alias MedicalTranscription.Repo
	import Ecto.Query
	import Pgvector.Ecto.Query

	alias MedicalTranscription.Coding.{CodeVector, CodeVectorMatch}

	@doc """
	Builds a `CodeVector` changeset from `params` and inserts it through `Repo`.

	Returns `{:ok, "Success!"}` when the insert succeeds, or `{:error, errors}` where `errors` is whatever
	`Repo.collect_errors/1` produces for the rejected changeset.
	"""
	def insert_vector(params) do
	  insert_result =
	    %CodeVector{}
	    |> CodeVector.changeset(params)
	    |> Repo.insert()

	  case insert_result do
	    {:ok, _code_vector} -> {:ok, "Success!"}
	    {:error, invalid_changeset} -> {:error, Repo.collect_errors(invalid_changeset)}
	  end
	end

	@doc "Returns `true` when at least one `CodeVector` row has the given `code`, otherwise `false`."
	def exists_for_code?(code) do
	  CodeVector
	  |> where([v], v.code == ^code)
	  |> Repo.exists?()
	end

	@doc """
	Reports whether the `CodeVector` table holds at least 14,567 rows.

	NOTE(review): 14_567 is presumably the size of the full ICD-9 code list loaded into the table; confirm against
	the task that imports the codes.
	"""
	def icd9_present? do
	  Repo.aggregate(CodeVector, :count) >= 14_567
	end

	@doc """
	Takes a chunk of transcribed text and classifies it based on the list of code vectors in the database.

	1. First, create a vector embedding for the passed `text`.
	2. Then, look to see if there is any related user-provided feedback via
	   `MedicalTranscription.Feedback.find_related_feedback/2`.
	3. Then, search for the closest codes in `code_vectors`, and also load the codes referenced by that feedback.
	4. Pass through the combined, de-duplicated matches and modify the similarity scores based on any relevant
	   previous feedback.
	5. Finally, drop matches below the similarity threshold and sort the rest by descending similarity.

	## Options

	  * `:num_results` - how many nearest code vectors to fetch (default `5`).
	  * `:similarity_threshold` - minimum weighted cosine similarity a match must reach to be kept
	    (default `0.80`).

	The full `opts` list is also forwarded to `MedicalTranscription.Feedback.find_related_feedback/2`.

	Returns a list of `%CodeVectorMatch{}` structs.
	"""
	def process_chunk(text, opts \\ []) do
	  k = Keyword.get(opts, :num_results, 5)
	  similarity_threshold = Keyword.get(opts, :similarity_threshold, 0.80)

	  search_vector_for_db = compute_vector_as_list(text)

	  past_feedbacks =
	    MedicalTranscription.Feedback.find_related_feedback(search_vector_for_db, opts)

	  code_vectors = find_similar(search_vector_for_db, k)
	  code_vectors_for_feedback = find_for_feedback(search_vector_for_db, past_feedbacks)

	  code_vectors
	  |> Enum.concat(code_vectors_for_feedback)
	  # A code can be both a nearest neighbour and referenced by feedback; keep a single copy of it.
	  |> Enum.uniq_by(& &1.id)
	  |> weight_code_vectors(past_feedbacks)
	  |> filter_below_threshold(similarity_threshold)
	  |> sort_by_similarity()
	end

	@doc """
	Creates a vector embedding for `text` using the text embedding serving in the application's supervision tree.

	Runs `text` through `MedicalTranscription.TextEmbeddingServing` with `Nx.Serving.batched_run/2`, takes the
	`:embedding` entry of the result and flattens that tensor into a plain list.
	"""
	def compute_vector_as_list(text) do
	  MedicalTranscription.TextEmbeddingServing
	  |> Nx.Serving.batched_run(text)
	  |> Map.get(:embedding)
	  |> Nx.to_flat_list()
	end

	# Multipliers applied to a match's cosine similarity for each related past feedback: a positive response scales
	# the score up, a negative response scales it down.
	@positive_response_factor 1.1
	@negative_response_factor 0.9

	# Applies feedback weighting to every `%CodeVectorMatch{}` in `code_vector_matches`, using the list of
	# `%CodeFeedback{}`es in `past_feedbacks` that were found for the same input text.
	defp weight_code_vectors(code_vector_matches, past_feedbacks) do
	  for code_vector_match <- code_vector_matches do
	    weight_code_vector(code_vector_match, past_feedbacks)
	  end
	end

	# Adjusts a single match using only the feedbacks whose `code_vector_id` equals the match's `id`.
	#
	# With no relevant feedback the match is returned with `weighting: [:none]` and its similarity untouched.
	# Otherwise each relevant feedback is folded in turn: the similarity is rescaled from the accumulated value and the
	# feedback's weighting tag is appended, so several feedbacks compound.
	defp weight_code_vector(code_vector_match, past_feedbacks) do
	  relevant_feedbacks =
	    Enum.filter(past_feedbacks, &(&1.code_vector_id == code_vector_match.id))

	  case relevant_feedbacks do
	    [] ->
	      %{code_vector_match | weighting: [:none]}

	    feedbacks ->
	      Enum.reduce(feedbacks, code_vector_match, fn feedback, acc ->
	        new_attributes = weight_code_vector_similarity(acc.cosine_similarity, feedback)

	        # Conflicting keys are resolved by `merge_code_vector_match_attributes/3`.
	        Map.merge(acc, new_attributes, &merge_code_vector_match_attributes/3)
	      end)
	  end
	end

	# Produces the attributes to merge into a match for one piece of feedback.
	#
	# No feedback (`nil`): only a `[:none]` weighting tag, the similarity is left out of the result.
	defp weight_code_vector_similarity(_similarity, nil), do: %{weighting: [:none]}

	# Positive response: scale the similarity by `@positive_response_factor`.
	defp weight_code_vector_similarity(similarity, %{response: true}),
	  do: %{weighting: [:positive], cosine_similarity: similarity * @positive_response_factor}

	# Negative response: scale the similarity by `@negative_response_factor`.
	defp weight_code_vector_similarity(similarity, %{response: false}),
	  do: %{weighting: [:negative], cosine_similarity: similarity * @negative_response_factor}

	# Conflict resolver for `Map.merge/3`: `:weighting` lists are concatenated (existing tags first), while for any
	# other key the incoming value replaces the existing one.
	defp merge_code_vector_match_attributes(:weighting, existing, incoming), do: existing ++ incoming
	defp merge_code_vector_match_attributes(_key, _existing, incoming), do: incoming

	# Keeps only the matches whose `cosine_similarity` is at least `similarity_threshold`; input order is preserved.
	defp filter_below_threshold(code_vector_matches, similarity_threshold) do
	  for match <- code_vector_matches,
	      match.cosine_similarity >= similarity_threshold,
	      do: match
	end

	# Weighting can change the scores after the database has already ordered the rows, so the matches are re-sorted
	# here: highest `cosine_similarity` first, with equal scores keeping their relative order.
	defp sort_by_similarity(code_vector_matches) do
	  Enum.sort_by(code_vector_matches, & &1.cosine_similarity, :desc)
	end

	# Returns the `limit` code vectors closest to `search_vector`, ordered by ascending cosine distance between each
	# row's `description_vector` and the search vector. Every row is loaded as a `%CodeVectorMatch{}` whose
	# `cosine_similarity` is `1 - cosine_distance` and whose `weighting` starts out empty.
	defp find_similar(search_vector, limit) do
	  CodeVector
	  |> order_by([v], cosine_distance(v.description_vector, ^search_vector))
	  |> limit(^limit)
	  |> select([v], %CodeVectorMatch{
	    id: v.id,
	    code: v.code,
	    description: v.description,
	    cosine_similarity: 1 - cosine_distance(v.description_vector, ^search_vector),
	    weighting: []
	  })
	  |> Repo.all()
	end

	# Retrieves the code vectors referenced in past feedback for a given search vector. This allows including
	# additional codes in the weighting process beyond the closest ones returned by `find_similar/2`.
	#
	# Each row is loaded as a `%CodeVectorMatch{}` whose `cosine_similarity` is `1 - cosine_distance` to
	# `search_vector` and whose `weighting` starts out empty.
	#
	# With no past feedback there are no ids to look up, so the database round trip is skipped.
	defp find_for_feedback(_search_vector, []), do: []

	defp find_for_feedback(search_vector, past_feedbacks) do
	  code_vector_ids_for_past_feedback = Enum.map(past_feedbacks, & &1.code_vector_id)

	  Repo.all(
	    from v in CodeVector,
	      order_by: cosine_distance(v.description_vector, ^search_vector),
	      where: v.id in ^code_vector_ids_for_past_feedback,
	      select: %CodeVectorMatch{
	        id: v.id,
	        code: v.code,
	        description: v.description,
	        cosine_similarity: 1 - cosine_distance(v.description_vector, ^search_vector),
	        weighting: []
	      }
	  )
	end
	end