Spaces:

headway
/

medicode

Runtime error

noahsettersten committed on Jan 16

Commit

96d4a31

•

1 Parent(s): d8e387b

chore!: Adapt to changes in audio tagger's mix task

Files changed (1) hide show

lib/medical_transcription/transcriber.ex CHANGED Viewed

@@ -1,5 +1,5 @@
 defmodule MedicalTranscription.Transcriber do
-  @input_filename "CMS32_DESC_LONG_SHORT_DX"
   alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
@@ -9,10 +9,8 @@ defmodule MedicalTranscription.Transcriber do
     labels_df = read_labels_from_csv!()
     # TODO: We could explore storing these vectors within pgvector or Pinecone.io
-    label_embeddings_path = Path.join(__DIR__, "../../#{@input_filename}.bin")
     label_embeddings =
-      AudioTagger.Classifier.SemanticSearch.load_label_vectors(label_embeddings_path)
     # Audio transcription + semantic search
     for {chunk, index} <-
@@ -37,17 +35,12 @@ defmodule MedicalTranscription.Transcriber do
   end
   defp read_labels_from_csv! do
-    __DIR__
-    |> Path.join("../../#{@input_filename}.csv")
-    |> Explorer.DataFrame.from_csv!(
-      dtypes: [
-        {"DIAGNOSIS CODE", :string},
-        {"LONG DESCRIPTION", :string},
-        {"SHORT DESCRIPTION", :string}
-      ]
-    )
-    |> Explorer.DataFrame.select([0, 1, 2])
-    |> Explorer.DataFrame.rename(["code", "long_description", "short_description"])
   end
   # defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
@@ -70,4 +63,12 @@ defmodule MedicalTranscription.Transcriber do
       seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
     end
   end
 end

 defmodule MedicalTranscription.Transcriber do
+  @input_filename "icd9_codelist"
   alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
     labels_df = read_labels_from_csv!()
     # TODO: We could explore storing these vectors within pgvector or Pinecone.io
     label_embeddings =
+      AudioTagger.Classifier.SemanticSearch.load_label_vectors(vectors_filepath())
     # Audio transcription + semantic search
     for {chunk, index} <-
   end
   defp read_labels_from_csv! do
+    column_definitions = [
+      {"code", :string},
+      {"long_description", :string}
+    ]
+    Explorer.DataFrame.from_csv!(labels_filepath(), dtypes: column_definitions)
   end
   # defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
       seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
     end
   end
+  defp input_filepath() do
+    AudioTagger.SampleData.cache_dir()
+    |> Path.join(@input_filename)
+  end
+  defp vectors_filepath(), do: "#{input_filepath()}.bin"
+  defp labels_filepath(), do: "#{input_filepath()}.csv"
 end