noahsettersten committed on
Commit
dd55ec1
1 Parent(s): 755855d

chore: Parse labels from CSV

Browse files
lib/medical_transcription/transcriber.ex CHANGED
@@ -1,9 +1,10 @@
1
  defmodule MedicalTranscription.Transcriber do
 
 
2
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
3
  {model_info, tokenizer} = AudioTagger.Classifier.SemanticSearch.prepare_model()
4
-
5
- labels_df = AudioTagger.SampleData.icd10_codes()
6
- label_embeddings_path = Path.join(__DIR__, "../../icd10_vector_tensors.bin")
7
 
8
  # TODO: We could explore storing these vectors within pgvector or Pinecone.io
9
  label_embeddings =
@@ -40,4 +41,18 @@ defmodule MedicalTranscription.Transcriber do
40
  send(live_view_pid, {:transcription_row, chunk_result})
41
  end
42
  end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  end
 
1
  defmodule MedicalTranscription.Transcriber do
2
+ @input_filename "CMS32_DESC_LONG_SHORT_DX"
3
+
4
  def stream_transcription_and_search(live_view_pid, audio_file_path) do
5
  {model_info, tokenizer} = AudioTagger.Classifier.SemanticSearch.prepare_model()
6
+ labels_df = read_labels_from_csv!()
7
+ label_embeddings_path = Path.join(__DIR__, "../../#{@input_filename}.bin")
 
8
 
9
  # TODO: We could explore storing these vectors within pgvector or Pinecone.io
10
  label_embeddings =
 
41
  send(live_view_pid, {:transcription_row, chunk_result})
42
  end
43
  end
44
+
45
+ defp read_labels_from_csv! do
46
+ __DIR__
47
+ |> Path.join("../../#{@input_filename}.csv")
48
+ |> Explorer.DataFrame.from_csv!(
49
+ dtypes: [
50
+ {"DIAGNOSIS CODE", :string},
51
+ {"LONG DESCRIPTION", :string},
52
+ {"SHORT DESCRIPTION", :string}
53
+ ]
54
+ )
55
+ |> Explorer.DataFrame.select([0, 1, 2])
56
+ |> Explorer.DataFrame.rename(["code", "long_description", "short_description"])
57
+ end
58
  end