noahsettersten committed on
Commit
96d4a31
1 Parent(s): d8e387b

chore!: Adapt to changes in audio tagger's mix task

Browse files
lib/medical_transcription/transcriber.ex CHANGED
@@ -1,5 +1,5 @@
1
  defmodule MedicalTranscription.Transcriber do
2
- @input_filename "CMS32_DESC_LONG_SHORT_DX"
3
 
4
  alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
5
 
@@ -9,10 +9,8 @@ defmodule MedicalTranscription.Transcriber do
9
  labels_df = read_labels_from_csv!()
10
 
11
  # TODO: We could explore storing these vectors within pgvector or Pinecone.io
12
- label_embeddings_path = Path.join(__DIR__, "../../#{@input_filename}.bin")
13
-
14
  label_embeddings =
15
- AudioTagger.Classifier.SemanticSearch.load_label_vectors(label_embeddings_path)
16
 
17
  # Audio transcription + semantic search
18
  for {chunk, index} <-
@@ -37,17 +35,12 @@ defmodule MedicalTranscription.Transcriber do
37
  end
38
 
39
  defp read_labels_from_csv! do
40
- __DIR__
41
- |> Path.join("../../#{@input_filename}.csv")
42
- |> Explorer.DataFrame.from_csv!(
43
- dtypes: [
44
- {"DIAGNOSIS CODE", :string},
45
- {"LONG DESCRIPTION", :string},
46
- {"SHORT DESCRIPTION", :string}
47
- ]
48
- )
49
- |> Explorer.DataFrame.select([0, 1, 2])
50
- |> Explorer.DataFrame.rename(["code", "long_description", "short_description"])
51
  end
52
 
53
  # defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
@@ -70,4 +63,12 @@ defmodule MedicalTranscription.Transcriber do
70
  seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
71
  end
72
  end
 
 
 
 
 
 
 
 
73
  end
 
1
  defmodule MedicalTranscription.Transcriber do
2
+ @input_filename "icd9_codelist"
3
 
4
  alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
5
 
 
9
  labels_df = read_labels_from_csv!()
10
 
11
  # TODO: We could explore storing these vectors within pgvector or Pinecone.io
 
 
12
  label_embeddings =
13
+ AudioTagger.Classifier.SemanticSearch.load_label_vectors(vectors_filepath())
14
 
15
  # Audio transcription + semantic search
16
  for {chunk, index} <-
 
35
  end
36
 
37
  defp read_labels_from_csv! do
38
+ column_definitions = [
39
+ {"code", :string},
40
+ {"long_description", :string}
41
+ ]
42
+
43
+ Explorer.DataFrame.from_csv!(labels_filepath(), dtypes: column_definitions)
 
 
 
 
 
44
  end
45
 
46
  # defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
 
63
  seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
64
  end
65
  end
66
+
67
+ defp input_filepath() do
68
+ AudioTagger.SampleData.cache_dir()
69
+ |> Path.join(@input_filename)
70
+ end
71
+
72
+ defp vectors_filepath(), do: "#{input_filepath()}.bin"
73
+ defp labels_filepath(), do: "#{input_filepath()}.csv"
74
  end