noahsettersten
committed on
Commit
•
96d4a31
1
Parent(s):
d8e387b
chore!: Adapt to changes in audio tagger's mix task
Browse files
lib/medical_transcription/transcriber.ex
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
defmodule MedicalTranscription.Transcriber do
|
2 |
-
@input_filename "
|
3 |
|
4 |
alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
|
5 |
|
@@ -9,10 +9,8 @@ defmodule MedicalTranscription.Transcriber do
|
|
9 |
labels_df = read_labels_from_csv!()
|
10 |
|
11 |
# TODO: We could explore storing these vectors within pgvector or Pinecone.io
|
12 |
-
label_embeddings_path = Path.join(__DIR__, "../../#{@input_filename}.bin")
|
13 |
-
|
14 |
label_embeddings =
|
15 |
-
AudioTagger.Classifier.SemanticSearch.load_label_vectors(
|
16 |
|
17 |
# Audio transcription + semantic search
|
18 |
for {chunk, index} <-
|
@@ -37,17 +35,12 @@ defmodule MedicalTranscription.Transcriber do
|
|
37 |
end
|
38 |
|
39 |
defp read_labels_from_csv! do
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
{"SHORT DESCRIPTION", :string}
|
47 |
-
]
|
48 |
-
)
|
49 |
-
|> Explorer.DataFrame.select([0, 1, 2])
|
50 |
-
|> Explorer.DataFrame.rename(["code", "long_description", "short_description"])
|
51 |
end
|
52 |
|
53 |
# defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
|
@@ -70,4 +63,12 @@ defmodule MedicalTranscription.Transcriber do
|
|
70 |
seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
|
71 |
end
|
72 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
end
|
|
|
1 |
defmodule MedicalTranscription.Transcriber do
|
2 |
+
@input_filename "icd9_codelist"
|
3 |
|
4 |
alias AudioTagger.Classifier.{SemanticSearch, SemanticSearchInput}
|
5 |
|
|
|
9 |
labels_df = read_labels_from_csv!()
|
10 |
|
11 |
# TODO: We could explore storing these vectors within pgvector or Pinecone.io
|
|
|
|
|
12 |
label_embeddings =
|
13 |
+
AudioTagger.Classifier.SemanticSearch.load_label_vectors(vectors_filepath())
|
14 |
|
15 |
# Audio transcription + semantic search
|
16 |
for {chunk, index} <-
|
|
|
35 |
end
|
36 |
|
37 |
defp read_labels_from_csv! do
|
38 |
+
column_definitions = [
|
39 |
+
{"code", :string},
|
40 |
+
{"long_description", :string}
|
41 |
+
]
|
42 |
+
|
43 |
+
Explorer.DataFrame.from_csv!(labels_filepath(), dtypes: column_definitions)
|
|
|
|
|
|
|
|
|
|
|
44 |
end
|
45 |
|
46 |
# defp process_chunk(model_tuple, labels_df, label_embeddings, index, chunk) do
|
|
|
63 |
seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
|
64 |
end
|
65 |
end
|
66 |
+
|
67 |
+
defp input_filepath() do
|
68 |
+
AudioTagger.SampleData.cache_dir()
|
69 |
+
|> Path.join(@input_filename)
|
70 |
+
end
|
71 |
+
|
72 |
+
defp vectors_filepath(), do: "#{input_filepath()}.bin"
|
73 |
+
defp labels_filepath(), do: "#{input_filepath()}.csv"
|
74 |
end
|