noahsettersten
committed on
Commit
•
dd55ec1
1
Parent(s):
755855d
chore: Parse labels from CSV
Browse files
lib/medical_transcription/transcriber.ex
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
defmodule MedicalTranscription.Transcriber do
|
|
|
|
|
2 |
def stream_transcription_and_search(live_view_pid, audio_file_path) do
|
3 |
{model_info, tokenizer} = AudioTagger.Classifier.SemanticSearch.prepare_model()
|
4 |
-
|
5 |
-
|
6 |
-
label_embeddings_path = Path.join(__DIR__, "../../icd10_vector_tensors.bin")
|
7 |
|
8 |
# TODO: We could explore storing these vectors within pgvector or Pinecone.io
|
9 |
label_embeddings =
|
@@ -40,4 +41,18 @@ defmodule MedicalTranscription.Transcriber do
|
|
40 |
send(live_view_pid, {:transcription_row, chunk_result})
|
41 |
end
|
42 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
end
|
|
|
1 |
defmodule MedicalTranscription.Transcriber do
|
2 |
+
@input_filename "CMS32_DESC_LONG_SHORT_DX"
|
3 |
+
|
4 |
def stream_transcription_and_search(live_view_pid, audio_file_path) do
|
5 |
{model_info, tokenizer} = AudioTagger.Classifier.SemanticSearch.prepare_model()
|
6 |
+
labels_df = read_labels_from_csv!()
|
7 |
+
label_embeddings_path = Path.join(__DIR__, "../../#{@input_filename}.bin")
|
|
|
8 |
|
9 |
# TODO: We could explore storing these vectors within pgvector or Pinecone.io
|
10 |
label_embeddings =
|
|
|
41 |
send(live_view_pid, {:transcription_row, chunk_result})
|
42 |
end
|
43 |
end
|
44 |
+
|
45 |
+
# Reads the label CSV named by @input_filename, located two directories above
# this source file, into an Explorer data frame.
#
# The three source columns are forced to strings, then the first three columns
# are kept (selected by position) and renamed to "code", "long_description"
# and "short_description".
#
# Uses the bang variant `from_csv!`, so no error tuple is returned from here.
defp read_labels_from_csv! do
  csv_path = Path.join(__DIR__, "../../#{@input_filename}.csv")

  # Pin every column we care about to :string rather than relying on inference.
  column_types = [
    {"DIAGNOSIS CODE", :string},
    {"LONG DESCRIPTION", :string},
    {"SHORT DESCRIPTION", :string}
  ]

  raw_labels = Explorer.DataFrame.from_csv!(csv_path, dtypes: column_types)
  first_three = Explorer.DataFrame.select(raw_labels, [0, 1, 2])

  Explorer.DataFrame.rename(first_three, ["code", "long_description", "short_description"])
end
|
58 |
end
|