timgremore
committed on
Commit
•
7f7d174
1
Parent(s):
7e656f0
feat: Support ICD-10 bulk inserts with notebook
Browse files
livebooks/icd-10-codes.livemd
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ICD-10 Codes
|
2 |
+
|
3 |
+
## Section
|
4 |
+
|
5 |
+
```elixir
|
6 |
+
defmodule VectorPrecomputation do
  @moduledoc """
  Notebook helpers that turn ICD-10 code/description pairs into
  `Medicode.Coding.CodeVector` rows.
  """

  @doc """
  Computes a description vector for every usable row of `df` and upserts the
  results with a single `Medicode.Repo.insert_all/3` call.

  `df` must expose `"code"` and `"long_description"` columns. Rows whose code is
  `nil` or an empty string are skipped. The second argument is accepted so
  existing call sites keep working; it is not used.

  Returns the result of `Medicode.Repo.insert_all/3`, or `{0, nil}` without
  touching the database when no row qualifies.
  """
  def precompute_vectors(df, _frame) do
    rows =
      df
      |> Explorer.DataFrame.to_rows_stream()
      |> Enum.filter(&code_present?/1)

    insert_rows(rows)
  end

  @doc """
  Computes the vector for a single `code`/`description` pair and inserts it
  through `Medicode.Coding.CodeVector.changeset/2`.

  Returns `{:ok, "Success!"}` on insert, or `{:error, changeset}` as returned by
  `Medicode.Repo.insert/1`.
  """
  def foo_compute_vector_for_code(code, description) do
    params = %{
      code: code,
      description: description,
      description_vector: Medicode.Coding.compute_vector_as_list(description)
    }

    changeset = Medicode.Coding.CodeVector.changeset(%Medicode.Coding.CodeVector{}, params)

    # A non-matching `{:error, changeset}` falls through `with` unchanged.
    with {:ok, _record} <- Medicode.Repo.insert(changeset) do
      {:ok, "Success!"}
    end
  end

  # True when the row carries a non-empty string code.
  defp code_present?(%{"code" => code}), do: is_binary(code) and code != ""

  # Nothing to insert: skip the progress bar and the database round-trip.
  # NOTE(review): ProgressBar.render/3 with a total of 0 presumably cannot
  # compute a percentage - confirm against the progress_bar package.
  defp insert_rows([]), do: {0, nil}

  defp insert_rows(rows) do
    total = length(rows)
    now = DateTime.utc_now() |> DateTime.truncate(:second)

    ProgressBar.render(0, total, suffix: :count)

    entries =
      rows
      |> Enum.with_index(1)
      |> Enum.map(fn {%{"code" => code, "long_description" => description}, position} ->
        vector_for_db = Medicode.Coding.compute_vector_as_list(description)

        # The total is the filtered row count, so the bar reaches 100%.
        ProgressBar.render(position, total, suffix: :count)

        %{
          id: Ecto.UUID.generate(),
          code: code,
          description: description,
          description_vector: vector_for_db,
          inserted_at: now,
          updated_at: now
        }
      end)

    # On a conflicting code, refresh everything except the primary key and the
    # original insertion timestamp; `:replace_all` would overwrite both with the
    # freshly generated values above.
    Medicode.Repo.insert_all(Medicode.Coding.CodeVector, entries,
      on_conflict: {:replace_all_except, [:id, :inserted_at]},
      conflict_target: [:code]
    )
  end
end
|
60 |
+
```
|
61 |
+
|
62 |
+
```elixir
|
63 |
+
# Download the CDC ICD-10-CM April 2024 archive.
# `decode_body: false` keeps the response as raw bytes so it can be written to
# disk as-is, whatever Req would otherwise do with a zip response.
%Req.Response{body: body} =
  Req.get!(
    "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2024-Update/icd10cm-Codes-Descriptions-April-2024.zip",
    decode_body: false
  )

frame = Kino.Frame.new() |> Kino.render()

tmp_dir = System.tmp_dir!()
tmp_file = Path.join(tmp_dir, "icd10cm-Codes-Descriptions.zip")

File.write!(tmp_file, body)

{:ok, files} = :zip.unzip(String.to_charlist(tmp_file), [{:cwd, tmp_dir}])

# Fail with a readable message instead of handing `nil` to File.read!/1.
icd10cm_codes_file =
  Enum.find(files, fn item ->
    String.contains?(to_string(item), "icd10cm-codes-April-2024")
  end) || raise "icd10cm-codes-April-2024 file not found in the downloaded archive"

# Double any embedded quote so a description cannot break out of its CSV field.
escape_csv = fn value -> String.replace(value, "\"", "\"\"") end

# Each source line is "<code> <description>"; blank lines are dropped.
csv_rows =
  icd10cm_codes_file
  |> File.read!()
  |> String.split("\n", trim: true)
  |> Enum.map(fn line ->
    {code, description} =
      case String.split(line, " ", parts: 2) do
        [code, rest] -> {code, String.trim(rest)}
        [code] -> {code, ""}
      end

    "\"#{escape_csv.(code)}\",\"#{escape_csv.(description)}\""
  end)

{:ok, df} =
  ["\"code\",\"long_description\"" | csv_rows]
  |> Enum.join("\n")
  |> Explorer.DataFrame.load_csv(header: true, delimiter: ",")

n_rows = Explorer.DataFrame.n_rows(df)
number_in_group = 500
# `ceil` so a trailing partial group is counted and the "x of y" message adds up.
number_of_groups = ceil(n_rows / number_in_group)

# Zero-based index of the first group to process. Keep at 0 for a full import;
# raise it only to resume an interrupted run.
first_group = 0

# Row offset of each group; the explicit step makes the range empty when the
# data frame has no rows.
group_offsets = 0..(n_rows - 1)//number_in_group

group_offsets
|> Enum.with_index()
|> Enum.drop(first_group)
|> Enum.each(fn {offset, n} ->
  Kino.Frame.clear(frame)
  Kino.Frame.append(frame, "Processing group #{n + 1} of #{number_of_groups}")

  df
  |> Explorer.DataFrame.slice(offset, number_in_group)
  |> VectorPrecomputation.precompute_vectors(frame)
end)

Kino.DataTable.new(df)
|
121 |
+
```
|
priv/repo/migrations/20240306143720_add_unique_constraint_to_code_vectors_code.exs
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defmodule Medicode.Repo.Migrations.AddUniqueConstraintToCodeVectorsCode do
  @moduledoc false
  use Ecto.Migration

  # Explicit index name, kept in one place.
  @index_name :index_for_code_vector_code_uniqueness

  # Adds a unique index on `code_vectors.code`. Written inside `change/0`, so
  # Ecto can reverse it on rollback.
  def change do
    create unique_index(:code_vectors, [:code], name: @index_name)
  end
end
|