medicode / livebooks /icd-10-codes.livemd
timgremore's picture
feat: Support ICD-10 bulk inserts with notebook
7f7d174
# ICD-10 Codes
## Section
```elixir
defmodule VectorPrecomputation do
  @moduledoc """
  Notebook helpers that compute description vectors for ICD-10 codes with
  `Medicode.Coding.compute_vector_as_list/1` and store them as
  `Medicode.Coding.CodeVector` rows.
  """

  alias Medicode.Coding
  alias Medicode.Coding.CodeVector
  alias Medicode.Repo

  @doc """
  Computes a vector for each row of `df` and upserts all of them with a single
  `Medicode.Repo.insert_all/3` call.

  `df` is an Explorer data frame with `"code"` and `"long_description"`
  columns. Rows whose code is missing (`nil`) or an empty string are skipped.
  The second argument is accepted for the caller's convenience but is unused.

  When a row with the same `code` already exists, its fields are replaced
  except for `id` and `inserted_at`, so re-running the notebook keeps the
  original primary key and creation timestamp.

  Returns whatever `Medicode.Repo.insert_all/3` returns.
  """
  def precompute_vectors(df, _frame) do
    num_rows = Explorer.DataFrame.n_rows(df)
    now = DateTime.utc_now() |> DateTime.truncate(:second)

    ProgressBar.render(0, num_rows, suffix: :count)

    params =
      df
      |> Explorer.DataFrame.to_rows_stream()
      # `is_binary/1` guards against nil cells, which `String.length/1` would raise on.
      |> Stream.filter(fn %{"code" => code} -> is_binary(code) and code != "" end)
      |> Stream.with_index()
      |> Enum.map(fn {%{"code" => code, "long_description" => description}, index} ->
        vector_for_db = Coding.compute_vector_as_list(description)

        ProgressBar.render(index + 1, num_rows, suffix: :count)

        # insert_all/3 bypasses changesets, so the id and timestamps are set by hand.
        %{
          id: Ecto.UUID.generate(),
          code: code,
          description: description,
          description_vector: vector_for_db,
          inserted_at: now,
          updated_at: now
        }
      end)

    Repo.insert_all(CodeVector, params,
      # Plain :replace_all would also overwrite the existing row's id and inserted_at.
      on_conflict: {:replace_all_except, [:id, :inserted_at]},
      conflict_target: [:code]
    )
  end

  @doc """
  Computes the vector for a single `code`/`description` pair and inserts it
  through `Medicode.Coding.CodeVector.changeset/2`.

  Returns `{:ok, "Success!"}` when the insert succeeds, or
  `{:error, changeset}` as returned by `Medicode.Repo.insert/1`.
  """
  def foo_compute_vector_for_code(code, description) do
    params = %{
      code: code,
      description: description,
      description_vector: Coding.compute_vector_as_list(description)
    }

    changeset = CodeVector.changeset(%CodeVector{}, params)

    # A non-matching `{:error, changeset}` falls through `with` unchanged.
    with {:ok, _code_vector} <- Repo.insert(changeset) do
      {:ok, "Success!"}
    end
  end
end
```
```elixir
# Download the CDC's April 2024 ICD-10-CM code/description archive.
# `decode_body: false` keeps the body as the raw zip bytes: Req's automatic
# body decoding can otherwise turn a zip response into a list of entries,
# which `File.write!/2` below could not write.
%Req.Response{body: body} =
  Req.get!(
    "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2024-Update/icd10cm-Codes-Descriptions-April-2024.zip",
    decode_body: false
  )

# Frame used to show which group is currently being processed.
frame = Kino.Frame.new() |> Kino.render()

# Write the archive to the system temp directory and extract it there.
tmp_dir = System.tmp_dir!()
tmp_file = Path.join(tmp_dir, "icd10cm-Codes-Descriptions.zip")
File.write!(tmp_file, body)

{:ok, files} = :zip.unzip(String.to_charlist(tmp_file), [{:cwd, tmp_dir}])

# Pick the codes file out of the extracted entries; fail loudly if it is absent
# instead of passing nil on to File.read!/1.
icd10cm_codes_file =
  Enum.find(files, fn item ->
    String.contains?(to_string(item), "icd10cm-codes-April-2024")
  end) || raise "icd10cm-codes-April-2024 file not found in the downloaded archive"

# CSV quoting: embedded double quotes are escaped by doubling them.
escape_csv = fn field -> String.replace(field, "\"", "\"\"") end

# Each line is "<code> <description>"; convert it to a quoted two-column CSV row.
csv_rows =
  icd10cm_codes_file
  |> File.read!()
  |> String.split("\n")
  # Drop blank lines (e.g. the one after a trailing newline) so they do not
  # become rows with an empty code.
  |> Enum.reject(fn line -> String.trim(line) == "" end)
  |> Enum.map(fn line ->
    {code, description} =
      case String.split(line, " ", parts: 2) do
        [code, rest] -> {code, String.trim(rest)}
        [code] -> {code, ""}
      end

    "\"#{escape_csv.(code)}\",\"#{escape_csv.(description)}\""
  end)

{:ok, df} =
  ["\"code\",\"long_description\"" | csv_rows]
  |> Enum.join("\n")
  |> Explorer.DataFrame.load_csv(header: true, delimiter: ",")

n_rows = Explorer.DataFrame.n_rows(df)
number_in_group = 500

# Ceiling division: a final partial group is counted, and an exact multiple of
# `number_in_group` does not produce an empty trailing slice.
number_of_groups = div(n_rows + number_in_group - 1, number_in_group)

# Zero-based index of the first group to process. Leave at 0 for a full run;
# raise it to resume an interrupted run without recomputing earlier groups.
first_group = 0

# `//1` makes the range empty (rather than descending) when there is nothing to do.
Enum.each(first_group..(number_of_groups - 1)//1, fn n ->
  Kino.Frame.clear(frame)
  Kino.Frame.append(frame, "Processing group #{n + 1} of #{number_of_groups}")

  df
  |> Explorer.DataFrame.slice(n * number_in_group, number_in_group)
  |> VectorPrecomputation.precompute_vectors(frame)
end)

Kino.DataTable.new(df)
```