|
|
|
|
|
|
|
|
|
```elixir
defmodule VectorPrecomputation do
  @moduledoc """
  Computes description vectors with `Medicode.Coding.compute_vector_as_list/1`
  and stores them as `Medicode.Coding.CodeVector` rows.
  """

  alias Medicode.Coding
  alias Medicode.Coding.CodeVector
  alias Medicode.Repo

  @doc """
  Computes a vector for every row of `df` that has a non-empty `"code"` and
  upserts all of them with a single `Medicode.Repo.insert_all/3` call.

  `df` is an Explorer data frame whose rows expose the `"code"` and
  `"long_description"` columns. The second argument is accepted but not used.

  Rows are matched on `:code`. When a row with the same code already exists,
  its `:id` and `:inserted_at` are kept and every other column is replaced.

  A progress bar is rendered while the vectors are computed. Returns whatever
  `Medicode.Repo.insert_all/3` returns.
  """
  @spec precompute_vectors(Explorer.DataFrame.t(), term()) :: {non_neg_integer(), nil | [term()]}
  def precompute_vectors(df, _frame) do
    now = DateTime.utc_now() |> DateTime.truncate(:second)

    # Drop rows without a usable code up front so the progress total matches
    # the number of vectors that are actually computed. `nil` codes are
    # rejected here instead of raising inside `String.length/1`.
    rows =
      df
      |> Explorer.DataFrame.to_rows_stream()
      |> Enum.filter(fn %{"code" => code} -> is_binary(code) and code != "" end)

    total = length(rows)
    render_progress(0, total)

    params =
      rows
      |> Enum.with_index(1)
      |> Enum.map(fn {%{"code" => code, "long_description" => description}, position} ->
        description_vector = Coding.compute_vector_as_list(description)
        render_progress(position, total)

        %{
          id: Ecto.UUID.generate(),
          code: code,
          description: description,
          description_vector: description_vector,
          inserted_at: now,
          updated_at: now
        }
      end)

    # `:replace_all` would also overwrite the primary key and `inserted_at` of
    # an existing row with the freshly generated values; keep those two stable.
    Repo.insert_all(CodeVector, params,
      on_conflict: {:replace_all_except, [:id, :inserted_at]},
      conflict_target: [:code]
    )
  end

  @doc """
  Computes the vector for `description` and inserts a single
  `Medicode.Coding.CodeVector` built from `code`, `description` and that vector.

  Returns `{:ok, "Success!"}` when the insert succeeds and
  `{:error, changeset}` when it does not.
  """
  @spec foo_compute_vector_for_code(term(), term()) ::
          {:ok, String.t()} | {:error, Ecto.Changeset.t()}
  def foo_compute_vector_for_code(code, description) do
    params = %{
      code: code,
      description: description,
      description_vector: Coding.compute_vector_as_list(description)
    }

    changeset = CodeVector.changeset(%CodeVector{}, params)

    # An `{:error, changeset}` result falls through `with` unchanged.
    with {:ok, _record} <- Repo.insert(changeset) do
      {:ok, "Success!"}
    end
  end

  # Renders the progress bar. Skipped for an empty batch, where the total is
  # zero and there is no progress to report.
  defp render_progress(_current, 0), do: :ok
  defp render_progress(current, total), do: ProgressBar.render(current, total, suffix: :count)
end
```

```elixir
# Fetch the April 2024 ICD-10-CM codes/descriptions archive from the CDC.
%Req.Response{body: body} =
  Req.get!(
    "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2024-Update/icd10cm-Codes-Descriptions-April-2024.zip"
  )

# Frame used further down for status messages.
frame = Kino.render(Kino.Frame.new())

# Write the archive into the system temp directory and unpack it there.
tmp_dir = System.tmp_dir!()
tmp_file = Path.join([tmp_dir, "icd10cm-Codes-Descriptions.zip"])
File.write!(tmp_file, body)

{:ok, files} = :zip.unzip(to_charlist(tmp_file), cwd: tmp_dir)

# Pick the extracted file that holds the code list (`nil` when none matches).
icd10cm_codes_file =
  Enum.find(files, &String.contains?(to_string(&1), "icd10cm-codes-April-2024"))

# Each non-blank line of the codes file is parsed as "<code> <long description>":
# everything up to the first space is the code, the trimmed remainder is the
# description. Lines without a description get an empty one.
#
# The data frame is built directly from the parsed values instead of going
# through hand-assembled CSV text, so a description that contains a double
# quote cannot corrupt the parse, and blank lines (such as the one after the
# final newline) no longer become empty rows.
{codes, descriptions} =
  icd10cm_codes_file
  |> File.read!()
  |> String.split("\n", trim: true)
  |> Enum.map(fn line ->
    case String.split(line, " ", parts: 2) do
      [code, long_description] -> {code, String.trim(long_description)}
      [code] -> {String.trim(code), ""}
    end
  end)
  |> Enum.reject(fn {code, _description} -> code == "" end)
  |> Enum.unzip()

df = Explorer.DataFrame.new(code: codes, long_description: descriptions)

n_rows = Explorer.DataFrame.n_rows(df)
number_in_group = 500

# Ceiling division: a trailing partial group is counted, and no empty extra
# group is produced when `n_rows` is an exact multiple of `number_in_group`.
number_of_groups = div(n_rows + number_in_group - 1, number_in_group)

# Zero-based index of the first group to process. Keep it at 0 for a full run;
# raise it only to resume a run that was interrupted part-way through.
first_group = 0

# The explicit `//1` step keeps the range empty (instead of counting down)
# when there is nothing left to process.
Enum.each(first_group..(number_of_groups - 1)//1, fn n ->
  Kino.Frame.clear(frame)
  Kino.Frame.append(frame, "Processing group #{n + 1} of #{number_of_groups}")

  df
  |> Explorer.DataFrame.slice(n * number_in_group, number_in_group)
  |> VectorPrecomputation.precompute_vectors(frame)
end)

# Show the full table as the result of the cell.
Kino.DataTable.new(df)
```
|