# fly.toml app configuration file generated for medical-transcription-cpu on 2024-01-22T15:15:29-06:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = "medical-transcription-cpu"
primary_region = "ord"
kill_signal = "SIGTERM"

# TODO: Enable the following line and install the necessary CUDA packages in the Dockerfile for GPU usage.
# The volumes will need to be recreated as well to match the GPU kind.
# See https://fly.io/docs/gpus/gpu-quickstart/
# and https://fly.io/docs/gpus/getting-started-gpus/
# vm.size = "a100-40gb"

[build]

[deploy]
  release_command = "/app/bin/migrate"

[[mounts]]
  source = "data"
  destination = "/data"
  initial_size = "40gb"

[env]
  ELIXIR_ERL_OPTIONS = "-proto_dist inet6_tcp +sssdio 128"
  AUDIO_TAGGER_CACHE_DIR = "/data/cache/audio_tagger"
  BUMBLEBEE_CACHE_DIR = "/data/cache/bumblebee"
  XLA_CACHE_DIR = "/data/cache/xla"
  # XLA_TARGET = "cuda120"
  PHX_HOST = "medical-transcription-cpu.fly.dev"
  PORT = "8080"
  RELEASE_COOKIE = "0gfxcPtwryKxI2O1N0eFAg9p4MJGC-oUGShgj_wgvNEGiba5EDEJFA=="

[http_service]
  internal_port = 8080
  force_https = true
  auto_stop_machines = false
  auto_start_machines = false
  min_machines_running = 0
  processes = ["app"]

  [http_service.concurrency]
    type = "connections"
    hard_limit = 1000
    soft_limit = 1000

[[vm]]
  cpu_kind = "shared"
  cpus = 1
  memory_mb = 2048

# To generate the vector embeddings for the ICD-9 codelist, we need more RAM available.
# fly scale vm shared-cpu-8x --memory 16384
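
# Sketch of what the GPU-enabled variant referenced in the TODO above might look
# like, kept commented out so this file stays CPU-only. The size string and the
# XLA_TARGET value are taken from the commented lines in this file; verify both
# against the linked Fly.io GPU docs (and recreate the volume for the matching
# GPU kind) before enabling.
#
#   vm.size = "a100-40gb"
#
#   [env]
#     XLA_TARGET = "cuda120"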