Add files using upload-large-folder tool
Browse files- .formatter.exs +4 -0
- .github/workflows/ci.yml +44 -0
- .gitignore +58 -0
- GRADIO_SPACE_README.md +7 -0
- LIVEBOOK_SPACE_README.md +10 -0
- MARIMO_SPACE_README.md +10 -0
- README.md +21 -0
- colab_kaggle/ml_e2e_python.ipynb +720 -0
- deps/ecto/.formatter.exs +31 -0
- deps/ecto/.hex +0 -0
- deps/ecto/CHANGELOG.md +1048 -0
- deps/ecto/README.md +200 -0
- deps/makeup_erlang/.formatter.exs +4 -0
- deps/makeup_erlang/.hex +0 -0
- deps/makeup_erlang/LICENSE +25 -0
- deps/makeup_erlang/README.md +22 -0
- deps/makeup_erlang/hex_metadata.config +23 -0
- deps/makeup_erlang/lib/makeup/lexers/erlang_lexer.ex +463 -0
- deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/application.ex +17 -0
- deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/helper.ex +45 -0
- deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/testing.ex +17 -0
- deps/makeup_erlang/mix.exs +63 -0
- erl_crash.dump +0 -0
- hf_deploy/Dockerfile +33 -0
- hf_deploy/README.md +18 -0
- hf_deploy/deploy.sh +55 -0
- hf_deploy/setup.livemd +16 -0
- hf_deploy/startup.sh +11 -0
- hf_jobs_demo.py +15 -0
- jax/ml_e2e_jax.ipynb +633 -0
- justfile +106 -0
- lib/ml_learning.ex +18 -0
- marimo/Dockerfile +18 -0
- marimo/README.md +17 -0
- marimo/ml_e2e_marimo.py +904 -0
- marimo/requirements.txt +11 -0
- mise.toml +2 -0
- mix.exs +63 -0
- mix.lock +62 -0
- ml_e2e_template.livemd +1227 -0
- ml_tutorial.livemd +430 -0
- ml_tutorial_fixed.livemd +438 -0
- ml_tutorial_latest.livemd +416 -0
- skills/gradio.md +5 -0
- skills/hf-cli.md +35 -0
- skills/hf_dataset_viewer.md +5 -0
- skills/hf_jobs.md +285 -0
- skills/training_trl.md +590 -0
- test/ml_learning_test.exs +8 -0
- test/test_helper.exs +1 -0
.formatter.exs
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Used by "mix format"
|
| 2 |
+
[
|
| 3 |
+
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
|
| 4 |
+
]
|
.github/workflows/ci.yml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [main]
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
elixir:
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
strategy:
|
| 13 |
+
matrix:
|
| 14 |
+
otp: [26.x]
|
| 15 |
+
elixir: [1.16.x]
|
| 16 |
+
steps:
|
| 17 |
+
- uses: actions/checkout@v4
|
| 18 |
+
- name: Set up Elixir
|
| 19 |
+
uses: erlef/setup-beam@v1
|
| 20 |
+
with:
|
| 21 |
+
otp-version: ${{ matrix.otp }}
|
| 22 |
+
elixir-version: ${{ matrix.elixir }}
|
| 23 |
+
- run: mix deps.get
|
| 24 |
+
- run: mix test
|
| 25 |
+
python:
|
| 26 |
+
runs-on: ubuntu-latest
|
| 27 |
+
steps:
|
| 28 |
+
- uses: actions/checkout@v4
|
| 29 |
+
- name: Set up Python
|
| 30 |
+
uses: actions/setup-python@v5
|
| 31 |
+
with:
|
| 32 |
+
python-version: "3.11"
|
| 33 |
+
- run: |
|
| 34 |
+
pip install -r colab_kaggle/requirements.txt || true
|
| 35 |
+
pip install -r gradio_hf_deploy/requirements.txt || true
|
| 36 |
+
pip install -r marimo/requirements.txt || true
|
| 37 |
+
pip install -r jax/requirements.txt || true
|
| 38 |
+
- name: Syntax check Python files
|
| 39 |
+
run: python -m py_compile $(git ls-files "*.py")
|
| 40 |
+
- name: Verify notebooks execute
|
| 41 |
+
run: |
|
| 42 |
+
pip install nbconvert
|
| 43 |
+
jupyter nbconvert --to notebook --execute colab_kaggle/ml_e2e_python.ipynb || true
|
| 44 |
+
jupyter nbconvert --to notebook --execute jax/ml_e2e_jax.ipynb || true
|
.gitignore
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# The directory Mix will write compiled artifacts to.
|
| 2 |
+
/_build/
|
| 3 |
+
|
| 4 |
+
# If you run "mix test --cover", coverage assets end up here.
|
| 5 |
+
/cover/
|
| 6 |
+
|
| 7 |
+
# The directory Mix downloads your dependencies sources to.
|
| 8 |
+
/deps/
|
| 9 |
+
|
| 10 |
+
# Where third-party dependencies like ExDoc output generated docs.
|
| 11 |
+
/doc/
|
| 12 |
+
|
| 13 |
+
# Ignore .fetch files in case you like to edit your project deps locally.
|
| 14 |
+
/.fetch
|
| 15 |
+
|
| 16 |
+
# If the VM crashes, it generates a dump, let's ignore it too.
|
| 17 |
+
erl_crash.dump
|
| 18 |
+
|
| 19 |
+
# Also ignore archive artifacts (built via "mix archive.build").
|
| 20 |
+
*.ez
|
| 21 |
+
|
| 22 |
+
# Ignore package tarball (built via "mix hex.build").
|
| 23 |
+
ml_elixir_learning-*.tar
|
| 24 |
+
|
| 25 |
+
# Temporary files, for example, from tests.
|
| 26 |
+
/tmp/
|
| 27 |
+
|
| 28 |
+
# ML model cache
|
| 29 |
+
priv/models/
|
| 30 |
+
*.params
|
| 31 |
+
*.safetensors
|
| 32 |
+
*.gguf
|
| 33 |
+
|
| 34 |
+
# Bumblebee / Hugging Face cache
|
| 35 |
+
bumblebee_cache/
|
| 36 |
+
.cache/
|
| 37 |
+
|
| 38 |
+
# OS
|
| 39 |
+
.DS_Store
|
| 40 |
+
Thumbs.db
|
| 41 |
+
|
| 42 |
+
# Editor
|
| 43 |
+
.vscode/
|
| 44 |
+
.idea/
|
| 45 |
+
*.swp
|
| 46 |
+
*.swo
|
| 47 |
+
*~
|
| 48 |
+
|
| 49 |
+
# Python (marimo / gradio / jupyter)
|
| 50 |
+
__pycache__/
|
| 51 |
+
*.pyc
|
| 52 |
+
.ipynb_checkpoints/
|
| 53 |
+
*.egg-info/
|
| 54 |
+
venv/
|
| 55 |
+
.venv/
|
| 56 |
+
|
| 57 |
+
# Livebook
|
| 58 |
+
*.beam
|
GRADIO_SPACE_README.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Gradio Space – ML in Elixir Companion
|
| 2 |
+
|
| 3 |
+
This Space runs a **Gradio** web UI that mirrors the Bumblebee examples from the Livebook template. It provides interactive demos for sentiment analysis, fill‑mask, zero‑shot classification, text generation, image classification, speech‑to‑text, and stable diffusion.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
- `app.py` – the Gradio application.
|
| 7 |
+
- `requirements.txt` – Python dependencies needed.
|
LIVEBOOK_SPACE_README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Livebook Space – ML in Elixir Companion
|
| 2 |
+
|
| 3 |
+
This Space runs the **Livebook** template `ml_e2e_template.livemd` using a Docker container. It demonstrates the full Bumblebee end‑to‑end workflow on the BEAM.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
- `ml_e2e_template.livemd` – the Livebook notebook.
|
| 7 |
+
- `hf_deploy/Dockerfile` – Docker image for running Livebook.
|
| 8 |
+
- `hf_deploy/README.md` – setup instructions.
|
| 9 |
+
|
| 10 |
+
Run the Space to launch the notebook in the browser.
|
MARIMO_SPACE_README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Marimo Space – ML in Elixir Companion
|
| 2 |
+
|
| 3 |
+
This Space runs the **marimo** notebook `ml_e2e_marimo.py` inside a Docker container. It provides a reactive UI mirroring the Livebook and Gradio examples, using the same Bumblebee models via the Hugging Face `transformers` pipeline.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
- `ml_e2e_marimo.py` – the marimo notebook.
|
| 7 |
+
- `Dockerfile` – minimal Docker image to run `marimo`.
|
| 8 |
+
- `requirements.txt` – Python dependencies.
|
| 9 |
+
|
| 10 |
+
Running the Space launches the notebook UI where you can interact with the model demos.
|
README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MLLearning
|
| 2 |
+
|
| 3 |
+
**TODO: Add description**
|
| 4 |
+
|
| 5 |
+
## Installation
|
| 6 |
+
|
| 7 |
+
If [available in Hex](https://hex.pm/docs/publish), the package can be installed
|
| 8 |
+
by adding `ml_elixir_learning` to your list of dependencies in `mix.exs`:
|
| 9 |
+
|
| 10 |
+
```elixir
|
| 11 |
+
def deps do
|
| 12 |
+
[
|
| 13 |
+
{:ml_elixir_learning, "~> 0.1.0"}
|
| 14 |
+
]
|
| 15 |
+
end
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
|
| 19 |
+
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
|
| 20 |
+
be found at <https://hexdocs.pm/ml_elixir_learning>.
|
| 21 |
+
|
colab_kaggle/ml_e2e_python.ipynb
ADDED
|
@@ -0,0 +1,720 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"toc_visible": true,
|
| 8 |
+
"gpuType": "T4"
|
| 9 |
+
},
|
| 10 |
+
"kernelspec": {
|
| 11 |
+
"name": "python3",
|
| 12 |
+
"display_name": "Python 3"
|
| 13 |
+
},
|
| 14 |
+
"language_info": {
|
| 15 |
+
"name": "python"
|
| 16 |
+
},
|
| 17 |
+
"accelerator": "GPU"
|
| 18 |
+
},
|
| 19 |
+
"cells": [
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "markdown",
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"source": [
|
| 24 |
+
"# 🐝 Machine Learning in Elixir — Python Companion
|
| 25 |
+
|
| 26 |
+
## Skills
|
| 27 |
+
- `hf_cli.md` – Hugging Face CLI usage
|
| 28 |
+
- `hf_jobs.md` – Running workloads on HF Jobs
|
| 29 |
+
- `training_trl.md` – TRL model training
|
| 30 |
+
- `hf_dataset_viewer.md` – Dataset Viewer API
|
| 31 |
+
- `gradio.md` – Gradio UI integration
|
| 32 |
+
- *(Full catalog at https://skills.sh/huggingface/skills)*
|
| 33 |
+
\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"This notebook mirrors the [Livebook template](../ml_elixir_learning/ml_e2e_template.livemd)\n",
|
| 36 |
+
"from *Machine Learning in Elixir* by Sean Moriarity, implemented in Python for\n",
|
| 37 |
+
"**Google Colab** and **Kaggle** compatibility.\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"| Elixir | Python | Purpose |\n",
|
| 40 |
+
"|--------|--------|----------|\n",
|
| 41 |
+
"| `Nx` | `numpy` | Numerical computing |\n",
|
| 42 |
+
"| `Axon` | `torch` | Neural networks |\n",
|
| 43 |
+
"| `Bumblebee` | `transformers` | Pre-trained models |\n",
|
| 44 |
+
"| `Nx.Serving` | `pipeline()` | Batched inference |\n",
|
| 45 |
+
"| `Kino` | `gradio` | Interactive UI |\n",
|
| 46 |
+
"| `EXLA` | CUDA | GPU acceleration |"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "markdown",
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"source": [
|
| 53 |
+
"## 0 — Install & Configure"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"!pip install -q transformers torch datasets accelerate gradio scikit-learn numpy matplotlib"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"cell_type": "code",
|
| 67 |
+
"execution_count": null,
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"outputs": [],
|
| 70 |
+
"source": [
|
| 71 |
+
"import numpy as np\n",
|
| 72 |
+
"import torch\n",
|
| 73 |
+
"from transformers import pipeline, AutoModel, AutoTokenizer, AutoModelForSequenceClassification\n",
|
| 74 |
+
"from transformers import AutoModelForTokenClassification, AutoModelForCausalLM\n",
|
| 75 |
+
"from transformers import CLIPProcessor, CLIPModel\n",
|
| 76 |
+
"import gradio as gr\n",
|
| 77 |
+
"from sklearn.metrics import accuracy_score, classification_report\n",
|
| 78 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 79 |
+
"import matplotlib.pyplot as plt\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
| 82 |
+
"print(f\"Device: {device}\")\n",
|
| 83 |
+
"print(f\"PyTorch: {torch.__version__}\")"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "markdown",
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"source": [
|
| 90 |
+
"## 1 — NumPy Foundations (Nx equivalent)\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"The Elixir `Nx` library provides tensor operations. Python's `numpy` is the direct\n",
|
| 93 |
+
"counterpart."
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"# --- Tensors (Nx.tensor → np.array) ---\n",
|
| 103 |
+
"scalar = np.float64(3.14)\n",
|
| 104 |
+
"vector = np.array([1.0, 2.0, 3.0])\n",
|
| 105 |
+
"matrix = np.array([[1, 2, 3], [4, 5, 6]])\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"print(f\"scalar shape={np.shape(scalar)} dtype={scalar.dtype}\")\n",
|
| 108 |
+
"print(f\"vector shape={np.shape(vector)} dtype={vector.dtype}\")\n",
|
| 109 |
+
"print(f\"matrix shape={np.shape(matrix)} dtype={matrix.dtype}\")"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"cell_type": "code",
|
| 114 |
+
"execution_count": null,
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"outputs": [],
|
| 117 |
+
"source": [
|
| 118 |
+
"# --- Operations (Nx.add → np.add, etc.) ---\n",
|
| 119 |
+
"a = np.array([1.0, 2.0, 3.0])\n",
|
| 120 |
+
"b = np.array([10.0, 20.0, 30.0])\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"print(f\"add: {a + b}\")\n",
|
| 123 |
+
"print(f\"multiply: {a * b}\")\n",
|
| 124 |
+
"print(f\"dot: {np.dot(a, b)}\")\n",
|
| 125 |
+
"print(f\"sum: {np.sum(a)}\")\n",
|
| 126 |
+
"print(f\"mean: {np.mean(a)}\")\n",
|
| 127 |
+
"print(f\"std: {np.std(a)}\")"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": null,
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": [
|
| 136 |
+
"# --- Automatic differentiation (Nx.Defn.grad → torch.autograd) ---\n",
|
| 137 |
+
"x = torch.tensor(3.0, requires_grad=True)\n",
|
| 138 |
+
"f = x**3 + 2 * x**2 # f(x) = x³ + 2x²\n",
|
| 139 |
+
"f.backward()\n",
|
| 140 |
+
"print(f\"f(3) = {f.item()}\")\n",
|
| 141 |
+
"print(f\"f'(3) = {x.grad.item()}\")\n",
|
| 142 |
+
"print(f\"expected = 3*9 + 2*2*3 = {3*9 + 2*2*3}\")"
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"cell_type": "markdown",
|
| 147 |
+
"metadata": {},
|
| 148 |
+
"source": [
|
| 149 |
+
"## 2 — Pre-trained NLP (Bumblebee equivalent)\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"In Elixir: `Bumblebee.load_model({:hf, \"...\"})` + `Nx.Serving.run()`\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"In Python: `transformers.pipeline()` is the equivalent one-liner."
|
| 154 |
+
]
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"cell_type": "markdown",
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"source": [
|
| 160 |
+
"### 2.1 Fill-Mask (BERT)"
|
| 161 |
+
]
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"cell_type": "code",
|
| 165 |
+
"execution_count": null,
|
| 166 |
+
"metadata": {},
|
| 167 |
+
"outputs": [],
|
| 168 |
+
"source": [
|
| 169 |
+
"# Elixir: {:ok, model} = Bumblebee.load_model({:hf, \"google-bert/bert-base-uncased\"})\n",
|
| 170 |
+
"# serving = Bumblebee.Text.fill_mask(model, tokenizer)\n",
|
| 171 |
+
"# Nx.Serving.run(serving, \"Elixir is a [MASK] language.\")\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"# Python equivalent:\n",
|
| 174 |
+
"fill_mask = pipeline(\"fill-mask\", model=\"google-bert/bert-base-uncased\")\n",
|
| 175 |
+
"results = fill_mask(\"Elixir is a [MASK] language.\")\n",
|
| 176 |
+
"for r in results:\n",
|
| 177 |
+
" print(f\" {r['score']:.4f} {r['token_str']:15s} {r['sequence']}\")"
|
| 178 |
+
]
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"cell_type": "markdown",
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"source": [
|
| 184 |
+
"### 2.2 Sentiment Analysis (DistilBERT)"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": null,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"sentiment = pipeline(\n",
|
| 194 |
+
" \"sentiment-analysis\",\n",
|
| 195 |
+
" model=\"distilbert/distilbert-base-uncased-finetuned-sst-2-english\"\n",
|
| 196 |
+
")\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"texts = [\n",
|
| 199 |
+
" \"Machine learning in Elixir is amazing!\",\n",
|
| 200 |
+
" \"This tutorial is boring and confusing.\",\n",
|
| 201 |
+
" \"The BEAM VM handles concurrent ML workloads well.\",\n",
|
| 202 |
+
" \"I love how functional programming simplifies ML pipelines.\",\n",
|
| 203 |
+
"]\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"for text in texts:\n",
|
| 206 |
+
" result = sentiment(text)[0]\n",
|
| 207 |
+
" print(f\" {result['label']:8s} {result['score']:.4f} ← \\\"{text[:50]}...\\\"\")"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"cell_type": "markdown",
|
| 212 |
+
"metadata": {},
|
| 213 |
+
"source": [
|
| 214 |
+
"### 2.3 Named Entity Recognition"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"ner = pipeline(\"ner\", model=\"dslim/bert-base-NER\", aggregation_strategy=\"simple\")\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"text = \"Sean Moriarity wrote Machine Learning in Elixir for Pragmatic Bookshelf. He lives in Austin, Texas.\"\n",
|
| 226 |
+
"entities = ner(text)\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"print(f\"Input: {text}\\n\")\n",
|
| 229 |
+
"for e in entities:\n",
|
| 230 |
+
" print(f\" {e['entity_group']:8s} {e['score']:.4f} {e['word']:30s} (pos {e['start']}-{e['end']})\")"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "markdown",
|
| 235 |
+
"metadata": {},
|
| 236 |
+
"source": [
|
| 237 |
+
"### 2.4 Zero-Shot Classification"
|
| 238 |
+
]
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"cell_type": "code",
|
| 242 |
+
"execution_count": null,
|
| 243 |
+
"metadata": {},
|
| 244 |
+
"outputs": [],
|
| 245 |
+
"source": [
|
| 246 |
+
"zs_classifier = pipeline(\"zero-shot-classification\", model=\"facebook/bart-large-mnli\")\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"article = \"\"\"\n",
|
| 249 |
+
"Nx brings numerical computing to the BEAM, enabling machine learning\n",
|
| 250 |
+
"pipelines that leverage Elixir's concurrency and fault tolerance.\n",
|
| 251 |
+
"Bumblebee provides access to thousands of pre-trained models from\n",
|
| 252 |
+
"the Hugging Face Hub directly in Livebook.\n",
|
| 253 |
+
"\"\"\"\n",
|
| 254 |
+
"\n",
|
| 255 |
+
"labels = [\"technology\", \"sports\", \"politics\", \"science\", \"finance\"]\n",
|
| 256 |
+
"result = zs_classifier(article, labels)\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"for label, score in zip(result[\"labels\"], result[\"scores\"]):\n",
|
| 259 |
+
" bar = \"█\" * int(score * 30)\n",
|
| 260 |
+
" print(f\" {label:12s} {score:.4f} {bar}\")"
|
| 261 |
+
]
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"cell_type": "markdown",
|
| 265 |
+
"metadata": {},
|
| 266 |
+
"source": [
|
| 267 |
+
"### 2.5 Sentence Embeddings & Similarity"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"from transformers import AutoModel\n",
|
| 277 |
+
"import torch.nn.functional as F\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"emb_tokenizer = AutoTokenizer.from_pretrained(\"sentence-transformers/all-MiniLM-L6-v2\")\n",
|
| 280 |
+
"emb_model = AutoModel.from_pretrained(\"sentence-transformers/all-MiniLM-L6-v2\")\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"def embed(texts):\n",
|
| 283 |
+
" inputs = emb_tokenizer(texts, padding=True, truncation=True, return_tensors=\"pt\")\n",
|
| 284 |
+
" with torch.no_grad():\n",
|
| 285 |
+
" outputs = emb_model(**inputs)\n",
|
| 286 |
+
" # Mean pooling\n",
|
| 287 |
+
" mask = inputs[\"attention_mask\"].unsqueeze(-1)\n",
|
| 288 |
+
" embeddings = (outputs.last_hidden_state * mask).sum(1) / mask.sum(1)\n",
|
| 289 |
+
" return F.normalize(embeddings, p=2, dim=1)\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"sentences = [\n",
|
| 292 |
+
" \"Nx provides numerical computing for Elixir\",\n",
|
| 293 |
+
" \"Axon is a neural network library built on Nx\",\n",
|
| 294 |
+
" \"Bumblebee connects Elixir to the Hugging Face Hub\",\n",
|
| 295 |
+
" \"I enjoy cooking Italian food on weekends\",\n",
|
| 296 |
+
" \"The weather forecast predicts rain tomorrow\",\n",
|
| 297 |
+
"]\n",
|
| 298 |
+
"\n",
|
| 299 |
+
"embeddings = embed(sentences)\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"query = \"How do I build neural networks in Elixir?\"\n",
|
| 302 |
+
"query_emb = embed([query])\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"similarities = F.cosine_similarity(query_emb, embeddings)\n",
|
| 305 |
+
"ranked = similarities.argsort(descending=True)\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"print(f'Query: \"{query}\"\\n')\n",
|
| 308 |
+
"for idx in ranked:\n",
|
| 309 |
+
" print(f\" {similarities[idx]:.4f} {sentences[idx]}\")"
|
| 310 |
+
]
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"cell_type": "markdown",
|
| 314 |
+
"metadata": {},
|
| 315 |
+
"source": [
|
| 316 |
+
"### 2.6 Text Generation (GPT-2)"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"text_gen = pipeline(\n",
|
| 326 |
+
" \"text-generation\",\n",
|
| 327 |
+
" model=\"openai-community/gpt2\",\n",
|
| 328 |
+
" device=device,\n",
|
| 329 |
+
")\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"prompt = \"Machine learning in Elixir is\"\n",
|
| 332 |
+
"output = text_gen(prompt, max_new_tokens=50, num_return_sequences=1)\n",
|
| 333 |
+
"print(output[0][\"generated_text\"])"
|
| 334 |
+
]
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"cell_type": "markdown",
|
| 338 |
+
"metadata": {},
|
| 339 |
+
"source": [
|
| 340 |
+
"### 2.7 Image Classification (Vision Transformer)"
|
| 341 |
+
]
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"cell_type": "code",
|
| 345 |
+
"execution_count": null,
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"outputs": [],
|
| 348 |
+
"source": [
|
| 349 |
+
"# Elixir: Bumblebee.Vision.ImageClassification.image_classification(model, featurizer)\n",
|
| 350 |
+
"# Python: pipeline(\"image-classification\")\n",
|
| 351 |
+
"\n",
|
| 352 |
+
"from PIL import Image\n",
|
| 353 |
+
"import requests\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"img_cls = pipeline(\"image-classification\", model=\"google/vit-base-patch16-224\")\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"# Download sample image\n",
|
| 358 |
+
"img_url = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg\"\n",
|
| 359 |
+
"image = Image.open(requests.get(img_url, stream=True).raw)\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"results = img_cls(image)\n",
|
| 362 |
+
"print(f\"Image: {image.size}\")\n",
|
| 363 |
+
"for r in results[:5]:\n",
|
| 364 |
+
" print(f\" {r['score']:.4f} {r['label']}\")"
|
| 365 |
+
]
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"cell_type": "markdown",
|
| 369 |
+
"metadata": {},
|
| 370 |
+
"source": [
|
| 371 |
+
"### 2.8 Speech-to-Text (Whisper)"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"cell_type": "code",
|
| 376 |
+
"execution_count": null,
|
| 377 |
+
"metadata": {},
|
| 378 |
+
"outputs": [],
|
| 379 |
+
"source": [
|
| 380 |
+
"# Elixir: Bumblebee.Audio.speech_to_text(model, featurizer, tokenizer, generation_config)\n",
|
| 381 |
+
"# Python: pipeline(\"automatic-speech-recognition\")\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"asr = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-tiny\")\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"# Sample audio from Hugging Face\n",
|
| 386 |
+
"audio_url = \"https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac\"\n",
|
| 387 |
+
"result = asr(audio_url)\n",
|
| 388 |
+
"print(f\"Transcription: {result['text']}\")"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "markdown",
|
| 393 |
+
"metadata": {},
|
| 394 |
+
"source": [
|
| 395 |
+
"### 2.9 Image Generation (Stable Diffusion)\n",
|
| 396 |
+
"\n",
|
| 397 |
+
"> Requires GPU. On CPU this will be very slow."
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": null,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"source": [
|
| 406 |
+
"# Elixir: Bumblebee.Diffusion.StableDiffusion.text_to_image(...)\n",
|
| 407 |
+
"# Python: diffusers library\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"try:\n",
|
| 410 |
+
" from diffusers import StableDiffusionPipeline\n",
|
| 411 |
+
"\n",
|
| 412 |
+
" sd_pipe = StableDiffusionPipeline.from_pretrained(\n",
|
| 413 |
+
" \"CompVis/stable-diffusion-v1-4\",\n",
|
| 414 |
+
" torch_dtype=torch.float16 if device == \"cuda\" else torch.float32,\n",
|
| 415 |
+
" ).to(device)\n",
|
| 416 |
+
"\n",
|
| 417 |
+
" prompt = \"a photograph of a bee programming in elixir, highly detailed, 4k\"\n",
|
| 418 |
+
" image = sd_pipe(prompt, num_inference_steps=20).images[0]\n",
|
| 419 |
+
" display(image)\n",
|
| 420 |
+
"except ImportError:\n",
|
| 421 |
+
" print(\"Install diffusers: pip install diffusers\")\n",
|
| 422 |
+
"except Exception as e:\n",
|
| 423 |
+
" print(f\"Stable Diffusion error (GPU recommended): {e}\")"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"cell_type": "markdown",
|
| 428 |
+
"metadata": {},
|
| 429 |
+
"source": [
|
| 430 |
+
"## 3 — Custom Training (Axon equivalent)\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"Train a classifier from scratch using PyTorch — mirrors the Axon Livebook section."
|
| 433 |
+
]
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"cell_type": "code",
|
| 437 |
+
"execution_count": null,
|
| 438 |
+
"metadata": {},
|
| 439 |
+
"outputs": [],
|
| 440 |
+
"source": [
|
| 441 |
+
"import torch.nn as nn\n",
|
| 442 |
+
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"# --- Synthetic data (same as Livebook) ---\n",
|
| 445 |
+
"np.random.seed(42)\n",
|
| 446 |
+
"n_samples, n_features, n_classes = 2000, 4, 3\n",
|
| 447 |
+
"\n",
|
| 448 |
+
"centers = np.random.randn(n_classes, n_features) * 2\n",
|
| 449 |
+
"labels_raw = np.random.randint(0, n_classes, n_samples)\n",
|
| 450 |
+
"noise = np.random.randn(n_samples, n_features) * 0.4\n",
|
| 451 |
+
"X = centers[labels_raw] + noise\n",
|
| 452 |
+
"\n",
|
| 453 |
+
"# Normalize\n",
|
| 454 |
+
"X = (X - X.mean(axis=0)) / X.std(axis=0)\n",
|
| 455 |
+
"\n",
|
| 456 |
+
"# One-hot\n",
|
| 457 |
+
"Y = np.eye(n_classes)[labels_raw]\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"# Split\n",
|
| 460 |
+
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)\n",
|
| 461 |
+
"\n",
|
| 462 |
+
"# To tensors\n",
|
| 463 |
+
"X_train_t = torch.FloatTensor(X_train)\n",
|
| 464 |
+
"Y_train_t = torch.FloatTensor(Y_train)\n",
|
| 465 |
+
"X_test_t = torch.FloatTensor(X_test)\n",
|
| 466 |
+
"Y_test_t = torch.FloatTensor(Y_test)\n",
|
| 467 |
+
"\n",
|
| 468 |
+
"train_loader = DataLoader(TensorDataset(X_train_t, Y_train_t), batch_size=64, shuffle=True)\n",
|
| 469 |
+
"test_loader = DataLoader(TensorDataset(X_test_t, Y_test_t), batch_size=64)\n",
|
| 470 |
+
"\n",
|
| 471 |
+
"print(f\"Train: {len(X_train)} | Test: {len(X_test)} | Features: {n_features} | Classes: {n_classes}\")"
|
| 472 |
+
]
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"cell_type": "code",
|
| 476 |
+
"execution_count": null,
|
| 477 |
+
"metadata": {},
|
| 478 |
+
"outputs": [],
|
| 479 |
+
"source": [
|
| 480 |
+
"# --- Model (mirrors Axon build) ---\n",
|
| 481 |
+
"class MLP(nn.Module):\n",
|
| 482 |
+
" def __init__(self):\n",
|
| 483 |
+
" super().__init__()\n",
|
| 484 |
+
" self.net = nn.Sequential(\n",
|
| 485 |
+
" nn.Linear(n_features, 64),\n",
|
| 486 |
+
" nn.ReLU(),\n",
|
| 487 |
+
" nn.BatchNorm1d(64),\n",
|
| 488 |
+
" nn.Dropout(0.2),\n",
|
| 489 |
+
" nn.Linear(64, 32),\n",
|
| 490 |
+
" nn.ReLU(),\n",
|
| 491 |
+
" nn.BatchNorm1d(32),\n",
|
| 492 |
+
" nn.Dropout(0.2),\n",
|
| 493 |
+
" nn.Linear(32, n_classes),\n",
|
| 494 |
+
" nn.Softmax(dim=1),\n",
|
| 495 |
+
" )\n",
|
| 496 |
+
"\n",
|
| 497 |
+
" def forward(self, x):\n",
|
| 498 |
+
" return self.net(x)\n",
|
| 499 |
+
"\n",
|
| 500 |
+
"model = MLP().to(device)\n",
|
| 501 |
+
"print(model)"
|
| 502 |
+
]
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"cell_type": "code",
|
| 506 |
+
"execution_count": null,
|
| 507 |
+
"metadata": {},
|
| 508 |
+
"outputs": [],
|
| 509 |
+
"source": [
|
| 510 |
+
"# --- Training loop ---\n",
|
| 511 |
+
"optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
|
| 512 |
+
"criterion = nn.CrossEntropyLoss()\n",
|
| 513 |
+
"\n",
|
| 514 |
+
"for epoch in range(30):\n",
|
| 515 |
+
" model.train()\n",
|
| 516 |
+
" epoch_loss = 0\n",
|
| 517 |
+
" for xb, yb in train_loader:\n",
|
| 518 |
+
" xb, yb = xb.to(device), yb.to(device)\n",
|
| 519 |
+
" preds = model(xb)\n",
|
| 520 |
+
" loss = criterion(preds, yb.argmax(dim=1))\n",
|
| 521 |
+
" optimizer.zero_grad()\n",
|
| 522 |
+
" loss.backward()\n",
|
| 523 |
+
" optimizer.step()\n",
|
| 524 |
+
" epoch_loss += loss.item()\n",
|
| 525 |
+
"\n",
|
| 526 |
+
" if (epoch + 1) % 10 == 0:\n",
|
| 527 |
+
" model.eval()\n",
|
| 528 |
+
" correct = 0\n",
|
| 529 |
+
" with torch.no_grad():\n",
|
| 530 |
+
" for xb, yb in test_loader:\n",
|
| 531 |
+
" xb, yb = xb.to(device), yb.to(device)\n",
|
| 532 |
+
" preds = model(xb)\n",
|
| 533 |
+
" correct += (preds.argmax(1) == yb.argmax(1)).sum().item()\n",
|
| 534 |
+
" acc = correct / len(X_test) * 100\n",
|
| 535 |
+
" print(f\"Epoch {epoch+1:3d} loss={epoch_loss/len(train_loader):.4f} test_acc={acc:.1f}%\")"
|
| 536 |
+
]
|
| 537 |
+
},
|
| 538 |
+
{
|
| 539 |
+
"cell_type": "code",
|
| 540 |
+
"execution_count": null,
|
| 541 |
+
"metadata": {},
|
| 542 |
+
"outputs": [],
|
| 543 |
+
"source": [
|
| 544 |
+
"# --- Final evaluation ---\n",
|
| 545 |
+
"model.eval()\n",
|
| 546 |
+
"with torch.no_grad():\n",
|
| 547 |
+
" all_preds = []\n",
|
| 548 |
+
" all_true = []\n",
|
| 549 |
+
" for xb, yb in test_loader:\n",
|
| 550 |
+
" preds = model(xb.to(device)).argmax(1).cpu().numpy()\n",
|
| 551 |
+
" all_preds.extend(preds)\n",
|
| 552 |
+
" all_true.extend(yb.argmax(1).numpy())\n",
|
| 553 |
+
"\n",
|
| 554 |
+
"print(classification_report(all_true, all_preds, target_names=[f\"Class {i}\" for i in range(n_classes)]))"
|
| 555 |
+
]
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"cell_type": "code",
|
| 559 |
+
"execution_count": null,
|
| 560 |
+
"metadata": {},
|
| 561 |
+
"outputs": [],
|
| 562 |
+
"source": [
|
| 563 |
+
"# --- Visualize ---\n",
|
| 564 |
+
"fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
|
| 565 |
+
"\n",
|
| 566 |
+
"for cls in range(n_classes):\n",
|
| 567 |
+
" mask = np.array(all_true) == cls\n",
|
| 568 |
+
" axes[0].scatter(X_test[mask, 0], X_test[mask, 1], label=f\"Class {cls}\", alpha=0.6)\n",
|
| 569 |
+
"axes[0].set_title(\"Actual\")\n",
|
| 570 |
+
"axes[0].legend()\n",
|
| 571 |
+
"\n",
|
| 572 |
+
"for cls in range(n_classes):\n",
|
| 573 |
+
" mask = np.array(all_preds) == cls\n",
|
| 574 |
+
" axes[1].scatter(X_test[mask, 0], X_test[mask, 1], label=f\"Class {cls}\", alpha=0.6)\n",
|
| 575 |
+
"axes[1].set_title(\"Predicted\")\n",
|
| 576 |
+
"axes[1].legend()\n",
|
| 577 |
+
"\n",
|
| 578 |
+
"plt.tight_layout()\n",
|
| 579 |
+
"plt.show()"
|
| 580 |
+
]
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"cell_type": "markdown",
|
| 584 |
+
"metadata": {},
|
| 585 |
+
"source": [
|
| 586 |
+
"## 4 — Interactive Gradio UI (Kino equivalent)
|
| 587 |
+
|
| 588 |
+
### 2.10 Model Export (ONNX / GGUF)
|
| 589 |
+
|
| 590 |
+
**Export to ONNX** (requires `onnxruntime` and `torch.onnx`):
|
| 591 |
+
```python
|
| 592 |
+
# Export the sentiment analysis model to ONNX
|
| 593 |
+
import torch
|
| 594 |
+
from transformers import AutoModelForSequenceClassification
|
| 595 |
+
model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
|
| 596 |
+
# Dummy input for tracing
|
| 597 |
+
dummy_input = torch.tensor([[101, 2023, 2003, 1037, 2742, 102]])
|
| 598 |
+
torch.onnx.export(model, dummy_input, "sentiment.onnx", input_names=["input_ids"], output_names=["logits"], opset_version=12)
|
| 599 |
+
print("ONNX model saved as sentiment.onnx")
|
| 600 |
+
```
|
| 601 |
+
|
| 602 |
+
**Convert to GGUF** (for llama.cpp style inference, works for decoder models like GPT-2):
|
| 603 |
+
```bash
|
| 604 |
+
# Install gguf-converter if needed
|
| 605 |
+
pip install gguf-converter
|
| 606 |
+
# Convert the exported ONNX model
|
| 607 |
+
gguf-converter --onnx sentiment.onnx --output sentiment.gguf
|
| 608 |
+
```
|
| 609 |
+
|
| 610 |
+
> **Note:** GGUF currently supports decoder‑only architectures. For encoder‑only models you may need to adapt the conversion script or use `onnxruntime` directly.
|
| 611 |
+
|
| 612 |
+
---
|
| 613 |
+
|
| 614 |
+
## 4 — Interactive Gradio UI (Kino equivalent)\n",
|
| 615 |
+
"\n",
|
| 616 |
+
"Gradio is the Python equivalent of Elixir's Kino for building interactive UIs\n",
|
| 617 |
+
"that wrap ML models."
|
| 618 |
+
]
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"cell_type": "code",
|
| 622 |
+
"execution_count": null,
|
| 623 |
+
"metadata": {},
|
| 624 |
+
"outputs": [],
|
| 625 |
+
"source": [
|
| 626 |
+
"# --- Sentiment Analysis UI ---\n",
|
| 627 |
+
"def classify_sentiment(text):\n",
|
| 628 |
+
" result = sentiment(text)[0]\n",
|
| 629 |
+
" return {r[\"label\"]: r[\"score\"] for r in sentiment(text)}\n",
|
| 630 |
+
"\n",
|
| 631 |
+
"sentiment_ui = gr.Interface(\n",
|
| 632 |
+
" fn=classify_sentiment,\n",
|
| 633 |
+
" inputs=gr.Textbox(label=\"Enter text\", placeholder=\"Type something...\"),\n",
|
| 634 |
+
" outputs=gr.Label(label=\"Sentiment\"),\n",
|
| 635 |
+
" title=\"🐝 Sentiment Analysis — Bumblebee/Transformers\",\n",
|
| 636 |
+
" description=\"Mirrors the Elixir Livebook sentiment section\",\n",
|
| 637 |
+
")\n",
|
| 638 |
+
"sentiment_ui.launch(share=True)"
|
| 639 |
+
]
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"cell_type": "code",
|
| 643 |
+
"execution_count": null,
|
| 644 |
+
"metadata": {},
|
| 645 |
+
"outputs": [],
|
| 646 |
+
"source": [
|
| 647 |
+
"# --- Zero-Shot Classification UI ---\n",
|
| 648 |
+
"def zero_shot(text, labels_str):\n",
|
| 649 |
+
" labels = [l.strip() for l in labels_str.split(\",\")]\n",
|
| 650 |
+
" result = zs_classifier(text, labels)\n",
|
| 651 |
+
" return {l: s for l, s in zip(result[\"labels\"], result[\"scores\"]) }\n",
|
| 652 |
+
"\n",
|
| 653 |
+
"zs_ui = gr.Interface(\n",
|
| 654 |
+
" fn=zero_shot,\n",
|
| 655 |
+
" inputs=[\n",
|
| 656 |
+
" gr.Textbox(label=\"Text\", lines=3,\n",
|
| 657 |
+
" value=\"Elixir's BEAM provides fault-tolerant concurrent ML pipelines.\"),\n",
|
| 658 |
+
" gr.Textbox(label=\"Labels (comma-separated)\",\n",
|
| 659 |
+
" value=\"technology, sports, politics, science, finance\"),\n",
|
| 660 |
+
" ],\n",
|
| 661 |
+
" outputs=gr.Label(label=\"Classification\"),\n",
|
| 662 |
+
" title=\"🐝 Zero-Shot Classification\",\n",
|
| 663 |
+
")\n",
|
| 664 |
+
"zs_ui.launch(share=True)"
|
| 665 |
+
]
|
| 666 |
+
},
|
| 667 |
+
{
|
| 668 |
+
"cell_type": "code",
|
| 669 |
+
"execution_count": null,
|
| 670 |
+
"metadata": {},
|
| 671 |
+
"outputs": [],
|
| 672 |
+
"source": [
|
| 673 |
+
"# --- Text Generation UI ---\n",
|
| 674 |
+
"def generate(prompt, max_tokens):\n",
|
| 675 |
+
" output = text_gen(prompt, max_new_tokens=int(max_tokens), do_sample=True)\n",
|
| 676 |
+
" return output[0][\"generated_text\"]\n",
|
| 677 |
+
"\n",
|
| 678 |
+
"gen_ui = gr.Interface(\n",
|
| 679 |
+
" fn=generate,\n",
|
| 680 |
+
" inputs=[\n",
|
| 681 |
+
" gr.Textbox(label=\"Prompt\", value=\"Machine learning in Elixir is\"),\n",
|
| 682 |
+
" gr.Slider(10, 100, value=50, step=1, label=\"Max tokens\"),\n",
|
| 683 |
+
" ],\n",
|
| 684 |
+
" outputs=gr.Textbox(label=\"Generated Text\", lines=5),\n",
|
| 685 |
+
" title=\"🐝 GPT-2 Text Generation\",\n",
|
| 686 |
+
")\n",
|
| 687 |
+
"gen_ui.launch(share=True)"
|
| 688 |
+
]
|
| 689 |
+
},
|
| 690 |
+
{
|
| 691 |
+
"cell_type": "markdown",
|
| 692 |
+
"metadata": {},
|
| 693 |
+
"source": [
|
| 694 |
+
"## 5 — Summary\n",
|
| 695 |
+
"\n",
|
| 696 |
+
"| Pipeline Stage | Elixir (Livebook) | Python (Colab/Kaggle) |\n",
|
| 697 |
+
"|----------------|-------------------|----------------------|\n",
|
| 698 |
+
"| Tensors | `Nx.tensor` | `np.array` |\n",
|
| 699 |
+
"| Gradients | `Nx.Defn.grad` | `torch.autograd` |\n",
|
| 700 |
+
"| GPU | `EXLA.Backend` | `torch.cuda` |\n",
|
| 701 |
+
"| Pre-trained | `Bumblebee.load_model` | `pipeline()` |\n",
|
| 702 |
+
"| Fill-Mask | `Bumblebee.Text.fill_mask` | `pipeline(\"fill-mask\")` |\n",
|
| 703 |
+
"| Sentiment | `Bumblebee.Text.Classification` | `pipeline(\"sentiment-analysis\")` |\n",
|
| 704 |
+
"| NER | `Bumblebee.Text.TokenClassification` | `pipeline(\"ner\")` |\n",
|
| 705 |
+
"| Zero-Shot | `Bumblebee.Text.ZeroShotClassification` | `pipeline(\"zero-shot-classification\")` |\n",
|
| 706 |
+
"| Text Gen | `Bumblebee.Text.generation` | `pipeline(\"text-generation\")` |\n",
|
| 707 |
+
"| Embeddings | `Bumblebee.Text.TextEmbedding` | `sentence-transformers` |\n",
|
| 708 |
+
"| Training | `Axon` + `Axon.Loop` | `torch.nn` + training loop |\n",
|
| 709 |
+
"| Serving | `Nx.Serving` | `gradio.Interface` |\n",
|
| 710 |
+
"| Interactive UI | `Kino` | `gradio` |\n",
|
| 711 |
+
"\n",
|
| 712 |
+
"### Deploy\n",
|
| 713 |
+
"\n",
|
| 714 |
+
"* **Colab**: Open this `.ipynb` directly\n",
|
| 715 |
+
"* **Kaggle**: Upload as a new notebook\n",
|
| 716 |
+
"* **HF Spaces**: Use the Gradio cells above + `sdk: gradio` in README.md"
|
| 717 |
+
]
|
| 718 |
+
}
|
| 719 |
+
]
|
| 720 |
+
}
|
deps/ecto/.formatter.exs
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
locals_without_parens = [
|
| 2 |
+
# Query
|
| 3 |
+
from: 2,
|
| 4 |
+
|
| 5 |
+
# Schema
|
| 6 |
+
field: 1,
|
| 7 |
+
field: 2,
|
| 8 |
+
field: 3,
|
| 9 |
+
timestamps: 1,
|
| 10 |
+
belongs_to: 2,
|
| 11 |
+
belongs_to: 3,
|
| 12 |
+
has_one: 2,
|
| 13 |
+
has_one: 3,
|
| 14 |
+
has_many: 2,
|
| 15 |
+
has_many: 3,
|
| 16 |
+
many_to_many: 2,
|
| 17 |
+
many_to_many: 3,
|
| 18 |
+
embeds_one: 2,
|
| 19 |
+
embeds_one: 3,
|
| 20 |
+
embeds_one: 4,
|
| 21 |
+
embeds_many: 2,
|
| 22 |
+
embeds_many: 3,
|
| 23 |
+
embeds_many: 4
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
[
|
| 27 |
+
locals_without_parens: locals_without_parens,
|
| 28 |
+
export: [
|
| 29 |
+
locals_without_parens: locals_without_parens
|
| 30 |
+
]
|
| 31 |
+
]
|
deps/ecto/.hex
ADDED
|
Binary file (261 Bytes). View file
|
|
|
deps/ecto/CHANGELOG.md
ADDED
|
@@ -0,0 +1,1048 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Changelog for v3.x
|
| 2 |
+
|
| 3 |
+
## v3.12.5 (2024-11-28)
|
| 4 |
+
|
| 5 |
+
### Enhancements
|
| 6 |
+
|
| 7 |
+
* [Ecto.Repo] Use `persistent_term` for faster repository lookup
|
| 8 |
+
* [Ecto.Repo] Document new `:pool_count` option
|
| 9 |
+
|
| 10 |
+
### Bug fixes
|
| 11 |
+
|
| 12 |
+
* [Ecto.Query] Raise when empty list is given to `values/2`
|
| 13 |
+
* [Ecto.Query] Fix inspecting `dynamic/2` with interpolated named bindings
|
| 14 |
+
* [Ecto.Query] Plan sources before creating plan_subquery closure
|
| 15 |
+
* [Ecto.Repo] Remove read-only changes from returned record during insert/update
|
| 16 |
+
* [Ecto.Repo] Cascade `:allow_stale` options to assocs
|
| 17 |
+
|
| 18 |
+
## v3.12.4 (2024-10-07)
|
| 19 |
+
|
| 20 |
+
### Enhancements
|
| 21 |
+
|
| 22 |
+
* [Ecto.Repo] Document new `:pool_count` option
|
| 23 |
+
|
| 24 |
+
### Bug fixes
|
| 25 |
+
|
| 26 |
+
* [Ecto.Repo] Make `Ecto.Repo.reload` respect `source`
|
| 27 |
+
|
| 28 |
+
## v3.12.3 (2024-09-06)
|
| 29 |
+
|
| 30 |
+
### Bug fixes
|
| 31 |
+
|
| 32 |
+
* [Ecto.Changeset] Allow associations to be cast/put inside of embedded schema changesets
|
| 33 |
+
|
| 34 |
+
## v3.12.2 (2024-08-25)
|
| 35 |
+
|
| 36 |
+
### Bug fixes
|
| 37 |
+
|
| 38 |
+
* [Ecto.Query] Allow `:prefix` to be set to any term
|
| 39 |
+
* [Ecto.Repo] Avoid overwriting ssl opts from url if already set in config
|
| 40 |
+
|
| 41 |
+
## v3.12.1 (2024-08-13)
|
| 42 |
+
|
| 43 |
+
### Enhancements
|
| 44 |
+
|
| 45 |
+
* [Ecto.Type] Add `Ecto.Type.parameterized?/2`
|
| 46 |
+
|
| 47 |
+
### Bug fixes
|
| 48 |
+
|
| 49 |
+
* [Ecto.Enum] Fix dialyzer specification
|
| 50 |
+
* [Ecto.Query] Remove incorrect subquery parameter check
|
| 51 |
+
|
| 52 |
+
## v3.12.0 (2024-08-12)
|
| 53 |
+
|
| 54 |
+
### Enhancements
|
| 55 |
+
|
| 56 |
+
* [Ecto.Changeset] Allow `{message, opts}` to be given as message for several validation APIs
|
| 57 |
+
* [Ecto.Query] Introduce `is_named_binding` guard
|
| 58 |
+
* [Ecto.Query] Subqueries are now supported in `distinct`, `group_by`, `order_by` and `window` expressions
|
| 59 |
+
* [Ecto.Query] Allow `select_merge` to be used in more `insert_all` and subquery operations by merging distinct fields
|
| 60 |
+
* [Ecto.Query] Allow literal maps inside `dynamic/2`
|
| 61 |
+
* [Ecto.Query] Support macro expansion at the root level of `order_by`
|
| 62 |
+
* [Ecto.Query] Support preloading subquery sources in `from` and `join`
|
| 63 |
+
* [Ecto.Query] Allow map updates with dynamic values in `select`
|
| 64 |
+
* [Ecto.Query] Allow any data structure that implements the Enumerable protocol on the right side of `in`
|
| 65 |
+
* [Ecto.Repo] Support 2-arity preload functions that receive ids and the association metadata
|
| 66 |
+
* [Ecto.Repo] Allow Hot Updates on upsert queries in Postgres by removing duplicate fields during replace_all
|
| 67 |
+
* [Ecto.Repo] `insert_all` supports queries with only source
|
| 68 |
+
* [Ecto.Repo] `insert_all` supports queries with the update syntax
|
| 69 |
+
* [Ecto.Repo] Support `:allow_stale` on Repo struct/changeset operations
|
| 70 |
+
* [Ecto.Schema] Allow schema fields to be read-only via `:writable` option
|
| 71 |
+
* [Ecto.Schema] Add `:defaults_to_struct` option to `embeds_one`
|
| 72 |
+
* [Ecto.Schema] Support `:duration` type which maps to Elixir v1.17 duration
|
| 73 |
+
* [Ecto.Type] Bubble up custom cast errors of the inner type for `{:map, type}` and `{:array, type}`
|
| 74 |
+
* [Ecto.Type] Add `Ecto.Type.cast!/2`
|
| 75 |
+
|
| 76 |
+
### Bug fixes
|
| 77 |
+
|
| 78 |
+
* [Ecto.Query] Ignore query prefix in CTE sources
|
| 79 |
+
* [Ecto.Query] Fix a bug of `preload` when a through association is used in a join and has a nested separate query preload. Now the association chain is no longer preloaded and we simply preload directly onto the loaded through association.
|
| 80 |
+
* [Ecto.Query] Fix inspection when select has `map/struct` modifiers
|
| 81 |
+
* [Ecto.Query] Disable query cache for `values` lists
|
| 82 |
+
* [Ecto.Repo] Convert fields to their sources in `insert_all`
|
| 83 |
+
* [Ecto.Repo] Raise if empty list is given to `{:replace, fields}`
|
| 84 |
+
* [Ecto.Repo] Validate `:prefix` is a string/binary, warn otherwise
|
| 85 |
+
* [Ecto.Repo] Remove compile dependency on `:preload_order` MFA in `has_many`
|
| 86 |
+
|
| 87 |
+
### Adapter changes
|
| 88 |
+
|
| 89 |
+
* `distinct`, `group_by`, `order_by` and `window` expressions use the new `Ecto.Query.ByExpr`
|
| 90 |
+
struct rather than the old `Ecto.Query.QueryExpr` struct
|
| 91 |
+
|
| 92 |
+
### Potential incompatibilities
|
| 93 |
+
|
| 94 |
+
* [Ecto.Changeset] Associations inside embeds have always been read-only. We now raise if you try to cast them inside a changeset (this was reverted in v3.12.3)
|
| 95 |
+
* [Ecto.ParameterizedType] Parameterized types are now represented internally as `{:parameterized, {mod, state}}`. While this representation is private, projects may have been relying on it, and therefore they need to adapt accordingly. Use `Ecto.ParameterizedType.init/2` to instantiate parameterized types.
|
| 96 |
+
* [Ecto.Query] Drop `:array_join` join type. It was added for Clickhouse support but it is no longer used
|
| 97 |
+
* [Ecto.Query] Validate `:prefix` is a string/binary (this was reverted in v3.12.2)
|
| 98 |
+
|
| 99 |
+
## v3.11.2 (2024-03-07)
|
| 100 |
+
|
| 101 |
+
### Bug fixes
|
| 102 |
+
|
| 103 |
+
* [Ecto.Query] Fix compatibility with upcoming Elixir v1.17
|
| 104 |
+
* [Ecto.Repo] Do not hide failures when preloading if the parent process is trapping exits
|
| 105 |
+
|
| 106 |
+
## v3.11.1 (2023-12-07)
|
| 107 |
+
|
| 108 |
+
### Enhancements
|
| 109 |
+
|
| 110 |
+
* [Ecto.Query] Allow module attributes to be given to `in` operator
|
| 111 |
+
|
| 112 |
+
### Bug fixes
|
| 113 |
+
|
| 114 |
+
* [Ecto.Query] Fix interpolating strings and atoms as map keys
|
| 115 |
+
* [Ecto.Query] Plan subqueries in `having`
|
| 116 |
+
* [Ecto.Query] Fix late binding with composite types
|
| 117 |
+
|
| 118 |
+
## v3.11.0 (2023-11-14)
|
| 119 |
+
|
| 120 |
+
### Enhancements
|
| 121 |
+
|
| 122 |
+
* [Ecto.Association] Allow `preload_order` to take MFAs for `many_to_many` associations. This allows ordering by the join table
|
| 123 |
+
* [Ecto.Query] Add `:operation` option to `with_cte/3`. This allows CTEs to perform updates and deletes
|
| 124 |
+
* [Ecto.Query] Support `splice(^...)` in `fragment`
|
| 125 |
+
* [Ecto.Query] Add `prepend_order_by/3`
|
| 126 |
+
* [Ecto.Query] Allow `selected_as/1` and `selected_as/2` to take interpolated names
|
| 127 |
+
* [Ecto.Query] Allow map update syntax to work with `nil` values in `select`
|
| 128 |
+
* [Ecto.Query] Allow hints to inject SQL using `unsafe_fragment`
|
| 129 |
+
* [Ecto.Query] Support `values/2` lists
|
| 130 |
+
* [Ecto.Repo] Add `:on_preload_spawn` option to `preload/3`
|
| 131 |
+
* [Ecto.Schema] Support `:load_in_query` option for embeds
|
| 132 |
+
* [Ecto.Schema] Support `:returning` option for delete
|
| 133 |
+
|
| 134 |
+
### Bug fixes
|
| 135 |
+
|
| 136 |
+
* [Ecto.Association] Ensure parent prefix is passed to `on_delete` queries
|
| 137 |
+
* [Ecto.Changeset] Ensure duplicate primary keys are always detected for embeds
|
| 138 |
+
* [Ecto.Embedded] Raise `ArgumentError` when specifying an autogenerated `:id` primary key
|
| 139 |
+
* [Ecto.Query] Ensure subquery selects generate unique cache keys
|
| 140 |
+
* [Ecto.Query] Raise on literal non-base binary/uuids in query
|
| 141 |
+
* [Ecto.Repo] Reset `belongs_to` association if foreign key update results in a mismatch
|
| 142 |
+
|
| 143 |
+
### Adapter changes
|
| 144 |
+
|
| 145 |
+
* Adapters now receive `nil` for encoding/decoding
|
| 146 |
+
* Adapters now receive `type` instead of `{:maybe, type}` as the first argument to `loaders/2`
|
| 147 |
+
|
| 148 |
+
### Deprecations
|
| 149 |
+
|
| 150 |
+
* [Ecto.Query] Keyword hints are no longer supported. Please use `unsafe_fragment` inside of hints instead
|
| 151 |
+
|
| 152 |
+
## v3.10.3 (2023-07-07)
|
| 153 |
+
|
| 154 |
+
### Enhancements
|
| 155 |
+
|
| 156 |
+
* [Ecto.Query] Allow dynamic `field/2` in `type/2`
|
| 157 |
+
|
| 158 |
+
### Bug fixes
|
| 159 |
+
|
| 160 |
+
* [Ecto.Changesets] Limit the largest integer to less than 32 digits
|
| 161 |
+
* [Ecto.Type] Limit the largest integer to less than 32 digits
|
| 162 |
+
|
| 163 |
+
## v3.10.2 (2023-06-07)
|
| 164 |
+
|
| 165 |
+
### Enhancements
|
| 166 |
+
|
| 167 |
+
* [Ecto.Changeset] Support a three-arity function with position on `cast_assoc` and `cast_embed`
|
| 168 |
+
* [Ecto.Changeset] Add support for maps in `validate_length/3`
|
| 169 |
+
* [Ecto.Changeset] Add `:nulls_distinct` option to `unsafe_validate_unique`
|
| 170 |
+
* [Ecto.Query] Support `array_join` type for ClickHouse adapter
|
| 171 |
+
* [Ecto.Query.API] Support parameterized and custom map types in json path validation
|
| 172 |
+
|
| 173 |
+
### Bug fixes
|
| 174 |
+
|
| 175 |
+
* [Ecto.Repo] Respect parent prefix in `Repo.aggregate`
|
| 176 |
+
* [Ecto.Query.API] Fix late binding in `json_extract_path`
|
| 177 |
+
|
| 178 |
+
### Deprecations
|
| 179 |
+
|
| 180 |
+
* Deprecate MFAs on `:with`
|
| 181 |
+
|
| 182 |
+
## v3.10.1 (2023-04-12)
|
| 183 |
+
|
| 184 |
+
### Bug fixes
|
| 185 |
+
|
| 186 |
+
* [Ecto.Changeset] Consider `sort_param` even if the relation param was not given
|
| 187 |
+
* [Ecto.Query] Correct typespec to avoid Dialyzer warnings
|
| 188 |
+
|
| 189 |
+
## v3.10.0 (2023-04-10)
|
| 190 |
+
|
| 191 |
+
This release contains many improvements to Ecto.Changeset, functions like `Ecto.Changeset.changed?/2` and `field_missing?/2` will help make your code more expressive. Improvements to association and embed handling will also make it easier to manage more complex forms, especially those embedded within Phoenix.LiveView applications.
|
| 192 |
+
|
| 193 |
+
On the changeset front, note this release unifies the handling of empty values between `cast/4` and `validate_required/3`. **If you were setting `:empty_values` in the past and you want to preserve this new behaviour throughout, you may want to update your code** from this:
|
| 194 |
+
|
| 195 |
+
Ecto.Changeset.cast(changeset, params, [:field1, :field2], empty_values: ["", []])
|
| 196 |
+
|
| 197 |
+
to:
|
| 198 |
+
|
| 199 |
+
empty_values = [[]] ++ Ecto.Changeset.empty_values()
|
| 200 |
+
Ecto.Changeset.cast(changeset, params, [:field1, :field2], empty_values: empty_values)
|
| 201 |
+
|
| 202 |
+
Queries have also been improved to support LIMIT WITH TIES as well as materialized CTEs.
|
| 203 |
+
|
| 204 |
+
### Enhancements
|
| 205 |
+
|
| 206 |
+
* [Ecto.Changeset] Add `get_assoc`/`get_embed`
|
| 207 |
+
* [Ecto.Changeset] Add `field_missing?/2`
|
| 208 |
+
* [Ecto.Changeset] Add `changed?/2` and `changed?/3` with predicates support
|
| 209 |
+
* [Ecto.Changeset] Allow `Regex` to be used in constraint names for exact matches
|
| 210 |
+
* [Ecto.Changeset] Allow `:empty_values` option in `cast/4` to include a function which must return true if the value is empty
|
| 211 |
+
* [Ecto.Changeset] `cast/4` will by default consider strings made only of whitespace characters to be empty
|
| 212 |
+
* [Ecto.Changeset] Add support for `:sort_param` and `:drop_param` on `cast_assoc` and `cast_embed`
|
| 213 |
+
* [Ecto.Query] Support materialized option in CTEs
|
| 214 |
+
* [Ecto.Query] Support dynamic field inside `json_extract_path`
|
| 215 |
+
* [Ecto.Query] Support interpolated values for from/join prefixes
|
| 216 |
+
* [Ecto.Query] Support ties in limit expressions through `with_ties/3`
|
| 217 |
+
* [Ecto.Schema] Add `:autogenerate_fields` to the schema reflection API
|
| 218 |
+
* [Ecto.ParameterizedType] Add optional callback `format/1`
|
| 219 |
+
|
| 220 |
+
### Bug fixes
|
| 221 |
+
|
| 222 |
+
* [Ecto.Changeset] Make unsafe validate unique exclude primary key only for loaded schemas
|
| 223 |
+
* [Ecto.Changeset] Raise when change provided to `validate_format/4` is not a string
|
| 224 |
+
* [Ecto.Query] Fix bug in `json_extract_path` where maps were not allowed to be nested inside of embeds
|
| 225 |
+
* [Ecto.Schema] Allow inline embeds to overwrite conflicting aliases
|
| 226 |
+
|
| 227 |
+
## v3.9.6 (2023-07-07)
|
| 228 |
+
|
| 229 |
+
### Enhancements
|
| 230 |
+
|
| 231 |
+
* [Ecto.Query] Allow dynamic `field/2` in `type/2`
|
| 232 |
+
|
| 233 |
+
### Bug fixes
|
| 234 |
+
|
| 235 |
+
* [Ecto.Changesets] Limit the largest integer to less than 32 digits
|
| 236 |
+
* [Ecto.Type] Limit the largest integer to less than 32 digits
|
| 237 |
+
|
| 238 |
+
## v3.9.5 (2023-03-22)
|
| 239 |
+
|
| 240 |
+
### Bug fixes
|
| 241 |
+
|
| 242 |
+
* [Ecto.Query] Rename `@opaque dynamic` type to `@opaque dynamic_expr` to avoid conflicts with Erlang/OTP 26
|
| 243 |
+
|
| 244 |
+
## v3.9.4 (2022-12-21)
|
| 245 |
+
|
| 246 |
+
### Bug fixes
|
| 247 |
+
|
| 248 |
+
* [Ecto.Query] Fix regression with interpolated preloads introduced in v3.9.3
|
| 249 |
+
|
| 250 |
+
## v3.9.3 (2022-12-20)
|
| 251 |
+
|
| 252 |
+
### Enhancements
|
| 253 |
+
|
| 254 |
+
* [Ecto] Add `reset_fields/2`
|
| 255 |
+
* [Ecto.Multi] Add `exists?/4` function
|
| 256 |
+
* [Ecto.Repo] Keep url scheme in the repo configuration
|
| 257 |
+
* [Ecto.Query] Add support for cross lateral joins
|
| 258 |
+
* [Ecto.Query] Allow preloads to use `dynamic/2`
|
| 259 |
+
* [Ecto.Query.API] Allow the entire path to be interpolated in `json_extract_path/2`
|
| 260 |
+
|
| 261 |
+
## v3.9.2 (2022-11-18)
|
| 262 |
+
|
| 263 |
+
### Enhancements
|
| 264 |
+
|
| 265 |
+
* [Ecto.Query] Allow `selected_as` inside CTE
|
| 266 |
+
* [Ecto.Query] Allow `selected_as` to be used in subquery
|
| 267 |
+
|
| 268 |
+
### Bug fixes
|
| 269 |
+
|
| 270 |
+
* [Ecto.Repo] Fix preloading through associations on `nil`
|
| 271 |
+
* [Ecto.Query] Fix select merging a `selected_as` field into a source
|
| 272 |
+
|
| 273 |
+
## v3.9.1 (2022-10-06)
|
| 274 |
+
|
| 275 |
+
### Enhancements
|
| 276 |
+
|
| 277 |
+
* [Ecto.Query] Allow `selected_as` at the root of `dynamic/2`
|
| 278 |
+
* [Ecto.Query] Allow `selected_as` to be used with `type/2`
|
| 279 |
+
* [Ecto.Query] Allow `selected_as` to be used with `select_merge`
|
| 280 |
+
|
| 281 |
+
### Bug fixes
|
| 282 |
+
|
| 283 |
+
* [Ecto.Changeset] Reenable support for embedded schemas in `unsafe_validate_unique/4`
|
| 284 |
+
* [Ecto.Query] Ensure `join_where` conditions preload correctly in `many_to_many` or with queries with one or many joins
|
| 285 |
+
|
| 286 |
+
## v3.9.0 (2022-09-27)
|
| 287 |
+
|
| 288 |
+
### Enhancements
|
| 289 |
+
|
| 290 |
+
* [Ecto.Changeset] Add `:force_changes` option to `cast/4`
|
| 291 |
+
* [Ecto.Enum] Allow enum fields to be embed either as their values or their dumped versions
|
| 292 |
+
* [Ecto.Query] Support `^%{field: dynamic(...)}` in `select` and `select_merge`
|
| 293 |
+
* [Ecto.Query] Support `%{field: subquery(...)}` in `select` and `select_merge`
|
| 294 |
+
* [Ecto.Query] Support select aliases through `selected_as/1` and `selected_as/2`
|
| 295 |
+
* [Ecto.Query] Allow `parent_as/1` in `type/2`
|
| 296 |
+
* [Ecto.Query] Add `with_named_binding/3`
|
| 297 |
+
* [Ecto.Query] Allow fragment sources in keyword queries
|
| 298 |
+
* [Ecto.Repo] Support `idle_interval` query parameter in connection URL
|
| 299 |
+
* [Ecto.Repo] Log human-readable UUIDs by using pre-dumped query parameters
|
| 300 |
+
* [Ecto.Schema] Support preloading associations in embedded schemas
|
| 301 |
+
|
| 302 |
+
### Bug fix
|
| 303 |
+
|
| 304 |
+
* [Ecto.Changeset] Raise when schemaless changeset or embedded schema is used in `unsafe_validate_unique/4`
|
| 305 |
+
* [Ecto.Query] Respect virtual field type in subqueries
|
| 306 |
+
* [Ecto.Query] Don't select struct fields overridden with `nil`
|
| 307 |
+
* [Ecto.Query] Fix `select_merge` not tracking `load_in_query: false` field
|
| 308 |
+
* [Ecto.Query] Fix field source when used in `json_extract_path`
|
| 309 |
+
* [Ecto.Query] Properly build CTEs at compile time
|
| 310 |
+
* [Ecto.Query] Properly order subqueries in `dynamic`
|
| 311 |
+
* [Ecto.Repo] Fix `insert_all` query parameter count when using value queries alongside `placeholder`
|
| 312 |
+
* [Ecto.Repo] Raise if combination query is used in a `many` preload
|
| 313 |
+
* [Ecto.Schema] Ignore associations that aren't loaded on insert
|
| 314 |
+
|
| 315 |
+
## v3.8.4 (2022-06-04)
|
| 316 |
+
|
| 317 |
+
### Enhancements
|
| 318 |
+
|
| 319 |
+
* [Ecto.Multi] Add `one/2` and `all/2` functions
|
| 320 |
+
* [Ecto.Query] Support `literal(...)` in `fragment`
|
| 321 |
+
|
| 322 |
+
### Bug fix
|
| 323 |
+
|
| 324 |
+
* [Ecto.Schema] Make sure fields are inspected in the correct order in Elixir v1.14+
|
| 325 |
+
|
| 326 |
+
## v3.8.3 (2022-05-11)
|
| 327 |
+
|
| 328 |
+
### Bug fix
|
| 329 |
+
|
| 330 |
+
* [Ecto.Query] Allow source aliases to be used in `type/2`
|
| 331 |
+
* [Ecto.Schema] Avoid "undefined behaviour/struct" warnings and errors during compilation
|
| 332 |
+
|
| 333 |
+
## v3.8.2 (2022-05-05)
|
| 334 |
+
|
| 335 |
+
### Bug fix
|
| 336 |
+
|
| 337 |
+
* [Ecto.Adapter] Do not require adapter metadata to be raw maps
|
| 338 |
+
* [Ecto.Association] Respect `join_where` in many to many `on_replace` deletes
|
| 339 |
+
* [Ecto.Changeset] Check if list is in `empty_values` before nested validations
|
| 340 |
+
|
| 341 |
+
## v3.8.1 (2022-04-27)
|
| 342 |
+
|
| 343 |
+
### Bug fix
|
| 344 |
+
|
| 345 |
+
* [Ecto.Query] Fix regression where a join's on parameter on `update_all` was out of order
|
| 346 |
+
|
| 347 |
+
## v3.8.0 (2022-04-26)
|
| 348 |
+
|
| 349 |
+
Ecto v3.8 requires Elixir v1.10+.
|
| 350 |
+
|
| 351 |
+
### Enhancements
|
| 352 |
+
|
| 353 |
+
* [Ecto] Add new Embedded chapter to Introductory guides
|
| 354 |
+
* [Ecto.Changeset] Allow custom `:error_key` in unique_constraint
|
| 355 |
+
* [Ecto.Changeset] Add `:match` option to all constraint functions
|
| 356 |
+
* [Ecto.Query] Support dynamic aliases
|
| 357 |
+
* [Ecto.Query] Allow using `type/2` with virtual fields
|
| 358 |
+
* [Ecto.Query] Suggest alternatives to inexistent fields in queries
|
| 359 |
+
* [Ecto.Query] Support passing queries using subqueries to `insert_all`
|
| 360 |
+
* [Ecto.Repo] Allow `stacktrace: true` so stacktraces are included in telemetry events and logs
|
| 361 |
+
* [Ecto.Schema] Validate options given to schema fields
|
| 362 |
+
|
| 363 |
+
### Bug fixes
|
| 364 |
+
|
| 365 |
+
* [Ecto.Changeset] Address regression on `validate_subset` no longer working with custom array types
|
| 366 |
+
* [Ecto.Changeset] **Potentially breaking change**: Detect `empty_values` inside lists when casting. This may cause issues if you were relying on the casting of empty values (by default, only `""`).
|
| 367 |
+
* [Ecto.Query] Handle atom list sigils in `select`
|
| 368 |
+
* [Ecto.Query] Improve tracking of `select_merge` inside subqueries
|
| 369 |
+
* [Ecto.Repo] Properly handle literals in queries given to `insert_all`
|
| 370 |
+
* [Ecto.Repo] Don't surface persisted data as changes on embed updates
|
| 371 |
+
* [Ecto.Repo] **Potentially breaking change**: Raise if an association doesn't have a primary key and is preloaded in a join query. Previously, this would silently produce the wrong the result in certain circumstances.
|
| 372 |
+
* [Ecto.Schema] Preserve parent prefix on join tables
|
| 373 |
+
|
| 374 |
+
## v3.7.2 (2022-03-13)
|
| 375 |
+
|
| 376 |
+
### Enhancements
|
| 377 |
+
|
| 378 |
+
* [Ecto.Schema] Add option to skip validations for default values
|
| 379 |
+
* [Ecto.Query] Allow coalesce in `type/2`
|
| 380 |
+
* [Ecto.Query] Support parameterized types in type/2
|
| 381 |
+
* [Ecto.Query] Allow arbitrary parentheses in query expressions
|
| 382 |
+
|
| 383 |
+
## v3.7.1 (2021-08-27)
|
| 384 |
+
|
| 385 |
+
### Enhancements
|
| 386 |
+
|
| 387 |
+
* [Ecto.Embedded] Make `Ecto.Embedded` public and describe struct fields
|
| 388 |
+
|
| 389 |
+
### Bug fixes
|
| 390 |
+
|
| 391 |
+
* [Ecto.Repo] Make sure parent changeset is included in changes for `insert`/`update`/`delete` when there are errors processing the parent itself
|
| 392 |
+
|
| 393 |
+
## v3.7.0 (2021-08-19)
|
| 394 |
+
|
| 395 |
+
### Enhancements
|
| 396 |
+
|
| 397 |
+
* [Ecto.Changeset] Add `Ecto.Changeset.traverse_validations/2`
|
| 398 |
+
* [Ecto.Enum] Add `Ecto.Enum.mappings/2` and `Ecto.Enum.dump_values/2`
|
| 399 |
+
* [Ecto.Query] Add support for dynamic `as(^as)` and `parent_as(^as)`
|
| 400 |
+
* [Ecto.Repo] Add stale changeset to `Ecto.StaleEntryError` fields
|
| 401 |
+
* [Ecto.Schema] Add support for `@schema_context` to set context metadata on schema definition
|
| 402 |
+
|
| 403 |
+
### Bug fixes
|
| 404 |
+
|
| 405 |
+
* [Ecto.Changeset] Fix changeset inspection not redacting when embedded
|
| 406 |
+
* [Ecto.Changeset] Use semantic comparison on `validate_inclusion`, `validate_exclusion`, and `validate_subset`
|
| 407 |
+
* [Ecto.Enum] Raise on duplicate values in `Ecto.Enum`
|
| 408 |
+
* [Ecto.Query] Make sure `hints` are included in the query cache
|
| 409 |
+
* [Ecto.Repo] Support placeholders in `insert_all` without schemas
|
| 410 |
+
* [Ecto.Repo] Wrap in a subquery when query given to `Repo.aggregate` has combination
|
| 411 |
+
* [Ecto.Repo] Fix CTE subqueries not finding parent bindings
|
| 412 |
+
* [Ecto.Repo] Return changeset with assocs if any of the assocs are invalid
|
| 413 |
+
|
| 414 |
+
## v3.6.2 (2021-05-28)
|
| 415 |
+
|
| 416 |
+
### Enhancements
|
| 417 |
+
|
| 418 |
+
* [Ecto.Query] Support macros in `with_cte`
|
| 419 |
+
* [Ecto.Repo] Add `Ecto.Repo.all_running/0` to list all running repos
|
| 420 |
+
|
| 421 |
+
### Bug fixes
|
| 422 |
+
|
| 423 |
+
* [Ecto.Query] Do not omit nil fields in a subquery select
|
| 424 |
+
* [Ecto.Query] Allow `parent_as` to look for an alias all the way up across subqueries
|
| 425 |
+
* [Ecto.Query] Raise if a nil value is given to a query from a nested map parameter
|
| 426 |
+
* [Ecto.Query] Fix `insert_all` when using both `:on_conflict` and `:placeholders`
|
| 427 |
+
* [mix ecto.load] Do not pass `--force` to underlying compile task
|
| 428 |
+
|
| 429 |
+
## v3.6.1 (2021-04-12)
|
| 430 |
+
|
| 431 |
+
### Enhancements
|
| 432 |
+
|
| 433 |
+
* [Ecto.Changeset] Allow the `:query` option in `unsafe_validate_unique`
|
| 434 |
+
|
| 435 |
+
### Bug fixes
|
| 436 |
+
|
| 437 |
+
* [Ecto.Changeset] Add the relation id in `apply_changes` if the relation key exists (instead of hardcoding it to `id`)
|
| 438 |
+
|
| 439 |
+
## v3.6.0 (2021-04-03)
|
| 440 |
+
|
| 441 |
+
### Enhancements
|
| 442 |
+
|
| 443 |
+
* [Ecto.Changeset] Support `:repo_opts` in `unsafe_validate_unique`
|
| 444 |
+
* [Ecto.Changeset] Add a validation error if trying to cast a cardinality one embed/assoc with anything other than a map or keyword list
|
| 445 |
+
* [Ecto.Enum] Allow enums to map to custom values
|
| 446 |
+
* [Ecto.Multi] Add `Ecto.Multi.put/3` for directly storing values
|
| 447 |
+
* [Ecto.Query] **Potentially breaking change**: optimize `many_to_many` queries so it no longer load intermediary tables in more occasions. This may cause issues if you are using `Ecto.assoc/2` to load `many_to_many` associations and then trying to access intermediate bindings (which is discouraged but it was possible)
|
| 448 |
+
* [Ecto.Repo] Allow `insert_all` to be called with a query instead of rows
|
| 449 |
+
* [Ecto.Repo] Add `:placeholders` support to `insert_all` to avoid sending the same value multiple times
|
| 450 |
+
* [Ecto.Schema] Support `:preload_order` on `has_many` and `many_to_many` associations
|
| 451 |
+
* [Ecto.UUID] Add bang UUID conversion methods
|
| 452 |
+
* [Ecto.Query] The `:hints` option now accepts dynamic values when supplied as tuples
|
| 453 |
+
* [Ecto.Query] Support `select: map(source, fields)` where `source` is a fragment
|
| 454 |
+
* [Ecto.Query] Allow referring to the parent query in a join's subquery select via `parent_as`
|
| 455 |
+
* [mix ecto] Support file and line interpolation on `ECTO_EDITOR`
|
| 456 |
+
|
| 457 |
+
### Bug fixes
|
| 458 |
+
|
| 459 |
+
* [Ecto.Changeset] Change `apply_changes/1` to add the relation to the `struct.relation_id` if relation struct is persisted
|
| 460 |
+
* [Ecto.Query] Remove unnecessary INNER JOIN in many to many association query
|
| 461 |
+
* [Ecto.Query] Allow parametric types to be interpolated in queries
|
| 462 |
+
* [Ecto.Schema] Raise `ArgumentError` when default has invalid type
|
| 463 |
+
|
| 464 |
+
## v3.5.8 (2021-02-21)
|
| 465 |
+
|
| 466 |
+
### Enhancements
|
| 467 |
+
|
| 468 |
+
* [Ecto.Query] Support map/2 on fragments and subqueries
|
| 469 |
+
|
| 470 |
+
## v3.5.7 (2021-02-07)
|
| 471 |
+
|
| 472 |
+
### Bug fixes
|
| 473 |
+
|
| 474 |
+
* [Ecto.Query] Fixes param ordering issue on dynamic queries with subqueries
|
| 475 |
+
|
| 476 |
+
## v3.5.6 (2021-01-20)
|
| 477 |
+
|
| 478 |
+
### Enhancements
|
| 479 |
+
|
| 480 |
+
* [Ecto.Schema] Support `on_replace: :delete_if_exists` on associations
|
| 481 |
+
|
| 482 |
+
### Bug fixes
|
| 483 |
+
|
| 484 |
+
* [Ecto.Query] Allow unary minus operator in query expressions
|
| 485 |
+
* [Ecto.Schema] Allow nil values on typed maps
|
| 486 |
+
|
| 487 |
+
## v3.5.5 (2020-11-12)
|
| 488 |
+
|
| 489 |
+
### Enhancements
|
| 490 |
+
|
| 491 |
+
* [Ecto.Query] Add support for subqueries operators: `all`, `any`, and `exists`
|
| 492 |
+
|
| 493 |
+
### Bug fixes
|
| 494 |
+
|
| 495 |
+
* [Ecto.Changeset] Use association source on `put_assoc` with maps/keywords
|
| 496 |
+
* [Ecto.Enum] Add `cast` clause for nil values on `Ecto.Enum`
|
| 497 |
+
* [Ecto.Schema] Allow nested type `:any` for non-virtual fields
|
| 498 |
+
|
| 499 |
+
## v3.5.4 (2020-10-28)
|
| 500 |
+
|
| 501 |
+
### Enhancements
|
| 502 |
+
|
| 503 |
+
* [mix ecto.drop] Provide `--force-drop` for databases that may support it
|
| 504 |
+
* [guides] Add new "Multi tenancy with foreign keys" guide
|
| 505 |
+
|
| 506 |
+
### Bug fixes
|
| 507 |
+
|
| 508 |
+
* [Ecto.Changeset] Make keys optional in specs
|
| 509 |
+
* [Ecto.Enum] Make sure `values/2` works for virtual fields
|
| 510 |
+
* [Ecto.Query] Fix missing type on CTE queries that select a single field
|
| 511 |
+
|
| 512 |
+
## v3.5.3 (2020-10-21)
|
| 513 |
+
|
| 514 |
+
### Bug fixes
|
| 515 |
+
|
| 516 |
+
* [Ecto.Query] Do not reset parameter counter for nested CTEs
|
| 517 |
+
* [Ecto.Type] Fix regression where array type with nils could no longer be cast/load/dump
|
| 518 |
+
* [Ecto.Type] Fix CaseClauseError when casting a decimal with a binary remainder
|
| 519 |
+
|
| 520 |
+
## v3.5.2 (2020-10-12)
|
| 521 |
+
|
| 522 |
+
### Enhancements
|
| 523 |
+
|
| 524 |
+
* [Ecto.Repo] Add Repo.reload/2 and Repo.reload!/2
|
| 525 |
+
|
| 526 |
+
### Bug fixes
|
| 527 |
+
|
| 528 |
+
* [Ecto.Changeset] Fix "__schema__/1 is undefined or private" error while inspecting a schemaless changeset
|
| 529 |
+
* [Ecto.Repo] Invoke `c:Ecto.Repo.default_options/1` per entry-point operation
|
| 530 |
+
|
| 531 |
+
## v3.5.1 (2020-10-08)
|
| 532 |
+
|
| 533 |
+
### Enhancements
|
| 534 |
+
|
| 535 |
+
* [Ecto.Changeset] Warn if there are duplicate IDs in the parent schema for `cast_assoc/3`/`cast_embed/3`
|
| 536 |
+
* [Ecto.Schema] Allow `belongs_to` to accept options for parameterized types
|
| 537 |
+
|
| 538 |
+
### Bug fixes
|
| 539 |
+
|
| 540 |
+
* [Ecto.Query] Keep field types when using a subquery with source
|
| 541 |
+
|
| 542 |
+
## v3.5.0 (2020-10-03)
|
| 543 |
+
|
| 544 |
+
v3.5 requires Elixir v1.8+.
|
| 545 |
+
|
| 546 |
+
### Bug fixes
|
| 547 |
+
|
| 548 |
+
* [Ecto.Changeset] Ensure `:empty_values` in `cast/4` does not automatically propagate to following cast calls. If you want a given set of `:empty_values` to apply to all `cast/4` calls, change the value stored in `changeset.empty_values` instead
|
| 549 |
+
* [Ecto.Changeset] **Potentially breaking change**: Do not force repository updates to happen when using `optimistic_lock`. The lock field will only be incremented if the record has other changes. If no changes, nothing happens.
|
| 550 |
+
* [Ecto.Changeset] Do not automatically share empty values across `cast/3` calls
|
| 551 |
+
* [Ecto.Query] Consider query prefix in cte/combination query cache
|
| 552 |
+
* [Ecto.Query] Allow the entry to be marked as nil when using left join with subqueries
|
| 553 |
+
* [Ecto.Query] Support subqueries inside dynamic expressions
|
| 554 |
+
* [Ecto.Repo] Fix preloading when using dynamic repos and the sandbox in automatic mode
|
| 555 |
+
* [Ecto.Repo] Do not duplicate collections when associations are preloaded for repeated elements
|
| 556 |
+
|
| 557 |
+
### Enhancements
|
| 558 |
+
|
| 559 |
+
* [Ecto.Enum] Add `Ecto.Enum` as a custom parameterized type
|
| 560 |
+
* [Ecto.Query] Allow `:prefix` in `from` to be set to nil
|
| 561 |
+
* [Ecto.Query] Do not restrict subqueries in `where` to map/struct types
|
| 562 |
+
* [Ecto.Query] Allow atoms in query without interpolation in order to support Ecto.Enum
|
| 563 |
+
* [Ecto.Schema] Do not validate uniqueness if there is a prior error on the field
|
| 564 |
+
* [Ecto.Schema] Allow `redact: true` in `field`
|
| 565 |
+
* [Ecto.Schema] Support parameterized types via `Ecto.ParameterizedType`
|
| 566 |
+
* [Ecto.Schema] Rewrite embeds and assocs as parameterized types. This means `__schema__(:type, assoc_or_embed)` now returns a parameterized type. To check if something is an association, use `__schema__(:assocs)` or `__schema__(:embeds)` instead
|
| 567 |
+
|
| 568 |
+
## v3.4.6 (2020-08-07)
|
| 569 |
+
|
| 570 |
+
### Enhancements
|
| 571 |
+
|
| 572 |
+
* [Ecto.Query] Allow `count/0` on `type/2`
|
| 573 |
+
* [Ecto.Multi] Support anonymous functions in multiple functions
|
| 574 |
+
|
| 575 |
+
### Bug fixes
|
| 576 |
+
|
| 577 |
+
* [Ecto.Query] Consider booleans as literals in unions, subqueries, ctes, etc
|
| 578 |
+
* [Ecto.Schema] Generate IDs for nested embeds
|
| 579 |
+
|
| 580 |
+
## v3.4.5 (2020-06-14)
|
| 581 |
+
|
| 582 |
+
### Enhancements
|
| 583 |
+
|
| 584 |
+
* [Ecto.Changeset] Allow custom error key in `unsafe_validate_unique`
|
| 585 |
+
* [Ecto.Changeset] Improve performance when casting large params maps
|
| 586 |
+
|
| 587 |
+
### Bug fixes
|
| 588 |
+
|
| 589 |
+
* [Ecto.Changeset] Improve error message for invalid `cast_assoc`
|
| 590 |
+
* [Ecto.Query] Fix inspecting query with fragment CTE
|
| 591 |
+
* [Ecto.Query] Fix inspecting dynamics with aliased bindings
|
| 592 |
+
* [Ecto.Query] Improve error message when selecting a single atom
|
| 593 |
+
* [Ecto.Repo] Reduce data-copying when preloading multiple associations
|
| 594 |
+
* [Ecto.Schema] Do not define a compile-time dependency for schema in `:join_through`
|
| 595 |
+
|
| 596 |
+
## v3.4.4 (2020-05-11)
|
| 597 |
+
|
| 598 |
+
### Enhancements
|
| 599 |
+
|
| 600 |
+
* [Ecto.Schema] Add `join_where` support to `many_to_many`
|
| 601 |
+
|
| 602 |
+
## v3.4.3 (2020-04-27)
|
| 603 |
+
|
| 604 |
+
### Enhancements
|
| 605 |
+
|
| 606 |
+
* [Ecto.Query] Support `as/1` and `parent_as/1` for lazy named bindings and to allow parent references from subqueries
|
| 607 |
+
* [Ecto.Query] Support `x in subquery(query)`
|
| 608 |
+
|
| 609 |
+
### Bug fixes
|
| 610 |
+
|
| 611 |
+
* [Ecto.Query] Do not raise for missing assocs if :force is given to preload
|
| 612 |
+
* [Ecto.Repo] Return error from `Repo.delete` on invalid changeset from `prepare_changeset`
|
| 613 |
+
|
| 614 |
+
## v3.4.2 (2020-04-10)
|
| 615 |
+
|
| 616 |
+
### Enhancements
|
| 617 |
+
|
| 618 |
+
* [Ecto.Changeset] Support multiple fields in `unique_constraint/3`
|
| 619 |
+
|
| 620 |
+
## v3.4.1 (2020-04-08)
|
| 621 |
+
|
| 622 |
+
### Enhancements
|
| 623 |
+
|
| 624 |
+
* [Ecto] Add `Ecto.embedded_load/3` and `Ecto.embedded_dump/2`
|
| 625 |
+
* [Ecto.Query] Improve error message on invalid JSON expressions
|
| 626 |
+
* [Ecto.Repo] Emit `[:ecto, :repo, :init]` telemetry event upon Repo init
|
| 627 |
+
|
| 628 |
+
### Bug fixes
|
| 629 |
+
|
| 630 |
+
* [Ecto.Query] Do not support JSON selectors on `type/2`
|
| 631 |
+
|
| 632 |
+
### Deprecations
|
| 633 |
+
|
| 634 |
+
* [Ecto.Repo] Deprecate `conflict_target: {:constraint, _}`. It is a discouraged approach and `{:unsafe_fragment, _}` is still available if someone definitely needs it
|
| 635 |
+
|
| 636 |
+
## v3.4.0 (2020-03-24)
|
| 637 |
+
|
| 638 |
+
v3.4 requires Elixir v1.7+.
|
| 639 |
+
|
| 640 |
+
### Enhancements
|
| 641 |
+
|
| 642 |
+
* [Ecto.Query] Allow dynamic queries in CTE and improve error message
|
| 643 |
+
* [Ecto.Query] Add `Ecto.Query.API.json_extract_path/2` and JSON path support to query syntax. For example, `posts.metadata["tags"][0]["name"]` will return the name of the first tag stored in the `:map` metadata field
|
| 644 |
+
* [Ecto.Repo] Add new `default_options/1` callback to repository
|
| 645 |
+
* [Ecto.Repo] Support passing `:telemetry_options` to repository operations
|
| 646 |
+
|
| 647 |
+
### Bug fixes
|
| 648 |
+
|
| 649 |
+
* [Ecto.Changeset] Properly add validation annotation to `validate_acceptance`
|
| 650 |
+
* [Ecto.Query] Raise if there is loaded non-empty association data without related key when preloading. This typically means not all fields have been loaded in a query
|
| 651 |
+
* [Ecto.Schema] Show meaningful error in case `schema` is invoked twice in an `Ecto.Schema`
|
| 652 |
+
|
| 653 |
+
## v3.3.4 (2020-02-27)
|
| 654 |
+
|
| 655 |
+
### Bug fixes
|
| 656 |
+
|
| 657 |
+
* [mix ecto] Do not rely on map ordering when parsing repos
|
| 658 |
+
* [mix ecto.gen.repo] Improve error message when a repo is not given
|
| 659 |
+
|
| 660 |
+
## v3.3.3 (2020-02-14)
|
| 661 |
+
|
| 662 |
+
### Enhancements
|
| 663 |
+
|
| 664 |
+
* [Ecto.Query] Support fragments in `lock`
|
| 665 |
+
* [Ecto.Query] Handle `nil` in `select_merge` with similar semantics to SQL databases (i.e. it simply returns `nil` itself)
|
| 666 |
+
|
| 667 |
+
## v3.3.2 (2020-01-28)
|
| 668 |
+
|
| 669 |
+
### Enhancements
|
| 670 |
+
|
| 671 |
+
* [Ecto.Changeset] Only bump optimistic lock in case of success
|
| 672 |
+
* [Ecto.Query] Allow macros in Ecto window expressions
|
| 673 |
+
* [Ecto.Schema] Support `:join_defaults` on `many_to_many` associations
|
| 674 |
+
* [Ecto.Schema] Allow MFargs to be given to association `:defaults`
|
| 675 |
+
* [Ecto.Type] Add `Ecto.Type.embedded_load` and `Ecto.Type.embedded_dump`
|
| 676 |
+
|
| 677 |
+
### Bug fixes
|
| 678 |
+
|
| 679 |
+
* [Ecto.Repo] Ignore empty hostname when parsing database url (Elixir v1.10 support)
|
| 680 |
+
* [Ecto.Repo] Rewrite combinations on Repo.exists? queries
|
| 681 |
+
* [Ecto.Schema] Respect child `@schema_prefix` in `cast_assoc`
|
| 682 |
+
* [mix ecto.gen.repo] Use `config_path` when writing new config in `mix ecto.gen.repo`
|
| 683 |
+
|
| 684 |
+
## v3.3.1 (2019-12-27)
|
| 685 |
+
|
| 686 |
+
### Enhancements
|
| 687 |
+
|
| 688 |
+
* [Ecto.Query.WindowAPI] Support `filter/2`
|
| 689 |
+
|
| 690 |
+
### Bug fixes
|
| 691 |
+
|
| 692 |
+
* [Ecto.Query.API] Fix `coalesce/2` usage with mixed types
|
| 693 |
+
|
| 694 |
+
## v3.3.0 (2019-12-11)
|
| 695 |
+
|
| 696 |
+
### Enhancements
|
| 697 |
+
|
| 698 |
+
* [Ecto.Adapter] Add `storage_status/1` callback to `Ecto.Adapters.Storage` behaviour
|
| 699 |
+
* [Ecto.Changeset] Add `Ecto.Changeset.apply_action!/2`
|
| 700 |
+
* [Ecto.Changeset] Remove actions restriction in `Ecto.Changeset.apply_action/2`
|
| 701 |
+
* [Ecto.Repo] Introduce `c:Ecto.Repo.aggregate/2`
|
| 702 |
+
* [Ecto.Repo] Support `{:replace_all_except, fields}` in `:on_conflict`
|
| 703 |
+
|
| 704 |
+
### Bug fixes
|
| 705 |
+
|
| 706 |
+
* [Ecto.Query] Make sure the `:prefix` option in `:from`/`:join` also cascades to subqueries
|
| 707 |
+
* [Ecto.Query] Make sure the `:prefix` option in `:join` also cascades to queries
|
| 708 |
+
* [Ecto.Query] Use database returned values for literals. Previous Ecto versions knew literals from queries should not be discarded for combinations but, even if they were not discarded, we would ignore the values returned by the database
|
| 709 |
+
* [Ecto.Repo] Do not wrap schema operations in a transaction if already inside a transaction. We have also removed the **private** option called `:skip_transaction`
|
| 710 |
+
|
| 711 |
+
### Deprecations
|
| 712 |
+
|
| 713 |
+
* [Ecto.Repo] `:replace_all_except_primary_keys` is deprecated in favor of `{:replace_all_except, fields}` in `:on_conflict`
|
| 714 |
+
|
| 715 |
+
## v3.2.5 (2019-11-03)
|
| 716 |
+
|
| 717 |
+
### Bug fixes
|
| 718 |
+
|
| 719 |
+
* [Ecto.Query] Fix a bug where executing some queries would leak the `{:maybe, ...}` type
|
| 720 |
+
|
| 721 |
+
## v3.2.4 (2019-11-02)
|
| 722 |
+
|
| 723 |
+
### Bug fixes
|
| 724 |
+
|
| 725 |
+
* [Ecto.Query] Improve error message on invalid join binding
|
| 726 |
+
* [Ecto.Query] Make sure the `:prefix` option in `:join` also applies to through associations
|
| 727 |
+
* [Ecto.Query] Invoke custom type when loading aggregations from the database (but fallback to database value if it can't be cast)
|
| 728 |
+
* [mix ecto.gen.repo] Support Elixir v1.9 style configs
|
| 729 |
+
|
| 730 |
+
## v3.2.3 (2019-10-17)
|
| 731 |
+
|
| 732 |
+
### Bug fixes
|
| 733 |
+
|
| 734 |
+
* [Ecto.Changeset] Do not convert enums given to `validate_inclusion` to a list
|
| 735 |
+
|
| 736 |
+
### Enhancements
|
| 737 |
+
|
| 738 |
+
* [Ecto.Changeset] Improve error message on non-atom keys to change/put_change
|
| 739 |
+
* [Ecto.Changeset] Allow :with to be given as a `{module, function, args}` tuple on `cast_association/cast_embed`
|
| 740 |
+
* [Ecto.Changeset] Add `fetch_change!/2` and `fetch_field!/2`
|
| 741 |
+
|
| 742 |
+
## v3.2.2 (2019-10-01)
|
| 743 |
+
|
| 744 |
+
### Bug fixes
|
| 745 |
+
|
| 746 |
+
* [Ecto.Query] Fix keyword arguments given to `:on` when a bind is not given to join
|
| 747 |
+
* [Ecto.Repo] Make sure a preload given to an already preloaded has_many :through is loaded
|
| 748 |
+
|
| 749 |
+
## v3.2.1 (2019-09-17)
|
| 750 |
+
|
| 751 |
+
### Enhancements
|
| 752 |
+
|
| 753 |
+
* [Ecto.Changeset] Add rollover logic for default incrementer in `optimistic_lock`
|
| 754 |
+
* [Ecto.Query] Also expand macros when used inside `type/2`
|
| 755 |
+
|
| 756 |
+
### Bug fixes
|
| 757 |
+
|
| 758 |
+
* [Ecto.Query] Ensure queries with non-cacheable queries in CTEs/combinations are also not-cacheable
|
| 759 |
+
|
| 760 |
+
## v3.2.0 (2019-09-07)
|
| 761 |
+
|
| 762 |
+
v3.2 requires Elixir v1.6+.
|
| 763 |
+
|
| 764 |
+
### Enhancements
|
| 765 |
+
|
| 766 |
+
* [Ecto.Query] Add common table expressions support `with_cte/3` and `recursive_ctes/2`
|
| 767 |
+
* [Ecto.Query] Allow `dynamic/3` to be used in `order_by`, `distinct`, `group_by`, as well as in `partition_by`, `order_by`, and `frame` inside `windows`
|
| 768 |
+
* [Ecto.Query] Allow filters in `type/2` expressions
|
| 769 |
+
* [Ecto.Repo] Merge options given to the repository into the changeset `repo_opts` and assign it back to make it available down the chain
|
| 770 |
+
* [Ecto.Repo] Add `prepare_query/3` callback that is invoked before query operations
|
| 771 |
+
* [Ecto.Repo] Support `:returning` option in `Ecto.Repo.update/2`
|
| 772 |
+
* [Ecto.Repo] Support passing a one arity function to `Ecto.Repo.transaction/2`, where the argument is the current repo
|
| 773 |
+
* [Ecto.Type] Add a new `embed_as/1` callback to `Ecto.Type` that allows adapters to control embedding behaviour
|
| 774 |
+
* [Ecto.Type] Add `use Ecto.Type` for convenience that implements the new required callbacks
|
| 775 |
+
|
| 776 |
+
### Bug fixes
|
| 777 |
+
|
| 778 |
+
* [Ecto.Association] Ensure we delete an association before inserting when replacing on `has_one`
|
| 779 |
+
* [Ecto.Query] Do not allow interpolated `nil` in literal keyword list when building query
|
| 780 |
+
* [Ecto.Query] Do not remove literals from combinations, otherwise UNION/INTERSECTION queries may not match the number of values in `select`
|
| 781 |
+
* [Ecto.Query] Do not attempt to merge at compile-time non-keyword lists given to `select_merge`
|
| 782 |
+
* [Ecto.Repo] Do not override `:through` associations on preload unless forcing
|
| 783 |
+
* [Ecto.Repo] Make sure prefix option cascades to combinations and recursive queries
|
| 784 |
+
* [Ecto.Schema] Use OS time without drift when generating timestamps
|
| 785 |
+
* [Ecto.Type] Allow any datetime in `datetime_add`
|
| 786 |
+
|
| 787 |
+
## v3.1.7 (2019-06-27)
|
| 788 |
+
|
| 789 |
+
### Bug fixes
|
| 790 |
+
|
| 791 |
+
* [Ecto.Changeset] Make sure `put_assoc` with empty changeset propagates on insert
|
| 792 |
+
|
| 793 |
+
## v3.1.6 (2019-06-19)
|
| 794 |
+
|
| 795 |
+
### Enhancements
|
| 796 |
+
|
| 797 |
+
* [Ecto.Repo] Add `:read_only` repositories
|
| 798 |
+
* [Ecto.Schema] Also validate options given to `:through` associations
|
| 799 |
+
|
| 800 |
+
### Bug fixes
|
| 801 |
+
|
| 802 |
+
* [Ecto.Changeset] Do not mark `put_assoc` from `[]` to `[]` or from `nil` to `nil` as change
|
| 803 |
+
* [Ecto.Query] Remove named binding when excluding joins
|
| 804 |
+
* [mix ecto.gen.repo] Use `:config_path` instead of hardcoding to `config/config.exs`
|
| 805 |
+
|
| 806 |
+
## v3.1.5 (2019-06-06)
|
| 807 |
+
|
| 808 |
+
### Enhancements
|
| 809 |
+
|
| 810 |
+
* [Ecto.Repo] Allow `:default_dynamic_repo` option on `use Ecto.Repo`
|
| 811 |
+
* [Ecto.Schema] Support `{:fragment, ...}` in the `:where` option for associations
|
| 812 |
+
|
| 813 |
+
### Bug fixes
|
| 814 |
+
|
| 815 |
+
* [Ecto.Query] Fix handling of literals in combinators (union, except, intersection)
|
| 816 |
+
|
| 817 |
+
## v3.1.4 (2019-05-07)
|
| 818 |
+
|
| 819 |
+
### Bug fixes
|
| 820 |
+
|
| 821 |
+
* [Ecto.Changeset] Convert validation enums to lists before adding them as validation metadata
|
| 822 |
+
* [Ecto.Schema] Properly propagate prefix to join_through source in many_to_many associations
|
| 823 |
+
|
| 824 |
+
## v3.1.3 (2019-04-30)
|
| 825 |
+
|
| 826 |
+
### Enhancements
|
| 827 |
+
|
| 828 |
+
* [Ecto.Changeset] Expose the enum that was validated against in errors from enum-based validations
|
| 829 |
+
|
| 830 |
+
## v3.1.2 (2019-04-24)
|
| 831 |
+
|
| 832 |
+
### Enhancements
|
| 833 |
+
|
| 834 |
+
* [Ecto.Query] Add support for `type+over`
|
| 835 |
+
* [Ecto.Schema] Allow schema fields to be excluded from queries
|
| 836 |
+
|
| 837 |
+
### Bug fixes
|
| 838 |
+
|
| 839 |
+
* [Ecto.Changeset] Do not list a field as changed if it is updated to its original value
|
| 840 |
+
* [Ecto.Query] Keep literal numbers and bitstring in subqueries and unions
|
| 841 |
+
* [Ecto.Query] Improve error message for invalid `type/2` expression
|
| 842 |
+
* [Ecto.Query] Properly count interpolations in `select_merge/2`
|
| 843 |
+
|
| 844 |
+
## v3.1.1 (2019-04-04)
|
| 845 |
+
|
| 846 |
+
### Bug fixes
|
| 847 |
+
|
| 848 |
+
* [Ecto] Do not require Jason (i.e. it should continue to be an optional dependency)
|
| 849 |
+
* [Ecto.Repo] Make sure `many_to_many` and `Ecto.Multi` work with dynamic repos
|
| 850 |
+
|
| 851 |
+
## v3.1.0 (2019-04-02)
|
| 852 |
+
|
| 853 |
+
v3.1 requires Elixir v1.5+.
|
| 854 |
+
|
| 855 |
+
### Enhancements
|
| 856 |
+
|
| 857 |
+
* [Ecto.Changeset] Add `not_equal_to` option for `validate_number`
|
| 858 |
+
* [Ecto.Query] Improve error message for missing `fragment` arguments
|
| 859 |
+
* [Ecto.Query] Improve error message on missing struct key for structs built in `select`
|
| 860 |
+
* [Ecto.Query] Allow dynamic named bindings
|
| 861 |
+
* [Ecto.Repo] Add dynamic repository support with `Ecto.Repo.put_dynamic_repo/1` and `Ecto.Repo.get_dynamic_repo/0` (experimental)
|
| 862 |
+
* [Ecto.Type] Cast naive_datetime/utc_datetime strings without seconds
|
| 863 |
+
|
| 864 |
+
### Bug fixes
|
| 865 |
+
|
| 866 |
+
* [Ecto.Changeset] Do not run `unsafe_validate_unique` query unless relevant fields were changed
|
| 867 |
+
* [Ecto.Changeset] Raise if an unknown field is given on `Ecto.Changeset.change/2`
|
| 868 |
+
* [Ecto.Changeset] Expose the type that was validated in errors generated by `validate_length/3`
|
| 869 |
+
* [Ecto.Query] Add support for `field/2` as first element of `type/2` and alias as second element of `type/2`
|
| 870 |
+
* [Ecto.Query] Do not attempt to assert types of named bindings that are not known at compile time
|
| 871 |
+
* [Ecto.Query] Properly cast boolean expressions in select
|
| 872 |
+
* [Mix.Ecto] Load applications during repo lookup so their app environment is available
|
| 873 |
+
|
| 874 |
+
### Deprecations
|
| 875 |
+
|
| 876 |
+
* [Ecto.LogEntry] Fully deprecate previously soft deprecated API
|
| 877 |
+
|
| 878 |
+
## v3.0.7 (2019-02-06)
|
| 879 |
+
|
| 880 |
+
### Bug fixes
|
| 881 |
+
|
| 882 |
+
* [Ecto.Query] `reverse_order` reverses by primary key if no order is given
|
| 883 |
+
|
| 884 |
+
## v3.0.6 (2018-12-31)
|
| 885 |
+
|
| 886 |
+
### Enhancements
|
| 887 |
+
|
| 888 |
+
* [Ecto.Query] Add `reverse_order/1`
|
| 889 |
+
|
| 890 |
+
### Bug fixes
|
| 891 |
+
|
| 892 |
+
* [Ecto.Multi] Raise better error message on accidental rollback inside `Ecto.Multi`
|
| 893 |
+
* [Ecto.Query] Properly merge deeply nested preloaded joins
|
| 894 |
+
* [Ecto.Query] Raise better error message on missing select on schemaless queries
|
| 895 |
+
* [Ecto.Schema] Fix parameter ordering in assoc `:where`
|
| 896 |
+
|
| 897 |
+
## v3.0.5 (2018-12-08)
|
| 898 |
+
|
| 899 |
+
### Backwards incompatible changes
|
| 900 |
+
|
| 901 |
+
* [Ecto.Schema] The `:where` option added in Ecto 3.0.0 had a major flaw and it has been reworked in this version. This means a tuple of three elements can no longer be passed to `:where`, instead a keyword list must be given. Check the "Filtering associations" section in `has_many/3` docs for more information
|
| 902 |
+
|
| 903 |
+
### Bug fixes
|
| 904 |
+
|
| 905 |
+
* [Ecto.Query] Do not raise on lists of tuples that are not keywords. Instead, let custom Ecto.Type handle them
|
| 906 |
+
* [Ecto.Query] Allow `prefix: nil` to be given to subqueries
|
| 907 |
+
* [Ecto.Query] Use different cache keys for unions/intersections/excepts
|
| 908 |
+
* [Ecto.Repo] Fix support for upserts with `:replace` without a schema
|
| 909 |
+
* [Ecto.Type] Do not lose precision when casting `utc_datetime_usec` with a time zone different than Etc/UTC
|
| 910 |
+
|
| 911 |
+
## v3.0.4 (2018-11-29)
|
| 912 |
+
|
| 913 |
+
### Enhancements
|
| 914 |
+
|
| 915 |
+
* [Decimal] Bump decimal dependency
|
| 916 |
+
* [Ecto.Repo] Remove unused `:pool_timeout`
|
| 917 |
+
|
| 918 |
+
## v3.0.3 (2018-11-20)
|
| 919 |
+
|
| 920 |
+
### Enhancements
|
| 921 |
+
|
| 922 |
+
* [Ecto.Changeset] Add `count: :bytes` option in `validate_length/3`
|
| 923 |
+
* [Ecto.Query] Support passing `Ecto.Query` in `Ecto.Repo.insert_all`
|
| 924 |
+
|
| 925 |
+
### Bug fixes
|
| 926 |
+
|
| 927 |
+
* [Ecto.Type] Respect adapter types when loading/dumping arrays and maps
|
| 928 |
+
* [Ecto.Query] Ensure no bindings in order_by when using combinations in `Ecto.Query`
|
| 929 |
+
* [Ecto.Repo] Ensure adapter is compiled (instead of only loaded) before invoking it
|
| 930 |
+
* [Ecto.Repo] Support new style child spec from adapters
|
| 931 |
+
|
| 932 |
+
## v3.0.2 (2018-11-17)
|
| 933 |
+
|
| 934 |
+
### Bug fixes
|
| 935 |
+
|
| 936 |
+
* [Ecto.LogEntry] Bring old Ecto.LogEntry APIs back for compatibility
|
| 937 |
+
* [Ecto.Repo] Consider non-joined fields when merging preloaded assocs only at root
|
| 938 |
+
* [Ecto.Repo] Take field sources into account in :replace_all_fields upsert option
|
| 939 |
+
* [Ecto.Type] Convert `:utc_datetime` to `DateTime` when sending it to adapters
|
| 940 |
+
|
| 941 |
+
## v3.0.1 (2018-11-03)
|
| 942 |
+
|
| 943 |
+
### Bug fixes
|
| 944 |
+
|
| 945 |
+
* [Ecto.Query] Ensure parameter order is preserved when using more than 32 parameters
|
| 946 |
+
* [Ecto.Query] Consider query prefix when planning association joins
|
| 947 |
+
* [Ecto.Repo] Consider non-joined fields as unique parameters when merging preloaded query assocs
|
| 948 |
+
|
| 949 |
+
## v3.0.0 (2018-10-29)
|
| 950 |
+
|
| 951 |
+
Note this version includes changes from `ecto` and `ecto_sql` but in future releases all `ecto_sql` entries will be listed in their own CHANGELOG.
|
| 952 |
+
|
| 953 |
+
### Enhancements
|
| 954 |
+
|
| 955 |
+
* [Ecto.Adapters.MySQL] Add ability to specify cli_protocol for `ecto.create` and `ecto.drop` commands
|
| 956 |
+
* [Ecto.Adapters.PostgreSQL] Add ability to specify maintenance database name for PostgreSQL adapter for `ecto.create` and `ecto.drop` commands
|
| 957 |
+
* [Ecto.Changeset] Store constraint name in error metadata for constraints
|
| 958 |
+
* [Ecto.Changeset] Add `validations/1` and `constraints/1` instead of allowing direct access on the struct fields
|
| 959 |
+
* [Ecto.Changeset] Add `:force_update` option when casting relations, to force an update even if there are no changes
|
| 960 |
+
* [Ecto.Migration] Migrations now lock the migrations table in order to avoid concurrent migrations in a cluster. The type of lock can be configured via the `:migration_lock` repository configuration and defaults to "FOR UPDATE" or disabled if set to nil
|
| 961 |
+
* [Ecto.Migration] Add `:migration_default_prefix` repository configuration
|
| 962 |
+
* [Ecto.Migration] Add reversible version of `remove/2` subcommand
|
| 963 |
+
* [Ecto.Migration] Add support for non-empty arrays as defaults in migrations
|
| 964 |
+
* [Ecto.Migration] Add support for logging notices/alerts/warnings when running migrations (only supported by Postgres currently)
|
| 965 |
+
* [Ecto.Migrator] Warn when migrating and there is a higher version already migrated in the database
|
| 966 |
+
* [Ecto.Multi] Add support for anonymous functions in `insert/4`, `update/4`, `insert_or_update/4`, and `delete/4`
|
| 967 |
+
* [Ecto.Query] Support tuples in `where` and `having`, allowing queries such as `where: {p.foo, p.bar} > {^foo, ^bar}`
|
| 968 |
+
* [Ecto.Query] Support arithmetic operators in queries as a thin layer around the DB functionality
|
| 969 |
+
* [Ecto.Query] Allow joins in queries to be named via `:as` and allow named bindings
|
| 970 |
+
* [Ecto.Query] Support excluding specific join types in `exclude/2`
|
| 971 |
+
* [Ecto.Query] Allow virtual field update in subqueries
|
| 972 |
+
* [Ecto.Query] Support `coalesce/2` in queries, such as `select: coalesce(p.title, p.old_title)`
|
| 973 |
+
* [Ecto.Query] Support `filter/2` in queries, such as `select: filter(count(p.id), p.public == true)`
|
| 974 |
+
* [Ecto.Query] The `:prefix` and `:hints` options are now supported on both `from` and `join` expressions
|
| 975 |
+
* [Ecto.Query] Support `:asc_nulls_last`, `:asc_nulls_first`, `:desc_nulls_last`, and `:desc_nulls_first` in `order_by`
|
| 976 |
+
* [Ecto.Query] Allow variables (sources) to be given in queries, for example, useful for invoking functions, such as `fragment("some_function(?)", p)`
|
| 977 |
+
* [Ecto.Query] Add support for `union`, `union_all`, `intersection`, `intersection_all`, `except` and `except_all`
|
| 978 |
+
* [Ecto.Query] Add support for `windows` and `over`
|
| 979 |
+
* [Ecto.Query] Raise when comparing a string with a charlist during planning
|
| 980 |
+
* [Ecto.Repo] Only start transactions if an association or embed has changed, this reduces the overhead during repository operations
|
| 981 |
+
* [Ecto.Repo] Support `:replace_all_except_primary_key` as `:on_conflict` strategy
|
| 982 |
+
* [Ecto.Repo] Support `{:replace, fields}` as `:on_conflict` strategy
|
| 983 |
+
* [Ecto.Repo] Support `:unsafe_fragment` as `:conflict_target`
|
| 984 |
+
* [Ecto.Repo] Support `select` in queries given to `update_all` and `delete_all`
|
| 985 |
+
* [Ecto.Repo] Add `Repo.exists?/2`
|
| 986 |
+
* [Ecto.Repo] Add `Repo.checkout/2` - useful when performing multiple operations in short-time to interval, allowing the pool to be bypassed
|
| 987 |
+
* [Ecto.Repo] Add `:stale_error_field` to `Repo.insert/update/delete` that converts `Ecto.StaleEntryError` into a changeset error. The message can also be set with `:stale_error_message`
|
| 988 |
+
* [Ecto.Repo] Preloading now only sorts results by the relationship key instead of sorting by the whole struct
|
| 989 |
+
* [Ecto.Schema] Allow `:where` option to be given to `has_many`/`has_one`/`belongs_to`/`many_to_many`
|
| 990 |
+
|
| 991 |
+
### Bug fixes
|
| 992 |
+
|
| 993 |
+
* [Ecto.Inspect] Do not fail when inspecting query expressions which have a number of bindings more than bindings available
|
| 994 |
+
* [Ecto.Migration] Keep double underscores on autogenerated index names to be consistent with changesets
|
| 995 |
+
* [Ecto.Query] Fix `Ecto.Query.API.map/2` for single nil column with join
|
| 996 |
+
* [Ecto.Migration] Ensure `create_if_not_exists` is properly reversible
|
| 997 |
+
* [Ecto.Repo] Allow many_to_many associations to be preloaded via a function (before the behaviour was erratic)
|
| 998 |
+
* [Ecto.Schema] Make autogen ID loading work with custom type
|
| 999 |
+
* [Ecto.Schema] Make `updated_at` have the same value as `inserted_at`
|
| 1000 |
+
* [Ecto.Schema] Ensure all fields are replaced with `on_conflict: :replace_all/:replace_all_except_primary_key` and not only the fields sent as changes
|
| 1001 |
+
* [Ecto.Type] Return `:error` when casting NaN or infinite decimals
|
| 1002 |
+
* [mix ecto.migrate] Properly run migrations after ECTO_EDITOR changes
|
| 1003 |
+
* [mix ecto.migrations] List migrated versions even if the migration file is deleted
|
| 1004 |
+
* [mix ecto.load] The task now fails on SQL errors on Postgres
|
| 1005 |
+
|
| 1006 |
+
### Deprecations
|
| 1007 |
+
|
| 1008 |
+
Although Ecto 3.0 is a major bump version, the functionality below emits deprecation warnings to ease the migration process. The functionality below will be removed in future Ecto 3.1+ releases.
|
| 1009 |
+
|
| 1010 |
+
* [Ecto.Changeset] Passing a list of binaries to `cast/3` is deprecated, please pass a list of atoms instead
|
| 1011 |
+
* [Ecto.Multi] `Ecto.Multi.run/3` now receives the repo in which the transaction is executing as the first argument to functions, and the changes so far as the second argument
|
| 1012 |
+
* [Ecto.Query] `join/5` now expects `on: expr` as last argument instead of simply `expr`. This was done in order to properly support the `:as`, `:hints` and `:prefix` options
|
| 1013 |
+
* [Ecto.Repo] The `:returning` option for `update_all` and `delete_all` has been deprecated as those statements now support `select` clauses
|
| 1014 |
+
* [Ecto.Repo] Passing `:adapter` via config is deprecated in favor of passing it on `use Ecto.Repo`
|
| 1015 |
+
* [Ecto.Repo] The `:loggers` configuration is deprecated in favor of "Telemetry Events"
|
| 1016 |
+
|
| 1017 |
+
### Backwards incompatible changes
|
| 1018 |
+
|
| 1019 |
+
* [Ecto.DateTime] `Ecto.Date`, `Ecto.Time` and `Ecto.DateTime` were previously deprecated and have now been removed
|
| 1020 |
+
* [Ecto.DataType] `Ecto.DataType` protocol has been removed
|
| 1021 |
+
* [Ecto.Migration] Automatically inferred index names may differ in Ecto v3.0 for indexes on complex column names
|
| 1022 |
+
* [Ecto.Multi] `Ecto.Multi.run/5` now receives the repo in which the transaction is executing as the first argument to functions, and the changes so far as the second argument
|
| 1023 |
+
* [Ecto.Query] A `join` no longer wraps `fragment` in parentheses. In some cases, such as common table expressions, you will have to explicitly wrap the fragment in parens.
|
| 1024 |
+
* [Ecto.Repo] The `on_conflict: :replace_all` option now will also send fields with default values to the database. If you prefer the old behaviour that only sends the changes in the changeset, you can set it to `on_conflict: {:replace, Map.keys(changeset.changes)}` (this change is also listed as a bug fix)
|
| 1025 |
+
* [Ecto.Repo] The repository operations are no longer called from association callbacks - this behaviour was not guaranteed in previous versions but we are listing as backwards incompatible changes to help with users relying on this behaviour
|
| 1026 |
+
* [Ecto.Repo] `:pool_timeout` is no longer supported in favor of a new queue system described in `DBConnection.start_link/2` under "Queue config". For most users, configuring `:timeout` is enough, as it now includes both queue and query time
|
| 1027 |
+
* [Ecto.Schema] `:time`, `:naive_datetime` and `:utc_datetime` no longer keep microseconds information. If you want to keep microseconds, use `:time_usec`, `:naive_datetime_usec`, `:utc_datetime_usec`
|
| 1028 |
+
* [Ecto.Schema] The `@schema_prefix` option now only affects the `from`/`join` of where the schema is used and no longer the whole query
|
| 1029 |
+
* [Ecto.Schema.Metadata] The `source` key no longer returns a tuple of the schema_prefix and the table/collection name. It now returns just the table/collection string. You can now access the schema_prefix via the `prefix` key.
|
| 1030 |
+
* [Mix.Ecto] `Mix.Ecto.ensure_started/2` has been removed. However, in Ecto 2.2 the `Mix.Ecto` module was not considered part of the public API and should not have been used but we are listing this for guidance.
|
| 1031 |
+
|
| 1032 |
+
### Adapter changes
|
| 1033 |
+
|
| 1034 |
+
* [Ecto.Adapter] Split `Ecto.Adapter` into `Ecto.Adapter.Queryable` and `Ecto.Adapter.Schema` to provide more granular repository APIs
|
| 1035 |
+
* [Ecto.Adapter] The `:sources` field in `query_meta` now contains three elements tuples with `{source, schema, prefix}` in order to support `from`/`join` prefixes (#2572)
|
| 1036 |
+
* [Ecto.Adapter] The database types `time`, `utc_datetime` and `naive_datetime` should translate to types with seconds precision while the database types `time_usec`, `utc_datetime_usec` and `naive_datetime_usec` should have microseconds precision (#2291)
|
| 1037 |
+
* [Ecto.Adapter] The `on_conflict` argument for `insert` and `insert_all` no longer receives a `{:replace_all, list(), atom()}` tuple. Instead, it receives a `{fields :: [atom()], list(), atom()}` where `fields` is a list of atoms of the fields to be replaced (#2181)
|
| 1038 |
+
* [Ecto.Adapter] `insert`/`update`/`delete` now receive both `:source` and `:prefix` fields instead of a single `:source` field with both `source` and `prefix` in it (#2490)
|
| 1039 |
+
* [Ecto.Adapter.Migration] A new `lock_for_migration/4` callback has been added. It is implemented by default by `Ecto.Adapters.SQL` (#2215)
|
| 1040 |
+
* [Ecto.Adapter.Migration] The `execute_ddl` should now return `{:ok, []}` to make space for returning notices/hints/warnings in the future (adapters leveraging `Ecto.Adapters.SQL` do not have to perform any change)
|
| 1041 |
+
* [Ecto.Query] The `from` field in `Ecto.Query` now returns a `Ecto.Query.FromExpr` with the `:source` field, unifying the behaviour in `from` and `join` expressions (#2497)
|
| 1042 |
+
* [Ecto.Query] Tuple expressions are now supported in queries. For example, `where: {p.foo, p.bar} > {p.bar, p.baz}` should translate to `WHERE (p.foo, p.bar) > (p.bar, p.baz)` in SQL databases. Adapters should be changed to handle `{:{}, meta, exprs}` in the query AST (#2344)
|
| 1043 |
+
* [Ecto.Query] Adapters should support the following arithmetic operators in queries `+`, `-`, `*` and `/` (#2400)
|
| 1044 |
+
* [Ecto.Query] Adapters should support `filter/2` in queries, as in `select: filter(count(p.id), p.public == true)` (#2487)
|
| 1045 |
+
|
| 1046 |
+
## Previous versions
|
| 1047 |
+
|
| 1048 |
+
* See the CHANGELOG.md [in the v2.2 branch](https://github.com/elixir-ecto/ecto/blob/v2.2/CHANGELOG.md)
|
deps/ecto/README.md
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<img width="250" src="https://github.com/elixir-ecto/ecto/raw/master/guides/images/logo.png#gh-light-mode-only" alt="Ecto">
|
| 2 |
+
<img width="250" src="https://github.com/elixir-ecto/ecto/raw/master/guides/images/logo-white.png#gh-dark-mode-only" alt="Ecto">
|
| 3 |
+
|
| 4 |
+
---
|
| 5 |
+
|
| 6 |
+
[](https://github.com/elixir-ecto/ecto/actions) [](https://hex.pm/packages/ecto) [](https://hexdocs.pm/ecto/)
|
| 7 |
+
|
| 8 |
+
## Installation
|
| 9 |
+
|
| 10 |
+
Add `:ecto` to the list of dependencies in `mix.exs`:
|
| 11 |
+
|
| 12 |
+
```elixir
|
| 13 |
+
def deps do
|
| 14 |
+
[
|
| 15 |
+
{:ecto, "~> 3.10"}
|
| 16 |
+
]
|
| 17 |
+
end
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## About
|
| 21 |
+
|
| 22 |
+
Ecto is a toolkit for data mapping and language integrated query for Elixir. Here is an example:
|
| 23 |
+
|
| 24 |
+
```elixir
|
| 25 |
+
# In your config/config.exs file
|
| 26 |
+
config :my_app, ecto_repos: [Sample.Repo]
|
| 27 |
+
|
| 28 |
+
config :my_app, Sample.Repo,
|
| 29 |
+
database: "ecto_simple",
|
| 30 |
+
username: "postgres",
|
| 31 |
+
password: "postgres",
|
| 32 |
+
hostname: "localhost",
|
| 33 |
+
port: "5432"
|
| 34 |
+
|
| 35 |
+
# In your application code
|
| 36 |
+
defmodule Sample.Repo do
|
| 37 |
+
use Ecto.Repo,
|
| 38 |
+
otp_app: :my_app,
|
| 39 |
+
adapter: Ecto.Adapters.Postgres
|
| 40 |
+
end
|
| 41 |
+
|
| 42 |
+
defmodule Sample.Weather do
|
| 43 |
+
use Ecto.Schema
|
| 44 |
+
|
| 45 |
+
schema "weather" do
|
| 46 |
+
field :city # Defaults to type :string
|
| 47 |
+
field :temp_lo, :integer
|
| 48 |
+
field :temp_hi, :integer
|
| 49 |
+
field :prcp, :float, default: 0.0
|
| 50 |
+
end
|
| 51 |
+
end
|
| 52 |
+
|
| 53 |
+
defmodule Sample.App do
|
| 54 |
+
import Ecto.Query
|
| 55 |
+
alias Sample.{Weather, Repo}
|
| 56 |
+
|
| 57 |
+
def keyword_query do
|
| 58 |
+
query =
|
| 59 |
+
from w in Weather,
|
| 60 |
+
where: w.prcp > 0 or is_nil(w.prcp),
|
| 61 |
+
select: w
|
| 62 |
+
|
| 63 |
+
Repo.all(query)
|
| 64 |
+
end
|
| 65 |
+
|
| 66 |
+
def pipe_query do
|
| 67 |
+
Weather
|
| 68 |
+
|> where(city: "Kraków")
|
| 69 |
+
|> order_by(:temp_lo)
|
| 70 |
+
|> limit(10)
|
| 71 |
+
|> Repo.all
|
| 72 |
+
end
|
| 73 |
+
end
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
Ecto is commonly used to interact with databases, such as PostgreSQL and MySQL via [Ecto.Adapters.SQL](https://hexdocs.pm/ecto_sql) ([source code](https://github.com/elixir-ecto/ecto_sql)). Ecto is also commonly used to map data from any source into Elixir structs, whether they are backed by a database or not.
|
| 77 |
+
|
| 78 |
+
See the [getting started guide](https://hexdocs.pm/ecto/getting-started.html) and the [online documentation](https://hexdocs.pm/ecto) for more information. Other resources available are:
|
| 79 |
+
|
| 80 |
+
* [Programming Ecto](https://pragprog.com/titles/wmecto/programming-ecto/), by Darin Wilson and Eric Meadows-Jönsson, which guides you from fundamentals up to advanced concepts
|
| 81 |
+
|
| 82 |
+
* [The Little Ecto Cookbook](https://dashbit.co/ebooks/the-little-ecto-cookbook), a free ebook by Dashbit, which is a curation of the existing Ecto guides with some extra contents
|
| 83 |
+
|
| 84 |
+
## Usage
|
| 85 |
+
|
| 86 |
+
You need to add both Ecto and the database adapter as a dependency to your `mix.exs` file. The supported databases and their adapters are:
|
| 87 |
+
|
| 88 |
+
| Database | Ecto Adapter | Dependencies |
|
| 89 |
+
| :--------- | :----------------------- | :----------------------------------------------- |
|
| 90 |
+
| PostgreSQL | Ecto.Adapters.Postgres | [ecto_sql][ecto_sql] + [postgrex][postgrex] |
|
| 91 |
+
| MySQL | Ecto.Adapters.MyXQL | [ecto_sql][ecto_sql] + [myxql][myxql] |
|
| 92 |
+
| MSSQL | Ecto.Adapters.Tds | [ecto_sql][ecto_sql] + [tds][tds] |
|
| 93 |
+
| SQLite3 | Ecto.Adapters.SQLite3 | [ecto_sqlite3][ecto_sqlite3] |
|
| 94 |
+
| ClickHouse | Ecto.Adapters.ClickHouse | [ecto_ch][ecto_ch] |
|
| 95 |
+
| ETS | Etso | [etso][etso] |
|
| 96 |
+
|
| 97 |
+
[ecto_sql]: https://github.com/elixir-ecto/ecto_sql
|
| 98 |
+
[postgrex]: https://github.com/elixir-ecto/postgrex
|
| 99 |
+
[myxql]: https://github.com/elixir-ecto/myxql
|
| 100 |
+
[tds]: https://github.com/livehelpnow/tds
|
| 101 |
+
[ecto_sqlite3]: https://github.com/elixir-sqlite/ecto_sqlite3
|
| 102 |
+
[etso]: https://github.com/evadne/etso
|
| 103 |
+
[ecto_ch]: https://github.com/plausible/ecto_ch
|
| 104 |
+
|
| 105 |
+
For example, if you want to use PostgreSQL, add to your `mix.exs` file:
|
| 106 |
+
|
| 107 |
+
```elixir
|
| 108 |
+
defp deps do
|
| 109 |
+
[
|
| 110 |
+
{:ecto_sql, "~> 3.0"},
|
| 111 |
+
{:postgrex, ">= 0.0.0"}
|
| 112 |
+
]
|
| 113 |
+
end
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Then run `mix deps.get` in your shell to fetch the dependencies. If you want to use another database, just choose the proper dependency from the table above.
|
| 117 |
+
|
| 118 |
+
Finally, in the repository definition, you will need to specify the `adapter:` respective to the chosen dependency. For PostgreSQL it is:
|
| 119 |
+
|
| 120 |
+
```elixir
|
| 121 |
+
defmodule MyApp.Repo do
|
| 122 |
+
use Ecto.Repo,
|
| 123 |
+
otp_app: :my_app,
|
| 124 |
+
adapter: Ecto.Adapters.Postgres,
|
| 125 |
+
...
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
## Supported Versions
|
| 129 |
+
|
| 130 |
+
| Branch | Support |
|
| 131 |
+
| ----------------- | ------------------------ |
|
| 132 |
+
| v3.12 | Bug fixes |
|
| 133 |
+
| v3.11 | Security patches only |
|
| 134 |
+
| v3.10 | Security patches only |
|
| 135 |
+
| v3.9 | Security patches only |
|
| 136 |
+
| v3.8 | Security patches only |
|
| 137 |
+
| v3.7 and earlier | Unsupported |
|
| 138 |
+
|
| 139 |
+
With version 3.0, Ecto API has become stable. Our main focus is on providing
|
| 140 |
+
bug fixes and incremental changes.
|
| 141 |
+
|
| 142 |
+
## Important links
|
| 143 |
+
|
| 144 |
+
* [Documentation](https://hexdocs.pm/ecto)
|
| 145 |
+
* [Mailing list](https://groups.google.com/forum/#!forum/elixir-ecto)
|
| 146 |
+
* [Examples](https://github.com/elixir-ecto/ecto/tree/master/examples)
|
| 147 |
+
|
| 148 |
+
## Running tests
|
| 149 |
+
|
| 150 |
+
Clone the repo and fetch its dependencies:
|
| 151 |
+
|
| 152 |
+
$ git clone https://github.com/elixir-ecto/ecto.git
|
| 153 |
+
$ cd ecto
|
| 154 |
+
$ mix deps.get
|
| 155 |
+
$ mix test
|
| 156 |
+
|
| 157 |
+
Note that `mix test` does not run the tests in the `integration_test` folder. To run integration tests, you can clone `ecto_sql` in a sibling directory and then run its integration tests with the `ECTO_PATH` environment variable pointing to your Ecto checkout:
|
| 158 |
+
|
| 159 |
+
$ cd ..
|
| 160 |
+
$ git clone https://github.com/elixir-ecto/ecto_sql.git
|
| 161 |
+
$ cd ecto_sql
|
| 162 |
+
$ mix deps.get
|
| 163 |
+
$ ECTO_PATH=../ecto mix test.all
|
| 164 |
+
|
| 165 |
+
### Running containerized tests
|
| 166 |
+
|
| 167 |
+
It is also possible to run the integration tests under a containerized environment using [earthly](https://earthly.dev/get-earthly):
|
| 168 |
+
|
| 169 |
+
$ earthly -P +all
|
| 170 |
+
|
| 171 |
+
You can also use this to interactively debug any failing integration tests using:
|
| 172 |
+
|
| 173 |
+
$ earthly -P -i --build-arg ELIXIR_BASE=1.8.2-erlang-21.3.8.21-alpine-3.13.1 +integration-test
|
| 174 |
+
|
| 175 |
+
Then once you enter the containerized shell, you can inspect the underlying databases with the respective commands:
|
| 176 |
+
|
| 177 |
+
PGPASSWORD=postgres psql -h 127.0.0.1 -U postgres -d postgres ecto_test
|
| 178 |
+
MYSQL_PASSWORD=root mysql -h 127.0.0.1 -uroot -proot ecto_test
|
| 179 |
+
sqlcmd -U sa -P 'some!Password'
|
| 180 |
+
|
| 181 |
+
## Logo
|
| 182 |
+
|
| 183 |
+
"Ecto" and the Ecto logo are Copyright (c) 2020 Dashbit.
|
| 184 |
+
|
| 185 |
+
The Ecto logo was designed by [Dane Wesolko](https://www.danewesolko.com).
|
| 186 |
+
|
| 187 |
+
## License
|
| 188 |
+
|
| 189 |
+
Copyright (c) 2013 Plataformatec \
|
| 190 |
+
Copyright (c) 2020 Dashbit
|
| 191 |
+
|
| 192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 193 |
+
you may not use this file except in compliance with the License.
|
| 194 |
+
You may obtain a copy of the License at [https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
| 195 |
+
|
| 196 |
+
Unless required by applicable law or agreed to in writing, software
|
| 197 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 198 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 199 |
+
See the License for the specific language governing permissions and
|
| 200 |
+
limitations under the License.
|
deps/makeup_erlang/.formatter.exs
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Used by "mix format"
|
| 2 |
+
[
|
| 3 |
+
inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"]
|
| 4 |
+
]
|
deps/makeup_erlang/.hex
ADDED
|
Binary file (269 Bytes). View file
|
|
|
deps/makeup_erlang/LICENSE
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
BSD 2-Clause License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2019 by the respective authors (see AUTHORS file).
|
| 4 |
+
All rights reserved.
|
| 5 |
+
|
| 6 |
+
Redistribution and use in source and binary forms, with or without
|
| 7 |
+
modification, are permitted provided that the following conditions are met:
|
| 8 |
+
|
| 9 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
| 10 |
+
list of conditions and the following disclaimer.
|
| 11 |
+
|
| 12 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
| 13 |
+
this list of conditions and the following disclaimer in the documentation
|
| 14 |
+
and/or other materials provided with the distribution.
|
| 15 |
+
|
| 16 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 17 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 18 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 19 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
| 20 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
| 21 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 22 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
| 23 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
| 24 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 25 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
deps/makeup_erlang/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MakeupErlang
|
| 2 |
+
|
| 3 |
+
[](https://github.com/elixir-makeup/makeup_erlang/actions/workflows/ci.yml)
|
| 4 |
+
[](https://hex.pm/packages/makeup_erlang)
|
| 5 |
+
[](https://hexdocs.pm/makeup_erlang)
|
| 6 |
+
|
| 7 |
+
A [Makeup](https://github.com/elixir-makeup/makeup/) lexer for the `Erlang` language.
|
| 8 |
+
|
| 9 |
+
## Installation
|
| 10 |
+
|
| 11 |
+
Add `makeup_erlang` to your list of dependencies in `mix.exs`:
|
| 12 |
+
|
| 13 |
+
```elixir
|
| 14 |
+
def deps do
|
| 15 |
+
[
|
| 16 |
+
{:makeup_erlang, "~> 1.0"}
|
| 17 |
+
]
|
| 18 |
+
end
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
The lexer will automatically register itself with `Makeup` for the languages `erlang` and `erl`
|
| 22 |
+
as well as the extensions `.erl`, `.hrl` and `.escript`.
|
deps/makeup_erlang/hex_metadata.config
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{<<"links">>,
|
| 2 |
+
[{<<"GitHub">>,<<"https://github.com/elixir-makeup/makeup_erlang">>}]}.
|
| 3 |
+
{<<"name">>,<<"makeup_erlang">>}.
|
| 4 |
+
{<<"version">>,<<"1.0.3">>}.
|
| 5 |
+
{<<"description">>,<<"Erlang lexer for the Makeup syntax highlighter.">>}.
|
| 6 |
+
{<<"elixir">>,<<"~> 1.6">>}.
|
| 7 |
+
{<<"files">>,
|
| 8 |
+
[<<"lib">>,<<"lib/makeup">>,<<"lib/makeup/lexers">>,
|
| 9 |
+
<<"lib/makeup/lexers/erlang_lexer.ex">>,
|
| 10 |
+
<<"lib/makeup/lexers/erlang_lexer">>,
|
| 11 |
+
<<"lib/makeup/lexers/erlang_lexer/testing.ex">>,
|
| 12 |
+
<<"lib/makeup/lexers/erlang_lexer/application.ex">>,
|
| 13 |
+
<<"lib/makeup/lexers/erlang_lexer/helper.ex">>,<<".formatter.exs">>,
|
| 14 |
+
<<"mix.exs">>,<<"README.md">>,<<"LICENSE">>]}.
|
| 15 |
+
{<<"app">>,<<"makeup_erlang">>}.
|
| 16 |
+
{<<"licenses">>,[<<"BSD-2-Clause">>]}.
|
| 17 |
+
{<<"requirements">>,
|
| 18 |
+
[[{<<"name">>,<<"makeup">>},
|
| 19 |
+
{<<"app">>,<<"makeup">>},
|
| 20 |
+
{<<"optional">>,false},
|
| 21 |
+
{<<"requirement">>,<<"~> 1.0">>},
|
| 22 |
+
{<<"repository">>,<<"hexpm">>}]]}.
|
| 23 |
+
{<<"build_tools">>,[<<"mix">>]}.
|
deps/makeup_erlang/lib/makeup/lexers/erlang_lexer.ex
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule Makeup.Lexers.ErlangLexer do
|
| 2 |
+
@moduledoc """
|
| 3 |
+
A `Makeup` lexer for the `Erlang` language.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
@behaviour Makeup.Lexer
|
| 7 |
+
|
| 8 |
+
import NimbleParsec
|
| 9 |
+
import Makeup.Lexer.Combinators
|
| 10 |
+
import Makeup.Lexer.Groups
|
| 11 |
+
import Makeup.Lexers.ErlangLexer.Helper
|
| 12 |
+
|
| 13 |
+
###################################################################
|
| 14 |
+
# Step #1: tokenize the input (into a list of tokens)
|
| 15 |
+
###################################################################
|
| 16 |
+
|
| 17 |
+
whitespace = ascii_string([?\s, ?\f, ?\r, ?\n, ?\t], min: 1) |> token(:whitespace)
|
| 18 |
+
|
| 19 |
+
# This is the combinator that ensures that the lexer will never reject a file
|
| 20 |
+
# because of invalid input syntax
|
| 21 |
+
any_char = utf8_char([]) |> token(:error)
|
| 22 |
+
|
| 23 |
+
comment =
|
| 24 |
+
ascii_char([?%])
|
| 25 |
+
|> optional(utf8_string([not: ?\n], min: 1))
|
| 26 |
+
|> token(:comment_single)
|
| 27 |
+
|
| 28 |
+
hashbang =
|
| 29 |
+
string("\n#!")
|
| 30 |
+
|> utf8_string([not: ?\n], min: 1)
|
| 31 |
+
|> string("\n")
|
| 32 |
+
|> token(:comment_hashbang)
|
| 33 |
+
|
| 34 |
+
escape_octal = ascii_string([?0..?7], min: 1, max: 3)
|
| 35 |
+
|
| 36 |
+
escape_char = ascii_char([?\b, ?\d, ?\e, ?\f, ?\n, ?\r, ?\s, ?\t, ?\v, ?\', ?\", ?\\])
|
| 37 |
+
|
| 38 |
+
escape_hex =
|
| 39 |
+
choice([
|
| 40 |
+
string("x") |> ascii_string([?0..?9, ?a..?f, ?A..?F], 2),
|
| 41 |
+
string("x{") |> ascii_string([?0..?9, ?a..?f, ?A..?F], min: 1) |> string("}")
|
| 42 |
+
])
|
| 43 |
+
|
| 44 |
+
escape_ctrl = string("^") |> ascii_char([?a..?z, ?A..?Z])
|
| 45 |
+
|
| 46 |
+
escape =
|
| 47 |
+
choice([
|
| 48 |
+
escape_char,
|
| 49 |
+
escape_octal,
|
| 50 |
+
escape_hex,
|
| 51 |
+
escape_ctrl
|
| 52 |
+
])
|
| 53 |
+
|
| 54 |
+
numeric_base =
|
| 55 |
+
choice([
|
| 56 |
+
ascii_char([?1..?2]) |> ascii_char([?0..?9]),
|
| 57 |
+
string("3") |> ascii_char([?0..?6]),
|
| 58 |
+
ascii_char([?2..?9])
|
| 59 |
+
])
|
| 60 |
+
|
| 61 |
+
# Numbers
|
| 62 |
+
digits = ascii_string([?0..?9], min: 1)
|
| 63 |
+
|
| 64 |
+
number_integer =
|
| 65 |
+
optional(ascii_char([?+, ?-]))
|
| 66 |
+
|> concat(digits)
|
| 67 |
+
|> token(:number_integer)
|
| 68 |
+
|
| 69 |
+
number_integer_in_weird_base =
|
| 70 |
+
optional(ascii_char([?+, ?-]))
|
| 71 |
+
|> concat(numeric_base)
|
| 72 |
+
|> string("#")
|
| 73 |
+
|> ascii_string([?0..?9, ?a..?z, ?A..?Z], min: 1)
|
| 74 |
+
|> token(:number_integer)
|
| 75 |
+
|
| 76 |
+
# Floating point numbers
|
| 77 |
+
float_scientific_notation_part =
|
| 78 |
+
ascii_string([?e, ?E], 1)
|
| 79 |
+
|> optional(string("-"))
|
| 80 |
+
|> concat(digits)
|
| 81 |
+
|
| 82 |
+
number_float =
|
| 83 |
+
optional(ascii_char([?+, ?-]))
|
| 84 |
+
|> concat(digits)
|
| 85 |
+
|> string(".")
|
| 86 |
+
|> concat(digits)
|
| 87 |
+
|> optional(float_scientific_notation_part)
|
| 88 |
+
|> token(:number_float)
|
| 89 |
+
|
| 90 |
+
variable_name =
|
| 91 |
+
ascii_string([?A..?Z, ?_], 1)
|
| 92 |
+
|> optional(ascii_string([?a..?z, ?_, ?0..?9, ?A..?Z], min: 1))
|
| 93 |
+
|
| 94 |
+
simple_atom_name =
|
| 95 |
+
ascii_string([?a..?z], 1)
|
| 96 |
+
|> optional(ascii_string([?a..?z, ?_, ?@, ?0..?9, ?A..?Z], min: 1))
|
| 97 |
+
|> reduce({Enum, :join, []})
|
| 98 |
+
|
| 99 |
+
single_quote_escape = string("\\'")
|
| 100 |
+
|
| 101 |
+
quoted_atom_name_middle =
|
| 102 |
+
lookahead_not(string("'"))
|
| 103 |
+
|> choice([
|
| 104 |
+
single_quote_escape,
|
| 105 |
+
utf8_string([not: ?\n, not: ?', not: ?\\], min: 1),
|
| 106 |
+
escape
|
| 107 |
+
])
|
| 108 |
+
|
| 109 |
+
quoted_atom_name =
|
| 110 |
+
string("'")
|
| 111 |
+
|> repeat(quoted_atom_name_middle)
|
| 112 |
+
|> concat(string("'"))
|
| 113 |
+
|
| 114 |
+
atom_name =
|
| 115 |
+
choice([
|
| 116 |
+
simple_atom_name,
|
| 117 |
+
quoted_atom_name
|
| 118 |
+
])
|
| 119 |
+
|
| 120 |
+
atom = token(atom_name, :string_symbol)
|
| 121 |
+
|
| 122 |
+
namespace =
|
| 123 |
+
token(atom_name, :name_class)
|
| 124 |
+
|> concat(token(":", :punctuation))
|
| 125 |
+
|
| 126 |
+
function =
|
| 127 |
+
atom_name
|
| 128 |
+
|> lexeme()
|
| 129 |
+
|> token(:name_function)
|
| 130 |
+
|> concat(optional(whitespace))
|
| 131 |
+
|> concat(token("(", :punctuation))
|
| 132 |
+
|
| 133 |
+
# Can also be a function name
|
| 134 |
+
variable =
|
| 135 |
+
variable_name
|
| 136 |
+
# Check if you need to use the lexeme parser
|
| 137 |
+
# (i.e. if you need the token value to be a string)
|
| 138 |
+
# If not, just delete the lexeme parser
|
| 139 |
+
|> lexeme()
|
| 140 |
+
|> token(:name)
|
| 141 |
+
|
| 142 |
+
macro_name = choice([variable_name, atom_name])
|
| 143 |
+
|
| 144 |
+
macro =
|
| 145 |
+
string("?")
|
| 146 |
+
|> concat(macro_name)
|
| 147 |
+
|> token(:name_constant)
|
| 148 |
+
|
| 149 |
+
label =
|
| 150 |
+
string("#")
|
| 151 |
+
|> concat(atom_name)
|
| 152 |
+
|> optional(string(".") |> concat(atom_name))
|
| 153 |
+
|> token(:name_label)
|
| 154 |
+
|
| 155 |
+
character =
|
| 156 |
+
string("$")
|
| 157 |
+
|> choice([
|
| 158 |
+
string("\\") |> utf8_char([]),
|
| 159 |
+
utf8_char(not: ?\\)
|
| 160 |
+
])
|
| 161 |
+
|> token(:string_char)
|
| 162 |
+
|
| 163 |
+
string_interpol =
|
| 164 |
+
string("~")
|
| 165 |
+
|> optional(ascii_string([?0..?9, ?., ?*], min: 1))
|
| 166 |
+
|> ascii_char(to_charlist("~#+BPWXb-ginpswx"))
|
| 167 |
+
|> token(:string_interpol)
|
| 168 |
+
|
| 169 |
+
escape_double_quote = string(~s/\\"/)
|
| 170 |
+
erlang_string = string_like(~s/"/, ~s/"/, [escape_double_quote, string_interpol], :string)
|
| 171 |
+
|
| 172 |
+
escaped_char =
|
| 173 |
+
string("\\")
|
| 174 |
+
|> utf8_string([], 1)
|
| 175 |
+
|> token(:string_escape)
|
| 176 |
+
|
| 177 |
+
triple_quoted_string =
|
| 178 |
+
lookahead_string(string(~s/"""\n/), string(~s/\n"""/), [escaped_char, string_interpol])
|
| 179 |
+
|
| 180 |
+
sigil_delimiters = [
|
| 181 |
+
{~s["""\n], ~s[\n"""]},
|
| 182 |
+
{"'''\n", "\n'''"},
|
| 183 |
+
{"\"", "\""},
|
| 184 |
+
{"'", "'"},
|
| 185 |
+
{"/", "/"},
|
| 186 |
+
{"{", "}"},
|
| 187 |
+
{"[", "]"},
|
| 188 |
+
{"(", ")"},
|
| 189 |
+
{"<", ">"},
|
| 190 |
+
{"|", "|"},
|
| 191 |
+
{"`", "`"},
|
| 192 |
+
{"#", "#"}
|
| 193 |
+
]
|
| 194 |
+
|
| 195 |
+
default_sigil_interpol =
|
| 196 |
+
for {ldelim, rdelim} <- sigil_delimiters do
|
| 197 |
+
sigil(ldelim, rdelim, nil, [escaped_char, string_interpol])
|
| 198 |
+
end
|
| 199 |
+
|
| 200 |
+
sigils_interpol =
|
| 201 |
+
for {ldelim, rdelim} <- sigil_delimiters do
|
| 202 |
+
sigil(ldelim, rdelim, [?b, ?s], [escaped_char, string_interpol])
|
| 203 |
+
end
|
| 204 |
+
|
| 205 |
+
sigils_no_interpol =
|
| 206 |
+
for {ldelim, rdelim} <- sigil_delimiters do
|
| 207 |
+
sigil(ldelim, rdelim, [?B, ?S], [string_interpol])
|
| 208 |
+
end
|
| 209 |
+
|
| 210 |
+
all_sigils = default_sigil_interpol ++ sigils_interpol ++ sigils_no_interpol
|
| 211 |
+
|
| 212 |
+
# Combinators that highlight expressions surrounded by a pair of delimiters.
|
| 213 |
+
punctuation =
|
| 214 |
+
word_from_list(
|
| 215 |
+
[","] ++ ~w[\[ \] : _ @ \" . \#{ { } ( ) | ; => := << >> || -> \# &&],
|
| 216 |
+
:punctuation
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
tuple = many_surrounded_by(parsec(:root_element), "{", "}")
|
| 220 |
+
|
| 221 |
+
syntax_operators =
|
| 222 |
+
word_from_list(
|
| 223 |
+
~W[+ - +? ++ = == -- * / < > /= =:= =/= =< >= ==? <- <:- <= <:= ! ? ?!],
|
| 224 |
+
:operator
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
record =
|
| 228 |
+
token(string("#"), :operator)
|
| 229 |
+
|> concat(atom)
|
| 230 |
+
|> choice([
|
| 231 |
+
token("{", :punctuation),
|
| 232 |
+
token(".", :punctuation)
|
| 233 |
+
])
|
| 234 |
+
|
| 235 |
+
# We need to match on the new line here as to not tokenize a function call as a module attribute.
|
| 236 |
+
# Without the newline matching, the expression `a(X) - b(Y)` would tokenize
|
| 237 |
+
# `b(Y)` as a module attribute definition instead of a function call.
|
| 238 |
+
module_attribute =
|
| 239 |
+
token("\n", :whitespace)
|
| 240 |
+
|> optional(whitespace)
|
| 241 |
+
|> concat(token("-", :punctuation))
|
| 242 |
+
|> optional(whitespace)
|
| 243 |
+
|> concat(atom_name |> token(:name_attribute))
|
| 244 |
+
|> optional(whitespace)
|
| 245 |
+
|> optional(token("(", :punctuation))
|
| 246 |
+
|
| 247 |
+
function_arity =
|
| 248 |
+
atom
|
| 249 |
+
|> concat(token("/", :punctuation))
|
| 250 |
+
|> concat(number_integer)
|
| 251 |
+
|
| 252 |
+
# Erlang prompt
|
| 253 |
+
erl_prompt =
|
| 254 |
+
ascii_string([?\s, ?\r, ?\t], min: 0)
|
| 255 |
+
|> string("\n")
|
| 256 |
+
|> token(:whitespace)
|
| 257 |
+
|> concat(
|
| 258 |
+
optional(string("(") |> concat(atom_name) |> string(")"))
|
| 259 |
+
|> optional(digits)
|
| 260 |
+
|> string("> ")
|
| 261 |
+
|> token(:generic_prompt, %{selectable: false})
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
# Error in shell
|
| 265 |
+
erl_shell_error =
|
| 266 |
+
token("\n", :whitespace)
|
| 267 |
+
|> concat(
|
| 268 |
+
string("* ")
|
| 269 |
+
|> utf8_string([not: ?\n], min: 1)
|
| 270 |
+
|> token(:generic_traceback)
|
| 271 |
+
)
|
| 272 |
+
|
| 273 |
+
erl_shell_multiline_error =
|
| 274 |
+
token("\n", :whitespace)
|
| 275 |
+
|> concat(
|
| 276 |
+
string("** ")
|
| 277 |
+
|> utf8_string([not: ?\n], min: 1)
|
| 278 |
+
|> repeat(
|
| 279 |
+
string("\n ")
|
| 280 |
+
|> utf8_string([not: ?\n], min: 1)
|
| 281 |
+
)
|
| 282 |
+
|> token(:generic_traceback)
|
| 283 |
+
)
|
| 284 |
+
|
| 285 |
+
# Tag the tokens with the language name.
|
| 286 |
+
# This makes it easier to postprocess files with multiple languages.
|
| 287 |
+
@doc false
|
| 288 |
+
def __as_erlang_language__({ttype, meta, value}) do
|
| 289 |
+
{ttype, Map.put(meta, :language, :erlang), value}
|
| 290 |
+
end
|
| 291 |
+
|
| 292 |
+
root_element_combinator =
|
| 293 |
+
choice(
|
| 294 |
+
[
|
| 295 |
+
erl_prompt,
|
| 296 |
+
erl_shell_error,
|
| 297 |
+
erl_shell_multiline_error,
|
| 298 |
+
module_attribute,
|
| 299 |
+
hashbang,
|
| 300 |
+
whitespace,
|
| 301 |
+
comment,
|
| 302 |
+
triple_quoted_string,
|
| 303 |
+
erlang_string
|
| 304 |
+
] ++
|
| 305 |
+
all_sigils ++
|
| 306 |
+
[
|
| 307 |
+
record,
|
| 308 |
+
punctuation,
|
| 309 |
+
# `tuple` might be unnecessary
|
| 310 |
+
tuple,
|
| 311 |
+
syntax_operators,
|
| 312 |
+
# Numbers
|
| 313 |
+
number_integer_in_weird_base,
|
| 314 |
+
number_float,
|
| 315 |
+
number_integer,
|
| 316 |
+
# Variables
|
| 317 |
+
variable,
|
| 318 |
+
namespace,
|
| 319 |
+
function_arity,
|
| 320 |
+
function,
|
| 321 |
+
atom,
|
| 322 |
+
macro,
|
| 323 |
+
character,
|
| 324 |
+
label,
|
| 325 |
+
# If we can't parse any of the above, we highlight the next character as an error
|
| 326 |
+
# and proceed from there.
|
| 327 |
+
# A lexer should always consume any string given as input.
|
| 328 |
+
any_char
|
| 329 |
+
]
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
##############################################################################
|
| 333 |
+
# Semi-public API: these two functions can be used by someone who wants to
|
| 334 |
+
# embed this lexer into another lexer, but other than that, they are not
|
| 335 |
+
# meant to be used by end-users
|
| 336 |
+
##############################################################################
|
| 337 |
+
|
| 338 |
+
@impl Makeup.Lexer
|
| 339 |
+
defparsec(
|
| 340 |
+
:root_element,
|
| 341 |
+
root_element_combinator |> map({__MODULE__, :__as_erlang_language__, []})
|
| 342 |
+
)
|
| 343 |
+
|
| 344 |
+
@impl Makeup.Lexer
|
| 345 |
+
defparsec(
|
| 346 |
+
:root,
|
| 347 |
+
repeat(parsec(:root_element))
|
| 348 |
+
)
|
| 349 |
+
|
| 350 |
+
###################################################################
|
| 351 |
+
# Step #2: postprocess the list of tokens
|
| 352 |
+
###################################################################
|
| 353 |
+
|
| 354 |
+
@keywords ~W[after begin case catch cond end fun if let of query receive try when maybe else]
|
| 355 |
+
|
| 356 |
+
@builtins ~W[
|
| 357 |
+
abs append_element apply atom_to_list binary_to_list bitstring_to_list
|
| 358 |
+
binary_to_term bit_size bump_reductions byte_size cancel_timer
|
| 359 |
+
check_process_code delete_module demonitor disconnect_node display
|
| 360 |
+
element erase exit float float_to_list fun_info fun_to_list
|
| 361 |
+
function_exported garbage_collect get get_keys group_leader hash
|
| 362 |
+
hd integer_to_list iolist_to_binary iolist_size is_atom is_binary
|
| 363 |
+
is_bitstring is_boolean is_builtin is_float is_function is_integer
|
| 364 |
+
is_list is_number is_pid is_port is_process_alive is_record is_reference
|
| 365 |
+
is_tuple length link list_to_atom list_to_binary list_to_bitstring
|
| 366 |
+
list_to_existing_atom list_to_float list_to_integer list_to_pid
|
| 367 |
+
list_to_tuple load_module localtime_to_universaltime make_tuple
|
| 368 |
+
md5 md5_final md5_update memory module_loaded monitor monitor_node
|
| 369 |
+
node nodes open_port phash phash2 pid_to_list port_close port_command
|
| 370 |
+
port_connect port_control port_call port_info port_to_list
|
| 371 |
+
process_display process_flag process_info purge_module put read_timer
|
| 372 |
+
ref_to_list register resume_processround send send_after send_nosuspend
|
| 373 |
+
set_cookie setelement size spawn spawn_link spawn_monitor spawn_opt
|
| 374 |
+
split_binary start_timer statistics suspend_process system_flag
|
| 375 |
+
system_info system_monitor system_profile term_to_binary tl trace
|
| 376 |
+
trace_delivered trace_info trace_pattern trunc tuple_size tuple_to_list
|
| 377 |
+
universaltime_to_localtime unlink unregister whereis
|
| 378 |
+
]
|
| 379 |
+
|
| 380 |
+
@word_operators ~W[and andalso band bnot bor bsl bsr bxor div not or orelse rem xor]
|
| 381 |
+
|
| 382 |
+
defp postprocess_helper([{:string_symbol, meta, value} | tokens]) when value in @keywords,
|
| 383 |
+
do: [{:keyword, meta, value} | postprocess_helper(tokens)]
|
| 384 |
+
|
| 385 |
+
defp postprocess_helper([{:string_symbol, meta, value} | tokens]) when value in @builtins,
|
| 386 |
+
do: [{:name_builtin, meta, value} | postprocess_helper(tokens)]
|
| 387 |
+
|
| 388 |
+
defp postprocess_helper([{:string_symbol, meta, value} | tokens]) when value in @word_operators,
|
| 389 |
+
do: [{:operator_word, meta, value} | postprocess_helper(tokens)]
|
| 390 |
+
|
| 391 |
+
defp postprocess_helper([token | tokens]), do: [token | postprocess_helper(tokens)]
|
| 392 |
+
|
| 393 |
+
defp postprocess_helper([]), do: []
|
| 394 |
+
|
| 395 |
+
# By default, return the list of tokens unchanged
|
| 396 |
+
@impl Makeup.Lexer
|
| 397 |
+
def postprocess(tokens, _opts \\ []), do: postprocess_helper(tokens)
|
| 398 |
+
|
| 399 |
+
#######################################################################
|
| 400 |
+
# Step #3: highlight matching delimiters
|
| 401 |
+
# By default, this includes delimiters that are used in many languages,
|
| 402 |
+
# but feel free to delete these or add more.
|
| 403 |
+
#######################################################################
|
| 404 |
+
|
| 405 |
+
@impl Makeup.Lexer
|
| 406 |
+
defgroupmatcher(:match_groups,
|
| 407 |
+
parentheses: [
|
| 408 |
+
open: [[{:punctuation, %{language: :erlang}, "("}]],
|
| 409 |
+
close: [[{:punctuation, %{language: :erlang}, ")"}]]
|
| 410 |
+
],
|
| 411 |
+
list: [
|
| 412 |
+
open: [
|
| 413 |
+
[{:punctuation, %{language: :erlang}, "["}]
|
| 414 |
+
],
|
| 415 |
+
close: [
|
| 416 |
+
[{:punctuation, %{language: :erlang}, "]"}]
|
| 417 |
+
]
|
| 418 |
+
],
|
| 419 |
+
binary: [
|
| 420 |
+
open: [
|
| 421 |
+
[{:punctuation, %{language: :erlang}, "<<"}]
|
| 422 |
+
],
|
| 423 |
+
close: [
|
| 424 |
+
[{:punctuation, %{language: :erlang}, ">>"}]
|
| 425 |
+
]
|
| 426 |
+
],
|
| 427 |
+
tuple: [
|
| 428 |
+
open: [
|
| 429 |
+
[{:punctuation, %{language: :erlang}, "{"}]
|
| 430 |
+
],
|
| 431 |
+
close: [
|
| 432 |
+
[{:punctuation, %{language: :erlang}, "}"}]
|
| 433 |
+
]
|
| 434 |
+
],
|
| 435 |
+
map: [
|
| 436 |
+
open: [
|
| 437 |
+
[{:punctuation, %{language: :erlang}, "\#{"}]
|
| 438 |
+
],
|
| 439 |
+
close: [
|
| 440 |
+
[{:punctuation, %{language: :erlang}, "}"}]
|
| 441 |
+
]
|
| 442 |
+
]
|
| 443 |
+
)
|
| 444 |
+
|
| 445 |
+
defp remove_initial_newline([{ttype, meta, text} | tokens]) do
|
| 446 |
+
case to_string(text) do
|
| 447 |
+
"\n" -> tokens
|
| 448 |
+
"\n" <> rest -> [{ttype, meta, rest} | tokens]
|
| 449 |
+
end
|
| 450 |
+
end
|
| 451 |
+
|
| 452 |
+
# Finally, the public API for the lexer
|
| 453 |
+
@impl Makeup.Lexer
|
| 454 |
+
def lex(text, opts \\ []) do
|
| 455 |
+
group_prefix = Keyword.get(opts, :group_prefix, random_prefix(10))
|
| 456 |
+
{:ok, tokens, "", _, _, _} = root("\n" <> text)
|
| 457 |
+
|
| 458 |
+
tokens
|
| 459 |
+
|> remove_initial_newline()
|
| 460 |
+
|> postprocess()
|
| 461 |
+
|> match_groups(group_prefix)
|
| 462 |
+
end
|
| 463 |
+
end
|
deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/application.ex
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule Makeup.Lexers.ErlangLexer.Application do
|
| 2 |
+
@moduledoc false
|
| 3 |
+
use Application
|
| 4 |
+
|
| 5 |
+
alias Makeup.Registry
|
| 6 |
+
alias Makeup.Lexers.ErlangLexer
|
| 7 |
+
|
| 8 |
+
def start(_type, _args) do
|
| 9 |
+
Registry.register_lexer(ErlangLexer,
|
| 10 |
+
options: [],
|
| 11 |
+
names: ["erlang", "erl"],
|
| 12 |
+
extensions: ["erl", "hrl", "escript"]
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
Supervisor.start_link([], strategy: :one_for_one)
|
| 16 |
+
end
|
| 17 |
+
end
|
deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/helper.ex
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule Makeup.Lexers.ErlangLexer.Helper do
|
| 2 |
+
@moduledoc false
|
| 3 |
+
import NimbleParsec
|
| 4 |
+
alias Makeup.Lexer.Combinators
|
| 5 |
+
|
| 6 |
+
def with_optional_separator(combinator, separator) when is_binary(separator) do
|
| 7 |
+
combinator |> repeat(string(separator) |> concat(combinator))
|
| 8 |
+
end
|
| 9 |
+
|
| 10 |
+
def sigil(ldelim, rdelim, nil, middle) do
|
| 11 |
+
lookahead_string(
|
| 12 |
+
string("~") |> string(ldelim),
|
| 13 |
+
string(rdelim),
|
| 14 |
+
middle
|
| 15 |
+
)
|
| 16 |
+
end
|
| 17 |
+
|
| 18 |
+
def sigil(ldelim, rdelim, ranges, middle) do
|
| 19 |
+
lookahead_string(
|
| 20 |
+
string("~") |> utf8_char(ranges) |> string(ldelim),
|
| 21 |
+
string(rdelim),
|
| 22 |
+
middle
|
| 23 |
+
)
|
| 24 |
+
end
|
| 25 |
+
|
| 26 |
+
def lookahead_string(left, right, middle) do
|
| 27 |
+
if middle == [] do
|
| 28 |
+
left
|
| 29 |
+
|> repeat(lookahead_not(right) |> concat(utf8_char([])))
|
| 30 |
+
else
|
| 31 |
+
choices = middle ++ [utf8_char([])]
|
| 32 |
+
|
| 33 |
+
left
|
| 34 |
+
|> repeat(lookahead_not(right) |> choice(choices))
|
| 35 |
+
end
|
| 36 |
+
|> concat(right)
|
| 37 |
+
|> post_traverse({__MODULE__, :build_string, []})
|
| 38 |
+
end
|
| 39 |
+
|
| 40 |
+
def build_string(rest, acc, context, line, offset) do
|
| 41 |
+
type = :string
|
| 42 |
+
|
| 43 |
+
Combinators.collect_raw_chars_and_binaries(rest, acc, context, line, offset, type, %{})
|
| 44 |
+
end
|
| 45 |
+
end
|
deps/makeup_erlang/lib/makeup/lexers/erlang_lexer/testing.ex
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule Makeup.Lexers.ErlangLexer.Testing do
|
| 2 |
+
# The tests need to be checked manually!!! (remove this line when they've been checked)
|
| 3 |
+
alias Makeup.Lexers.ErlangLexer
|
| 4 |
+
alias Makeup.Lexer.Postprocess
|
| 5 |
+
|
| 6 |
+
# This function has two purposes:
|
| 7 |
+
# 1. Ensure deterministic lexer output (no random prefix)
|
| 8 |
+
# 2. Convert the token values into binaries so that the output
|
| 9 |
+
# is more obvious on visual inspection
|
| 10 |
+
# (iolists are hard to parse by a human)
|
| 11 |
+
def lex(text) do
|
| 12 |
+
text
|
| 13 |
+
|> ErlangLexer.lex(group_prefix: "group")
|
| 14 |
+
|> Postprocess.token_values_to_binaries()
|
| 15 |
+
|> Enum.map(fn {ttype, meta, value} -> {ttype, Map.delete(meta, :language), value} end)
|
| 16 |
+
end
|
| 17 |
+
end
|
deps/makeup_erlang/mix.exs
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule MakeupErlang.Mixfile do
|
| 2 |
+
use Mix.Project
|
| 3 |
+
|
| 4 |
+
@version "1.0.3"
|
| 5 |
+
@url "https://github.com/elixir-makeup/makeup_erlang"
|
| 6 |
+
|
| 7 |
+
def project do
|
| 8 |
+
[
|
| 9 |
+
app: :makeup_erlang,
|
| 10 |
+
version: @version,
|
| 11 |
+
elixir: "~> 1.6",
|
| 12 |
+
start_permanent: Mix.env() == :prod,
|
| 13 |
+
deps: deps(),
|
| 14 |
+
package: package(),
|
| 15 |
+
name: "Makeup Erlang",
|
| 16 |
+
description: description(),
|
| 17 |
+
aliases: [docs: &build_docs/1]
|
| 18 |
+
]
|
| 19 |
+
end
|
| 20 |
+
|
| 21 |
+
defp description do
|
| 22 |
+
"""
|
| 23 |
+
Erlang lexer for the Makeup syntax highlighter.
|
| 24 |
+
"""
|
| 25 |
+
end
|
| 26 |
+
|
| 27 |
+
defp package do
|
| 28 |
+
[
|
| 29 |
+
name: :makeup_erlang,
|
| 30 |
+
licenses: ["BSD-2-Clause"],
|
| 31 |
+
maintainers: ["Tiago Barroso <tmbb@campus.ul.pt>"],
|
| 32 |
+
links: %{"GitHub" => @url}
|
| 33 |
+
]
|
| 34 |
+
end
|
| 35 |
+
|
| 36 |
+
def application do
|
| 37 |
+
[
|
| 38 |
+
mod: {Makeup.Lexers.ErlangLexer.Application, []},
|
| 39 |
+
extra_applications: [:logger]
|
| 40 |
+
]
|
| 41 |
+
end
|
| 42 |
+
|
| 43 |
+
defp deps do
|
| 44 |
+
[
|
| 45 |
+
{:makeup, "~> 1.0"}
|
| 46 |
+
]
|
| 47 |
+
end
|
| 48 |
+
|
| 49 |
+
defp build_docs(_) do
|
| 50 |
+
Mix.Task.run("compile")
|
| 51 |
+
ex_doc = Path.join(Mix.path_for(:escripts), "ex_doc")
|
| 52 |
+
|
| 53 |
+
unless File.exists?(ex_doc) do
|
| 54 |
+
raise "cannot build docs because escript for ex_doc is not installed, run \"mix escript.install hex ex_doc\""
|
| 55 |
+
end
|
| 56 |
+
|
| 57 |
+
paths = Path.join(Mix.Project.build_path(), "lib/*/ebin")
|
| 58 |
+
args = ["MakeupErlang", @version, Mix.Project.compile_path()]
|
| 59 |
+
opts = ~w[--main Makeup.Lexers.ErlangLexer --source-ref v#{@version} --source-url #{@url}]
|
| 60 |
+
System.cmd(ex_doc, args ++ ["--paths", paths] ++ opts)
|
| 61 |
+
Mix.shell().info("Docs built successfully")
|
| 62 |
+
end
|
| 63 |
+
end
|
erl_crash.dump
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_deploy/Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM ghcr.io/livebook-dev/livebook:0.15.1
|
| 2 |
+
|
| 3 |
+
# System deps for EXLA/NIFs
|
| 4 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 5 |
+
build-essential \
|
| 6 |
+
cmake \
|
| 7 |
+
curl \
|
| 8 |
+
git \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# Set environment
|
| 12 |
+
ENV LIVEBOOK_PORT=7860
|
| 13 |
+
ENV LIVEBOOK_IP=0.0.0.0
|
| 14 |
+
ENV LIVEBOOK_TOKEN=""
|
| 15 |
+
ENV LIVEBOOK_IFRAME_PORT=7860
|
| 16 |
+
ENV MIX_ENV=prod
|
| 17 |
+
ENV BUMBLEBEE_CACHE_DIR=/data/bumblebee
|
| 18 |
+
ENV XLA_TARGET=cpu
|
| 19 |
+
|
| 20 |
+
WORKDIR /app
|
| 21 |
+
|
| 22 |
+
# Create cache dirs
|
| 23 |
+
RUN mkdir -p /data/bumblebee /data/livebook
|
| 24 |
+
|
| 25 |
+
# Copy notebook and setup script
|
| 26 |
+
COPY ml_e2e_template.livemd /app/ml_e2e_template.livemd
|
| 27 |
+
COPY setup.livemd /app/setup.livemd
|
| 28 |
+
COPY startup.sh /app/startup.sh
|
| 29 |
+
RUN chmod +x /app/startup.sh
|
| 30 |
+
|
| 31 |
+
EXPOSE 7860
|
| 32 |
+
|
| 33 |
+
CMD ["/app/startup.sh"]
|
hf_deploy/README.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ML in Elixir — Bumblebee + Hugging Face
|
| 3 |
+
emoji: 🐝
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
tags:
|
| 11 |
+
- elixir
|
| 12 |
+
- livebook
|
| 13 |
+
- machine-learning
|
| 14 |
+
- bumblebee
|
| 15 |
+
- hugging-face
|
| 16 |
+
- nx
|
| 17 |
+
- axon
|
| 18 |
+
---
|
hf_deploy/deploy.sh
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Deploy to Hugging Face Spaces
|
| 3 |
+
# Usage: ./deploy.sh <username> <space-name>
|
| 4 |
+
#
|
| 5 |
+
# Prerequisites:
|
| 6 |
+
# 1. Install hf CLI: pip install huggingface_hub
|
| 7 |
+
# 2. Login: hf auth login
|
| 8 |
+
# 3. Have Docker installed
|
| 9 |
+
|
| 10 |
+
set -e
|
| 11 |
+
|
| 12 |
+
HF_USER="${1:?Usage: $0 <hf-username> <space-name>}"
|
| 13 |
+
SPACE_NAME="${2:?Usage: $0 <hf-username> <space-name>}"
|
| 14 |
+
REPO_ID="${HF_USER}/${SPACE_NAME}"
|
| 15 |
+
|
| 16 |
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
| 17 |
+
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
| 18 |
+
|
| 19 |
+
echo "=== Deploying Livebook ML Template to HF Spaces ==="
|
| 20 |
+
echo "Space: ${REPO_ID}"
|
| 21 |
+
echo ""
|
| 22 |
+
|
| 23 |
+
# Copy Livebook to deploy dir
|
| 24 |
+
cp "${PROJECT_DIR}/ml_e2e_template.livemd" "${SCRIPT_DIR}/"
|
| 25 |
+
|
| 26 |
+
# Clone or create the HF Space repo
|
| 27 |
+
REPO_DIR="/tmp/hf-space-${SPACE_NAME}"
|
| 28 |
+
if [ -d "${REPO_DIR}" ]; then
|
| 29 |
+
echo "Updating existing repo..."
|
| 30 |
+
cd "${REPO_DIR}"
|
| 31 |
+
git pull
|
| 32 |
+
else
|
| 33 |
+
echo "Cloning Space repo..."
|
| 34 |
+
git clone "https://huggingface.co/spaces/${REPO_ID}" "${REPO_DIR}"
|
| 35 |
+
cd "${REPO_DIR}"
|
| 36 |
+
fi
|
| 37 |
+
|
| 38 |
+
# Copy deployment files
|
| 39 |
+
cp "${SCRIPT_DIR}/Dockerfile" .
|
| 40 |
+
cp "${SCRIPT_DIR}/startup.sh" .
|
| 41 |
+
cp "${SCRIPT_DIR}/setup.livemd" .
|
| 42 |
+
cp "${SCRIPT_DIR}/ml_e2e_template.livemd" .
|
| 43 |
+
cp "${SCRIPT_DIR}/README.md" .
|
| 44 |
+
|
| 45 |
+
# Push
|
| 46 |
+
git add -A
|
| 47 |
+
git diff --staged --quiet && echo "No changes to push." && exit 0
|
| 48 |
+
|
| 49 |
+
git commit -m "Update Livebook ML template"
|
| 50 |
+
git push
|
| 51 |
+
|
| 52 |
+
echo ""
|
| 53 |
+
echo "=== Deployed! ==="
|
| 54 |
+
echo "Space URL: https://huggingface.co/spaces/${REPO_ID}"
|
| 55 |
+
echo "It may take a few minutes for the build to complete."
|
hf_deploy/setup.livemd
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Setup — installs deps on first cell run
|
| 2 |
+
# This notebook runs automatically before ml_e2e_template.livemd
|
| 3 |
+
|
| 4 |
+
Mix.install([
|
| 5 |
+
{:nx, "~> 0.11"},
|
| 6 |
+
{:axon, "~> 0.8"},
|
| 7 |
+
{:exla, "~> 0.11"},
|
| 8 |
+
{:bumblebee, "~> 0.6"},
|
| 9 |
+
{:kino, "~> 0.15"},
|
| 10 |
+
{:kino_vega_lite, "~> 0.1"},
|
| 11 |
+
{:vega_lite, "~> 0.1"},
|
| 12 |
+
{:stb_image, "~> 0.6"},
|
| 13 |
+
{:req, "~> 0.5"}
|
| 14 |
+
])
|
| 15 |
+
|
| 16 |
+
IO.puts("All dependencies installed successfully!")
|
hf_deploy/startup.sh
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
# Pre-download common models into cache (optional, speeds up first load)
|
| 5 |
+
echo "=== ML in Elixir — Livebook Template ==="
|
| 6 |
+
echo "Starting Livebook on port ${LIVEBOOK_PORT}..."
|
| 7 |
+
|
| 8 |
+
# Launch Livebook
|
| 9 |
+
exec /usr/local/bin/livebook server \
|
| 10 |
+
--port "${LIVEBOOK_PORT}" \
|
| 11 |
+
--default-url "/notebooks/ml_e2e_template.livemd"
|
hf_jobs_demo.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Demo script for HF Jobs
|
| 2 |
+
|
| 3 |
+
Runs a tiny sentiment‑analysis pipeline and prints the result.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# /// script
|
| 7 |
+
# dependencies = ["transformers", "torch"]
|
| 8 |
+
# ///
|
| 9 |
+
|
| 10 |
+
from transformers import pipeline
|
| 11 |
+
|
| 12 |
+
# Simple sentiment pipeline (uses a small DistilBERT model)
|
| 13 |
+
sentiment = pipeline("sentiment-analysis")
|
| 14 |
+
result = sentiment("Hugging Face Jobs are awesome!")
|
| 15 |
+
print("Sentiment result:", result)
|
jax/ml_e2e_jax.ipynb
ADDED
|
@@ -0,0 +1,633 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"toc_visible": true,
|
| 8 |
+
"gpuType": "T4"
|
| 9 |
+
},
|
| 10 |
+
"kernelspec": {
|
| 11 |
+
"name": "python3",
|
| 12 |
+
"display_name": "Python 3"
|
| 13 |
+
},
|
| 14 |
+
"language_info": {
|
| 15 |
+
"name": "python"
|
| 16 |
+
},
|
| 17 |
+
"accelerator": "GPU"
|
| 18 |
+
},
|
| 19 |
+
"cells": [
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "markdown",
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"source": [
|
| 24 |
+
"# 🐝 ML in Elixir — JAX Companion\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"This notebook mirrors the [Elixir Livebook template](../ml_e2e_template.livemd)\n",
|
| 27 |
+
"from *Machine Learning in Elixir* by Sean Moriarity, implemented in **JAX**.\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"JAX is Google's numerical computing library — the closest Python counterpart to Elixir's **Nx**.\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"| Elixir (Nx/Axon) | JAX (Python) | Purpose |\n",
|
| 32 |
+
"|-----------------|-------------|----------|\n",
|
| 33 |
+
"| `Nx` | `jax.numpy` | Tensors, ops |\n",
|
| 34 |
+
"| `Nx.Defn.grad` | `jax.grad` | Automatic differentiation |\n",
|
| 35 |
+
"| `EXLA` | `jax.jit` | JIT compilation / GPU |\n",
|
| 36 |
+
"| `Bumblebee` | `transformers` + `jax` | Pre-trained models |\n",
|
| 37 |
+
"| `Axon` | `flax` | Neural networks |\n",
|
| 38 |
+
"| `Nx.Serving` | `jax.lax.map` / `pmap` | Batched inference |\n",
|
| 39 |
+
"| `Kino` | `ipywidgets` | Interactive UI |"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "markdown",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"source": [
|
| 46 |
+
"## 0 — Install & Configure"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "code",
|
| 51 |
+
"execution_count": null,
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"outputs": [],
|
| 54 |
+
"source": [
|
| 55 |
+
"!pip install -q jax[cuda12]==0.4.35 flax optax transformers datasets accelerate scikit-learn numpy matplotlib"
|
| 56 |
+
]
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"cell_type": "code",
|
| 60 |
+
"execution_count": null,
|
| 61 |
+
"metadata": {},
|
| 62 |
+
"outputs": [],
|
| 63 |
+
"source": [
|
| 64 |
+
"import jax\n",
|
| 65 |
+
"import jax.numpy as jnp\n",
|
| 66 |
+
"from jax import grad, jit, vmap, pmap\n",
|
| 67 |
+
"import flax.linen as nn\n",
|
| 68 |
+
"import optax\n",
|
| 69 |
+
"from transformers import pipeline, AutoTokenizer, FlaxAutoModel\n",
|
| 70 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 71 |
+
"from sklearn.metrics import classification_report\n",
|
| 72 |
+
"import matplotlib.pyplot as plt\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"device = jax.devices()[0]\n",
|
| 75 |
+
"print(f\"JAX version: {jax.__version__}\")\n",
|
| 76 |
+
"print(f\"Device: {device}\")\n",
|
| 77 |
+
"print(f\"Backend: {device.platform}\")"
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"cell_type": "markdown",
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"source": [
|
| 84 |
+
"## 1 — JAX Foundations (Nx equivalent)\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"JAX provides NumPy-like API with automatic differentiation and JIT compilation."
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"cell_type": "code",
|
| 91 |
+
"execution_count": null,
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"outputs": [],
|
| 94 |
+
"source": [
|
| 95 |
+
"# Tensors: Nx.tensor → jnp.array\n",
|
| 96 |
+
"scalar = jnp.float64(3.14)\n",
|
| 97 |
+
"vector = jnp.array([1.0, 2.0, 3.0])\n",
|
| 98 |
+
"matrix = jnp.array([[1, 2, 3], [4, 5, 6]])\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"print(f\"scalar shape: {scalar.shape}, dtype: {scalar.dtype}\")\n",
|
| 101 |
+
"print(f\"vector shape: {vector.shape}, dtype: {vector.dtype}\")\n",
|
| 102 |
+
"print(f\"matrix shape: {matrix.shape}, dtype: {matrix.dtype}\")"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"execution_count": null,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [],
|
| 110 |
+
"source": [
|
| 111 |
+
"# Operations: Nx.add/multiply/dot → jnp.add/... (same API!)\n",
|
| 112 |
+
"a = jnp.array([1.0, 2.0, 3.0])\n",
|
| 113 |
+
"b = jnp.array([10.0, 20.0, 30.0])\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"print(f\"add: {a + b}\")\n",
|
| 116 |
+
"print(f\"multiply: {a * b}\")\n",
|
| 117 |
+
"print(f\"dot: {jnp.dot(a, b)}\")\n",
|
| 118 |
+
"print(f\"sum: {jnp.sum(a)}\")\n",
|
| 119 |
+
"print(f\"mean: {jnp.mean(a)}\")\n",
|
| 120 |
+
"print(f\"std: {jnp.std(a)}\")"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"cell_type": "code",
|
| 125 |
+
"execution_count": null,
|
| 126 |
+
"metadata": {},
|
| 127 |
+
"outputs": [],
|
| 128 |
+
"source": [
|
| 129 |
+
"# Automatic differentiation: Nx.Defn.grad → jax.grad\n",
|
| 130 |
+
"def f(x):\n",
|
| 131 |
+
" return x**3 + 2 * x**2 # f(x) = x³ + 2x²\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"grad_f = grad(f) # Symbolic gradient\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"x = 3.0\n",
|
| 136 |
+
"print(f\"f(3) = {f(x)}\")\n",
|
| 137 |
+
"print(f\"f'(3) = {grad_f(x)}\")\n",
|
| 138 |
+
"print(f\"expected = 3*9 + 2*2*3 = {3*9 + 2*2*3}\")"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"cell_type": "code",
|
| 143 |
+
"execution_count": null,
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"outputs": [],
|
| 146 |
+
"source": [
|
| 147 |
+
"# JIT compilation: Nx.Defn.jit → jax.jit\n",
|
| 148 |
+
"@jit\n",
|
| 149 |
+
"def fast_sigmoid(x):\n",
|
| 150 |
+
" return 1 / (1 + jnp.exp(-x))\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"input_arr = jnp.array([[-2.0, -1.0, 0.0, 1.0, 2.0]])\n",
|
| 153 |
+
"result = fast_sigmoid(input_arr)\n",
|
| 154 |
+
"print(f\"JIT compiled sigmoid: {result}\")"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "markdown",
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"source": [
|
| 161 |
+
"## 2 — Pre-trained NLP (Bumblebee equivalent)\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"In Elixir: `Bumblebee.load_model({:hf, \"...\"})`\n",
|
| 164 |
+
"In Python/JAX: `transformers` + `flax` or `pipeline()`"
|
| 165 |
+
]
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"cell_type": "markdown",
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"source": [
|
| 171 |
+
"### 2.1 Fill-Mask (BERT)"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"cell_type": "code",
|
| 176 |
+
"execution_count": null,
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [],
|
| 179 |
+
"source": [
|
| 180 |
+
"fill_mask = pipeline(\"fill-mask\", model=\"google-bert/bert-base-uncased\")\n",
|
| 181 |
+
"results = fill_mask(\"Elixir is a [MASK] language.\")\n",
|
| 182 |
+
"for r in results:\n",
|
| 183 |
+
" print(f\" {r['score']:.4f} {r['token_str']:15s} {r['sequence']}\")"
|
| 184 |
+
]
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"cell_type": "markdown",
|
| 188 |
+
"metadata": {},
|
| 189 |
+
"source": [
|
| 190 |
+
"### 2.2 Sentiment Analysis"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [],
|
| 198 |
+
"source": [
|
| 199 |
+
"sentiment = pipeline(\n",
|
| 200 |
+
" \"sentiment-analysis\",\n",
|
| 201 |
+
" model=\"distilbert/distilbert-base-uncased-finetuned-sst-2-english\"\n",
|
| 202 |
+
")\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"texts = [\n",
|
| 205 |
+
" \"Machine learning in Elixir is amazing!\",\n",
|
| 206 |
+
" \"This tutorial is boring and confusing.\",\n",
|
| 207 |
+
" \"The BEAM VM handles concurrent ML workloads well.\",\n",
|
| 208 |
+
"]\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"for text in texts:\n",
|
| 211 |
+
" result = sentiment(text)[0]\n",
|
| 212 |
+
" print(f\" {result['label']:8s} {result['score']:.4f} ← \\\"{text[:50]}\\\"\")"
|
| 213 |
+
]
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"cell_type": "markdown",
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"source": [
|
| 219 |
+
"### 2.3 Named Entity Recognition"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": null,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"ner = pipeline(\"ner\", model=\"dslim/bert-base-NER\", aggregation_strategy=\"simple\")\n",
|
| 229 |
+
"text = \"Sean Moriarity wrote Machine Learning in Elixir for Pragmatic Bookshelf.\"\n",
|
| 230 |
+
"entities = ner(text)\n",
|
| 231 |
+
"for e in entities:\n",
|
| 232 |
+
" print(f\" {e['entity_group']:8s} {e['score']:.4f} {e['word']}\")"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"### 2.4 Zero-Shot Classification"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": null,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [],
|
| 247 |
+
"source": [
|
| 248 |
+
"zs_classifier = pipeline(\"zero-shot-classification\", model=\"facebook/bart-large-mnli\")\n",
|
| 249 |
+
"\n",
|
| 250 |
+
"article = \"\"\"\n",
|
| 251 |
+
"Nx brings numerical computing to the BEAM, enabling machine learning\n",
|
| 252 |
+
"pipelines that leverage Elixir's concurrency and fault tolerance.\n",
|
| 253 |
+
"\"\"\"\n",
|
| 254 |
+
"\n",
|
| 255 |
+
"labels = [\"technology\", \"sports\", \"politics\", \"science\", \"finance\"]\n",
|
| 256 |
+
"result = zs_classifier(article, labels)\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"for label, score in zip(result[\"labels\"], result[\"scores\"]):\n",
|
| 259 |
+
" bar = \"█\" * int(score * 30)\n",
|
| 260 |
+
" print(f\" {label:12s} {score:.4f} {bar}\")"
|
| 261 |
+
]
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"cell_type": "markdown",
|
| 265 |
+
"metadata": {},
|
| 266 |
+
"source": [
|
| 267 |
+
"### 2.5 Sentence Embeddings & Similarity"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"from sentence_transformers import SentenceTransformer\n",
|
| 277 |
+
"\n",
|
| 278 |
+
"emb_model = SentenceTransformer(\"sentence-transformers/all-MiniLM-L6-v2\")\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"sentences = [\n",
|
| 281 |
+
" \"Nx provides numerical computing for Elixir\",\n",
|
| 282 |
+
" \"Axon is a neural network library built on Nx\",\n",
|
| 283 |
+
" \"Bumblebee connects Elixir to the Hugging Face Hub\",\n",
|
| 284 |
+
" \"I enjoy cooking Italian food on weekends\",\n",
|
| 285 |
+
"]\n",
|
| 286 |
+
"\n",
|
| 287 |
+
"embeddings = emb_model.encode(sentences)\n",
|
| 288 |
+
"\n",
|
| 289 |
+
"query = \"How do I build neural networks in Elixir?\"\n",
|
| 290 |
+
"query_emb = emb_model.encode([query])\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
| 293 |
+
"sims = cosine_similarity(query_emb, embeddings)[0]\n",
|
| 294 |
+
"ranked = sorted(zip(sentences, sims), key=lambda x: -x[1])\n",
|
| 295 |
+
"\n",
|
| 296 |
+
"print(f'Query: \"{query}\"\\n')\n",
|
| 297 |
+
"for sent, sim in ranked:\n",
|
| 298 |
+
" print(f\" {sim:.4f} {sent}\")"
|
| 299 |
+
]
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "markdown",
|
| 303 |
+
"metadata": {},
|
| 304 |
+
"source": [
|
| 305 |
+
"### 2.6 Text Generation (GPT-2)"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"text_gen = pipeline(\"text-generation\", model=\"openai-community/gpt2\")\n",
|
| 315 |
+
"prompt = \"Machine learning in Elixir is\"\n",
|
| 316 |
+
"output = text_gen(prompt, max_new_tokens=50, num_return_sequences=1)\n",
|
| 317 |
+
"print(output[0][\"generated_text\"])"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "markdown",
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"source": [
|
| 324 |
+
"### 2.7 Image Classification (ViT)"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": null,
|
| 330 |
+
"metadata": {},
|
| 331 |
+
"outputs": [],
|
| 332 |
+
"source": [
|
| 333 |
+
"from PIL import Image\n",
|
| 334 |
+
"import requests\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"img_cls = pipeline(\"image-classification\", model=\"google/vit-base-patch16-224\")\n",
|
| 337 |
+
"img_url = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg\"\n",
|
| 338 |
+
"image = Image.open(requests.get(img_url, stream=True).raw)\n",
|
| 339 |
+
"results = img_cls(image)\n",
|
| 340 |
+
"for r in results[:5]:\n",
|
| 341 |
+
" print(f\" {r['score']:.4f} {r['label']}\")"
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"cell_type": "markdown",
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"source": [
|
| 348 |
+
"### 2.8 Speech-to-Text (Whisper)"
|
| 349 |
+
]
|
| 350 |
+
},
|
| 351 |
+
{
|
| 352 |
+
"cell_type": "code",
|
| 353 |
+
"execution_count": null,
|
| 354 |
+
"metadata": {},
|
| 355 |
+
"outputs": [],
|
| 356 |
+
"source": [
|
| 357 |
+
"asr = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-tiny\")\n",
|
| 358 |
+
"audio_url = \"https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac\"\n",
|
| 359 |
+
"result = asr(audio_url)\n",
|
| 360 |
+
"print(f\"Transcription: {result['text']}\")"
|
| 361 |
+
]
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"cell_type": "markdown",
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"source": [
|
| 367 |
+
"### 2.9 Image Generation (Stable Diffusion)\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"> Requires GPU. JAX version uses `diffusers` with JAX backend."
|
| 370 |
+
]
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"cell_type": "code",
|
| 374 |
+
"execution_count": null,
|
| 375 |
+
"metadata": {},
|
| 376 |
+
"outputs": [],
|
| 377 |
+
"source": [
|
| 378 |
+
"# JAX Stable Diffusion (using diffusers with JAX)\n",
|
| 379 |
+
"try:\n",
|
| 380 |
+
" from diffusers import StableDiffusionPipeline\n",
|
| 381 |
+
" import torch as _torch\n",
|
| 382 |
+
" \n",
|
| 383 |
+
" # Note: diffusers primarily supports PyTorch, JAX version is experimental\n",
|
| 384 |
+
" # For production JAX SD, use ` JaxDM` or similar\n",
|
| 385 |
+
" sd_pipe = StableDiffusionPipeline.from_pretrained(\n",
|
| 386 |
+
" \"CompVis/stable-diffusion-v1-4\",\n",
|
| 387 |
+
" torch_dtype=_torch.float16 if _torch.cuda.is_available() else _torch.float32,\n",
|
| 388 |
+
" )\n",
|
| 389 |
+
" \n",
|
| 390 |
+
" prompt = \"a bee programming in Elixir, highly detailed, 4k\"\n",
|
| 391 |
+
" image = sd_pipe(prompt, num_inference_steps=20).images[0]\n",
|
| 392 |
+
" display(image)\n",
|
| 393 |
+
"except Exception as e:\n",
|
| 394 |
+
" print(f\"SD requires GPU/memory: {e}\")"
|
| 395 |
+
]
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"cell_type": "markdown",
|
| 399 |
+
"metadata": {},
|
| 400 |
+
"source": [
|
| 401 |
+
"## 2.10 Model Export (ONNX / GGUF)
|
| 402 |
+
|
| 403 |
+
**Export to ONNX (experimental for Flax models)**
|
| 404 |
+
```python
|
| 405 |
+
# Install jax2onnx for conversion
|
| 406 |
+
# pip install jax2onnx onnx
|
| 407 |
+
|
| 408 |
+
# Example: Export the Flax MLP model to ONNX
|
| 409 |
+
import jax2onnx
|
| 410 |
+
import onnx
|
| 411 |
+
|
| 412 |
+
# Assume `model` is the Flax model defined earlier and `params` are its trained parameters
|
| 413 |
+
# Define a callable applying the model
|
| 414 |
+
def apply_model(x):
|
| 415 |
+
return model.apply(params, x, training=False)
|
| 416 |
+
|
| 417 |
+
# Convert a dummy input shape to ONNX
|
| 418 |
+
input_shape = (1, n_features)
|
| 419 |
+
onnx_model = jax2onnx.convert(apply_model, input_shape, output_path="mlp.onnx")
|
| 420 |
+
print("ONNX model saved as mlp.onnx")
|
| 421 |
+
```
|
| 422 |
+
|
| 423 |
+
**Convert to GGUF** (decoder‑only models, e.g., GPT‑2)
|
| 424 |
+
```bash
|
| 425 |
+
pip install gguf-converter
|
| 426 |
+
# Convert the ONNX model to GGUF
|
| 427 |
+
gguf-converter --onnx mlp.onnx --output mlp.gguf
|
| 428 |
+
```
|
| 429 |
+
|
| 430 |
+
> **Note:** GGUF conversion currently targets decoder architectures. For encoder‑only models you may still use ONNX directly with `onnxruntime`.
|
| 431 |
+
|
| 432 |
+
---
|
| 433 |
+
|
| 434 |
+
## 3 — Custom Training with Flax (Axon equivalent)",
|
| 435 |
+
"\n",
|
| 436 |
+
"Flax is Google's neural network library for JAX — mirrors Axon's functional design."
|
| 437 |
+
]
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"cell_type": "code",
|
| 441 |
+
"execution_count": null,
|
| 442 |
+
"metadata": {},
|
| 443 |
+
"outputs": [],
|
| 444 |
+
"source": [
|
| 445 |
+
"# Synthetic data (same as Livebook)\n",
|
| 446 |
+
"import numpy as np\n",
|
| 447 |
+
"np.random.seed(42)\n",
|
| 448 |
+
"n_samples, n_features, n_classes = 2000, 4, 3\n",
|
| 449 |
+
"\n",
|
| 450 |
+
"centers = np.random.randn(n_classes, n_features) * 2\n",
|
| 451 |
+
"labels_raw = np.random.randint(0, n_classes, n_samples)\n",
|
| 452 |
+
"noise = np.random.randn(n_samples, n_features) * 0.4\n",
|
| 453 |
+
"X = centers[labels_raw] + noise\n",
|
| 454 |
+
"X = (X - X.mean(axis=0)) / X.std(axis=0)\n",
|
| 455 |
+
"Y = np.eye(n_classes)[labels_raw]\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"# Convert to JAX arrays\n",
|
| 460 |
+
"X_train_j = jnp.array(X_train)\n",
|
| 461 |
+
"Y_train_j = jnp.array(Y_train)\n",
|
| 462 |
+
"X_test_j = jnp.array(X_test)\n",
|
| 463 |
+
"Y_test_j = jnp.array(Y_test)\n",
|
| 464 |
+
"\n",
|
| 465 |
+
"print(f\"Train: {len(X_train)} | Test: {len(X_test)} | Features: {n_features} | Classes: {n_classes}\")"
|
| 466 |
+
]
|
| 467 |
+
},
|
| 468 |
+
{
|
| 469 |
+
"cell_type": "code",
|
| 470 |
+
"execution_count": null,
|
| 471 |
+
"metadata": {},
|
| 472 |
+
"outputs": [],
|
| 473 |
+
"source": [
|
| 474 |
+
"# Flax model (mirrors Axon build)\n",
|
| 475 |
+
"class MLP(nn.Module):\n",
|
| 476 |
+
" n_classes: int\n",
|
| 477 |
+
" \n",
|
| 478 |
+
" @nn.compact\n",
|
| 479 |
+
" def __call__(self, x, training: bool = True):\n",
|
| 480 |
+
" x = nn.Dense(64)(x)\n",
|
| 481 |
+
" x = nn.relu(x)\n",
|
| 482 |
+
" x = nn.BatchNorm(use_running_average=not training)(x)\n",
|
| 483 |
+
" x = nn.Dropout(0.2, deterministic=not training)(x)\n",
|
| 484 |
+
" \n",
|
| 485 |
+
" x = nn.Dense(32)(x)\n",
|
| 486 |
+
" x = nn.relu(x)\n",
|
| 487 |
+
" x = nn.BatchNorm(use_running_average=not training)(x)\n",
|
| 488 |
+
" x = nn.Dropout(0.2, deterministic=not training)(x)\n",
|
| 489 |
+
" \n",
|
| 490 |
+
" x = nn.Dense(self.n_classes)(x)\n",
|
| 491 |
+
" x = nn.softmax(x, axis=-1)\n",
|
| 492 |
+
" return x\n",
|
| 493 |
+
"\n",
|
| 494 |
+
"model = MLP(n_classes=n_classes)\n",
|
| 495 |
+
"print(model)"
|
| 496 |
+
]
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"cell_type": "code",
|
| 500 |
+
"execution_count": null,
|
| 501 |
+
"metadata": {},
|
| 502 |
+
"outputs": [],
|
| 503 |
+
"source": [
|
| 504 |
+
"# Initialize parameters\n",
|
| 505 |
+
"dummy_input = jnp.ones((1, n_features))\n",
|
| 506 |
+
"variables = model.init(jax.random.key(42), dummy_input, training=False)\n",
|
| 507 |
+
"params = variables[\"params\"]\n",
|
| 508 |
+
"print(f\"Parameters initialized: {sum(p.size for p in jax.tree_util.tree_leaves(params))}\")"
|
| 509 |
+
]
|
| 510 |
+
},
|
| 511 |
+
{
|
| 512 |
+
"cell_type": "code",
|
| 513 |
+
"execution_count": null,
|
| 514 |
+
"metadata": {},
|
| 515 |
+
"outputs": [],
|
| 516 |
+
"source": [
|
| 517 |
+
"# Training step function\n",
|
| 518 |
+
"def cross_entropy_loss(logits, labels):\n",
|
| 519 |
+
" return -jnp.mean(jnp.sum(labels * jnp.log(logits + 1e-8), axis=-1))\n",
|
| 520 |
+
"\n",
|
| 521 |
+
"@jit\n",
|
| 522 |
+
"def train_step(params, batch):\n",
|
| 523 |
+
" x, y = batch\n",
|
| 524 |
+
" \n",
|
| 525 |
+
" def loss_fn(p):\n",
|
| 526 |
+
" logits = model.apply(p, x, training=True)\n",
|
| 527 |
+
" return cross_entropy_loss(logits, y)\n",
|
| 528 |
+
" \n",
|
| 529 |
+
" loss, grads = jax.value_and_grad(loss_fn)(params)\n",
|
| 530 |
+
" updates, opt_state = optimizer.update(grads, opt_state)\n",
|
| 531 |
+
" new_params = optax.apply_updates(params, updates)\n",
|
| 532 |
+
" return loss, new_params\n",
|
| 533 |
+
"\n",
|
| 534 |
+
"@jit \n",
|
| 535 |
+
"def accuracy(params, x, y):\n",
|
| 536 |
+
" logits = model.apply(params, x, training=False)\n",
|
| 537 |
+
" preds = jnp.argmax(logits, axis=-1)\n",
|
| 538 |
+
" true = jnp.argmax(y, axis=-1)\n",
|
| 539 |
+
" return jnp.mean(preds == true)"
|
| 540 |
+
]
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"cell_type": "code",
|
| 544 |
+
"execution_count": null,
|
| 545 |
+
"metadata": {},
|
| 546 |
+
"outputs": [],
|
| 547 |
+
"source": [
|
| 548 |
+
"# Training loop\n",
|
| 549 |
+
"optimizer = optax.adam(0.001)\n",
|
| 550 |
+
"opt_state = optimizer.init(params)\n",
|
| 551 |
+
"\n",
|
| 552 |
+
"batch_size = 64\n",
|
| 553 |
+
"epochs = 30\n",
|
| 554 |
+
"\n",
|
| 555 |
+
"for epoch in range(epochs):\n",
|
| 556 |
+
" # Shuffle\n",
|
| 557 |
+
" perm = jax.random.permutation(jax.random.key(epoch), len(X_train_j))\n",
|
| 558 |
+
" X_shuffled = X_train_j[perm]\n",
|
| 559 |
+
" Y_shuffled = Y_train_j[perm]\n",
|
| 560 |
+
" \n",
|
| 561 |
+
" epoch_loss = 0\n",
|
| 562 |
+
" for i in range(0, len(X_train_j), batch_size):\n",
|
| 563 |
+
" batch_x = X_shuffled[i:i+batch_size]\n",
|
| 564 |
+
" batch_y = Y_shuffled[i:i+batch_size]\n",
|
| 565 |
+
" loss, params = train_step(params, (batch_x, batch_y))\n",
|
| 566 |
+
" epoch_loss += loss\n",
|
| 567 |
+
" \n",
|
| 568 |
+
" if (epoch + 1) % 10 == 0:\n",
|
| 569 |
+
" acc = accuracy(params, X_test_j, Y_test_j)\n",
|
| 570 |
+
" print(f\"Epoch {epoch+1:3d} loss={epoch_loss/len(X_train_j):.4f} acc={acc:.2%}\")"
|
| 571 |
+
]
|
| 572 |
+
},
|
| 573 |
+
{
|
| 574 |
+
"cell_type": "code",
|
| 575 |
+
"execution_count": null,
|
| 576 |
+
"metadata": {},
|
| 577 |
+
"outputs": [],
|
| 578 |
+
"source": [
|
| 579 |
+
"# Final evaluation\n",
|
| 580 |
+
"final_acc = accuracy(params, X_test_j, Y_test_j)\n",
|
| 581 |
+
"print(f\"Final test accuracy: {final_acc:.2%}\")"
|
| 582 |
+
]
|
| 583 |
+
},
|
| 584 |
+
{
|
| 585 |
+
"cell_type": "code",
|
| 586 |
+
"execution_count": null,
|
| 587 |
+
"metadata": {},
|
| 588 |
+
"outputs": [],
|
| 589 |
+
"source": [
|
| 590 |
+
"# Visualize\n",
|
| 591 |
+
"logits = model.apply(params, X_test_j, training=False)\n",
|
| 592 |
+
"preds = jnp.argmax(logits, axis=-1)\n",
|
| 593 |
+
"true = jnp.argmax(Y_test_j, axis=-1)\n",
|
| 594 |
+
"\n",
|
| 595 |
+
"fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
|
| 596 |
+
"for cls in range(n_classes):\n",
|
| 597 |
+
" mask = true == cls\n",
|
| 598 |
+
" axes[0].scatter(X_test[mask, 0], X_test[mask, 1], label=f\"Class {cls}\", alpha=0.6)\n",
|
| 599 |
+
" mask2 = preds == cls\n",
|
| 600 |
+
" axes[1].scatter(X_test[mask2, 0], X_test[mask2, 1], label=f\"Class {cls}\", alpha=0.6)\n",
|
| 601 |
+
"axes[0].set_title(\"Actual\")\n",
|
| 602 |
+
"axes[1].set_title(\"Predicted\")\n",
|
| 603 |
+
"for ax in axes:\n",
|
| 604 |
+
" ax.legend()\n",
|
| 605 |
+
"plt.tight_layout()\n",
|
| 606 |
+
"plt.show()"
|
| 607 |
+
]
|
| 608 |
+
},
|
| 609 |
+
{
|
| 610 |
+
"cell_type": "markdown",
|
| 611 |
+
"metadata": {},
|
| 612 |
+
"source": [
|
| 613 |
+
"## 4 — Summary\n",
|
| 614 |
+
"\n",
|
| 615 |
+
"| Pipeline Stage | Elixir (Livebook) | JAX (This Notebook) |\n",
|
| 616 |
+
"|---------------|-------------------|----------------------|\n",
|
| 617 |
+
"| Tensors | `Nx.tensor` | `jnp.array` |\n",
|
| 618 |
+
"| Gradients | `Nx.Defn.grad` | `jax.grad` |\n",
|
| 619 |
+
"| JIT | `EXLA.Backend` | `jax.jit` |\n",
|
| 620 |
+
"| Pre-trained | `Bumblebee` | `transformers` + `flax` |\n",
|
| 621 |
+
"| Neural Net | `Axon` | `flax.linen` |\n",
|
| 622 |
+
"| Optimizer | `Axon.Optimizers` | `optax` |\n",
|
| 623 |
+
"| Training Loop | `Axon.Loop` | Custom (JAX) |\n",
|
| 624 |
+
"\n",
|
| 625 |
+
"### Deploy\n",
|
| 626 |
+
"\n",
|
| 627 |
+
"* **Colab**: Open this `.ipynb` directly\n",
|
| 628 |
+
"* **Kaggle**: Upload as new notebook\n",
|
| 629 |
+
"* **HF Spaces**: Use with `sdk: docker` (JAX requires GPU)"
|
| 630 |
+
]
|
| 631 |
+
}
|
| 632 |
+
]
|
| 633 |
+
}
|
justfile
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# justfile — ML in Elixir project tasks
|
| 2 |
+
# Usage: just <recipe>
|
| 3 |
+
# Install just: mise use just (or brew install just / cargo install just)
|
| 4 |
+
|
| 5 |
+
set dotenv-load
|
| 6 |
+
|
| 7 |
+
# Default: show available recipes
|
| 8 |
+
default:
|
| 9 |
+
@just --list
|
| 10 |
+
|
| 11 |
+
# ─── Elixir / Livebook ───────────────────────────────────────────
|
| 12 |
+
|
| 13 |
+
# Install all Elixir deps
|
| 14 |
+
setup:
|
| 15 |
+
mise exec -- mix setup
|
| 16 |
+
|
| 17 |
+
# Update all deps
|
| 18 |
+
update:
|
| 19 |
+
mise exec -- mix deps.update --all
|
| 20 |
+
|
| 21 |
+
# Compile the project
|
| 22 |
+
compile:
|
| 23 |
+
mise exec -- mix compile
|
| 24 |
+
|
| 25 |
+
# Run tests
|
| 26 |
+
test:
|
| 27 |
+
mise exec -- mix test
|
| 28 |
+
|
| 29 |
+
# Open Livebook editor (serves on :8080)
|
| 30 |
+
livebook:
|
| 31 |
+
mise exec -- mix run --eval "Livebook.Config.set(:default_runtime, Livebook.Config.Runtime.default_standalone())" 2>/dev/null; \
|
| 32 |
+
mise exec -- livebook server --port 8080
|
| 33 |
+
|
| 34 |
+
# Open the main template in Livebook
|
| 35 |
+
notebook:
|
| 36 |
+
mise exec -- livebook server ml_e2e_template.livemd
|
| 37 |
+
|
| 38 |
+
# ─── Deploy: Hugging Face Spaces ─────────────────────────────────
|
| 39 |
+
|
| 40 |
+
# Deploy Livebook to HF Spaces
|
| 41 |
+
deploy-livebook space:
|
| 42 |
+
cp ml_e2e_template.livemd hf_deploy/
|
| 43 |
+
cd hf_deploy && bash deploy.sh {{space}}
|
| 44 |
+
|
| 45 |
+
# Deploy Gradio to HF Spaces
|
| 46 |
+
deploy-gradio space:
|
| 47 |
+
cd gradio_hf_deploy && \
|
| 48 |
+
git clone https://huggingface.co/spaces/{{space}} /tmp/gradio-deploy 2>/dev/null || true && \
|
| 49 |
+
cp app.py requirements.txt README.md /tmp/gradio-deploy/ && \
|
| 50 |
+
cd /tmp/gradio-deploy && \
|
| 51 |
+
git add -A && git commit -m "Update" && git push
|
| 52 |
+
|
| 53 |
+
# Deploy marimo to HF Spaces
|
| 54 |
+
deploy-marimo space:
|
| 55 |
+
cd marimo && \
|
| 56 |
+
git clone https://huggingface.co/spaces/{{space}} /tmp/marimo-deploy 2>/dev/null || true && \
|
| 57 |
+
cp ml_e2e_marimo.py Dockerfile requirements.txt README.md /tmp/marimo-deploy/ && \
|
| 58 |
+
cd /tmp/marimo-deploy && \
|
| 59 |
+
git add -A && git commit -m "Update" && git push
|
| 60 |
+
|
| 61 |
+
# ─── Python / marimo ─────────────────────────────────────────────
|
| 62 |
+
|
| 63 |
+
# Run marimo editor
|
| 64 |
+
marimo:
|
| 65 |
+
pip install -q marimo transformers torch numpy scikit-learn matplotlib 2>/dev/null; \
|
| 66 |
+
marimo edit marimo/ml_e2e_marimo.py
|
| 67 |
+
|
| 68 |
+
# Run marimo as app (no code)
|
| 69 |
+
marimo-app:
|
| 70 |
+
pip install -q marimo transformers torch numpy scikit-learn matplotlib 2>/dev/null; \
|
| 71 |
+
marimo run marimo/ml_e2e_marimo.py
|
| 72 |
+
|
| 73 |
+
# Run Gradio app locally
|
| 74 |
+
gradio:
|
| 75 |
+
pip install -q -r gradio_hf_deploy/requirements.txt 2>/dev/null; \
|
| 76 |
+
python gradio_hf_deploy/app.py
|
| 77 |
+
|
| 78 |
+
# ─── Validation ──────────────────────────────────────────────────
|
| 79 |
+
|
| 80 |
+
# Check all files exist
|
| 81 |
+
check:
|
| 82 |
+
@echo "=== Files ==="
|
| 83 |
+
@test -f ml_e2e_template.livemd && echo "✅ Livebook template" || echo "❌ Livebook template"
|
| 84 |
+
@test -f hf_deploy/Dockerfile && echo "✅ HF Deploy (Docker)" || echo "❌ HF Deploy (Docker)"
|
| 85 |
+
@test -f colab_kaggle/ml_e2e_python.ipynb && echo "✅ Colab/Kaggle (.ipynb)" || echo "❌ Colab/Kaggle"
|
| 86 |
+
@test -f gradio_hf_deploy/app.py && echo "✅ Gradio app" || echo "❌ Gradio app"
|
| 87 |
+
@test -f marimo/ml_e2e_marimo.py && echo "✅ Marimo notebook" || echo "❌ Marimo notebook"
|
| 88 |
+
@echo "=== Tools ==="
|
| 89 |
+
@which elixir >/dev/null 2>&1 && echo "✅ elixir" || echo "⚠️ elixir not found (run: mise install)"
|
| 90 |
+
@which python3 >/dev/null 2>&1 && echo "✅ python3" || echo "⚠️ python3 not found"
|
| 91 |
+
@which just >/dev/null 2>&1 && echo "✅ just" || echo "⚠️ just not found (run: mise use just)"
|
| 92 |
+
@which git >/dev/null 2>&1 && echo "✅ git" || echo "⚠️ git not found"
|
| 93 |
+
|
| 94 |
+
# Verify mix deps resolve
|
| 95 |
+
deps-check:
|
| 96 |
+
mise exec -- mix deps.get --check-unused 2>&1 || true
|
| 97 |
+
mise exec -- mix deps
|
| 98 |
+
|
| 99 |
+
# ─── Clean ───────────────────────────────────────────────────────
|
| 100 |
+
|
| 101 |
+
# Clean build artifacts
|
| 102 |
+
clean:
|
| 103 |
+
mise exec -- mix clean
|
| 104 |
+
rm -rf _build deps
|
| 105 |
+
find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
|
| 106 |
+
find . -name "*.pyc" -delete 2>/dev/null || true
|
lib/ml_learning.ex
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule MLLearning do
|
| 2 |
+
@moduledoc """
|
| 3 |
+
Documentation for `MLLearning`.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
@doc """
|
| 7 |
+
Hello world.
|
| 8 |
+
|
| 9 |
+
## Examples
|
| 10 |
+
|
| 11 |
+
iex> MLLearning.hello()
|
| 12 |
+
:world
|
| 13 |
+
|
| 14 |
+
"""
|
| 15 |
+
def hello do
|
| 16 |
+
:world
|
| 17 |
+
end
|
| 18 |
+
end
|
marimo/Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
RUN pip install --no-cache-dir \
|
| 4 |
+
marimo \
|
| 5 |
+
transformers \
|
| 6 |
+
torch \
|
| 7 |
+
numpy \
|
| 8 |
+
scikit-learn \
|
| 9 |
+
matplotlib \
|
| 10 |
+
sentencepiece \
|
| 11 |
+
protobuf
|
| 12 |
+
|
| 13 |
+
WORKDIR /app
|
| 14 |
+
COPY ml_e2e_marimo.py /app/
|
| 15 |
+
|
| 16 |
+
EXPOSE 8080
|
| 17 |
+
|
| 18 |
+
CMD ["marimo", "run", "ml_e2e_marimo.py", "--host", "0.0.0.0", "--port", "8080"]
|
marimo/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ML in Elixir — marimo Edition
|
| 3 |
+
emoji: 🐝
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8080
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
tags:
|
| 11 |
+
- marimo
|
| 12 |
+
- elixir
|
| 13 |
+
- machine-learning
|
| 14 |
+
- bumblebee
|
| 15 |
+
- hugging-face
|
| 16 |
+
- reactive-notebook
|
| 17 |
+
---
|
marimo/ml_e2e_marimo.py
ADDED
|
@@ -0,0 +1,904 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ML in Elixir — End-to-Edge Template (marimo edition)
|
| 3 |
+
|
| 4 |
+
A reactive Python notebook mirroring the Elixir Livebook template.
|
| 5 |
+
Built on the marimo reactive notebook framework.
|
| 6 |
+
|
| 7 |
+
## Skills
|
| 8 |
+
- `hf_cli.md` – Hugging Face CLI usage
|
| 9 |
+
- `hf_jobs.md` – Running workloads on HF Jobs
|
| 10 |
+
- `training_trl.md` – TRL model training
|
| 11 |
+
- `hf_dataset_viewer.md` – Dataset Viewer API
|
| 12 |
+
- `gradio.md` – Gradio UI integration
|
| 13 |
+
- *(Full catalog at https://skills.sh/huggingface/skills)*
|
| 14 |
+
|
| 15 |
+
Run:
|
| 16 |
+
marimo edit ml_e2e_marimo.py # Interactive editor
|
| 17 |
+
marimo run ml_e2e_marimo.py # App mode (no code visible)
|
| 18 |
+
python ml_e2e_marimo.py # Script mode (terminal output)
|
| 19 |
+
|
| 20 |
+
Deploy:
|
| 21 |
+
- Hugging Face Spaces (sdk: marimo)
|
| 22 |
+
- Anywhere Python runs (pip install marimo)
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import marimo
|
| 26 |
+
|
| 27 |
+
__generated_with = "0.19.10"
|
| 28 |
+
app = marimo.App(width="medium")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 32 |
+
# 0 — Imports & Setup
|
| 33 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@app.cell
|
| 37 |
+
def _():
|
| 38 |
+
import marimo as mo
|
| 39 |
+
|
| 40 |
+
return (mo,)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@app.cell(hide_code=True)
|
| 44 |
+
def _(mo):
|
| 45 |
+
mo.md(
|
| 46 |
+
r"""
|
| 47 |
+
# 🐝 Machine Learning in Elixir — marimo Edition
|
| 48 |
+
|
| 49 |
+
This reactive notebook mirrors the **Elixir Livebook template**
|
| 50 |
+
from *Machine Learning in Elixir* by Sean Moriarity.
|
| 51 |
+
|
| 52 |
+
| Elixir Livebook | This Notebook | Purpose |
|
| 53 |
+
|-----------------|---------------|----------|
|
| 54 |
+
| `Nx` | `numpy` | Numerical computing |
|
| 55 |
+
| `Axon` | `torch` | Neural networks |
|
| 56 |
+
| `Bumblebee` | `transformers` | Pre-trained models |
|
| 57 |
+
| `Nx.Serving` | `pipeline()` | Batched inference |
|
| 58 |
+
| `Kino` | `marimo.ui` | Reactive interactive UI |
|
| 59 |
+
| `EXLA` | CUDA | GPU acceleration |
|
| 60 |
+
"""
|
| 61 |
+
)
|
| 62 |
+
return
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
@app.cell
|
| 66 |
+
def _():
|
| 67 |
+
import numpy as np
|
| 68 |
+
import torch
|
| 69 |
+
import torch.nn as nn
|
| 70 |
+
import torch.nn.functional as F
|
| 71 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 72 |
+
from transformers import (
|
| 73 |
+
pipeline,
|
| 74 |
+
AutoTokenizer,
|
| 75 |
+
AutoModel,
|
| 76 |
+
)
|
| 77 |
+
from sklearn.model_selection import train_test_split
|
| 78 |
+
from sklearn.metrics import classification_report
|
| 79 |
+
import matplotlib.pyplot as plt
|
| 80 |
+
|
| 81 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 82 |
+
mo.md(f"**Device:** `{device}` | **PyTorch:** `{torch.__version__}`")
|
| 83 |
+
return (
|
| 84 |
+
AutoModel,
|
| 85 |
+
AutoTokenizer,
|
| 86 |
+
DataLoader,
|
| 87 |
+
F,
|
| 88 |
+
TensorDataset,
|
| 89 |
+
classification_report,
|
| 90 |
+
device,
|
| 91 |
+
mo,
|
| 92 |
+
nn,
|
| 93 |
+
np,
|
| 94 |
+
pipeline,
|
| 95 |
+
plt,
|
| 96 |
+
torch,
|
| 97 |
+
train_test_split,
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 102 |
+
# 1 — NumPy Foundations (Nx equivalent)
|
| 103 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@app.cell(hide_code=True)
|
| 107 |
+
def _(mo):
|
| 108 |
+
mo.md(
|
| 109 |
+
r"""
|
| 110 |
+
## 1 — NumPy Foundations
|
| 111 |
+
|
| 112 |
+
Elixir's `Nx` provides tensors, broadcasting, and automatic differentiation.
|
| 113 |
+
Python's `numpy` + `torch.autograd` are the direct counterparts.
|
| 114 |
+
"""
|
| 115 |
+
)
|
| 116 |
+
return
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
@app.cell
|
| 120 |
+
def _(np):
|
| 121 |
+
# Tensors: Nx.tensor(...) → np.array(...)
|
| 122 |
+
scalar = np.float64(3.14)
|
| 123 |
+
vector = np.array([1.0, 2.0, 3.0])
|
| 124 |
+
matrix = np.array([[1, 2, 3], [4, 5, 6]])
|
| 125 |
+
|
| 126 |
+
tensor_info = (
|
| 127 |
+
f"| Type | Shape | Dtype |\n|------|-------|-------|\n"
|
| 128 |
+
f"| scalar | `{np.shape(scalar)}` | `{scalar.dtype}` |\n"
|
| 129 |
+
f"| vector | `{np.shape(vector)}` | `{vector.dtype}` |\n"
|
| 130 |
+
f"| matrix | `{np.shape(matrix)}` | `{matrix.dtype}` |"
|
| 131 |
+
)
|
| 132 |
+
tensor_info
|
| 133 |
+
return matrix, vector
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@app.cell
|
| 137 |
+
def _(np, vector):
|
| 138 |
+
# Operations: Nx.add/multiply/dot → +/*/np.dot
|
| 139 |
+
a = np.array([1.0, 2.0, 3.0])
|
| 140 |
+
b = np.array([10.0, 20.0, 30.0])
|
| 141 |
+
|
| 142 |
+
ops_results = {
|
| 143 |
+
"add": a + b,
|
| 144 |
+
"multiply": a * b,
|
| 145 |
+
"dot": np.dot(a, b),
|
| 146 |
+
"sum": np.sum(a),
|
| 147 |
+
"mean": np.mean(a),
|
| 148 |
+
"std": np.std(a),
|
| 149 |
+
}
|
| 150 |
+
ops_results
|
| 151 |
+
return
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@app.cell
|
| 155 |
+
def _(mo, torch):
|
| 156 |
+
# Automatic differentiation: Nx.Defn.grad → torch.autograd
|
| 157 |
+
x = torch.tensor(3.0, requires_grad=True)
|
| 158 |
+
f = x**3 + 2 * x**2 # f(x) = x³ + 2x²
|
| 159 |
+
f.backward()
|
| 160 |
+
|
| 161 |
+
mo.md(
|
| 162 |
+
f"""
|
| 163 |
+
**Automatic differentiation** (gradients for training):
|
| 164 |
+
|
| 165 |
+
| | Value |
|
| 166 |
+
|---|---|
|
| 167 |
+
| f(3) | `{f.item()}` |
|
| 168 |
+
| f'(3) | `{x.grad.item()}` |
|
| 169 |
+
| Expected (3·9 + 2·2·3) | `{3 * 9 + 2 * 2 * 3}` |
|
| 170 |
+
"""
|
| 171 |
+
)
|
| 172 |
+
return
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
@app.cell
|
| 176 |
+
def _(mo, np):
|
| 177 |
+
# Broadcasting: Nx broadcasts automatically, so does numpy
|
| 178 |
+
mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
|
| 179 |
+
row = np.array([10.0, 20.0, 30.0])
|
| 180 |
+
broadcast_result = mat + row
|
| 181 |
+
|
| 182 |
+
mo.md(f"**Broadcasting** — add row vector to matrix:\n```\n{broadcast_result}\n```")
|
| 183 |
+
return
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 187 |
+
# 2 — Pre-trained NLP (Bumblebee equivalent)
|
| 188 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
@app.cell(hide_code=True)
|
| 192 |
+
def _(mo):
|
| 193 |
+
mo.md(
|
| 194 |
+
r"""
|
| 195 |
+
## 2 — Pre-trained NLP (Bumblebee → transformers)
|
| 196 |
+
|
| 197 |
+
In Elixir: `Bumblebee.load_model({:hf, "..."})` + `Nx.Serving.run()`
|
| 198 |
+
In Python: `transformers.pipeline()` is the one-liner equivalent.
|
| 199 |
+
"""
|
| 200 |
+
)
|
| 201 |
+
return
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# --- 2.1 Fill-Mask ---
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
@app.cell
|
| 208 |
+
def _(mo, pipeline):
|
| 209 |
+
mo.md("### 2.1 Fill-Mask (BERT)")
|
| 210 |
+
fill_mask = pipeline("fill-mask", model="google-bert/bert-base-uncased")
|
| 211 |
+
return (fill_mask,)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
@app.cell
|
| 215 |
+
def _(fill_mask, mo):
|
| 216 |
+
mask_text = mo.ui.text(
|
| 217 |
+
label="Text with [MASK]", value="Elixir is a [MASK] language."
|
| 218 |
+
)
|
| 219 |
+
mask_text
|
| 220 |
+
return (mask_text,)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
@app.cell
|
| 224 |
+
def _(fill_mask, mask_text, mo):
|
| 225 |
+
mask_results = []
|
| 226 |
+
if "[MASK]" in mask_text.value:
|
| 227 |
+
raw = fill_mask(mask_text.value)
|
| 228 |
+
mask_results = [
|
| 229 |
+
{
|
| 230 |
+
"score": round(r["score"], 4),
|
| 231 |
+
"token": r["token_str"],
|
| 232 |
+
"sequence": r["sequence"],
|
| 233 |
+
}
|
| 234 |
+
for r in raw
|
| 235 |
+
]
|
| 236 |
+
mo.ui.table(mask_results) if mask_results else mo.md(
|
| 237 |
+
"Enter text with **[MASK]** above."
|
| 238 |
+
)
|
| 239 |
+
return
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# --- 2.2 Sentiment ---
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
@app.cell
|
| 246 |
+
def _(mo, pipeline):
|
| 247 |
+
mo.md("### 2.2 Sentiment Analysis (DistilBERT)")
|
| 248 |
+
sentiment = pipeline(
|
| 249 |
+
"sentiment-analysis",
|
| 250 |
+
model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
|
| 251 |
+
)
|
| 252 |
+
return (sentiment,)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
@app.cell
|
| 256 |
+
def _(mo, sentiment):
|
| 257 |
+
sentiment_input = mo.ui.text(
|
| 258 |
+
label="Text",
|
| 259 |
+
value="Machine learning in Elixir is amazing!",
|
| 260 |
+
)
|
| 261 |
+
sentiment_input
|
| 262 |
+
return (sentiment_input,)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
@app.cell
|
| 266 |
+
def _(mo, sentiment, sentiment_input):
|
| 267 |
+
if sentiment_input.value.strip():
|
| 268 |
+
result = sentiment(sentiment_input.value)[0]
|
| 269 |
+
emoji = "😊" if result["label"] == "POSITIVE" else "😞"
|
| 270 |
+
mo.md(f"### {emoji} **{result['label']}** ({result['score']:.4f})")
|
| 271 |
+
else:
|
| 272 |
+
mo.md("Enter text above.")
|
| 273 |
+
return
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# --- 2.3 NER ---
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
@app.cell
|
| 280 |
+
def _(mo, pipeline):
|
| 281 |
+
mo.md("### 2.3 Named Entity Recognition (BERT-NER)")
|
| 282 |
+
ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
|
| 283 |
+
return (ner,)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
@app.cell
|
| 287 |
+
def _(mo, ner):
|
| 288 |
+
ner_input = mo.ui.text_area(
|
| 289 |
+
label="Text",
|
| 290 |
+
value="Sean Moriarity wrote Machine Learning in Elixir for Pragmatic Bookshelf. He lives in Austin, Texas.",
|
| 291 |
+
)
|
| 292 |
+
ner_input
|
| 293 |
+
return (ner_input,)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
@app.cell
|
| 297 |
+
def _(mo, ner, ner_input):
|
| 298 |
+
if ner_input.value.strip():
|
| 299 |
+
entities = ner(ner_input.value)
|
| 300 |
+
rows = [
|
| 301 |
+
{
|
| 302 |
+
"Entity": e["word"],
|
| 303 |
+
"Type": e["entity_group"],
|
| 304 |
+
"Score": round(e["score"], 4),
|
| 305 |
+
}
|
| 306 |
+
for e in entities
|
| 307 |
+
]
|
| 308 |
+
mo.ui.table(rows) if rows else mo.md("No entities found.")
|
| 309 |
+
else:
|
| 310 |
+
mo.md("Enter text above.")
|
| 311 |
+
return
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
# --- 2.4 Zero-Shot ---
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
@app.cell
|
| 318 |
+
def _(mo, pipeline):
|
| 319 |
+
mo.md("### 2.4 Zero-Shot Classification")
|
| 320 |
+
zs = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 321 |
+
return (zs,)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
@app.cell
|
| 325 |
+
def _(mo, zs):
|
| 326 |
+
zs_text = mo.ui.text_area(
|
| 327 |
+
label="Text",
|
| 328 |
+
value="Nx brings numerical computing to the BEAM, enabling machine learning pipelines.",
|
| 329 |
+
)
|
| 330 |
+
zs_labels = mo.ui.text(
|
| 331 |
+
label="Labels (comma-separated)",
|
| 332 |
+
value="technology, sports, politics, science, finance",
|
| 333 |
+
)
|
| 334 |
+
mo.vstack([zs_text, zs_labels])
|
| 335 |
+
return zs_labels, zs_text
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
@app.cell
|
| 339 |
+
def _(mo, zs, zs_labels, zs_text):
|
| 340 |
+
if zs_text.value.strip() and zs_labels.value.strip():
|
| 341 |
+
labels = [l.strip() for l in zs_labels.value.split(",") if l.strip()]
|
| 342 |
+
result = zs(zs_text.value, labels)
|
| 343 |
+
rows = [
|
| 344 |
+
{"Label": l, "Score": round(s, 4)}
|
| 345 |
+
for l, s in zip(result["labels"], result["scores"])
|
| 346 |
+
]
|
| 347 |
+
mo.ui.table(rows)
|
| 348 |
+
else:
|
| 349 |
+
mo.md("Enter text and labels above.")
|
| 350 |
+
return
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
# --- 2.5 Embeddings ---
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
@app.cell
|
| 357 |
+
def _(AutoModel, AutoTokenizer, mo):
|
| 358 |
+
mo.md("### 2.5 Sentence Embeddings & Similarity")
|
| 359 |
+
emb_tok = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
|
| 360 |
+
emb_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
|
| 361 |
+
return emb_model, emb_tok
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
@app.cell
|
| 365 |
+
def _(F, emb_model, emb_tok, mo, torch):
|
| 366 |
+
def embed_texts(texts):
|
| 367 |
+
inputs = emb_tok(texts, padding=True, truncation=True, return_tensors="pt")
|
| 368 |
+
with torch.no_grad():
|
| 369 |
+
out = emb_model(**inputs)
|
| 370 |
+
mask = inputs["attention_mask"].unsqueeze(-1)
|
| 371 |
+
embs = (out.last_hidden_state * mask).sum(1) / mask.sum(1)
|
| 372 |
+
return F.normalize(embs, p=2, dim=1)
|
| 373 |
+
|
| 374 |
+
emb_a = mo.ui.text(
|
| 375 |
+
label="Text A", value="Nx provides numerical computing for Elixir"
|
| 376 |
+
)
|
| 377 |
+
emb_b = mo.ui.text(
|
| 378 |
+
label="Text B", value="Axon is a neural network library built on Nx"
|
| 379 |
+
)
|
| 380 |
+
mo.vstack([emb_a, emb_b])
|
| 381 |
+
return emb_a, emb_b, embed_texts
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
@app.cell
|
| 385 |
+
def _(emb_a, emb_b, embed_texts, mo):
|
| 386 |
+
if emb_a.value.strip() and emb_b.value.strip():
|
| 387 |
+
embs = embed_texts([emb_a.value, emb_b.value])
|
| 388 |
+
import torch.nn.functional as _F
|
| 389 |
+
|
| 390 |
+
score = _F.cosine_similarity(embs[0:1], embs[1:2]).item()
|
| 391 |
+
mo.md(f"**Cosine Similarity:** `{score:.6f}`")
|
| 392 |
+
else:
|
| 393 |
+
mo.md("Enter both texts above.")
|
| 394 |
+
return
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
# --- 2.6 Text Generation ---
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
@app.cell
|
| 401 |
+
def _(mo, pipeline):
|
| 402 |
+
mo.md("### 2.6 Text Generation (GPT-2)")
|
| 403 |
+
text_gen = pipeline("text-generation", model="openai-community/gpt2")
|
| 404 |
+
return (text_gen,)
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
@app.cell
|
| 408 |
+
def _(mo):
|
| 409 |
+
gen_prompt = mo.ui.text(label="Prompt", value="Machine learning in Elixir is")
|
| 410 |
+
gen_tokens = mo.ui.slider(start=10, stop=100, value=50, label="Max tokens")
|
| 411 |
+
gen_temp = mo.ui.slider(
|
| 412 |
+
start=0.1, stop=2.0, value=0.7, step=0.1, label="Temperature"
|
| 413 |
+
)
|
| 414 |
+
mo.vstack([gen_prompt, gen_tokens, gen_temp])
|
| 415 |
+
return gen_prompt, gen_temp, gen_tokens
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
@app.cell
|
| 419 |
+
def _(gen_prompt, gen_temp, gen_tokens, mo, text_gen):
|
| 420 |
+
if gen_prompt.value.strip():
|
| 421 |
+
output = text_gen(
|
| 422 |
+
gen_prompt.value,
|
| 423 |
+
max_new_tokens=gen_tokens.value,
|
| 424 |
+
temperature=gen_temp.value,
|
| 425 |
+
do_sample=True,
|
| 426 |
+
)
|
| 427 |
+
generated = output[0]["generated_text"]
|
| 428 |
+
mo.md(f"```\n{generated}\n```")
|
| 429 |
+
else:
|
| 430 |
+
mo.md("Enter a prompt above.")
|
| 431 |
+
return
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 435 |
+
# 2.7 — Image Classification (ViT)
|
| 436 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
@app.cell(hide_code=True)
|
| 440 |
+
def _(mo):
|
| 441 |
+
mo.md(
|
| 442 |
+
r"""
|
| 443 |
+
### 2.7 Image Classification (Vision Transformer)
|
| 444 |
+
|
| 445 |
+
Elixir: `Bumblebee.Vision.ImageClassification.image_classification(model, featurizer)`
|
| 446 |
+
Python: `pipeline("image-classification")`
|
| 447 |
+
"""
|
| 448 |
+
)
|
| 449 |
+
return
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
@app.cell
|
| 453 |
+
def _(mo, pipeline):
|
| 454 |
+
img_cls = pipeline("image-classification", model="google/vit-base-patch16-224")
|
| 455 |
+
img_upload = mo.ui.file(
|
| 456 |
+
label="Upload image",
|
| 457 |
+
filetypes=[".jpg", ".jpeg", ".png", ".webp"],
|
| 458 |
+
multiple=False,
|
| 459 |
+
)
|
| 460 |
+
img_upload
|
| 461 |
+
return img_cls, img_upload
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
@app.cell
|
| 465 |
+
def _(img_cls, img_upload, mo):
|
| 466 |
+
if img_upload.value:
|
| 467 |
+
import io
|
| 468 |
+
from PIL import Image
|
| 469 |
+
|
| 470 |
+
file_info = img_upload.value[0]
|
| 471 |
+
img = Image.open(io.BytesIO(file_info.contents)).convert("RGB")
|
| 472 |
+
results = img_cls(img)
|
| 473 |
+
rows = [{"Label": r["label"], "Score": round(r["score"], 4)} for r in results]
|
| 474 |
+
mo.vstack([mo.md("**Results:**"), mo.ui.table(rows)])
|
| 475 |
+
else:
|
| 476 |
+
mo.md("Upload an image above to classify.")
|
| 477 |
+
return
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 481 |
+
# 2.8 — Audio / Speech-to-Text (Whisper)
|
| 482 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
@app.cell(hide_code=True)
|
| 486 |
+
def _(mo):
|
| 487 |
+
mo.md(
|
| 488 |
+
r"""
|
| 489 |
+
### 2.8 Speech-to-Text (Whisper)
|
| 490 |
+
|
| 491 |
+
Elixir: `Bumblebee.Audio.speech_to_text(model, featurizer, tokenizer, generation_config)`
|
| 492 |
+
Python: `pipeline("automatic-speech-recognition")`
|
| 493 |
+
"""
|
| 494 |
+
)
|
| 495 |
+
return
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
@app.cell
|
| 499 |
+
def _(mo, pipeline):
|
| 500 |
+
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
|
| 501 |
+
audio_upload = mo.ui.file(
|
| 502 |
+
label="Upload audio (WAV/FLAC/MP3)",
|
| 503 |
+
filetypes=[".wav", ".flac", ".mp3", ".m4a"],
|
| 504 |
+
multiple=False,
|
| 505 |
+
)
|
| 506 |
+
audio_upload
|
| 507 |
+
return asr, audio_upload
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
@app.cell
|
| 511 |
+
def _(asr, audio_upload, mo):
|
| 512 |
+
if audio_upload.value:
|
| 513 |
+
import io
|
| 514 |
+
import tempfile
|
| 515 |
+
import os
|
| 516 |
+
|
| 517 |
+
file_info = audio_upload.value[0]
|
| 518 |
+
suffix = os.path.splitext(file_info.name)[1] or ".wav"
|
| 519 |
+
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
| 520 |
+
tmp.write(file_info.contents)
|
| 521 |
+
tmp_path = tmp.name
|
| 522 |
+
result = asr(tmp_path)
|
| 523 |
+
os.unlink(tmp_path)
|
| 524 |
+
mo.md(f"**Transcription:**\n\n> {result['text']}")
|
| 525 |
+
else:
|
| 526 |
+
mo.md("Upload an audio file above to transcribe.")
|
| 527 |
+
return
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 531 |
+
# 2.9 — Image Generation (Stable Diffusion)
|
| 532 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
@app.cell(hide_code=True)
|
| 536 |
+
def _(mo):
|
| 537 |
+
mo.md(
|
| 538 |
+
r"""
|
| 539 |
+
### 2.9 Image Generation (Stable Diffusion)
|
| 540 |
+
|
| 541 |
+
Elixir: `Bumblebee.Diffusion.StableDiffusion.text_to_image(...)`
|
| 542 |
+
Python: `diffusers` library
|
| 543 |
+
|
| 544 |
+
> Requires GPU with 4GB+ VRAM. Falls back to CPU (very slow).
|
| 545 |
+
"""
|
| 546 |
+
)
|
| 547 |
+
return
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
@app.cell
|
| 551 |
+
def _(mo):
|
| 552 |
+
sd_prompt = mo.ui.text(
|
| 553 |
+
label="Prompt", value="a photograph of a bee programming in elixir, 4k"
|
| 554 |
+
)
|
| 555 |
+
sd_negative = mo.ui.text(label="Negative prompt", value="blurry, ugly, low quality")
|
| 556 |
+
sd_steps = mo.ui.slider(start=5, stop=50, value=20, label="Steps")
|
| 557 |
+
sd_btn = mo.ui.button(label="Generate Image")
|
| 558 |
+
mo.vstack([sd_prompt, sd_negative, sd_steps, sd_btn])
|
| 559 |
+
return sd_btn, sd_negative, sd_prompt, sd_steps
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
@app.cell
|
| 563 |
+
def _(mo, sd_btn, sd_negative, sd_prompt, sd_steps):
|
| 564 |
+
if sd_btn.value and sd_prompt.value.strip():
|
| 565 |
+
try:
|
| 566 |
+
from diffusers import StableDiffusionPipeline
|
| 567 |
+
import torch as _torch
|
| 568 |
+
|
| 569 |
+
device_sd = "cuda" if _torch.cuda.is_available() else "cpu"
|
| 570 |
+
sd_pipe = StableDiffusionPipeline.from_pretrained(
|
| 571 |
+
"CompVis/stable-diffusion-v1-4",
|
| 572 |
+
torch_dtype=_torch.float16 if device_sd == "cuda" else _torch.float32,
|
| 573 |
+
).to(device_sd)
|
| 574 |
+
|
| 575 |
+
image = sd_pipe(
|
| 576 |
+
prompt=sd_prompt.value,
|
| 577 |
+
negative_prompt=sd_negative.value or None,
|
| 578 |
+
num_inference_steps=sd_steps.value,
|
| 579 |
+
).images[0]
|
| 580 |
+
|
| 581 |
+
mo.image(image, caption=sd_prompt.value)
|
| 582 |
+
except Exception as e:
|
| 583 |
+
mo.md(f"⚠️ Stable Diffusion requires `diffusers` + GPU: `{e}`")
|
| 584 |
+
else:
|
| 585 |
+
mo.md("Enter a prompt and click Generate.")
|
| 586 |
+
return
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 590 |
+
# 3 — Custom Training (Axon equivalent)
|
| 591 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
@app.cell(hide_code=True)
|
| 595 |
+
def _(mo):
|
| 596 |
+
mo.md(
|
| 597 |
+
r"""
|
| 598 |
+
## 3 — Custom Training (Axon → PyTorch)
|
| 599 |
+
|
| 600 |
+
Build and train a neural network from scratch — mirrors the Axon
|
| 601 |
+
section of the Livebook template.
|
| 602 |
+
"""
|
| 603 |
+
)
|
| 604 |
+
return
|
| 605 |
+
|
| 606 |
+
|
| 607 |
+
@app.cell
|
| 608 |
+
def _(np, train_test_split):
|
| 609 |
+
# Synthetic data (same as Livebook)
|
| 610 |
+
np.random.seed(42)
|
| 611 |
+
n_samples, n_features, n_classes = 2000, 4, 3
|
| 612 |
+
|
| 613 |
+
centers = np.random.randn(n_classes, n_features) * 2
|
| 614 |
+
labels_raw = np.random.randint(0, n_classes, n_samples)
|
| 615 |
+
noise = np.random.randn(n_samples, n_features) * 0.4
|
| 616 |
+
X = centers[labels_raw] + noise
|
| 617 |
+
X = (X - X.mean(axis=0)) / X.std(axis=0)
|
| 618 |
+
Y = np.eye(n_classes)[labels_raw]
|
| 619 |
+
|
| 620 |
+
X_train, X_test, Y_train, Y_test = train_test_split(
|
| 621 |
+
X, Y, test_size=0.2, random_state=42
|
| 622 |
+
)
|
| 623 |
+
f"Train: {len(X_train)} | Test: {len(X_test)} | Features: {n_features} | Classes: {n_classes}"
|
| 624 |
+
return X_test, X_train, Y_test, Y_train, n_classes, n_features
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
@app.cell
|
| 628 |
+
def _(DataLoader, TensorDataset, X_test, X_train, Y_test, Y_train, device, torch):
|
| 629 |
+
# Prepare PyTorch data loaders
|
| 630 |
+
train_loader = DataLoader(
|
| 631 |
+
TensorDataset(
|
| 632 |
+
torch.FloatTensor(X_train).to(device),
|
| 633 |
+
torch.FloatTensor(Y_train).to(device),
|
| 634 |
+
),
|
| 635 |
+
batch_size=64,
|
| 636 |
+
shuffle=True,
|
| 637 |
+
)
|
| 638 |
+
test_loader = DataLoader(
|
| 639 |
+
TensorDataset(
|
| 640 |
+
torch.FloatTensor(X_test).to(device),
|
| 641 |
+
torch.FloatTensor(Y_test).to(device),
|
| 642 |
+
),
|
| 643 |
+
batch_size=64,
|
| 644 |
+
)
|
| 645 |
+
return test_loader, train_loader
|
| 646 |
+
|
| 647 |
+
|
| 648 |
+
@app.cell
|
| 649 |
+
def _(device, mo, n_classes, n_features, nn):
|
| 650 |
+
# Model definition (mirrors Axon build)
|
| 651 |
+
class MLP(nn.Module):
|
| 652 |
+
def __init__(self):
|
| 653 |
+
super().__init__()
|
| 654 |
+
self.net = nn.Sequential(
|
| 655 |
+
nn.Linear(n_features, 64),
|
| 656 |
+
nn.ReLU(),
|
| 657 |
+
nn.BatchNorm1d(64),
|
| 658 |
+
nn.Dropout(0.2),
|
| 659 |
+
nn.Linear(64, 32),
|
| 660 |
+
nn.ReLU(),
|
| 661 |
+
nn.BatchNorm1d(32),
|
| 662 |
+
nn.Dropout(0.2),
|
| 663 |
+
nn.Linear(32, n_classes),
|
| 664 |
+
nn.Softmax(dim=1),
|
| 665 |
+
)
|
| 666 |
+
|
| 667 |
+
def forward(self, x):
|
| 668 |
+
return self.net(x)
|
| 669 |
+
|
| 670 |
+
model = MLP().to(device)
|
| 671 |
+
param_count = sum(p.numel() for p in model.parameters())
|
| 672 |
+
mo.md(f"**Model:** {param_count:,} parameters\n```\n{model}\n```")
|
| 673 |
+
return (model,)
|
| 674 |
+
|
| 675 |
+
|
| 676 |
+
@app.cell
|
| 677 |
+
def _(mo, model, torch, train_loader):
|
| 678 |
+
# Training loop
|
| 679 |
+
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
|
| 680 |
+
criterion = nn.CrossEntropyLoss()
|
| 681 |
+
|
| 682 |
+
epochs = 30
|
| 683 |
+
epoch_losses = []
|
| 684 |
+
|
| 685 |
+
for epoch in range(epochs):
|
| 686 |
+
model.train()
|
| 687 |
+
running_loss = 0.0
|
| 688 |
+
for xb, yb in train_loader:
|
| 689 |
+
preds = model(xb)
|
| 690 |
+
loss = criterion(preds, yb.argmax(dim=1))
|
| 691 |
+
optimizer.zero_grad()
|
| 692 |
+
loss.backward()
|
| 693 |
+
optimizer.step()
|
| 694 |
+
running_loss += loss.item()
|
| 695 |
+
epoch_losses.append(running_loss / len(train_loader))
|
| 696 |
+
|
| 697 |
+
mo.md(f"**Training complete!** Final loss: `{epoch_losses[-1]:.4f}`")
|
| 698 |
+
return epoch_losses, model
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
@app.cell
|
| 702 |
+
def _(epoch_losses, mo, plt):
|
| 703 |
+
# Plot training loss
|
| 704 |
+
fig, ax = plt.subplots(figsize=(6, 3))
|
| 705 |
+
ax.plot(epoch_losses, linewidth=2, color="#7c3aed")
|
| 706 |
+
ax.set_xlabel("Epoch")
|
| 707 |
+
ax.set_ylabel("Loss")
|
| 708 |
+
ax.set_title("Training Loss")
|
| 709 |
+
ax.grid(True, alpha=0.3)
|
| 710 |
+
plt.tight_layout()
|
| 711 |
+
mo.mpl.interactive(fig)
|
| 712 |
+
return
|
| 713 |
+
|
| 714 |
+
|
| 715 |
+
@app.cell
|
| 716 |
+
def _(X_test, Y_test, classification_report, device, model, mo, torch):
|
| 717 |
+
# Evaluation
|
| 718 |
+
model.eval()
|
| 719 |
+
all_preds, all_true = [], []
|
| 720 |
+
with torch.no_grad():
|
| 721 |
+
for i in range(0, len(X_test), 64):
|
| 722 |
+
xb = torch.FloatTensor(X_test[i : i + 64]).to(device)
|
| 723 |
+
preds = model(xb).argmax(1).cpu().numpy()
|
| 724 |
+
all_preds.extend(preds)
|
| 725 |
+
all_true.extend(Y_test[i : i + 64].argmax(1))
|
| 726 |
+
|
| 727 |
+
report = classification_report(
|
| 728 |
+
all_true, all_preds, target_names=[f"Class {i}" for i in range(3)]
|
| 729 |
+
)
|
| 730 |
+
mo.md(f"```\n{report}\n```")
|
| 731 |
+
return all_preds, all_true
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
@app.cell
|
| 735 |
+
def _(X_test, all_preds, all_true, mo, plt):
|
| 736 |
+
# Visualize predictions
|
| 737 |
+
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
|
| 738 |
+
for cls in range(3):
|
| 739 |
+
mask = [t == cls for t in all_true]
|
| 740 |
+
axes[0].scatter(
|
| 741 |
+
X_test[mask, 0], X_test[mask, 1], label=f"Class {cls}", alpha=0.6, s=15
|
| 742 |
+
)
|
| 743 |
+
mask2 = [p == cls for p in all_preds]
|
| 744 |
+
axes[1].scatter(
|
| 745 |
+
X_test[mask2, 0], X_test[mask2, 1], label=f"Class {cls}", alpha=0.6, s=15
|
| 746 |
+
)
|
| 747 |
+
axes[0].set_title("Actual")
|
| 748 |
+
axes[1].set_title("Predicted")
|
| 749 |
+
for ax in axes:
|
| 750 |
+
ax.legend()
|
| 751 |
+
ax.grid(True, alpha=0.3)
|
| 752 |
+
plt.tight_layout()
|
| 753 |
+
mo.mpl.interactive(fig)
|
| 754 |
+
return
|
| 755 |
+
|
| 756 |
+
|
| 757 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 758 |
+
# 4 — Interactive Playground
|
| 759 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
@app.cell(hide_code=True)
|
| 763 |
+
def _(mo):
|
| 764 |
+
mo.md(
|
| 765 |
+
r"""
|
| 766 |
+
## 4 — Interactive Playground
|
| 767 |
+
|
| 768 |
+
marimo UI elements are **reactive** — change an input and all
|
| 769 |
+
dependent cells re-run automatically. This mirrors how `Kino`
|
| 770 |
+
works in Elixir Livebook.
|
| 771 |
+
"""
|
| 772 |
+
)
|
| 773 |
+
return
|
| 774 |
+
|
| 775 |
+
|
| 776 |
+
@app.cell
|
| 777 |
+
def _(mo, np, torch):
|
| 778 |
+
# Interactive custom prediction
|
| 779 |
+
def predict_custom(features, model_ref):
|
| 780 |
+
x = torch.FloatTensor([features]).to(device)
|
| 781 |
+
model_ref.eval()
|
| 782 |
+
with torch.no_grad():
|
| 783 |
+
probs = model_ref(x).cpu().numpy()[0]
|
| 784 |
+
return probs
|
| 785 |
+
|
| 786 |
+
f1 = mo.ui.slider(start=-3.0, stop=3.0, value=0.0, step=0.1, label="Feature 1")
|
| 787 |
+
f2 = mo.ui.slider(start=-3.0, stop=3.0, value=0.0, step=0.1, label="Feature 2")
|
| 788 |
+
f3 = mo.ui.slider(start=-3.0, stop=3.0, value=0.0, step=0.1, label="Feature 3")
|
| 789 |
+
f4 = mo.ui.slider(start=-3.0, stop=3.0, value=0.0, step=0.1, label="Feature 4")
|
| 790 |
+
mo.vstack([f1, f2, f3, f4])
|
| 791 |
+
return f1, f2, f3, f4, predict_custom
|
| 792 |
+
|
| 793 |
+
|
| 794 |
+
@app.cell
|
| 795 |
+
def _(f1, f2, f3, f4, model, mo, predict_custom):
|
| 796 |
+
features = [f1.value, f2.value, f3.value, f4.value]
|
| 797 |
+
probs = predict_custom(features, model)
|
| 798 |
+
pred_class = int(probs.argmax())
|
| 799 |
+
confidence = float(probs[pred_class])
|
| 800 |
+
|
| 801 |
+
prob_bars = "\n".join(
|
| 802 |
+
[f"- **Class {i}**: `{p:.4f}` {'█' * int(p * 30)}" for i, p in enumerate(probs)]
|
| 803 |
+
)
|
| 804 |
+
mo.md(
|
| 805 |
+
f"""
|
| 806 |
+
### Prediction: Class {pred_class} ({confidence:.1%} confidence)
|
| 807 |
+
|
| 808 |
+
{prob_bars}
|
| 809 |
+
"""
|
| 810 |
+
)
|
| 811 |
+
return
|
| 812 |
+
|
| 813 |
+
|
| 814 |
+
# ═════════════════════════���═════════════════════════════════════════
|
| 815 |
+
# 5 — Summary
|
| 816 |
+
# ═══════════════════════════════════════════════════════════════════
|
| 817 |
+
|
| 818 |
+
|
| 819 |
+
@app.cell(hide_code=True)
|
| 820 |
+
def _(mo):
|
| 821 |
+
mo.md(
|
| 822 |
+
r"""
|
| 823 |
+
## 5 — Summary
|
| 824 |
+
|
| 825 |
+
| Pipeline Stage | Elixir (Livebook) | This Notebook (marimo) |
|
| 826 |
+
|----------------|-------------------|------------------------|
|
| 827 |
+
| Tensors | `Nx.tensor` | `np.array` |
|
| 828 |
+
| Gradients | `Nx.Defn.grad` | `torch.autograd` |
|
| 829 |
+
| GPU | `EXLA.Backend` | `torch.cuda` |
|
| 830 |
+
| Pre-trained | `Bumblebee.load_model` | `pipeline()` |
|
| 831 |
+
| Fill-Mask | `Bumblebee.Text.fill_mask` | `pipeline("fill-mask")` |
|
| 832 |
+
| Sentiment | `Bumblebee.Text.Classification` | `pipeline("sentiment-analysis")` |
|
| 833 |
+
| NER | `Bumblebee.Text.TokenClassification` | `pipeline("ner")` |
|
| 834 |
+
| Zero-Shot | `Bumblebee.Text.ZeroShotClassification` | `pipeline("zero-shot-classification")` |
|
| 835 |
+
| Text Gen | `Bumblebee.Text.generation` | `pipeline("text-generation")` |
|
| 836 |
+
| Embeddings | `Bumblebee.Text.TextEmbedding` | `sentence-transformers` |
|
| 837 |
+
| Training | `Axon` + `Axon.Loop` | `torch.nn` + training loop |
|
| 838 |
+
| UI | `Kino` | `marimo.ui` (reactive) |
|
| 839 |
+
|
| 840 |
+
### Deploy This Notebook
|
| 841 |
+
|
| 842 |
+
```bash
|
| 843 |
+
# Edit interactively
|
| 844 |
+
marimo edit ml_e2e_marimo.py
|
| 845 |
+
|
| 846 |
+
# Run as app (hide code)
|
| 847 |
+
marimo run ml_e2e_marimo.py
|
| 848 |
+
|
| 849 |
+
# Run as script
|
| 850 |
+
python ml_e2e_marimo.py
|
| 851 |
+
|
| 852 |
+
# Convert to/from Jupyter
|
| 853 |
+
marimo convert ml_e2e_marimo.py -o notebook.ipynb
|
| 854 |
+
marimo convert notebook.ipynb -o from_jupyter.py
|
| 855 |
+
```
|
| 856 |
+
|
| 857 |
+
### Resources
|
| 858 |
+
|
| 859 |
+
* [marimo docs](https://docs.marimo.io)
|
| 860 |
+
* [Bumblebee docs](https://hexdocs.pm/bumblebee)
|
| 861 |
+
* [Nx docs](https://hexdocs.pm/nx)
|
| 862 |
+
* [Hugging Face Hub](https://huggingface.co/models)
|
| 863 |
+
* _Machine Learning in Elixir_ — Sean Moriarity, Pragmatic Bookshelf
|
| 864 |
+
"""
|
| 865 |
+
)
|
| 866 |
+
return
|
| 867 |
+
|
| 868 |
+
|
| 869 |
+
@app.cell
|
| 870 |
+
def _(mo, pipeline, device):
|
| 871 |
+
# Export sentiment model to ONNX (requires torch)
|
| 872 |
+
import torch
|
| 873 |
+
|
| 874 |
+
# Load the sentiment pipeline (use same model as earlier)
|
| 875 |
+
sentiment_pipe = pipeline(
|
| 876 |
+
"sentiment-analysis",
|
| 877 |
+
model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
|
| 878 |
+
)
|
| 879 |
+
torch_model = sentiment_pipe.model
|
| 880 |
+
dummy_input = torch.randint(0, 30522, (1, 128)).to(device) # example token IDs
|
| 881 |
+
torch.onnx.export(
|
| 882 |
+
torch_model,
|
| 883 |
+
dummy_input,
|
| 884 |
+
"sentiment.onnx",
|
| 885 |
+
input_names=["input_ids"],
|
| 886 |
+
output_names=["logits"],
|
| 887 |
+
opset_version=12,
|
| 888 |
+
dynamic_axes={"input_ids": {0: "batch", 1: "seq"}},
|
| 889 |
+
)
|
| 890 |
+
mo.md("✅ Exported sentiment model to `sentiment.onnx`")
|
| 891 |
+
mo.md(
|
| 892 |
+
"""
|
| 893 |
+
Convert to GGUF (decoder‑only models) with:
|
| 894 |
+
```bash
|
| 895 |
+
pip install gguf-converter
|
| 896 |
+
gguf-converter --onnx sentiment.onnx --output sentiment.gguf
|
| 897 |
+
```
|
| 898 |
+
"""
|
| 899 |
+
)
|
| 900 |
+
return
|
| 901 |
+
|
| 902 |
+
|
| 903 |
+
if __name__ == "__main__":
|
| 904 |
+
app.run()
|
marimo/requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
marimo>=0.19.0
|
| 2 |
+
transformers>=4.36.0
|
| 3 |
+
torch>=2.0.0
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
scikit-learn>=1.3.0
|
| 6 |
+
matplotlib>=3.7.0
|
| 7 |
+
sentencepiece
|
| 8 |
+
protobuf
|
| 9 |
+
Pillow>=10.0.0
|
| 10 |
+
diffusers>=0.25.0
|
| 11 |
+
accelerate
|
mise.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tools]
|
| 2 |
+
elixir = "latest"
|
mix.exs
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule MLLearning.MixProject do
|
| 2 |
+
use Mix.Project
|
| 3 |
+
|
| 4 |
+
def project do
|
| 5 |
+
[
|
| 6 |
+
app: :ml_elixir_learning,
|
| 7 |
+
version: "0.1.0",
|
| 8 |
+
elixir: "~> 1.16",
|
| 9 |
+
start_permanent: Mix.env() == :prod,
|
| 10 |
+
deps: deps(),
|
| 11 |
+
aliases: aliases()
|
| 12 |
+
]
|
| 13 |
+
end
|
| 14 |
+
|
| 15 |
+
def application do
|
| 16 |
+
[
|
| 17 |
+
extra_applications: [:logger]
|
| 18 |
+
]
|
| 19 |
+
end
|
| 20 |
+
|
| 21 |
+
defp deps do
|
| 22 |
+
[
|
| 23 |
+
# Core ML
|
| 24 |
+
{:nx, "~> 0.10"},
|
| 25 |
+
{:axon, "~> 0.7"},
|
| 26 |
+
{:exla, "~> 0.10"},
|
| 27 |
+
|
| 28 |
+
# Pre-trained models (Hugging Face Hub)
|
| 29 |
+
{:bumblebee, "~> 0.6"},
|
| 30 |
+
|
| 31 |
+
# Data
|
| 32 |
+
{:explorer, "~> 0.10"},
|
| 33 |
+
{:scidata, "~> 0.1"},
|
| 34 |
+
|
| 35 |
+
# Image / Audio IO
|
| 36 |
+
{:stb_image, "~> 0.6"},
|
| 37 |
+
|
| 38 |
+
# Visualization
|
| 39 |
+
{:kino, "~> 0.15"},
|
| 40 |
+
{:kino_vega_lite, "~> 0.1"},
|
| 41 |
+
{:vega_lite, "~> 0.1"},
|
| 42 |
+
|
| 43 |
+
# HTTP
|
| 44 |
+
{:req, "~> 0.5"},
|
| 45 |
+
|
| 46 |
+
# Web framework (for LiveView deployment section)
|
| 47 |
+
{:phoenix, "~> 1.7"},
|
| 48 |
+
{:phoenix_live_view, "~> 1.0"},
|
| 49 |
+
{:bandit, "~> 1.0"},
|
| 50 |
+
|
| 51 |
+
# Dev / Test
|
| 52 |
+
{:benchee, "~> 1.0", only: :dev},
|
| 53 |
+
{:ex_doc, "~> 0.34", only: :dev, runtime: false}
|
| 54 |
+
]
|
| 55 |
+
end
|
| 56 |
+
|
| 57 |
+
defp aliases do
|
| 58 |
+
[
|
| 59 |
+
setup: ["deps.get", "deps.compile"],
|
| 60 |
+
nb: ["livebook", "server"]
|
| 61 |
+
]
|
| 62 |
+
end
|
| 63 |
+
end
|
mix.lock
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%{
|
| 2 |
+
"aws_signature": {:hex, :aws_signature, "0.4.2", "1b35482c89ff5b91f5ead647a2bbc0d9620877479b44800915de92bacf9f1476", [:rebar3], [], "hexpm", "1df4a2d1dff200c7bdfa8f9f935efc71a51273adfc6dd39a9f2cc937e01baa01"},
|
| 3 |
+
"axon": {:hex, :axon, "0.7.0", "2e2c6d93b4afcfa812566b8922204fa022b60081e86ebd411df4db7ea30f5457", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.9", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "ee9857a143c9486597ceff434e6ca833dc1241be6158b01025b8217757ed1036"},
|
| 4 |
+
"bandit": {:hex, :bandit, "1.10.4", "02b9734c67c5916a008e7eb7e2ba68aaea6f8177094a5f8d95f1fb99069aac17", [:mix], [{:hpax, "~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.18", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "a5faf501042ac1f31d736d9d4a813b3db4ef812e634583b6a457b0928798a51d"},
|
| 5 |
+
"benchee": {:hex, :benchee, "1.5.0", "4d812c31d54b0ec0167e91278e7de3f596324a78a096fd3d0bea68bb0c513b10", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:statistex, "~> 1.1", [hex: :statistex, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "5b075393aea81b8ae74eadd1c28b1d87e8a63696c649d8293db7c4df3eb67535"},
|
| 6 |
+
"bumblebee": {:hex, :bumblebee, "0.6.3", "c0028643c92de93258a9804da1d4d48797eaf7911b702464b3b3dd2cc7f938f1", [:mix], [{:axon, "~> 0.7.0", [hex: :axon, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0 or ~> 0.10.0", [hex: :nx, repo: "hexpm", optional: false]}, {:nx_image, "~> 0.1.0", [hex: :nx_image, repo: "hexpm", optional: false]}, {:nx_signal, "~> 0.2.0", [hex: :nx_signal, repo: "hexpm", optional: false]}, {:progress_bar, "~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: false]}, {:safetensors, "~> 0.1.3", [hex: :safetensors, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.4", [hex: :tokenizers, repo: "hexpm", optional: false]}, {:unpickler, "~> 0.1.0", [hex: :unpickler, repo: "hexpm", optional: false]}, {:unzip, "~> 0.12.0", [hex: :unzip, repo: "hexpm", optional: false]}], "hexpm", "c619197787561f8e5fb2ffba269c341654accaec9d591999b7fddd55761dd079"},
|
| 7 |
+
"castore": {:hex, :castore, "1.0.18", "5e43ef0ec7d31195dfa5a65a86e6131db999d074179d2ba5a8de11fe14570f55", [:mix], [], "hexpm", "f393e4fe6317829b158fb74d86eb681f737d2fe326aa61ccf6293c4104957e34"},
|
| 8 |
+
"cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"},
|
| 9 |
+
"complex": {:hex, :complex, "0.6.0", "b0130086a7a8c33574d293b2e0e250f4685580418eac52a5658a4bd148f3ccf1", [:mix], [], "hexpm", "0a5fa95580dcaf30fcd60fe1aaf24327c0fe401e98c24d892e172e79498269f9"},
|
| 10 |
+
"decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"},
|
| 11 |
+
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
|
| 12 |
+
"earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"},
|
| 13 |
+
"elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
|
| 14 |
+
"ex_doc": {:hex, :ex_doc, "0.40.1", "67542e4b6dde74811cfd580e2c0149b78010fd13001fda7cfeb2b2c2ffb1344d", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "bcef0e2d360d93ac19f01a85d58f91752d930c0a30e2681145feea6bd3516e00"},
|
| 15 |
+
"exla": {:hex, :exla, "0.10.0", "93e7d75a774fbc06ce05b96de20c4b01bda413b315238cb3c727c09a05d2bc3a", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1.0", [hex: :fine, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.10.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.9.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "16fffdb64667d7f0a3bc683fdcd2792b143a9b345e4b1f1d5cd50330c63d8119"},
|
| 16 |
+
"explorer": {:hex, :explorer, "0.10.1", "ff6e2a7d7a480c86708c3300cc67a3fd6982c7d28b51f4db2f411aa476c9ecdb", [:mix], [{:adbc, "~> 0.1", [hex: :adbc, repo: "hexpm", optional: true]}, {:aws_signature, "~> 0.3", [hex: :aws_signature, repo: "hexpm", optional: false]}, {:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:decimal, "~> 2.1", [hex: :decimal, repo: "hexpm", optional: false]}, {:flame, "~> 0.3", [hex: :flame, repo: "hexpm", optional: true]}, {:fss, "~> 0.1", [hex: :fss, repo: "hexpm", optional: false]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}, {:rustler, "~> 0.34.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.7", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}, {:table, "~> 0.1.2", [hex: :table, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1 or ~> 4.0.0", [hex: :table_rex, repo: "hexpm", optional: false]}], "hexpm", "4e3efc45d4981a568405a181ebf206ba208622a5e94048c9d713b27a053c3197"},
|
| 17 |
+
"finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"},
|
| 18 |
+
"fine": {:hex, :fine, "0.1.4", "b19a89c1476c7c57afb5f9314aed5960b5bc95d5277de4cb5ee8e1d1616ce379", [:mix], [], "hexpm", "be3324cc454a42d80951cf6023b9954e9ff27c6daa255483b3e8d608670303f5"},
|
| 19 |
+
"fss": {:hex, :fss, "0.1.1", "9db2344dbbb5d555ce442ac7c2f82dd975b605b50d169314a20f08ed21e08642", [:mix], [], "hexpm", "78ad5955c7919c3764065b21144913df7515d52e228c09427a004afe9c1a16b0"},
|
| 20 |
+
"hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"},
|
| 21 |
+
"jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
|
| 22 |
+
"kino": {:hex, :kino, "0.19.0", "fc8e46fefeb2d083e757633ddd810c499754b7a1f87ba1e92844791c1eca87c2", [:mix], [{:nx, "~> 0.1", [hex: :nx, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}, {:table, "~> 0.1.2", [hex: :table, repo: "hexpm", optional: false]}], "hexpm", "195956058730acc397375a398835c7818f92aa01a1d32c03ecb5dffff74c0b8c"},
|
| 23 |
+
"kino_vega_lite": {:hex, :kino_vega_lite, "0.1.13", "03c00405987a2202e4b8014ee55eb7f5727691b3f13d76a3764f6eeccef45322", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: false]}, {:vega_lite, "~> 0.1.8", [hex: :vega_lite, repo: "hexpm", optional: false]}], "hexpm", "00c72bc270e7b9d3c339f726cdab0012fd3f2fc75e36c7548e0f250fe420fa10"},
|
| 24 |
+
"makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
|
| 25 |
+
"makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
|
| 26 |
+
"makeup_erlang": {:hex, :makeup_erlang, "1.0.3", "4252d5d4098da7415c390e847c814bad3764c94a814a0b4245176215615e1035", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "953297c02582a33411ac6208f2c6e55f0e870df7f80da724ed613f10e6706afd"},
|
| 27 |
+
"mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"},
|
| 28 |
+
"mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"},
|
| 29 |
+
"nimble_csv": {:hex, :nimble_csv, "1.3.0", "b7f998dc62b222bce9596e46f028c7a5af04cb5dde6df2ea197c583227c54971", [:mix], [], "hexpm", "41ccdc18f7c8f8bb06e84164fc51635321e80d5a3b450761c4997d620925d619"},
|
| 30 |
+
"nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
|
| 31 |
+
"nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"},
|
| 32 |
+
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
|
| 33 |
+
"nx": {:hex, :nx, "0.10.0", "128e4a094cb790f663e20e1334b127c1f2a4df54edfb8b13c22757ec33133b4f", [:mix], [{:complex, "~> 0.6", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3db8892c124aeee091df0e6fbf8e5bf1b81f502eb0d4f5ba63e6378ebcae7da4"},
|
| 34 |
+
"nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"},
|
| 35 |
+
"nx_signal": {:hex, :nx_signal, "0.2.0", "e1ca0318877b17c81ce8906329f5125f1e2361e4c4235a5baac8a95ee88ea98e", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "7247e5e18a177a59c4cb5355952900c62fdeadeb2bad02a9a34237b68744e2bb"},
|
| 36 |
+
"phoenix": {:hex, :phoenix, "1.8.5", "919db335247e6d4891764dc3063415b0d2457641c5f9b3751b5df03d8e20bbcf", [:mix], [{:bandit, "~> 1.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "83b2bb125127e02e9f475c8e3e92736325b5b01b0b9b05407bcb4083b7a32485"},
|
| 37 |
+
"phoenix_html": {:hex, :phoenix_html, "4.3.0", "d3577a5df4b6954cd7890c84d955c470b5310bb49647f0a114a6eeecc850f7ad", [:mix], [], "hexpm", "3eaa290a78bab0f075f791a46a981bbe769d94bc776869f4f3063a14f30497ad"},
|
| 38 |
+
"phoenix_live_view": {:hex, :phoenix_live_view, "1.1.28", "8a8e123d018025f756605a2fb02a4854f0d3cd7b207f710fef1fd5d9d72d0254", [:mix], [{:igniter, ">= 0.6.16 and < 1.0.0-0", [hex: :igniter, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:lazy_html, "~> 0.1.0", [hex: :lazy_html, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6.15 or ~> 1.7.0 or ~> 1.8.0-rc", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "24faad535b65089642c3a7d84088109dc58f49c1f1c5a978659855d643466353"},
|
| 39 |
+
"phoenix_pubsub": {:hex, :phoenix_pubsub, "2.2.0", "ff3a5616e1bed6804de7773b92cbccfc0b0f473faf1f63d7daf1206c7aeaaa6f", [:mix], [], "hexpm", "adc313a5bf7136039f63cfd9668fde73bba0765e0614cba80c06ac9460ff3e96"},
|
| 40 |
+
"phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"},
|
| 41 |
+
"plug": {:hex, :plug, "1.19.1", "09bac17ae7a001a68ae393658aa23c7e38782be5c5c00c80be82901262c394c0", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "560a0017a8f6d5d30146916862aaf9300b7280063651dd7e532b8be168511e62"},
|
| 42 |
+
"plug_crypto": {:hex, :plug_crypto, "2.1.1", "19bda8184399cb24afa10be734f84a16ea0a2bc65054e23a62bb10f06bc89491", [:mix], [], "hexpm", "6470bce6ffe41c8bd497612ffde1a7e4af67f36a15eea5f921af71cf3e11247c"},
|
| 43 |
+
"polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"},
|
| 44 |
+
"progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"},
|
| 45 |
+
"req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"},
|
| 46 |
+
"rustler_precompiled": {:hex, :rustler_precompiled, "0.9.0", "3a052eda09f3d2436364645cc1f13279cf95db310eb0c17b0d8f25484b233aa0", [:mix], [{:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "471d97315bd3bf7b64623418b3693eedd8e47de3d1cb79a0ac8f9da7d770d94c"},
|
| 47 |
+
"safetensors": {:hex, :safetensors, "0.1.3", "7ff3c22391e213289c713898481d492c9c28a49ab1d0705b72630fb8360426b2", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "fe50b53ea59fde4e723dd1a2e31cfdc6013e69343afac84c6be86d6d7c562c14"},
|
| 48 |
+
"scidata": {:hex, :scidata, "0.1.8", "80b1a470efb4f4a2fc2a7d26711217948d8bc2e17f636d41d846035b44f0ce8a", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.1", [hex: :nimble_csv, repo: "hexpm", optional: false]}, {:stb_image, "~> 0.4", [hex: :stb_image, repo: "hexpm", optional: true]}], "hexpm", "8fb125a2a55f52fea78f708f7bbd7bb3b8c233bf3943b411bc659252bcb12bab"},
|
| 49 |
+
"statistex": {:hex, :statistex, "1.1.0", "7fec1eb2f580a0d2c1a05ed27396a084ab064a40cfc84246dbfb0c72a5c761e5", [:mix], [], "hexpm", "f5950ea26ad43246ba2cce54324ac394a4e7408fdcf98b8e230f503a0cba9cf5"},
|
| 50 |
+
"stb_image": {:hex, :stb_image, "0.6.10", "76975279e2a130f53dc670bf6f6b1cdc4fbd7ab6293053e88e7fb6a7eae0e836", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}], "hexpm", "26125372cfeda209084d3670417fab6819cfccd0e66c657678ecc48314369e8d"},
|
| 51 |
+
"table": {:hex, :table, "0.1.2", "87ad1125f5b70c5dea0307aa633194083eb5182ec537efc94e96af08937e14a8", [:mix], [], "hexpm", "7e99bc7efef806315c7e65640724bf165c3061cdc5d854060f74468367065029"},
|
| 52 |
+
"table_rex": {:hex, :table_rex, "3.1.1", "0c67164d1714b5e806d5067c1e96ff098ba7ae79413cc075973e17c38a587caa", [:mix], [], "hexpm", "678a23aba4d670419c23c17790f9dcd635a4a89022040df7d5d772cb21012490"},
|
| 53 |
+
"telemetry": {:hex, :telemetry, "1.4.1", "ab6de178e2b29b58e8256b92b382ea3f590a47152ca3651ea857a6cae05ac423", [:rebar3], [], "hexpm", "2172e05a27531d3d31dd9782841065c50dd5c3c7699d95266b2edd54c2dafa1c"},
|
| 54 |
+
"thousand_island": {:hex, :thousand_island, "1.4.3", "2158209580f633be38d43ec4e3ce0a01079592b9657afff9080d5d8ca149a3af", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6e4ce09b0fd761a58594d02814d40f77daff460c48a7354a15ab353bb998ea0b"},
|
| 55 |
+
"tokenizers": {:hex, :tokenizers, "0.5.1", "b0975d92b4ee5b18e8f47b5d65b9d5f1e583d9130189b1a2620401af4e7d4b35", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "5f08d97cc7f2ed3d71d370d68120da6d3de010948ccf676c9c0eb591ba4bacc9"},
|
| 56 |
+
"unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"},
|
| 57 |
+
"unzip": {:hex, :unzip, "0.12.0", "beed92238724732418b41eba77dcb7f51e235b707406c05b1732a3052d1c0f36", [:mix], [], "hexpm", "95655b72db368e5a84951f0bed586ac053b55ee3815fd96062fce10ce4fc998d"},
|
| 58 |
+
"vega_lite": {:hex, :vega_lite, "0.1.11", "2b261d21618f6fa9f63bb4542f0262982d2e40aea3f83e935788fe172902b3c2", [:mix], [{:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: false]}], "hexpm", "d18c3f11369c14bdf36ab53010c06bf5505c221cbcb32faac7420cf6926b3c50"},
|
| 59 |
+
"websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"},
|
| 60 |
+
"websock_adapter": {:hex, :websock_adapter, "0.5.9", "43dc3ba6d89ef5dec5b1d0a39698436a1e856d000d84bf31a3149862b01a287f", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "5534d5c9adad3c18a0f58a9371220d75a803bf0b9a3d87e6fe072faaeed76a08"},
|
| 61 |
+
"xla": {:hex, :xla, "0.9.1", "cca0040ff94902764007a118871bfc667f1a0085d4a5074533a47d6b58bec61e", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "eb5e443ae5391b1953f253e051f2307bea183b59acee138053a9300779930daf"},
|
| 62 |
+
}
|
ml_e2e_template.livemd
ADDED
|
@@ -0,0 +1,1227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Machine Learning in Elixir — End-to-End with Bumblebee + Hugging Face
|
| 2 |
+
|
| 3 |
+
<!-- livebook:{"persist_outputs":true} -->
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
## Skills
|
| 8 |
+
- `hf_cli.md` – Hugging Face CLI usage
|
| 9 |
+
- `hf_jobs.md` – Running workloads on HF Jobs
|
| 10 |
+
- `training_trl.md` – TRL model training
|
| 11 |
+
- `hf_dataset_viewer.md` – Dataset Viewer API
|
| 12 |
+
- `gradio.md` – Gradio UI integration
|
| 13 |
+
- *(Full catalog at https://skills.sh/huggingface/skills)*
|
| 14 |
+
|
| 15 |
+
This Livebook is a complete end-to-end ML template built on the Elixir ML ecosystem
|
| 16 |
+
from _Machine Learning in Elixir_ by Sean Moriarity, with **Bumblebee** as the core
|
| 17 |
+
integration layer to the **Hugging Face Hub**.
|
| 18 |
+
|
| 19 |
+
**What we cover:**
|
| 20 |
+
|
| 21 |
+
| Section | Library | Task |
|
| 22 |
+
|---------|---------|------|
|
| 23 |
+
| Foundations | `Nx` | Tensors, gradients, JIT compilation |
|
| 24 |
+
| Pre-trained NLP | `Bumblebee` | Fill-mask, sentiment, NER, zero-shot |
|
| 25 |
+
| Pre-trained Vision | `Bumblebee` | Image classification (ViT, ResNet) |
|
| 26 |
+
| Audio | `Bumblebee` | Speech-to-text (Whisper) |
|
| 27 |
+
| Generative AI | `Bumblebee` | Text generation (GPT-2) & Stable Diffusion |
|
| 28 |
+
| Embeddings | `Bumblebee` | Sentence similarity search |
|
| 29 |
+
| Custom Training | `Axon` | Build & train from scratch |
|
| 30 |
+
| Fine-tuning | `Bumblebee` | Boosted training on pre-trained models |
|
| 31 |
+
| Serving | `Nx.Serving` | Production batched inference |
|
| 32 |
+
| Deployment | `Phoenix` | LiveView integration pattern |
|
| 33 |
+
| Interactive UI | `Kino` | Live input forms & charts |
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Section 0 — Install & Configure
|
| 38 |
+
|
| 39 |
+
```elixir
|
| 40 |
+
Mix.install([
|
| 41 |
+
{:nx, "~> 0.10"},
|
| 42 |
+
{:axon, "~> 0.7"},
|
| 43 |
+
{:exla, "~> 0.10"},
|
| 44 |
+
{:bumblebee, "~> 0.6"},
|
| 45 |
+
{:kino, "~> 0.15"},
|
| 46 |
+
{:kino_vega_lite, "~> 0.1"},
|
| 47 |
+
{:vega_lite, "~> 0.1"},
|
| 48 |
+
{:stb_image, "~> 0.6"},
|
| 49 |
+
{:req, "~> 0.5"}
|
| 50 |
+
])
|
| 51 |
+
|
| 52 |
+
Nx.global_default_backend(EXLA.Backend)
|
| 53 |
+
|
| 54 |
+
IO.puts("Nx version: #{Nx.version()}")
|
| 55 |
+
IO.puts("Axon version: #{Axon.version()}")
|
| 56 |
+
IO.puts("EXLA backend: #{inspect(Nx.default_backend())}")
|
| 57 |
+
IO.puts("Bumblebee loaded: #{Code.ensure_loaded?(Bumblebee)}")
|
| 58 |
+
IO.puts("Cache dir: #{Bumblebee.cache_dir()}")
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
## Section 1 — Nx Foundations
|
| 64 |
+
|
| 65 |
+
Before diving into Bumblebee, let's ground ourselves in Nx — the numerical
|
| 66 |
+
backbone that every Elixir ML library builds on.
|
| 67 |
+
|
| 68 |
+
### 1.1 Tensors
|
| 69 |
+
|
| 70 |
+
```elixir
|
| 71 |
+
import Nx
|
| 72 |
+
|
| 73 |
+
# Scalars, vectors, matrices, higher-order
|
| 74 |
+
scalar = Nx.tensor(3.14)
|
| 75 |
+
vector = Nx.tensor([1.0, 2.0, 3.0])
|
| 76 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 77 |
+
cube = Nx.iota({2, 3, 4})
|
| 78 |
+
|
| 79 |
+
IO.puts("scalar shape=#{inspect(Nx.shape(scalar))} type=#{Nx.type(scalar)}")
|
| 80 |
+
IO.puts("vector shape=#{inspect(Nx.shape(vector))} type=#{Nx.type(vector)}")
|
| 81 |
+
IO.puts("matrix shape=#{inspect(Nx.shape(matrix))} type=#{Nx.type(matrix)}")
|
| 82 |
+
IO.puts("cube shape=#{inspect(Nx.shape(cube))} type=#{Nx.type(cube)}")
|
| 83 |
+
|
| 84 |
+
{scalar, vector, matrix}
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 1.2 Operations & Broadcasting
|
| 88 |
+
|
| 89 |
+
```elixir
|
| 90 |
+
a = Nx.tensor([1.0, 2.0, 3.0])
|
| 91 |
+
b = Nx.tensor([10.0, 20.0, 30.0])
|
| 92 |
+
|
| 93 |
+
# Element-wise
|
| 94 |
+
IO.puts("add: #{inspect(Nx.add(a, b))}")
|
| 95 |
+
IO.puts("multiply: #{inspect(Nx.multiply(a, b))}")
|
| 96 |
+
IO.puts("pow: #{inspect(Nx.pow(a, 2))}")
|
| 97 |
+
|
| 98 |
+
# Reductions
|
| 99 |
+
IO.puts("sum: #{Nx.sum(a)}")
|
| 100 |
+
IO.puts("mean: #{Nx.mean(a)}")
|
| 101 |
+
IO.puts("std: #{Nx.standard_deviation(a)}")
|
| 102 |
+
|
| 103 |
+
# Dot product
|
| 104 |
+
IO.puts("dot: #{Nx.dot(a, b)}")
|
| 105 |
+
|
| 106 |
+
# Matrix multiply
|
| 107 |
+
m1 = Nx.tensor([[1.0, 2.0], [3.0, 4.0]])
|
| 108 |
+
m2 = Nx.tensor([[5.0, 6.0], [7.0, 8.0]])
|
| 109 |
+
IO.puts("matmul: #{inspect(Nx.dot(m1, m2))}")
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
### 1.3 Automatic Differentiation
|
| 113 |
+
|
| 114 |
+
This is how models learn — computing gradients of loss with respect to parameters.
|
| 115 |
+
|
| 116 |
+
```elixir
|
| 117 |
+
defmodule AutoDiff do
|
| 118 |
+
import Nx.Defn
|
| 119 |
+
|
| 120 |
+
# Define a function f(x) = x³ + 2x²
|
| 121 |
+
defnp f(x), do: Nx.pow(x, 3) + 2 * Nx.pow(x, 2)
|
| 122 |
+
|
| 123 |
+
# Compute gradient symbolically
|
| 124 |
+
def grad_f(x), do: Nx.Defn.grad(x, &f/1)
|
| 125 |
+
|
| 126 |
+
# Gradient of MSE loss
|
| 127 |
+
defnp mse_loss(y_true, y_pred) do
|
| 128 |
+
Nx.mean(Nx.pow(y_true - y_pred, 2))
|
| 129 |
+
end
|
| 130 |
+
|
| 131 |
+
def grad_mse(y_true, y_pred, w) do
|
| 132 |
+
Nx.Defn.grad(w, fn weights ->
|
| 133 |
+
predictions = Nx.dot(y_pred, weights)
|
| 134 |
+
mse_loss(y_true, predictions)
|
| 135 |
+
end)
|
| 136 |
+
end
|
| 137 |
+
end
|
| 138 |
+
|
| 139 |
+
x = Nx.tensor(3.0)
|
| 140 |
+
IO.puts("f(3) = #{Nx.to_number(AutoDiff.f(x))}")
|
| 141 |
+
IO.puts("f'(3) = #{Nx.to_number(AutoDiff.grad_f(x))}")
|
| 142 |
+
IO.puts("expected = 3*9 + 2*2*3 = #{3 * 9 + 2 * 2 * 3}")
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### 1.4 JIT Compilation
|
| 146 |
+
|
| 147 |
+
```elixir
|
| 148 |
+
# JIT compiles for GPU/CPU acceleration — critical for inference speed
|
| 149 |
+
defmodule FastMath do
|
| 150 |
+
import Nx.Defn
|
| 151 |
+
|
| 152 |
+
defn slow_sigmoid(x) do
|
| 153 |
+
1 / (1 + Nx.exp(-x))
|
| 154 |
+
end
|
| 155 |
+
end
|
| 156 |
+
|
| 157 |
+
# JIT-compiled version
|
| 158 |
+
fast_sigmoid = Nx.Defn.jit(&FastMath.slow_sigmoid/1)
|
| 159 |
+
|
| 160 |
+
input = Nx.tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
|
| 161 |
+
|
| 162 |
+
# Benchmark
|
| 163 |
+
{us, result} = :timer.tc(fn -> fast_sigmoid.(input) end)
|
| 164 |
+
IO.puts("JIT sigmoid: #{us}μs result=#{inspect(result)}")
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
|
| 169 |
+
## Section 2 — Bumblebee: Pre-trained NLP Models
|
| 170 |
+
|
| 171 |
+
Bumblebee loads pre-trained models from the Hugging Face Hub and wraps them
|
| 172 |
+
in `Nx.Serving` for production-ready batched inference.
|
| 173 |
+
|
| 174 |
+
### 2.1 Fill-Mask (BERT)
|
| 175 |
+
|
| 176 |
+
```elixir
|
| 177 |
+
{:ok, bert_model} = Bumblebee.load_model({:hf, "google-bert/bert-base-uncased"})
|
| 178 |
+
{:ok, bert_tokenizer} = Bumblebee.load_tokenizer({:hf, "google-bert/bert-base-uncased"})
|
| 179 |
+
|
| 180 |
+
bert_fill_mask = Bumblebee.Text.fill_mask(bert_model, bert_tokenizer)
|
| 181 |
+
|
| 182 |
+
results = Nx.Serving.run(bert_fill_mask, "Elixir is a [MASK] language.")
|
| 183 |
+
IO.inspect(results, label: "Fill-Mask Results")
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
### 2.2 Sentiment Analysis (DistilBERT)
|
| 187 |
+
|
| 188 |
+
```elixir
|
| 189 |
+
{:ok, sentiment_model} =
|
| 190 |
+
Bumblebee.load_model({:hf, "distilbert/distilbert-base-uncased-finetuned-sst-2-english"})
|
| 191 |
+
|
| 192 |
+
{:ok, sentiment_tokenizer} =
|
| 193 |
+
Bumblebee.load_tokenizer({:hf, "distilbert/distilbert-base-uncased-finetuned-sst-2-english"})
|
| 194 |
+
|
| 195 |
+
sentiment_serving = Bumblebee.Text.Classification.text_classification(
|
| 196 |
+
sentiment_model,
|
| 197 |
+
sentiment_tokenizer
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
texts = [
|
| 201 |
+
"Machine learning in Elixir is amazing!",
|
| 202 |
+
"This tutorial is boring and confusing.",
|
| 203 |
+
"The BEAM VM handles concurrent ML workloads well.",
|
| 204 |
+
"I love how functional programming simplifies ML pipelines."
|
| 205 |
+
]
|
| 206 |
+
|
| 207 |
+
Enum.each(texts, fn text ->
|
| 208 |
+
result = Nx.Serving.run(sentiment_serving, text)
|
| 209 |
+
IO.puts("#{inspect(result)} ← \"#{String.slice(text, 0..50)}...\"")
|
| 210 |
+
end)
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
### 2.3 Named Entity Recognition (BERT-NER)
|
| 214 |
+
|
| 215 |
+
```elixir
|
| 216 |
+
{:ok, ner_model} = Bumblebee.load_model({:hf, "dslim/bert-base-NER"})
|
| 217 |
+
{:ok, ner_tokenizer} = Bumblebee.load_tokenizer({:hf, "google-bert/bert-base-uncased"})
|
| 218 |
+
|
| 219 |
+
ner_serving = Bumblebee.Text.TokenClassification.token_classification(
|
| 220 |
+
ner_model,
|
| 221 |
+
ner_tokenizer
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
ner_text = "Sean Moriarity wrote Machine Learning in Elixir for Pragmatic Bookshelf. He lives in Austin, Texas."
|
| 225 |
+
ner_result = Nx.Serving.run(ner_serving, ner_text)
|
| 226 |
+
|
| 227 |
+
IO.puts("Input: #{ner_text}")
|
| 228 |
+
IO.inspect(ner_result, label: "NER Entities")
|
| 229 |
+
```
|
| 230 |
+
|
| 231 |
+
### 2.4 Zero-Shot Classification
|
| 232 |
+
|
| 233 |
+
No fine-tuning needed — classify arbitrary text into custom categories.
|
| 234 |
+
|
| 235 |
+
```elixir
|
| 236 |
+
{:ok, zs_model} = Bumblebee.load_model({:hf, "facebook/bart-large-mnli"})
|
| 237 |
+
{:ok, zs_tokenizer} = Bumblebee.load_tokenizer({:hf, "facebook/bart-large-mnli"})
|
| 238 |
+
|
| 239 |
+
zs_serving = Bumblebee.Text.ZeroShotClassification.zero_shot_classification(
|
| 240 |
+
zs_model,
|
| 241 |
+
zs_tokenizer
|
| 242 |
+
)
|
| 243 |
+
|
| 244 |
+
article = """
|
| 245 |
+
Nx brings numerical computing to the BEAM, enabling machine learning
|
| 246 |
+
pipelines that leverage Elixir's concurrency and fault tolerance.
|
| 247 |
+
Bumblebee provides access to thousands of pre-trained models from
|
| 248 |
+
the Hugging Face Hub directly in Livebook.
|
| 249 |
+
"""
|
| 250 |
+
|
| 251 |
+
labels = ["technology", "sports", "politics", "science", "finance"]
|
| 252 |
+
zs_result = Nx.Serving.run(zs_serving, %{text: article, labels: labels})
|
| 253 |
+
|
| 254 |
+
IO.inspect(zs_result, label: "Zero-Shot Classification")
|
| 255 |
+
```
|
| 256 |
+
|
| 257 |
+
---
|
| 258 |
+
|
| 259 |
+
## Section 3 — Bumblebee: Vision Models
|
| 260 |
+
|
| 261 |
+
### 3.1 Image Classification (ViT / ResNet)
|
| 262 |
+
|
| 263 |
+
```elixir
|
| 264 |
+
{:ok, vit_model} = Bumblebee.load_model({:hf, "google/vit-base-patch16-224"})
|
| 265 |
+
{:ok, vit_featurizer} = Bumblebee.load_featurizer({:hf, "google/vit-base-patch16-224"})
|
| 266 |
+
|
| 267 |
+
vit_serving = Bumblebee.Vision.ImageClassification.image_classification(
|
| 268 |
+
vit_model,
|
| 269 |
+
vit_featurizer
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
+
# Download a sample image
|
| 273 |
+
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
|
| 274 |
+
image_data = Req.get!(image_url).body
|
| 275 |
+
|
| 276 |
+
# Save and load
|
| 277 |
+
File.write!("/tmp/sample.jpg", image_data)
|
| 278 |
+
{:ok, image} = StbImage.read_file("/tmp/sample.jpg")
|
| 279 |
+
|
| 280 |
+
IO.puts("Image: #{StbImage.width(image)}x#{StbImage.height(image)}")
|
| 281 |
+
|
| 282 |
+
img_result = Nx.Serving.run(vit_serving, image)
|
| 283 |
+
IO.inspect(img_result, label: "Image Classification")
|
| 284 |
+
```
|
| 285 |
+
|
| 286 |
+
### 3.2 Batch Image Classification
|
| 287 |
+
|
| 288 |
+
```elixir
|
| 289 |
+
# Nx.Serving automatically batches multiple requests for GPU efficiency
|
| 290 |
+
images = [image] # In production, this would be multiple images
|
| 291 |
+
|
| 292 |
+
batch_result = Nx.Serving.run(vit_serving, images)
|
| 293 |
+
IO.inspect(batch_result, label: "Batch Classification")
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
---
|
| 297 |
+
|
| 298 |
+
## Section 4 — Bumblebee: Text Generation
|
| 299 |
+
|
| 300 |
+
### 4.1 GPT-2 Text Generation
|
| 301 |
+
|
| 302 |
+
```elixir
|
| 303 |
+
{:ok, gpt2_model} = Bumblebee.load_model({:hf, "openai-community/gpt2"})
|
| 304 |
+
{:ok, gpt2_tokenizer} = Bumblebee.load_tokenizer({:hf, "openai-community/gpt2"})
|
| 305 |
+
{:ok, gpt2_generation_config} = Bumblebee.load_generation_config({:hf, "openai-community/gpt2"})
|
| 306 |
+
|
| 307 |
+
gpt2_serving = Bumblebee.Text.generation(
|
| 308 |
+
gpt2_model,
|
| 309 |
+
gpt2_tokenizer,
|
| 310 |
+
gpt2_generation_config,
|
| 311 |
+
compile: [batch_size: 1, sequence_length: 64],
|
| 312 |
+
defn_options: [compiler: EXLA]
|
| 313 |
+
)
|
| 314 |
+
|
| 315 |
+
prompt = "Machine learning in Elixir is"
|
| 316 |
+
gen_result = Nx.Serving.run(gpt2_serving, prompt)
|
| 317 |
+
|
| 318 |
+
IO.puts("Prompt: #{prompt}")
|
| 319 |
+
IO.puts("Output: #{inspect(gen_result)}")
|
| 320 |
+
```
|
| 321 |
+
|
| 322 |
+
### 4.2 Interactive Text Generation
|
| 323 |
+
|
| 324 |
+
<!-- livebook:{"attrs":{"source":"# Interactive Text Generation\nalias Kino.Input\n\nprompt_input = Kino.Input.text(\"Prompt\", default: \"The future of ML in Elixir is\")\nmax_tokens = Kino.Input.number(\"Max tokens\", default: 50)\n\nform = Kino.Control.form(\n %{prompt: prompt_input, max_tokens: max_tokens},\n submit: \"Generate\"\n)\n\nKino.listen(form, fn %{data: %{prompt: prompt, max_tokens: max_tokens}} ->\n config = Bumblebee.configure(gpt2_generation_config, max_new_tokens: trunc(max_tokens))\n serving = Bumblebee.Text.generation(gpt2_model, gpt2_tokenizer, config)\n result = Nx.Serving.run(serving, prompt)\n Kino.Text.new(\"#{prompt}#{result.text}\")\nend)\n\nform","title":"GPT-2 Generator"},"chunks":[{"chunk":"","type":"Elixir"}],"kind":"Elixir","source_type":"cell"} -->
|
| 325 |
+
|
| 326 |
+
```elixir
|
| 327 |
+
alias Kino.Input
|
| 328 |
+
|
| 329 |
+
prompt_input = Kino.Input.text("Prompt", default: "The future of ML in Elixir is")
|
| 330 |
+
max_tokens_input = Kino.Input.number("Max tokens", default: 50)
|
| 331 |
+
|
| 332 |
+
form =
|
| 333 |
+
Kino.Control.form(
|
| 334 |
+
%{prompt: prompt_input, max_tokens: max_tokens_input},
|
| 335 |
+
submit: "Generate"
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
+
Kino.listen(form, fn %{data: %{prompt: prompt, max_tokens: max_tokens}} ->
|
| 339 |
+
config = Bumblebee.configure(gpt2_generation_config, max_new_tokens: trunc(max_tokens))
|
| 340 |
+
|
| 341 |
+
serving =
|
| 342 |
+
Bumblebee.Text.generation(
|
| 343 |
+
gpt2_model,
|
| 344 |
+
gpt2_tokenizer,
|
| 345 |
+
config,
|
| 346 |
+
defn_options: [compiler: EXLA]
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
result = Nx.Serving.run(serving, prompt)
|
| 350 |
+
Kino.Text.new("#{prompt}#{result.text}")
|
| 351 |
+
end)
|
| 352 |
+
|
| 353 |
+
form
|
| 354 |
+
```
|
| 355 |
+
|
| 356 |
+
---
|
| 357 |
+
|
| 358 |
+
## Section 5 — Bumblebee: Embeddings & Similarity
|
| 359 |
+
|
| 360 |
+
### 5.1 Sentence Embeddings
|
| 361 |
+
|
| 362 |
+
```elixir
|
| 363 |
+
{:ok, emb_model} = Bumblebee.load_model({:hf, "sentence-transformers/all-MiniLM-L6-v2"})
|
| 364 |
+
{:ok, emb_tokenizer} = Bumblebee.load_tokenizer({:hf, "sentence-transformers/all-MiniLM-L6-v2"})
|
| 365 |
+
|
| 366 |
+
embedding_serving = Bumblebee.Text.TextEmbedding.text_embedding(
|
| 367 |
+
emb_model,
|
| 368 |
+
emb_tokenizer
|
| 369 |
+
)
|
| 370 |
+
|
| 371 |
+
sentences = [
|
| 372 |
+
"Nx provides numerical computing for Elixir",
|
| 373 |
+
"Axon is a neural network library built on Nx",
|
| 374 |
+
"Bumblebee connects Elixir to the Hugging Face Hub",
|
| 375 |
+
"I enjoy cooking Italian food on weekends",
|
| 376 |
+
"The weather forecast predicts rain tomorrow"
|
| 377 |
+
]
|
| 378 |
+
|
| 379 |
+
embeddings =
|
| 380 |
+
Enum.map(sentences, fn s ->
|
| 381 |
+
result = Nx.Serving.run(embedding_serving, s)
|
| 382 |
+
result.embedding
|
| 383 |
+
end)
|
| 384 |
+
|
| 385 |
+
IO.puts("Generated #{length(embeddings)} embeddings")
|
| 386 |
+
IO.puts("Embedding dim: #{inspect(Nx.shape(hd(embeddings)))}")
|
| 387 |
+
```
|
| 388 |
+
|
| 389 |
+
### 5.2 Cosine Similarity Search
|
| 390 |
+
|
| 391 |
+
```elixir
|
| 392 |
+
defmodule Similarity do
|
| 393 |
+
import Nx
|
| 394 |
+
|
| 395 |
+
defn cosine_similarity(a, b) do
|
| 396 |
+
a_norm = a / Nx.sqrt(Nx.sum(a * a))
|
| 397 |
+
b_norm = b / Nx.sqrt(Nx.sum(b * b))
|
| 398 |
+
Nx.sum(a_norm * b_norm)
|
| 399 |
+
end
|
| 400 |
+
|
| 401 |
+
def find_most_similar(query_embedding, corpus_embeddings) do
|
| 402 |
+
corpus_embeddings
|
| 403 |
+
|> Enum.map(fn emb -> Nx.to_number(cosine_similarity(query_embedding, emb)) end)
|
| 404 |
+
|> Enum.with_index()
|
| 405 |
+
|> Enum.sort_by(fn {score, _idx} -> -score end)
|
| 406 |
+
end
|
| 407 |
+
end
|
| 408 |
+
|
| 409 |
+
query = "How do I build neural networks in Elixir?"
|
| 410 |
+
query_emb = Nx.Serving.run(embedding_serving, query).embedding
|
| 411 |
+
|
| 412 |
+
IO.puts("Query: \"#{query}\"\n")
|
| 413 |
+
|
| 414 |
+
Similarity.find_most_similar(query_emb, embeddings)
|
| 415 |
+
|> Enum.each(fn {score, idx} ->
|
| 416 |
+
IO.puts(" #{Float.round(score, 4)} #{Enum.at(sentences, idx)}")
|
| 417 |
+
end)
|
| 418 |
+
```
|
| 419 |
+
|
| 420 |
+
---
|
| 421 |
+
|
| 422 |
+
## Section 5.5 — Bumblebee: Audio (Whisper Speech-to-Text)
|
| 423 |
+
|
| 424 |
+
Bumblebee wraps OpenAI's Whisper for speech-to-text directly in Elixir.
|
| 425 |
+
|
| 426 |
+
```elixir
|
| 427 |
+
{:ok, whisper_model} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
|
| 428 |
+
{:ok, whisper_featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
|
| 429 |
+
{:ok, whisper_tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
|
| 430 |
+
{:ok, whisper_generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
|
| 431 |
+
|
| 432 |
+
whisper_serving = Bumblebee.Audio.speech_to_text(
|
| 433 |
+
whisper_model,
|
| 434 |
+
whisper_featurizer,
|
| 435 |
+
whisper_tokenizer,
|
| 436 |
+
whisper_generation_config
|
| 437 |
+
)
|
| 438 |
+
|
| 439 |
+
# Download a sample audio file
|
| 440 |
+
audio_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"
|
| 441 |
+
audio_data = Req.get!(audio_url).body
|
| 442 |
+
File.write!("/tmp/sample_audio.flac", audio_data)
|
| 443 |
+
|
| 444 |
+
# Transcribe
|
| 445 |
+
{:ok, audio_info} = Bumblebee.Audio.LoadedAudio.from_file("/tmp/sample_audio.flac")
|
| 446 |
+
whisper_result = Nx.Serving.run(whisper_serving, audio_info)
|
| 447 |
+
|
| 448 |
+
IO.puts("Transcription: #{whisper_result.text}")
|
| 449 |
+
```
|
| 450 |
+
|
| 451 |
+
### Interactive Audio Transcription
|
| 452 |
+
|
| 453 |
+
```elixir
|
| 454 |
+
audio_file_input = Kino.Input.file("Upload audio (WAV/FLAC/MP3)")
|
| 455 |
+
|
| 456 |
+
audio_form = Kino.Control.form(%{file: audio_file_input}, submit: "Transcribe")
|
| 457 |
+
|
| 458 |
+
Kino.listen(audio_form, fn %{data: %{file: file}} ->
|
| 459 |
+
if file do
|
| 460 |
+
{:ok, audio} = Bumblebee.Audio.LoadedAudio.from_file(file.path)
|
| 461 |
+
result = Nx.Serving.run(whisper_serving, audio)
|
| 462 |
+
Kino.Text.new(result.text)
|
| 463 |
+
else
|
| 464 |
+
Kino.Text.new("Please upload an audio file.")
|
| 465 |
+
end
|
| 466 |
+
end)
|
| 467 |
+
|
| 468 |
+
audio_form
|
| 469 |
+
```
|
| 470 |
+
|
| 471 |
+
---
|
| 472 |
+
|
| 473 |
+
## Section 5.6 — Bumblebee: Stable Diffusion (Image Generation)
|
| 474 |
+
|
| 475 |
+
Generate images from text prompts using Stable Diffusion — all within Elixir.
|
| 476 |
+
|
| 477 |
+
> **Note:** This section requires a GPU with 4GB+ VRAM. On CPU it will be very slow.
|
| 478 |
+
|
| 479 |
+
```elixir
|
| 480 |
+
{:ok, sd_info} = Bumblebee.load_model({:hf, "CompVis/stable-diffusion-v1-4", subdir: "unet"})
|
| 481 |
+
{:ok, sd_vae} = Bumblebee.load_model({:hf, "CompVis/stable-diffusion-v1-4", subdir: "vae"})
|
| 482 |
+
{:ok, sd_clip} = Bumblebee.load_model({:hf, "CompVis/stable-diffusion-v1-4", subdir: "text_encoder"})
|
| 483 |
+
{:ok, sd_tokenizer} = Bumblebee.load_tokenizer({:hf, "CompVis/stable-diffusion-v1-4", subdir: "tokenizer"})
|
| 484 |
+
{:ok, sd_scheduler} = Bumblebee.load_scheduler({:hf, "CompVis/stable-diffusion-v1-4", subdir: "scheduler"})
|
| 485 |
+
|
| 486 |
+
# Image generation serving
|
| 487 |
+
sd_serving = Bumblebee.Diffusion.StableDiffusion.text_to_image(
|
| 488 |
+
sd_info,
|
| 489 |
+
sd_vae,
|
| 490 |
+
sd_clip,
|
| 491 |
+
sd_tokenizer,
|
| 492 |
+
sd_scheduler,
|
| 493 |
+
num_steps: 20,
|
| 494 |
+
guidance_scale: 7.5
|
| 495 |
+
)
|
| 496 |
+
|
| 497 |
+
# Generate
|
| 498 |
+
sd_result = Nx.Serving.run(sd_serving, %{
|
| 499 |
+
prompt: "a photograph of a bee programming in elixir, highly detailed, 4k",
|
| 500 |
+
negative_prompt: "blurry, low quality"
|
| 501 |
+
})
|
| 502 |
+
|
| 503 |
+
# Display the generated image
|
| 504 |
+
Kino.Image.new(sd_result.image)
|
| 505 |
+
```
|
| 506 |
+
|
| 507 |
+
### Interactive Image Generation
|
| 508 |
+
|
| 509 |
+
```elixir
|
| 510 |
+
prompt_input = Kino.Input.text("Prompt", default: "a cute robot bee coding in Elixir")
|
| 511 |
+
neg_input = Kino.Input.text("Negative prompt", default: "blurry, ugly, low quality")
|
| 512 |
+
steps_input = Kino.Input.number("Steps", default: 20)
|
| 513 |
+
|
| 514 |
+
sd_form = Kino.Control.form(
|
| 515 |
+
%{prompt: prompt_input, negative: neg_input, steps: steps_input},
|
| 516 |
+
submit: "Generate"
|
| 517 |
+
)
|
| 518 |
+
|
| 519 |
+
Kino.listen(sd_form, fn %{data: %{prompt: prompt, negative: negative, steps: steps}} ->
|
| 520 |
+
result = Nx.Serving.run(sd_serving, %{
|
| 521 |
+
prompt: prompt,
|
| 522 |
+
negative_prompt: negative,
|
| 523 |
+
num_steps: trunc(steps)
|
| 524 |
+
})
|
| 525 |
+
Kino.Layout.grid([Kino.Image.new(result.image)], columns: 1)
|
| 526 |
+
end)
|
| 527 |
+
|
| 528 |
+
sd_form
|
| 529 |
+
```
|
| 530 |
+
|
| 531 |
+
---
|
| 532 |
+
|
| 533 |
+
## Section 6 — Custom Training with Axon
|
| 534 |
+
|
| 535 |
+
Beyond pre-trained models, Axon lets you build and train from scratch.
|
| 536 |
+
|
| 537 |
+
### 6.1 Synthetic Data
|
| 538 |
+
|
| 539 |
+
```elixir
|
| 540 |
+
defmodule Data do
|
| 541 |
+
import Nx
|
| 542 |
+
|
| 543 |
+
def make_classification(n \\ 2000, features \\ 4, classes \\ 3, seed \\ 42) do
|
| 544 |
+
key = Nx.Random.key(seed)
|
| 545 |
+
{centers, key} = Nx.Random.normal(key, 0, 2, shape: {classes, features})
|
| 546 |
+
{labels_raw, key} = Nx.Random.randint(key, 0, classes, shape: {n})
|
| 547 |
+
{noise, _key} = Nx.Random.normal(key, 0, 0.4, shape: {n, features})
|
| 548 |
+
|
| 549 |
+
x = Nx.take(centers, labels_raw) |> Nx.add(noise)
|
| 550 |
+
y = Nx.equal(Nx.new_axis(labels_raw, 1), Nx.iota({1, classes})) |> Nx.as_type(:f32)
|
| 551 |
+
|
| 552 |
+
# Normalize
|
| 553 |
+
mu = Nx.mean(x, axes: [0])
|
| 554 |
+
sigma = Nx.standard_deviation(x, axes: [0])
|
| 555 |
+
x_norm = Nx.divide(Nx.subtract(x, mu), sigma)
|
| 556 |
+
|
| 557 |
+
# Split
|
| 558 |
+
split = round(n * 0.8)
|
| 559 |
+
{{x_norm[0..(split - 1)//1], y[0..(split - 1)//1]},
|
| 560 |
+
{x_norm[split..(n - 1)//1], y[split..(n - 1)//1]}}
|
| 561 |
+
end
|
| 562 |
+
end
|
| 563 |
+
|
| 564 |
+
{{x_train, y_train}, {x_test, y_test} = _test_data} = Data.make_classification()
|
| 565 |
+
IO.puts("Train: #{Nx.axis_size(x_train, 0)} | Test: #{Nx.axis_size(x_test, 0)}")
|
| 566 |
+
IO.puts("Features: #{Nx.axis_size(x_train, 1)} | Classes: #{Nx.axis_size(y_train, 1)}")
|
| 567 |
+
```
|
| 568 |
+
|
| 569 |
+
### 6.2 Define Model
|
| 570 |
+
|
| 571 |
+
```elixir
|
| 572 |
+
import Axon
|
| 573 |
+
|
| 574 |
+
n_features = Nx.axis_size(x_train, 1)
|
| 575 |
+
n_classes = Nx.axis_size(y_train, 1)
|
| 576 |
+
|
| 577 |
+
model =
|
| 578 |
+
Axon.input("features", shape: {nil, n_features})
|
| 579 |
+
|> Axon.dense(64, activation: :relu, name: "hidden_1")
|
| 580 |
+
|> Axon.batch_norm(name: "bn_1")
|
| 581 |
+
|> Axon.dropout(rate: 0.2, name: "drop_1")
|
| 582 |
+
|> Axon.dense(32, activation: :relu, name: "hidden_2")
|
| 583 |
+
|> Axon.batch_norm(name: "bn_2")
|
| 584 |
+
|> Axon.dropout(rate: 0.2, name: "drop_2")
|
| 585 |
+
|> Axon.dense(n_classes, activation: :softmax, name: "output")
|
| 586 |
+
|
| 587 |
+
Axon.Display.as_table(model, Nx.template({1, n_features}, :f32)) |> IO.puts()
|
| 588 |
+
```
|
| 589 |
+
|
| 590 |
+
### 6.3 Train
|
| 591 |
+
|
| 592 |
+
```elixir
|
| 593 |
+
train_data =
|
| 594 |
+
x_train
|
| 595 |
+
|> Nx.to_batched(64)
|
| 596 |
+
|> Enum.zip(Nx.to_batched(y_train, 64))
|
| 597 |
+
|> Stream.map(fn {xb, yb} -> %{"features" => xb, "targets" => yb} end)
|
| 598 |
+
|
| 599 |
+
val_data =
|
| 600 |
+
x_test
|
| 601 |
+
|> Nx.to_batched(64)
|
| 602 |
+
|> Enum.zip(Nx.to_batched(y_test, 64))
|
| 603 |
+
|> Stream.map(fn {xb, yb} -> %{"features" => xb, "targets" => yb} end)
|
| 604 |
+
|
| 605 |
+
trained_state =
|
| 606 |
+
model
|
| 607 |
+
|> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.adam(0.001))
|
| 608 |
+
|> Axon.Loop.metric(:accuracy, "acc")
|
| 609 |
+
|> Axon.Loop.validate(model, val_data)
|
| 610 |
+
|> Axon.Loop.early_stopping("validation_loss", patience: 5, mode: :min)
|
| 611 |
+
|> Axon.Loop.run(train_data, %{}, epochs: 30, compiler: EXLA)
|
| 612 |
+
|
| 613 |
+
IO.puts("Training complete!")
|
| 614 |
+
```
|
| 615 |
+
|
| 616 |
+
### 6.4 Evaluate & Predict
|
| 617 |
+
|
| 618 |
+
```elixir
|
| 619 |
+
# JIT-compiled prediction
|
| 620 |
+
predict_fn = Nx.Defn.jit(fn params, input -> Axon.predict(model, params, input) end)
|
| 621 |
+
|
| 622 |
+
# Evaluate on test set
|
| 623 |
+
test_preds = predict_fn.(trained_state, x_test)
|
| 624 |
+
pred_classes = Nx.argmax(test_preds, axis: 1)
|
| 625 |
+
true_classes = Nx.argmax(y_test, axis: 1)
|
| 626 |
+
|
| 627 |
+
accuracy = Nx.mean(Nx.equal(pred_classes, true_classes) |> Nx.as_type(:f32))
|
| 628 |
+
IO.puts("Test accuracy: #{Float.round(Nx.to_number(accuracy) * 100, 2)}%")
|
| 629 |
+
|
| 630 |
+
# Single prediction
|
| 631 |
+
sample = x_test[0]
|
| 632 |
+
probs = predict_fn.(trained_state, Nx.new_axis(sample, 0)) |> Nx.squeeze()
|
| 633 |
+
IO.puts("Sample prediction: class #{Nx.argmax(probs) |> Nx.to_number()} probs=#{inspect(Nx.to_flat_list(probs) |> Enum.map(&Float.round(&1, 4)))}")
|
| 634 |
+
```
|
| 635 |
+
|
| 636 |
+
### 6.5 Visualize Training
|
| 637 |
+
|
| 638 |
+
```elixir
|
| 639 |
+
# Visualize predictions vs actual on first 2 features
|
| 640 |
+
alias VegaLite, as: Vl
|
| 641 |
+
|
| 642 |
+
scatter_data =
|
| 643 |
+
Enum.map(0..(min(Nx.axis_size(x_test, 0), 500) - 1), fn i ->
|
| 644 |
+
%{
|
| 645 |
+
"f1" => Nx.to_number(x_test[i][0]),
|
| 646 |
+
"f2" => Nx.to_number(x_test[i][1]),
|
| 647 |
+
"actual" => Nx.to_number(Nx.argmax(y_test[i])),
|
| 648 |
+
"predicted" => Nx.to_number(pred_classes[i])
|
| 649 |
+
}
|
| 650 |
+
end)
|
| 651 |
+
|
| 652 |
+
Vl.new(
|
| 653 |
+
title: "Test Predictions (first 2 features)",
|
| 654 |
+
width: 500,
|
| 655 |
+
height: 400
|
| 656 |
+
)
|
| 657 |
+
|> Vl.data_from_values(scatter_data)
|
| 658 |
+
|> Vl.layers([
|
| 659 |
+
Vl.new()
|
| 660 |
+
|> Vl.mark(:circle, opacity: 0.7, size: 40)
|
| 661 |
+
|> Vl.encode_field(:x, "f1", type: :quantitative)
|
| 662 |
+
|> Vl.encode_field(:y, "f2", type: :quantitative)
|
| 663 |
+
|> Vl.encode_field(:color, "actual", type: :nominal, title: "Actual"),
|
| 664 |
+
Vl.new()
|
| 665 |
+
|> Vl.mark(:point, opacity: 0.3, size: 20, shape: "cross")
|
| 666 |
+
|> Vl.encode_field(:x, "f1", type: :quantitative)
|
| 667 |
+
|> Vl.encode_field(:y, "f2", type: :quantitative)
|
| 668 |
+
|> Vl.encode_field(:color, "predicted", type: :nominal, title: "Predicted")
|
| 669 |
+
])
|
| 670 |
+
```
|
| 671 |
+
|
| 672 |
+
---
|
| 673 |
+
|
| 674 |
+
## Section 7 — Nx.Serving: Production Inference
|
| 675 |
+
|
| 676 |
+
`Nx.Serving` batches requests from multiple clients and runs them efficiently
|
| 677 |
+
on GPU — essential for production deployment.
|
| 678 |
+
|
| 679 |
+
### 7.1 Serving Architecture
|
| 680 |
+
|
| 681 |
+
```elixir
|
| 682 |
+
# Start a named serving process (typically in your Application supervisor)
|
| 683 |
+
# In production, you would do:
|
| 684 |
+
#
|
| 685 |
+
# Nx.Serving.start_link(
|
| 686 |
+
# serving: sentiment_serving,
|
| 687 |
+
# name: MyApp.SentimentServing,
|
| 688 |
+
# batch_size: 16,
|
| 689 |
+
# batch_timeout: 100
|
| 690 |
+
# )
|
| 691 |
+
#
|
| 692 |
+
# Then in your Phoenix controller:
|
| 693 |
+
#
|
| 694 |
+
# Nx.Serving.run(MyApp.SentimentServing, text)
|
| 695 |
+
|
| 696 |
+
# For demonstration, run directly:
|
| 697 |
+
IO.puts("Serving is stateless — just call Nx.Serving.run/2")
|
| 698 |
+
IO.puts("For production, wrap in Nx.Serving.start_link/1 for automatic batching")
|
| 699 |
+
```
|
| 700 |
+
|
| 701 |
+
### 7.2 Benchmark Inference
|
| 702 |
+
|
| 703 |
+
```elixir
|
| 704 |
+
# Benchmark a single inference
|
| 705 |
+
{us_single, _} = :timer.tc(fn ->
|
| 706 |
+
Nx.Serving.run(sentiment_serving, "This is a test sentence for benchmarking.")
|
| 707 |
+
end)
|
| 708 |
+
|
| 709 |
+
IO.puts("Single inference: #{Float.round(us_single / 1000, 2)}ms")
|
| 710 |
+
|
| 711 |
+
# Benchmark batch
|
| 712 |
+
batch = for i <- 1..8, do: "Test sentence number #{i} for batch benchmarking."
|
| 713 |
+
{us_batch, _} = :timer.tc(fn ->
|
| 714 |
+
Enum.map(batch, fn text -> Nx.Serving.run(sentiment_serving, text) end)
|
| 715 |
+
end)
|
| 716 |
+
|
| 717 |
+
IO.puts("8 sequential inferences: #{Float.round(us_batch / 1000, 2)}ms")
|
| 718 |
+
IO.puts("Per-request (batched): #{Float.round(us_batch / 8000, 2)}ms")
|
| 719 |
+
```
|
| 720 |
+
|
| 721 |
+
---
|
| 722 |
+
|
| 723 |
+
## Section 7.5 — Fine-tuning with Bumblebee
|
| 724 |
+
|
| 725 |
+
Bumblebee supports **boosted training** — taking a pre-trained model head and
|
| 726 |
+
fine-tuning it on your own labeled data. This is far more practical than
|
| 727 |
+
training from scratch when you have limited data.
|
| 728 |
+
|
| 729 |
+
### 7.5.1 Load Pre-trained Model for Fine-tuning
|
| 730 |
+
|
| 731 |
+
```elixir
|
| 732 |
+
# Load a pre-trained BERT with a classification head
|
| 733 |
+
{:ok, ft_spec} = Bumblebee.load_spec({:hf, "google-bert/bert-base-uncased"},
|
| 734 |
+
module: Bumblebee.Text.BertForSequenceClassification
|
| 735 |
+
)
|
| 736 |
+
|
| 737 |
+
# Configure for your number of classes
|
| 738 |
+
ft_spec = Bumblebee.configure(ft_spec, num_labels: 3)
|
| 739 |
+
|
| 740 |
+
# Load with the custom spec — only the encoder weights are pre-trained,
|
| 741 |
+
# the classification head is randomly initialized
|
| 742 |
+
{:ok, ft_model_info} = Bumblebee.load_model({:hf, "google-bert/bert-base-uncased"},
|
| 743 |
+
spec: ft_spec
|
| 744 |
+
)
|
| 745 |
+
|
| 746 |
+
%{model: ft_model, params: ft_params} = ft_model_info
|
| 747 |
+
IO.puts("Model loaded. Pre-trained encoder + fresh classification head.")
|
| 748 |
+
IO.puts("Total params: #{inspect(Nx.size(ft_params.parameters))}")
|
| 749 |
+
```
|
| 750 |
+
|
| 751 |
+
### 7.5.2 Prepare Labeled Data
|
| 752 |
+
|
| 753 |
+
```elixir
|
| 754 |
+
# Your labeled dataset — in practice, load from CSV/JSON
|
| 755 |
+
training_texts = [
|
| 756 |
+
"The BEAM VM provides fault-tolerant concurrent computing",
|
| 757 |
+
"Nx brings numerical computing to the Elixir ecosystem",
|
| 758 |
+
"Phoenix LiveView enables real-time web applications",
|
| 759 |
+
"Bumblebee integrates Hugging Face models into Elixir",
|
| 760 |
+
"Axon provides a functional API for neural networks",
|
| 761 |
+
"The weather is sunny and warm today",
|
| 762 |
+
"Football season starts next month",
|
| 763 |
+
"The stock market rallied on positive earnings reports",
|
| 764 |
+
# ... add more samples per class
|
| 765 |
+
]
|
| 766 |
+
|
| 767 |
+
training_labels = [0, 0, 0, 0, 0, 1, 1, 2] # 0=tech, 1=sports, 2=finance
|
| 768 |
+
|
| 769 |
+
# Tokenize
|
| 770 |
+
{:ok, ft_tokenizer} = Bumblebee.load_tokenizer({:hf, "google-bert/bert-base-uncased"})
|
| 771 |
+
|
| 772 |
+
encoded = Bumblebee.apply_tokenizer(ft_tokenizer, training_texts)
|
| 773 |
+
IO.inspect(encoded, label: "Tokenized input")
|
| 774 |
+
```
|
| 775 |
+
|
| 776 |
+
### 7.5.3 Training Loop
|
| 777 |
+
|
| 778 |
+
```elixir
|
| 779 |
+
defmodule FineTuner do
|
| 780 |
+
import Nx.Defn
|
| 781 |
+
|
| 782 |
+
defn cross_entropy_loss(logits, labels) do
|
| 783 |
+
log_probs = Axon.Activations.log_softmax(logits, axis: -1)
|
| 784 |
+
-Nx.mean(Nx.sum(log_probs * labels, axes: [-1]))
|
| 785 |
+
end
|
| 786 |
+
|
| 787 |
+
def train_step(model, params, batch, learning_rate \\ 2.0e-5) do
|
| 788 |
+
{loss, gradient} = Nx.Defn.value_and_grad(params, fn p ->
|
| 789 |
+
output = Axon.predict(model, p, batch)
|
| 790 |
+
cross_entropy_loss(output.logits, batch["labels"])
|
| 791 |
+
end)
|
| 792 |
+
|
| 793 |
+
new_params =
|
| 794 |
+
Map.new(params, fn {k, v} ->
|
| 795 |
+
{k, Nx.subtract(v, Nx.multiply(learning_rate, gradient[k] || 0))}
|
| 796 |
+
end)
|
| 797 |
+
|
| 798 |
+
{loss, new_params}
|
| 799 |
+
end
|
| 800 |
+
end
|
| 801 |
+
|
| 802 |
+
# Example training (simplified — real training uses Axon.Loop)
|
| 803 |
+
epochs = 3
|
| 804 |
+
batch_size = 4
|
| 805 |
+
|
| 806 |
+
for epoch <- 1..epochs do
|
| 807 |
+
encoded
|
| 808 |
+
|> Nx.to_batched(batch_size)
|
| 809 |
+
|> Enum.reduce(ft_params, fn batch, params ->
|
| 810 |
+
labels = Nx.eye(3) |> Nx.take(Nx.tensor(Enum.take(training_labels, batch_size)))
|
| 811 |
+
batch_with_labels = Map.put(batch, "labels", labels)
|
| 812 |
+
|
| 813 |
+
{loss, new_params} = FineTuner.train_step(ft_model, params, batch_with_labels)
|
| 814 |
+
|
| 815 |
+
if rem(epoch, 1) == 0 do
|
| 816 |
+
IO.puts("Epoch #{epoch}, Loss: #{Float.round(Nx.to_number(loss), 4)}")
|
| 817 |
+
end
|
| 818 |
+
|
| 819 |
+
new_params
|
| 820 |
+
end)
|
| 821 |
+
end
|
| 822 |
+
```
|
| 823 |
+
|
| 824 |
+
> **Tip:** For production fine-tuning, use `Axon.Loop` with proper data
|
| 825 |
+
> pipelines, learning rate scheduling, and mixed precision. The above
|
| 826 |
+
> demonstrates the concept — see `Bumblebee` examples on GitHub for
|
| 827 |
+
> full fine-tuning recipes.
|
| 828 |
+
|
| 829 |
+
---
|
| 830 |
+
|
| 831 |
+
## Section 7.6 — Model Export (ONNX / GGUF)
|
| 832 |
+
|
| 833 |
+
### 7.6.1 Export a Bumblebee model to ONNX
|
| 834 |
+
|
| 835 |
+
```elixir
|
| 836 |
+
# Load a pretrained model
|
| 837 |
+
{:ok, spec} = Bumblebee.load_spec({:hf, "distilbert/bert-base-uncased"}, module: Bumblebee.Text.BertForSequenceClassification)
|
| 838 |
+
{:ok, model_info} = Bumblebee.load_model({:hf, "distilbert/bert-base-uncased"}, spec: spec)
|
| 839 |
+
|
| 840 |
+
# Export to ONNX (requires `onnx` library installed)
|
| 841 |
+
Bumblebee.export(model_info.model, format: :onnx, path: "distilbert.onnx")
|
| 842 |
+
```
|
| 843 |
+
|
| 844 |
+
The resulting `distilbert.onnx` can be loaded in any ONNX runtime.
|
| 845 |
+
|
| 846 |
+
### 7.6.2 Export to GGUF (via `gguf-converter`)
|
| 847 |
+
|
| 848 |
+
```bash
|
| 849 |
+
gguf-converter --onnx distilbert.onnx --output distilbert.gguf
|
| 850 |
+
```
|
| 851 |
+
|
| 852 |
+
> **Note:** GGUF is primarily for decoder models (e.g., Llama). Ensure compatibility.
|
| 853 |
+
|
| 854 |
+
---
|
| 855 |
+
|
| 856 |
+
## Section 7.7 — Phoenix LiveView Integration
|
| 857 |
+
|
| 858 |
+
Deploy ML models into Phoenix web apps using `Nx.Serving` and LiveView.
|
| 859 |
+
|
| 860 |
+
### 7.7.1 Application Supervision Tree
|
| 861 |
+
|
| 862 |
+
```elixir
|
| 863 |
+
# In your Phoenix app's application.ex:
|
| 864 |
+
defmodule MyApp.Application do
|
| 865 |
+
use Application
|
| 866 |
+
|
| 867 |
+
@impl true
|
| 868 |
+
def start(_type, _args) do
|
| 869 |
+
children = [
|
| 870 |
+
MyAppWeb.Telemetry,
|
| 871 |
+
{DNSCluster, query: Application.get_env(:my_app, :dns_cluster_query) || :ignore},
|
| 872 |
+
{Phoenix.PubSub, name: MyApp.PubSub},
|
| 873 |
+
|
| 874 |
+
# ─── ML Servings ───────────────────────────────────────
|
| 875 |
+
# Sentiment analysis serving
|
| 876 |
+
{Nx.Serving,
|
| 877 |
+
serving: sentiment_serving(),
|
| 878 |
+
name: MyApp.SentimentServing,
|
| 879 |
+
batch_size: 16,
|
| 880 |
+
batch_timeout: 100},
|
| 881 |
+
|
| 882 |
+
# Image classification serving
|
| 883 |
+
{Nx.Serving,
|
| 884 |
+
serving: image_serving(),
|
| 885 |
+
name: MyApp.ImageServing,
|
| 886 |
+
batch_size: 8,
|
| 887 |
+
batch_timeout: 200},
|
| 888 |
+
|
| 889 |
+
# Embedding serving (for similarity search)
|
| 890 |
+
{Nx.Serving,
|
| 891 |
+
serving: embedding_serving(),
|
| 892 |
+
name: MyApp.EmbeddingServing,
|
| 893 |
+
batch_size: 32,
|
| 894 |
+
batch_timeout: 50},
|
| 895 |
+
|
| 896 |
+
MyAppWeb.Endpoint
|
| 897 |
+
]
|
| 898 |
+
|
| 899 |
+
opts = [strategy: :one_for_one, name: MyApp.Supervisor]
|
| 900 |
+
Supervisor.start_link(children, opts)
|
| 901 |
+
end
|
| 902 |
+
|
| 903 |
+
defp sentiment_serving do
|
| 904 |
+
{:ok, model} = Bumblebee.load_model({:hf, "distilbert/distilbert-base-uncased-finetuned-sst-2-english"})
|
| 905 |
+
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "distilbert/distilbert-base-uncased-finetuned-sst-2-english"})
|
| 906 |
+
Bumblebee.Text.Classification.text_classification(model, tokenizer)
|
| 907 |
+
end
|
| 908 |
+
|
| 909 |
+
defp image_serving do
|
| 910 |
+
{:ok, model} = Bumblebee.load_model({:hf, "google/vit-base-patch16-224"})
|
| 911 |
+
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "google/vit-base-patch16-224"})
|
| 912 |
+
Bumblebee.Vision.ImageClassification.image_classification(model, featurizer)
|
| 913 |
+
end
|
| 914 |
+
|
| 915 |
+
defp embedding_serving do
|
| 916 |
+
{:ok, model} = Bumblebee.load_model({:hf, "sentence-transformers/all-MiniLM-L6-v2"})
|
| 917 |
+
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "sentence-transformers/all-MiniLM-L6-v2"})
|
| 918 |
+
Bumblebee.Text.TextEmbedding.text_embedding(model, tokenizer)
|
| 919 |
+
end
|
| 920 |
+
end
|
| 921 |
+
```
|
| 922 |
+
|
| 923 |
+
### 7.7.2 LiveView for Sentiment Analysis
|
| 924 |
+
|
| 925 |
+
```elixir
|
| 926 |
+
# lib/my_app_web/live/sentiment_live.ex
|
| 927 |
+
defmodule MyAppWeb.SentimentLive do
|
| 928 |
+
use MyAppWeb, :live_view
|
| 929 |
+
|
| 930 |
+
def mount(_params, _session, socket) do
|
| 931 |
+
{:ok, assign(socket, text: "", result: nil, loading: false)}
|
| 932 |
+
end
|
| 933 |
+
|
| 934 |
+
def handle_event("analyze", %{"text" => text}, socket) do
|
| 935 |
+
# Nx.Serving.run is synchronous and fast (batches with other requests)
|
| 936 |
+
result = Nx.Serving.run(MyApp.SentimentServing, text)
|
| 937 |
+
|
| 938 |
+
label = hd(result.predictions)
|
| 939 |
+
{:noreply, assign(socket,
|
| 940 |
+
text: text,
|
| 941 |
+
result: %{
|
| 942 |
+
label: label.label,
|
| 943 |
+
score: Float.round(label.score * 100, 1)
|
| 944 |
+
},
|
| 945 |
+
loading: false
|
| 946 |
+
)}
|
| 947 |
+
end
|
| 948 |
+
|
| 949 |
+
def handle_event("update_text", %{"text" => text}, socket) do
|
| 950 |
+
{:noreply, assign(socket, text: text)}
|
| 951 |
+
end
|
| 952 |
+
|
| 953 |
+
def render(assigns) do
|
| 954 |
+
~H"""
|
| 955 |
+
<div class="max-w-lg mx-auto p-6">
|
| 956 |
+
<h1 class="text-2xl font-bold mb-4">🐝 Sentiment Analysis</h1>
|
| 957 |
+
|
| 958 |
+
<form phx-submit="analyze">
|
| 959 |
+
<textarea
|
| 960 |
+
name="text"
|
| 961 |
+
phx-change="update_text"
|
| 962 |
+
class="w-full p-3 border rounded-lg"
|
| 963 |
+
rows="3"
|
| 964 |
+
placeholder="Enter text to analyze..."
|
| 965 |
+
><%= @text %></textarea>
|
| 966 |
+
<button type="submit" class="mt-2 px-4 py-2 bg-purple-600 text-white rounded-lg">
|
| 967 |
+
Analyze
|
| 968 |
+
</button>
|
| 969 |
+
</form>
|
| 970 |
+
|
| 971 |
+
<%= if @result do %>
|
| 972 |
+
<div class="mt-4 p-4 bg-gray-100 rounded-lg">
|
| 973 |
+
<p class="text-lg">
|
| 974 |
+
<strong><%= @result.label %></strong>
|
| 975 |
+
— <%= @result.score %>%
|
| 976 |
+
</p>
|
| 977 |
+
</div>
|
| 978 |
+
<% end %>
|
| 979 |
+
</div>
|
| 980 |
+
"""
|
| 981 |
+
end
|
| 982 |
+
end
|
| 983 |
+
```
|
| 984 |
+
|
| 985 |
+
### 7.7.3 LiveView for Image Classification
|
| 986 |
+
|
| 987 |
+
```elixir
|
| 988 |
+
# lib/my_app_web/live/vision_live.ex
|
| 989 |
+
defmodule MyAppWeb.VisionLive do
|
| 990 |
+
use MyAppWeb, :live_view
|
| 991 |
+
|
| 992 |
+
def mount(_params, _session, socket) do
|
| 993 |
+
{:ok, assign(socket, predictions: nil)}
|
| 994 |
+
end
|
| 995 |
+
|
| 996 |
+
def handle_event("classify", %{"image" => %Plug.Upload{path: path}}, socket) do
|
| 997 |
+
{:ok, image} = StbImage.read_file(path)
|
| 998 |
+
|
| 999 |
+
result = Nx.Serving.run(MyApp.ImageServing, image)
|
| 1000 |
+
|
| 1001 |
+
predictions =
|
| 1002 |
+
result.predictions
|
| 1003 |
+
|> Enum.take(5)
|
| 1004 |
+
|> Enum.map(fn %{label: label, score: score} ->
|
| 1005 |
+
%{label: label, score: Float.round(score * 100, 1)}
|
| 1006 |
+
end)
|
| 1007 |
+
|
| 1008 |
+
{:noreply, assign(socket, predictions: predictions)}
|
| 1009 |
+
end
|
| 1010 |
+
|
| 1011 |
+
def render(assigns) do
|
| 1012 |
+
~H"""
|
| 1013 |
+
<div class="max-w-lg mx-auto p-6">
|
| 1014 |
+
<h1 class="text-2xl font-bold mb-4">🖼️ Image Classification</h1>
|
| 1015 |
+
|
| 1016 |
+
<form phx-submit="classify" multipart>
|
| 1017 |
+
<input type="file" name="image" accept="image/*" class="mb-2" />
|
| 1018 |
+
<button type="submit" class="px-4 py-2 bg-purple-600 text-white rounded-lg">
|
| 1019 |
+
Classify
|
| 1020 |
+
</button>
|
| 1021 |
+
</form>
|
| 1022 |
+
|
| 1023 |
+
<%= if @predictions do %>
|
| 1024 |
+
<div class="mt-4">
|
| 1025 |
+
<%= for pred <- @predictions do %>
|
| 1026 |
+
<div class="flex justify-between py-1 border-b">
|
| 1027 |
+
<span><%= pred.label %></span>
|
| 1028 |
+
<span class="font-mono"><%= pred.score %>%</span>
|
| 1029 |
+
</div>
|
| 1030 |
+
<% end %>
|
| 1031 |
+
</div>
|
| 1032 |
+
<% end %>
|
| 1033 |
+
</div>
|
| 1034 |
+
"""
|
| 1035 |
+
end
|
| 1036 |
+
end
|
| 1037 |
+
```
|
| 1038 |
+
|
| 1039 |
+
### 7.7.4 API Endpoint (REST)
|
| 1040 |
+
|
| 1041 |
+
```elixir
|
| 1042 |
+
# lib/my_app_web/controllers/prediction_controller.ex
|
| 1043 |
+
defmodule MyAppWeb.PredictionController do
|
| 1044 |
+
use MyAppWeb, :controller
|
| 1045 |
+
|
| 1046 |
+
def sentiment(conn, %{"text" => text}) do
|
| 1047 |
+
result = Nx.Serving.run(MyApp.SentimentServing, text)
|
| 1048 |
+
|
| 1049 |
+
json(conn, %{
|
| 1050 |
+
predictions: Enum.map(result.predictions, fn p ->
|
| 1051 |
+
%{label: p.label, score: Float.round(p.score, 4)}
|
| 1052 |
+
end)
|
| 1053 |
+
})
|
| 1054 |
+
end
|
| 1055 |
+
|
| 1056 |
+
def embed(conn, %{"text" => text}) do
|
| 1057 |
+
result = Nx.Serving.run(MyApp.EmbeddingServing, text)
|
| 1058 |
+
|
| 1059 |
+
json(conn, %{
|
| 1060 |
+
embedding: Nx.to_flat_list(result.embedding),
|
| 1061 |
+
dimensions: Nx.size(result.embedding)
|
| 1062 |
+
})
|
| 1063 |
+
end
|
| 1064 |
+
end
|
| 1065 |
+
|
| 1066 |
+
# In router.ex:
|
| 1067 |
+
# scope "/api", MyAppWeb do
|
| 1068 |
+
# post "/sentiment", PredictionController, :sentiment
|
| 1069 |
+
# post "/embed", PredictionController, :embed
|
| 1070 |
+
# end
|
| 1071 |
+
```
|
| 1072 |
+
|
| 1073 |
+
---
|
| 1074 |
+
|
| 1075 |
+
## Section 8 — Interactive Playground
|
| 1076 |
+
|
| 1077 |
+
### 8.1 Text Classification UI
|
| 1078 |
+
|
| 1079 |
+
```elixir
|
| 1080 |
+
alias Kino.Input
|
| 1081 |
+
|
| 1082 |
+
text_input = Kino.Input.textarea("Enter text to classify", default: "Elixir is the best language for building scalable ML systems!")
|
| 1083 |
+
|
| 1084 |
+
class_form = Kino.Control.form(%{text: text_input}, submit: "Classify Sentiment")
|
| 1085 |
+
|
| 1086 |
+
Kino.listen(class_form, fn %{data: %{text: text}} ->
|
| 1087 |
+
result = Nx.Serving.run(sentiment_serving, text)
|
| 1088 |
+
|
| 1089 |
+
label_text =
|
| 1090 |
+
result
|
| 1091 |
+
|> Map.get(:predictions, [])
|
| 1092 |
+
|> Enum.map(fn %{label: label, score: score} ->
|
| 1093 |
+
"#{label}: #{Float.round(score * 100, 1)}%"
|
| 1094 |
+
end)
|
| 1095 |
+
|> Enum.join(" | ")
|
| 1096 |
+
|
| 1097 |
+
Kino.Text.new(label_text)
|
| 1098 |
+
end)
|
| 1099 |
+
|
| 1100 |
+
class_form
|
| 1101 |
+
```
|
| 1102 |
+
|
| 1103 |
+
### 8.2 Embedding Similarity UI
|
| 1104 |
+
|
| 1105 |
+
```elixir
|
| 1106 |
+
ref_input = Kino.Input.textarea("Reference text", default: "Nx provides numerical computing for the BEAM")
|
| 1107 |
+
query_input = Kino.Input.textarea("Query text", default: "How do I do math in Elixir?")
|
| 1108 |
+
|
| 1109 |
+
sim_form = Kino.Control.form(%{ref: ref_input, query: query_input}, submit: "Compute Similarity")
|
| 1110 |
+
|
| 1111 |
+
Kino.listen(sim_form, fn %{data: %{ref: ref, query: query}} ->
|
| 1112 |
+
ref_emb = Nx.Serving.run(embedding_serving, ref).embedding
|
| 1113 |
+
query_emb = Nx.Serving.run(embedding_serving, query).embedding
|
| 1114 |
+
score = Similarity.cosine_similarity(ref_emb, query_emb) |> Nx.to_number()
|
| 1115 |
+
|
| 1116 |
+
Kino.Text.new("Cosine similarity: #{Float.round(score, 6)}")
|
| 1117 |
+
end)
|
| 1118 |
+
|
| 1119 |
+
sim_form
|
| 1120 |
+
```
|
| 1121 |
+
|
| 1122 |
+
---
|
| 1123 |
+
## Section 8.3 — Distributed Training on BEAM Nodes
|
| 1124 |
+
|
| 1125 |
+
### 8.3.1 Using `Nx` with `Node.spawn/4`
|
| 1126 |
+
|
| 1127 |
+
```elixir
|
| 1128 |
+
# Assuming you have a cluster of BEAM nodes: node1@host, node2@host, ...
|
| 1129 |
+
nodes = [:"node1@host", :"node2@host"]
|
| 1130 |
+
|
| 1131 |
+
# Distribute a tensor computation across nodes
|
| 1132 |
+
defmodule DistTrainer do
|
| 1133 |
+
def compute(tensor) do
|
| 1134 |
+
Enum.map(nodes, fn node ->
|
| 1135 |
+
Node.spawn(node, fn -> Nx.mean(tensor) end)
|
| 1136 |
+
end)
|
| 1137 |
+
end
|
| 1138 |
+
end
|
| 1139 |
+
|
| 1140 |
+
tensor = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 1141 |
+
results = DistTrainer.compute(tensor)
|
| 1142 |
+
IO.inspect(results, label: "Means from each node")
|
| 1143 |
+
```
|
| 1144 |
+
|
| 1145 |
+
### 8.3.2 Distributed Axon training
|
| 1146 |
+
|
| 1147 |
+
```elixir
|
| 1148 |
+
# Using Axon with `Nx.Cluster` (requires `:zx` and `Nx.Cluster` setup)
|
| 1149 |
+
defmodule ClusterTrainer do
|
| 1150 |
+
use Axon
|
| 1151 |
+
|
| 1152 |
+
# Define a simple model
|
| 1153 |
+
defmodel do
|
| 1154 |
+
input({nil, 784})
|
| 1155 |
+
|> dense(128, activation: :relu)
|
| 1156 |
+
|> dense(10, activation: :softmax)
|
| 1157 |
+
end
|
| 1158 |
+
|
| 1159 |
+
# Launch training across nodes
|
| 1160 |
+
def train(data, labels) do
|
| 1161 |
+
model = model()
|
| 1162 |
+
# `Nx.Cluster.train/5` will split data and run steps on each BEAM node
|
| 1163 |
+
Nx.Cluster.train(model, data, labels, nodes: nodes, epochs: 5)
|
| 1164 |
+
end
|
| 1165 |
+
end
|
| 1166 |
+
```
|
| 1167 |
+
|
| 1168 |
+
> **Tip:** Ensure all nodes have the same code version and the required dependencies (`nx`, `axon`). Use `:net_kernel.connect_node/1` or a clustering tool such as `libcluster` to form the cluster.
|
| 1169 |
+
|
| 1170 |
+
---
|
| 1171 |
+
|
| 1172 |
+
## Section 9 — Summary & Next Steps
|
| 1173 |
+
|
| 1174 |
+
```elixir
|
| 1175 |
+
Kino.Markdown.new("""
|
| 1176 |
+
## What You've Built
|
| 1177 |
+
|
| 1178 |
+
| Pipeline Stage | Implementation | Key Library |
|
| 1179 |
+
|----------------|----------------|-------------|
|
| 1180 |
+
| **Tensors** | Creation, ops, broadcasting, gradients | `Nx` |
|
| 1181 |
+
| **JIT Compile** | GPU-accelerated inference | `EXLA` |
|
| 1182 |
+
| **Fill-Mask** | BERT masked language modeling | `Bumblebee` |
|
| 1183 |
+
| **Sentiment** | DistilBERT text classification | `Bumblebee` |
|
| 1184 |
+
| **NER** | Named entity recognition | `Bumblebee` |
|
| 1185 |
+
| **Zero-Shot** | Classify without fine-tuning | `Bumblebee` |
|
| 1186 |
+
| **Image CLS** | Vision Transformer (ViT) | `Bumblebee` |
|
| 1187 |
+
| **Audio** | Whisper speech-to-text | `Bumblebee` |
|
| 1188 |
+
| **Stable Diffusion** | Text-to-image generation | `Bumblebee` |
|
| 1189 |
+
| **Text Gen** | GPT-2 autoregressive generation | `Bumblebee` |
|
| 1190 |
+
| **Embeddings** | Sentence similarity search | `Bumblebee` |
|
| 1191 |
+
| **Custom MLP** | Train from scratch with Axon | `Axon` |
|
| 1192 |
+
| **Fine-tuning** | Boosted training on pre-trained models | `Bumblebee` |
|
| 1193 |
+
| **Serving** | Production batched inference | `Nx.Serving` |
|
| 1194 |
+
| **Phoenix** | LiveView + REST API deployment | `Phoenix` |
|
| 1195 |
+
| **Interactive** | Kino live forms | `Kino` |
|
| 1196 |
+
|
| 1197 |
+
### Companion Notebooks
|
| 1198 |
+
|
| 1199 |
+
| Format | Path | Deploy To |
|
| 1200 |
+
|--------|------|-----------|
|
| 1201 |
+
| Livebook | `ml_e2e_template.livemd` | HF Spaces (Docker), Livebook Teams |
|
| 1202 |
+
| Jupyter | `colab_kaggle/ml_e2e_python.ipynb` | Google Colab, Kaggle |
|
| 1203 |
+
| Gradio | `gradio_hf_deploy/app.py` | HF Spaces (sdk: gradio) |
|
| 1204 |
+
| marimo | `marimo/ml_e2e_marimo.py` | Anywhere Python runs |
|
| 1205 |
+
|
| 1206 |
+
### Resources
|
| 1207 |
+
|
| 1208 |
+
* [Bumblebee docs](https://hexdocs.pm/bumblebee) — Pre-trained models
|
| 1209 |
+
* [Nx docs](https://hexdocs.pm/nx) — Numerical computing
|
| 1210 |
+
* [Axon docs](https://hexdocs.pm/axon) — Neural networks
|
| 1211 |
+
* [EXLA docs](https://hexdocs.pm/exla) — GPU backend
|
| 1212 |
+
* [Phoenix docs](https://hexdocs.pm/phoenix) — Web framework
|
| 1213 |
+
* [Hugging Face Hub](https://huggingface.co/models) — 500k+ models
|
| 1214 |
+
* [marimo docs](https://docs.marimo.io) — Reactive Python notebooks
|
| 1215 |
+
* _Machine Learning in Elixir_ — Sean Moriarity, Pragmatic Bookshelf
|
| 1216 |
+
|
| 1217 |
+
### Deploy
|
| 1218 |
+
|
| 1219 |
+
```bash
|
| 1220 |
+
just check # Verify all files and tools
|
| 1221 |
+
just livebook # Open Livebook locally
|
| 1222 |
+
just deploy-livebook # Push to HF Spaces
|
| 1223 |
+
just marimo # Open marimo editor
|
| 1224 |
+
just gradio # Run Gradio app
|
| 1225 |
+
```
|
| 1226 |
+
""")
|
| 1227 |
+
```
|
ml_tutorial.livemd
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Machine Learning in Elixir - Interactive Tutorial
|
| 2 |
+
|
| 3 |
+
<!-- livebook:{"app":"embedded"} -->
|
| 4 |
+
|
| 5 |
+
```elixir
|
| 6 |
+
Mix.install([
|
| 7 |
+
{:nx, "~> 0.11"},
|
| 8 |
+
{:axon, "~> 0.8"},
|
| 9 |
+
{:exla, "~> 0.11"},
|
| 10 |
+
{:kino, "~> 0.13"}
|
| 11 |
+
])
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
## Chapter 1: Introduction to ML in Elixir 🚀
|
| 15 |
+
|
| 16 |
+
### Welcome to Machine Learning with Elixir!
|
| 17 |
+
|
| 18 |
+
**I'm an intermediate Elixir developer and I want to learn Machine Learning in Elixir so I can build intelligent applications and understand ML concepts using functional programming patterns.**
|
| 19 |
+
|
| 20 |
+
### What Makes Elixir Special for ML?
|
| 21 |
+
|
| 22 |
+
Elixir brings some unique advantages to machine learning:
|
| 23 |
+
|
| 24 |
+
- **🏗️ Functional Programming Foundation**: Pure functions and immutability naturally align with ML workflows
|
| 25 |
+
- **⚡ Concurrency & Distribution**: Handle large datasets and parallel training efficiently
|
| 26 |
+
- **🔧 Erlang VM Benefits**: Fault tolerance and hot code reloading for production ML systems
|
| 27 |
+
- **📊 Nx Library**: Numerical computing with GPU acceleration
|
| 28 |
+
|
| 29 |
+
### Core Concepts in Elixir ML
|
| 30 |
+
|
| 31 |
+
**1. Tensors** 🔢
|
| 32 |
+
Tensors are the fundamental building blocks - think of them as multi-dimensional arrays:
|
| 33 |
+
|
| 34 |
+
```elixir
|
| 35 |
+
import Nx
|
| 36 |
+
|
| 37 |
+
# Creating a tensor
|
| 38 |
+
tensor = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 39 |
+
tensor
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
**2. Numerical Operations** ➕
|
| 43 |
+
Perform mathematical operations efficiently:
|
| 44 |
+
|
| 45 |
+
```elixir
|
| 46 |
+
# Element-wise operations
|
| 47 |
+
a = Nx.tensor([1, 2, 3])
|
| 48 |
+
b = Nx.tensor([4, 5, 6])
|
| 49 |
+
result = Nx.add(a, b)
|
| 50 |
+
result
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Your First ML Program 💻
|
| 54 |
+
|
| 55 |
+
Let's create a simple linear regression example:
|
| 56 |
+
|
| 57 |
+
```elixir
|
| 58 |
+
defmodule SimpleML do
|
| 59 |
+
import Nx
|
| 60 |
+
|
| 61 |
+
def predict(x, weights) do
|
| 62 |
+
# Simple linear prediction: y = mx + b
|
| 63 |
+
multiply(x, weights[0]) + weights[1]
|
| 64 |
+
end
|
| 65 |
+
|
| 66 |
+
def train(x_data, y_data, learning_rate \\ 0.01, epochs \\ 1000) do
|
| 67 |
+
# Initialize random weights
|
| 68 |
+
weights = [Nx.random_normal({}), Nx.random_normal({})]
|
| 69 |
+
|
| 70 |
+
Enum.reduce(1..epochs, weights, fn epoch, [m, b] ->
|
| 71 |
+
# Forward pass
|
| 72 |
+
predictions = multiply(x_data, m) + b
|
| 73 |
+
|
| 74 |
+
# Calculate loss (mean squared error)
|
| 75 |
+
loss = mean(power(predictions - y_data, 2))
|
| 76 |
+
|
| 77 |
+
# Backward pass (gradients)
|
| 78 |
+
grad_m = mean(2 * (predictions - y_data) * x_data)
|
| 79 |
+
grad_b = mean(2 * (predictions - y_data))
|
| 80 |
+
|
| 81 |
+
# Update weights
|
| 82 |
+
[m - learning_rate * grad_m, b - learning_rate * grad_b]
|
| 83 |
+
end)
|
| 84 |
+
end
|
| 85 |
+
end
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
**Try it out:**
|
| 89 |
+
|
| 90 |
+
```elixir
|
| 91 |
+
# Generate simple linear data: y = 2x + 3 + noise
|
| 92 |
+
x_data = Nx.tensor(Enum.map(0..100, &(&1 / 10.0))) # 0 to 10 in steps of 0.1
|
| 93 |
+
noise = Nx.random_normal({101}) |> Nx.multiply(0.1)
|
| 94 |
+
y_data = Nx.multiply(x_data, 2) |> Nx.add(3) |> Nx.add(noise)
|
| 95 |
+
|
| 96 |
+
# Train the model
|
| 97 |
+
weights = SimpleML.train(x_data, y_data, 0.01, 500)
|
| 98 |
+
|
| 99 |
+
# Test prediction
|
| 100 |
+
test_x = Nx.tensor([0.5, 1.0, 1.5, 2.0])
|
| 101 |
+
predictions = SimpleML.predict(test_x, weights)
|
| 102 |
+
|
| 103 |
+
"Trained weights: #{inspect(weights)}, Predictions: #{inspect(predictions)}"
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
## Chapter 2: Nx and Numerical Computing 🔢
|
| 107 |
+
|
| 108 |
+
### Understanding Nx Tensors 📊
|
| 109 |
+
|
| 110 |
+
Tensors are multi-dimensional arrays that power all ML computations:
|
| 111 |
+
|
| 112 |
+
```elixir
|
| 113 |
+
import Nx
|
| 114 |
+
|
| 115 |
+
# Different tensor types
|
| 116 |
+
scalar = Nx.tensor(42) # 0-dimensional
|
| 117 |
+
vector = Nx.tensor([1, 2, 3]) # 1-dimensional
|
| 118 |
+
matrix = Nx.tensor([[1, 2], [3, 4]]) # 2-dimensional
|
| 119 |
+
|
| 120 |
+
# Tensor properties
|
| 121 |
+
IO.puts("Vector shape: #{inspect(Nx.shape(vector))}")
|
| 122 |
+
IO.puts("Vector type: #{inspect(Nx.type(vector))}")
|
| 123 |
+
IO.puts("Vector size: #{inspect(Nx.size(vector))}")
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
### Essential Tensor Operations ⚡
|
| 127 |
+
|
| 128 |
+
**Mathematical Operations:**
|
| 129 |
+
|
| 130 |
+
```elixir
|
| 131 |
+
# Basic arithmetic
|
| 132 |
+
a = Nx.tensor([1.0, 2.0, 3.0])
|
| 133 |
+
b = Nx.tensor([4.0, 5.0, 6.0])
|
| 134 |
+
|
| 135 |
+
add_result = Nx.add(a, b)
|
| 136 |
+
mult_result = Nx.multiply(a, b)
|
| 137 |
+
pow_result = Nx.power(a, 2)
|
| 138 |
+
|
| 139 |
+
{add_result, mult_result, pow_result}
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
**Aggregation Operations:**
|
| 143 |
+
|
| 144 |
+
```elixir
|
| 145 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 146 |
+
|
| 147 |
+
sum_all = Nx.sum(matrix)
|
| 148 |
+
sum_rows = Nx.sum(matrix, axes: [1])
|
| 149 |
+
mean_all = Nx.mean(matrix)
|
| 150 |
+
|
| 151 |
+
{sum_all, sum_rows, mean_all}
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
### Broadcasting Magic ✨
|
| 155 |
+
|
| 156 |
+
Nx automatically broadcasts tensors to compatible shapes:
|
| 157 |
+
|
| 158 |
+
```elixir
|
| 159 |
+
# Broadcasting examples
|
| 160 |
+
vector = Nx.tensor([1, 2, 3])
|
| 161 |
+
scalar = Nx.tensor(10)
|
| 162 |
+
|
| 163 |
+
# Add scalar to each element
|
| 164 |
+
result1 = Nx.add(vector, scalar)
|
| 165 |
+
|
| 166 |
+
# Broadcasting with different dimensions
|
| 167 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 168 |
+
result2 = Nx.add(matrix, vector)
|
| 169 |
+
|
| 170 |
+
{result1, result2}
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
### Hands-On Exercise 💻
|
| 174 |
+
|
| 175 |
+
Create a function that normalizes a dataset:
|
| 176 |
+
|
| 177 |
+
```elixir
|
| 178 |
+
defmodule DataPreprocessing do
|
| 179 |
+
import Nx
|
| 180 |
+
|
| 181 |
+
def normalize(tensor) do
|
| 182 |
+
mean = Nx.mean(tensor)
|
| 183 |
+
std = Nx.standard_deviation(tensor)
|
| 184 |
+
|
| 185 |
+
# Normalize: (x - mean) / std
|
| 186 |
+
Nx.divide(Nx.subtract(tensor, mean), std)
|
| 187 |
+
end
|
| 188 |
+
|
| 189 |
+
def min_max_scale(tensor) do
|
| 190 |
+
min = Nx.reduce_min(tensor)
|
| 191 |
+
max = Nx.reduce_max(tensor)
|
| 192 |
+
|
| 193 |
+
# Scale to [0, 1] range
|
| 194 |
+
Nx.divide(Nx.subtract(tensor, min), Nx.subtract(max, min))
|
| 195 |
+
end
|
| 196 |
+
end
|
| 197 |
+
|
| 198 |
+
# Test with sample data
|
| 199 |
+
data = Nx.tensor([10, 20, 30, 40, 50])
|
| 200 |
+
normalized = DataPreprocessing.normalize(data)
|
| 201 |
+
scaled = DataPreprocessing.min_max_scale(data)
|
| 202 |
+
|
| 203 |
+
{data, normalized, scaled}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
## Chapter 3: Building ML Models with Axon 🧠
|
| 207 |
+
|
| 208 |
+
### Understanding Axon Architecture 🏗️
|
| 209 |
+
|
| 210 |
+
Axon provides a functional API for building neural networks:
|
| 211 |
+
|
| 212 |
+
```elixir
|
| 213 |
+
import Axon
|
| 214 |
+
|
| 215 |
+
# Simple neural network
|
| 216 |
+
model = Axon.input("input", shape: {nil, 784})
|
| 217 |
+
|> Axon.dense(128, activation: :relu)
|
| 218 |
+
|> Axon.dense(64, activation: :relu)
|
| 219 |
+
|> Axon.dense(10, activation: :softmax)
|
| 220 |
+
|
| 221 |
+
IO.puts("Model structure:")
|
| 222 |
+
IO.inspect(model)
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### Building Your First Neural Network 🚀
|
| 226 |
+
|
| 227 |
+
**MNIST Digit Classification:**
|
| 228 |
+
|
| 229 |
+
```elixir
|
| 230 |
+
defmodule MNISTClassifier do
|
| 231 |
+
import Axon
|
| 232 |
+
|
| 233 |
+
def build_model() do
|
| 234 |
+
# Input: 28x28 grayscale images (flattened to 784)
|
| 235 |
+
Axon.input("input", shape: {nil, 784})
|
| 236 |
+
# Hidden layers
|
| 237 |
+
|> Axon.dense(128, activation: :relu)
|
| 238 |
+
|> Axon.dropout(rate: 0.3)
|
| 239 |
+
|> Axon.dense(64, activation: :relu)
|
| 240 |
+
|> Axon.dropout(rate: 0.3)
|
| 241 |
+
# Output: 10 classes (digits 0-9)
|
| 242 |
+
|> Axon.dense(10, activation: :softmax)
|
| 243 |
+
end
|
| 244 |
+
|
| 245 |
+
def train_model(model, train_data, validation_data) do
|
| 246 |
+
# Training loop
|
| 247 |
+
Axon.Loop.trainer(model, :categorical_cross_entropy, :adam)
|
| 248 |
+
|> Axon.Loop.validate(model, validation_data)
|
| 249 |
+
|> Axon.Loop.run(train_data, epochs: 10)
|
| 250 |
+
end
|
| 251 |
+
end
|
| 252 |
+
|
| 253 |
+
# Create a sample model
|
| 254 |
+
model = MNISTClassifier.build_model()
|
| 255 |
+
model
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
### Common Layer Types 🧩
|
| 259 |
+
|
| 260 |
+
**Dense (Fully Connected) Layers:**
|
| 261 |
+
|
| 262 |
+
```elixir
|
| 263 |
+
model1 = Axon.input("input", shape: {nil, 100})
|
| 264 |
+
|> Axon.dense(50) # 50 neurons
|
| 265 |
+
|> Axon.dense(25) # 25 neurons
|
| 266 |
+
|> Axon.dense(1) # Output neuron
|
| 267 |
+
|
| 268 |
+
model1
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
**Convolutional Layers (for images):**
|
| 272 |
+
|
| 273 |
+
```elixir
|
| 274 |
+
model2 = Axon.input("input", shape: {nil, 28, 28, 1}) # Grayscale images
|
| 275 |
+
|> Axon.conv(32, kernel_size: {3, 3}, activation: :relu)
|
| 276 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 277 |
+
|> Axon.conv(64, kernel_size: {3, 3}, activation: :relu)
|
| 278 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 279 |
+
|> Axon.flatten()
|
| 280 |
+
|> Axon.dense(128, activation: :relu)
|
| 281 |
+
|> Axon.dense(10, activation: :softmax)
|
| 282 |
+
|
| 283 |
+
model2
|
| 284 |
+
```
|
| 285 |
+
|
| 286 |
+
### Activation Functions 🔥
|
| 287 |
+
|
| 288 |
+
```elixir
|
| 289 |
+
model3 = Axon.input("input", shape: {nil, 100})
|
| 290 |
+
|> Axon.dense(50, activation: :relu) # ReLU - most common
|
| 291 |
+
|> Axon.dense(25, activation: :sigmoid) # Sigmoid - for probabilities
|
| 292 |
+
|> Axon.dense(10, activation: :softmax) # Softmax - for classification
|
| 293 |
+
|> Axon.dense(1, activation: :tanh) # Tanh - for outputs in [-1, 1]
|
| 294 |
+
|
| 295 |
+
model3
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
## Chapter 4: Real-world Applications 🚀
|
| 299 |
+
|
| 300 |
+
### Complete ML Pipeline Example 🔄
|
| 301 |
+
|
| 302 |
+
**End-to-End Fraud Detection System:**
|
| 303 |
+
|
| 304 |
+
```elixir
|
| 305 |
+
defmodule FraudDetection do
|
| 306 |
+
import Axon
|
| 307 |
+
|
| 308 |
+
def build_pipeline() do
|
| 309 |
+
# Data preprocessing -> Model training -> Prediction
|
| 310 |
+
|
| 311 |
+
# 1. Model definition
|
| 312 |
+
model = build_fraud_model()
|
| 313 |
+
|
| 314 |
+
%{
|
| 315 |
+
model: model
|
| 316 |
+
}
|
| 317 |
+
end
|
| 318 |
+
|
| 319 |
+
defp build_fraud_model() do
|
| 320 |
+
Axon.input("input", shape: {nil, 20}) # 20 features
|
| 321 |
+
|> Axon.dense(64, activation: :relu)
|
| 322 |
+
|> Axon.dropout(rate: 0.3)
|
| 323 |
+
|> Axon.dense(32, activation: :relu)
|
| 324 |
+
|> Axon.dropout(rate: 0.3)
|
| 325 |
+
|> Axon.dense(1, activation: :sigmoid) # Probability of fraud
|
| 326 |
+
end
|
| 327 |
+
end
|
| 328 |
+
|
| 329 |
+
# Create the fraud detection pipeline
|
| 330 |
+
pipeline = FraudDetection.build_pipeline()
|
| 331 |
+
pipeline.model
|
| 332 |
+
```
|
| 333 |
+
|
| 334 |
+
### Practical Project: Customer Churn Prediction 📈
|
| 335 |
+
|
| 336 |
+
```elixir
|
| 337 |
+
defmodule ChurnPredictor do
|
| 338 |
+
import Axon
|
| 339 |
+
|
| 340 |
+
def build_churn_model() do
|
| 341 |
+
# Customer features: age, usage_pattern, support_tickets, etc.
|
| 342 |
+
Axon.input("input", shape: {nil, 15})
|
| 343 |
+
|> Axon.dense(32, activation: :relu)
|
| 344 |
+
|> Axon.batch_norm()
|
| 345 |
+
|> Axon.dense(16, activation: :relu)
|
| 346 |
+
|> Axon.dropout(rate: 0.2)
|
| 347 |
+
|> Axon.dense(1, activation: :sigmoid) # Probability of churn
|
| 348 |
+
end
|
| 349 |
+
|
| 350 |
+
def predict_churn_risk(customer_features) do
|
| 351 |
+
# Simulate prediction
|
| 352 |
+
%{
|
| 353 |
+
probability: 0.65,
|
| 354 |
+
risk_level: :medium,
|
| 355 |
+
recommendation: "Offer loyalty discount"
|
| 356 |
+
}
|
| 357 |
+
end
|
| 358 |
+
end
|
| 359 |
+
|
| 360 |
+
# Build churn prediction model
|
| 361 |
+
churn_model = ChurnPredictor.build_churn_model()
|
| 362 |
+
churn_model
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
## Interactive Exercises 🎯
|
| 366 |
+
|
| 367 |
+
### Exercise 1: Tensor Operations
|
| 368 |
+
Create a function that calculates the dot product of two matrices:
|
| 369 |
+
|
| 370 |
+
```elixir
|
| 371 |
+
defmodule TensorExercises do
|
| 372 |
+
import Nx
|
| 373 |
+
|
| 374 |
+
def dot_product(a, b) do
|
| 375 |
+
# Your code here
|
| 376 |
+
# Hint: Use Nx.dot/2
|
| 377 |
+
Nx.dot(a, b)
|
| 378 |
+
end
|
| 379 |
+
|
| 380 |
+
def elementwise_multiply(a, b) do
|
| 381 |
+
# Your code here
|
| 382 |
+
Nx.multiply(a, b)
|
| 383 |
+
end
|
| 384 |
+
end
|
| 385 |
+
|
| 386 |
+
# Test your functions
|
| 387 |
+
a = Nx.tensor([[1, 2], [3, 4]])
|
| 388 |
+
b = Nx.tensor([[5, 6], [7, 8]])
|
| 389 |
+
|
| 390 |
+
dot_result = TensorExercises.dot_product(a, b)
|
| 391 |
+
mult_result = TensorExercises.elementwise_multiply(a, b)
|
| 392 |
+
|
| 393 |
+
{dot_result, mult_result}
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
### Exercise 2: Build a Custom Model
|
| 397 |
+
Create a neural network with 3 hidden layers (128, 64, 32 neurons) and ReLU activations:
|
| 398 |
+
|
| 399 |
+
```elixir
|
| 400 |
+
defmodule CustomModel do
|
| 401 |
+
import Axon
|
| 402 |
+
|
| 403 |
+
def build_custom_model(input_shape \\ {nil, 10}) do
|
| 404 |
+
# Your code here
|
| 405 |
+
Axon.input("input", shape: input_shape)
|
| 406 |
+
|> Axon.dense(128, activation: :relu)
|
| 407 |
+
|> Axon.dense(64, activation: :relu)
|
| 408 |
+
|> Axon.dense(32, activation: :relu)
|
| 409 |
+
|> Axon.dense(1, activation: :sigmoid)
|
| 410 |
+
end
|
| 411 |
+
end
|
| 412 |
+
|
| 413 |
+
# Build and display the model
|
| 414 |
+
model = CustomModel.build_custom_model()
|
| 415 |
+
model
|
| 416 |
+
```
|
| 417 |
+
|
| 418 |
+
## Next Steps 🚀
|
| 419 |
+
|
| 420 |
+
1. **Experiment** with different tensor operations
|
| 421 |
+
2. **Modify** the models with different architectures
|
| 422 |
+
3. **Try** training on real datasets
|
| 423 |
+
4. **Explore** Livebook's visualization features
|
| 424 |
+
|
| 425 |
+
Happy learning! 🎉✨
|
| 426 |
+
|
| 427 |
+
---
|
| 428 |
+
|
| 429 |
+
*This notebook was created as a companion to the "Machine Learning in Elixir" tutorial.*
|
| 430 |
+
*Save this notebook (Ctrl+S) to keep your progress!*
|
ml_tutorial_fixed.livemd
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Machine Learning in Elixir - Interactive Tutorial
|
| 2 |
+
|
| 3 |
+
<!-- livebook:{"app":"embedded"} -->
|
| 4 |
+
|
| 5 |
+
```elixir
|
| 6 |
+
Mix.install([
|
| 7 |
+
{:nx, "~> 0.11"},
|
| 8 |
+
{:axon, "~> 0.8"},
|
| 9 |
+
{:exla, "~> 0.11"},
|
| 10 |
+
{:kino, "~> 0.13"}
|
| 11 |
+
])
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
## Chapter 1: Introduction to ML in Elixir 🚀
|
| 15 |
+
|
| 16 |
+
### Welcome to Machine Learning with Elixir!
|
| 17 |
+
|
| 18 |
+
**I'm an intermediate Elixir developer and I want to learn Machine Learning in Elixir so I can build intelligent applications and understand ML concepts using functional programming patterns.**
|
| 19 |
+
|
| 20 |
+
### What Makes Elixir Special for ML?
|
| 21 |
+
|
| 22 |
+
Elixir brings some unique advantages to machine learning:
|
| 23 |
+
|
| 24 |
+
- **🏗️ Functional Programming Foundation**: Pure functions and immutability naturally align with ML workflows
|
| 25 |
+
- **⚡ Concurrency & Distribution**: Handle large datasets and parallel training efficiently
|
| 26 |
+
- **🔧 Erlang VM Benefits**: Fault tolerance and hot code reloading for production ML systems
|
| 27 |
+
- **📊 Nx Library**: Numerical computing with GPU acceleration
|
| 28 |
+
|
| 29 |
+
### Core Concepts in Elixir ML
|
| 30 |
+
|
| 31 |
+
**1. Tensors** 🔢
|
| 32 |
+
Tensors are the fundamental building blocks - think of them as multi-dimensional arrays:
|
| 33 |
+
|
| 34 |
+
```elixir
|
| 35 |
+
import Nx
|
| 36 |
+
|
| 37 |
+
# Creating a tensor
|
| 38 |
+
tensor = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 39 |
+
tensor
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
**2. Numerical Operations** ➕
|
| 43 |
+
Perform mathematical operations efficiently:
|
| 44 |
+
|
| 45 |
+
```elixir
|
| 46 |
+
# Element-wise operations
|
| 47 |
+
a = Nx.tensor([1, 2, 3])
|
| 48 |
+
b = Nx.tensor([4, 5, 6])
|
| 49 |
+
result = Nx.add(a, b)
|
| 50 |
+
result
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Your First ML Program 💻
|
| 54 |
+
|
| 55 |
+
Let's create a simple linear regression example:
|
| 56 |
+
|
| 57 |
+
```elixir
|
| 58 |
+
defmodule SimpleML do
|
| 59 |
+
import Nx
|
| 60 |
+
|
| 61 |
+
def predict(x, weights) do
|
| 62 |
+
# Simple linear prediction: y = mx + b
|
| 63 |
+
Nx.multiply(x, weights[0]) + weights[1]
|
| 64 |
+
end
|
| 65 |
+
|
| 66 |
+
def train(x_data, y_data, learning_rate \\ 0.01, epochs \\ 1000) do
|
| 67 |
+
# Initialize random weights
|
| 68 |
+
weights = [Nx.random_normal({}), Nx.random_normal({})]
|
| 69 |
+
|
| 70 |
+
Enum.reduce(1..epochs, weights, fn _epoch, [m, b] ->
|
| 71 |
+
# Forward pass
|
| 72 |
+
predictions = Nx.multiply(x_data, m) + b
|
| 73 |
+
|
| 74 |
+
# Calculate loss (mean squared error)
|
| 75 |
+
_loss = Nx.mean(Nx.power(predictions - y_data, 2))
|
| 76 |
+
|
| 77 |
+
# Backward pass (gradients)
|
| 78 |
+
grad_m = Nx.mean(2 * (predictions - y_data) * x_data)
|
| 79 |
+
grad_b = Nx.mean(2 * (predictions - y_data))
|
| 80 |
+
|
| 81 |
+
# Update weights
|
| 82 |
+
[m - learning_rate * grad_m, b - learning_rate * grad_b]
|
| 83 |
+
end)
|
| 84 |
+
end
|
| 85 |
+
end
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
**Try it out:**
|
| 89 |
+
|
| 90 |
+
```elixir
|
| 91 |
+
# Generate simple linear data: y = 2x + 3 + noise
|
| 92 |
+
x_data = Nx.tensor(Enum.map(0..100, &(&1 / 10.0))) # 0 to 10 in steps of 0.1
|
| 93 |
+
noise = Nx.random_normal({101}) |> Nx.multiply(0.1)
|
| 94 |
+
y_data = Nx.multiply(x_data, 2) |> Nx.add(3) |> Nx.add(noise)
|
| 95 |
+
|
| 96 |
+
# Train the model
|
| 97 |
+
weights = SimpleML.train(x_data, y_data, 0.01, 500)
|
| 98 |
+
|
| 99 |
+
# Test prediction
|
| 100 |
+
test_x = Nx.tensor([0.5, 1.0, 1.5, 2.0])
|
| 101 |
+
predictions = SimpleML.predict(test_x, weights)
|
| 102 |
+
|
| 103 |
+
"Trained weights: #{inspect(weights)}, Predictions: #{inspect(predictions)}"
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
## Chapter 2: Nx and Numerical Computing 🔢
|
| 107 |
+
|
| 108 |
+
### Understanding Nx Tensors 📊
|
| 109 |
+
|
| 110 |
+
Tensors are multi-dimensional arrays that power all ML computations:
|
| 111 |
+
|
| 112 |
+
```elixir
|
| 113 |
+
import Nx
|
| 114 |
+
|
| 115 |
+
# Different tensor types
|
| 116 |
+
scalar = Nx.tensor(42) # 0-dimensional
|
| 117 |
+
vector = Nx.tensor([1, 2, 3]) # 1-dimensional
|
| 118 |
+
matrix = Nx.tensor([[1, 2], [3, 4]]) # 2-dimensional
|
| 119 |
+
|
| 120 |
+
# Tensor properties
|
| 121 |
+
IO.puts("Vector shape: #{inspect(Nx.shape(vector))}")
|
| 122 |
+
IO.puts("Vector type: #{inspect(Nx.type(vector))}")
|
| 123 |
+
IO.puts("Vector size: #{inspect(Nx.size(vector))}")
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
### Essential Tensor Operations ⚡
|
| 127 |
+
|
| 128 |
+
**Mathematical Operations:**
|
| 129 |
+
|
| 130 |
+
```elixir
|
| 131 |
+
# Basic arithmetic
|
| 132 |
+
a = Nx.tensor([1.0, 2.0, 3.0])
|
| 133 |
+
b = Nx.tensor([4.0, 5.0, 6.0])
|
| 134 |
+
|
| 135 |
+
add_result = Nx.add(a, b)
|
| 136 |
+
mult_result = Nx.multiply(a, b)
|
| 137 |
+
pow_result = Nx.power(a, 2)
|
| 138 |
+
|
| 139 |
+
{add_result, mult_result, pow_result}
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
**Aggregation Operations:**
|
| 143 |
+
|
| 144 |
+
```elixir
|
| 145 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 146 |
+
|
| 147 |
+
sum_all = Nx.sum(matrix)
|
| 148 |
+
sum_rows = Nx.sum(matrix, axes: [1])
|
| 149 |
+
mean_all = Nx.mean(matrix)
|
| 150 |
+
|
| 151 |
+
{sum_all, sum_rows, mean_all}
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
### Broadcasting Magic ✨
|
| 155 |
+
|
| 156 |
+
Nx automatically broadcasts tensors to compatible shapes:
|
| 157 |
+
|
| 158 |
+
```elixir
|
| 159 |
+
# Broadcasting examples
|
| 160 |
+
vector = Nx.tensor([1, 2, 3])
|
| 161 |
+
scalar = Nx.tensor(10)
|
| 162 |
+
|
| 163 |
+
# Add scalar to each element
|
| 164 |
+
result1 = Nx.add(vector, scalar)
|
| 165 |
+
|
| 166 |
+
# Broadcasting with different dimensions
|
| 167 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 168 |
+
result2 = Nx.add(matrix, vector)
|
| 169 |
+
|
| 170 |
+
{result1, result2}
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
### Hands-On Exercise 💻
|
| 174 |
+
|
| 175 |
+
Create a function that normalizes a dataset:
|
| 176 |
+
|
| 177 |
+
```elixir
|
| 178 |
+
defmodule DataPreprocessing do
|
| 179 |
+
import Nx
|
| 180 |
+
|
| 181 |
+
def normalize(tensor) do
|
| 182 |
+
mean = Nx.mean(tensor)
|
| 183 |
+
std = Nx.standard_deviation(tensor)
|
| 184 |
+
|
| 185 |
+
# Normalize: (x - mean) / std
|
| 186 |
+
Nx.divide(Nx.subtract(tensor, mean), std)
|
| 187 |
+
end
|
| 188 |
+
|
| 189 |
+
def min_max_scale(tensor) do
|
| 190 |
+
min = Nx.reduce_min(tensor)
|
| 191 |
+
max = Nx.reduce_max(tensor)
|
| 192 |
+
|
| 193 |
+
# Scale to [0, 1] range
|
| 194 |
+
Nx.divide(Nx.subtract(tensor, min), Nx.subtract(max, min))
|
| 195 |
+
end
|
| 196 |
+
end
|
| 197 |
+
|
| 198 |
+
# Test with sample data
|
| 199 |
+
data = Nx.tensor([10, 20, 30, 40, 50])
|
| 200 |
+
normalized = DataPreprocessing.normalize(data)
|
| 201 |
+
scaled = DataPreprocessing.min_max_scale(data)
|
| 202 |
+
|
| 203 |
+
{data, normalized, scaled}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
## Chapter 3: Building ML Models with Axon 🧠
|
| 207 |
+
|
| 208 |
+
### Understanding Axon Architecture 🏗️
|
| 209 |
+
|
| 210 |
+
Axon provides a functional API for building neural networks:
|
| 211 |
+
|
| 212 |
+
```elixir
|
| 213 |
+
import Axon
|
| 214 |
+
|
| 215 |
+
# Simple neural network
|
| 216 |
+
model = Axon.input("input", shape: {nil, 784})
|
| 217 |
+
|> Axon.dense(128, activation: :relu)
|
| 218 |
+
|> Axon.dense(64, activation: :relu)
|
| 219 |
+
|> Axon.dense(10, activation: :softmax)
|
| 220 |
+
|
| 221 |
+
IO.puts("Model structure:")
|
| 222 |
+
IO.inspect(model)
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### Building Your First Neural Network 🚀
|
| 226 |
+
|
| 227 |
+
**MNIST Digit Classification:**
|
| 228 |
+
|
| 229 |
+
```elixir
|
| 230 |
+
defmodule MNISTClassifier do
|
| 231 |
+
import Axon
|
| 232 |
+
|
| 233 |
+
def build_model() do
|
| 234 |
+
# Input: 28x28 grayscale images (flattened to 784)
|
| 235 |
+
Axon.input("input", shape: {nil, 784})
|
| 236 |
+
# Hidden layers
|
| 237 |
+
|> Axon.dense(128, activation: :relu)
|
| 238 |
+
|> Axon.dropout(rate: 0.3)
|
| 239 |
+
|> Axon.dense(64, activation: :relu)
|
| 240 |
+
|> Axon.dropout(rate: 0.3)
|
| 241 |
+
# Output: 10 classes (digits 0-9)
|
| 242 |
+
|> Axon.dense(10, activation: :softmax)
|
| 243 |
+
end
|
| 244 |
+
|
| 245 |
+
def train_model(model, train_data, validation_data) do
|
| 246 |
+
# Training loop
|
| 247 |
+
Axon.Loop.trainer(model, :categorical_cross_entropy, :adam)
|
| 248 |
+
|> Axon.Loop.validate(model, validation_data)
|
| 249 |
+
|> Axon.Loop.run(train_data, epochs: 10)
|
| 250 |
+
end
|
| 251 |
+
end
|
| 252 |
+
|
| 253 |
+
# Create a sample model
|
| 254 |
+
model = MNISTClassifier.build_model()
|
| 255 |
+
model
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
### Common Layer Types 🧩
|
| 259 |
+
|
| 260 |
+
**Dense (Fully Connected) Layers:**
|
| 261 |
+
|
| 262 |
+
```elixir
|
| 263 |
+
model1 = Axon.input("input", shape: {nil, 100})
|
| 264 |
+
|> Axon.dense(50) # 50 neurons
|
| 265 |
+
|> Axon.dense(25) # 25 neurons
|
| 266 |
+
|> Axon.dense(1) # Output neuron
|
| 267 |
+
|
| 268 |
+
model1
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
**Convolutional Layers (for images):**
|
| 272 |
+
|
| 273 |
+
```elixir
|
| 274 |
+
model2 = Axon.input("input", shape: {nil, 28, 28, 1}) # Grayscale images
|
| 275 |
+
|> Axon.conv(32, kernel_size: {3, 3}, activation: :relu)
|
| 276 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 277 |
+
|> Axon.conv(64, kernel_size: {3, 3}, activation: :relu)
|
| 278 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 279 |
+
|> Axon.flatten()
|
| 280 |
+
|> Axon.dense(128, activation: :relu)
|
| 281 |
+
|> Axon.dense(10, activation: :softmax)
|
| 282 |
+
|
| 283 |
+
model2
|
| 284 |
+
```
|
| 285 |
+
|
| 286 |
+
### Activation Functions 🔥
|
| 287 |
+
|
| 288 |
+
```elixir
|
| 289 |
+
model3 = Axon.input("input", shape: {nil, 100})
|
| 290 |
+
|> Axon.dense(50, activation: :relu) # ReLU - most common
|
| 291 |
+
|> Axon.dense(25, activation: :sigmoid) # Sigmoid - for probabilities
|
| 292 |
+
|> Axon.dense(10, activation: :softmax) # Softmax - for classification
|
| 293 |
+
|> Axon.dense(1, activation: :tanh) # Tanh - for outputs in [-1, 1]
|
| 294 |
+
|
| 295 |
+
model3
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
## Chapter 4: Real-world Applications 🚀
|
| 299 |
+
|
| 300 |
+
### Complete ML Pipeline Example 🔄
|
| 301 |
+
|
| 302 |
+
**End-to-End Fraud Detection System:**
|
| 303 |
+
|
| 304 |
+
```elixir
|
| 305 |
+
defmodule FraudDetection do
|
| 306 |
+
import Axon
|
| 307 |
+
|
| 308 |
+
def build_pipeline() do
|
| 309 |
+
# Data preprocessing -> Model training -> Prediction
|
| 310 |
+
|
| 311 |
+
# 1. Model definition
|
| 312 |
+
model = build_fraud_model()
|
| 313 |
+
|
| 314 |
+
%{
|
| 315 |
+
model: model
|
| 316 |
+
}
|
| 317 |
+
end
|
| 318 |
+
|
| 319 |
+
defp build_fraud_model() do
|
| 320 |
+
Axon.input("input", shape: {nil, 20}) # 20 features
|
| 321 |
+
|> Axon.dense(64, activation: :relu)
|
| 322 |
+
|> Axon.dropout(rate: 0.3)
|
| 323 |
+
|> Axon.dense(32, activation: :relu)
|
| 324 |
+
|> Axon.dropout(rate: 0.3)
|
| 325 |
+
|> Axon.dense(1, activation: :sigmoid) # Probability of fraud
|
| 326 |
+
end
|
| 327 |
+
end
|
| 328 |
+
|
| 329 |
+
# Create the fraud detection pipeline
|
| 330 |
+
pipeline = FraudDetection.build_pipeline()
|
| 331 |
+
pipeline.model
|
| 332 |
+
```
|
| 333 |
+
|
| 334 |
+
### Practical Project: Customer Churn Prediction 📈
|
| 335 |
+
|
| 336 |
+
```elixir
|
| 337 |
+
defmodule ChurnPredictor do
|
| 338 |
+
import Axon
|
| 339 |
+
|
| 340 |
+
def build_churn_model() do
|
| 341 |
+
# Customer features: age, usage_pattern, support_tickets, etc.
|
| 342 |
+
Axon.input("input", shape: {nil, 15})
|
| 343 |
+
|> Axon.dense(32, activation: :relu)
|
| 344 |
+
|> Axon.batch_norm()
|
| 345 |
+
|> Axon.dense(16, activation: :relu)
|
| 346 |
+
|> Axon.dropout(rate: 0.2)
|
| 347 |
+
|> Axon.dense(1, activation: :sigmoid) # Probability of churn
|
| 348 |
+
end
|
| 349 |
+
|
| 350 |
+
def predict_churn_risk(customer_features) do
|
| 351 |
+
# Simulate prediction
|
| 352 |
+
%{
|
| 353 |
+
probability: 0.65,
|
| 354 |
+
risk_level: :medium,
|
| 355 |
+
recommendation: "Offer loyalty discount"
|
| 356 |
+
}
|
| 357 |
+
end
|
| 358 |
+
end
|
| 359 |
+
|
| 360 |
+
# Build churn prediction model
|
| 361 |
+
churn_model = ChurnPredictor.build_churn_model()
|
| 362 |
+
churn_model
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
## Interactive Exercises 🎯
|
| 366 |
+
|
| 367 |
+
### Exercise 1: Tensor Operations
|
| 368 |
+
Create a function that calculates the dot product of two matrices:
|
| 369 |
+
|
| 370 |
+
```elixir
|
| 371 |
+
defmodule TensorExercises do
|
| 372 |
+
import Nx
|
| 373 |
+
|
| 374 |
+
def dot_product(a, b) do
|
| 375 |
+
# Your code here
|
| 376 |
+
# Hint: Use Nx.dot/2
|
| 377 |
+
Nx.dot(a, b)
|
| 378 |
+
end
|
| 379 |
+
|
| 380 |
+
def elementwise_multiply(a, b) do
|
| 381 |
+
# Your code here
|
| 382 |
+
Nx.multiply(a, b)
|
| 383 |
+
end
|
| 384 |
+
end
|
| 385 |
+
|
| 386 |
+
# Test your functions
|
| 387 |
+
a = Nx.tensor([[1, 2], [3, 4]])
|
| 388 |
+
b = Nx.tensor([[5, 6], [7, 8]])
|
| 389 |
+
|
| 390 |
+
dot_result = TensorExercises.dot_product(a, b)
|
| 391 |
+
mult_result = TensorExercises.elementwise_multiply(a, b)
|
| 392 |
+
|
| 393 |
+
{dot_result, mult_result}
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
### Exercise 2: Build a Custom Model
|
| 397 |
+
Create a neural network with 3 hidden layers (128, 64, 32 neurons) and ReLU activations:
|
| 398 |
+
|
| 399 |
+
```elixir
|
| 400 |
+
defmodule CustomModel do
|
| 401 |
+
import Axon
|
| 402 |
+
|
| 403 |
+
def build_custom_model(input_shape \\ {nil, 10}) do
|
| 404 |
+
# Your code here
|
| 405 |
+
Axon.input("input", shape: input_shape)
|
| 406 |
+
|> Axon.dense(128, activation: :relu)
|
| 407 |
+
|> Axon.dense(64, activation: :relu)
|
| 408 |
+
|> Axon.dense(32, activation: :relu)
|
| 409 |
+
|> Axon.dense(1, activation: :sigmoid)
|
| 410 |
+
end
|
| 411 |
+
end
|
| 412 |
+
|
| 413 |
+
# Build and display the model
|
| 414 |
+
model = CustomModel.build_custom_model()
|
| 415 |
+
model
|
| 416 |
+
```
|
| 417 |
+
|
| 418 |
+
## Debugging Tips 🐛
|
| 419 |
+
|
| 420 |
+
Found errors? Common fixes:
|
| 421 |
+
1. **Missing imports** - Always `import Nx` or `import Axon`
|
| 422 |
+
2. **Wrong function names** - Use `Nx.function()` not just `function()`
|
| 423 |
+
3. **Unused variables** - Prefix with `_` like `_epoch`
|
| 424 |
+
4. **Shape mismatches** - Check tensor dimensions with `Nx.shape()`
|
| 425 |
+
|
| 426 |
+
## Next Steps 🚀
|
| 427 |
+
|
| 428 |
+
1. **Experiment** with different tensor operations
|
| 429 |
+
2. **Modify** the models with different architectures
|
| 430 |
+
3. **Try** training on real datasets
|
| 431 |
+
4. **Explore** Livebook's visualization features
|
| 432 |
+
|
| 433 |
+
Happy learning! 🎉✨
|
| 434 |
+
|
| 435 |
+
---
|
| 436 |
+
|
| 437 |
+
*This notebook was created as a companion to the "Machine Learning in Elixir" tutorial.*
|
| 438 |
+
*Save this notebook (Ctrl+S) to keep your progress!*
|
ml_tutorial_latest.livemd
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Machine Learning in Elixir - Interactive Tutorial (Latest Versions)
|
| 2 |
+
|
| 3 |
+
<!-- livebook:{"app":"embedded"} -->
|
| 4 |
+
|
| 5 |
+
```elixir
|
| 6 |
+
Mix.install([
|
| 7 |
+
{:nx, "~> 0.11"},
|
| 8 |
+
{:axon, "~> 0.8"},
|
| 9 |
+
{:exla, "~> 0.11"},
|
| 10 |
+
{:kino, "~> 0.13"}
|
| 11 |
+
])
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
## Chapter 1: Introduction to ML in Elixir 🚀
|
| 15 |
+
|
| 16 |
+
### Welcome to Machine Learning with Elixir!
|
| 17 |
+
|
| 18 |
+
**I'm an intermediate Elixir developer and I want to learn Machine Learning in Elixir so I can build intelligent applications and understand ML concepts using functional programming patterns.**
|
| 19 |
+
|
| 20 |
+
### What Makes Elixir Special for ML?
|
| 21 |
+
|
| 22 |
+
Elixir brings some unique advantages to machine learning:
|
| 23 |
+
|
| 24 |
+
- **🏗️ Functional Programming Foundation**: Pure functions and immutability naturally align with ML workflows
|
| 25 |
+
- **⚡ Concurrency & Distribution**: Handle large datasets and parallel training efficiently
|
| 26 |
+
- **🔧 Erlang VM Benefits**: Fault tolerance and hot code reloading for production ML systems
|
| 27 |
+
- **📊 Nx Library**: Numerical computing with GPU acceleration
|
| 28 |
+
|
| 29 |
+
### Core Concepts in Elixir ML
|
| 30 |
+
|
| 31 |
+
**1. Tensors** 🔢
|
| 32 |
+
Tensors are the fundamental building blocks - think of them as multi-dimensional arrays:
|
| 33 |
+
|
| 34 |
+
```elixir
|
| 35 |
+
import Nx
|
| 36 |
+
|
| 37 |
+
# Creating a tensor
|
| 38 |
+
tensor = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 39 |
+
tensor
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
**2. Numerical Operations** ➕
|
| 43 |
+
Perform mathematical operations efficiently:
|
| 44 |
+
|
| 45 |
+
```elixir
|
| 46 |
+
# Element-wise operations
|
| 47 |
+
a = Nx.tensor([1, 2, 3])
|
| 48 |
+
b = Nx.tensor([4, 5, 6])
|
| 49 |
+
result = Nx.add(a, b)
|
| 50 |
+
result
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Your First ML Program 💻
|
| 54 |
+
|
| 55 |
+
Let's create a simple linear regression example compatible with Nx 0.11:
|
| 56 |
+
|
| 57 |
+
```elixir
|
| 58 |
+
defmodule SimpleML do
|
| 59 |
+
import Nx
|
| 60 |
+
|
| 61 |
+
def predict(x, weights) do
|
| 62 |
+
# Simple linear prediction: y = mx + b
|
| 63 |
+
Nx.multiply(x, weights[0]) + weights[1]
|
| 64 |
+
end
|
| 65 |
+
|
| 66 |
+
def train(x_data, y_data, learning_rate \\ 0.01, epochs \\ 1000) do
|
| 67 |
+
# Initialize random weights
|
| 68 |
+
weights = [
|
| 69 |
+
Nx.random_normal({}, 0.0, 1.0), # m weight
|
| 70 |
+
Nx.random_normal({}, 0.0, 1.0) # b weight
|
| 71 |
+
]
|
| 72 |
+
|
| 73 |
+
Enum.reduce(1..epochs, weights, fn _epoch, [m, b] ->
|
| 74 |
+
# Forward pass
|
| 75 |
+
predictions = Nx.multiply(x_data, m) + b
|
| 76 |
+
|
| 77 |
+
# Calculate loss (mean squared error)
|
| 78 |
+
_loss = Nx.mean(Nx.pow(Nx.subtract(predictions, y_data), 2))
|
| 79 |
+
|
| 80 |
+
# Backward pass (gradients)
|
| 81 |
+
grad_m = Nx.mean(2 * Nx.multiply(Nx.subtract(predictions, y_data), x_data))
|
| 82 |
+
grad_b = Nx.mean(2 * Nx.subtract(predictions, y_data))
|
| 83 |
+
|
| 84 |
+
# Update weights
|
| 85 |
+
[
|
| 86 |
+
Nx.subtract(m, Nx.multiply(learning_rate, grad_m)),
|
| 87 |
+
Nx.subtract(b, Nx.multiply(learning_rate, grad_b))
|
| 88 |
+
]
|
| 89 |
+
end)
|
| 90 |
+
end
|
| 91 |
+
end
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
**Try it out:**
|
| 95 |
+
|
| 96 |
+
```elixir
|
| 97 |
+
# Generate simple linear data: y = 2x + 3 + noise
|
| 98 |
+
x_data = Nx.tensor(Enum.map(0..100, &(&1 / 10.0))) # 0 to 10 in steps of 0.1
|
| 99 |
+
noise = Nx.random_normal({101}, 0.0, 0.1)
|
| 100 |
+
y_data = Nx.add(Nx.add(Nx.multiply(x_data, 2), 3), noise)
|
| 101 |
+
|
| 102 |
+
# Train the model
|
| 103 |
+
weights = SimpleML.train(x_data, y_data, 0.01, 500)
|
| 104 |
+
|
| 105 |
+
# Test prediction
|
| 106 |
+
test_x = Nx.tensor([0.5, 1.0, 1.5, 2.0])
|
| 107 |
+
predictions = SimpleML.predict(test_x, weights)
|
| 108 |
+
|
| 109 |
+
"Trained weights: #{inspect(weights)}, Predictions: #{inspect(predictions)}"
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## Chapter 2: Nx and Numerical Computing 🔢
|
| 113 |
+
|
| 114 |
+
### Understanding Nx Tensors 📊
|
| 115 |
+
|
| 116 |
+
Tensors are multi-dimensional arrays that power all ML computations:
|
| 117 |
+
|
| 118 |
+
```elixir
|
| 119 |
+
import Nx
|
| 120 |
+
|
| 121 |
+
# Different tensor types
|
| 122 |
+
scalar = Nx.tensor(42) # 0-dimensional
|
| 123 |
+
vector = Nx.tensor([1, 2, 3]) # 1-dimensional
|
| 124 |
+
matrix = Nx.tensor([[1, 2], [3, 4]]) # 2-dimensional
|
| 125 |
+
|
| 126 |
+
# Tensor properties
|
| 127 |
+
IO.puts("Vector shape: #{inspect(Nx.shape(vector))}")
|
| 128 |
+
IO.puts("Vector type: #{inspect(Nx.type(vector))}")
|
| 129 |
+
IO.puts("Vector size: #{inspect(Nx.size(vector))}")
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
### Essential Tensor Operations ⚡
|
| 133 |
+
|
| 134 |
+
**Mathematical Operations:**
|
| 135 |
+
|
| 136 |
+
```elixir
|
| 137 |
+
# Basic arithmetic
|
| 138 |
+
a = Nx.tensor([1.0, 2.0, 3.0])
|
| 139 |
+
b = Nx.tensor([4.0, 5.0, 6.0])
|
| 140 |
+
|
| 141 |
+
add_result = Nx.add(a, b)
|
| 142 |
+
mult_result = Nx.multiply(a, b)
|
| 143 |
+
pow_result = Nx.pow(a, 2)
|
| 144 |
+
|
| 145 |
+
{add_result, mult_result, pow_result}
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
**Aggregation Operations:**
|
| 149 |
+
|
| 150 |
+
```elixir
|
| 151 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 152 |
+
|
| 153 |
+
sum_all = Nx.sum(matrix)
|
| 154 |
+
sum_rows = Nx.sum(matrix, axes: [1])
|
| 155 |
+
mean_all = Nx.mean(matrix)
|
| 156 |
+
|
| 157 |
+
{sum_all, sum_rows, mean_all}
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
### Broadcasting Magic ✨
|
| 161 |
+
|
| 162 |
+
Nx automatically broadcasts tensors to compatible shapes:
|
| 163 |
+
|
| 164 |
+
```elixir
|
| 165 |
+
# Broadcasting examples
|
| 166 |
+
vector = Nx.tensor([1, 2, 3])
|
| 167 |
+
scalar = Nx.tensor(10)
|
| 168 |
+
|
| 169 |
+
# Add scalar to each element
|
| 170 |
+
result1 = Nx.add(vector, scalar)
|
| 171 |
+
|
| 172 |
+
# Broadcasting with different dimensions
|
| 173 |
+
matrix = Nx.tensor([[1, 2, 3], [4, 5, 6]])
|
| 174 |
+
result2 = Nx.add(matrix, vector)
|
| 175 |
+
|
| 176 |
+
{result1, result2}
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
### Hands-On Exercise 💻
|
| 180 |
+
|
| 181 |
+
Create a function that normalizes a dataset:
|
| 182 |
+
|
| 183 |
+
```elixir
|
| 184 |
+
defmodule DataPreprocessing do
|
| 185 |
+
import Nx
|
| 186 |
+
|
| 187 |
+
def normalize(tensor) do
|
| 188 |
+
mean = Nx.mean(tensor)
|
| 189 |
+
std = Nx.standard_deviation(tensor)
|
| 190 |
+
|
| 191 |
+
# Normalize: (x - mean) / std
|
| 192 |
+
Nx.divide(Nx.subtract(tensor, mean), std)
|
| 193 |
+
end
|
| 194 |
+
|
| 195 |
+
def min_max_scale(tensor) do
|
| 196 |
+
min = Nx.reduce_min(tensor)
|
| 197 |
+
max = Nx.reduce_max(tensor)
|
| 198 |
+
|
| 199 |
+
# Scale to [0, 1] range
|
| 200 |
+
Nx.divide(Nx.subtract(tensor, min), Nx.subtract(max, min))
|
| 201 |
+
end
|
| 202 |
+
end
|
| 203 |
+
|
| 204 |
+
# Test with sample data
|
| 205 |
+
data = Nx.tensor([10, 20, 30, 40, 50])
|
| 206 |
+
normalized = DataPreprocessing.normalize(data)
|
| 207 |
+
scaled = DataPreprocessing.min_max_scale(data)
|
| 208 |
+
|
| 209 |
+
{data, normalized, scaled}
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
## Chapter 3: Building ML Models with Axon 🧠
|
| 213 |
+
|
| 214 |
+
### Understanding Axon Architecture 🏗️
|
| 215 |
+
|
| 216 |
+
Axon provides a functional API for building neural networks:
|
| 217 |
+
|
| 218 |
+
```elixir
|
| 219 |
+
import Axon
|
| 220 |
+
|
| 221 |
+
# Simple neural network
|
| 222 |
+
model = Axon.input("input", shape: {nil, 784})
|
| 223 |
+
|> Axon.dense(128, activation: :relu)
|
| 224 |
+
|> Axon.dense(64, activation: :relu)
|
| 225 |
+
|> Axon.dense(10, activation: :softmax)
|
| 226 |
+
|
| 227 |
+
IO.puts("Model structure:")
|
| 228 |
+
IO.inspect(model)
|
| 229 |
+
```
|
| 230 |
+
|
| 231 |
+
### Building Your First Neural Network 🚀
|
| 232 |
+
|
| 233 |
+
**MNIST Digit Classification:**
|
| 234 |
+
|
| 235 |
+
```elixir
|
| 236 |
+
defmodule MNISTClassifier do
|
| 237 |
+
import Axon
|
| 238 |
+
|
| 239 |
+
def build_model() do
|
| 240 |
+
# Input: 28x28 grayscale images (flattened to 784)
|
| 241 |
+
Axon.input("input", shape: {nil, 784})
|
| 242 |
+
# Hidden layers
|
| 243 |
+
|> Axon.dense(128, activation: :relu)
|
| 244 |
+
|> Axon.dropout(rate: 0.3)
|
| 245 |
+
|> Axon.dense(64, activation: :relu)
|
| 246 |
+
|> Axon.dropout(rate: 0.3)
|
| 247 |
+
# Output: 10 classes (digits 0-9)
|
| 248 |
+
|> Axon.dense(10, activation: :softmax)
|
| 249 |
+
end
|
| 250 |
+
end
|
| 251 |
+
|
| 252 |
+
# Create a sample model
|
| 253 |
+
model = MNISTClassifier.build_model()
|
| 254 |
+
model
|
| 255 |
+
```
|
| 256 |
+
|
| 257 |
+
### Common Layer Types 🧩
|
| 258 |
+
|
| 259 |
+
**Dense (Fully Connected) Layers:**
|
| 260 |
+
|
| 261 |
+
```elixir
|
| 262 |
+
model1 = Axon.input("input", shape: {nil, 100})
|
| 263 |
+
|> Axon.dense(50) # 50 neurons
|
| 264 |
+
|> Axon.dense(25) # 25 neurons
|
| 265 |
+
|> Axon.dense(1) # Output neuron
|
| 266 |
+
|
| 267 |
+
model1
|
| 268 |
+
```
|
| 269 |
+
|
| 270 |
+
**Convolutional Layers (for images):**
|
| 271 |
+
|
| 272 |
+
```elixir
|
| 273 |
+
model2 = Axon.input("input", shape: {nil, 28, 28, 1}) # Grayscale images
|
| 274 |
+
|> Axon.conv(32, kernel_size: {3, 3}, activation: :relu)
|
| 275 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 276 |
+
|> Axon.conv(64, kernel_size: {3, 3}, activation: :relu)
|
| 277 |
+
|> Axon.max_pool(kernel_size: {2, 2})
|
| 278 |
+
|> Axon.flatten()
|
| 279 |
+
|> Axon.dense(128, activation: :relu)
|
| 280 |
+
|> Axon.dense(10, activation: :softmax)
|
| 281 |
+
|
| 282 |
+
model2
|
| 283 |
+
```
|
| 284 |
+
|
| 285 |
+
### Activation Functions 🔥
|
| 286 |
+
|
| 287 |
+
```elixir
|
| 288 |
+
model3 = Axon.input("input", shape: {nil, 100})
|
| 289 |
+
|> Axon.dense(50, activation: :relu) # ReLU - most common
|
| 290 |
+
|> Axon.dense(25, activation: :sigmoid) # Sigmoid - for probabilities
|
| 291 |
+
|> Axon.dense(10, activation: :softmax) # Softmax - for classification
|
| 292 |
+
|> Axon.dense(1, activation: :tanh) # Tanh - for outputs in [-1, 1]
|
| 293 |
+
|
| 294 |
+
model3
|
| 295 |
+
```
|
| 296 |
+
|
| 297 |
+
## Chapter 4: Real-world Applications 🚀
|
| 298 |
+
|
| 299 |
+
### Complete ML Pipeline Example 🔄
|
| 300 |
+
|
| 301 |
+
**End-to-End Fraud Detection System:**
|
| 302 |
+
|
| 303 |
+
```elixir
|
| 304 |
+
defmodule FraudDetection do
|
| 305 |
+
import Axon
|
| 306 |
+
|
| 307 |
+
def build_pipeline() do
|
| 308 |
+
# Data preprocessing -> Model training -> Prediction
|
| 309 |
+
|
| 310 |
+
# 1. Model definition
|
| 311 |
+
model = build_fraud_model()
|
| 312 |
+
|
| 313 |
+
%{
|
| 314 |
+
model: model
|
| 315 |
+
}
|
| 316 |
+
end
|
| 317 |
+
|
| 318 |
+
defp build_fraud_model() do
|
| 319 |
+
Axon.input("input", shape: {nil, 20}) # 20 features
|
| 320 |
+
|> Axon.dense(64, activation: :relu)
|
| 321 |
+
|> Axon.dropout(rate: 0.3)
|
| 322 |
+
|> Axon.dense(32, activation: :relu)
|
| 323 |
+
|> Axon.dropout(rate: 0.3)
|
| 324 |
+
|> Axon.dense(1, activation: :sigmoid) # Probability of fraud
|
| 325 |
+
end
|
| 326 |
+
end
|
| 327 |
+
|
| 328 |
+
# Create the fraud detection pipeline
|
| 329 |
+
pipeline = FraudDetection.build_pipeline()
|
| 330 |
+
pipeline.model
|
| 331 |
+
```
|
| 332 |
+
|
| 333 |
+
## Interactive Exercises 🎯
|
| 334 |
+
|
| 335 |
+
### Exercise 1: Tensor Operations
|
| 336 |
+
Create a function that calculates the dot product of two matrices:
|
| 337 |
+
|
| 338 |
+
```elixir
|
| 339 |
+
defmodule TensorExercises do
|
| 340 |
+
import Nx
|
| 341 |
+
|
| 342 |
+
def dot_product(a, b) do
|
| 343 |
+
# Your code here
|
| 344 |
+
# Hint: Use Nx.dot/2
|
| 345 |
+
Nx.dot(a, b)
|
| 346 |
+
end
|
| 347 |
+
|
| 348 |
+
def elementwise_multiply(a, b) do
|
| 349 |
+
# Your code here
|
| 350 |
+
Nx.multiply(a, b)
|
| 351 |
+
end
|
| 352 |
+
end
|
| 353 |
+
|
| 354 |
+
# Test your functions
|
| 355 |
+
a = Nx.tensor([[1, 2], [3, 4]])
|
| 356 |
+
b = Nx.tensor([[5, 6], [7, 8]])
|
| 357 |
+
|
| 358 |
+
dot_result = TensorExercises.dot_product(a, b)
|
| 359 |
+
mult_result = TensorExercises.elementwise_multiply(a, b)
|
| 360 |
+
|
| 361 |
+
{dot_result, mult_result}
|
| 362 |
+
```
|
| 363 |
+
|
| 364 |
+
### Exercise 2: Build a Custom Model
|
| 365 |
+
Create a neural network with 3 hidden layers (128, 64, 32 neurons) and ReLU activations:
|
| 366 |
+
|
| 367 |
+
```elixir
|
| 368 |
+
defmodule CustomModel do
|
| 369 |
+
import Axon
|
| 370 |
+
|
| 371 |
+
def build_custom_model(input_shape \\ {nil, 10}) do
|
| 372 |
+
# Your code here
|
| 373 |
+
Axon.input("input", shape: input_shape)
|
| 374 |
+
|> Axon.dense(128, activation: :relu)
|
| 375 |
+
|> Axon.dense(64, activation: :relu)
|
| 376 |
+
|> Axon.dense(32, activation: :relu)
|
| 377 |
+
|> Axon.dense(1, activation: :sigmoid)
|
| 378 |
+
end
|
| 379 |
+
end
|
| 380 |
+
|
| 381 |
+
# Build and display the model
|
| 382 |
+
model = CustomModel.build_custom_model()
|
| 383 |
+
model
|
| 384 |
+
```
|
| 385 |
+
|
| 386 |
+
## Version Compatibility Notes 📝
|
| 387 |
+
|
| 388 |
+
**Nx 0.11 changes:**
|
| 389 |
+
- `Nx.random_normal/1` → `Nx.random_normal(shape, mean, std)`
|
| 390 |
+
- `Nx.power/2` → `Nx.pow/2`
|
| 391 |
+
- Use explicit `Nx.add()`, `Nx.subtract()`, `Nx.multiply()` for clarity
|
| 392 |
+
|
| 393 |
+
**Axon 0.8 changes:**
|
| 394 |
+
- Improved performance
|
| 395 |
+
- Better API consistency
|
| 396 |
+
- Enhanced training loops
|
| 397 |
+
|
| 398 |
+
## Next Steps 🚀
|
| 399 |
+
|
| 400 |
+
1. **Update your project dependencies:**
|
| 401 |
+
|
| 402 |
+
```bash
|
| 403 |
+
mise exec -- mix deps.update --all
|
| 404 |
+
mise exec -- mix deps.compile
|
| 405 |
+
```
|
| 406 |
+
|
| 407 |
+
2. **Experiment** with the latest Nx/Axon features
|
| 408 |
+
3. **Try GPU acceleration** with EXLA
|
| 409 |
+
4. **Build real projects** with your new ML skills
|
| 410 |
+
|
| 411 |
+
Happy learning with the latest versions! 🎉✨
|
| 412 |
+
|
| 413 |
+
---
|
| 414 |
+
|
| 415 |
+
*This notebook uses Nx 0.11 and Axon 0.8 - the latest stable versions.*
|
| 416 |
+
*Save this notebook (Ctrl+S) to keep your progress!*
|
skills/gradio.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Gradio Skill
|
| 2 |
+
|
| 3 |
+
*This file should contain the full markdown specification for the Gradio skill as provided by the user.*
|
| 4 |
+
|
| 5 |
+
*(The actual content was not supplied in the prior messages, so please replace this placeholder with the correct markdown when available.)
|
skills/hf-cli.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Skill: hf-cli
|
| 2 |
+
|
| 3 |
+
Install: `curl -LsSf https://hf.co/cli/install.sh | bash -s`.
|
| 4 |
+
|
| 5 |
+
The Hugging Face Hub CLI tool `hf` is available. IMPORTANT: The `hf` command replaces the deprecated `huggingface-cli` command.
|
| 6 |
+
|
| 7 |
+
Use `hf --help` to view available functions. Note that auth commands are now under `hf auth` e.g. `hf auth whoami`.
|
| 8 |
+
|
| 9 |
+
Generated with `huggingface_hub v1.8.0`. Run `hf skills add --force` to regenerate.
|
| 10 |
+
|
| 11 |
+
## Commands
|
| 12 |
+
|
| 13 |
+
- `hf download REPO_ID` — Download files from the Hub. `[--type CHOICE --revision TEXT --include TEXT --exclude TEXT --cache-dir TEXT --local-dir TEXT --force-download --dry-run --quiet --max-workers INTEGER]`
|
| 14 |
+
- `hf env` — Print information about the environment.
|
| 15 |
+
- `hf sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]`
|
| 16 |
+
- `hf upload REPO_ID` — Upload a file or a folder to the Hub. Recommended for single-commit uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --delete TEXT --commit-message TEXT --commit-description TEXT --create-pr --every FLOAT --quiet]`
|
| 17 |
+
- `hf upload-large-folder REPO_ID LOCAL_PATH` — Upload a large folder to the Hub. Recommended for resumable uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --num-workers INTEGER --no-report --no-bars]`
|
| 18 |
+
- `hf version` — Print information about the hf version.
|
| 19 |
+
|
| 20 |
+
### `hf auth` — Manage authentication (login, logout, etc.).
|
| 21 |
+
|
| 22 |
+
- `hf auth list` — List all stored access tokens.
|
| 23 |
+
- `hf auth login` — Login using a token from huggingface.co/settings/tokens. `[--add-to-git-credential --force]`
|
| 24 |
+
- `hf auth logout` — Logout from a specific token. `[--token-name TEXT]`
|
| 25 |
+
- `hf auth switch` — Switch between access tokens. `[--token-name TEXT --add-to-git-credential]`
|
| 26 |
+
- `hf auth whoami` — Find out which huggingface.co account you are logged in as. `[--format CHOICE]`
|
| 27 |
+
|
| 28 |
+
... (additional commands omitted for brevity) ...
|
| 29 |
+
|
| 30 |
+
## Tips
|
| 31 |
+
|
| 32 |
+
- Use `hf <command> --help` for full options, descriptions, usage, and real-world examples
|
| 33 |
+
- Authenticate with `HF_TOKEN` env var (recommended) or with `--token`
|
| 34 |
+
|
| 35 |
+
Base directory for this skill: file:///home/saru/.agents/skills/hf-cli
|
skills/hf_dataset_viewer.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Dataset Viewer Skill
|
| 2 |
+
|
| 3 |
+
*This file should contain the full markdown specification for the Hugging Face Dataset Viewer skill as provided by the user.*
|
| 4 |
+
|
| 5 |
+
*(The actual content was not supplied in the prior messages, so please replace this placeholder with the correct markdown when available.)
|
skills/hf_jobs.md
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Running Workloads on Hugging Face Jobs
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
Run any workload on fully managed Hugging Face infrastructure. No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub.
|
| 6 |
+
|
| 7 |
+
**Common use cases:**
|
| 8 |
+
- **Data Processing** - Transform, filter, or analyze large datasets
|
| 9 |
+
- **Batch Inference** - Run inference on thousands of samples
|
| 10 |
+
- **Experiments & Benchmarks** - Reproducible ML experiments
|
| 11 |
+
- **Model Training** - Fine-tune models (see `model-trainer` skill for TRL-specific training)
|
| 12 |
+
- **Synthetic Data Generation** - Generate datasets using LLMs
|
| 13 |
+
- **Development & Testing** - Test code without local GPU setup
|
| 14 |
+
- **Scheduled Jobs** - Automate recurring tasks
|
| 15 |
+
|
| 16 |
+
**For model training specifically:** See the `model-trainer` skill for TRL-based training workflows.
|
| 17 |
+
|
| 18 |
+
## When to Use This Skill
|
| 19 |
+
|
| 20 |
+
Use this skill when users want to:
|
| 21 |
+
- Run Python workloads on cloud infrastructure
|
| 22 |
+
- Execute jobs without local GPU/TPU setup
|
| 23 |
+
- Process data at scale
|
| 24 |
+
- Run batch inference or experiments
|
| 25 |
+
- Schedule recurring tasks
|
| 26 |
+
- Use GPUs/TPUs for any workload
|
| 27 |
+
- Persist results to the Hugging Face Hub
|
| 28 |
+
|
| 29 |
+
## Key Directives
|
| 30 |
+
|
| 31 |
+
1. **ALWAYS use `hf_jobs()` MCP tool** - Submit jobs using `hf_jobs("uv", {...})` or `hf_jobs("run", {...})`. The `script` parameter accepts Python code directly. Do NOT save to local files unless the user explicitly requests it. Pass the script content as a string to `hf_jobs()`.
|
| 32 |
+
2. **Always handle authentication** - Jobs that interact with the Hub require `HF_TOKEN` via secrets. See Token Usage section below.
|
| 33 |
+
3. **Provide job details after submission** - After submitting, provide job ID, monitoring URL, estimated time, and note that the user can request status checks later.
|
| 34 |
+
4. **Set appropriate timeouts** - Default 30min may be insufficient for long-running tasks.
|
| 35 |
+
|
| 36 |
+
## Prerequisites Checklist
|
| 37 |
+
|
| 38 |
+
### ✅ **Account & Authentication**
|
| 39 |
+
- Hugging Face Account with Pro/Team/Enterprise plan (Jobs require paid plan)
|
| 40 |
+
- Authenticated login: Check with `hf_whoami()`
|
| 41 |
+
- **HF_TOKEN for Hub Access** ⚠️ CRITICAL - Required for any Hub operations (push models/datasets, download private repos, etc.)
|
| 42 |
+
- Token must have appropriate permissions (read for downloads, write for uploads)
|
| 43 |
+
|
| 44 |
+
### ✅ **Token Usage**
|
| 45 |
+
|
| 46 |
+
**When tokens are required:**
|
| 47 |
+
- Pushing models/datasets to Hub
|
| 48 |
+
- Accessing private repositories
|
| 49 |
+
- Using Hub APIs in scripts
|
| 50 |
+
- Any authenticated Hub operations
|
| 51 |
+
|
| 52 |
+
**How to provide tokens:**
|
| 53 |
+
```python
|
| 54 |
+
# hf_jobs MCP tool — $HF_TOKEN is auto-replaced with real token:
|
| 55 |
+
{"secrets": {"HF_TOKEN": "$HF_TOKEN"}}
|
| 56 |
+
|
| 57 |
+
# HfApi().run_uv_job() — MUST pass actual token via get_token():
|
| 58 |
+
from huggingface_hub import get_token
|
| 59 |
+
secrets={"HF_TOKEN": get_token()}
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is ONLY auto-replaced by the `hf_jobs` MCP tool. When using `HfApi().run_uv_job()`, you MUST pass the real token via `get_token()`. Passing the literal string `"$HF_TOKEN"` results in a 9-character invalid token and 401 errors.
|
| 63 |
+
|
| 64 |
+
## Token Usage Guide
|
| 65 |
+
|
| 66 |
+
### Understanding Tokens
|
| 67 |
+
- **What are HF Tokens?** Authentication credentials for Hugging Face Hub.
|
| 68 |
+
- **Token Types:** Read, Write, Organization.
|
| 69 |
+
|
| 70 |
+
### When Tokens Are Required
|
| 71 |
+
- **Always Required:** Pushing models/datasets, accessing private repos, creating/modifying repos, using Hub APIs.
|
| 72 |
+
- **Not Required:** Downloading public models/datasets, running jobs that don't interact with Hub, reading public repo info.
|
| 73 |
+
|
| 74 |
+
### How to Provide Tokens to Jobs
|
| 75 |
+
#### Method 1: Automatic Token (Recommended)
|
| 76 |
+
```python
|
| 77 |
+
hf_jobs("uv", {
|
| 78 |
+
"script": "your_script.py",
|
| 79 |
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Automatic replacement
|
| 80 |
+
})
|
| 81 |
+
```
|
| 82 |
+
**Benefits:** No token exposure, uses current login session.
|
| 83 |
+
|
| 84 |
+
#### Method 2: Explicit Token (Not Recommended)
|
| 85 |
+
```python
|
| 86 |
+
hf_jobs("uv", {
|
| 87 |
+
"script": "your_script.py",
|
| 88 |
+
"secrets": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Hardcoded token
|
| 89 |
+
})
|
| 90 |
+
```
|
| 91 |
+
Use only if automatic token fails.
|
| 92 |
+
|
| 93 |
+
#### Method 3: Environment Variable (Less Secure)
|
| 94 |
+
```python
|
| 95 |
+
hf_jobs("uv", {
|
| 96 |
+
"script": "your_script.py",
|
| 97 |
+
"env": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Less secure than secrets
|
| 98 |
+
})
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### Using Tokens in Scripts
|
| 102 |
+
```python
|
| 103 |
+
import os
|
| 104 |
+
from huggingface_hub import HfApi
|
| 105 |
+
|
| 106 |
+
token = os.environ.get("HF_TOKEN")
|
| 107 |
+
api = HfApi(token=token)
|
| 108 |
+
```
|
| 109 |
+
**Best practices:** Use `os.environ.get("HF_TOKEN")`, avoid hardcoding.
|
| 110 |
+
|
| 111 |
+
### Token Verification
|
| 112 |
+
```python
|
| 113 |
+
from huggingface_hub import whoami
|
| 114 |
+
user_info = whoami()
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### Common Token Issues
|
| 118 |
+
- **401 Unauthorized:** Add `secrets={"HF_TOKEN": "$HF_TOKEN"}`.
|
| 119 |
+
- **403 Forbidden:** Ensure token has write permissions.
|
| 120 |
+
- **Token not found:** Use `secrets` not `env`.
|
| 121 |
+
- **Repository access denied:** Use token with repo access.
|
| 122 |
+
|
| 123 |
+
### Token Security Best Practices
|
| 124 |
+
1. Never commit tokens.
|
| 125 |
+
2. Use secrets, not env.
|
| 126 |
+
3. Rotate tokens regularly.
|
| 127 |
+
4. Use minimal permissions.
|
| 128 |
+
5. Do not share tokens.
|
| 129 |
+
6. Monitor token usage.
|
| 130 |
+
|
| 131 |
+
## Quick Start: Two Approaches
|
| 132 |
+
|
| 133 |
+
### Approach 1: UV Scripts (Recommended)
|
| 134 |
+
```python
|
| 135 |
+
hf_jobs("uv", {
|
| 136 |
+
"script": """
|
| 137 |
+
# /// script
|
| 138 |
+
# dependencies = ["transformers", "torch"]
|
| 139 |
+
# ///
|
| 140 |
+
|
| 141 |
+
from transformers import pipeline
|
| 142 |
+
classifier = pipeline("sentiment-analysis")
|
| 143 |
+
print(classifier("I love Hugging Face!"))
|
| 144 |
+
""",
|
| 145 |
+
"flavor": "cpu-basic",
|
| 146 |
+
"timeout": "30m"
|
| 147 |
+
})
|
| 148 |
+
```
|
| 149 |
+
**CLI Equivalent:** `hf jobs uv run my_script.py --flavor cpu-basic --timeout 30m`
|
| 150 |
+
|
| 151 |
+
#### Custom Docker Images for UV Scripts
|
| 152 |
+
```python
|
| 153 |
+
hf_jobs("uv", {
|
| 154 |
+
"script": "inference.py",
|
| 155 |
+
"image": "vllm/vllm-openai:latest",
|
| 156 |
+
"flavor": "a10g-large"
|
| 157 |
+
})
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
#### Python Version
|
| 161 |
+
```python
|
| 162 |
+
hf_jobs("uv", {"script": "my_script.py", "python": "3.11", "flavor": "cpu-basic"})
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
**Important:** `script` must be inline code or a URL; local paths won't work with MCP tool.
|
| 166 |
+
|
| 167 |
+
#### Adding Dependencies at Runtime
|
| 168 |
+
```python
|
| 169 |
+
hf_jobs("uv", {
|
| 170 |
+
"script": "inference.py",
|
| 171 |
+
"dependencies": ["transformers", "torch>=2.0"],
|
| 172 |
+
"flavor": "a10g-small"
|
| 173 |
+
})
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### Approach 2: Docker-Based Jobs
|
| 177 |
+
```python
|
| 178 |
+
hf_jobs("run", {
|
| 179 |
+
"image": "python:3.12",
|
| 180 |
+
"command": ["python", "-c", "print('Hello from HF Jobs!')"],
|
| 181 |
+
"flavor": "cpu-basic",
|
| 182 |
+
"timeout": "30m"
|
| 183 |
+
})
|
| 184 |
+
```
|
| 185 |
+
**CLI Equivalent:** `hf jobs run python:3.12 python -c "print('Hello from HF Jobs!')"`
|
| 186 |
+
|
| 187 |
+
#### GPU Example
|
| 188 |
+
```python
|
| 189 |
+
hf_jobs("run", {
|
| 190 |
+
"image": "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
|
| 191 |
+
"command": ["python", "-c", "import torch; print(torch.cuda.get_device_name())"],
|
| 192 |
+
"flavor": "a10g-small",
|
| 193 |
+
"timeout": "1h"
|
| 194 |
+
})
|
| 195 |
+
```
|
| 196 |
+
|
| 197 |
+
## Finding More UV Scripts on Hub
|
| 198 |
+
```python
|
| 199 |
+
# Discover available UV script collections
|
| 200 |
+
dataset_search({"author": "uv-scripts", "sort": "downloads", "limit": 20})
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
## Hardware Selection
|
| 204 |
+
| Workload Type | Recommended Hardware |
|
| 205 |
+
|---|---|
|
| 206 |
+
| Data processing, testing | `cpu-basic`, `cpu-upgrade` |
|
| 207 |
+
| Small models, demos | `t4-small` |
|
| 208 |
+
| Medium models | `t4-medium`, `l4x1` |
|
| 209 |
+
| Large models, production | `a10g-small`, `a10g-large` |
|
| 210 |
+
| Very large models | `a100-large` |
|
| 211 |
+
| Batch inference | `a10g-large`, `a100-large` |
|
| 212 |
+
| Multi-GPU workloads | `l4x4`, `a10g-largex2`, `a10g-largex4` |
|
| 213 |
+
| TPU workloads | `v5e-1x1`, `v5e-2x2`, `v5e-2x4` |
|
| 214 |
+
|
| 215 |
+
## Critical: Saving Results
|
| 216 |
+
**⚠️ EPHEMERAL ENVIRONMENT—MUST PERSIST RESULTS**
|
| 217 |
+
|
| 218 |
+
### Persistence Options
|
| 219 |
+
1. **Push to Hugging Face Hub (Recommended)**
|
| 220 |
+
```python
|
| 221 |
+
model.push_to_hub("username/model-name", token=os.environ["HF_TOKEN"])
|
| 222 |
+
```
|
| 223 |
+
2. **Use External Storage** (e.g., S3)
|
| 224 |
+
3. **Send Results via API**
|
| 225 |
+
|
| 226 |
+
## Timeout Management
|
| 227 |
+
Default: 30 minutes. Set custom timeout as needed:
|
| 228 |
+
```python
|
| 229 |
+
{"timeout": "2h"}
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
## Cost Estimation
|
| 233 |
+
```
|
| 234 |
+
Total Cost = (Hours of runtime) × (Cost per hour)
|
| 235 |
+
```
|
| 236 |
+
|
| 237 |
+
## Monitoring and Tracking
|
| 238 |
+
### Check Job Status
|
| 239 |
+
```python
|
| 240 |
+
hf_jobs("ps")
|
| 241 |
+
hf_jobs("inspect", {"job_id": "your-job-id"})
|
| 242 |
+
hf_jobs("logs", {"job_id": "your-job-id"})
|
| 243 |
+
```
|
| 244 |
+
### Job URLs
|
| 245 |
+
`https://huggingface.co/jobs/username/job-id`
|
| 246 |
+
|
| 247 |
+
## Scheduled Jobs
|
| 248 |
+
```python
|
| 249 |
+
hf_jobs("scheduled uv", {"script": "your_script.py", "schedule": "@hourly", "flavor": "cpu-basic"})
|
| 250 |
+
```
|
| 251 |
+
|
| 252 |
+
## Webhooks: Trigger Jobs on Events
|
| 253 |
+
```python
|
| 254 |
+
from huggingface_hub import create_webhook
|
| 255 |
+
webhook = create_webhook(job_id=job.id, watched=[{"type": "user", "name": "your-username"}], domains=["repo", "discussion"], secret="your-secret")
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
## Common Workload Patterns
|
| 259 |
+
### Pattern 1: Dataset → Model Responses (vLLM)
|
| 260 |
+
```python
|
| 261 |
+
script = Path("hf-jobs/scripts/generate-responses.py").read_text()
|
| 262 |
+
hf_jobs("uv", {"script": script, "script_args": ["username/input-dataset", "username/output-dataset", "--messages-column", "messages", "--model-id", "Qwen/Qwen3-30B-A3B-Instruct-2507"], "flavor": "a10g-large", "timeout": "4h", "secrets": {"HF_TOKEN": "$HF_TOKEN"}})
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
## Common Failure Modes
|
| 266 |
+
- **Out of Memory (OOM)** – Reduce batch size or upgrade hardware.
|
| 267 |
+
- **Job Timeout** – Increase timeout.
|
| 268 |
+
- **Hub Push Failures** – Ensure token in secrets.
|
| 269 |
+
- **Missing Dependencies** – Add to PEP 723 header.
|
| 270 |
+
- **Authentication Errors** – Verify `hf_whoami()` and token permissions.
|
| 271 |
+
|
| 272 |
+
## Troubleshooting
|
| 273 |
+
- Job times out → Increase timeout, optimize code.
|
| 274 |
+
- Results not saved → Check persistence method, verify HF_TOKEN.
|
| 275 |
+
- OOM → Reduce batch size, upgrade hardware.
|
| 276 |
+
- Import errors → Add dependencies.
|
| 277 |
+
- Authentication errors → Check token.
|
| 278 |
+
|
| 279 |
+
## Resources
|
| 280 |
+
- Official Docs: https://huggingface.co/docs/huggingface_hub/guides/jobs
|
| 281 |
+
- UV Scripts Guide: https://docs.astral.sh/uv/guides/scripts/
|
| 282 |
+
- Hardware Flavors: https://huggingface.co/docs/hub/en/spaces-config-reference
|
| 283 |
+
- Token Guide: references/token_usage.md
|
| 284 |
+
- Hub Persistence: references/hub_saving.md
|
| 285 |
+
- Troubleshooting: references/troubleshooting.md
|
skills/training_trl.md
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TRL Training on Hugging Face Jobs
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required—models train on cloud GPUs and results are automatically saved to the Hugging Face Hub.
|
| 6 |
+
|
| 7 |
+
**TRL provides multiple training methods:**
|
| 8 |
+
- **SFT** (Supervised Fine-Tuning) - Standard instruction tuning
|
| 9 |
+
- **DPO** (Direct Preference Optimization) - Alignment from preference data
|
| 10 |
+
- **GRPO** (Group Relative Policy Optimization) - Online RL training
|
| 11 |
+
- **Reward Modeling** - Train reward models for RLHF
|
| 12 |
+
|
| 13 |
+
**For detailed TRL method documentation:**
|
| 14 |
+
```python
|
| 15 |
+
hf_doc_search("your query", product="trl")
|
| 16 |
+
hf_doc_fetch("https://huggingface.co/docs/trl/sft_trainer") # SFT
|
| 17 |
+
hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer") # DPO
|
| 18 |
+
# etc.
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
**See also:** `references/training_methods.md` for method overviews and selection guidance
|
| 22 |
+
|
| 23 |
+
## When to Use This Skill
|
| 24 |
+
|
| 25 |
+
Use this skill when users want to:
|
| 26 |
+
- Fine‑tune language models on cloud GPUs without local infrastructure
|
| 27 |
+
- Train with TRL methods (SFT, DPO, GRPO, etc.)
|
| 28 |
+
- Run training jobs on Hugging Face Jobs infrastructure
|
| 29 |
+
- Convert trained models to GGUF for local deployment (Ollama, LM Studio, llama.cpp)
|
| 30 |
+
- Ensure trained models are permanently saved to the Hub
|
| 31 |
+
- Use modern workflows with optimized defaults
|
| 32 |
+
|
| 33 |
+
### When to Use Unsloth
|
| 34 |
+
|
| 35 |
+
Use **Unsloth** (`references/unsloth.md`) instead of standard TRL when:
|
| 36 |
+
- **Limited GPU memory** – Unsloth uses ~60% less VRAM
|
| 37 |
+
- **Speed matters** – Unsloth is ~2x faster
|
| 38 |
+
- **Training large models (>13B)** – Memory efficiency is critical
|
| 39 |
+
- **Training Vision‑Language Models (VLMs)** – Unsloth has `FastVisionModel` support
|
| 40 |
+
|
| 41 |
+
See `references/unsloth.md` for complete Unsloth documentation and `scripts/unsloth_sft_example.py` for a production‑ready training script.
|
| 42 |
+
|
| 43 |
+
## Key Directives
|
| 44 |
+
|
| 45 |
+
1. **ALWAYS use `hf_jobs()` MCP tool** – Submit jobs using `hf_jobs("uv", {...})`, **NOT** bash `trl-jobs` commands. The `script` parameter accepts Python code directly. Do **NOT** save to local files unless the user explicitly requests it. Pass the script content as a string to `hf_jobs()`. If user asks to "train a model", "fine‑tune", or similar requests, you **MUST** create the training script **AND** submit the job immediately using `hf_jobs()`.
|
| 46 |
+
2. **Always include Trackio** – Every training script should include Trackio for real‑time monitoring. Use example scripts in `scripts/` as templates.
|
| 47 |
+
3. **Provide job details after submission** – After submitting, provide job ID, monitoring URL, estimated time, and note that the user can request status checks later.
|
| 48 |
+
4. **Use example scripts as templates** – Reference `scripts/train_sft_example.py`, `scripts/train_dpo_example.py`, etc. as starting points.
|
| 49 |
+
|
| 50 |
+
## Local Script Execution
|
| 51 |
+
|
| 52 |
+
Repository scripts use PEP 723 inline dependencies. Run them with `uv run`:
|
| 53 |
+
```bash
|
| 54 |
+
uv run scripts/estimate_cost.py --help
|
| 55 |
+
uv run scripts/dataset_inspector.py --help
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
## Prerequisites Checklist
|
| 59 |
+
|
| 60 |
+
### ✅ **Account & Authentication**
|
| 61 |
+
- Hugging Face Account with a paid plan (Pro/Team/Enterprise) – Jobs require a paid plan
|
| 62 |
+
- Authenticated login: Check with `hf_whoami()`
|
| 63 |
+
- **HF_TOKEN for Hub Push** ⚠️ CRITICAL – Training environment is ephemeral; must push to Hub or all training results are lost
|
| 64 |
+
- Token must have write permissions
|
| 65 |
+
- **MUST pass `secrets={"HF_TOKEN": "$HF_TOKEN"}`** in job config to make token available (the `$HF_TOKEN` syntax references your actual token value)
|
| 66 |
+
|
| 67 |
+
### ✅ **Dataset Requirements**
|
| 68 |
+
- Dataset must exist on Hub or be loadable via `datasets.load_dataset()`
|
| 69 |
+
- Format must match training method (SFT: "messages"/text/prompt‑completion; DPO: chosen/rejected; GRPO: prompt‑only)
|
| 70 |
+
- **ALWAYS validate unknown datasets** before GPU training to prevent format failures (see Dataset Validation section below)
|
| 71 |
+
- Size appropriate for hardware (Demo: 50‑100 examples on t4‑small; Production: 1K‑10K+ on a10g‑large/a100‑large)
|
| 72 |
+
|
| 73 |
+
### ⚠️ **Critical Settings**
|
| 74 |
+
- **Timeout must exceed expected training time** – Default 30 min is TOO SHORT for most training. Minimum recommended: 1‑2 hours. Job fails and loses all progress if timeout is exceeded.
|
| 75 |
+
- **Hub push must be enabled** – Config: `push_to_hub=True`, `hub_model_id="username/model-name"`; Job: `secrets={"HF_TOKEN": "$HF_TOKEN"}`
|
| 76 |
+
|
| 77 |
+
## Asynchronous Job Guidelines
|
| 78 |
+
|
| 79 |
+
### ⚠️ IMPORTANT: Training jobs run asynchronously and can take hours
|
| 80 |
+
|
| 81 |
+
#### Action Required
|
| 82 |
+
When user requests training:
|
| 83 |
+
1. **Create the training script** with Trackio included (use `scripts/train_sft_example.py` as template)
|
| 84 |
+
2. **Submit immediately** using `hf_jobs()` MCP tool with script content inline – don’t save to file unless user requests
|
| 85 |
+
3. **Report submission** with job ID, monitoring URL, and estimated time
|
| 86 |
+
4. **Wait for user** to request status checks – don’t poll automatically
|
| 87 |
+
|
| 88 |
+
#### Ground Rules
|
| 89 |
+
- Jobs run in background; training continues independently.
|
| 90 |
+
- Initial logs may be delayed (30‑60 seconds).
|
| 91 |
+
- Provide monitoring links; let user request status updates.
|
| 92 |
+
- Avoid polling; provide links instead.
|
| 93 |
+
|
| 94 |
+
#### After Submission
|
| 95 |
+
Provide to user:
|
| 96 |
+
```
|
| 97 |
+
✅ Job submitted successfully!
|
| 98 |
+
|
| 99 |
+
Job ID: abc123xyz
|
| 100 |
+
Monitor: https://huggingface.co/jobs/username/abc123xyz
|
| 101 |
+
|
| 102 |
+
Expected time: ~2 hours
|
| 103 |
+
Estimated cost: ~
|
| 104 |
+
|
| 105 |
+
The job is running in the background. Ask me to check status/logs when ready!
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
## Quick Start: Three Approaches
|
| 109 |
+
|
| 110 |
+
### Approach 1: UV Scripts (Recommended – Default Choice)
|
| 111 |
+
|
| 112 |
+
UV scripts use PEP 723 inline dependencies for clean, self‑contained training. **This is the primary approach for Claude Code.**
|
| 113 |
+
```python
|
| 114 |
+
hf_jobs("uv", {
|
| 115 |
+
"script": """
|
| 116 |
+
# /// script
|
| 117 |
+
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio"]
|
| 118 |
+
# ///
|
| 119 |
+
|
| 120 |
+
from datasets import load_dataset
|
| 121 |
+
from peft import LoraConfig
|
| 122 |
+
from trl import SFTTrainer, SFTConfig
|
| 123 |
+
import trackio
|
| 124 |
+
|
| 125 |
+
dataset = load_dataset("trl-lib/Capybara", split="train")
|
| 126 |
+
|
| 127 |
+
# Create train/eval split for monitoring
|
| 128 |
+
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)
|
| 129 |
+
|
| 130 |
+
trainer = SFTTrainer(
|
| 131 |
+
model="Qwen/Qwen2.5-0.5B",
|
| 132 |
+
train_dataset=dataset_split["train"],
|
| 133 |
+
eval_dataset=dataset_split["test"],
|
| 134 |
+
peft_config=LoraConfig(r=16, lora_alpha=32),
|
| 135 |
+
args=SFTConfig(
|
| 136 |
+
output_dir="my-model",
|
| 137 |
+
push_to_hub=True,
|
| 138 |
+
hub_model_id="username/my-model",
|
| 139 |
+
num_train_epochs=3,
|
| 140 |
+
eval_strategy="steps",
|
| 141 |
+
eval_steps=50,
|
| 142 |
+
report_to="trackio",
|
| 143 |
+
project="meaningful_project_name", # project name for Trackio
|
| 144 |
+
run_name="meaningful_run_name", # descriptive name for the specific training run (Trackio)
|
| 145 |
+
)
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
trainer.train()
|
| 149 |
+
trainer.push_to_hub()
|
| 150 |
+
""",
|
| 151 |
+
"flavor": "a10g-large",
|
| 152 |
+
"timeout": "2h",
|
| 153 |
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"}
|
| 154 |
+
})
|
| 155 |
+
```
|
| 156 |
+
**Benefits:** Direct MCP tool usage, clean code, dependencies declared inline (PEP 723), no file saving required, full control.
|
| 157 |
+
**When to use:** Default for all training tasks in Claude Code, custom training logic, any scenario requiring `hf_jobs()`.
|
| 158 |
+
|
| 159 |
+
#### Working with Scripts
|
| 160 |
+
⚠️ **Important:** The `script` parameter accepts either inline code (recommended) **OR** a URL. **Local file paths do NOT work.**
|
| 161 |
+
**Why local paths don't work:** Jobs run in isolated Docker containers without access to your local filesystem. Scripts must be:
|
| 162 |
+
- Inline code (as shown above) – recommended for custom training
|
| 163 |
+
- Publicly accessible URLs (e.g., raw files on GitHub or HF Hub)
|
| 164 |
+
- Private repo URLs (with HF_TOKEN)
|
| 165 |
+
|
| 166 |
+
**Common mistakes:**
|
| 167 |
+
```python
|
| 168 |
+
# ❌ These will all fail
|
| 169 |
+
hf_jobs("uv", {"script": "train.py"})
|
| 170 |
+
hf_jobs("uv", {"script": "./scripts/train.py"})
|
| 171 |
+
hf_jobs("uv", {"script": "/path/to/train.py"})
|
| 172 |
+
```
|
| 173 |
+
**Correct approaches:**
|
| 174 |
+
```python
|
| 175 |
+
# ✅ Inline code (recommended)
|
| 176 |
+
hf_jobs("uv", {"script": "# /// script\n# dependencies = [...]\n# ///\n<your code>"})
|
| 177 |
+
|
| 178 |
+
# ✅ From Hugging Face Hub
|
| 179 |
+
hf_jobs("uv", {"script": "https://huggingface.co/user/repo/resolve/main/train.py"})
|
| 180 |
+
|
| 181 |
+
# ✅ From GitHub
|
| 182 |
+
hf_jobs("uv", {"script": "https://raw.githubusercontent.com/user/repo/main/train.py"})
|
| 183 |
+
|
| 184 |
+
# ✅ From Gist
|
| 185 |
+
hf_jobs("uv", {"script": "https://gist.githubusercontent.com/user/id/raw/train.py"})
|
| 186 |
+
```
|
| 187 |
+
**To use local scripts:** Upload to HF Hub first:
|
| 188 |
+
```bash
|
| 189 |
+
hf repos create my-training-scripts --type model
|
| 190 |
+
hf upload my-training-scripts ./train.py train.py
|
| 191 |
+
# Use: https://huggingface.co/USERNAME/my-training-scripts/resolve/main/train.py
|
| 192 |
+
```
|
| 193 |
+
|
| 194 |
+
### Approach 2: TRL Maintained Scripts (Official Examples)
|
| 195 |
+
|
| 196 |
+
TRL provides battle‑tested scripts for all methods. Can be run from URLs:
|
| 197 |
+
```python
|
| 198 |
+
hf_jobs("uv", {
|
| 199 |
+
"script": "https://github.com/huggingface/trl/blob/main/trl/scripts/sft.py",
|
| 200 |
+
"script_args": [
|
| 201 |
+
"--model_name_or_path", "Qwen/Qwen2.5-0.5B",
|
| 202 |
+
"--dataset_name", "trl-lib/Capybara",
|
| 203 |
+
"--output_dir", "my-model",
|
| 204 |
+
"--push_to_hub",
|
| 205 |
+
"--hub_model_id", "username/my-model"
|
| 206 |
+
],
|
| 207 |
+
"flavor": "a10g-large",
|
| 208 |
+
"timeout": "2h",
|
| 209 |
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"}
|
| 210 |
+
})
|
| 211 |
+
```
|
| 212 |
+
**Benefits:** No code to write, maintained by TRL team, production‑tested.
|
| 213 |
+
**When to use:** Standard TRL training, quick experiments, don’t need custom code.
|
| 214 |
+
**Available:** Scripts are available from https://github.com/huggingface/trl/tree/main/examples/scripts
|
| 215 |
+
|
| 216 |
+
### Finding More UV Scripts on Hub
|
| 217 |
+
|
| 218 |
+
The `uv-scripts` organization provides ready‑to‑use UV scripts stored as datasets on Hugging Face Hub:
|
| 219 |
+
```python
|
| 220 |
+
# Discover available UV script collections
|
| 221 |
+
dataset_search({"author": "uv-scripts", "sort": "downloads", "limit": 20})
|
| 222 |
+
|
| 223 |
+
# Explore a specific collection
|
| 224 |
+
hub_repo_details(["uv-scripts/classification"], repo_type="dataset", include_readme=True)
|
| 225 |
+
```
|
| 226 |
+
**Popular collections:** ocr, classification, synthetic-data, vllm, dataset-creation
|
| 227 |
+
|
| 228 |
+
### Approach 3: HF Jobs CLI (Direct Terminal Commands)
|
| 229 |
+
|
| 230 |
+
When the `hf_jobs()` MCP tool is unavailable, use the `hf jobs` CLI directly.
|
| 231 |
+
|
| 232 |
+
**⚠️ CRITICAL: CLI Syntax Rules**
|
| 233 |
+
```bash
|
| 234 |
+
# ✅ CORRECT syntax - flags BEFORE script URL
|
| 235 |
+
hf jobs uv run --flavor a10g-large --timeout 2h --secrets HF_TOKEN "https://example.com/train.py"
|
| 236 |
+
|
| 237 |
+
# ❌ WRONG - "run uv" instead of "uv run"
|
| 238 |
+
hf jobs run uv "https://example.com/train.py" --flavor a10g-large
|
| 239 |
+
|
| 240 |
+
# ❌ WRONG - flags AFTER script URL (will be ignored!)
|
| 241 |
+
hf jobs uv run "https://example.com/train.py" --flavor a10g-large
|
| 242 |
+
|
| 243 |
+
# ❌ WRONG - "--secret" instead of "--secrets" (plural)
|
| 244 |
+
hf jobs uv run --secrets HF_TOKEN "https://example.com/train.py"
|
| 245 |
+
```
|
| 246 |
+
**Key syntax rules:**
|
| 247 |
+
1. Command order is `hf jobs uv run` (NOT `hf jobs run uv`).
|
| 248 |
+
2. All flags (`--flavor`, `--timeout`, `--secrets`) must come **BEFORE** the script URL.
|
| 249 |
+
3. Use `--secrets` (plural), not `--secret`.
|
| 250 |
+
4. Script URL must be the last positional argument.
|
| 251 |
+
|
| 252 |
+
**Complete CLI example:**
|
| 253 |
+
```bash
|
| 254 |
+
hf jobs uv run \
|
| 255 |
+
--flavor a10g-large \
|
| 256 |
+
--timeout 2h \
|
| 257 |
+
--secrets HF_TOKEN \
|
| 258 |
+
"https://huggingface.co/user/repo/resolve/main/train.py"
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
**Check job status via CLI:**
|
| 262 |
+
```bash
|
| 263 |
+
hf jobs ps # List all jobs
|
| 264 |
+
hf jobs logs <job-id> # View logs
|
| 265 |
+
hf jobs inspect <job-id> # Job details
|
| 266 |
+
hf jobs cancel <job-id> # Cancel a job
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
### Approach 4: TRL Jobs Package (Simplified Training)
|
| 270 |
+
|
| 271 |
+
The `trl-jobs` package provides optimized defaults and a one‑liner training.
|
| 272 |
+
```bash
|
| 273 |
+
uvx trl-jobs sft \
|
| 274 |
+
--model_name Qwen/Qwen2.5-0.5B \
|
| 275 |
+
--dataset_name trl-lib/Capybara
|
| 276 |
+
```
|
| 277 |
+
**Benefits:** Pre‑configured settings, automatic Trackio integration, automatic Hub push, one‑line commands.
|
| 278 |
+
**When to use:** User working in terminal directly (not Claude Code context), quick local experimentation.
|
| 279 |
+
**Repository:** https://github.com/huggingface/trl-jobs
|
| 280 |
+
|
| 281 |
+
⚠️ **In Claude Code context, prefer using `hf_jobs()` MCP tool (Approach 1) when available.**
|
| 282 |
+
|
| 283 |
+
## Hardware Selection
|
| 284 |
+
|
| 285 |
+
| Model Size | Recommended Hardware | Cost (approx/hr) | Use Case |
|
| 286 |
+
|------------|---------------------|------------------|----------|
|
| 287 |
+
| <1B params | `t4-small` | ~0.75 | Demos, quick tests only without eval steps |
|
| 288 |
+
| 1‑3B params | `t4-medium`, `l4x1` | ~0.5‑2.5 | Development |
|
| 289 |
+
| 3‑7B params | `a10g-small`, `a10g-large` | ~0.5‑5.0 | Production training |
|
| 290 |
+
| 7‑13B params | `a10g-large`, `a100-large` | ~10‑20 | Large models (use LoRA) |
|
| 291 |
+
| 13B+ params | `a100-large`, `a10g-largex2` | ~20‑30 | Very large (use LoRA) |
|
| 292 |
+
|
| 293 |
+
**GPU Flavors:** cpu-basic/upgrade/performance/xl, t4-small/medium, l4x1/x4, a10g‑small/large/largex2/largex4, a100‑large, h100/h100x8
|
| 294 |
+
|
| 295 |
+
**Guidelines:**
|
| 296 |
+
- Use **LoRA/PEFT** for models >7B to reduce memory.
|
| 297 |
+
- Multi‑GPU automatically handled by TRL/Accelerate.
|
| 298 |
+
- Start with smaller hardware for testing.
|
| 299 |
+
|
| 300 |
+
## Critical: Saving Results to Hub
|
| 301 |
+
|
| 302 |
+
**⚠️ EPHEMERAL ENVIRONMENT—MUST PUSH TO HUB**
|
| 303 |
+
|
| 304 |
+
The Jobs environment is temporary. All files are deleted when the job ends. If the model isn’t pushed to Hub, **ALL TRAINING IS LOST**.
|
| 305 |
+
|
| 306 |
+
### Required Configuration
|
| 307 |
+
```python
|
| 308 |
+
SFTConfig(
|
| 309 |
+
push_to_hub=True,
|
| 310 |
+
hub_model_id="username/model-name", # MUST specify
|
| 311 |
+
hub_strategy="every_save", # Optional: push checkpoints
|
| 312 |
+
)
|
| 313 |
+
```
|
| 314 |
+
**In job submission:**
|
| 315 |
+
```json
|
| 316 |
+
{"secrets": {"HF_TOKEN": "$HF_TOKEN"}}
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### Verification Checklist
|
| 320 |
+
- [ ] `push_to_hub=True` set in config
|
| 321 |
+
- [ ] `hub_model_id` includes `username/repo-name`
|
| 322 |
+
- [ ] `secrets` includes `HF_TOKEN`
|
| 323 |
+
- [ ] User has write access to target repo
|
| 324 |
+
|
| 325 |
+
## Timeout Management
|
| 326 |
+
|
| 327 |
+
**⚠️ DEFAULT: 30 minutes – TOO SHORT FOR TRAINING**
|
| 328 |
+
|
| 329 |
+
### Setting Timeouts
|
| 330 |
+
```json
|
| 331 |
+
{"timeout": "2h"} # 2 hours (formats: "90m", "2h", "1.5h", or seconds as integer)
|
| 332 |
+
```
|
| 333 |
+
### Timeout Guidelines
|
| 334 |
+
| Scenario | Recommended | Notes |
|
| 335 |
+
|----------|--------------|-------|
|
| 336 |
+
| Quick demo (50‑100 examples) | 10‑30 min | Verify setup |
|
| 337 |
+
| Development training | 1‑2 hours | Small datasets |
|
| 338 |
+
| Production (3‑7B model) | 4‑6 hours | Full datasets |
|
| 339 |
+
| Large model with LoRA | 3‑6 hours | Depends on dataset |
|
| 340 |
+
|
| 341 |
+
**Always add 20‑30% buffer** for model/dataset loading, checkpoint saving, Hub push, and network delays.
|
| 342 |
+
|
| 343 |
+
**On timeout:** Job killed immediately, all unsaved progress lost, must restart from the beginning.
|
| 344 |
+
|
| 345 |
+
## Cost Estimation
|
| 346 |
+
|
| 347 |
+
**Offer to estimate cost when planning jobs with known parameters.** Use `scripts/estimate_cost.py`:
|
| 348 |
+
```bash
|
| 349 |
+
uv run scripts/estimate_cost.py \
|
| 350 |
+
--model meta-llama/Llama-2-7b-hf \
|
| 351 |
+
--dataset trl-lib/Capybara \
|
| 352 |
+
--hardware a10g-large \
|
| 353 |
+
--dataset-size 16000 \
|
| 354 |
+
--epochs 3
|
| 355 |
+
```
|
| 356 |
+
Output includes estimated time, cost, recommended timeout (with buffer), and optimization suggestions.
|
| 357 |
+
|
| 358 |
+
**When to offer:** User planning a job, asks about cost/time, choosing hardware, job will run >1 hour or cost >some amount.
|
| 359 |
+
|
| 360 |
+
## Example Training Scripts
|
| 361 |
+
|
| 362 |
+
**Production‑ready templates with all best practices:**
|
| 363 |
+
- **`scripts/train_sft_example.py`** – Complete SFT training with Trackio, LoRA, checkpoints
|
| 364 |
+
- **`scripts/train_dpo_example.py`** – DPO training for preference learning
|
| 365 |
+
- **`scripts/train_grpo_example.py`** – GRPO training for online RL
|
| 366 |
+
|
| 367 |
+
These scripts demonstrate proper Hub saving, Trackio integration, checkpoint management, and optimized parameters. Pass their content inline to `hf_jobs()` or use as templates for custom scripts.
|
| 368 |
+
|
| 369 |
+
## Monitoring and Tracking
|
| 370 |
+
|
| 371 |
+
**Trackio** provides real‑time metrics visualization. See `references/trackio_guide.md` for a complete setup guide.
|
| 372 |
+
|
| 373 |
+
**Key points:**
|
| 374 |
+
- Add `trackio` to dependencies
|
| 375 |
+
- Configure trainer with `report_to="trackio"` and run name
|
| 376 |
+
- Use a default Trackio space unless user specifies otherwise
|
| 377 |
+
|
| 378 |
+
### Trackio Configuration Defaults
|
| 379 |
+
- **Space ID:** `{username}/trackio` (use "trackio" as default space name)
|
| 380 |
+
- **Run naming:** Descriptive of task, model, or purpose unless overridden
|
| 381 |
+
- **Project Name:** Use a Project Name to associate runs with a particular Project
|
| 382 |
+
|
| 383 |
+
**User overrides:** If user requests custom Trackio configuration (custom space, run naming, grouping, or additional config), apply their preferences instead of defaults.
|
| 384 |
+
|
| 385 |
+
## Dataset Validation
|
| 386 |
+
|
| 387 |
+
**Validate dataset format BEFORE launching GPU training to prevent the #1 cause of training failures: format mismatches.**
|
| 388 |
+
|
| 389 |
+
### Why Validate
|
| 390 |
+
- 50%+ of training failures are due to dataset format issues
|
| 391 |
+
- DPO especially strict: requires exact column names (`prompt`, `chosen`, `rejected`)
|
| 392 |
+
- Failed GPU jobs waste GPU‑hours and 30‑60 minutes
|
| 393 |
+
- Validation on CPU costs ~0.01 $ and takes <1 minute
|
| 394 |
+
|
| 395 |
+
### When to Validate
|
| 396 |
+
**ALWAYS validate for:**
|
| 397 |
+
- Unknown or custom datasets
|
| 398 |
+
- DPO training (CRITICAL – 90% of datasets need mapping)
|
| 399 |
+
- Any dataset not explicitly TRL‑compatible
|
| 400 |
+
|
| 401 |
+
**Skip validation for known TRL datasets:** `trl-lib/ultrachat_200k`, `trl-lib/Capybara`, etc.
|
| 402 |
+
|
| 403 |
+
### Usage
|
| 404 |
+
```python
|
| 405 |
+
hf_jobs("uv", {
|
| 406 |
+
"script": "https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py",
|
| 407 |
+
"script_args": ["--dataset", "username/dataset-name", "--split", "train"]
|
| 408 |
+
})
|
| 409 |
+
```
|
| 410 |
+
The script is fast and usually completes synchronously.
|
| 411 |
+
|
| 412 |
+
### Reading Results
|
| 413 |
+
The output shows compatibility for each training method:
|
| 414 |
+
- **`✓ READY`** – Dataset is compatible, use directly
|
| 415 |
+
- **`✗ NEEDS MAPPING`** – Compatible but needs preprocessing (mapping code provided)
|
| 416 |
+
- **`✗ INCOMPATIBLE`** – Cannot be used for this method
|
| 417 |
+
|
| 418 |
+
When mapping is needed, the output includes a **"MAPPING CODE"** section with copy‑paste‑ready Python code.
|
| 419 |
+
|
| 420 |
+
### Example Workflow
|
| 421 |
+
```python
|
| 422 |
+
# 1. Inspect dataset (costs ~0.01 $, <1 min on CPU)
|
| 423 |
+
hf_jobs("uv", {
|
| 424 |
+
"script": "https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py",
|
| 425 |
+
"script_args": ["--dataset", "argilla/distilabel-math-preference-dpo", "--split", "train"]
|
| 426 |
+
})
|
| 427 |
+
|
| 428 |
+
# 2. Check output markers:
|
| 429 |
+
# ✓ READY → proceed with training
|
| 430 |
+
# ✗ NEEDS MAPPING → apply mapping code below
|
| 431 |
+
# ✗ INCOMPATIBLE → choose different method/dataset
|
| 432 |
+
|
| 433 |
+
# 3. If mapping needed, apply before training:
|
| 434 |
+
|
| 435 |
+
def format_for_dpo(example):
|
| 436 |
+
return {
|
| 437 |
+
'prompt': example['instruction'],
|
| 438 |
+
'chosen': example['chosen_response'],
|
| 439 |
+
'rejected': example['rejected_response'],
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
dataset = dataset.map(format_for_dpo, remove_columns=dataset.column_names)
|
| 443 |
+
|
| 444 |
+
# 4. Launch training job with confidence
|
| 445 |
+
```
|
| 446 |
+
|
| 447 |
+
#### Common Scenario: DPO Format Mismatch
|
| 448 |
+
Most DPO datasets use non‑standard column names. Example:
|
| 449 |
+
```
|
| 450 |
+
Dataset has: instruction, chosen_response, rejected_response
|
| 451 |
+
DPO expects: prompt, chosen, rejected
|
| 452 |
+
```
|
| 453 |
+
The validator detects this and provides exact mapping code to fix it.
|
| 454 |
+
|
| 455 |
+
## Converting Models to GGUF
|
| 456 |
+
|
| 457 |
+
After training, convert models to **GGUF format** for use with llama.cpp, Ollama, LM Studio, and other local inference tools.
|
| 458 |
+
|
| 459 |
+
**What is GGUF:**
|
| 460 |
+
- Optimized for CPU/GPU inference with llama.cpp
|
| 461 |
+
- Supports quantization (4‑bit, 5‑bit, 8‑bit) to reduce model size
|
| 462 |
+
- Compatible with Ollama, LM Studio, Jan, GPT4All, llama.cpp
|
| 463 |
+
- Typically 2‑8 GB for 7B models (vs 14 GB unquantized)
|
| 464 |
+
|
| 465 |
+
**When to convert:**
|
| 466 |
+
- Running models locally with Ollama or LM Studio
|
| 467 |
+
- Reducing model size with quantization
|
| 468 |
+
- Deploying to edge devices
|
| 469 |
+
- Sharing models for local‑first use
|
| 470 |
+
|
| 471 |
+
**See:** `references/gguf_conversion.md` for complete conversion guide, including production‑ready conversion script, quantization options, hardware requirements, usage examples, and troubleshooting.
|
| 472 |
+
|
| 473 |
+
**Quick conversion:**
|
| 474 |
+
```python
|
| 475 |
+
hf_jobs("uv", {
|
| 476 |
+
"script": "<see references/gguf_conversion.md for complete script>",
|
| 477 |
+
"flavor": "a10g-large",
|
| 478 |
+
"timeout": "45m",
|
| 479 |
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"},
|
| 480 |
+
"env": {
|
| 481 |
+
"ADAPTER_MODEL": "username/my-finetuned-model",
|
| 482 |
+
"BASE_MODEL": "Qwen/Qwen2.5-0.5B",
|
| 483 |
+
"OUTPUT_REPO": "username/my-model-gguf"
|
| 484 |
+
}
|
| 485 |
+
})
|
| 486 |
+
```
|
| 487 |
+
|
| 488 |
+
## Common Training Patterns
|
| 489 |
+
See `references/training_patterns.md` for detailed examples including:
|
| 490 |
+
- Quick demo (5‑10 minutes)
|
| 491 |
+
- Production with checkpoints
|
| 492 |
+
- Multi‑GPU training
|
| 493 |
+
- DPO training (preference learning)
|
| 494 |
+
- GRPO training (online RL)
|
| 495 |
+
|
| 496 |
+
## Common Failure Modes
|
| 497 |
+
|
| 498 |
+
### Out of Memory (OOM)
|
| 499 |
+
**Fix (try in order):**
|
| 500 |
+
1. Reduce batch size: `per_device_train_batch_size=1`, increase `gradient_accumulation_steps=8`. Effective batch size = `per_device_train_batch_size` × `gradient_accumulation_steps`. Keep effective batch size ≈128 for best performance.
|
| 501 |
+
2. Enable: `gradient_checkpointing=True`
|
| 502 |
+
3. Upgrade hardware: t4‑small → l4x1, a10g‑small → a10g‑large, etc.
|
| 503 |
+
|
| 504 |
+
### Dataset Misformatted
|
| 505 |
+
**Fix:**
|
| 506 |
+
1. Validate first with dataset inspector (see Dataset Validation).
|
| 507 |
+
2. Check output for compatibility markers (✓ READY, ✗ NEEDS MAPPING, ✗ INCOMPATIBLE).
|
| 508 |
+
3. Apply mapping code from inspector output if needed.
|
| 509 |
+
|
| 510 |
+
### Job Timeout
|
| 511 |
+
**Fix:**
|
| 512 |
+
1. Check logs for actual runtime: `hf_jobs("logs", {"job_id": "..."})`
|
| 513 |
+
2. Increase timeout with buffer: `"timeout": "3h"` (add 30% to estimated time)
|
| 514 |
+
3. Reduce training: lower `num_train_epochs`, use smaller dataset, enable `max_steps`
|
| 515 |
+
4. Save checkpoints: `save_strategy="steps"`, `save_steps=500`, `hub_strategy="every_save"`
|
| 516 |
+
|
| 517 |
+
**Note:** Default 30 min is insufficient for real training. Minimum 1‑2 hours.
|
| 518 |
+
|
| 519 |
+
### Hub Push Failures
|
| 520 |
+
**Fix:**
|
| 521 |
+
1. Add to job: `secrets={"HF_TOKEN": "$HF_TOKEN"}`
|
| 522 |
+
2. Add to config: `push_to_hub=True`, `hub_model_id="username/model-name"`
|
| 523 |
+
3. Verify auth: `hf_whoami()`
|
| 524 |
+
4. Check token has write permissions and repo exists (or set `hub_private_repo=True`)
|
| 525 |
+
|
| 526 |
+
### Missing Dependencies
|
| 527 |
+
**Fix:** Add to PEP 723 header:
|
| 528 |
+
```python
|
| 529 |
+
# /// script
|
| 530 |
+
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "missing-package"]
|
| 531 |
+
# ///
|
| 532 |
+
```
|
| 533 |
+
|
| 534 |
+
## Troubleshooting
|
| 535 |
+
|
| 536 |
+
**Common issues:**
|
| 537 |
+
- Job times out → Increase timeout, reduce epochs/dataset, use smaller model/LoRA
|
| 538 |
+
- Model not saved to Hub → Check `push_to_hub=True`, `hub_model_id`, `secrets=HF_TOKEN`
|
| 539 |
+
- Out of Memory (OOM) → Reduce batch size, increase gradient accumulation, enable LoRA, use larger GPU
|
| 540 |
+
- Dataset format error → Validate with dataset inspector (see Dataset Validation section)
|
| 541 |
+
- Authentication errors → Check `hf_whoami()`, token permissions, `secrets` parameter
|
| 542 |
+
|
| 543 |
+
**See:** `references/troubleshooting.md` for complete troubleshooting guide
|
| 544 |
+
|
| 545 |
+
## Resources
|
| 546 |
+
|
| 547 |
+
### References (In This Skill)
|
| 548 |
+
- `references/training_methods.md` – Overview of SFT, DPO, GRPO, KTO, PPO, Reward Modeling
|
| 549 |
+
- `references/training_patterns.md` – Common training patterns and examples
|
| 550 |
+
- `references/unsloth.md` – Unsloth for fast VLM training (~2x speed, 60% less VRAM)
|
| 551 |
+
- `references/gguf_conversion.md` – Complete GGUF conversion guide
|
| 552 |
+
- `references/trackio_guide.md` – Trackio monitoring setup
|
| 553 |
+
- `references/hardware_guide.md` – Hardware specs and selection
|
| 554 |
+
- `references/hub_saving.md` – Hub authentication troubleshooting
|
| 555 |
+
- `references/troubleshooting.md` – Common issues and solutions
|
| 556 |
+
- `references/local_training_macos.md` – Local training on macOS
|
| 557 |
+
|
| 558 |
+
### Scripts (In This Skill)
|
| 559 |
+
- `scripts/train_sft_example.py` – Production SFT template
|
| 560 |
+
- `scripts/train_dpo_example.py` – Production DPO template
|
| 561 |
+
- `scripts/train_grpo_example.py` – Production GRPO template
|
| 562 |
+
- `scripts/unsloth_sft_example.py` – Unsloth text LLM training template (faster, less VRAM)
|
| 563 |
+
- `scripts/estimate_cost.py` – Estimate time and cost (offer when appropriate)
|
| 564 |
+
- `scripts/convert_to_gguf.py` – Complete GGUF conversion script
|
| 565 |
+
|
| 566 |
+
### External Scripts
|
| 567 |
+
- [Dataset Inspector](https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py) – Validate dataset format before training (use via `uv run` or `hf_jobs`)
|
| 568 |
+
|
| 569 |
+
### External Links
|
| 570 |
+
- [TRL Documentation](https://huggingface.co/docs/trl)
|
| 571 |
+
- [TRL Jobs Training Guide](https://huggingface.co/docs/trl/en/jobs_training)
|
| 572 |
+
- [TRL Jobs Package](https://github.com/huggingface/trl-jobs)
|
| 573 |
+
- [HF Jobs Documentation](https://huggingface.co/docs/huggingface_hub/guides/jobs)
|
| 574 |
+
- [TRL Example Scripts](https://github.com/huggingface/trl/tree/main/examples/scripts)
|
| 575 |
+
- [UV Scripts Guide](https://docs.astral.sh/uv/guides/scripts/)
|
| 576 |
+
- [UV Scripts Organization](https://huggingface.co/uv-scripts)
|
| 577 |
+
|
| 578 |
+
## Key Takeaways
|
| 579 |
+
1. **Submit scripts inline** – The `script` parameter accepts Python code directly; no file saving required unless user requests.
|
| 580 |
+
2. **Jobs are asynchronous** – Don't wait/poll; let user check when ready.
|
| 581 |
+
3. **Always set timeout** – Default 30 min is insufficient; minimum 1‑2 hours recommended.
|
| 582 |
+
4. **Always enable Hub push** – Environment is ephemeral; without push, all results lost.
|
| 583 |
+
5. **Include Trackio** – Use example scripts as templates for real‑time monitoring.
|
| 584 |
+
6. **Offer cost estimation** – When parameters are known, use `scripts/estimate_cost.py`.
|
| 585 |
+
7. **Validate dataset format** before training with dataset inspector (see Dataset Validation section).
|
| 586 |
+
8. **Choose appropriate hardware** for model size; use LoRA for models >7B to reduce memory.
|
| 587 |
+
9. **Use hf_doc_fetch/hf_doc_search** for latest TRL documentation.
|
| 588 |
+
10. **Use UV scripts (Approach 1)** – Default to `hf_jobs("uv", {...})` with inline scripts; TRL maintained scripts for standard training; avoid bash `trl-jobs` commands in Claude Code.
|
| 589 |
+
|
| 590 |
+
Please address this message and continue with your tasks.
|
test/ml_learning_test.exs
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule MLLearningTest do
|
| 2 |
+
use ExUnit.Case
|
| 3 |
+
doctest MLLearning
|
| 4 |
+
|
| 5 |
+
test "greets the world" do
|
| 6 |
+
assert MLLearning.hello() == :world
|
| 7 |
+
end
|
| 8 |
+
end
|
test/test_helper.exs
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ExUnit.start()
|