diff --git a/.github/workflows/check-empty-cells.yml b/.github/workflows/check-empty-cells.yml index 657d9b5dab1a9d461f71e2ce62f1df6da6439956..17d49936a0d5772cea9ce9c0fcaece5be846ca51 100644 --- a/.github/workflows/check-empty-cells.yml +++ b/.github/workflows/check-empty-cells.yml @@ -17,6 +17,9 @@ jobs: - name: πŸ”„ Checkout code uses: actions/checkout@v4 + - name: πŸš€ Install uv + uses: astral-sh/setup-uv@v4 + - name: 🐍 Set up Python uses: actions/setup-python@v5 with: @@ -24,7 +27,7 @@ jobs: - name: πŸ” Check for empty cells run: | - python scripts/check_empty_cells.py + make check_empty - name: πŸ“Š Report results if: failure() diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 07f2d069755c31f7859e4074279779a7661d8865..c94dba9d8210001874130e6ac07e7b9f313dfcfd 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -32,7 +32,7 @@ jobs: - name: πŸ› οΈ Export notebooks run: | - python scripts/build.py + make build - name: πŸ“€ Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/.gitignore b/.gitignore index 8a15952d45fcd80c20777b1f7e6e7289412200ea..90e00cc9cb7b25b64829a69a2a88057ee6921530 100644 --- a/.gitignore +++ b/.gitignore @@ -175,3 +175,12 @@ __marimo__ # Generated site content _site/ + +# Editors +*~ + +# Temporary build files +tmp/ +example.db +example.db.wal +log_data_filtered*.* diff --git a/.typos.toml b/.typos.toml index 1be8d6ef5ea558f897f9df642d8249af405c812e..971fb52d2c912068abb68630c8687a07a6d5282f 100644 --- a/.typos.toml +++ b/.typos.toml @@ -15,7 +15,10 @@ extend-ignore-re = [ # Words to explicitly accept [default.extend-words] +bimap = "bimap" pn = "pn" +setp = "setp" +Plas = "Plas" # You can also exclude specific files or directories if needed # [files] diff --git a/Makefile b/Makefile index 66476f3384fd818046c11117b4c9d1ebfd7eb283..f7e0801ce87b748f392ca4d3bac3f1d81e1901da 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,116 @@ -# Default target. 
-all : commands +ROOT := . +SITE := _site +TMP := ./tmp +LESSON_DATA := ${TMP}/lessons.json +TEMPLATES := $(wildcard templates/*.html) -## commands : show all commands. -commands : +NOTEBOOK_INDEX := $(wildcard */index.md) +NOTEBOOK_DIR := $(patsubst %/index.md,%,${NOTEBOOK_INDEX}) +NOTEBOOK_SRC := $(foreach dir,$(NOTEBOOK_DIR),$(wildcard $(dir)/??_*.py)) +NOTEBOOK_OUT := $(patsubst %.py,${SITE}/%.html,$(NOTEBOOK_SRC)) + +DATABASES := \ +sql/public/lab.db \ +sql/public/penguins.db \ +sql/public/survey.db + +MARIMO := uv run marimo +PYTHON := uv run python + +# Default target +all: commands + +## commands : show all commands +commands: @grep -h -E '^##' ${MAKEFILE_LIST} | sed -e 's/## //g' | column -t -s ':' -## install: install minimal required packages into current environment. +## install: install required packages install: - uv pip install marimo jinja2 markdown + uv pip install -r requirements.txt + +## check: run all simple checks +check: + -@make check_empty + -@make check_titles + -@make check_typos + -@make check_packages -## build: build entire site. -build: - rm -rf _site - uv run scripts/build.py +## check_exec: run notebooks to check for runtime errors +check_exec: + @if [ -z "$(NOTEBOOKS)" ]; then \ + bash bin/run_notebooks.sh $(NOTEBOOK_SRC); \ + else \ + bash bin/run_notebooks.sh $(NOTEBOOKS); \ + fi -## serve: run local web server without rebuilding. +## build: build website +build: ${LESSON_DATA} ${NOTEBOOK_OUT} ${TEMPLATES} + ${PYTHON} bin/build.py --root ${ROOT} --output ${SITE} --data ${LESSON_DATA} + +## links: check links locally (while 'make serve') +links: + linkchecker -F text http://localhost:8000 + +## serve: run local web server without rebuilding serve: - uv run python -m http.server --directory _site + ${PYTHON} -m http.server --directory ${SITE} + +## databases: rebuild datasets for SQL lessons +databases: ${DATABASES} -## clean: clean up stray files. +## ---: --- + +## clean: clean up stray files clean: @find . 
-name '*~' -exec rm {} + @find . -name '.DS_Store' -exec rm {} + + @rm -rf ${TMP} + @rm -f log_data_filtered*.* + +## check_empty: check for empty cells +check_empty: + @${PYTHON} bin/check_empty_cells.py + +## check_titles: check for missing titles in notebooks +check_titles: + @${PYTHON} bin/check_missing_titles.py + +## check_packages: check for inconsistent package versions across notebooks +check_packages: + @if [ -z "$(NOTEBOOKS)" ]; then \ + ${PYTHON} bin/check_notebook_packages.py $(NOTEBOOK_SRC); \ + else \ + ${PYTHON} bin/check_notebook_packages.py $(NOTEBOOKS); \ + fi + +## check_typos: check for typos +check_typos: + @typos ${TEMPLATES} ${NOTEBOOK_INDEX} ${NOTEBOOK_SRC} + +## extract: extract lesson data +extract: ${LESSON_DATA} + +# +# subsidiary targets +# + +tmp/lessons.json: $(NOTEBOOK_INDEX) + ${PYTHON} bin/extract.py --root ${ROOT} --data ${LESSON_DATA} + +${SITE}/%.html: %.py + ${MARIMO} export html-wasm --force --mode edit $< -o $@ --sandbox + +sql/public/lab.db: bin/create_sql_lab.sql + @rm -f $@ + @mkdir -p sql/public + sqlite3 $@ < $< + +sql/public/penguins.db: bin/create_sql_penguins.py data/penguins.csv + @rm -f $@ + @mkdir -p sql/public + ${PYTHON} $< data/penguins.csv $@ + +sql/public/survey.db: bin/create_sql_survey.py + @rm -f $@ + @mkdir -p sql/public + ${PYTHON} $< $@ 192837 diff --git a/_server/README.md b/_server/README.md index 80de9a7fec1ef68c920bdd552c41c0995971dfb6..18c3d6a2e500c4344bc3f7c87e5bc07c56499e67 100644 --- a/_server/README.md +++ b/_server/README.md @@ -1,8 +1,3 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - # marimo learn server This folder contains server code for hosting marimo apps. 
diff --git a/_server/main.py b/_server/main.py index 16f3179e75ba691c48c517e6c9fdaa3db23df1e3..a518aca7524a2f3c37d729be01320bfb19b87db5 100644 --- a/_server/main.py +++ b/_server/main.py @@ -6,14 +6,14 @@ # "starlette", # "python-dotenv", # "pydantic", -# "duckdb==1.3.2", -# "altair==5.5.0", +# "duckdb==1.4.4", +# "altair==6.0.0", # "beautifulsoup4==4.13.3", # "httpx==0.28.1", # "marimo", # "nest-asyncio==1.6.0", # "numba==0.61.0", -# "numpy==2.1.3", +# "numpy==2.4.3", # "polars==1.24.0", # ] # /// diff --git a/altair/01_introduction.py b/altair/01_introduction.py new file mode 100644 index 0000000000000000000000000000000000000000..5e350001b5b35d9891a724c0a6f67f061806f60a --- /dev/null +++ b/altair/01_introduction.py @@ -0,0 +1,671 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# "vega_datasets==0.9.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Introduction to Altair + + [Altair](https://altair-viz.github.io/) is a declarative statistical visualization library for Python. Altair offers a powerful and concise visualization grammar for quickly building a wide range of statistical graphics. + + By *declarative*, we mean that you can provide a high-level specification of *what* you want the visualization to include, in terms of *data*, *graphical marks*, and *encoding channels*, rather than having to specify *how* to implement the visualization in terms of for-loops, low-level drawing commands, *etc*. The key idea is that you declare links between data fields and visual encoding channels, such as the x-axis, y-axis, color, *etc*. The rest of the plot details are handled automatically. Building on this declarative plotting idea, a surprising range of simple to sophisticated visualizations can be created using a concise grammar. 
+ + Altair is based on [Vega-Lite](https://vega.github.io/vega-lite/), a high-level grammar of interactive graphics. Altair provides a friendly Python [API (Application Programming Interface)](https://en.wikipedia.org/wiki/Application_programming_interface) that generates Vega-Lite specifications in [JSON (JavaScript Object Notation)](https://en.wikipedia.org/wiki/JSON) format. Environments such as Jupyter Notebooks, JupyterLab, and Colab can then take this specification and render it directly in the web browser. To learn more about the motivation and basic concepts behind Altair and Vega-Lite, watch the [Vega-Lite presentation video from OpenVisConf 2017](https://www.youtube.com/watch?v=9uaHRWj04D4). + + This notebook will guide you through the basic process of creating visualizations in Altair. First, you will need to make sure you have the Altair package and its dependencies installed (for more, see the [Altair installation documentation](https://altair-viz.github.io/getting_started/installation.html)), or you are using a notebook environment that includes the dependencies pre-installed. + + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Imports + + To start, we must import the necessary libraries: Pandas for data frames and Altair for visualization. + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return alt, pd + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Renderers + + Depending on your environment, you may need to specify a [renderer](https://altair-viz.github.io/user_guide/display_frontends.html) for Altair. If you are using __JupyterLab__, __Jupyter Notebook__, or __Google Colab__ with a live Internet connection you should not need to do anything. 
Otherwise, please read the documentation for [Displaying Altair Charts](https://altair-viz.github.io/user_guide/display_frontends.html). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Data + + Data in Altair is built around the Pandas data frame, which consists of a set of named data *columns*. We will also regularly refer to data columns as data *fields*. + + When using Altair, datasets are commonly provided as data frames. Alternatively, Altair can also accept a URL to load a network-accessible dataset. As we will see, the named columns of the data frame are an essential piece of plotting with Altair. + + We will often use datasets from the [vega-datasets](https://github.com/vega/vega-datasets) repository. Some of these datasets are directly available as Pandas data frames: + """) + return + + +@app.cell +def _(): + from vega_datasets import data # import vega_datasets + cars = data.cars() # load cars data as a Pandas data frame + cars.head() # display the first five rows + return cars, data + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Datasets in the vega-datasets collection can also be accessed via URLs: + """) + return + + +@app.cell +def _(data): + data.cars.url + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Dataset URLs can be passed directly to Altair (for supported formats like JSON and [CSV](https://en.wikipedia.org/wiki/Comma-separated_values)), or loaded into a Pandas data frame like so: + """) + return + + +@app.cell +def _(data, pd): + pd.read_json(data.cars.url).head() # load JSON data into a data frame + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For more information about data frames - and some useful transformations to prepare Pandas data frames for plotting with Altair! - see the [Specifying Data with Altair documentation](https://altair-viz.github.io/user_guide/data.html). 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Weather Data + + Statistical visualization in Altair begins with ["tidy"](http://vita.had.co.nz/papers/tidy-data.html) data frames. Here, we'll start by creating a simple data frame (`df`) containing the average precipitation (`precip`) for a given `city` and `month` : + """) + return + + +@app.cell +def _(pd): + df = pd.DataFrame({ + 'city': ['Seattle', 'Seattle', 'Seattle', 'New York', 'New York', 'New York', 'Chicago', 'Chicago', 'Chicago'], + 'month': ['Apr', 'Aug', 'Dec', 'Apr', 'Aug', 'Dec', 'Apr', 'Aug', 'Dec'], + 'precip': [2.68, 0.87, 5.31, 3.94, 4.13, 3.58, 3.62, 3.98, 2.56] + }) + + df + return (df,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## The Chart Object + + The fundamental object in Altair is the `Chart`, which takes a data frame as a single argument: + """) + return + + +@app.cell +def _(alt, df): + _chart = alt.Chart(df) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + So far, we have defined the `Chart` object and passed it the simple data frame we generated above. We have not yet told the chart to *do* anything with the data. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Marks and Encodings + + With a chart object in hand, we can now specify how we would like the data to be visualized. We first indicate what kind of graphical *mark* (geometric shape) we want to use to represent the data. We can set the `mark` attribute of the chart object using the the `Chart.mark_*` methods. + + For example, we can show the data as a point using `Chart.mark_point()`: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Here the rendering consists of one point per row in the dataset, all plotted on top of each other, since we have not yet specified positions for these points. 
+ + To visually separate the points, we can map various *encoding channels*, or *channels* for short, to fields in the dataset. For example, we could *encode* the field `city` of the data using the `y` channel, which represents the y-axis position of the points. To specify this, use the `encode` method: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point().encode( + y='city', + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `encode()` method builds a key-value mapping between encoding channels (such as `x`, `y`, `color`, `shape`, `size`, *etc.*) to fields in the dataset, accessed by field name. For Pandas data frames, Altair automatically determines an appropriate data type for the mapped column, which in this case is the *nominal* type, indicating unordered, categorical values. + + Though we've now separated the data by one attribute, we still have multiple points overlapping within each category. Let's further separate these by adding an `x` encoding channel, mapped to the `'precip'` field: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point().encode( + x='precip', + y='city' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Seattle exhibits both the least-rainiest and most-rainiest months!_ + + The data type of the `'precip'` field is again automatically inferred by Altair, and this time is treated as a *quantitative* type (that is, a real-valued number). We see that grid lines and appropriate axis titles are automatically added as well. + + Above we have specified key-value pairs using keyword arguments (`x='precip'`). In addition, Altair provides construction methods for encoding definitions, using the syntax `alt.X('precip')`. This alternative is useful for providing more parameters to an encoding, as we will see later in this notebook. 
+ """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point().encode( + alt.X('precip'), + alt.Y('city') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The two styles of specifying encodings can be interleaved: `x='precip', alt.Y('city')` is also a valid input to the `encode` function. + + In the examples above, the data type for each field is inferred automatically based on its type within the Pandas data frame. We can also explicitly indicate the data type to Altair by annotating the field name: + + - `'b:N'` indicates a *nominal* type (unordered, categorical data), + - `'b:O'` indicates an *ordinal* type (rank-ordered data), + - `'b:Q'` indicates a *quantitative* type (numerical data with meaningful magnitudes), and + - `'b:T'` indicates a *temporal* type (date/time data) + + For example, `alt.X('precip:N')`. + + Explicit annotation of data types is necessary when data is loaded from an external URL directly by Vega-Lite (skipping Pandas entirely), or when we wish to use a type that differs from the type that was automatically inferred. + + What do you think will happen to our chart above if we treat `precip` as a nominal or ordinal variable, rather than a quantitative variable? _Modify the code above and find out!_ + + We will take a closer look at data types and encoding channels in the next notebook of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum#data-visualization-curriculum). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Data Transformation: Aggregation + + To allow for more flexibility in how data are visualized, Altair has a built-in syntax for *aggregation* of data. 
For example, we can compute the average of all values by specifying an aggregation function along with the field name: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point().encode( + x='average(precip)', + y='city' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now within each x-axis category, we see a single point reflecting the *average* of the values within that category. + + _Does Seattle really have the lowest average precipitation of these cities? (It does!) Still, how might this plot mislead? Which months are included? What counts as precipitation?_ + + Altair supports a variety of aggregation functions, including `count`, `min` (minimum), `max` (maximum), `average`, `median`, and `stdev` (standard deviation). In a later notebook, we will take a tour of data transformations, including aggregation, sorting, filtering, and creation of new derived fields using calculation formulas. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Changing the Mark Type + + Let's say we want to represent our aggregated values using rectangular bars rather than circular points. We can do this by replacing `Chart.mark_point` with `Chart.mark_bar`: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_bar().encode( + x='average(precip)', + y='city' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Because the nominal field `a` is mapped to the `y`-axis, the result is a horizontal bar chart. To get a vertical bar chart, we can simply swap the `x` and `y` keywords: + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_bar().encode( + x='city', + y='average(precip)' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Customizing a Visualization + + By default Altair / Vega-Lite make some choices about properties of the visualization, but these can be changed using methods to customize the look of the visualization. 
For example, we can specify the axis titles using the `axis` attribute of channel classes, we can modify scale properties using the `scale` attribute, and we can specify the color of the marking by setting the `color` keyword of the `Chart.mark_*` methods to any valid [CSS color string](https://developer.mozilla.org/en-US/docs/Web/CSS/color_value): + """) + return + + +@app.cell +def _(alt, df): + alt.Chart(df).mark_point(color='firebrick').encode( + alt.X('precip', scale=alt.Scale(type='log'), axis=alt.Axis(title='Log-Scaled Values')), + alt.Y('city', axis=alt.Axis(title='Category')), + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + A subsequent module will explore the various options available for scales, axes, and legends to create customized charts. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Multiple Views + + As we've seen above, the Altair `Chart` object represents a plot with a single mark type. What about more complicated diagrams, involving multiple charts or layers? Using a set of *view composition* operators, Altair can take multiple chart definitions and combine them to create more complex views. + + As a starting point, let's plot the cars dataset in a line chart showing the average mileage by the year of manufacture: + """) + return + + +@app.cell +def _(alt, cars): + alt.Chart(cars).mark_line().encode( + alt.X('Year'), + alt.Y('average(Miles_per_Gallon)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To augment this plot, we might like to add `circle` marks for each averaged data point. (The `circle` mark is just a convenient shorthand for `point` marks that used filled circles.) + + We can start by defining each chart separately: first a line plot, then a scatter plot. We can then use the `layer` operator to combine the two into a layered chart. 
Here we use the shorthand `+` (plus) operator to invoke layering: + """) + return + + +@app.cell +def _(alt, cars): + line = alt.Chart(cars).mark_line().encode( + alt.X('Year'), + alt.Y('average(Miles_per_Gallon)') + ) + + point = alt.Chart(cars).mark_circle().encode( + alt.X('Year'), + alt.Y('average(Miles_per_Gallon)') + ) + + line + point + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can also create this chart by *reusing* and *modifying* a previous chart definition! Rather than completely re-write a chart, we can start with the line chart, then invoke the `mark_point` method to generate a new chart definition with a different mark type: + """) + return + + +@app.cell +def _(alt, cars): + mpg = alt.Chart(cars).mark_line().encode( + alt.X('Year'), + alt.Y('average(Miles_per_Gallon)') + ) + + mpg + mpg.mark_circle() + return (mpg,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + (The need to place points on lines is so common, the `line` mark also includes a shorthand to generate a new layer for you. Trying adding the argument `point=True` to the `mark_line` method!) + + Now, what if we'd like to see this chart alongside other plots, such as the average horsepower over time? + + We can use *concatenation* operators to place multiple charts side-by-side, either vertically or horizontally. 
Here, we'll use the `|` (pipe) operator to perform horizontal concatenation of two charts: + """) + return + + +@app.cell +def _(alt, cars, mpg): + hp = alt.Chart(cars).mark_line().encode( + alt.X('Year'), + alt.Y('average(Horsepower)') + ) + + (mpg + mpg.mark_circle()) | (hp + hp.mark_circle()) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that, in this dataset, over the 1970s and early '80s the average fuel efficiency improved while the average horsepower decreased._ + + A later notebook will focus on *view composition*, including not only layering and concatenation, but also the `facet` operator for splitting data into sub-plots and the `repeat` operator to concisely generate concatenated charts from a template. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Interactivity + + In addition to basic plotting and view composition, one of Altair and Vega-Lite's most exciting features is its support for interaction. + + To create a simple interactive plot that supports panning and zooming, we can invoke the `interactive()` method of the `Chart` object. 
In the chart below, click and drag to *pan* or use the scroll wheel to *zoom*: + """) + return + + +@app.cell +def _(alt, cars): + alt.Chart(cars).mark_point().encode( + x='Horsepower', + y='Miles_per_Gallon', + color='Origin', + ).interactive() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To provide more details upon mouse hover, we can use the `tooltip` encoding channel: + """) + return + + +@app.cell +def _(alt, cars): + alt.Chart(cars).mark_point().encode( + x='Horsepower', + y='Miles_per_Gallon', + color='Origin', + tooltip=['Name', 'Origin'] # show Name and Origin in a tooltip + ).interactive() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For more complex interactions, such as linked charts and cross-filtering, Altair provides a *selection* abstraction for defining interactive selections and then binding them to components of a chart. We will cover this is in detail in a later notebook. + + Below is a more complex example. The upper histogram shows the count of cars per year and uses an interactive selection to modify the opacity of points in the lower scatter plot, which shows horsepower versus mileage. + + _Drag out an interval in the upper chart and see how it affects the points in the lower chart. 
As you examine the code, **don't worry if parts don't make sense yet!** This is an aspirational example, and we will fill in all the needed details over the course of the different notebooks._ + """) + return + + +@app.cell +def _(alt, cars): + # create an interval selection over an x-axis encoding + brush = alt.selection_interval(encodings=['x']) + + # determine opacity based on brush + opacity = alt.condition(brush, alt.value(0.9), alt.value(0.1)) + + # an overview histogram of cars per year + # add the interval brush to select cars over time + overview = alt.Chart(cars).mark_bar().encode( + alt.X('Year:O', timeUnit='year', # extract year unit, treat as ordinal + axis=alt.Axis(title=None, labelAngle=0) # no title, no label angle + ), + alt.Y('count()', title=None), # counts, no axis title + opacity=opacity + ).add_params( + brush # add interval brush selection to the chart + ).properties( + width=400, # set the chart width to 400 pixels + height=50 # set the chart height to 50 pixels + ) + + # a detail scatterplot of horsepower vs. mileage + # modulate point opacity based on the brush selection + detail = alt.Chart(cars).mark_point().encode( + alt.X('Horsepower'), + alt.Y('Miles_per_Gallon'), + # set opacity based on brush selection + opacity=opacity + ).properties(width=400) # set chart width to match the first chart + + # vertically concatenate (vconcat) charts using the '&' operator + overview & detail + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Aside: Examining the JSON Output + + As a Python API to Vega-Lite, Altair's main purpose is to convert plot specifications to a JSON string that conforms to the Vega-Lite schema. 
Using the `Chart.to_json` method, we can inspect the JSON specification that Altair is exporting and sending to Vega-Lite: + """) + return + + +@app.cell +def _(alt, df): + _chart = alt.Chart(df).mark_bar().encode(x='average(precip)', y='city') + print(_chart.to_json()) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Notice here that `encode(x='average(precip)')` has been expanded to a JSON structure with a `field` name, a `type` for the data, and includes an `aggregate` field. The `encode(y='city')` statement has been expanded similarly. + + As we saw earlier, Altair's shorthand syntax includes a way to specify the type of the field as well: + """) + return + + +@app.cell +def _(alt): + _x = alt.X('average(precip):Q') + print(_x.to_json()) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + This short-hand is equivalent to spelling-out the attributes by name: + """) + return + + +@app.cell +def _(alt): + _x = alt.X(aggregate='average', field='precip', type='quantitative') + print(_x.to_json()) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Publishing a Visualization + + Once you have visualized your data, perhaps you would like to publish it somewhere on the web. This can be done straightforwardly using the [vega-embed JavaScript package](https://github.com/vega/vega-embed). A simple example of a stand-alone HTML document can be generated for any chart using the `Chart.save` method: + + ```python + chart = alt.Chart(df).mark_bar().encode( + x='average(precip)', + y='city', + ) + chart.save('chart.html') + ``` + + + The basic HTML template produces output that looks like this, where the JSON specification for your plot produced by `Chart.to_json` should be stored in the `spec` JavaScript variable: + + ```html + + + + + + + + +
+ + + + ``` + + The `Chart.save` method provides a convenient way to save such HTML output to file. For more information on embedding Altair/Vega-Lite, see the [documentation of the vega-embed project](https://github.com/vega/vega-embed). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Next Steps + + πŸŽ‰ Hooray, you've completed the introduction to Altair! In the next notebook, we will dive deeper into creating visualizations using Altair's model of data types, graphical marks, and visual encoding channels. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/02_marks_encoding.py b/altair/02_marks_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b088d708e77e4a536e2f2e72cba119c284d6e2 --- /dev/null +++ b/altair/02_marks_encoding.py @@ -0,0 +1,1126 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# "vega_datasets==0.9.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Data Types, Graphical Marks, and Visual Encoding Channels + + A visualization represents data using a collection of _graphical marks_ (bars, lines, points, etc.). The attributes of a mark — such as its position, shape, size, or color — serve as _channels_ through which we can encode underlying data values. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + With a basic framework of _data types_, _marks_, and _encoding channels_, we can concisely create a wide variety of visualizations. In this notebook, we explore each of these elements and show how to use them to create custom statistical graphics. 
+ + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return (alt,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Global Development Data + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We will be visualizing global health and population data for a number of countries, over the time period of 1955 to 2005. The data was collected by the [Gapminder Foundation](https://www.gapminder.org/) and shared in [Hans Rosling's popular TED talk](https://www.youtube.com/watch?v=hVimVzgtD6w). If you haven't seen the talk, we encourage you to watch it first! + + Let's first load the dataset from the [vega-datasets](https://github.com/vega/vega-datasets) collection into a Pandas data frame. + """) + return + + +@app.cell +def _(): + from vega_datasets import data as vega_data + data = vega_data.gapminder() + return (data,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + How big is the data? + """) + return + + +@app.cell +def _(data): + data.shape + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + 693 rows and 6 columns! Let's take a peek at the data content: + """) + return + + +@app.cell +def _(data): + data.head(5) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For each `country` and `year` (in 5-year intervals), we have measures of fertility in terms of the number of children per woman (`fertility`), life expectancy in years (`life_expect`), and total population (`pop`). + + We also see a `cluster` field with an integer code. What might this represent? We'll try and solve this mystery as we visualize the data! 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's also create a smaller data frame, filtered down to values for the year 2000 only: + """) + return + + +@app.cell +def _(data): + data2000 = data.loc[data['year'] == 2000] + return (data2000,) + + +@app.cell +def _(data2000): + data2000.head(5) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Data Types + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The first ingredient in effective visualization is the input data. Data values can represent different forms of measurement. What kinds of comparisons do those measurements support? And what kinds of visual encodings then support those comparisons? + + We will start by looking at the basic data types that Altair uses to inform visual encoding choices. These data types determine the kinds of comparisons we can make, and thereby guide our visualization design decisions. + + ### Nominal (N) + + *Nominal* data (also called *categorical* data) consist of category names. + + With nominal data we can compare the equality of values: *is value A the same or different than value B? (A = B)*, supporting statements like β€œA is equal to B” or β€œA is not equal to B”. + In the dataset above, the `country` field is nominal. + + When visualizing nominal data we should readily be able to see if values are the same or different: position, color hue (blue, red, green, *etc.*), and shape can help. However, using a size channel to encode nominal data might mislead us, suggesting rank-order or magnitude differences among values that do not exist! + + ### Ordinal (O) + + *Ordinal* data consist of values that have a specific ordering. + + With ordinal data we can compare the rank-ordering of values: *does value A come before or after value B? (A < B)*, supporting statements like β€œA is less than B” or β€œA is greater than B”. + In the dataset above, we can treat the `year` field as ordinal. 
+
+    When visualizing ordinal data, we should perceive a sense of rank-order. Position, size, or color value (brightness) might be appropriate, whereas color hue (which is not perceptually ordered) would be less appropriate.
+
+    ### Quantitative (Q)
+
+    With *quantitative* data we can measure numerical differences among values. There are multiple sub-types of quantitative data:
+
+    For *interval* data we can measure the distance (interval) between points: *what is the distance to value A from value B? (A - B)*, supporting statements such as "A is 12 units away from B".
+
+    For *ratio* data the zero-point is meaningful and so we can also measure proportions or scale factors: *value A is what proportion of value B? (A / B)*, supporting statements such as "A is 10% of B" or "B is 7 times larger than A".
+
+    In the dataset above, `year` is a quantitative interval field (the value of year "zero" is subjective), whereas `fertility` and `life_expect` are quantitative ratio fields (zero is meaningful for calculating proportions).
+    Vega-Lite represents quantitative data, but does not make a distinction between interval and ratio types.
+
+    Quantitative values can be visualized using position, size, or color value, among other channels. An axis with a zero baseline is essential for proportional comparisons of ratio values, but can be safely omitted for interval comparisons.
+
+    ### Temporal (T)
+
+    *Temporal* values measure time points or intervals. This type is a special case of quantitative values (timestamps) with rich semantics and conventions (i.e., the [Gregorian calendar](https://en.wikipedia.org/wiki/Gregorian_calendar)). The temporal type in Vega-Lite supports reasoning about time units (year, month, day, hour, etc.), and provides methods for requesting specific time intervals.
+ + Example temporal values include date strings such as `β€œ2019-01-04”` and `β€œJan 04 2019”`, as well as standardized date-times such as the [ISO date-time format](https://en.wikipedia.org/wiki/ISO_8601): `β€œ2019-01-04T17:50:35.643Z”`. + + There are no temporal values in our global development dataset above, as the `year` field is simply encoded as an integer. For more details about using temporal data in Altair, see the [Times and Dates documentation](https://altair-viz.github.io/user_guide/times_and_dates.html). + + ### Summary + + These data types are not mutually exclusive, but rather form a hierarchy: ordinal data support nominal (equality) comparisons, while quantitative data support ordinal (rank-order) comparisons. + + Moreover, these data types do _not_ provide a fixed categorization. Just because a data field is represented using a number doesn't mean we have to treat it as a quantitative type! For example, we might interpret a set of ages (10 years old, 20 years old, etc) as nominal (underage or overage), ordinal (grouped by year), or quantitative (calculate average age). + + Now let's examine how to visually encode these data types! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Encoding Channels + + At the heart of Altair is the use of *encodings* that bind data fields (with a given data type) to available encoding *channels* of a chosen *mark* type. In this notebook we'll examine the following encoding channels: + + - `x`: Horizontal (x-axis) position of the mark. + - `y`: Vertical (y-axis) position of the mark. + - `size`: Size of the mark. May correspond to area or length, depending on the mark type. + - `color`: Mark color, specified as a [legal CSS color](https://developer.mozilla.org/en-US/docs/Web/CSS/color_value). + - `opacity`: Mark opacity, ranging from 0 (fully transparent) to 1 (fully opaque). + - `shape`: Plotting symbol shape for `point` marks. 
+ - `tooltip`: Tooltip text to display upon mouse hover over the mark. + - `order`: Mark ordering, determines line/area point order and drawing order. + - `column`: Facet the data into horizontally-aligned subplots. + - `row`: Facet the data into vertically-aligned subplots. + + For a complete list of available channels, see the [Altair encoding documentation](https://altair-viz.github.io/user_guide/encodings/index.html). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### X + + The `x` encoding channel sets a mark's horizontal position (x-coordinate). In addition, default choices of axis and title are made automatically. In the chart below, the choice of a quantitative data type results in a continuous linear axis scale: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Y + + The `y` encoding channel sets a mark's vertical position (y-coordinate). Here we've added the `cluster` field using an ordinal (`O`) data type. The result is a discrete axis that includes a sized band, with a default step size, for each unique value: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q'), + alt.Y('cluster:O') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _What happens to the chart above if you swap the `O` and `Q` field types?_ + + If we instead add the `life_expect` field as a quantitative (`Q`) variable, the result is a scatter plot with linear scales for both axes: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By default, axes for linear quantitative scales include zero to ensure a proper baseline for comparing ratio-valued data. 
In some cases, however, a zero baseline may be meaningless or you may want to focus on interval comparisons. To disable automatic inclusion of zero, configure the scale mapping using the encoding `scale` attribute: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q', scale=alt.Scale(zero=False)), + alt.Y('life_expect:Q', scale=alt.Scale(zero=False)) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now the axis scales no longer include zero by default. Some padding still remains, as the axis domain end points are automatically snapped to _nice_ numbers like multiples of 5 or 10. + + _What happens if you also add `nice=False` to the scale attribute above?_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Size + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `size` encoding channel sets a mark's size or extent. The meaning of the channel can vary based on the mark type. For `point` marks, the `size` channel maps to the pixel area of the plotting symbol, such that the diameter of the point matches the square root of the size value. + + Let's augment our scatter plot by encoding population (`pop`) on the `size` channel. As a result, the chart now also includes a legend for interpreting the size values. + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + In some cases we might be unsatisfied with the default size range. To provide a customized span of sizes, set the `range` parameter of the `scale` attribute to an array indicating the smallest and largest sizes. 
+    Here we update the size encoding to range from 0 pixels (for zero values) to 1,000 pixels (for the maximum value in the scale domain):
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_point().encode(
+        alt.X('fertility:Q'),
+        alt.Y('life_expect:Q'),
+        alt.Size('pop:Q', scale=alt.Scale(range=[0,1000]))
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Color and Opacity
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The `color` encoding channel sets a mark's color. The style of color encoding is highly dependent on the data type: nominal data will default to a multi-hued qualitative color scheme, whereas ordinal and quantitative data will use perceptually ordered color gradients.
+
+    Here, we encode the `cluster` field using the `color` channel and a nominal (`N`) data type, resulting in a distinct hue for each cluster value. Can you start to guess what the `cluster` field might indicate?
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_point().encode(
+        alt.X('fertility:Q'),
+        alt.Y('life_expect:Q'),
+        alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])),
+        alt.Color('cluster:N')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    If we prefer filled shapes, we can pass a `filled=True` parameter to the `mark_point` method:
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_point(filled=True).encode(
+        alt.X('fertility:Q'),
+        alt.Y('life_expect:Q'),
+        alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])),
+        alt.Color('cluster:N')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    By default, Altair uses a bit of transparency to help combat over-plotting. We are free to further adjust the opacity, either by passing a default value to the `mark_*` method, or using a dedicated encoding channel.
+ + Here we demonstrate how to provide a constant value to an encoding channel instead of binding a data field: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Shape + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `shape` encoding channel sets the geometric shape used by `point` marks. Unlike the other channels we have seen so far, the `shape` channel can not be used by other mark types. The shape encoding channel should only be used with nominal data, as perceptual rank-order and magnitude comparisons are not supported. + + Let's encode the `cluster` field using `shape` as well as `color`. Using multiple channels for the same underlying data field is known as a *redundant encoding*. The resulting chart combines both color and shape information into a single symbol legend: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5), + alt.Shape('cluster:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Tooltips & Ordering + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By this point, you might feel a bit frustrated: we've built up a chart, but we still don't know what countries the visualized points correspond to! Let's add interactive tooltips to enable exploration. + + The `tooltip` encoding channel determines tooltip text to show when a user moves the mouse cursor over a mark. Let's add a tooltip encoding for the `country` field, then investigate which countries are being represented. 
+ """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5), + alt.Tooltip('country') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + As you mouse around you may notice that you can not select some of the points. For example, the largest dark blue circle corresponds to India, which is drawn on top of a country with a smaller population, preventing the mouse from hovering over that country. To fix this problem, we can use the `order` encoding channel. + + The `order` encoding channel determines the order of data points, affecting both the order in which they are drawn and, for `line` and `area` marks, the order in which they are connected to one another. + + Let's order the values in descending rank order by the population (`pop`), ensuring that smaller circles are drawn later than larger circles: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5), + alt.Tooltip('country:N'), + alt.Order('pop:Q', sort='descending') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now we can identify the smaller country being obscured by India: it's Bangladesh! + + We can also now figure out what the `cluster` field represents. Mouse over the various colored points to formulate your own explanation. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + At this point we've added tooltips that show only a single property of the underlying data record. 
To show multiple values, we can provide the `tooltip` channel an array of encodings, one for each field we want to include: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5), + alt.Order('pop:Q', sort='descending'), + tooltip = [ + alt.Tooltip('country:N'), + alt.Tooltip('fertility:Q'), + alt.Tooltip('life_expect:Q') + ] + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now we can see multiple data fields upon mouse over! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Column and Row Facets + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Spatial position is one of the most powerful and flexible channels for visual encoding, but what can we do if we already have assigned fields to the `x` and `y` channels? One valuable technique is to create a *trellis plot*, consisting of sub-plots that show a subset of the data. A trellis plot is one example of the more general technique of presenting data using [small multiples](https://en.wikipedia.org/wiki/Small_multiple) of views. + + The `column` and `row` encoding channels generate either a horizontal (columns) or vertical (rows) set of sub-plots, in which the data is partitioned according to the provided data field. 
+ + Here is a trellis plot that divides the data into one column per \`cluster\` value: + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), + alt.Color('cluster:N'), + alt.OpacityValue(0.5), + alt.Tooltip('country:N'), + alt.Order('pop:Q', sort='descending'), + alt.Column('cluster:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The plot above does not fit on screen, making it difficult to compare all the sub-plots to each other! We can set the default `width` and `height` properties to create a smaller set of multiples. Also, as the column headers already label the `cluster` values, let's remove our `color` legend by setting it to `None`. To make better use of space we can also orient our `size` legend to the `'bottom'` of the chart. + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True).encode( + alt.X('fertility:Q'), + alt.Y('life_expect:Q'), + alt.Size('pop:Q', scale=alt.Scale(range=[0,1000]), + legend=alt.Legend(orient='bottom', titleOrient='left')), + alt.Color('cluster:N', legend=None), + alt.OpacityValue(0.5), + alt.Tooltip('country:N'), + alt.Order('pop:Q', sort='descending'), + alt.Column('cluster:N') + ).properties(width=135, height=135) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Underneath the hood, the `column` and `row` encodings are translated into a new specification that uses the `facet` view composition operator. We will re-visit faceting in greater depth later on! + + In the meantime, _can you rewrite the chart above to facet into rows instead of columns?_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### A Peek Ahead: Interactive Filtering + + In later modules, we'll dive into interaction techniques for data exploration. 
Here is a sneak peek: binding a range slider to the `year` field to enable interactive scrubbing through each year of data. Don't worry if the code below is a bit confusing at this point, as we will cover interaction in detail later.
+
+    _Drag the slider back and forth to see how the data values change over time!_
+    """)
+    return
+
+
+@app.cell
+def _(alt, data):
+    select_year = alt.selection_point(
+        name='select', fields=['year'], value=[{'year': 1955}],
+        bind=alt.binding_range(min=1955, max=2005, step=5)
+    )
+
+    alt.Chart(data).mark_point(filled=True).encode(
+        alt.X('fertility:Q', scale=alt.Scale(domain=[0,9])),
+        alt.Y('life_expect:Q', scale=alt.Scale(domain=[0,90])),
+        alt.Size('pop:Q', scale=alt.Scale(domain=[0, 1200000000], range=[0,1000])),
+        alt.Color('cluster:N', legend=None),
+        alt.OpacityValue(0.5),
+        alt.Tooltip('country:N'),
+        alt.Order('pop:Q', sort='descending')
+    ).add_params(select_year).transform_filter(select_year)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Graphical Marks
+
+    Our exploration of encoding channels above exclusively uses `point` marks to visualize the data. However, the `point` mark type is only one of the many geometric shapes that can be used to visually represent data. Altair includes a number of built-in mark types, including:
+
+    - `mark_area()` - Filled areas defined by a top-line and a baseline.
+    - `mark_bar()` - Rectangular bars.
+    - `mark_circle()` - Scatter plot points as filled circles.
+    - `mark_line()` - Connected line segments.
+    - `mark_point()` - Scatter plot points with configurable shapes.
+    - `mark_rect()` - Filled rectangles, useful for heatmaps.
+    - `mark_rule()` - Vertical or horizontal lines spanning the axis.
+    - `mark_square()` - Scatter plot points as filled squares.
+    - `mark_text()` - Scatter plot points represented by text.
+    - `mark_tick()` - Vertical or horizontal tick marks.
+ + For a complete list, and links to examples, see the [Altair marks documentation](https://altair-viz.github.io/user_guide/marks/index.html). Next, we will step through a number of the most commonly used mark types for statistical graphics. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Point Marks + + The `point` mark type conveys specific points, as in *scatter plots* and *dot plots*. In addition to `x` and `y` encoding channels (to specify 2D point positions), point marks can use `color`, `size`, and `shape` encodings to convey additional data fields. + + Below is a dot plot of `fertility`, with the `cluster` field redundantly encoded using both the `y` and `shape` channels. + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point().encode( + alt.X('fertility:Q'), + alt.Y('cluster:N'), + alt.Shape('cluster:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + In addition to encoding channels, marks can be stylized by providing values to the `mark_*()` methods. + + For example: point marks are drawn with stroked outlines by default, but can be specified to use `filled` shapes instead. Similarly, you can set a default `size` to set the total pixel area of the point mark. + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_point(filled=True, size=100).encode( + alt.X('fertility:Q'), + alt.Y('cluster:N'), + alt.Shape('cluster:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Circle Marks + + The `circle` mark type is a convenient shorthand for `point` marks drawn as filled circles. 
+
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_circle(size=100).encode(
+        alt.X('fertility:Q'),
+        alt.Y('cluster:N'),
+        alt.Shape('cluster:N')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Square Marks
+
+    The `square` mark type is a convenient shorthand for `point` marks drawn as filled squares.
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_square(size=100).encode(
+        alt.X('fertility:Q'),
+        alt.Y('cluster:N'),
+        alt.Shape('cluster:N')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Tick Marks
+
+    The `tick` mark type conveys a data point using a short line segment or "tick". These are particularly useful for comparing values along a single dimension with minimal overlap. A *dot plot* drawn with tick marks is sometimes referred to as a *strip plot*.
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_tick().encode(
+        alt.X('fertility:Q'),
+        alt.Y('cluster:N'),
+        alt.Shape('cluster:N')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Bar Marks
+
+    The `bar` mark type draws a rectangle with a position, width, and height.
+
+    The plot below is a simple bar chart of the population (`pop`) of each country.
+    """)
+    return
+
+
+@app.cell
+def _(alt, data2000):
+    alt.Chart(data2000).mark_bar().encode(
+        alt.X('country:N'),
+        alt.Y('pop:Q')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The bar width is set to a default size. We will discuss how to adjust the bar width later in this notebook. (A subsequent notebook will take a closer look at configuring axes, scales, and legends.)
+
+    Bars can also be stacked. Let's change the `x` encoding to use the `cluster` field, and encode `country` using the `color` channel. We'll also disable the legend (which would be very long with colors for all countries!) and use tooltips for the country name.
+ """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_bar().encode( + alt.X('cluster:N'), + alt.Y('pop:Q'), + alt.Color('country:N', legend=None), + alt.Tooltip('country:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + In the chart above, the use of the `color` encoding channel causes Altair / Vega-Lite to automatically stack the bar marks. Otherwise, bars would be drawn on top of each other! Try adding the parameter `stack=None` to the `y` encoding channel to see what happens if we don't apply stacking... + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The examples above create bar charts from a zero-baseline, and the `y` channel only encodes the non-zero value (or height) of the bar. However, the bar mark also allows you to specify starting and ending points to convey ranges. + + The chart below uses the `x` (starting point) and `x2` (ending point) channels to show the range of life expectancies within each regional cluster. Below we use the `min` and `max` aggregation functions to determine the end points of the range; we will discuss aggregation in greater detail in the next notebook! + + Alternatively, you can use `x` and `width` to provide a starting point plus offset, such that `x2 = x + width`. + """) + return + + +@app.cell +def _(alt, data2000): + alt.Chart(data2000).mark_bar().encode( + alt.X('min(life_expect):Q'), + alt.X2('max(life_expect):Q'), + alt.Y('cluster:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Line Marks + + The `line` mark type connects plotted points with line segments, for example so that a line's slope conveys information about the rate of change. + + Let's plot a line chart of fertility per country over the years, using the full, unfiltered global development data frame. We'll again hide the legend and use tooltips instead. 
+ """) + return + + +@app.cell +def _(alt, data): + alt.Chart(data).mark_line().encode( + alt.X('year:O'), + alt.Y('fertility:Q'), + alt.Color('country:N', legend=None), + alt.Tooltip('country:N') + ).properties( + width=400 + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can see interesting variations per country, but overall trends for lower numbers of children per family over time. Also note that we set a custom width of 400 pixels. _Try changing (or removing) the widths and see what happens!_ + + Let's change some of the default mark parameters to customize the plot. We can set the `strokeWidth` to determine the thickness of the lines and the `opacity` to add some transparency. By default, the `line` mark uses straight line segments to connect data points. In some cases we might want to smooth the lines. We can adjust the interpolation used to connect data points by setting the `interpolate` mark parameter. Let's use `'monotone'` interpolation to provide smooth lines that are also guaranteed not to inadvertently generate "false" minimum or maximum values as a result of the interpolation. + """) + return + + +@app.cell +def _(alt, data): + alt.Chart(data).mark_line( + strokeWidth=3, + opacity=0.5, + interpolate='monotone' + ).encode( + alt.X('year:O'), + alt.Y('fertility:Q'), + alt.Color('country:N', legend=None), + alt.Tooltip('country:N') + ).properties( + width=400 + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `line` mark can also be used to create *slope graphs*, charts that highlight the change in value between two comparison points using line slopes. + + Below let's create a slope graph comparing the populations of each country at minimum and maximum years in our full dataset: 1955 and 2005. We first create a new Pandas data frame filtered to those years, then use Altair to create the slope graph. + + By default, Altair places the years close together. 
To better space out the years along the x-axis, we can indicate the size (in pixels) of discrete steps along the width of our chart as indicated by the comment below. Try adjusting the width `step` value below and see how the chart changes in response. + """) + return + + +@app.cell +def _(alt, data): + dataTime = data.loc[(data['year'] == 1955) | (data['year'] == 2005)] + + alt.Chart(dataTime).mark_line(opacity=0.5).encode( + alt.X('year:O'), + alt.Y('pop:Q'), + alt.Color('country:N', legend=None), + alt.Tooltip('country:N') + ).properties( + width={"step": 50} # adjust the step parameter + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Area Marks + + The `area` mark type combines aspects of `line` and `bar` marks: it visualizes connections (slopes) among data points, but also shows a filled region, with one edge defaulting to a zero-valued baseline. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The chart below is an area chart of population over time for just the United States: + """) + return + + +@app.cell +def _(alt, data): + dataUS = data.loc[data['country'] == 'United States'] + + alt.Chart(dataUS).mark_area().encode( + alt.X('year:O'), + alt.Y('fertility:Q') + ) + return (dataUS,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Similar to `line` marks, `area` marks support an `interpolate` parameter. + """) + return + + +@app.cell +def _(alt, dataUS): + alt.Chart(dataUS).mark_area(interpolate='monotone').encode( + alt.X('year:O'), + alt.Y('fertility:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Similar to `bar` marks, `area` marks also support stacking. Here we create a new data frame with data for the three North American countries, then plot them using an `area` mark and a `color` encoding channel to stack by country. 
+ """) + return + + +@app.cell +def _(alt, data): + dataNA = data.loc[ + (data['country'] == 'United States') | + (data['country'] == 'Canada') | + (data['country'] == 'Mexico') + ] + + alt.Chart(dataNA).mark_area().encode( + alt.X('year:O'), + alt.Y('pop:Q'), + alt.Color('country:N') + ) + return (dataNA,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By default, stacking is performed relative to a zero baseline. However, other `stack` options are available: + + * `center` - to stack relative to a baseline in the center of the chart, creating a *streamgraph* visualization, and + * `normalize` - to normalize the summed data at each stacking point to 100%, enabling percentage comparisons. + + Below we adapt the chart by setting the `y` encoding `stack` attribute to `center`. What happens if you instead set it `normalize`? + """) + return + + +@app.cell +def _(alt, dataNA): + alt.Chart(dataNA).mark_area().encode( + alt.X('year:O'), + alt.Y('pop:Q', stack='center'), + alt.Color('country:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To disable stacking altogether, set the `stack` attribute to `None`. We can also add `opacity` as a default mark parameter to ensure we see the overlapping areas! + """) + return + + +@app.cell +def _(alt, dataNA): + alt.Chart(dataNA).mark_area(opacity=0.5).encode( + alt.X('year:O'), + alt.Y('pop:Q', stack=None), + alt.Color('country:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `area` mark type also supports data-driven baselines, with both the upper and lower series determined by data fields. As with `bar` marks, we can use the `x` and `x2` (or `y` and `y2`) channels to provide end points for the area mark. 
+ + The chart below visualizes the range of minimum and maximum fertility, per year, for North American countries: + """) + return + + +@app.cell +def _(alt, dataNA): + alt.Chart(dataNA).mark_area().encode( + alt.X('year:O'), + alt.Y('min(fertility):Q'), + alt.Y2('max(fertility):Q') + ).properties( + width={"step": 40} + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can see a larger range of values in 1995, from just under 4 to just under 7. By 2005, both the overall fertility values and the variability have declined, centered around 2 children per familty. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + All the `area` mark examples above use a vertically oriented area. However, Altair and Vega-Lite support horizontal areas as well. Let's transpose the chart above, simply by swapping the `x` and `y` channels. + """) + return + + +@app.cell +def _(alt, dataNA): + alt.Chart(dataNA).mark_area().encode( + alt.Y('year:O'), + alt.X('min(fertility):Q'), + alt.X2('max(fertility):Q') + ).properties( + width={"step": 40} + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + + We've completed our tour of data types, encoding channels, and graphical marks! You should now be well-equipped to further explore the space of encodings, mark types, and mark parameters. For a comprehensive reference – including features we've skipped over here! – see the Altair [marks](https://altair-viz.github.io/user_guide/marks/index.html) and [encoding](https://altair-viz.github.io/user_guide/encodings/index.html) documentation. + + In the next module, we will look at the use of data transformations to create charts that summarize data or visualize new derived fields. In a later module, we'll examine how to further customize your charts by modifying scales, axes, and legends. + + Interested in learning more about visual encoding? + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" +
+ + Bertin's taxonomy of visual encodings from SΓ©miologie Graphique, as adapted by Mike Bostock. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + - The systematic study of marks, visual encodings, and backing data types was initiated by [Jacques Bertin](https://en.wikipedia.org/wiki/Jacques_Bertin) in his pioneering 1967 work [_SΓ©miologie Graphique (The Semiology of Graphics)_](https://books.google.com/books/about/Semiology_of_Graphics.html?id=X5caQwAACAAJ). The image above illustrates position, size, value (brightness), texture, color (hue), orientation, and shape channels, alongside Bertin's recommendations for the data types they support. + - The framework of data types, marks, and channels also guides _automated_ visualization design tools, starting with [Mackinlay's APT (A Presentation Tool)](https://scholar.google.com/scholar?cluster=10191273548472217907) in 1986 and continuing in more recent systems such as [Voyager](http://idl.cs.washington.edu/papers/voyager/) and [Draco](http://idl.cs.washington.edu/papers/draco/). + - The identification of nominal, ordinal, interval, and ratio types dates at least as far back as S. S. Steven's 1947 article [_On the theory of scales of measurement_](https://scholar.google.com/scholar?cluster=14356809180080326415). 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/03_data_transformation.py b/altair/03_data_transformation.py new file mode 100644 index 0000000000000000000000000000000000000000..55925aa0d2a1c618c4a2369fc12f55611be4fcfd --- /dev/null +++ b/altair/03_data_transformation.py @@ -0,0 +1,641 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Data Transformation + + In previous notebooks we learned how to use marks and visual encodings to represent individual data records. Here we will explore methods for *transforming* data, including the use of aggregates to summarize multiple records. Data transformation is an integral part of visualization: choosing the variables to show and their level of detail is just as important as choosing appropriate visual encodings. After all, it doesn't matter how well chosen your visual encodings are if you are showing the wrong information! + + As you work through this module, we recommend that you open the [Altair Data Transformations documentation](https://altair-viz.github.io/user_guide/transform/) in another tab. It will be a useful resource if at any point you'd like more details or want to see what other transformations are available. 
+ + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return alt, pd + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## The Movies Dataset + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We will be working with a table of data about motion pictures, taken from the [vega-datasets](https://vega.github.io/vega-datasets/) collection. The data includes variables such as the film name, director, genre, release date, ratings, and gross revenues. However, _be careful when working with this data_: the films are from unevenly sampled years, using data combined from multiple sources. If you dig in you will find issues with missing values and even some subtle errors! Nevertheless, the data should prove interesting to explore... + + Let's retrieve the URL for the JSON data file from the vega_datasets package, and then read the data into a Pandas data frame so that we can inspect its contents. + """) + return + + +@app.cell +def _(pd): + movies_url = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/movies.json' + movies = pd.read_json(movies_url) + return movies, movies_url + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + How many rows (records) and columns (fields) are in the movies dataset? + """) + return + + +@app.cell +def _(movies): + movies.shape + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now let's peek at the first 5 rows of the table to get a sense of the fields and data types... + """) + return + + +@app.cell +def _(movies): + movies.head(5) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Histograms + + We'll start our transformation tour by _binning_ data into discrete groups and _counting_ records to summarize those groups. The resulting plots are known as [_histograms_](https://en.wikipedia.org/wiki/Histogram). 
+ + Let's first look at unaggregated data: a scatter plot showing movie ratings from Rotten Tomatoes versus ratings from IMDB users. We'll provide data to Altair by passing the movies data URL to the `Chart` method. (We could also pass the Pandas data frame directly to get the same result.) We can then encode the Rotten Tomatoes and IMDB ratings fields using the `x` and `y` channels: + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q'), + alt.Y('IMDB_Rating:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To summarize this data, we can *bin* a data field to group numeric values into discrete groups. Here we bin along the x-axis by adding `bin=True` to the `x` encoding channel. The result is a set of ten bins of equal step size, each corresponding to a span of ten ratings points. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=True), + alt.Y('IMDB_Rating:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Setting `bin=True` uses default binning settings, but we can exercise more control if desired. Let's instead set the maximum bin count (`maxbins`) to 20, which has the effect of doubling the number of bins. Now each bin corresponds to a span of five ratings points. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('IMDB_Rating:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + With the data binned, let's now summarize the distribution of Rotten Tomatoes ratings. We will drop the IMDB ratings for now and instead use the `y` encoding channel to show an aggregate `count` of records, so that the vertical position of each point indicates the number of movies per Rotten Tomatoes rating bin. 
+ + As the `count` aggregate counts the number of total records in each bin regardless of the field values, we do not need to include a field name in the `y` encoding. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('count()') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To arrive at a standard histogram, let's change the mark type from `circle` to `bar`: + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('count()') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can now examine the distribution of ratings more clearly: we can see fewer movies on the negative end, and a bit more movies on the high end, but a generally uniform distribution overall. Rotten Tomatoes ratings are determined by taking "thumbs up" and "thumbs down" judgments from film critics and calculating the percentage of positive reviews. It appears this approach does a good job of utilizing the full range of rating values._ + + Similarly, we can create a histogram for IMDB ratings by changing the field in the `x` encoding channel: + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('IMDB_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('count()') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _In contrast to the more uniform distribution we saw before, IMDB ratings exhibit a bell-shaped (though [negatively skewed](https://en.wikipedia.org/wiki/Skewness)) distribution. IMDB ratings are formed by averaging scores (ranging from 1 to 10) provided by the site's users. We can see that this form of measurement leads to a different shape than the Rotten Tomatoes ratings. 
We can also see that the mode of the distribution is between 6.5 and 7: people generally enjoy watching movies, potentially explaining the positive bias!_ + + Now let's turn back to our scatter plot of Rotten Tomatoes and IMDB ratings. Here's what happens if we bin *both* axes of our original plot. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('IMDB_Rating:Q', bin=alt.BinParams(maxbins=20)), + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Detail is lost due to *overplotting*, with many points drawn directly on top of each other. + + To form a two-dimensional histogram we can add a `count` aggregate as before. As both the `x` and `y` encoding channels are already taken, we must use a different encoding channel to convey the counts. Here is the result of using circular area by adding a *size* encoding channel. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('IMDB_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Size('count()') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Alternatively, we can encode counts using the `color` channel and change the mark type to `bar`. The result is a two-dimensional histogram in the form of a [*heatmap*](https://en.wikipedia.org/wiki/Heat_map). + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('Rotten_Tomatoes_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Y('IMDB_Rating:Q', bin=alt.BinParams(maxbins=20)), + alt.Color('count()') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Compare the *size* and *color*-based 2D histograms above. Which encoding do you think should be preferred? Why? 
In which plot can you more precisely compare the magnitude of individual values? In which plot can you more accurately see the overall density of ratings? + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Aggregation + + Counts are just one type of aggregate. We might also calculate summaries using measures such as the `average`, `median`, `min`, or `max`. The Altair documentation includes the [full set of available aggregation functions](https://altair-viz.github.io/user_guide/transform/aggregate.html#user-guide-aggregate-transform). + + Let's look at some examples! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Averages and Sorting + + _Do different genres of films receive consistently different ratings from critics?_ As a first step towards answering this question, we might examine the [*average* (a.k.a. the *arithmetic mean*)](https://en.wikipedia.org/wiki/Arithmetic_mean) rating for each genre of movie. + + Let's visualize genre along the `y` axis and plot `average` Rotten Tomatoes ratings along the `x` axis. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('average(Rotten_Tomatoes_Rating):Q'), + alt.Y('Major_Genre:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _There does appear to be some interesting variation, but looking at the data as an alphabetical list is not very helpful for ranking critical reactions to the genres._ + + For a tidier picture, let's sort the genres in descending order of average rating. To do so, we will add a `sort` parameter to the `y` encoding channel, stating that we wish to sort by the *average* (`op`, the aggregate operation) Rotten Tomatoes rating (the `field`) in descending `order`. 
+ """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('average(Rotten_Tomatoes_Rating):Q'), + alt.Y('Major_Genre:N', sort=alt.EncodingSortField( + op='average', field='Rotten_Tomatoes_Rating', order='descending') + ) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The sorted plot suggests that critics think highly of documentaries, musicals, westerns, and dramas, but look down upon romantic comedies and horror films... and who doesn't love `null` movies!?_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Medians and the Inter-Quartile Range + + While averages are a common way to summarize data, they can sometimes mislead. For example, very large or very small values ([*outliers*](https://en.wikipedia.org/wiki/Outlier)) might skew the average. To be safe, we can compare the genres according to the [*median*](https://en.wikipedia.org/wiki/Median) ratings as well. + + The median is a point that splits the data evenly, such that half of the values are less than the median and the other half are greater. The median is less sensitive to outliers and so is referred to as a [*robust* statistic](https://en.wikipedia.org/wiki/Robust_statistics). For example, arbitrarily increasing the largest rating value will not cause the median to change. + + Let's update our plot to use a `median` aggregate and sort by those values: + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('median(Rotten_Tomatoes_Rating):Q'), + alt.Y('Major_Genre:N', sort=alt.EncodingSortField( + op='median', field='Rotten_Tomatoes_Rating', order='descending') + ) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that some of the genres with similar averages have swapped places (films of unknown genre, or `null`, are now rated highest!), but the overall groups have stayed stable. 
Horror films continue to get little love from professional film critics._ + + It's a good idea to stay skeptical when viewing aggregate statistics. So far we've only looked at *point estimates*. We have not examined how ratings vary within a genre. + + Let's visualize the variation among the ratings to add some nuance to our rankings. Here we will encode the [*inter-quartile range* (IQR)](https://en.wikipedia.org/wiki/Interquartile_range) for each genre. The IQR is the range in which the middle half of data values reside. A [*quartile*](https://en.wikipedia.org/wiki/Quartile) contains 25% of the data values. The inter-quartile range consists of the two middle quartiles, and so contains the middle 50%. + + To visualize ranges, we can use the `x` and `x2` encoding channels to indicate the starting and ending points. We use the aggregate functions `q1` (the lower quartile boundary) and `q3` (the upper quartile boundary) to provide the inter-quartile range. (In case you are wondering, *q2* would be the median.) + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().encode( + alt.X('q1(Rotten_Tomatoes_Rating):Q'), + alt.X2('q3(Rotten_Tomatoes_Rating):Q'), + alt.Y('Major_Genre:N', sort=alt.EncodingSortField( + op='median', field='Rotten_Tomatoes_Rating', order='descending') + ) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Time Units + + _Now let's ask a completely different question: do box office returns vary by season?_ + + To get an initial answer, let's plot the median U.S. gross revenue by month. + + To make this chart, use the `timeUnit` transform to map release dates to the `month` of the year. The result is similar to binning, but using meaningful time intervals. Other valid time units include `year`, `quarter`, `date` (numeric day in month), `day` (day of the week), and `hours`, as well as compound units such as `yearmonth` or `hoursminutes`. 
See the Altair documentation for a [complete list of time units](https://altair-viz.github.io/user_guide/transform/timeunit.html#user-guide-timeunit-transform). + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_area().encode( + alt.X('month(Release_Date):T'), + alt.Y('median(US_Gross):Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Looking at the resulting plot, median movie sales in the U.S. appear to spike around the summer blockbuster season and the end of year holiday period. Of course, people around the world (not just the U.S.) go out to the movies. Does a similar pattern arise for worldwide gross revenue?_ + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_area().encode( + alt.X('month(Release_Date):T'), + alt.Y('median(Worldwide_Gross):Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Yes!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Advanced Data Transformation + + The examples above all use transformations (*bin*, *timeUnit*, *aggregate*, *sort*) that are defined relative to an encoding channel. However, at times you may want to apply a chain of multiple transformations prior to visualization, or use transformations that don't integrate into encoding definitions. For such cases, Altair and Vega-Lite support data transformations defined separately from encodings. These transformations are applied to the data *before* any encodings are considered. + + We *could* also perform transformations using Pandas directly, and then visualize the result. However, using the built-in transforms allows our visualizations to be published more easily in other contexts; for example, exporting the Vega-Lite JSON to use in a stand-alone web interface. Let's look at the built-in transforms supported by Altair, such as `calculate`, `filter`, `aggregate`, and `window`. 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Calculate + + _Think back to our comparison of U.S. gross and worldwide gross. Doesn't worldwide revenue include the U.S.? (Indeed it does.) How might we get a better sense of trends outside the U.S.?_ + + With the `calculate` transform we can derive new fields. Here we want to subtract U.S. gross from worldwide gross. The `calculate` transform takes a [Vega expression string](https://vega.github.io/vega/docs/expressions/) to define a formula over a single record. Vega expressions use JavaScript syntax. The `datum.` prefix accesses a field value on the input record. + """) + return + + +@app.cell +def _(alt, movies): + alt.Chart(movies).mark_area().transform_calculate( + NonUS_Gross='datum.Worldwide_Gross - datum.US_Gross' + ).encode( + alt.X('month(Release_Date):T'), + alt.Y('median(NonUS_Gross):Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that seasonal trends hold outside the U.S., but with a more pronounced decline in the non-peak months._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Filter + + The *filter* transform creates a new table with a subset of the original data, removing rows that fail to meet a provided [*predicate*](https://en.wikipedia.org/wiki/Predicate_%28mathematical_logic%29) test. Similar to the *calculate* transform, filter predicates are expressed using the [Vega expression language](https://vega.github.io/vega/docs/expressions/). + + Below we add a filter to limit our initial scatter plot of IMDB vs. Rotten Tomatoes ratings to only films in the major genre of "Romantic Comedy". 
+ """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q'), + alt.Y('IMDB_Rating:Q') + ).transform_filter('datum.Major_Genre == "Romantic Comedy"') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _How does the plot change if we filter to view other genres? Edit the filter expression to find out._ + + Now let's filter to look at films released before 1970. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_circle().encode( + alt.X('Rotten_Tomatoes_Rating:Q'), + alt.Y('IMDB_Rating:Q') + ).transform_filter('year(datum.Release_Date) < 1970') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _They seem to score unusually high! Are older films simply better, or is there a [selection bias](https://en.wikipedia.org/wiki/Selection%5Fbias) towards more highly-rated older films in this dataset?_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Aggregate + + We have already seen `aggregate` transforms such as `count` and `average` in the context of encoding channels. We can also specify aggregates separately, as a pre-processing step for other transforms (as in the `window` transform examples below). The output of an `aggregate` transform is a new data table with records that contain both the `groupby` fields and the computed `aggregate` measures. + + Let's recreate our plot of average ratings by genre, but this time using a separate `aggregate` transform. The output table from the aggregate transform contains 13 rows, one for each genre. + + To order the `y` axis we must include a required aggregate operation in our sorting instructions. Here we use the `max` operator, which works fine because there is only one output record per genre. We could similarly use the `min` operator and end up with the same plot. 
+ """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().transform_aggregate( + groupby=['Major_Genre'], + Average_Rating='average(Rotten_Tomatoes_Rating)' + ).encode( + alt.X('Average_Rating:Q'), + alt.Y('Major_Genre:N', sort=alt.EncodingSortField( + op='max', field='Average_Rating', order='descending' + ) + ) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Window + + The `window` transform performs calculations over sorted groups of data records. Window transforms are quite powerful, supporting tasks such as ranking, lead/lag analysis, cumulative totals, and running sums or averages. Values calculated by a `window` transform are written back to the input data table as new fields. Window operations include the aggregate operations we've seen earlier, as well as specialized operations such as `rank`, `row_number`, `lead`, and `lag`. The Vega-Lite documentation lists [all valid window operations](https://vega.github.io/vega-lite/docs/window.html#ops). + + One use case for a `window` transform is to calculate top-k lists. Let's plot the top 20 directors in terms of total worldwide gross. + + We first use a `filter` transform to remove records for which we don't know the director. Otherwise, the director `null` would dominate the list! We then apply an `aggregate` to sum up the worldwide gross for all films, grouped by director. At this point we could plot a sorted bar chart, but we'd end up with hundreds and hundreds of directors. How can we limit the display to the top 20? + + The `window` transform allows us to determine the top directors by calculating their rank order. Within our `window` transform definition we can `sort` by gross and use the `rank` operation to calculate rank scores according to that sort order. We can then add a subsequent `filter` transform to limit the data to only records with a rank value less than or equal to 20. 
+ """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_bar().transform_filter( + 'datum.Director != null' + ).transform_aggregate( + Gross='sum(Worldwide_Gross)', + groupby=['Director'] + ).transform_window( + Rank='rank()', + sort=[alt.SortField('Gross', order='descending')] + ).transform_filter( + 'datum.Rank < 20' + ).encode( + alt.X('Gross:Q'), + alt.Y('Director:N', sort=alt.EncodingSortField( + op='max', field='Gross', order='descending' + )) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that Steven Spielberg has been quite successful in his career! However, showing sums might favor directors who have had longer careers, and so have made more movies and thus more money. What happens if we change the choice of aggregate operation? Who is the most successful director in terms of `average` or `median` gross per film? Modify the aggregate transform above!_ + + Earlier in this notebook we looked at histograms, which approximate the [*probability density function*](https://en.wikipedia.org/wiki/Probability_density_function) of a set of values. A complementary approach is to look at the [*cumulative distribution*](https://en.wikipedia.org/wiki/Cumulative_distribution_function). For example, think of a histogram in which each bin includes not only its own count but also the counts from all previous bins — the result is a _running total_, with the last bin containing the total number of records. A cumulative chart directly shows us, for a given reference value, how many data values are less than or equal to that reference. + + As a concrete example, let's look at the cumulative distribution of films by running time (in minutes). Only a subset of records actually include running time information, so we first `filter` down to the subset of films for which we have running times. Next, we apply an `aggregate` to count the number of films per duration (implicitly using "bins" of 1 minute each). 
We then use a `window` transform to compute a running total of counts across bins, sorted by increasing running time. + """) + return + + +@app.cell +def _(alt, movies_url): + alt.Chart(movies_url).mark_line(interpolate='step-before').transform_filter( + 'datum.Running_Time_min != null' + ).transform_aggregate( + groupby=['Running_Time_min'], + Count='count()', + ).transform_window( + Cumulative_Sum='sum(Count)', + sort=[alt.SortField('Running_Time_min', order='ascending')] + ).encode( + alt.X('Running_Time_min:Q', axis=alt.Axis(title='Duration (min)')), + alt.Y('Cumulative_Sum:Q', axis=alt.Axis(title='Cumulative Count of Films')) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Let's examine the cumulative distribution of film lengths. We can see that films under 110 minutes make up about half of all the films for which we have running times. We see a steady accumulation of films between 90 minutes and 2 hours, after which the distribution begins to taper off. Though rare, the dataset does contain multiple films more than 3 hours long!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + + We've only scratched the surface of what data transformations can do! For more details, including all the available transformations and their parameters, see the [Altair data transformation documentation](https://altair-viz.github.io/user_guide/transform/index.html). + + Sometimes you will need to perform significant data transformation to prepare your data _prior_ to using visualization tools. To engage in [_data wrangling_](https://en.wikipedia.org/wiki/Data_wrangling) right here in Python, you can use the [Pandas library](https://pandas.pydata.org/). 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/04_scales_axes_legends.py b/altair/04_scales_axes_legends.py new file mode 100644 index 0000000000000000000000000000000000000000..a5910c73a73a008cd312c7e67bacea0acc5bde20 --- /dev/null +++ b/altair/04_scales_axes_legends.py @@ -0,0 +1,840 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Scales, Axes, and Legends + + Visual encoding – mapping data to visual variables such as position, size, shape, or color – is the beating heart of data visualization. The workhorse that actually performs this mapping is the *scale*: a function that takes a data value as input (the scale *domain*) and returns a visual value, such as a pixel position or RGB color, as output (the scale *range*). Of course, a visualization is useless if no one can figure out what it conveys! In addition to graphical marks, a chart needs reference elements, or *guides*, that allow readers to decode the graphic. Guides such as *axes* (which visualize scales with spatial ranges) and *legends* (which visualize scales with color, size, or shape ranges), are the unsung heroes of effective data visualization! + + In this notebook, we will explore the options Altair provides to support customized designs of scale mappings, axes, and legends, using a running example about the effectiveness of antibiotic drugs. 
+ + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return alt, pd + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Antibiotics Data + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + After World War II, antibiotics were considered "wonder drugs", as they were an easy remedy for what had been intractable ailments. To learn which drug worked most effectively for which bacterial infection, performance of the three most popular antibiotics on 16 bacteria were gathered. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We will be using an antibiotics dataset from the [vega-datasets collection](https://github.com/vega/vega-datasets). In the examples below, we will pass the URL directly to Altair: + """) + return + + +@app.cell +def _(): + antibiotics = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/burtin.json' + return (antibiotics,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can first load the data with Pandas to view the dataset in its entirety and get acquainted with the available fields: + """) + return + + +@app.cell +def _(antibiotics, pd): + pd.read_json(antibiotics) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The numeric values in the table indicate the [minimum inhibitory concentration (MIC)](https://en.wikipedia.org/wiki/Minimum_inhibitory_concentration), a measure of the effectiveness of the antibiotic, which represents the concentration of antibiotic (in micrograms per milliliter) required to prevent growth in vitro. The reaction of the bacteria to a procedure called [Gram staining](https://en.wikipedia.org/wiki/Gram_stain) is described by the nominal field `Gram_Staining`. Bacteria that turn dark blue or violet are Gram-positive. Otherwise, they are Gram-negative. 
+ + As we examine different visualizations of this dataset, ask yourself: What might we learn about the relative effectiveness of the antibiotics? What might we learn about the bacterial species based on their antibiotic response? + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Configuring Scales and Axes + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Plotting Antibiotic Resistance: Adjusting the Scale Type + + Let's start by looking at a simple dot plot of the MIC for Neomycin. + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that the MIC values span orders of magnitude: most points to cluster on the left, with a few large outliers to the right._ + + By default Altair uses a `linear` mapping between the domain values (MIC) and the range values (pixels). To get a better overview of the data, we can apply a different scale transformation. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To change the scale type, we'll set the `scale` attribute, using the `alt.Scale` method and `type` parameter. + + Here's the result of using a square root (`sqrt`) scale type. Distances in the pixel range now correspond to the square root of distances in the data domain. 
+ """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + scale=alt.Scale(type='sqrt')) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The points on the left are now better differentiated, but we still see some heavy skew._ + + Let's try using a [logarithmic scale](https://en.wikipedia.org/wiki/Logarithmic_scale) (`log`) instead: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + scale=alt.Scale(type='log')) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Now the data is much more evenly distributed and we can see the very large differences in concentrations required for different bacteria._ + + In a standard linear scale, a visual (pixel) distance of 10 units might correspond to an *addition* of 10 units in the data domain. A logarithmic transform maps between multiplication and addition, such that `log(u) + log(v) = log(u*v)`. As a result, in a logarithmic scale, a visual distance of 10 units instead corresponds to *multiplication* by 10 units in the data domain, assuming a base 10 logarithm. The `log` scale above defaults to using the logarithm base 10, but we can adjust this by providing a `base` parameter to the scale. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Styling an Axis + + Lower dosages indicate higher effectiveness. However, some people may expect values that are "better" to be "up and to the right" within a chart. If we want to cater to this convention, we can reverse the axis to encode "effectiveness" as a reversed MIC scale. 
+ + To do this, we can set the encoding `sort` property to `'descending'`: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log')) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Unfortunately the axis is starting to get a bit confusing: we're plotting data on a logarithmic scale, in the reverse direction, and without a clear indication of what our units are!_ + + Let's add a more informative axis title: we'll use the `title` property of the encoding to provide the desired title text: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log'), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Much better! + + By default, Altair places the x-axis along the bottom of the chart. To change these defaults, we can add an `axis` attribute with `orient='top'`: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log'), + axis=alt.Axis(orient='top'), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Similarly, the y-axis defaults to a `'left'` orientation, but can be set to `'right'`. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Comparing Antibiotics: Adjusting Grid Lines, Tick Counts, and Sizing + + _How does neomycin compare to other antibiotics, such as streptomycin and penicillin?_ + + To start answering this question, we can create scatter plots, adding a y-axis encoding for another antibiotic that mirrors the design of our x-axis for neomycin. 
+ """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log'), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Streptomycin:Q', + sort='descending', + scale=alt.Scale(type='log'), + title='Streptomycin MIC (ΞΌg/ml, reverse log scale)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that neomycin and streptomycin appear highly correlated, as the bacterial strains respond similarly to both antibiotics._ + + Let's move on and compare neomycin with penicillin: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log'), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log'), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Now we see a more differentiated response: some bacteria respond well to neomycin but not penicillin, and vice versa!_ + + While this plot is useful, we can make it better. The x and y axes use the same units, but have different extents (the chart width is larger than the height) and different domains (0.001 to 100 for the x-axis, and 0.001 to 1,000 for the y-axis). + + Let's equalize the axes: we can add explicit `width` and `height` settings for the chart, and specify matching domains using the scale `domain` property. 
+ """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)') + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The resulting plot is more balanced, and less prone to subtle misinterpretations!_ + + However, the grid lines are now rather dense. If we want to remove grid lines altogether, we can add `grid=False` to the `axis` attribute. But what if we instead want to reduce the number of tick marks, for example only including grid lines for each order of magnitude? + + To change the number of ticks, we can specify a target `tickCount` property for an `Axis` object. The `tickCount` is treated as a *suggestion* to Altair, to be considered alongside other aspects such as using nice, human-friendly intervals. We may not get *exactly* the number of tick marks we request, but we should get something close. + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle().encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)') + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By setting the `tickCount` to 5, we have the desired effect. + + Our scatter plot points feel a bit small. Let's change the default size by setting the `size` property of the circle mark. 
This size value is the *area* of the mark in pixels. + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Here we've set the circle mark area to 80 pixels. _Further adjust the value as you see fit!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Configuring Color Legends + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Color by Gram Staining + + _Above we saw that neomycin is more effective for some bacteria, while penicillin is more effective for others. But how can we tell which antibiotic to use if we don't know the specific species of bacteria? 
Gram staining serves as a diagnostic for discriminating classes of bacteria!_ + + Let's encode `Gram_Staining` on the `color` channel as a nominal data type: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Gram_Staining:N') + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that Gram-positive bacteria seem most susceptible to penicillin, whereas neomycin is more effective for Gram-negative bacteria!_ + + The color scheme above was automatically chosen to provide perceptually-distinguishable colors for nominal (equal or not equal) comparisons. However, we might wish to customize the colors used. In this case, Gram staining results in [distinctive physical colorings: pink for Gram-negative, purple for Gram-positive](https://en.wikipedia.org/wiki/Gram_stain#/media/File:Gram_stain_01.jpg). 
+ + Let's use those colors by specifying an explicit scale mapping from the data `domain` to the color `range`: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Gram_Staining:N', + scale=alt.Scale(domain=['negative', 'positive'], range=['hotpink', 'purple']) + ) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By default legends are placed on the right side of the chart. Similar to axes, we can change the legend orientation using the `orient` parameter: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Gram_Staining:N', + scale=alt.Scale(domain=['negative', 'positive'], range=['hotpink', 'purple']), + legend=alt.Legend(orient='left') + ) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can also remove a legend entirely by specifying `legend=None`: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + 
title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Gram_Staining:N', + scale=alt.Scale(domain=['negative', 'positive'], range=['hotpink', 'purple']), + legend=None + ) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Color by Species + + _So far we've considered the effectiveness of antibiotics. Let's turn around and ask a different question: what might antibiotic response teach us about the different species of bacteria?_ + + To start, let's encode `Bacteria` (a nominal data field) using the `color` channel: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Bacteria:N') + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The result is a bit of a mess!_ There are enough unique bacteria that Altair starts repeating colors from its default 10-color palette for nominal values. + + To use custom colors, we can update the color encoding `scale` property. One option is to provide explicit scale `domain` and `range` values to indicate the precise color mappings per value, as we did above for Gram staining. Another option is to use an alternative color scheme. Altair includes a variety of built-in color schemes. For a complete list, see the [Vega color scheme documentation](https://vega.github.io/vega/docs/schemes/#reference). 
+ + Let's try switching to a built-in 20-color scheme, `tableau20`, and set that using the scale `scheme` property. + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Bacteria:N', + scale=alt.Scale(scheme='tableau20')) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We now have a unique color for each bacteria, but the chart is still a mess. Among other issues, the encoding takes no account of bacteria that belong to the same genus. In the chart above, the two different Salmonella strains have very different hues (teal and pink), despite being biological cousins._ + + To try a different scheme, we can also change the data type from nominal to ordinal. The default ordinal scheme uses blue shades, ramping from light to dark: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Bacteria:O') + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Some of those blue shades may be hard to distinguish._ + + For more differentiated colors, we can experiment with alternatives to the default `blues` color scheme. 
The `viridis` scheme ramps through both hue and luminance: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_circle(size=80).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Bacteria:O', + scale=alt.Scale(scheme='viridis')) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Bacteria from the same genus now have more similar colors than before, but the chart still remains confusing. There are many colors, they are hard to look up in the legend accurately, and two bacteria may have similar colors but different genus._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Color by Genus + + Let's try to color by genus instead of bacteria. To do so, we will add a `calculate` transform that splits up the bacteria name on space characters and takes the first word in the resulting array. We can then encode the resulting `Genus` field using the `tableau20` color scheme. + + (Note that the antibiotics dataset includes a pre-calculated `Genus` field, but we will ignore it here in order to further explore Altair's data transformations.) 
+    """)
+    return
+
+
+@app.cell
+def _(alt, antibiotics):
+    alt.Chart(antibiotics).mark_circle(size=80).transform_calculate(
+        Genus='split(datum.Bacteria, " ")[0]'
+    ).encode(
+        alt.X('Neomycin:Q',
+              sort='descending',
+              scale=alt.Scale(type='log', domain=[0.001, 1000]),
+              axis=alt.Axis(tickCount=5),
+              title='Neomycin MIC (ΞΌg/ml, reverse log scale)'),
+        alt.Y('Penicillin:Q',
+              sort='descending',
+              scale=alt.Scale(type='log', domain=[0.001, 1000]),
+              axis=alt.Axis(tickCount=5),
+              title='Penicillin MIC (ΞΌg/ml, reverse log scale)'),
+        alt.Color('Genus:N',
+                  scale=alt.Scale(scheme='tableau20'))
+    ).properties(width=250, height=250)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    _Hmm... While the data are better segregated by genus, this cacophony of colors doesn't seem particularly useful._
+
+    _If we look at some of the previous charts carefully, we can see that only a handful of bacteria have a genus shared with another bacteria: Salmonella, Staphylococcus, and Streptococcus. To focus our comparison, we might add colors only for these repeated genus values._
+
+    Let's add another `calculate` transform that takes a genus name, keeps it if it is one of the repeated values, and otherwise uses the string `"Other"`.
+
+    In addition, we can add custom color encodings using explicit `domain` and `range` arrays for the color encoding `scale`.
+    """)
+    return
+
+
+@app.cell
+def _(alt, antibiotics):
+    alt.Chart(antibiotics).mark_circle(size=80).transform_calculate(
+        Split='split(datum.Bacteria, " ")[0]'
+    ).transform_calculate(
+        Genus='indexof(["Salmonella", "Staphylococcus", "Streptococcus"], datum.Split) >= 0 ? 
datum.Split : "Other"' + ).encode( + alt.X('Neomycin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Neomycin MIC (ΞΌg/ml, reverse log scale)'), + alt.Y('Penicillin:Q', + sort='descending', + scale=alt.Scale(type='log', domain=[0.001, 1000]), + axis=alt.Axis(tickCount=5), + title='Penicillin MIC (ΞΌg/ml, reverse log scale)'), + alt.Color('Genus:N', + scale=alt.Scale( + domain=['Salmonella', 'Staphylococcus', 'Streptococcus', 'Other'], + range=['rgb(76,120,168)', 'rgb(84,162,75)', 'rgb(228,87,86)', 'rgb(121,112,110)'] + )) + ).properties(width=250, height=250) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We now have a much more revealing plot, made possible by customizations to the axes and legend. Take a moment to examine the plot above. Notice any surprising groupings?_ + + _The upper-left region has a cluster of red Streptococcus bacteria, but with a grey Other bacteria alongside them. Meanwhile, towards the middle-right we see another red Streptococcus placed far away from its "cousins". Might we expect bacteria from the same genus (and thus presumably more genetically similar) to be grouped closer together?_ + + As it so happens, the underlying dataset actually contains errors. The dataset reflects the species designations used in the early 1950s. However, the scientific consensus has since been overturned. That gray point in the upper-left? It's now considered a Streptococcus! That red point towards the middle-right? It's no longer considered a Streptococcus! + + Of course, on its own, this dataset doesn't fully justify these reclassifications. Nevertheless, the data contain valuable biological clues that went overlooked for decades! Visualization, when used by an appropriately skilled and inquisitive viewer, can be a powerful tool for discovery. 
+ + This example also reinforces an important lesson: **_always be skeptical of your data!_** + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Color by Antibiotic Response + + We might also use the `color` channel to encode quantitative values. Though keep in mind that typically color is not as effective for conveying quantities as position or size encodings! + + Here is a basic heatmap of penicillin MIC values for each bacteria. We'll use a `rect` mark and sort the bacteria by descending MIC values (from most to least resistant): + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_rect().encode( + alt.Y('Bacteria:N', + sort=alt.EncodingSortField(field='Penicillin', op='max', order='descending') + ), + alt.Color('Penicillin:Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can further improve this chart by combining features we've seen thus far: a log-transformed scale, a change of axis orientation, a custom color scheme (`plasma`), tick count adjustment, and custom title text. We'll also exercise configuration options to adjust the axis title placement and legend title alignment. 
+ """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics).mark_rect().encode( + alt.Y('Bacteria:N', + sort=alt.EncodingSortField(field='Penicillin', op='max', order='descending'), + axis=alt.Axis( + orient='right', # orient axis on right side of chart + titleX=7, # set x-position to 7 pixels right of chart + titleY=-2, # set y-position to 2 pixels above chart + titleAlign='left', # use left-aligned text + titleAngle=0 # undo default title rotation + ) + ), + alt.Color('Penicillin:Q', + scale=alt.Scale(type='log', scheme='plasma', nice=True), + legend=alt.Legend(titleOrient='right', tickCount=5), + title='Penicillin MIC (ΞΌg/ml)' + ) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Alternatively, we can remove the axis title altogether, and use the top-level `title` property to add a title for the entire chart: + """) + return + + +@app.cell +def _(alt, antibiotics): + alt.Chart(antibiotics, title='Penicillin Resistance of Bacterial Strains').mark_rect().encode( + alt.Y('Bacteria:N', + sort=alt.EncodingSortField(field='Penicillin', op='max', order='descending'), + axis=alt.Axis(orient='right', title=None) + ), + alt.Color('Penicillin:Q', + scale=alt.Scale(type='log', scheme='plasma', nice=True), + legend=alt.Legend(titleOrient='right', tickCount=5), + title='Penicillin MIC (ΞΌg/ml)' + ) + ).configure_title( + anchor='start', # anchor and left-align title + offset=5 # set title offset from chart + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + + Integrating what we've learned across the notebooks so far about encodings, data transforms, and customization, you should now be prepared to make a wide variety of statistical graphics. Now you can put Altair into everyday use for exploring and communicating data! + + Interested in learning more about this topic? + + - Start with the [Altair Customizing Visualizations documentation](https://altair-viz.github.io/user_guide/customization.html). 
+ - For a complementary discussion of scale mappings, see ["Introducing d3-scale"](https://medium.com/@mbostock/introducing-d3-scale-61980c51545f). + - For a more in-depth exploration of all the ways axes and legends can be styled by the underlying Vega library (which powers Altair and Vega-Lite), see ["A Guide to Guides: Axes & Legends in Vega"](https://beta.observablehq.com/@jheer/a-guide-to-guides-axes-legends-in-vega). + - For a fascinating history of the antibiotics dataset, see [Wainer & Lysen's "That's Funny..."](https://www.americanscientist.org/article/thats-funny) in the _American Scientist_. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/05_view_composition.py b/altair/05_view_composition.py new file mode 100644 index 0000000000000000000000000000000000000000..1a905bfa4cfa17582dd3b358cd3f76f47572284b --- /dev/null +++ b/altair/05_view_composition.py @@ -0,0 +1,818 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Multi-View Composition + + When visualizing a number of different data fields, we might be tempted to use as many visual encoding channels as we can: `x`, `y`, `color`, `size`, `shape`, and so on. However, as the number of encoding channels increases, a chart can rapidly become cluttered and difficult to read. An alternative to "over-loading" a single chart is to instead _compose multiple charts_ in a way that facilitates rapid comparisons. 
+ + In this notebook, we will examine a variety of operations for _multi-view composition_: + + - _layer_: place compatible charts directly on top of each other, + - _facet_: partition data into multiple charts, organized in rows or columns, + - _concatenate_: position arbitrary charts within a shared layout, and + - _repeat_: take a base chart specification and apply it to multiple data fields. + + We'll then look at how these operations form a _view composition algebra_, in which the operations can be combined to build a variety of complex multi-view displays. + + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return alt, pd + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Weather Data + + We will be visualizing weather statistics for the U.S. cities of Seattle and New York. Let's load the dataset and peek at the first and last 10 rows: + """) + return + + +@app.cell +def _(): + weather = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/weather.csv' + return (weather,) + + +@app.cell +def _(pd, weather): + df = pd.read_csv(weather) + df.head(10) + return (df,) + + +@app.cell +def _(df): + df.tail(10) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We will create multi-view displays to examine weather within and across the cities. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Layer + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + One of the most common ways of combining multiple charts is to *layer* marks on top of each other. If the underlying scale domains are compatible, we can merge them to form _shared axes_. If either of the `x` or `y` encodings is not compatible, we might instead create a _dual-axis chart_, which overlays marks using separate scales and axes. 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Shared Axes + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's start by plotting the minimum and maximum average temperatures per month: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_area().encode( + alt.X('month(date):T'), + alt.Y('average(temp_max):Q'), + alt.Y2('average(temp_min):Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The plot shows us temperature ranges for each month over the entirety of our data. However, this is pretty misleading as it aggregates the measurements for both Seattle and New York!_ + + Let's subdivide the data by location using a color encoding, while also adjusting the mark opacity to accommodate overlapping areas: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_area(opacity=0.3).encode( + alt.X('month(date):T'), + alt.Y('average(temp_max):Q'), + alt.Y2('average(temp_min):Q'), + alt.Color('location:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can see that Seattle is more temperate: warmer in the winter, and cooler in the summer._ + + In this case we've created a layered chart without any special features by simply subdividing the area marks by color. While the chart above shows us the temperature ranges, we might also want to emphasize the middle of the range. + + Let's create a line chart showing the average temperature midpoint. 
We'll use a `calculate` transform to compute the midpoints between the minimum and maximum daily temperatures: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_line().transform_calculate( + temp_mid='(+datum.temp_min + +datum.temp_max) / 2' + ).encode( + alt.X('month(date):T'), + alt.Y('average(temp_mid):Q'), + alt.Color('location:N') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Aside_: note the use of `+datum.temp_min` within the calculate transform. As we are loading the data directly from a CSV file without any special parsing instructions, the temperature values may be internally represented as string values. Adding the `+` in front of the value forces it to be treated as a number. + + We'd now like to combine these charts by layering the midpoint lines over the range areas. Using the syntax `chart1 + chart2`, we can specify that we want a new layered chart in which `chart1` is the first layer and `chart2` is a second layer drawn on top: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax = alt.Chart(weather).mark_area(opacity=0.3).encode( + alt.X('month(date):T'), + alt.Y('average(temp_max):Q'), + alt.Y2('average(temp_min):Q'), + alt.Color('location:N') + ) + + tempMid = alt.Chart(weather).mark_line().transform_calculate( + temp_mid='(+datum.temp_min + +datum.temp_max) / 2' + ).encode( + alt.X('month(date):T'), + alt.Y('average(temp_mid):Q'), + alt.Color('location:N') + ) + + tempMinMax + tempMid + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Now we have a multi-layer plot! However, the y-axis title (though informative) has become a bit long and unruly..._ + + Let's customize our axes to clean up the plot. 
If we set a custom axis title within one of the layers, it will automatically be used as a shared axis title for all the layers: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_1 = alt.Chart(weather).mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. Temperature Β°C'), alt.Y2('average(temp_min):Q'), alt.Color('location:N')) + tempMid_1 = alt.Chart(weather).mark_line().transform_calculate(temp_mid='(+datum.temp_min + +datum.temp_max) / 2').encode(alt.X('month(date):T'), alt.Y('average(temp_mid):Q'), alt.Color('location:N')) + tempMinMax_1 + tempMid_1 + return tempMid_1, tempMinMax_1 + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _What happens if both layers have custom axis titles? Modify the code above to find out..._ + + Above used the `+` operator, a convenient shorthand for Altair's `layer` method. We can generate an identical layered chart using the `layer` method directly: + """) + return + + +@app.cell +def _(alt, tempMid_1, tempMinMax_1): + alt.layer(tempMinMax_1, tempMid_1) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Note that the order of inputs to a layer matters, as subsequent layers will be drawn on top of earlier layers. _Try swapping the order of the charts in the cells above. What happens? (Hint: look closely at the color of the `line` marks.)_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Dual-Axis Charts + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Seattle has a reputation as a rainy city. Is that deserved?_ + + Let's look at precipitation alongside temperature to learn more. 
First let's create a base plot that shows average monthly precipitation in Seattle:
+    """)
+    return
+
+
+@app.cell
+def _(alt, weather):
+    alt.Chart(weather).transform_filter(
+        'datum.location == "Seattle"'
+    ).mark_line(
+        interpolate='monotone',
+        stroke='grey'
+    ).encode(
+        alt.X('month(date):T', title=None),
+        alt.Y('average(precipitation):Q', title='Precipitation')
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    To facilitate comparison with the temperature data, let's create a new layered chart. Here's what happens if we try to layer the charts as we did earlier:
+    """)
+    return
+
+
+@app.cell
+def _(alt, weather):
+    tempMinMax_2 = alt.Chart(weather).transform_filter('datum.location == "Seattle"').mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. Temperature Β°C'), alt.Y2('average(temp_min):Q'))
+    _precip = alt.Chart(weather).transform_filter('datum.location == "Seattle"').mark_line(interpolate='monotone', stroke='grey').encode(alt.X('month(date):T'), alt.Y('average(precipitation):Q', title='Precipitation'))
+    alt.layer(tempMinMax_2, _precip)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    _The precipitation values use a much smaller range of the y-axis than the temperatures!_
+
+    By default, layered charts use a *shared domain*: the values for the x-axis or y-axis are combined across all the layers to determine a shared extent. This default behavior assumes that the layered values have the same units. However, this doesn't hold up for this example, as we are combining temperature values (degrees Celsius) with precipitation values (inches)!
+
+    If we want to use different y-axis scales, we need to specify how we want Altair to *resolve* the data across layers. In this case, we want to resolve the y-axis `scale` domains to be `independent` rather than use a `shared` domain. 
The `Chart` object produced by a layer operator includes a `resolve_scale` method with which we can specify the desired resolution: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_3 = alt.Chart(weather).transform_filter('datum.location == "Seattle"').mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. Temperature Β°C'), alt.Y2('average(temp_min):Q')) + _precip = alt.Chart(weather).transform_filter('datum.location == "Seattle"').mark_line(interpolate='monotone', stroke='grey').encode(alt.X('month(date):T'), alt.Y('average(precipitation):Q', title='Precipitation')) + alt.layer(tempMinMax_3, _precip).resolve_scale(y='independent') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can now see that autumn is the rainiest season in Seattle (peaking in November), complemented by dry summers._ + + You may have noticed some redundancy in our plot specifications above: both use the same dataset and the same filter to look at Seattle only. If you want, you can streamline the code a bit by providing the data and filter transform to the top-level layered chart. The individual layers will then inherit the data if they don't have their own data definitions: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_4 = alt.Chart().mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. 
Temperature Β°C'), alt.Y2('average(temp_min):Q')) + _precip = alt.Chart().mark_line(interpolate='monotone', stroke='grey').encode(alt.X('month(date):T'), alt.Y('average(precipitation):Q', title='Precipitation')) + alt.layer(tempMinMax_4, _precip, data=weather).transform_filter('datum.location == "Seattle"').resolve_scale(y='independent') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + While dual-axis charts can be useful, _they are often prone to misinterpretation_, as the different units and axis scales may be incommensurate. As is feasible, you might consider transformations that map different data fields to shared units, for example showing [quantiles](https://en.wikipedia.org/wiki/Quantile) or relative percentage change. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Facet + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + *Faceting* involves subdividing a dataset into groups and creating a separate plot for each group. In earlier notebooks, we learned how to create faceted charts using the `row` and `column` encoding channels. We'll first review those channels and then show how they are instances of the more general `facet` operator. + + Let's start with a basic histogram of maximum temperature values in Seattle: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_bar().transform_filter( + 'datum.location == "Seattle"' + ).encode( + alt.X('temp_max:Q', bin=True, title='Temperature (Β°C)'), + alt.Y('count():Q') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _How does this temperature profile change based on the weather of a given day – that is, whether there was drizzle, fog, rain, snow, or sun?_ + + Let's use the `column` encoding channel to facet the data by weather type. 
We can also use `color` as a redundant encoding, using a customized color range: + """) + return + + +@app.cell +def _(alt, weather): + _colors = alt.Scale(domain=['drizzle', 'fog', 'rain', 'snow', 'sun'], range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52']) + alt.Chart(weather).mark_bar().transform_filter('datum.location == "Seattle"').encode(alt.X('temp_max:Q', bin=True, title='Temperature (Β°C)'), alt.Y('count():Q'), alt.Color('weather:N', scale=_colors), alt.Column('weather:N')).properties(width=150, height=150) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Unsurprisingly, those rare snow days center on the coldest temperatures, followed by rainy and foggy days. Sunny days are warmer and, despite Seattle stereotypes, are the most plentiful. Though as any Seattleite can tell you, the drizzle occasionally comes, no matter the temperature!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + In addition to `row` and `column` encoding channels *within* a chart definition, we can take a basic chart definition and apply faceting using an explicit `facet` operator. + + Let's recreate the chart above, but this time using `facet`. We start with the same basic histogram definition, but remove the data source, filter transform, and column channel. We can then invoke the `facet` method, passing in the data and specifying that we should facet into columns according to the `weather` field. The `facet` method accepts both `row` and `column` arguments. The two can be used together to create a 2D grid of faceted plots. + + Finally we include our filter transform, applying it to the top-level faceted chart. While we could apply the filter transform to the histogram definition as before, that is slightly less efficient. Rather than filter out "New York" values within each facet cell, applying the filter to the faceted chart lets Vega-Lite know that we can filter out those values up front, prior to the facet subdivision. 
+ """) + return + + +@app.cell +def _(alt, weather): + _colors = alt.Scale(domain=['drizzle', 'fog', 'rain', 'snow', 'sun'], range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52']) + alt.Chart().mark_bar().encode(alt.X('temp_max:Q', bin=True, title='Temperature (Β°C)'), alt.Y('count():Q'), alt.Color('weather:N', scale=_colors)).properties(width=150, height=150).facet(data=weather, column='weather:N').transform_filter('datum.location == "Seattle"') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Given all the extra code above, why would we want to use an explicit `facet` operator? For basic charts, we should certainly use the `column` or `row` encoding channels if we can. However, using the `facet` operator explicitly is useful if we want to facet composed views, such as layered charts. + + Let's revisit our layered temperature plots from earlier. Instead of plotting data for New York and Seattle in the same plot, let's break them up into separate facets. The individual chart definitions are nearly the same as before: one area chart and one line chart. The only difference is that this time we won't pass the data directly to the chart constructors; we'll wait and pass it to the facet operator later. We can layer the charts much as before, then invoke `facet` on the layered chart object, passing in the data and specifying `column` facets based on the `location` field: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_5 = alt.Chart().mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. 
Temperature (Β°C)'), alt.Y2('average(temp_min):Q'), alt.Color('location:N')) + tempMid_2 = alt.Chart().mark_line().transform_calculate(temp_mid='(+datum.temp_min + +datum.temp_max) / 2').encode(alt.X('month(date):T'), alt.Y('average(temp_mid):Q'), alt.Color('location:N')) + alt.layer(tempMinMax_5, tempMid_2).facet(data=weather, column='location:N') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The faceted charts we have seen so far use the same axis scale domains across the facet cells. This default of using *shared* scales and axes helps aid accurate comparison of values. However, in some cases you may wish to scale each chart independently, for example if the range of values in the cells differs significantly. + + Similar to layered charts, faceted charts also support _resolving_ to independent scales or axes across plots. Let's see what happens if we call the `resolve_axis` method to request `independent` y-axes: + """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_6 = alt.Chart().mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. Temperature (Β°C)'), alt.Y2('average(temp_min):Q'), alt.Color('location:N')) + tempMid_3 = alt.Chart().mark_line().transform_calculate(temp_mid='(+datum.temp_min + +datum.temp_max) / 2').encode(alt.X('month(date):T'), alt.Y('average(temp_mid):Q'), alt.Color('location:N')) + alt.layer(tempMinMax_6, tempMid_3).facet(data=weather, column='location:N').resolve_axis(y='independent') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _The chart above looks largely unchanged, but the plot for Seattle now includes its own axis._ + + What if we instead call `resolve_scale` to resolve the underlying scale domains? 
+ """) + return + + +@app.cell +def _(alt, weather): + tempMinMax_7 = alt.Chart().mark_area(opacity=0.3).encode(alt.X('month(date):T', title=None, axis=alt.Axis(format='%b')), alt.Y('average(temp_max):Q', title='Avg. Temperature (Β°C)'), alt.Y2('average(temp_min):Q'), alt.Color('location:N')) + tempMid_4 = alt.Chart().mark_line().transform_calculate(temp_mid='(+datum.temp_min + +datum.temp_max) / 2').encode(alt.X('month(date):T'), alt.Y('average(temp_mid):Q'), alt.Color('location:N')) + alt.layer(tempMinMax_7, tempMid_4).facet(data=weather, column='location:N').resolve_scale(y='independent') + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Now we see facet cells with different axis scale domains. In this case, using independent scales seems like a bad idea! The domains aren't very different, and one might be fooled into thinking that New York and Seattle have similar maximum summer temperatures._ + + To borrow a clichΓ©: just because you *can* do something, doesn't mean you *should*... + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Concatenate + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Faceting creates [small multiple](https://en.wikipedia.org/wiki/Small_multiple) plots that show separate subdivisions of the data. However, we might wish to create a multi-view display with different views of the *same* dataset (not subsets) or views involving *different* datasets. + + Altair provides *concatenation* operators to combine arbitrary charts into a composed chart. The `hconcat` operator (shorthand `|` ) performs horizontal concatenation, while the `vconcat` operator (shorthand `&`) performs vertical concatenation. 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's start with a basic line chart showing the average maximum temperature per month for both New York and Seattle, much like we've seen before: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_line().encode( + alt.X('month(date):T', title=None), + alt.Y('average(temp_max):Q'), + color='location:N' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _What if we want to compare not just temperature over time, but also precipitation and wind levels?_ + + Let's create a concatenated chart consisting of three plots. We'll start by defining a "base" chart definition that contains all the aspects that should be shared by our three plots. We can then modify this base chart to create customized variants, with different y-axis encodings for the `temp_max`, `precipitation`, and `wind` fields. We can then concatenate them using the pipe (`|`) shorthand operator: + """) + return + + +@app.cell +def _(alt, weather): + base = alt.Chart(weather).mark_line().encode(alt.X('month(date):T', title=None), color='location:N').properties(width=240, height=180) + temp = base.encode(alt.Y('average(temp_max):Q')) + _precip = base.encode(alt.Y('average(precipitation):Q')) + wind = base.encode(alt.Y('average(wind):Q')) + temp | _precip | wind + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Alternatively, we could use the more explicit `alt.hconcat()` method in lieu of the pipe `|` operator. _Try rewriting the code above to use `hconcat` instead._ + + Vertical concatenation works similarly to horizontal concatenation. _Using the `&` operator (or `alt.vconcat` method), modify the code to use a vertical ordering instead of a horizontal ordering._ + + Finally, note that horizontal and vertical concatenation can be combined. _What happens if you write something like `(temp | precip) & wind`?_ + + _Aside_: Note the importance of those parentheses... 
what happens if you remove them? Keep in mind that these overloaded operators are still subject to [Python's operator precedence rules](https://docs.python.org/3/reference/expressions.html#operator-precedence), and so vertical concatenation with `&` will take precedence over horizontal concatenation with `|`! + + As we will revisit later, concatenation operators let you combine any and all charts into a multi-view dashboard! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Repeat + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The concatenation operators above are quite general, allowing arbitrary charts to be composed. Nevertheless, the example above was still a bit verbose: we have three very similar charts, yet have to define them separately and then concatenate them. + + For cases where only one or two variables are changing, the `repeat` operator provides a convenient shortcut for creating multiple charts. Given a *template* specification with some free variables, the repeat operator will then create a chart for each specified assignment to those variables. + + Let's recreate our concatenation example above using the `repeat` operator. The only aspect that changes across charts is the choice of data field for the `y` encoding channel. To create a template specification, we can use the *repeater variable* `alt.repeat('column')` as our y-axis field. This code simply states that we want to use the variable assigned to the `column` repeater, which organizes repeated charts in a horizontal direction. (As the repeater provides the field name only, we have to specify the field data type separately as `type='quantitative'`.) 
+ + We then invoke the `repeat` method, passing in data field names for each column: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart(weather).mark_line().encode( + alt.X('month(date):T',title=None), + alt.Y(alt.repeat('column'), aggregate='average', type='quantitative'), + color='location:N' + ).properties( + width=240, + height=180 + ).repeat( + column=['temp_max', 'precipitation', 'wind'] + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Repetition is supported for both columns and rows. _What happens if you modify the code above to use `row` instead of `column`?_ + + We can also use `row` and `column` repetition together! One common visualization for exploratory data analysis is the [scatter plot matrix (or SPLOM)](https://en.wikipedia.org/wiki/Scatter_plot#Scatterplot_matrices). Given a collection of variables to inspect, a SPLOM provides a grid of all pairwise plots of those variables, allowing us to assess potential associations. + + Let's use the `repeat` operator to create a SPLOM for the `temp_max`, `precipitation`, and `wind` fields. We first create our template specification, with repeater variables for both the x- and y-axis data fields. We then invoke `repeat`, passing in arrays of field names to use for both `row` and `column`. 
Altair will then generate the [cross product (or, Cartesian product)](https://en.wikipedia.org/wiki/Cartesian_product) to create the full space of repeated charts: + """) + return + + +@app.cell +def _(alt, weather): + alt.Chart().mark_point(filled=True, size=15, opacity=0.5).encode( + alt.X(alt.repeat('column'), type='quantitative'), + alt.Y(alt.repeat('row'), type='quantitative') + ).properties( + width=150, + height=150 + ).repeat( + data=weather, + row=['temp_max', 'precipitation', 'wind'], + column=['wind', 'precipitation', 'temp_max'] + ).transform_filter( + 'datum.location == "Seattle"' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Looking at these plots, there does not appear to be a strong association between precipitation and wind, though we do see that extreme wind and precipitation events occur in similar temperature ranges (~5-15Β° C). However, this observation is not particularly surprising: if we revisit our histogram at the beginning of the facet section, we can plainly see that the days with maximum temperatures in the range of 5-15Β° C are the most commonly occurring._ + + *Modify the code above to get a better understanding of chart repetition. Try adding another variable (`temp_min`) to the SPLOM. What happens if you rearrange the order of the field names in either the `row` or `column` parameters for the `repeat` operator?* + + _Finally, to really appreciate what the `repeat` operator provides, take a moment to imagine how you might recreate the SPLOM above using only `hconcat` and `vconcat`!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## A View Composition Algebra + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Together, the composition operators `layer`, `facet`, `concat`, and `repeat` form a *view composition algebra*: the various operators can be combined to construct a variety of multi-view visualizations. 
+ + As an example, let's start with two basic charts: a histogram and a simple line (a single `rule` mark) showing a global average. + """) + return + + +@app.cell +def _(alt, weather): + basic1 = alt.Chart(weather).transform_filter( + 'datum.location == "Seattle"' + ).mark_bar().encode( + alt.X('month(date):O'), + alt.Y('average(temp_max):Q') + ) + + basic2 = alt.Chart(weather).transform_filter( + 'datum.location == "Seattle"' + ).mark_rule(stroke='firebrick').encode( + alt.Y('average(temp_max):Q') + ) + + basic1 | basic2 + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can then combine the two charts using a `layer` operator, and then `repeat` that layered chart to show histograms with overlaid averages for multiple fields: + """) + return + + +@app.cell +def _(alt, weather): + alt.layer( + alt.Chart().mark_bar().encode( + alt.X('month(date):O', title='Month'), + alt.Y(alt.repeat('column'), aggregate='average', type='quantitative') + ), + alt.Chart().mark_rule(stroke='firebrick').encode( + alt.Y(alt.repeat('column'), aggregate='average', type='quantitative') + ) + ).properties( + width=200, + height=150 + ).repeat( + data=weather, + column=['temp_max', 'precipitation', 'wind'] + ).transform_filter( + 'datum.location == "Seattle"' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Focusing only on the multi-view composition operators, the model for the visualization above is: + + ``` + repeat(column=[...]) + |- layer + |- basic1 + |- basic2 + ``` + + Now let's explore how we can apply *all* the operators within a final [dashboard](https://en.wikipedia.org/wiki/Dashboard_%28business%29) that provides an overview of Seattle weather. 
We'll combine the SPLOM and faceted histogram displays from earlier sections with the repeated histograms above: + """) + return + + +@app.cell +def _(alt, weather): + splom = alt.Chart().mark_point(filled=True, size=15, opacity=0.5).encode( + alt.X(alt.repeat('column'), type='quantitative'), + alt.Y(alt.repeat('row'), type='quantitative') + ).properties( + width=125, + height=125 + ).repeat( + row=['temp_max', 'precipitation', 'wind'], + column=['wind', 'precipitation', 'temp_max'] + ) + + dateHist = alt.layer( + alt.Chart().mark_bar().encode( + alt.X('month(date):O', title='Month'), + alt.Y(alt.repeat('row'), aggregate='average', type='quantitative') + ), + alt.Chart().mark_rule(stroke='firebrick').encode( + alt.Y(alt.repeat('row'), aggregate='average', type='quantitative') + ) + ).properties( + width=175, + height=125 + ).repeat( + row=['temp_max', 'precipitation', 'wind'] + ) + + tempHist = alt.Chart(weather).mark_bar().encode( + alt.X('temp_max:Q', bin=True, title='Temperature (Β°C)'), + alt.Y('count():Q'), + alt.Color('weather:N', scale=alt.Scale( + domain=['drizzle', 'fog', 'rain', 'snow', 'sun'], + range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52'] + )) + ).properties( + width=115, + height=100 + ).facet( + column='weather:N' + ) + + alt.vconcat( + alt.hconcat(splom, dateHist), + tempHist, + data=weather, + title='Seattle Weather Dashboard' + ).transform_filter( + 'datum.location == "Seattle"' + ).resolve_legend( + color='independent' + ).configure_axis( + labelAngle=0 + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The full composition model for this dashboard is: + + ``` + vconcat + |- hconcat + | |- repeat(row=[...], column=[...]) + | | |- splom base chart + | |- repeat(row=[...]) + | |- layer + | |- dateHist base chart 1 + | |- dateHist base chart 2 + |- facet(column='weather') + |- tempHist base chart + ``` + + _Phew!_ The dashboard also includes a few customizations to improve the layout: + + - We adjust chart `width` 
and `height` properties to assist alignment and ensure the full visualization fits on the screen. + - We add `resolve_legend(color='independent')` to ensure the color legend is associated directly with the colored histograms by temperature. Otherwise, the legend will resolve to the dashboard as a whole. + - We use `configure_axis(labelAngle=0)` to ensure that no axis labels are rotated. This helps to ensure proper alignment among the scatter plots in the SPLOM and the histograms by month on the right. + + _Try removing or modifying any of these adjustments and see how the dashboard layout responds!_ + + This dashboard can be reused to show data for other locations or from other datasets. _Update the dashboard to show weather patterns for New York instead of Seattle._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + + For more details on multi-view composition, including control over sub-plot spacing and header labels, see the [Altair Compound Charts documentation](https://altair-viz.github.io/user_guide/compound_charts.html). + + Now that we've seen how to compose multiple views, we're ready to put them into action. In addition to statically presenting data, multiple views can enable interactive multi-dimensional exploration. For example, using _linked selections_ we can highlight points in one view to see corresponding values highlight in other views. + + In the next notebook, we'll examine how to author *interactive selections* for both individual plots and multi-view compositions. 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/06_interaction.py b/altair/06_interaction.py new file mode 100644 index 0000000000000000000000000000000000000000..3447f78d91007eedc06ab45840908ac36d2d766a --- /dev/null +++ b/altair/06_interaction.py @@ -0,0 +1,671 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Interaction + + _β€œA graphic is not β€˜drawn’ once and for all; it is β€˜constructed’ and reconstructed until it reveals all the relationships constituted by the interplay of the data. The best graphic operations are those carried out by the decision-maker themself.”_ — [Jacques Bertin](https://books.google.com/books?id=csqX_xnm4tcC) + + Visualization provides a powerful means of making sense of data. A single image, however, typically provides answers to, at best, a handful of questions. Through _interaction_ we can transform static images into tools for exploration: highlighting points of interest, zooming in to reveal finer-grained patterns, and linking across multiple views to reason about multi-dimensional relationships. + + At the core of interaction is the notion of a _selection_: a means of indicating to the computer which elements or regions we are interested in. For example, we might hover the mouse over a point, click multiple marks, or draw a bounding box around a region to highlight subsets of the data for further scrutiny. + + Alongside visual encodings and data transformations, Altair provides a _selection_ abstraction for authoring interactions. These selections encompass three aspects: + + 1. Input event handling to select points or regions of interest, such as mouse hover, click, drag, scroll, and touch events. + 2. 
Generalizing from the input to form a selection rule (or [_predicate_](https://en.wikipedia.org/wiki/Predicate_%28mathematical_logic%29)) that determines whether or not a given data record lies within the selection. + 3. Using the selection predicate to dynamically configure a visualization by driving _conditional encodings_, _filter transforms_, or _scale domains_. + + This notebook introduces interactive selections and explores how to use them to author a variety of interaction techniques, such as dynamic queries, panning & zooming, details-on-demand, and brushing & linking. + + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + + return alt, pd + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Datasets + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We will visualize a variety of datasets from the [vega-datasets](https://github.com/vega/vega-datasets) collection: + + - A dataset of `cars` from the 1970s and early 1980s, + - A dataset of `movies`, previously used in the [Data Transformation](https://github.com/uwdata/visualization-curriculum/blob/master/altair_data_transformation.ipynb) notebook, + - A dataset containing ten years of [S&P 500](https://en.wikipedia.org/wiki/S%26P_500_Index) (`sp500`) stock prices, + - A dataset of technology company `stocks`, and + - A dataset of `flights`, including departure time, distance, and arrival delay. 
+ """) + return + + +@app.cell +def _(): + cars = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/cars.json' + movies = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/movies.json' + sp500 = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/sp500.csv' + stocks = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/stocks.csv' + flights = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/flights-5k.json' + return cars, flights, movies, sp500, stocks + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Introducing Selections + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's start with a basic selection: simply clicking a point to highlight it. Using the `cars` dataset, we'll start with a scatter plot of horsepower versus miles per gallon, with a color encoding for the number cylinders in the car engine. + + In addition, we'll create a selection instance by calling `alt.selection_single()`, indicating we want a selection defined over a _single value_. By default, the selection uses a mouse click to determine the selected value. To register a selection with a chart, we must add it using the `.add_params()` method. + + Once our selection has been defined, we can use it as a parameter for _conditional encodings_, which apply a different encoding depending on whether a data record lies in or out of the selection. For example, consider the following code: + + ~~~ python + color=alt.condition(selection, 'Cylinders:O', alt.value('grey')) + ~~~ + + This encoding definition states that data points contained within the `selection` should be colored according to the `Cylinder` field, while non-selected data points should use a default `grey`. An empty selection includes _all_ data points, and so initially all points will be colored. + + _Try clicking different points in the chart below. What happens? 
(Click the background to clear the selection state and return to an "empty" selection.)_ + """) + return + + +@app.cell +def _(alt, cars): + _selection = alt.selection_point(toggle=False) + alt.Chart(cars).mark_circle().add_params(_selection).encode(x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.condition(_selection, 'Cylinders:O', alt.value('grey')), opacity=alt.condition(_selection, alt.value(0.8), alt.value(0.1))) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Of course, highlighting individual data points one-at-a-time is not particularly exciting! As we'll see, however, single value selections provide a useful building block for more powerful interactions. Moreover, single value selections are just one of the three selection types provided by Altair: + + - `selection_single` - select a single discrete value, by default on click events. + - `selection_multi` - select multiple discrete values. The first value is selected on mouse click and additional values toggled using shift-click. + - `selection_interval` - select a continuous range of values, initiated by mouse drag. + + Let's compare each of these selection types side-by-side. To keep our code tidy we'll first define a function (`plot`) that generates a scatter plot specification just like the one above. We can pass a selection to the `plot` function to have it applied to the chart: + """) + return + + +@app.cell +def _(alt, cars): + def plot(selection): + return alt.Chart(cars).mark_circle().add_params(selection).encode(x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.condition(selection, 'Cylinders:O', alt.value('grey')), opacity=alt.condition(selection, alt.value(0.8), alt.value(0.1))).properties(width=240, height=180) + + return (plot,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's use our `plot` function to create three chart variants, one per selection type. + + The first (`single`) chart replicates our earlier example. 
The second (`multi`) chart supports shift-click interactions to toggle inclusion of multiple points within the selection. The third (`interval`) chart generates a selection region (or _brush_) upon mouse drag. Once created, you can drag the brush around to select different points, or scroll when the cursor is inside the brush to scale (zoom) the brush size. + + _Try interacting with each of the charts below!_ + """) + return + + +@app.cell +def _(alt, plot): + alt.hconcat( + plot(alt.selection_point(toggle=False)).properties(title='Single (Click)'), + plot(alt.selection_point()).properties(title='Multi (Shift-Click)'), + plot(alt.selection_interval()).properties(title='Interval (Drag)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The examples above use default interactions (click, shift-click, drag) for each selection type. We can further customize the interactions by providing input event specifications using [Vega event selector syntax](https://vega.github.io/vega/docs/event-streams/). For example, we can modify our `single` and `multi` charts to trigger upon `mouseover` events instead of `click` events. + + _Hold down the shift key in the second chart to "paint" with data!_ + """) + return + + +@app.cell +def _(alt, plot): + alt.hconcat( + plot(alt.selection_point(toggle=False, on='mouseover')).properties(title='Single (Mouseover)'), + plot(alt.selection_point(on='mouseover')).properties(title='Multi (Shift-Mouseover)') + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now that we've covered the basics of Altair selections, let's take a tour through the various interaction techniques they enable! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Dynamic Queries + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Dynamic queries_ enables rapid, reversible exploration of data to isolate patterns of interest. 
As defined by [Ahlberg, Williamson, & Shneiderman](https://www.cs.umd.edu/~ben/papers/Ahlberg1992Dynamic.pdf), a dynamic query: + + - represents a query graphically, + - provides visible limits on the query range, + - provides a graphical representation of the data and query result, + - gives immediate feedback of the result after every query adjustment, + - and allows novice users to begin working with little training. + + A common approach is to manipulate query parameters using standard user interface widgets such as sliders, radio buttons, and drop-down menus. To generate dynamic query widgets, we can apply a selection's `bind` operation to one or more data fields we wish to query. + + Let's build an interactive scatter plot that uses a dynamic query to filter the display. Given a scatter plot of movie ratings (from Rotten Tomatoes and IMDB), we can add a selection over the `Major_Genre` field to enable interactive filtering by film genre. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To start, let's extract the unique (non-null) genres from the `movies` data: + """) + return + + +@app.cell +def _(movies, pd): + df = pd.read_json(movies) # load movies data + genres = df['Major_Genre'].unique() # get unique field values + genres = list(filter(pd.notna, genres)) # filter out None/NaN values + genres.sort() # sort alphabetically + return (genres,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For later use, let's also define a list of unique `MPAA_Rating` values: + """) + return + + +@app.cell +def _(): + mpaa = ['G', 'PG', 'PG-13', 'R', 'NC-17', 'Not Rated'] + return (mpaa,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now let's create a `single` selection bound to a drop-down menu. + + *Use the dynamic query menu below to explore the data. How do ratings vary by genre? How would you revise the code to filter `MPAA_Rating` (G, PG, PG-13, etc.) 
instead of `Major_Genre`?* + """) + return + + +@app.cell +def _(alt, genres, movies): + selectGenre = alt.selection_point( + toggle=False, + name='Select', # name the selection 'Select' + fields=['Major_Genre'], # limit selection to the Major_Genre field + value=[{'Major_Genre': genres[0]}], # use first genre entry as initial value + bind=alt.binding_select(options=genres) # bind to a menu of unique genre values + ) + + alt.Chart(movies).mark_circle().add_params( + selectGenre + ).encode( + x='Rotten_Tomatoes_Rating:Q', + y='IMDB_Rating:Q', + tooltip='Title:N', + opacity=alt.condition(selectGenre, alt.value(0.75), alt.value(0.05)) + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Our construction above leverages multiple aspects of selections: + + - We give the selection a name (`'Select'`). This name is not required, but allows us to influence the label text of the generated dynamic query menu. (_What happens if you remove the name? Try it!_) + - We constrain the selection to a specific data field (`Major_Genre`). Earlier when we used a `single` selection, the selection mapped to individual data points. By limiting the selection to a specific field, we can select _all_ data points whose `Major_Genre` field value matches the single selected value. + - We initialize the selection to a starting value using the `value=...` parameter. + - We `bind` the selection to an interface widget, in this case a drop-down menu via `binding_select`. + - As before, we then use a conditional encoding to control the opacity channel. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Binding Selections to Multiple Inputs + + One selection instance can be bound to _multiple_ dynamic query widgets. Let's modify the example above to provide filters for _both_ `Major_Genre` and `MPAA_Rating`, using radio buttons instead of a menu. 
Our `single` selection is now defined over a single _pair_ of genre and MPAA rating values + + _Look for surprising conjunctions of genre and rating. Are there any G or PG-rated horror films?_ + """) + return + + +@app.cell +def _(alt, genres, movies, mpaa): + # single-value selection over [Major_Genre, MPAA_Rating] pairs + # use specific hard-wired values as the initial selected values + _selection = alt.selection_point(toggle=False, name='Select', fields=['Major_Genre', 'MPAA_Rating'], value=[{'Major_Genre': 'Drama', 'MPAA_Rating': 'R'}], bind={'Major_Genre': alt.binding_select(options=genres), 'MPAA_Rating': alt.binding_radio(options=mpaa)}) + # scatter plot, modify opacity based on selection + alt.Chart(movies).mark_circle().add_params(_selection).encode(x='Rotten_Tomatoes_Rating:Q', y='IMDB_Rating:Q', tooltip='Title:N', opacity=alt.condition(_selection, alt.value(0.75), alt.value(0.05))) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Fun facts: The PG-13 rating didn't exist when the movies [Jaws](https://www.imdb.com/title/tt0073195/) and [Jaws 2](https://www.imdb.com/title/tt0077766/) were released. The first film to receive a PG-13 rating was 1984's [Red Dawn](https://www.imdb.com/title/tt0087985/)._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Using Visualizations as Dynamic Queries + + Though standard interface widgets show the _possible_ query parameter values, they do not visualize the _distribution_ of those values. We might also wish to use richer interactions, such as multi-value or interval selections, rather than input widgets that select only a single value at a time. + + To address these issues, we can author additional charts to both visualize data and support dynamic queries. Let's add a histogram of the count of films per year and use an interval selection to dynamically highlight films over selected time periods. 
+ + *Interact with the year histogram to explore films from different time periods. Do you see any evidence of [sampling bias](https://en.wikipedia.org/wiki/Sampling_bias) across the years? (How do year and critics' ratings relate?)* + + _The years range from 1930 to 2040! Are future films in pre-production, or are there "off-by-one century" errors? Also, depending on which time zone you're in, you may see a small bump in either 1969 or 1970. Why might that be? (See the end of the notebook for an explanation!)_ + """) + return + + +@app.cell +def _(alt, movies): + _brush = alt.selection_interval(encodings=['x']) + years = alt.Chart(movies).mark_bar().add_params(_brush).encode(alt.X('year(Release_Date):T', title='Films by Release Year'), alt.Y('count():Q', title=None)).properties(width=650, height=50) # limit selection to x-axis (year) values + ratings = alt.Chart(movies).mark_circle().encode(x='Rotten_Tomatoes_Rating:Q', y='IMDB_Rating:Q', tooltip='Title:N', opacity=alt.condition(_brush, alt.value(0.75), alt.value(0.05))).properties(width=650, height=400) + # dynamic query histogram + # scatter plot, modify opacity based on selection + alt.vconcat(years, ratings).properties(spacing=5) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The example above provides dynamic queries using a _linked selection_ between charts: + + - We create an `interval` selection (`brush`), and set `encodings=['x']` to limit the selection to the x-axis only, resulting in a one-dimensional selection interval. + - We register `brush` with our histogram of films per year via `.add_params(brush)`. + - We use `brush` in a conditional encoding to adjust the scatter plot `opacity`. + + This interaction technique of selecting elements in one chart and seeing linked highlights in one or more other charts is known as [_brushing & linking_](https://en.wikipedia.org/wiki/Brushing_and_linking). 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Panning & Zooming + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The movie rating scatter plot is a bit cluttered in places, making it hard to examine points in denser regions. Using the interaction techniques of _panning_ and _zooming_, we can inspect dense regions more closely. + + Let's start by thinking about how we might express panning and zooming using Altair selections. What defines the "viewport" of a chart? _Axis scale domains!_ + + We can change the scale domains to modify the visualized range of data values. To do so interactively, we can bind an `interval` selection to scale domains with the code `bind='scales'`. The result is that instead of an interval brush that we can drag and zoom, we instead can drag and zoom the entire plotting area! + + _In the chart below, click and drag to pan (translate) the view, or scroll to zoom (scale) the view. What can you discover about the precision of the provided rating values?_ + """) + return + + +@app.cell +def _(alt, movies): + alt.Chart(movies).mark_circle().add_params( + alt.selection_interval(bind='scales') + ).encode( + x='Rotten_Tomatoes_Rating:Q', + y=alt.Y('IMDB_Rating:Q', axis=alt.Axis(minExtent=30)), # use min extent to stabilize axis title placement + tooltip=['Title:N', 'Release_Date:N', 'IMDB_Rating:Q', 'Rotten_Tomatoes_Rating:Q'] + ).properties( + width=600, + height=400 + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Zooming in, we can see that the rating values have limited precision! The Rotten Tomatoes ratings are integers, while the IMDB ratings are truncated to tenths. As a result, there is overplotting even when we zoom, with multiple movies sharing the same rating values._ + + Reading the code above, you may notice the code `alt.Axis(minExtent=30)` in the `y` encoding channel. 
The `minExtent` parameter ensures a minimum amount of space is reserved for axis ticks and labels. Why do this? When we pan and zoom, the axis labels may change and cause the axis title position to shift. By setting a minimum extent we can reduce distracting movements in the plot. _Try changing the `minExtent` value, for example setting it to zero, and then zoom out to see what happens when longer axis labels enter the view._ + + Altair also includes a shorthand for adding panning and zooming to a plot. Instead of directly creating a selection, you can call `.interactive()` to have Altair automatically generate an interval selection bound to the chart's scales: + """) + return + + +@app.cell +def _(alt, movies): + alt.Chart(movies).mark_circle().encode( + x='Rotten_Tomatoes_Rating:Q', + y=alt.Y('IMDB_Rating:Q', axis=alt.Axis(minExtent=30)), # use min extent to stabilize axis title placement + tooltip=['Title:N', 'Release_Date:N', 'IMDB_Rating:Q', 'Rotten_Tomatoes_Rating:Q'] + ).properties( + width=600, + height=400 + ).interactive() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By default, scale bindings for selections include both the `x` and `y` encoding channels. What if we want to limit panning and zooming along a single dimension? 
We can invoke `encodings=['x']` to constrain the selection to the `x` channel only: + """) + return + + +@app.cell +def _(alt, movies): + alt.Chart(movies).mark_circle().add_params( + alt.selection_interval(bind='scales', encodings=['x']) + ).encode( + x='Rotten_Tomatoes_Rating:Q', + y=alt.Y('IMDB_Rating:Q', axis=alt.Axis(minExtent=30)), # use min extent to stabilize axis title placement + tooltip=['Title:N', 'Release_Date:N', 'IMDB_Rating:Q', 'Rotten_Tomatoes_Rating:Q'] + ).properties( + width=600, + height=400 + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _When zooming along a single axis only, the shape of the visualized data can change, potentially affecting our perception of relationships in the data. [Choosing an appropriate aspect ratio](http://vis.stanford.edu/papers/arclength-banking) is an important visualization design concern!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Navigation: Overview + Detail + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + When panning and zooming, we directly adjust the "viewport" of a chart. The related navigation strategy of _overview + detail_ instead uses an overview display to show _all_ of the data, while supporting selections that pan and zoom a separate focus display. + + Below we have two area charts showing a decade of price fluctuations for the S&P 500 stock index. Initially both charts show the same data range. 
_Click and drag in the bottom overview chart to update the focus display and examine specific time spans._ + """) + return + + +@app.cell +def _(alt, sp500): + _brush = alt.selection_interval(encodings=['x']) + _base = alt.Chart().mark_area().encode(alt.X('date:T', title=None), alt.Y('price:Q')).properties(width=700) + alt.vconcat(_base.encode(alt.X('date:T', title=None, scale=alt.Scale(domain=_brush))), _base.add_params(_brush).properties(height=60), data=sp500) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Unlike our earlier panning & zooming case, here we don't want to bind a selection directly to the scales of a single interactive chart. Instead, we want to bind the selection to a scale domain in _another_ chart. To do so, we update the `x` encoding channel for our focus chart, setting the scale `domain` property to reference our `brush` selection. If no interval is defined (the selection is empty), Altair ignores the brush and uses the underlying data to determine the domain. When a brush interval is created, Altair instead uses that as the scale `domain` for the focus chart. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Details on Demand + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Once we spot points of interest within a visualization, we often want to know more about them. _Details-on-demand_ refers to interactively querying for more information about selected values. _Tooltips_ are one useful means of providing details on demand. However, tooltips typically only show information for one data point at a time. How might we show more? + + The movie ratings scatterplot includes a number of potentially interesting outliers where the Rotten Tomatoes and IMDB ratings disagree. Let's create a plot that allows us to interactively select points and show their labels. 
To trigger the filter query on either the hover or click interaction, we will use the [Altair composition operator](https://altair-viz.github.io/user_guide/interactions.html#composing-multiple-selections) `|` ("or"). + + _Mouse over points in the scatter plot below to see a highlight and title label. Shift-click points to make annotations persistent and view multiple labels at once. Which movies are loved by Rotten Tomatoes critics, but not the general audience on IMDB (or vice versa)? See if you can find possible errors, where two different movies with the same name were accidentally combined!_ + """) + return + + +@app.cell +def _(alt, movies): + hover = alt.selection_point(toggle=False, on='mouseover', nearest=True, empty=False) + click = alt.selection_point(empty=False) + plot_1 = alt.Chart().mark_circle().encode(x='Rotten_Tomatoes_Rating:Q', y='IMDB_Rating:Q') + _base = plot_1.transform_filter(hover | click) + alt.layer(plot_1.add_params(hover).add_params(click), _base.mark_point(size=100, stroke='firebrick', strokeWidth=1), _base.mark_text(dx=4, dy=-8, align='right', stroke='white', strokeWidth=2).encode(text='Title:N'), _base.mark_text(dx=4, dy=-8, align='right').encode(text='Title:N'), data=movies).properties(width=600, height=450) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The example above adds three new layers to the scatter plot: a circular annotation, white text to provide a legible background, and black text showing a film title. In addition, this example uses two selections in tandem: + + 1. A single selection (`hover`) that includes `nearest=True` to automatically select the nearest data point as the mouse moves. + 2. A multi selection (`click`) to create persistent selections via shift-click. + + Both selections set `empty=False` to indicate that no points should be included if a selection is empty. 
These selections are then combined into a single filter predicate — the logical _or_ of `hover` and `click` — to include points that reside in _either_ selection. We use this predicate to filter the new layers to show annotations and labels for selected points only. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Using selections and layers, we can realize a number of different designs for details on demand! For example, here is a log-scaled time series of technology stock prices, annotated with a guideline and labels for the date nearest the mouse cursor: + """) + return + + +@app.cell +def _(alt, stocks): + # select a point for which to provide details-on-demand + label = alt.selection_point(toggle=False, encodings=['x'], on='mouseover', nearest=True, empty=False) + _base = alt.Chart().mark_line().encode(alt.X('date:T'), alt.Y('price:Q', scale=alt.Scale(type='log')), alt.Color('symbol:N')) # limit selection to x-axis value + # define our base line chart of stock prices + alt.layer(_base, alt.Chart().mark_rule(color='#aaa').encode(x='date:T').transform_filter(label), _base.mark_circle().encode(opacity=alt.condition(label, alt.value(1), alt.value(0))).add_params(label), _base.mark_text(align='left', dx=5, dy=-5, stroke='white', strokeWidth=2).encode(text='price:Q').transform_filter(label), _base.mark_text(align='left', dx=5, dy=-5).encode(text='price:Q').transform_filter(label), data=stocks).properties(width=700, height=400) # select on mouseover events # select data point nearest the cursor # empty selection includes no data points # base line chart # add a rule mark to serve as a guide line # add circle marks for selected time points, hide unselected points # add white stroked text to provide a legible background for labels # add text labels for stock prices + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Putting into action what we've learned so far: can you modify the movie scatter plot above (the one with the 
dynamic query over years) to include a `rule` mark that shows the average IMDB (or Rotten Tomatoes) rating for the data contained within the year `interval` selection?_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Brushing & Linking, Revisited + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Earlier in this notebook we saw an example of _brushing & linking_: using a dynamic query histogram to highlight points in a movie rating scatter plot. Here, we'll visit some additional examples involving linked selections. + + Returning to the `cars` dataset, we can use the `repeat` operator to build a [scatter plot matrix (SPLOM)](https://en.wikipedia.org/wiki/Scatter_plot#Scatterplot_matrices) that shows associations between mileage, acceleration, and horsepower. We can define an `interval` selection and include it _within_ our repeated scatter plot specification to enable linked selections among all the plots. + + _Click and drag in any of the plots below to perform brushing & linking!_ + """) + return + + +@app.cell +def _(alt, cars): + _brush = alt.selection_interval(resolve='global') + alt.Chart(cars).mark_circle().add_params(_brush).encode(alt.X(alt.repeat('column'), type='quantitative'), alt.Y(alt.repeat('row'), type='quantitative'), color=alt.condition(_brush, 'Cylinders:O', alt.value('grey')), opacity=alt.condition(_brush, alt.value(0.8), alt.value(0.1))).properties(width=140, height=140).repeat(column=['Acceleration', 'Horsepower', 'Miles_per_Gallon'], row=['Miles_per_Gallon', 'Horsepower', 'Acceleration']) # resolve all selections to a single global instance + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Note above the use of `resolve='global'` on the `interval` selection. The default setting of `'global'` indicates that across all plots only one brush can be active at a time. However, in some cases we might want to define brushes in multiple plots and combine the results. 
If we use `resolve='union'`, the selection will be the _union_ of all brushes: if a point resides within any brush it will be selected. Alternatively, if we use `resolve='intersect'`, the selection will consist of the _intersection_ of all brushes: only points that reside within all brushes will be selected. + + _Try setting the `resolve` parameter to `'union'` and `'intersect'` and see how it changes the resulting selection logic._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Cross-Filtering + + The brushing & linking examples we've looked at all use conditional encodings, for example to change opacity values in response to a selection. Another option is to use a selection defined in one view to _filter_ the content of another view. + + Let's build a collection of histograms for the `flights` dataset: arrival `delay` (how early or late a flight arrives, in minutes), `distance` flown (in miles), and `time` of departure (hour of the day). We'll use the `repeat` operator to create the histograms, and add an `interval` selection for the `x` axis with brushes resolved via intersection. + + In particular, each histogram will consist of two layers: a gray background layer and a blue foreground layer, with the foreground layer filtered by our intersection of brush selections. The result is a _cross-filtering_ interaction across the three charts! + + _Drag out brush intervals in the charts below. 
As you select flights with longer or shorter arrival delays, how do the distance and time distributions respond?_ + """) + return + + +@app.cell +def _(alt, flights): + _brush = alt.selection_interval(encodings=['x'], resolve='intersect') + hist = alt.Chart().mark_bar().encode(alt.X(alt.repeat('row'), type='quantitative', bin=alt.Bin(maxbins=100, minstep=1), axis=alt.Axis(format='d', titleAnchor='start')), alt.Y('count():Q', title=None)) + alt.layer(hist.add_params(_brush).encode(color=alt.value('lightgrey')), hist.transform_filter(_brush)).properties(width=900, height=100).repeat(row=['delay', 'distance', 'time'], data=flights).transform_calculate(delay='datum.delay < 180 ? datum.delay : 180', time='hours(datum.date) + minutes(datum.date) / 60').configure_view(stroke='transparent') # up to 100 bins # integer format, left-aligned title # no y-axis title # clamp delays > 3 hours # fractional hours # no outline + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _By cross-filtering you can observe that delayed flights are more likely to depart at later hours. This phenomenon is familiar to frequent fliers: a delay can propagate through the day, affecting subsequent travel by that plane. For the best odds of an on-time arrival, book an early flight!_ + + The combination of multiple views and interactive selections can enable valuable forms of multi-dimensional reasoning, turning even basic histograms into powerful input devices for asking questions of a dataset! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For more information about the supported interaction options in Altair, please consult the [Altair interactive selection documentation](https://altair-viz.github.io/user_guide/interactions.html). 
For details about customizing event handlers, for example to compose multiple interaction techniques or support touch-based input on mobile devices, see the [Vega-Lite selection documentation](https://vega.github.io/vega-lite/docs/selection.html). + + Interested in learning more? + - The _selection_ abstraction was introduced in the paper [Vega-Lite: A Grammar of Interactive Graphics](http://idl.cs.washington.edu/papers/vega-lite/), by Satyanarayan, Moritz, Wongsuphasawat, & Heer. + - The PRIM-9 system (for projection, rotation, isolation, and masking in up to 9 dimensions) is one of the earliest interactive visualization tools, built in the early 1970s by Fisherkeller, Tukey, & Friedman. [A retro demo video survives!](https://www.youtube.com/watch?v=B7XoW2qiFUA) + - The concept of brushing & linking was crystallized by Becker, Cleveland, & Wilks in their 1987 article [Dynamic Graphics for Data Analysis](https://scholar.google.com/scholar?cluster=14817303117298653693). + - For a comprehensive summary of interaction techniques for visualization, see [Interactive Dynamics for Visual Analysis](https://queue.acm.org/detail.cfm?id=2146416) by Heer & Shneiderman. + - Finally, for a treatise on what makes interaction effective, read the classic [Direct Manipulation Interfaces](https://scholar.google.com/scholar?cluster=15702972136892195211) paper by Hutchins, Hollan, & Norman. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + #### Appendix: On The Representation of Time + + Earlier we observed a small bump in the number of movies in either 1969 or 1970. Where does that bump come from? And why 1969 _or_ 1970? The answer stems from a combination of missing data and how your computer represents time. 
+ + Internally, dates and times are represented relative to the [UNIX epoch](https://en.wikipedia.org/wiki/Unix_time), in which time "zero" corresponds to the stroke of midnight on January 1, 1970 in [UTC time](https://en.wikipedia.org/wiki/Coordinated_Universal_Time), which runs along the [prime meridian](https://en.wikipedia.org/wiki/Prime_meridian). It turns out there are a few movies with missing (`null`) release dates. Those `null` values get interpreted as time `0`, and thus map to January 1, 1970 in UTC time. If you live in the Americas – and thus in "earlier" time zones – this precise point in time corresponds to an earlier hour on December 31, 1969 in your local time zone. On the other hand, if you live near or east of the prime meridian, the date in your local time zone will be January 1, 1970. + + The takeaway? Always be skeptical of your data, and be mindful that how data is represented (whether as date times, or floating point numbers, or latitudes and longitudes, _etc._) can sometimes lead to artifacts that impact analysis! 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/07_cartographic.py b/altair/07_cartographic.py new file mode 100644 index 0000000000000000000000000000000000000000..589e72c474e29032c0ad39ca79394aa1a124f2b9 --- /dev/null +++ b/altair/07_cartographic.py @@ -0,0 +1,898 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# "vega_datasets==0.9.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Cartographic Visualization + + _β€œThe making of maps is one of humanity's longest established intellectual endeavors and also one of its most complex, with scientific theory, graphical representation, geographical facts, and practical considerations blended together in an unending variety of ways.”_ — [H. J. Steward](https://books.google.com/books?id=cVy1Ms43fFYC) + + Cartography – the study and practice of map-making – has a rich history spanning centuries of discovery and design. Cartographic visualization leverages mapping techniques to convey data containing spatial information, such as locations, routes, or trajectories on the surface of the Earth. + +
+ + Approximating the Earth as a sphere, we can denote positions using a spherical coordinate system of _latitude_ (angle in degrees north or south of the _equator_) and _longitude_ (angle in degrees specifying east-west position). In this system, a _parallel_ is a circle of constant latitude and a _meridian_ is a circle of constant longitude. The [_prime meridian_](https://en.wikipedia.org/wiki/Prime_meridian) lies at 0Β° longitude and by convention is defined to pass through the Royal Observatory in Greenwich, England. + + To "flatten" a three-dimensional sphere on to a two-dimensional plane, we must apply a [projection](https://en.wikipedia.org/wiki/Map_projection) that maps (`longitude`, `latitude`) pairs to (`x`, `y`) coordinates. Similar to [scales](https://github.com/uwdata/visualization-curriculum/blob/master/altair_scales_axes_legends.ipynb), projections map from a data domain (spatial position) to a visual range (pixel position). However, the scale mappings we've seen thus far accept a one-dimensional domain, whereas map projections are inherently two-dimensional. + + In this notebook, we will introduce the basics of creating maps and visualizing spatial data with Altair, including: + + - Data formats for representing geographic features, + - Geo-visualization techniques such as point, symbol, and choropleth maps, and + - A review of common cartographic projections. + + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell +def _(): + import pandas as pd + import altair as alt + from vega_datasets import data + + return alt, data + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Geographic Data: GeoJSON and TopoJSON + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Up to this point, we have worked with JSON and CSV formatted datasets that correspond to data tables made up of rows (records) and columns (fields). 
In order to represent geographic regions (countries, states, _etc._) and trajectories (flight paths, subway lines, _etc._), we need to expand our repertoire with additional formats designed to support rich geometries. + + [GeoJSON](https://en.wikipedia.org/wiki/GeoJSON) models geographic features within a specialized JSON format. A GeoJSON `feature` can include geometric data – such as `longitude`, `latitude` coordinates that make up a country boundary – as well as additional data attributes. + + Here is a GeoJSON `feature` object for the boundary of the U.S. state of Colorado: + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ~~~ json + { + "type": "Feature", + "id": 8, + "properties": {"name": "Colorado"}, + "geometry": { + "type": "Polygon", + "coordinates": [ + [[-106.32056285448942,40.998675790862656],[-106.19134826714341,40.99813863734313],[-105.27607827344248,40.99813863734313],[-104.9422739227986,40.99813863734313],[-104.05212898774828,41.00136155846029],[-103.57475287338661,41.00189871197981],[-103.38093099236758,41.00189871197981],[-102.65589358559272,41.00189871197981],[-102.62000064466328,41.00189871197981],[-102.052892177978,41.00189871197981],[-102.052892177978,40.74889940428302],[-102.052892177978,40.69733266640851],[-102.052892177978,40.44003613055551],[-102.052892177978,40.3492571857556],[-102.052892177978,40.00333031918079],[-102.04930288388505,39.57414465707943],[-102.04930288388505,39.56823596836465],[-102.0457135897921,39.1331416175485],[-102.0457135897921,39.0466599009048],[-102.0457135897921,38.69751011321283],[-102.0457135897921,38.61478847120581],[-102.0457135897921,38.268861604631],[-102.0457135897921,38.262415762396685],[-102.04212429569915,37.738153927339205],[-102.04212429569915,37.64415206142214],[-102.04212429569915,37.38900413964724],[-102.04212429569915,36.99365914927603],[-103.00046581851544,37.00010499151034],[-103.08660887674611,37.00010499151034],[-104.00905745863294,36.99580776335414],[-105.15404227428235
,36.995270609834606],[-105.2222388620483,36.995270609834606],[-105.7175614468747,36.99580776335414],[-106.00829426840322,36.995270609834606],[-106.47490250048605,36.99365914927603],[-107.4224761410235,37.00010499151034],[-107.48349414060355,37.00010499151034],[-108.38081766383978,36.99903068447129],[-109.04483707103458,36.99903068447129],[-109.04483707103458,37.484617466122884],[-109.04124777694163,37.88049961001363],[-109.04124777694163,38.15283644441336],[-109.05919424740635,38.49983761802722],[-109.05201565922046,39.36680339854235],[-109.05201565922046,39.49786885730673],[-109.05201565922046,39.66062637372313],[-109.05201565922046,40.22248895514744],[-109.05201565922046,40.653823231326896],[-109.05201565922046,41.000287251421234],[-107.91779872584989,41.00189871197981],[-107.3183866123281,41.00297301901887],[-106.85895696843116,41.00189871197981],[-106.32056285448942,40.998675790862656]] + ] + } + } + ~~~ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The `feature` includes a `properties` object, which can include any number of data fields, plus a `geometry` object, which in this case contains a single polygon that consists of `[longitude, latitude]` coordinates for the state boundary. The coordinates continue off to the right for a while should you care to scroll... + + To learn more about the nitty-gritty details of GeoJSON, see the [official GeoJSON specification](http://geojson.org/) or read [Tom MacWright's helpful primer](https://macwright.org/2015/03/23/geojson-second-bite). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + One drawback of GeoJSON as a storage format is that it can be redundant, resulting in larger file sizes. Consider: Colorado shares boundaries with six other states (seven if you include the corner touching Arizona). 
Instead of using separate, overlapping coordinate lists for each of those states, a more compact approach is to encode shared borders only once, representing the _topology_ of geographic regions. Fortunately, this is precisely what the [TopoJSON](https://github.com/topojson/topojson/blob/master/README.md) format does! + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's load a TopoJSON file of world countries (at 110 meter resolution): + """) + return + + +@app.cell +def _(data): + world = data.world_110m.url + world + return (world,) + + +@app.cell +def _(data): + world_topo = data.world_110m() + return (world_topo,) + + +@app.cell +def _(world_topo): + world_topo.keys() + return + + +@app.cell +def _(world_topo): + world_topo['type'] + return + + +@app.cell +def _(world_topo): + world_topo['objects'].keys() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Inspect the `world_topo` TopoJSON dictionary object above to see its contents._ + + In the data above, the `objects` property indicates the named elements we can extract from the data: geometries for all `countries`, or a single polygon representing all `land` on Earth. Either of these can be unpacked to GeoJSON data we can then visualize. + + As TopoJSON is a specialized format, we need to instruct Altair to parse the TopoJSON format, indicating which named feature object we wish to extract from the topology. The following code indicates that we want to extract GeoJSON features from the `world` dataset for the `countries` object: + + ~~~ js + alt.topo_feature(world, 'countries') + ~~~ + + This `alt.topo_feature` method call expands to the following Vega-Lite JSON: + + ~~~ json + { + "values": world, + "format": {"type": "topojson", "feature": "countries"} + } + ~~~ + + Now that we can load geographic data, we're ready to start making maps! 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Geoshape Marks + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To visualize geographic data, Altair provides the `geoshape` mark type. To create a basic map, we can create a `geoshape` mark and pass it our TopoJSON data, which is then unpacked into GeoJSON features, one for each country of the world: + """) + return + + +@app.cell +def _(alt, world): + alt.Chart(alt.topo_feature(world, 'countries')).mark_geoshape() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + In the example above, Altair applies a default blue color and uses a default map projection (`mercator`). We can customize the colors and boundary stroke widths using standard mark properties. Using the `project` method we can also add our own map projection: + """) + return + + +@app.cell +def _(alt, world): + alt.Chart(alt.topo_feature(world, 'countries')).mark_geoshape( + fill='#2a1d0c', stroke='#706545', strokeWidth=0.5 + ).project( + type='mercator' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + By default Altair automatically adjusts the projection so that all the data fits within the width and height of the chart. We can also specify projection parameters, such as `scale` (zoom level) and `translate` (panning), to customize the projection settings. Here we adjust the `scale` and `translate` parameters to focus on Europe: + """) + return + + +@app.cell +def _(alt, world): + alt.Chart(alt.topo_feature(world, 'countries')).mark_geoshape( + fill='#2a1d0c', stroke='#706545', strokeWidth=0.5 + ).project( + type='mercator', scale=400, translate=[100, 550] + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Note how the 110m resolution of the data becomes apparent at this scale. To see more detailed coast lines and boundaries, we need an input file with more fine-grained geometries. 
Adjust the `scale` and `translate` parameters to focus the map on other regions!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + So far our map shows countries only. Using the `layer` operator, we can combine multiple map elements. Altair includes _data generators_ we can use to create data for additional map layers: + + - The sphere generator (`{'sphere': True}`) provides a GeoJSON representation of the full sphere of the Earth. We can create an additional `geoshape` mark that fills in the shape of the Earth as a background layer. + - The graticule generator (`{'graticule': ...}`) creates a GeoJSON feature representing a _graticule_: a grid formed by lines of latitude and longitude. The default graticule has meridians and parallels every 10Β° between Β±80Β° latitude. For the polar regions, there are meridians every 90Β°. These settings can be customized using the `stepMinor` and `stepMajor` properties. + + Let's layer sphere, graticule, and country marks into a reusable map specification: + """) + return + + +@app.cell +def _(alt, world): + map = alt.layer( + # use the sphere of the Earth as the base layer + alt.Chart({'sphere': True}).mark_geoshape( + fill='#e6f3ff' + ), + # add a graticule for geographic reference lines + alt.Chart({'graticule': True}).mark_geoshape( + stroke='#ffffff', strokeWidth=1 + ), + # and then the countries of the world + alt.Chart(alt.topo_feature(world, 'countries')).mark_geoshape( + fill='#2a1d0c', stroke='#706545', strokeWidth=0.5 + ) + ).properties( + width=600, + height=400 + ) + return (map,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can extend the map with a desired projection and draw the result. Here we apply a [Natural Earth projection](https://en.wikipedia.org/wiki/Natural_Earth_projection). The _sphere_ layer provides the light blue background; the _graticule_ layer provides the white geographic reference lines. 
+ """) + return + + +@app.cell +def _(map): + map.project( + type='naturalEarth1', scale=110, translate=[300, 200] + ).configure_view(stroke=None) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Point Maps + + In addition to the _geometric_ data provided by GeoJSON or TopoJSON files, many tabular datasets include geographic information in the form of fields for `longitude` and `latitude` coordinates, or references to geographic regions such as country names, state names, postal codes, _etc._, which can be mapped to coordinates using a [geocoding service](https://en.wikipedia.org/wiki/Geocoding). In some cases, location data is rich enough that we can see meaningful patterns by projecting the data points alone — no base map required! + + Let's look at a dataset of 5-digit zip codes in the United States, including `longitude`, `latitude` coordinates for each post office in addition to a `zip_code` field. + """) + return + + +@app.cell +def _(data): + zipcodes = data.zipcodes.url + zipcodes + return (zipcodes,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + We can visualize each post office location using a small (1-pixel) `square` mark. However, to set the positions we do _not_ use `x` and `y` channels. _Why is that?_ + + While cartographic projections map (`longitude`, `latitude`) coordinates to (`x`, `y`) coordinates, they can do so in arbitrary ways. There is no guarantee, for example, that `longitude` β†’ `x` and `latitude` β†’ `y`! Instead, Altair includes special `longitude` and `latitude` encoding channels to handle geographic coordinates. These channels indicate which data fields should be mapped to `longitude` and `latitude` coordinates, and then applies a projection to map those coordinates to (`x`, `y`) positions. 
+ """) + return + + +@app.cell +def _(alt, zipcodes): + alt.Chart(zipcodes).mark_square( + size=1, opacity=1 + ).encode( + longitude='longitude:Q', # apply the field named 'longitude' to the longitude channel + latitude='latitude:Q' # apply the field named 'latitude' to the latitude channel + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Plotting zip codes only, we can see the outline of the United States and discern meaningful patterns in the density of post offices, without a base map or additional reference elements!_ + + We use the `albersUsa` projection, which takes some liberties with the actual geometry of the Earth, with scaled versions of Alaska and Hawaii in the bottom-left corner. As we did not specify projection `scale` or `translate` parameters, Altair sets them automatically to fit the visualized data. + + We can now go on to ask more questions of our dataset. For example, is there any rhyme or reason to the allocation of zip codes? To assess this question we can add a color encoding based on the first digit of the zip code. We first add a `calculate` transform to extract the first digit, and encode the result using the color channel: + """) + return + + +@app.cell +def _(alt, zipcodes): + alt.Chart(zipcodes).transform_calculate( + digit='datum.zip_code[0]' + ).mark_square( + size=2, opacity=1 + ).encode( + longitude='longitude:Q', + latitude='latitude:Q', + color='digit:N' + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _To zoom in on a specific digit, add a filter transform to limit the data shown! Try adding an [interactive selection](https://github.com/uwdata/visualization-curriculum/blob/master/altair_interaction.ipynb) to filter to a single digit and dynamically update the map. 
And be sure to use strings (\`'1'\`) instead of numbers (\`1\`) when filtering digit values!_ + + (This example is inspired by Ben Fry's classic [zipdecode](https://benfry.com/zipdecode/) visualization!) + + We might further wonder what the _sequence_ of zip codes might indicate. One way to explore this question is to connect each consecutive zip code using a `line` mark, as done in Robert Kosara's [ZipScribble](https://eagereyes.org/zipscribble-maps/united-states) visualization: + """) + return + + +@app.cell +def _(alt, zipcodes): + alt.Chart(zipcodes).transform_filter( + '-150 < datum.longitude && 22 < datum.latitude && datum.latitude < 55' + ).transform_calculate( + digit='datum.zip_code[0]' + ).mark_line( + strokeWidth=0.5 + ).encode( + longitude='longitude:Q', + latitude='latitude:Q', + color='digit:N', + order='zip_code:O' + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _We can now see how zip codes further cluster into smaller areas, indicating a hierarchical allocation of codes by location, but with some notable variability within local clusters._ + + If you were paying careful attention to our earlier maps, you may have noticed that there are zip codes being plotted in the upper-left corner! These correspond to locations such as Puerto Rico or American Samoa, which contain U.S. zip codes but are mapped to `null` coordinates (`0`, `0`) by the `albersUsa` projection. In addition, Alaska and Hawaii can complicate our view of the connecting line segments. In response, the code above includes an additional filter that removes points outside our chosen `longitude` and `latitude` spans. 
+ + _Remove the filter above to see what happens!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Symbol Maps + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now let's combine a base map and plotted data as separate layers. We'll examine the U.S. commercial flight network, considering both airports and flight routes. To do so, we'll need three datasets. + For our base map, we'll use a TopoJSON file for the United States at 10m resolution, containing features for `states` or `counties`: + """) + return + + +@app.cell +def _(data): + usa = data.us_10m.url + usa + return (usa,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For the airports, we will use a dataset with fields for the `longitude` and `latitude` coordinates of each airport as well as the `iata` airport code — for example, `'SEA'` for [Seattle-Tacoma International Airport](https://en.wikipedia.org/wiki/Seattle%E2%80%93Tacoma_International_Airport). + """) + return + + +@app.cell +def _(data): + airports = data.airports.url + airports + return (airports,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Finally, we will use a dataset of flight routes, which contains `origin` and `destination` fields with the IATA codes for the corresponding airports: + """) + return + + +@app.cell +def _(data): + flights = data.flights_airport.url + flights + return (flights,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Let's start by creating a base map using the `albersUsa` projection, and add a layer that plots `circle` marks for each airport: + """) + return + + +@app.cell +def _(airports, alt, usa): + alt.layer( + alt.Chart(alt.topo_feature(usa, 'states')).mark_geoshape( + fill='#ddd', stroke='#fff', strokeWidth=1 + ), + alt.Chart(airports).mark_circle(size=9).encode( + latitude='latitude:Q', + longitude='longitude:Q', + tooltip='iata:N' + ) + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + 
).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _That's a lot of airports! Obviously, not all of them are major hubs._ + + Similar to our zip codes dataset, our airport data includes points that lie outside the continental United States. So we again see points in the upper-left corner. We might want to filter these points, but to do so we first need to know more about them. + + _Update the map projection above to `albers` – side-stepping the idiosyncratic behavior of `albersUsa` – so that the actual locations of these additional points is revealed!_ + + Now, instead of showing all airports in an undifferentiated fashion, let's identify major hubs by considering the total number of routes that originate at each airport. We'll use the `routes` dataset as our primary data source: it contains a list of flight routes that we can aggregate to count the number of routes for each `origin` airport. + + However, the `routes` dataset does not include the _locations_ of the airports! To augment the `routes` data with locations, we need a new data transformation: `lookup`. The `lookup` transform takes a field value in a primary dataset and uses it as a _key_ to look up related information in another table. In this case, we want to match the `origin` airport code in our `routes` dataset against the `iata` field of the `airports` dataset, then extract the corresponding `latitude` and `longitude` fields. 
+ """) + return + + +@app.cell +def _(airports, alt, flights, usa): + alt.layer( + alt.Chart(alt.topo_feature(usa, 'states')).mark_geoshape( + fill='#ddd', stroke='#fff', strokeWidth=1 + ), + alt.Chart(flights).mark_circle().transform_aggregate( + groupby=['origin'], + routes='count()' + ).transform_lookup( + lookup='origin', + from_=alt.LookupData(data=airports, key='iata', + fields=['state', 'latitude', 'longitude']) + ).transform_filter( + 'datum.state !== "PR" && datum.state !== "VI"' + ).encode( + latitude='latitude:Q', + longitude='longitude:Q', + tooltip=['origin:N', 'routes:Q'], + size=alt.Size('routes:Q', scale=alt.Scale(range=[0, 1000]), legend=None), + order=alt.Order('routes:Q', sort='descending') + ) + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Which U.S. airports have the highest number of outgoing routes?_ + + Now that we can see the airports, which may wish to interact with them to better understand the structure of the air traffic network. We can add a `rule` mark layer to represent paths from `origin` airports to `destination` airports, which requires two `lookup` transforms to retreive coordinates for each end point. In addition, we can use a `single` selection to filter these routes, such that only the routes originating at the currently selected airport are shown. + + _Starting from the static map above, can you build an interactive version? 
Feel free to skip the code below to engage with the interactive map first, and think through how you might build it on your own!_ + """) + return + + +@app.cell +def _(airports, alt, flights, usa): + # interactive selection for origin airport + # select nearest airport to mouse cursor + origin = alt.selection_point( + on='mouseover', nearest=True, + fields=['origin'], empty='none' + ) + + # shared data reference for lookup transforms + foreign = alt.LookupData(data=airports, key='iata', + fields=['latitude', 'longitude']) + + alt.layer( + # base map of the United States + alt.Chart(alt.topo_feature(usa, 'states')).mark_geoshape( + fill='#ddd', stroke='#fff', strokeWidth=1 + ), + # route lines from selected origin airport to destination airports + alt.Chart(flights).mark_rule( + color='#000', opacity=0.35 + ).transform_filter( + origin # filter to selected origin only + ).transform_lookup( + lookup='origin', from_=foreign # origin lat/lon + ).transform_lookup( + lookup='destination', from_=foreign, as_=['lat2', 'lon2'] # dest lat/lon + ).encode( + latitude='latitude:Q', + longitude='longitude:Q', + latitude2='lat2', + longitude2='lon2', + ), + # size airports by number of outgoing routes + # 1. aggregate flights-airport data set + # 2. lookup location data from airports data set + # 3. 
remove Puerto Rico (PR) and Virgin Islands (VI) + alt.Chart(flights).mark_circle().transform_aggregate( + groupby=['origin'], + routes='count()' + ).transform_lookup( + lookup='origin', + from_=alt.LookupData(data=airports, key='iata', + fields=['state', 'latitude', 'longitude']) + ).transform_filter( + 'datum.state !== "PR" && datum.state !== "VI"' + ).add_params( + origin + ).encode( + latitude='latitude:Q', + longitude='longitude:Q', + tooltip=['origin:N', 'routes:Q'], + size=alt.Size('routes:Q', scale=alt.Scale(range=[0, 1000]), legend=None), + order=alt.Order('routes:Q', sort='descending') # place smaller circles on top + ) + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Mouseover the map to probe the flight network!_ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Choropleth Maps + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + A [choropleth map](https://en.wikipedia.org/wiki/Choropleth_map) uses shaded or textured regions to visualize data values. Sized symbol maps are often more accurate to read, as people tend to be better at estimating proportional differences between the area of circles than between color shades. Nevertheless, choropleth maps are popular in practice and particularly useful when too many symbols become perceptually overwhelming. + + For example, while the United States only has 50 states, it has thousands of counties within those states. Let's build a choropleth map of the unemployment rate per county, back in the recession year of 2008. In some cases, input GeoJSON or TopoJSON files might include statistical data that we can directly visualize. 
In this case, however, we have two files: our TopoJSON file that includes county boundary features (`usa`), and a separate text file that contains unemployment statistics: + """) + return + + +@app.cell +def _(data): + unemp = data.unemployment.url + unemp + return (unemp,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To integrate our data sources, we will again need to use the `lookup` transform, augmenting our TopoJSON-based `geoshape` data with unemployment rates. We can then create a map that includes a `color` encoding for the looked-up `rate` field. + """) + return + + +@app.cell +def _(alt, unemp, usa): + alt.Chart(alt.topo_feature(usa, 'counties')).mark_geoshape( + stroke='#aaa', strokeWidth=0.25 + ).transform_lookup( + lookup='id', from_=alt.LookupData(data=unemp, key='id', fields=['rate']) + ).encode( + alt.Color('rate:Q', + scale=alt.Scale(domain=[0, 0.3], clamp=True), + legend=alt.Legend(format='%')), + alt.Tooltip('rate:Q', format='.0%') + ).project( + type='albersUsa' + ).properties( + width=900, + height=500 + ).configure_view( + stroke=None + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + *Examine the unemployment rates by county. Higher values in Michigan may relate to the automotive industry. Counties in the [Great Plains](https://en.wikipedia.org/wiki/Great_Plains) and Mountain states exhibit both low **and** high rates. Is this variation meaningful, or is it possibly an [artifact of lower sample sizes](https://medium.com/@uwdata/surprise-maps-showing-the-unexpected-e92b67398865)? To explore further, try changing the upper scale domain (e.g., to `0.2`) to adjust the color mapping.* + + A central concern for choropleth maps is the choice of colors. Above, we use Altair's default `'yellowgreenblue'` scheme for heatmaps. 
Below we compare other schemes, including a _single-hue sequential_ scheme (`teals`) that varies in luminance only, a _multi-hue sequential_ scheme (`viridis`) that ramps in both luminance and hue, and a _diverging_ scheme (`blueorange`) that uses a white mid-point: + """) + return + + +@app.cell +def _(alt, unemp, usa): + # utility function to generate a map specification for a provided color scheme + def map_(scheme): + return alt.Chart().mark_geoshape().project(type='albersUsa').encode( + alt.Color('rate:Q', scale=alt.Scale(scheme=scheme), legend=None) + ).properties(width=305, height=200) + + alt.hconcat( + map_('tealblues'), map_('viridis'), map_('blueorange'), + data=alt.topo_feature(usa, 'counties') + ).transform_lookup( + lookup='id', from_=alt.LookupData(data=unemp, key='id', fields=['rate']) + ).configure_view( + stroke=None + ).resolve_scale( + color='independent' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + _Which color schemes do you find to be more effective? Why might that be? Modify the maps above to use other available schemes, as described in the [Vega Color Schemes documentation](https://vega.github.io/vega/docs/schemes/)._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Cartographic Projections + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now that we have some experience creating maps, let's take a closer look at cartographic projections. As explained by [Wikipedia](https://en.wikipedia.org/wiki/Map_projection), + + > _All map projections necessarily distort the surface in some fashion. Depending on the purpose of the map, some distortions are acceptable and others are not; therefore, different map projections exist in order to preserve some properties of the sphere-like body at the expense of other properties._ + + Some of the properties we might wish to consider include: + + - _Area_: Does the projection distort region sizes? 
+ - _Bearing_: Does a straight line correspond to a constant direction of travel? + - _Distance_: Do lines of equal length correspond to equal distances on the globe? + - _Shape_: Does the projection preserve spatial relations (angles) between points? + + Selecting an appropriate projection thus depends on the use case for the map. For example, if we are assessing land use and the extent of land matters, we might choose an area-preserving projection. If we want to visualize shockwaves emanating from an earthquake, we might focus the map on the quake's epicenter and preserve distances outward from that point. Or, if we wish to aid navigation, the preservation of bearing and shape may be more important. + + We can also characterize projections in terms of the _projection surface_. Cylindrical projections, for example, project surface points of the sphere onto a surrounding cylinder; the "unrolled" cylinder then provides our map. As we further describe below, we might alternatively project onto the surface of a cone (conic projections) or directly onto a flat plane (azimuthal projections). + + *Let's first build up our intuition by interacting with a variety of projections! **[Open the online Vega-Lite Cartographic Projections notebook](https://observablehq.com/@vega/vega-lite-cartographic-projections).** Use the controls on that page to select a projection and explore projection parameters, such as the `scale` (zooming) and x/y translation (panning). 
The rotation ([yaw, pitch, roll](https://en.wikipedia.org/wiki/Aircraft_principal_axes)) controls determine the orientation of the globe relative to the surface being projected upon.* + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### A Tour of Specific Projection Types + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + [**Cylindrical projections**](https://en.wikipedia.org/wiki/Map_projection#Cylindrical) map the sphere onto a surrounding cylinder, then unroll the cylinder. If the major axis of the cylinder is oriented north-south, meridians are mapped to straight lines. [Pseudo-cylindrical](https://en.wikipedia.org/wiki/Map_projection#Pseudocylindrical) projections represent a central meridian as a straight line, with other meridians "bending" away from the center. + """) + return + + +@app.cell +def _(alt, map): + _minimap = map.properties(width=225, height=225) + alt.hconcat(_minimap.project(type='equirectangular').properties(title='equirectangular'), _minimap.project(type='mercator').properties(title='mercator'), _minimap.project(type='transverseMercator').properties(title='transverseMercator'), _minimap.project(type='naturalEarth1').properties(title='naturalEarth1')).properties(spacing=10).configure_view(stroke=None) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + - [Equirectangular](https://en.wikipedia.org/wiki/Equirectangular_projection) (`equirectangular`): Scale `lat`, `lon` coordinate values directly. + - [Mercator](https://en.wikipedia.org/wiki/Mercator_projection) (`mercator`): Project onto a cylinder, using `lon` directly, but subjecting `lat` to a non-linear transformation. Straight lines preserve constant compass bearings ([rhumb lines](https://en.wikipedia.org/wiki/Rhumb_line)), making this projection well-suited to navigation. However, areas in the far north or south can be greatly distorted. 
+ - [Transverse Mercator](https://en.wikipedia.org/wiki/Transverse_Mercator_projection) (`transverseMercator`): A mercator projection, but with the bounding cylinder rotated to a transverse axis. Whereas the standard Mercator projection has highest accuracy along the equator, the Transverse Mercator projection is most accurate along the central meridian. + - [Natural Earth](https://en.wikipedia.org/wiki/Natural_Earth_projection) (`naturalEarth1`): A pseudo-cylindrical projection designed for showing the whole Earth in one view. +

+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + [**Conic projections**](https://en.wikipedia.org/wiki/Map_projection#Conic) map the sphere onto a cone, and then unroll the cone on to the plane. Conic projections are configured by two _standard parallels_, which determine where the cone intersects the globe. + """) + return + + +@app.cell +def _(alt, map): + _minimap = map.properties(width=180, height=130) + alt.hconcat(_minimap.project(type='conicEqualArea').properties(title='conicEqualArea'), _minimap.project(type='conicEquidistant').properties(title='conicEquidistant'), _minimap.project(type='conicConformal', scale=35, translate=[90, 65]).properties(title='conicConformal'), _minimap.project(type='albers').properties(title='albers'), _minimap.project(type='albersUsa').properties(title='albersUsa')).properties(spacing=10).configure_view(stroke=None) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + - [Conic Equal Area](https://en.wikipedia.org/wiki/Albers_projection) (`conicEqualArea`): Area-preserving conic projection. Shape and distance are not preserved, but roughly accurate within standard parallels. + - [Conic Equidistant](https://en.wikipedia.org/wiki/Equidistant_conic_projection) (`conicEquidistant`): Conic projection that preserves distance along the meridians and standard parallels. + - [Conic Conformal](https://en.wikipedia.org/wiki/Lambert_conformal_conic_projection) (`conicConformal`): Conic projection that preserves shape (local angles), but not area or distance. + - [Albers](https://en.wikipedia.org/wiki/Albers_projection) (`albers`): A variant of the conic equal area projection with standard parallels optimized for creating maps of the United States. + - [Albers USA](https://en.wikipedia.org/wiki/Albers_projection) (`albersUsa`): A hybrid projection for the 50 states of the United States of America. 
This projection stitches together three Albers projections with different parameters for the continental U.S., Alaska, and Hawaii. +

+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + [**Azimuthal projections**](https://en.wikipedia.org/wiki/Map_projection#Azimuthal_%28projections_onto_a_plane%29) map the sphere directly onto a plane. + """) + return + + +@app.cell +def _(alt, map): + _minimap = map.properties(width=180, height=180) + alt.hconcat(_minimap.project(type='azimuthalEqualArea').properties(title='azimuthalEqualArea'), _minimap.project(type='azimuthalEquidistant').properties(title='azimuthalEquidistant'), _minimap.project(type='orthographic').properties(title='orthographic'), _minimap.project(type='stereographic').properties(title='stereographic'), _minimap.project(type='gnomonic').properties(title='gnomonic')).properties(spacing=10).configure_view(stroke=None) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + - [Azimuthal Equal Area](https://en.wikipedia.org/wiki/Lambert_azimuthal_equal-area_projection) (`azimuthalEqualArea`): Accurately projects area in all parts of the globe, but does not preserve shape (local angles). + - [Azimuthal Equidistant](https://en.wikipedia.org/wiki/Azimuthal_equidistant_projection) (`azimuthalEquidistant`): Preserves proportional distance from the projection center to all other points on the globe. + - [Orthographic](https://en.wikipedia.org/wiki/Orthographic_projection_in_cartography) (`orthographic`): Projects a visible hemisphere onto a distant plane. Approximately matches a view of the Earth from outer space. + - [Stereographic](https://en.wikipedia.org/wiki/Stereographic_projection) (`stereographic`): Preserves shape, but not area or distance. + - [Gnomonic](https://en.wikipedia.org/wiki/Gnomonic_projection) (`gnomonic`): Projects the surface of the sphere directly onto a tangent plane. [Great circles](https://en.wikipedia.org/wiki/Great_circle) around the Earth are projected to straight lines, showing the shortest path between points. +

+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Coda: Wrangling Geographic Data + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + The examples above all draw from the vega-datasets collection, including geometric (TopoJSON) and tabular (airports, unemployment rates) data. A common challenge to getting starting with geographic visualization is collecting the necessary data for your task. A number of data providers abound, including services such as the [United States Geological Survey](https://www.usgs.gov/products/data/all-data) and [U.S. Census Bureau](https://www.census.gov/data/datasets.html). + + In many cases you may have existing data with a geographic component, but require additional measures or geometry. To help you get started, here is one workflow: + + 1. Visit [Natural Earth Data](http://www.naturalearthdata.com/downloads/) and browse to select data for regions and resolutions of interest. Download the corresponding zip file(s). + 2. Go to [MapShaper](https://mapshaper.org/) and drop your downloaded zip file onto the page. Revise the data as desired, and then "Export" generated TopoJSON or GeoJSON files. + 3. Load the exported data from MapShaper for use with Altair! + + Of course, many other tools – both open-source and proprietary – exist for working with geographic data. For more about geo-data wrangling and map creation, see Mike Bostock's tutorial series on [Command-Line Cartography](https://medium.com/@mbostock/command-line-cartography-part-1-897aa8f8ca2c). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Summary + + At this point, we've only dipped our toes into the waters of map-making. 
_(You didn't expect a single notebook to impart centuries of learning, did you?)_ For example, we left untouched topics such as [_cartograms_](https://en.wikipedia.org/wiki/Cartogram) and conveying [_topography_](https://en.wikipedia.org/wiki/Topography) — as in Imhof's illuminating book [_Cartographic Relief Presentation_](https://books.google.com/books?id=cVy1Ms43fFYC). Nevertheless, you should now be well-equipped to create a rich array of geo-visualizations. For more, MacEachren's book [_How Maps Work: Representation, Visualization, and Design_](https://books.google.com/books?id=xhAvN3B0CkUC) provides a valuable overview of map-making from the perspective of data visualization. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/08_debugging.py b/altair/08_debugging.py new file mode 100644 index 0000000000000000000000000000000000000000..c7dd49768845bf41291e06ec368d8fab3b09831d --- /dev/null +++ b/altair/08_debugging.py @@ -0,0 +1,370 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "altair==6.0.0", +# "marimo", +# "pandas==3.0.1", +# "vega_datasets==0.9.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Altair Debugging Guide + + In this notebook we show you common debugging techniques that you can use if you run into issues with Altair. 
+ + You can jump to the following sections: + + * [Installation and Setup](#Installation) when Altair is not installed correctly + * [Display Issues](#Display-Troubleshooting) when you don't see a chart + * [Invalid Specifications](#Invalid-Specifications) when you get an error + * [Properties are Being Ignored](#Properties-are-Being-Ignored) when you don't see any errors or warnings + * [Asking for Help](#Asking-for-Help) when you get stuck + * [Reporting Issues](#Reporting-Issues) when you find a bug + + In addition to this notebook, you might find the [Frequently Asked Questions](https://altair-viz.github.io/user_guide/faq.html) and [Display Troubleshooting](https://altair-viz.github.io/user_guide/troubleshooting.html) guides helpful. + + _This notebook is part of the [data visualization curriculum](https://github.com/uwdata/visualization-curriculum)._ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Installation + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + These instructions follow [the Altair documentation](https://altair-viz.github.io/getting_started/installation.html) but focus on some specifics for this series of notebooks. + + In every notebook, we will import the [Altair](https://github.com/altair-viz/altair) and [Vega Datasets](https://github.com/altair-viz/vega_datasets) packages. If you are running this notebook on [Colab](https://colab.research.google.com), Altair and Vega Datasets should be preinstalled and ready to go. The notebooks in this series are designed for Colab but should also work in Jupyter Lab or the Jupyter Notebook (the notebook requires a bit more setup [described below](#Special-Setup-for-the-Jupyter-Notebook)) but additional packages are required. + + If you are running in Jupyter Lab or Jupyter Notebooks, you have to install the necessary packages by running the following command in your terminal. 
+ + ```bash + pip install altair vega_datasets + ``` + + Or if you use [Conda](https://conda.io) + + ```bash + conda install -c conda-forge altair vega_datasets + ``` + + You can run command line commands from a code cell by prefixing it with `!`. For example, to install Altair and Vega Datasets with [Pip](https://pip.pypa.io/), you can run the following cell. + """) + return + + +@app.cell +def _(): + # packages added via marimo's package management: altair vega_datasets !pip install altair vega_datasets + return + + +@app.cell +def _(): + import altair as alt + from vega_datasets import data + + return alt, data + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Make sure you are Using the Latest Version of Altair + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + If you are running into issues with Altair, first make sure that you are running the latest version. To check the version of Altair that you have installed, run the cell below. + """) + return + + +@app.cell +def _(alt): + alt.__version__ + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + To check what the latest version of altair is, go to [this page](https://pypi.org/project/altair/) or run the cell below (requires Python 3). + """) + return + + +@app.cell +def _(): + import urllib.request, json + with urllib.request.urlopen("https://pypi.org/pypi/altair/json") as url: + print(json.loads(url.read().decode())['info']['version']) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + If you are not running the latest version, you can update it with `pip`. You can update Altair and Vega Datasets by running this command in your terminal. + + ``` + pip install -U altair vega_datasets + ``` + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Try Making a Chart + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Now you can create an Altair chart. 
+ """) + return + + +@app.cell +def _(alt, data): + cars = data.cars() + + alt.Chart(cars).mark_point().encode( + x='Horsepower', + y='Displacement', + color='Origin' + ) + return (cars,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Special Setup for the Jupyter Notebook + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + If you are running in Jupyter Lab, Jupyter Notebook, or Colab (and have a working Internet connection) you should be seeing a chart. If you are running in another environment (or offline), you will need to tell Altair to use a different renderer; + + To activate a different renderer in a notebook cell: + + ```python + # to run in nteract, VSCode, or offline in JupyterLab + alt.renderers.enable('mimebundle') + + ``` + + To run offline in Jupyter Notebook you must install an additional dependency, the `vega` package. Run this command in your terminal: + + ```bash + pip install vega + ``` + + Then activate the notebook renderer: + + ```python + # to run offline in Jupyter Notebook + alt.renderers.enable('notebook') + + ``` + + + These instruction follow [the instructions on the Altair website](https://altair-viz.github.io/getting_started/installation.html#installation-notebook). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Display Troubleshooting + + If you are having issues with seeing a chart, make sure your setup is correct by following the [debugging instruction above](#Installation). If you are still having issues, follow the [instruction about debugging display issues in the Altair documentation](https://iliatimofeev.github.io/altair-viz.github.io/user_guide/troubleshooting.html). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Non Existent Fields + + A common error is [accidentally using a field that does not exist](https://iliatimofeev.github.io/altair-viz.github.io/user_guide/troubleshooting.html#plot-displays-but-the-content-is-empty). 
+ """) + return + + +@app.cell +def _(alt): + import pandas as pd + + df = pd.DataFrame({'x': [1, 2, 3], + 'y': [3, 1, 4]}) + + alt.Chart(df).mark_point().encode( + x='x:Q', + y='y:Q', + color='color:Q' # <-- this field does not exist in the data! + ) + return (df,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Check the spelling of your files and print the data source to confirm that the data and fields exist. For instance, here you see that `color` is not a vaid field. + """) + return + + +@app.cell +def _(df): + df.head() + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Invalid Specifications + + Another common issue is creating an invalid specification and getting an error. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Invalid Properties + + Altair might show an `SchemaValidationError` or `ValueError`. Read the error message carefully. Usually it will tell you what is going wrong. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + For example, if you forget the mark type, you will see this `SchemaValidationError`. + """) + return + + +@app.cell +def _(alt, cars): + alt.Chart(cars).encode( + y='Horsepower' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + Or if you use a non-existent channel, you get a `TypeError`. + """) + return + + +@app.cell +def _(alt, cars): + try: + alt.Chart(cars).mark_point().encode( + z='Horsepower' + ) + except TypeError as e: + print(f"TypeError: {e}") + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Properties are Being Ignored + + Altair might ignore a property that you specified. In the chart below, we are using a `text` channel, which is only compatible with `mark_text`. You do not see an error or a warning about this in the notebook. However, the underlying Vega-Lite library will show a warning in the browser console. 
Press Alt+Cmd+I on Mac or Alt+Ctrl+I on Windows and Linux to open the developer tools and click on the `Console` tab. When you run the example in the cell below, you will see a the following warning. + + ``` + WARN text dropped as it is incompatible with "bar". + ``` + """) + return + + +@app.cell +def _(alt, cars): + alt.Chart(cars).mark_bar().encode( + y='mean(Horsepower)', + text='mean(Acceleration)' + ) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + If you find yourself debugging issues related to Vega-Lite, you can open the chart in the [Vega Editor](https://vega.github.io/editor/) either by clicking on the "Open in Vega Editor" link at the bottom of the chart or in the action menu (click to open) at the top right of a chart. The Vega Editor provides additional debugging but you will be writing Vega-Lite JSON instead of Altair in Python. + + **Note**: The Vega Editor may be using a newer version of Vega-Lite and so the behavior may vary. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Asking for Help + + If you find a problem with Altair and get stuck, you can ask a question on Stack Overflow. Ask your question with the `altair` and `vega-lite` tags. You can find a list of questions people have asked before [here](https://stackoverflow.com/questions/tagged/altair). + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Reporting Issues + + If you find a problem with Altair and believe it is a bug, please [create an issue in the Altair GitHub repo](https://github.com/altair-viz/altair/issues/new) with a description of your problem. If you believe the issue is related to the underlying Vega-Lite library, please [create an issue in the Vega-Lite GitHub repo](https://github.com/vega/vega-lite/issues/new). 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/altair/altair_introduction.py.lock b/altair/altair_introduction.py.lock new file mode 100644 index 0000000000000000000000000000000000000000..973bc5fe07cffa7c4f6a6f95e98e1650278f5e6b --- /dev/null +++ b/altair/altair_introduction.py.lock @@ -0,0 +1,952 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] + +[manifest] +requirements = [ + { name = "altair", specifier = "==6.0.0" }, + { name = "marimo" }, + { name = "pandas", specifier = "==3.0.1" }, + { name = "vega-datasets", specifier = "==0.9.0" }, +] + +[[package]] +name = "altair" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.15'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/c0/184a89bd5feba14ff3c41cfaf1dd8a82c05f5ceedbc92145e17042eb08a4/altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4", size = 763834, upload-time = "2025-11-12T08:59:11.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/33/ef2f2409450ef6daa61459d5de5c08128e7d3edb773fefd0a324d1310238/altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8", size = 795410, upload-time = "2025-11-12T08:59:09.804Z" }, +] + +[[package]] +name = "anyio" +version = 
"4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = 
"sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "loro" +version = "1.10.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/27/ea6f3298fc87ea5f2d60ebfbca088e7d9b2ceb3993f67c83bfb81778ec01/loro-1.10.3.tar.gz", hash = "sha256:68184ab1c2ab94af6ad4aaba416d22f579cabee0b26cbb09a1f67858207bbce8", size = 68833, upload-time = "2025-12-09T10:14:06.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/bb/61f36aac7981f84ffba922ac1220505365df3e064bc91c015790bff92007/loro-1.10.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:7ee0e1c9a6d0e4a1df4f1847d3b31cef8088860c1193442f131936d084bd3fe1", size = 3254532, upload-time = "2025-12-09T10:11:31.215Z" }, + { url = "https://files.pythonhosted.org/packages/15/28/5708da252eb6be90131338b104e5030c9b815c41f9e97647391206bec092/loro-1.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:d7225471b29a892a10589d7cf59c70b0e4de502fa20da675e9aaa1060c7703ae", size = 3055231, upload-time = "2025-12-09T10:11:16.111Z" }, + { url = "https://files.pythonhosted.org/packages/16/b6/68c350a39fd96f24c55221f883230aa83db0bb5f5d8e9776ccdb25ea1f7b/loro-1.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc04a714e0a604e191279501fa4d2db3b39cee112275f31e87d95ecfbafdfb6c", size = 3286945, upload-time = "2025-12-09T10:08:12.633Z" }, + { url = "https://files.pythonhosted.org/packages/23/af/8245b8a20046423e035cd17de9811ab1b27fc9e73425394c34387b41cc13/loro-1.10.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:375c888a4ddf758b034eb6ebd093348547d17364fae72aa7459d1358e4843b1f", size = 3349533, upload-time = "2025-12-09T10:08:46.754Z" }, + { url = "https://files.pythonhosted.org/packages/cc/8c/d764c60914e45a2b8c562e01792172e3991430103c019cc129d56c24c868/loro-1.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2020d9384a426e91a7d38c9d0befd42e8ad40557892ed50d47aad79f8d92b654", size = 3704622, upload-time = "2025-12-09T10:09:25.068Z" }, + { url = "https://files.pythonhosted.org/packages/54/cc/ebdbdf0b1c7a223fe84fc0de78678904ed6424b426f90b98503b95b1dff9/loro-1.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95afacd832dce152700c2bc643f7feb27d5611fc97b5141684b5831b22845380", size = 3416659, upload-time = "2025-12-09T10:09:59.107Z" }, + { url = "https://files.pythonhosted.org/packages/fa/bc/db7f3fc619483b60c03d85b4f9bb5812b2229865b574c8802b46a578f545/loro-1.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c95868bcf6361d700e215f33a88b8f51d7bc3ae7bbe3d35998148932e23d3fa", size = 3345007, upload-time = "2025-12-09T10:10:53.327Z" }, + { url = "https://files.pythonhosted.org/packages/91/65/bcd3b1d3a3615e679177c1256f2e0ff7ee242c3d5d1b9cb725b0ec165b51/loro-1.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:68f5c7fad09d8937ef4b55e7dd4a0f9f175f026369b3f55a5b054d3513f6846d", size = 3687874, upload-time = "2025-12-09T10:10:31.674Z" }, + { url = "https://files.pythonhosted.org/packages/3a/e4/0d51e2da2ae6143bfd03f7127b9daf58a3f8dae9d5ca7740ccba63a04de4/loro-1.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:740bb548139d71eccd6317f3df40a0dc5312e98bbb2be09a6e4aaddcaf764206", size = 3467200, upload-time = "2025-12-09T10:11:47.994Z" }, + { url = "https://files.pythonhosted.org/packages/06/99/ada2baeaf6496e34962fe350cd41129e583219bf4ce5e680c37baa0613a8/loro-1.10.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:c756a6ee37ed851e9cf91e5fedbc68ca21e05969c4e2ec6531c15419a4649b58", size = 3618468, upload-time = "2025-12-09T10:12:24.182Z" }, + { url = "https://files.pythonhosted.org/packages/87/ec/83335935959c5e3946e02b748af71d801412b2aa3876f870beae1cd56d4d/loro-1.10.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3553390518e188c055b56bcbae76bf038329f9c3458cb1d69068c55b3f8f49f1", size = 3666852, upload-time = "2025-12-09T10:12:59.117Z" }, + { url = "https://files.pythonhosted.org/packages/9f/53/1bd455b3254afa35638d617e06c65a22e604b1fae2f494abb9a621c8e69b/loro-1.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0885388c0c2b53f5140229921bd64c7838827e3101a05d4d53346191ba76b15d", size = 3556829, upload-time = "2025-12-09T10:13:34.002Z" }, + { url = "https://files.pythonhosted.org/packages/66/30/6f48726ef50f911751c6b69d7fa81482cac70d4ed817216f846776fec28c/loro-1.10.3-cp311-cp311-win32.whl", hash = "sha256:764b68c4ff0411399c9cf936d8b6db1161ec445388ff2944a25bbdeb2bbac15c", size = 2723776, upload-time = "2025-12-09T10:14:27.261Z" }, + { url = "https://files.pythonhosted.org/packages/69/39/0b08203d94a6f200bbfefa8025a1b825c8cfb30e8cc8b2a1224629150d08/loro-1.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:9e583e6aabd6f9b2bdf3ff3f6e0de10c3f7f8ab9d4c05c01a9ecca309c969017", size = 2950529, upload-time = "2025-12-09T10:14:08.857Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/b6/cfbf8088e8ca07d66e6c1eccde42e00bd61708f28e8ea0936f9582306323/loro-1.10.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:028948b48dcc5c2127f974dae4ad466ab69f0d1eeaf367a8145eb6501fb988f2", size = 3239592, upload-time = "2025-12-09T10:11:32.505Z" }, + { url = "https://files.pythonhosted.org/packages/78/e4/7b614260bf16c5e33c0bea6ac47ab0284efd21f89f2e5e4e15cd93bead40/loro-1.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5253b8f436d90412b373c583f22ac9539cfb495bf88f78d4bb41daafef0830b7", size = 3045107, upload-time = "2025-12-09T10:11:17.481Z" }, + { url = "https://files.pythonhosted.org/packages/ae/17/0a78ec341ca69d376629ff2a1b9b3511ee7dd54f2b018616ef03328024f7/loro-1.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14be8a5539d49468c94d65742355dbe79745123d78bf769a23e53bf9b60dd46a", size = 3292720, upload-time = "2025-12-09T10:08:14.027Z" }, + { url = "https://files.pythonhosted.org/packages/d4/9b/f36a4654508e9b8ddbe08a62a0ce8b8e7fd511a39b161821917530cffd8e/loro-1.10.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91b2b9139dfc5314a0197132a53b6673fddb63738380a522d12a05cec7ad76b4", size = 3353260, upload-time = "2025-12-09T10:08:48.251Z" }, + { url = "https://files.pythonhosted.org/packages/b4/0e/7d441ddecc7695153dbe68af4067d62e8d7607fce3747a184878456a91f6/loro-1.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:247897288911c712ee7746965573299fc23ce091e94456da8da371e6adae30f4", size = 3712354, upload-time = "2025-12-09T10:09:26.38Z" }, + { url = "https://files.pythonhosted.org/packages/1c/33/10e66bb84599e61df124f76c00c5398eb59cbb6f69755f81c40f65a18344/loro-1.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:835abc6025eb5b6a0fe22c808472affc95e9a661b212400cfd88ba186b0d304c", size = 3422926, upload-time = "2025-12-09T10:10:00.347Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/70/00dc4246d9f3c69ecbb9bc36d5ad1a359884464a44711c665cb0afb1e9de/loro-1.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e660853617fc29e71bb7b796e6f2c21f7722c215f593a89e95cd4d8d5a32aca0", size = 3353092, upload-time = "2025-12-09T10:10:55.786Z" }, + { url = "https://files.pythonhosted.org/packages/19/37/60cc0353c5702e1e469b5d49d1762e782af5d5bd5e7c4e8c47556335b4c6/loro-1.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8059063cab57ca521012ed315a454784c20b0a86653e9014795e804e0a333659", size = 3687798, upload-time = "2025-12-09T10:10:33.253Z" }, + { url = "https://files.pythonhosted.org/packages/88/c4/4db1887eb08dfbb305d9424fdf1004c0edf147fd53ab0aaf64a90450567a/loro-1.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9748359343b5fd7019ab3c2d1d583a0c13c633a4dd21d75e50e3815ab479f493", size = 3474451, upload-time = "2025-12-09T10:11:49.489Z" }, + { url = "https://files.pythonhosted.org/packages/d8/66/10d2e00c43b05f56e96e62100f86a1261f8bbd6422605907f118a752fe61/loro-1.10.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:def7c9c2e16ad5470c9c56f096ac649dd4cd42d5936a32bb0817509a92d82467", size = 3621647, upload-time = "2025-12-09T10:12:25.536Z" }, + { url = "https://files.pythonhosted.org/packages/47/f0/ef8cd6654b09a03684195c650b1fba00f42791fa4844ea400d94030c5615/loro-1.10.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:34b223fab58591a823f439d9a13d1a1ddac18dc4316866503c588ae8a9147cb1", size = 3667946, upload-time = "2025-12-09T10:13:00.711Z" }, + { url = "https://files.pythonhosted.org/packages/bb/5d/960b62bf85c38d6098ea067438f037a761958f3a17ba674db0cf316b0f60/loro-1.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d5fa4baceb248d771897b76d1426c7656176e82e770f6790940bc3e3812436d", size = 3565866, upload-time = "2025-12-09T10:13:35.401Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/d4/0d499a5e00df13ce497263aef2494d9de9e9d1f11d8ab68f89328203befb/loro-1.10.3-cp312-cp312-win32.whl", hash = "sha256:f25ab769b84a5fbeb1f9a1111f5d28927eaeaa8f5d2d871e237f80eaca5c684e", size = 2720785, upload-time = "2025-12-09T10:14:28.79Z" }, + { url = "https://files.pythonhosted.org/packages/1a/9b/2b5be23f1da4cf20c6ce213cfffc66bdab2ea012595abc9e3383103793d0/loro-1.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:3b73b7a3a32e60c3424fc7deaf8b127af7580948e27d8bbe749e3f43508aa0a2", size = 2954650, upload-time = "2025-12-09T10:14:10.235Z" }, + { url = "https://files.pythonhosted.org/packages/75/67/8467cc1c119149ada86903b67ce10fc4b47fb6eb2a8ca5f94c0938fd010f/loro-1.10.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:380ef692c5272e8b607be2ee6a8eef5113e65dc38e6739526c30e3db6abc3fbc", size = 3239527, upload-time = "2025-12-09T10:11:33.884Z" }, + { url = "https://files.pythonhosted.org/packages/bc/3b/d1a01af3446cb98890349215bea7e71ba49dc3e50ffbfb90c5649657a8b8/loro-1.10.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed966ce6ff1fb3787b3f6c4ed6dd036baa5fb738b84a466a5e764f2ab534ccc2", size = 3044767, upload-time = "2025-12-09T10:11:18.777Z" }, + { url = "https://files.pythonhosted.org/packages/6b/93/37f891fa46767001ae2518697fb01fc187497e3a5238fe28102be626055d/loro-1.10.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d7c8d2f3d88578fdf69845a9ae16fc5ea3ac54aa838a6bf43a24ce11908220", size = 3292648, upload-time = "2025-12-09T10:08:15.404Z" }, + { url = "https://files.pythonhosted.org/packages/6c/67/82273eeba2416b0410595071eda1eefcdf4072c014d44d2501b660aa7145/loro-1.10.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62283c345bfeedef19c8a6d029cd8830e5d2c20b5fb45975d8a70a8a30a7944b", size = 3353181, upload-time = "2025-12-09T10:08:50.144Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/33/894dccf132bece82168dfbe61fad25a13ed89d18f20649f99e87c38f9228/loro-1.10.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1e7e6ae091179fa5f0fca1f8612fde20236ee0a678744bf51ff7d26103ea04f", size = 3712583, upload-time = "2025-12-09T10:09:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/b2/b7/99292729d8b271bcc4bff5faa20b33e4c749173af4c9cb9d34880ae3b4c8/loro-1.10.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6abc6de4876aa205498cef52a002bc38662fbd8d742351ea0f535479208b8b1c", size = 3421491, upload-time = "2025-12-09T10:10:01.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/fb/188b808ef1d9b6d842d53969b99a16afb1b71f04739150959c8946345d0e/loro-1.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acbbfd24cf28a71bbdad8544852e9bbba0ba8535f8221f8859b2693555fa8356", size = 3352623, upload-time = "2025-12-09T10:10:57.361Z" }, + { url = "https://files.pythonhosted.org/packages/53/cc/e2d008cc24bddcf05d1a15b8907a73b1731921ab40897f73a3385fdd274a/loro-1.10.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5faf4ebbe8ca39605024f16dbbbde354365f4e2dcfda82c753797461b504bbd3", size = 3687687, upload-time = "2025-12-09T10:10:34.453Z" }, + { url = "https://files.pythonhosted.org/packages/ec/b6/4251822674230027103caa4fd46a1e83c4d676500074e7ab297468bf8f40/loro-1.10.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e049c21b292c4ff992b23a98812840735db84620721c10ae7f047a921202d090", size = 3474316, upload-time = "2025-12-09T10:11:51.207Z" }, + { url = "https://files.pythonhosted.org/packages/c4/54/ecff3ec08d814f3b9ec1c78a14ecf2e7ff132a71b8520f6aa6ad1ace0056/loro-1.10.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:20e8dacfb827c1f7ffb73e127029d7995a9ab2c3b7b7bc3ecc91d22ee32d78d0", size = 3622069, upload-time = "2025-12-09T10:12:27.059Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/84/c1b8251000f46df5f4d043af8c711bdbff9818727d26429378e0f3a5115e/loro-1.10.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1b743c1c4f93f5b4f0e12efbb352d26e9f80bcbf20f45d9c70f3d0b522f42060", size = 3667722, upload-time = "2025-12-09T10:13:02.012Z" }, + { url = "https://files.pythonhosted.org/packages/ef/13/c5c02776f4ad52c6361b95e1d7396c29071533cef45e3861a2e35745be27/loro-1.10.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:446d67bc9e28036a5a5e03526d28a1559ef2a47b3ccad6b07820dae123cc3697", size = 3564952, upload-time = "2025-12-09T10:13:37.227Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f1/63d4bc63a1521a9b577f6d13538ec4790865584fdf87569d5af943792406/loro-1.10.3-cp313-cp313-win32.whl", hash = "sha256:45d7d8ec683599897695bb714771baccabc1b4c4a412283cc39787c7a59f7ff0", size = 2720952, upload-time = "2025-12-09T10:14:30.17Z" }, + { url = "https://files.pythonhosted.org/packages/29/3c/65c8b0b7f96c9b4fbd458867cf91f30fcd58ac25449d8ba9303586061671/loro-1.10.3-cp313-cp313-win_amd64.whl", hash = "sha256:a42bf73b99b07fed11b65feb0a5362b33b19de098f2235848687f4c41204830e", size = 2953768, upload-time = "2025-12-09T10:14:11.965Z" }, + { url = "https://files.pythonhosted.org/packages/4e/e9/f6a242f61aa4d8b56bd11fa467be27d416401d89cc3244b58651a3a44c88/loro-1.10.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4866325b154aeebcd34be106c7597acf150c374481ac3c12035a1af715ac0f01", size = 3289791, upload-time = "2025-12-09T10:08:16.926Z" }, + { url = "https://files.pythonhosted.org/packages/a7/81/8f5f4d6805658c654264e99467f3f46facdbb2062cbf86743768ee4b942a/loro-1.10.3-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ea7b8849660a28ce8cd90a82db4f76c23453836fcbc88f5767feaaf8739045e2", size = 3348007, upload-time = "2025-12-09T10:08:53.305Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/15/bba0fad18ec5561a140e9781fd2b38672210b52e847d207c57ae85379efd/loro-1.10.3-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e82cdaf9a5892557d3167e07ed5093f87dfa31ef860a63b0eac6c0c2f435705", size = 3707937, upload-time = "2025-12-09T10:09:29.165Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b2/5519c92bd4f9cde068dc60ba35d7f3e4f8cce41e7bf39febd4fb08908e97/loro-1.10.3-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7ee99e5dc844fb20fca830906a0d721022ad1c37aad0b1a440c4ecb98d0c02f", size = 3416744, upload-time = "2025-12-09T10:10:02.956Z" }, + { url = "https://files.pythonhosted.org/packages/81/ba/92d97c27582c0ce12bb83df19b9e080c0dfe95068966296a4fa2279c0477/loro-1.10.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:153c297672ad98d0fe6ff8985decf1e64528ad1dd01ae1452bb83bdeb31f858f", size = 3470978, upload-time = "2025-12-09T10:11:52.707Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8b/acb39b0e74af1c317d3121e75a4bc5bc77d7fda5a79c60399746486f60d9/loro-1.10.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:0ed72f8c6a5f521252ee726954055339abba3fcf00404fb4b5c2da168f0cce79", size = 3615039, upload-time = "2025-12-09T10:12:28.631Z" }, + { url = "https://files.pythonhosted.org/packages/4f/c3/154e3361e5ef42012f6842dbd93f8fbace6eec06517b5a4a9f8c4a46e873/loro-1.10.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f612ab17acdac16c0139e63ff45b33175ebfb22e61a60eb7929a4583389348d6", size = 3663731, upload-time = "2025-12-09T10:13:03.557Z" }, + { url = "https://files.pythonhosted.org/packages/c6/dd/a283cf5b1c957e0bbc67503a10e17606a8f8c87f51d3cf3d83dc3a0ac88a/loro-1.10.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f2741db05c79f3618c954bac90f4572d28c01c243884453f379e9a8738f93d81", size = 3558807, upload-time = "2025-12-09T10:13:38.926Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/4a/a5340b6fdf4cd34d758bed23bd1f64063b3b1b41ff4ecc94ee39259ee9a7/loro-1.10.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:623cf7df17626aa55bc6ca54e89177dbe71a5f1c293e102d6153f43991a1a041", size = 3213589, upload-time = "2025-12-09T10:11:35.377Z" }, + { url = "https://files.pythonhosted.org/packages/00/93/5164e93a77e365a92def77c1258386daef233516a29fb674a3b9d973b8b8/loro-1.10.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d8e715d475f32a1462969aca27eeb3f998f309182978f55bc37ce5c515d92e90", size = 3029557, upload-time = "2025-12-09T10:11:20.076Z" }, + { url = "https://files.pythonhosted.org/packages/6c/30/94592d7c01f480ce99e1783b0d9203eb20ba2eab42575dabd384e3c9d1fa/loro-1.10.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e012a80e8c9fe248b9d0a76e91664c9479a72d976eaeed78f87b15b5d1d732", size = 3282335, upload-time = "2025-12-09T10:08:18.168Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a8/7ae3c0b955aa638fa7dbd2d194c7759749a0d0d96a94805d5dec9b30eaea/loro-1.10.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:686ece56756acbaf80c986848915e9126a29a06d7a62209747e3ef1efc0bd8f6", size = 3333071, upload-time = "2025-12-09T10:08:55.314Z" }, + { url = "https://files.pythonhosted.org/packages/f7/10/151edebdb2bca626ad50911b761164ced16984b25b0b37b34b674ded8b29/loro-1.10.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aa821c8871deca98f4605eb0c40fb26bcf82bd29c9e7fa33b183516c5395b11", size = 3698226, upload-time = "2025-12-09T10:09:30.474Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ac/02a490e38466506b1003df4910d2a8ae582265023dae9e2217c98b56ea3f/loro-1.10.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:507d34137adb4148f79e1da7f89a21a4aab18565621a5dc2b389773fe98ac25b", size = 3407322, upload-time = "2025-12-09T10:10:04.199Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/db/da51f2bcad81ca3733bc21e83f3b6752446436b565b90f5c350ad227ad01/loro-1.10.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91d3b2e187ccfe2b14118a6e5617266fedcdf3435f6fa0a3db7b4afce8afa687", size = 3330268, upload-time = "2025-12-09T10:10:58.61Z" }, + { url = "https://files.pythonhosted.org/packages/4e/af/50d136c83d504a3a1f4ad33a6bf38b6933985a82741302255cf446a5f7ad/loro-1.10.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0016f834fd1626710081334400aed8494380b55ef131f7133d21c3bd22d892a", size = 3673582, upload-time = "2025-12-09T10:10:35.849Z" }, + { url = "https://files.pythonhosted.org/packages/63/4d/53288aae777218e05c43af9c080652bcdbbc8d97c031607eedd3fc15617d/loro-1.10.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:71c4275dca5a8a86219d60545d4f60e081b4af44b490ac912c0481906934bfc6", size = 3463731, upload-time = "2025-12-09T10:11:54.102Z" }, + { url = "https://files.pythonhosted.org/packages/75/01/2389f26ffe8bc3ffe48a0a578f610dd49c709bbcf0d5d2642c6e2b52f490/loro-1.10.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:490f12571b2ed1a8eaf1edd3a7fffc55adac5010b1875fe1bb9e9af9a3907c38", size = 3602334, upload-time = "2025-12-09T10:12:30.082Z" }, + { url = "https://files.pythonhosted.org/packages/a7/16/07b64af13f5fcea025e003ca27bbd6f748217abbd4803dad88ea0900526c/loro-1.10.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a374a43cadaa48528a5411496481df9ae52bf01e513f4509e37d6c986f199c0e", size = 3657896, upload-time = "2025-12-09T10:13:04.86Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/4050770d7675ceced71651fe76971d5c27456b7098c0de03a4ecdbb0a02d/loro-1.10.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1a93b2ee59f1fa8d98dd552211fd5693551893b34c1dd2ba0324806d6d14022f", size = 3544339, upload-time = "2025-12-09T10:13:40.396Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/21/67e27cb404c968fc19a841d5c6277f13a17c69a56f49e3c15ea1c92a28eb/loro-1.10.3-cp314-cp314-win32.whl", hash = "sha256:baa863e3d869422e3320e822c0b1f87f5dc44cda903d1bd3b7a16f8413ce3d92", size = 2706731, upload-time = "2025-12-09T10:14:31.604Z" }, + { url = "https://files.pythonhosted.org/packages/08/54/6770cf36aeb994489375e9ab9c01201e70ab7cc286fa97e907aa41b1bae6/loro-1.10.3-cp314-cp314-win_amd64.whl", hash = "sha256:f10ed3ca89485f942b8b2de796ed9783edb990e7e570605232de77489e9f3548", size = 2933563, upload-time = "2025-12-09T10:14:13.805Z" }, + { url = "https://files.pythonhosted.org/packages/24/f5/eb089fd25eb428709dbe79fd4d36b82a00572aa54badd1dff62511a38fe3/loro-1.10.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b4d049efb1953aebfc16fa0b445ff5a37d4d08a1ab93f3b5a577a454b7a5ded", size = 3282369, upload-time = "2025-12-09T10:08:20.011Z" }, + { url = "https://files.pythonhosted.org/packages/30/d7/692cb87c908f6a8af6cbfc10ebab69e16780e3796e11454c2b481b5c3817/loro-1.10.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56ecad7fbac58aa8bee52bb261a764aeef6c7b39c20f0d69e8fad908ab2ca7d8", size = 3332530, upload-time = "2025-12-09T10:08:57.07Z" }, + { url = "https://files.pythonhosted.org/packages/54/46/ed3afbf749288b6f70f3b859a6762538818bf6a557ca873b07d6b036946b/loro-1.10.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d8d1be349d08b3a95592c6a17b80b1ea6aef892b1b8e2b93b540062d04e34e0", size = 3702599, upload-time = "2025-12-09T10:09:31.779Z" }, + { url = "https://files.pythonhosted.org/packages/fe/30/6cb616939c12bfe96a71a01a6e3551febf1c34bf9de114fafadbcfb65064/loro-1.10.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ec0a0b9bc4e32c46f14710062ec5b536c72110318aaf85632a4f8b37e9a470a", size = 3404412, upload-time = "2025-12-09T10:10:05.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/a2/3d4006d3333589f9158ac6d403979bf5c985be8b461b18e7a2ea23b05414/loro-1.10.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5d4437987f7a4a4ff5927f39d0f43ded5b34295dfb0a3c8e150687e25c3d6b8", size = 3462948, upload-time = "2025-12-09T10:11:55.405Z" }, + { url = "https://files.pythonhosted.org/packages/41/30/c640ccd3e570b08770a9f459decc2d8e7ceefdc34ac28a745418fb9cb5ba/loro-1.10.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:86d4f0c631ca274ad2fa2c0bdb8e1e141882d94339b7284a8bef5bf73fa6957d", size = 3599851, upload-time = "2025-12-09T10:12:31.759Z" }, + { url = "https://files.pythonhosted.org/packages/59/8f/062ea50554c47ae30e98b1f0442a458c0edecc6d4edc7fcfc4d901734dd0/loro-1.10.3-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:15e03084ff1b472e14623183ed6e1e43e0f717c2112697beda5e69b5bd0ff236", size = 3655558, upload-time = "2025-12-09T10:13:06.529Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f5/c7dd8cdbd57454b23d89799c22cd42b6d2dda283cd87d7b198dc424a462c/loro-1.10.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:42d6a5ce5bc518eaa682413e82d597299650eeb03e8bc39341752d6e0d22503e", size = 3541282, upload-time = "2025-12-09T10:13:42.189Z" }, + { url = "https://files.pythonhosted.org/packages/43/1a/49e864102721e0e15a4e4c56d7f2dddad5cd589c2d0aceafe14990513583/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16ca42e991589ea300b59da9e98940d5ddda76275fe4363b1f1e079d244403a1", size = 3284236, upload-time = "2025-12-09T10:08:25.836Z" }, + { url = "https://files.pythonhosted.org/packages/e9/c6/d46b433105d8002e4c90248c07f00cd2c8ea76f1048cc5f35b733be96723/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b9ca16dae359397aa7772891bb3967939ffda8da26e0b392d331b506e16afc78", size = 3348996, upload-time = "2025-12-09T10:09:03.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/f3/e918c7b396c547b22a7ab3cff1b570c5ce94293f0dcb17cd96cbe6ba2d50/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d87cfc0a6e119c1c8cfa93078f5d012e557c6b75edcd0977da58ec46d28dc242", size = 3701875, upload-time = "2025-12-09T10:09:37.924Z" }, + { url = "https://files.pythonhosted.org/packages/4c/67/140ecb65b4f436099ad674fbe7502378156f43b737cb43f5fd76c42a0da8/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4541ed987306c51e718f51196fd2b2d05e87b323da5d850b37900d2e8ac6aae6", size = 3412283, upload-time = "2025-12-09T10:10:10.946Z" }, + { url = "https://files.pythonhosted.org/packages/d0/93/b7b41cf8b3e591b7191494e12be24cbb101f137fe82f0a24ed7934bbacf3/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce0b0a500e08b190038380d4593efcb33c98ed4282cc8347ca6ce55d05cbdf6e", size = 3340580, upload-time = "2025-12-09T10:11:02.956Z" }, + { url = "https://files.pythonhosted.org/packages/94/19/fdc9ea9ce6510147460200c90164a84c22b0cc9e33f7dd5c0d5f76484314/loro-1.10.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:987dbcb42b4b8d2c799660a6d8942e53ae346f51d51c9ad7ef5d7e640422fe4a", size = 3680924, upload-time = "2025-12-09T10:10:39.877Z" }, + { url = "https://files.pythonhosted.org/packages/40/61/548491499394fe02e7451b0d7367f7eeed32f0f6dd8f1826be8b4c329f28/loro-1.10.3-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:f876d477cb38c6c623c4ccb5dc4b7041dbeff04167bf9c19fa461d57a3a1b916", size = 3465033, upload-time = "2025-12-09T10:12:03.122Z" }, + { url = "https://files.pythonhosted.org/packages/26/68/d8bebb6b583fe5a3dc4da32c9070964548e3ca1d524f383c71f9becf4197/loro-1.10.3-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:641c8445bd1e4181b5b28b75a0bc544ef51f065b15746e8714f90e2e029b5202", size = 3616740, upload-time = "2025-12-09T10:12:38.187Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/9b/8f8ecc85eb925122a79348eb77ff7109a7ee41ee7d1a282122be2daff378/loro-1.10.3-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:a6ab6244472402b8d1f4f77e5210efa44dfa4914423cafcfcbd09232ea8bbff0", size = 3661160, upload-time = "2025-12-09T10:13:12.513Z" }, + { url = "https://files.pythonhosted.org/packages/79/3c/e884d06859f9a9fc64afd21c426b9d681af0856181c1fe66571a65d35ef7/loro-1.10.3-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ae4c765671ee7d7618962ec11cb3bb471965d9b88c075166fe383263235d58d6", size = 3553653, upload-time = "2025-12-09T10:13:47.917Z" }, +] + +[[package]] +name = "marimo" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "docutils" }, + { name = "itsdangerous" }, + { name = "jedi" }, + { name = "loro" }, + { name = "markdown" }, + { name = "msgspec" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pygments" }, + { name = "pymdown-extensions" }, + { name = "pyyaml" }, + { name = "starlette" }, + { name = "tomlkit" }, + { name = "uvicorn" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/30/052e4ebf21bd3f49fa47fd844716c53292e468ee6d6d8c716852dcbe3764/marimo-0.21.0.tar.gz", hash = "sha256:b4d515858eded6b6a25b58ce971fd62080cbb89738a73aaf6c29c34da489d47b", size = 38366236, upload-time = "2026-03-16T21:55:12.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/e7/215d55f2d485f508d23a1dcbec57f3edd4a913ad06c3cabe6cb353d31933/marimo-0.21.0-py3-none-any.whl", hash = "sha256:5a54bdc6ae38486f7af1d287778d4b888e7ebaea6fce885439835ea2a037d381", size = 38780820, upload-time = "2026-03-16T21:55:09.217Z" }, +] + +[[package]] +name = "markdown" +version = "3.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, + { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, + { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, + { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, + { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, + { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { 
url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + 
{ url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "msgspec" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/59/fdcb3af72f750a8de2bcf39d62ada70b5eb17b06d7f63860e0a679cb656b/msgspec-0.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:09e0efbf1ac641fedb1d5496c59507c2f0dc62a052189ee62c763e0aae217520", size = 193345, upload-time = "2025-11-24T03:55:20.613Z" }, + { url = "https://files.pythonhosted.org/packages/5a/15/3c225610da9f02505d37d69a77f4a2e7daae2a125f99d638df211ba84e59/msgspec-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23ee3787142e48f5ee746b2909ce1b76e2949fbe0f97f9f6e70879f06c218b54", size = 186867, upload-time = "2025-11-24T03:55:22.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/36/13ab0c547e283bf172f45491edfdea0e2cecb26ae61e3a7b1ae6058b326d/msgspec-0.20.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:81f4ac6f0363407ac0465eff5c7d4d18f26870e00674f8fcb336d898a1e36854", size = 215351, upload-time = "2025-11-24T03:55:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/6b/96/5c095b940de3aa6b43a71ec76275ac3537b21bd45c7499b5a17a429110fa/msgspec-0.20.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb4d873f24ae18cd1334f4e37a178ed46c9d186437733351267e0a269bdf7e53", size = 219896, upload-time = "2025-11-24T03:55:25.356Z" }, + { url = "https://files.pythonhosted.org/packages/98/7a/81a7b5f01af300761087b114dafa20fb97aed7184d33aab64d48874eb187/msgspec-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b92b8334427b8393b520c24ff53b70f326f79acf5f74adb94fd361bcff8a1d4e", size = 220389, upload-time = "2025-11-24T03:55:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/70/c0/3d0cce27db9a9912421273d49eab79ce01ecd2fed1a2f1b74af9b445f33c/msgspec-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:562c44b047c05cc0384e006fae7a5e715740215c799429e0d7e3e5adf324285a", size = 223348, upload-time = "2025-11-24T03:55:28.311Z" }, + { url = "https://files.pythonhosted.org/packages/89/5e/406b7d578926b68790e390d83a1165a9bfc2d95612a1a9c1c4d5c72ea815/msgspec-0.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1dcc93a3ce3d3195985bfff18a48274d0b5ffbc96fa1c5b89da6f0d9af81b29", size = 188713, upload-time = "2025-11-24T03:55:29.553Z" }, + { url = "https://files.pythonhosted.org/packages/47/87/14fe2316624ceedf76a9e94d714d194cbcb699720b210ff189f89ca4efd7/msgspec-0.20.0-cp311-cp311-win_arm64.whl", hash = "sha256:aa387aa330d2e4bd69995f66ea8fdc87099ddeedf6fdb232993c6a67711e7520", size = 174229, upload-time = "2025-11-24T03:55:31.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/6f/1e25eee957e58e3afb2a44b94fa95e06cebc4c236193ed0de3012fff1e19/msgspec-0.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2aba22e2e302e9231e85edc24f27ba1f524d43c223ef5765bd8624c7df9ec0a5", size = 196391, upload-time = "2025-11-24T03:55:32.677Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ee/af51d090ada641d4b264992a486435ba3ef5b5634bc27e6eb002f71cef7d/msgspec-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:716284f898ab2547fedd72a93bb940375de9fbfe77538f05779632dc34afdfde", size = 188644, upload-time = "2025-11-24T03:55:33.934Z" }, + { url = "https://files.pythonhosted.org/packages/49/d6/9709ee093b7742362c2934bfb1bbe791a1e09bed3ea5d8a18ce552fbfd73/msgspec-0.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:558ed73315efa51b1538fa8f1d3b22c8c5ff6d9a2a62eff87d25829b94fc5054", size = 218852, upload-time = "2025-11-24T03:55:35.575Z" }, + { url = "https://files.pythonhosted.org/packages/5c/a2/488517a43ccf5a4b6b6eca6dd4ede0bd82b043d1539dd6bb908a19f8efd3/msgspec-0.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:509ac1362a1d53aa66798c9b9fd76872d7faa30fcf89b2fba3bcbfd559d56eb0", size = 224937, upload-time = "2025-11-24T03:55:36.859Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e8/49b832808aa23b85d4f090d1d2e48a4e3834871415031ed7c5fe48723156/msgspec-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1353c2c93423602e7dea1aa4c92f3391fdfc25ff40e0bacf81d34dbc68adb870", size = 222858, upload-time = "2025-11-24T03:55:38.187Z" }, + { url = "https://files.pythonhosted.org/packages/9f/56/1dc2fa53685dca9c3f243a6cbecd34e856858354e455b77f47ebd76cf5bf/msgspec-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb33b5eb5adb3c33d749684471c6a165468395d7aa02d8867c15103b81e1da3e", size = 227248, upload-time = "2025-11-24T03:55:39.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/51/aba940212c23b32eedce752896205912c2668472ed5b205fc33da28a6509/msgspec-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:fb1d934e435dd3a2b8cf4bbf47a8757100b4a1cfdc2afdf227541199885cdacb", size = 190024, upload-time = "2025-11-24T03:55:40.829Z" }, + { url = "https://files.pythonhosted.org/packages/41/ad/3b9f259d94f183daa9764fef33fdc7010f7ecffc29af977044fa47440a83/msgspec-0.20.0-cp312-cp312-win_arm64.whl", hash = "sha256:00648b1e19cf01b2be45444ba9dc961bd4c056ffb15706651e64e5d6ec6197b7", size = 175390, upload-time = "2025-11-24T03:55:42.05Z" }, + { url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" }, + { url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" }, + { url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" }, + { url = "https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" }, + { url = "https://files.pythonhosted.org/packages/f1/25/5e8080fe0117f799b1b68008dc29a65862077296b92550632de015128579/msgspec-0.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:67d5e4dfad52832017018d30a462604c80561aa62a9d548fc2bd4e430b66a352", size = 189966, upload-time = "2025-11-24T03:55:52.458Z" }, + { url = "https://files.pythonhosted.org/packages/79/b6/63363422153937d40e1cb349c5081338401f8529a5a4e216865decd981bf/msgspec-0.20.0-cp313-cp313-win_arm64.whl", hash = "sha256:91a52578226708b63a9a13de287b1ec3ed1123e4a088b198143860c087770458", size = 175378, upload-time = "2025-11-24T03:55:53.721Z" }, + { url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" }, + { url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" }, + { url = "https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" }, + { url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" }, + { url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" }, + { url = "https://files.pythonhosted.org/packages/ff/37/9c4b58ff11d890d788e700b827db2366f4d11b3313bf136780da7017278b/msgspec-0.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:7dfebc94fe7d3feec6bc6c9df4f7e9eccc1160bb5b811fbf3e3a56899e398a6b", size = 193950, upload-time = "2025-11-24T03:56:03.668Z" }, + { url = "https://files.pythonhosted.org/packages/e9/4e/cab707bf2fa57408e2934e5197fc3560079db34a1e3cd2675ff2e47e07de/msgspec-0.20.0-cp314-cp314-win_arm64.whl", hash = "sha256:2ad6ae36e4a602b24b4bf4eaf8ab5a441fec03e1f1b5931beca8ebda68f53fc0", size = 179018, upload-time = "2025-11-24T03:56:05.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" }, + { url = "https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" }, + { url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" }, + { url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/f6/9ba7121b8e0c4e0beee49575d1dbc804e2e72467692f0428cf39ceba1ea5/msgspec-0.20.0-cp314-cp314t-win_amd64.whl", hash = "sha256:726f3e6c3c323f283f6021ebb6c8ccf58d7cd7baa67b93d73bfbe9a15c34ab8d", size = 206540, upload-time = "2025-11-24T03:56:15.029Z" }, + { url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" }, +] + +[[package]] +name = "narwhals" +version = "2.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/b4/02a8add181b8d2cd5da3b667cd102ae536e8c9572ab1a130816d70a89edb/narwhals-2.18.0.tar.gz", hash = "sha256:1de5cee338bc17c338c6278df2c38c0dd4290499fcf70d75e0a51d5f22a6e960", size = 620222, upload-time = "2026-03-10T15:51:27.14Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/75/0b4a10da17a44cf13567d08a9c7632a285297e46253263f1ae119129d10a/narwhals-2.18.0-py3-none-any.whl", hash = "sha256:68378155ee706ac9c5b25868ef62ecddd62947b6df7801a0a156bc0a615d2d0d", size = 444865, upload-time = "2026-03-10T15:51:24.085Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/51/5093a2df15c4dc19da3f79d1021e891f5dcf1d9d1db6ba38891d5590f3fe/numpy-2.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33b3bf58ee84b172c067f56aeadc7ee9ab6de69c5e800ab5b10295d54c581adb", size = 16957183, upload-time = 
"2026-03-09T07:55:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/b5/7c/c061f3de0630941073d2598dc271ac2f6cbcf5c83c74a5870fea07488333/numpy-2.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ba7b51e71c05aa1f9bc3641463cd82308eab40ce0d5c7e1fd4038cbf9938147", size = 14968734, upload-time = "2026-03-09T07:56:00.494Z" }, + { url = "https://files.pythonhosted.org/packages/ef/27/d26c85cbcd86b26e4f125b0668e7a7c0542d19dd7d23ee12e87b550e95b5/numpy-2.4.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a1988292870c7cb9d0ebb4cc96b4d447513a9644801de54606dc7aabf2b7d920", size = 5475288, upload-time = "2026-03-09T07:56:02.857Z" }, + { url = "https://files.pythonhosted.org/packages/2b/09/3c4abbc1dcd8010bf1a611d174c7aa689fc505585ec806111b4406f6f1b1/numpy-2.4.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:23b46bb6d8ecb68b58c09944483c135ae5f0e9b8d8858ece5e4ead783771d2a9", size = 6805253, upload-time = "2026-03-09T07:56:04.53Z" }, + { url = "https://files.pythonhosted.org/packages/21/bc/e7aa3f6817e40c3f517d407742337cbb8e6fc4b83ce0b55ab780c829243b/numpy-2.4.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a016db5c5dba78fa8fe9f5d80d6708f9c42ab087a739803c0ac83a43d686a470", size = 15969479, upload-time = "2026-03-09T07:56:06.638Z" }, + { url = "https://files.pythonhosted.org/packages/78/51/9f5d7a41f0b51649ddf2f2320595e15e122a40610b233d51928dd6c92353/numpy-2.4.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:715de7f82e192e8cae5a507a347d97ad17598f8e026152ca97233e3666daaa71", size = 16901035, upload-time = "2026-03-09T07:56:09.405Z" }, + { url = "https://files.pythonhosted.org/packages/64/6e/b221dd847d7181bc5ee4857bfb026182ef69499f9305eb1371cbb1aea626/numpy-2.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2ddb7919366ee468342b91dea2352824c25b55814a987847b6c52003a7c97f15", size = 17325657, upload-time = "2026-03-09T07:56:12.067Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/b8/8f3fd2da596e1063964b758b5e3c970aed1949a05200d7e3d46a9d46d643/numpy-2.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a315e5234d88067f2d97e1f2ef670a7569df445d55400f1e33d117418d008d52", size = 18635512, upload-time = "2026-03-09T07:56:14.629Z" }, + { url = "https://files.pythonhosted.org/packages/5c/24/2993b775c37e39d2f8ab4125b44337ab0b2ba106c100980b7c274a22bee7/numpy-2.4.3-cp311-cp311-win32.whl", hash = "sha256:2b3f8d2c4589b1a2028d2a770b0fc4d1f332fb5e01521f4de3199a896d158ddd", size = 6238100, upload-time = "2026-03-09T07:56:17.243Z" }, + { url = "https://files.pythonhosted.org/packages/76/1d/edccf27adedb754db7c4511d5eac8b83f004ae948fe2d3509e8b78097d4c/numpy-2.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:77e76d932c49a75617c6d13464e41203cd410956614d0a0e999b25e9e8d27eec", size = 12609816, upload-time = "2026-03-09T07:56:19.089Z" }, + { url = "https://files.pythonhosted.org/packages/92/82/190b99153480076c8dce85f4cfe7d53ea84444145ffa54cb58dcd460d66b/numpy-2.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:eb610595dd91560905c132c709412b512135a60f1851ccbd2c959e136431ff67", size = 10485757, upload-time = "2026-03-09T07:56:21.753Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ed/6388632536f9788cea23a3a1b629f25b43eaacd7d7377e5d6bc7b9deb69b/numpy-2.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:61b0cbabbb6126c8df63b9a3a0c4b1f44ebca5e12ff6997b80fcf267fb3150ef", size = 16669628, upload-time = "2026-03-09T07:56:24.252Z" }, + { url = "https://files.pythonhosted.org/packages/74/1b/ee2abfc68e1ce728b2958b6ba831d65c62e1b13ce3017c13943f8f9b5b2e/numpy-2.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7395e69ff32526710748f92cd8c9849b361830968ea3e24a676f272653e8983e", size = 14696872, upload-time = "2026-03-09T07:56:26.991Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d1/780400e915ff5638166f11ca9dc2c5815189f3d7cf6f8759a1685e586413/numpy-2.4.3-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:abdce0f71dcb4a00e4e77f3faf05e4616ceccfe72ccaa07f47ee79cda3b7b0f4", size = 5203489, upload-time = "2026-03-09T07:56:29.414Z" }, + { url = "https://files.pythonhosted.org/packages/0b/bb/baffa907e9da4cc34a6e556d6d90e032f6d7a75ea47968ea92b4858826c4/numpy-2.4.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:48da3a4ee1336454b07497ff7ec83903efa5505792c4e6d9bf83d99dc07a1e18", size = 6550814, upload-time = "2026-03-09T07:56:32.225Z" }, + { url = "https://files.pythonhosted.org/packages/7b/12/8c9f0c6c95f76aeb20fc4a699c33e9f827fa0d0f857747c73bb7b17af945/numpy-2.4.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32e3bef222ad6b052280311d1d60db8e259e4947052c3ae7dd6817451fc8a4c5", size = 15666601, upload-time = "2026-03-09T07:56:34.461Z" }, + { url = "https://files.pythonhosted.org/packages/bd/79/cc665495e4d57d0aa6fbcc0aa57aa82671dfc78fbf95fe733ed86d98f52a/numpy-2.4.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7dd01a46700b1967487141a66ac1a3cf0dd8ebf1f08db37d46389401512ca97", size = 16621358, upload-time = "2026-03-09T07:56:36.852Z" }, + { url = "https://files.pythonhosted.org/packages/a8/40/b4ecb7224af1065c3539f5ecfff879d090de09608ad1008f02c05c770cb3/numpy-2.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:76f0f283506c28b12bba319c0fab98217e9f9b54e6160e9c79e9f7348ba32e9c", size = 17016135, upload-time = "2026-03-09T07:56:39.337Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b1/6a88e888052eed951afed7a142dcdf3b149a030ca59b4c71eef085858e43/numpy-2.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737f630a337364665aba3b5a77e56a68cc42d350edd010c345d65a3efa3addcc", size = 18345816, upload-time = "2026-03-09T07:56:42.31Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8f/103a60c5f8c3d7fc678c19cd7b2476110da689ccb80bc18050efbaeae183/numpy-2.4.3-cp312-cp312-win32.whl", hash = "sha256:26952e18d82a1dbbc2f008d402021baa8d6fc8e84347a2072a25e08b46d698b9", size = 5960132, 
upload-time = "2026-03-09T07:56:44.851Z" }, + { url = "https://files.pythonhosted.org/packages/d7/7c/f5ee1bf6ed888494978046a809df2882aad35d414b622893322df7286879/numpy-2.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:65f3c2455188f09678355f5cae1f959a06b778bc66d535da07bf2ef20cd319d5", size = 12316144, upload-time = "2026-03-09T07:56:47.057Z" }, + { url = "https://files.pythonhosted.org/packages/71/46/8d1cb3f7a00f2fb6394140e7e6623696e54c6318a9d9691bb4904672cf42/numpy-2.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:2abad5c7fef172b3377502bde47892439bae394a71bc329f31df0fd829b41a9e", size = 10220364, upload-time = "2026-03-09T07:56:49.849Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" }, + { url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" }, + { url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" }, + { url = "https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 18334581, upload-time = "2026-03-09T07:57:09.114Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" }, + { url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" }, + { url = "https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = "2026-03-09T07:57:18.564Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" }, + { url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" }, + { url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" }, + { url = "https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" }, + { url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" }, + { url = "https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" }, + { url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" }, + { url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" }, + { url = "https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" }, + { url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" }, + { url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" }, + { url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 
18339941, upload-time = "2026-03-09T07:58:00.577Z" }, + { url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, upload-time = "2026-03-09T07:58:03.331Z" }, + { url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" }, + { url = "https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" }, + { url = "https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" }, + { url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" }, + { url = "https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" }, + { url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" }, + { url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" }, + { url = "https://files.pythonhosted.org/packages/64/e4/4dab9fb43c83719c29241c535d9e07be73bea4bc0c6686c5816d8e1b6689/numpy-2.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c6b124bfcafb9e8d3ed09130dbee44848c20b3e758b6bbf006e641778927c028", size = 16834892, upload-time = "2026-03-09T07:58:35.334Z" }, + { url = "https://files.pythonhosted.org/packages/c9/29/f8b6d4af90fed3dfda84ebc0df06c9833d38880c79ce954e5b661758aa31/numpy-2.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:76dbb9d4e43c16cf9aa711fcd8de1e2eeb27539dcefb60a1d5e9f12fae1d1ed8", size = 14893070, upload-time = "2026-03-09T07:58:37.7Z" }, + { url = "https://files.pythonhosted.org/packages/9a/04/a19b3c91dbec0a49269407f15d5753673a09832daed40c45e8150e6fa558/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:29363fbfa6f8ee855d7569c96ce524845e3d726d6c19b29eceec7dd555dab152", size = 5399609, upload-time = "2026-03-09T07:58:39.853Z" }, + { url = "https://files.pythonhosted.org/packages/79/34/4d73603f5420eab89ea8a67097b31364bf7c30f811d4dd84b1659c7476d9/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:bc71942c789ef415a37f0d4eab90341425a00d538cd0642445d30b41023d3395", size = 6714355, upload-time = "2026-03-09T07:58:42.365Z" }, + { url = "https://files.pythonhosted.org/packages/58/ad/1100d7229bb248394939a12a8074d485b655e8ed44207d328fdd7fcebc7b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e58765ad74dcebd3ef0208a5078fba32dc8ec3578fe84a604432950cd043d79", size = 15800434, upload-time = "2026-03-09T07:58:44.837Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/fd/16d710c085d28ba4feaf29ac60c936c9d662e390344f94a6beaa2ac9899b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e236dbda4e1d319d681afcbb136c0c4a8e0f1a5c58ceec2adebb547357fe857", size = 16729409, upload-time = "2026-03-09T07:58:47.972Z" }, + { url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pandas" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ff/07/c7087e003ceee9b9a82539b40414ec557aa795b584a1a346e89180853d79/pandas-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de09668c1bf3b925c07e5762291602f0d789eca1b3a781f99c1c78f6cac0e7ea", size = 10323380, upload-time = "2026-02-17T22:18:16.133Z" }, + { url = "https://files.pythonhosted.org/packages/c1/27/90683c7122febeefe84a56f2cde86a9f05f68d53885cebcc473298dfc33e/pandas-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:24ba315ba3d6e5806063ac6eb717504e499ce30bd8c236d8693a5fd3f084c796", size = 9923455, upload-time = "2026-02-17T22:18:19.13Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f1/ed17d927f9950643bc7631aa4c99ff0cc83a37864470bc419345b656a41f/pandas-3.0.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:406ce835c55bac912f2a0dcfaf27c06d73c6b04a5dde45f1fd3169ce31337389", size = 10753464, upload-time = "2026-02-17T22:18:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/2e/7c/870c7e7daec2a6c7ff2ac9e33b23317230d4e4e954b35112759ea4a924a7/pandas-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:830994d7e1f31dd7e790045235605ab61cff6c94defc774547e8b7fdfbff3dc7", size = 11255234, upload-time = "2026-02-17T22:18:24.175Z" }, + { url = "https://files.pythonhosted.org/packages/5c/39/3653fe59af68606282b989c23d1a543ceba6e8099cbcc5f1d506a7bae2aa/pandas-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a64ce8b0f2de1d2efd2ae40b0abe7f8ae6b29fbfb3812098ed5a6f8e235ad9bf", size = 11767299, upload-time = "2026-02-17T22:18:26.824Z" }, + { url = "https://files.pythonhosted.org/packages/9b/31/1daf3c0c94a849c7a8dab8a69697b36d313b229918002ba3e409265c7888/pandas-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9832c2c69da24b602c32e0c7b1b508a03949c18ba08d4d9f1c1033426685b447", size = 12333292, upload-time = "2026-02-17T22:18:28.996Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/67/af63f83cd6ca603a00fe8530c10a60f0879265b8be00b5930e8e78c5b30b/pandas-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:84f0904a69e7365f79a0c77d3cdfccbfb05bf87847e3a51a41e1426b0edb9c79", size = 9892176, upload-time = "2026-02-17T22:18:31.79Z" }, + { url = "https://files.pythonhosted.org/packages/79/ab/9c776b14ac4b7b4140788eca18468ea39894bc7340a408f1d1e379856a6b/pandas-3.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:4a68773d5a778afb31d12e34f7dd4612ab90de8c6fb1d8ffe5d4a03b955082a1", size = 9151328, upload-time = "2026-02-17T22:18:35.721Z" }, + { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" }, + { url = "https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" }, + { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" }, + { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" }, + { url = "https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" }, + { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" }, + { url = "https://files.pythonhosted.org/packages/0b/48/aad6ec4f8d007534c091e9a7172b3ec1b1ee6d99a9cbb936b5eab6c6cf58/pandas-3.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5272627187b5d9c20e55d27caf5f2cd23e286aba25cadf73c8590e432e2b7262", size = 10317509, upload-time = "2026-02-17T22:18:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/a8/14/5990826f779f79148ae9d3a2c39593dc04d61d5d90541e71b5749f35af95/pandas-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:661e0f665932af88c7877f31da0dc743fe9c8f2524bdffe23d24fdcb67ef9d56", size = 9860561, upload-time = "2026-02-17T22:19:02.265Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/80/f01ff54664b6d70fed71475543d108a9b7c888e923ad210795bef04ffb7d/pandas-3.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75e6e292ff898679e47a2199172593d9f6107fd2dd3617c22c2946e97d5df46e", size = 10365506, upload-time = "2026-02-17T22:19:05.017Z" }, + { url = "https://files.pythonhosted.org/packages/f2/85/ab6d04733a7d6ff32bfc8382bf1b07078228f5d6ebec5266b91bfc5c4ff7/pandas-3.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ff8cf1d2896e34343197685f432450ec99a85ba8d90cce2030c5eee2ef98791", size = 10873196, upload-time = "2026-02-17T22:19:07.204Z" }, + { url = "https://files.pythonhosted.org/packages/48/a9/9301c83d0b47c23ac5deab91c6b39fd98d5b5db4d93b25df8d381451828f/pandas-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eca8b4510f6763f3d37359c2105df03a7a221a508f30e396a51d0713d462e68a", size = 11370859, upload-time = "2026-02-17T22:19:09.436Z" }, + { url = "https://files.pythonhosted.org/packages/59/fe/0c1fc5bd2d29c7db2ab372330063ad555fb83e08422829c785f5ec2176ca/pandas-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06aff2ad6f0b94a17822cf8b83bbb563b090ed82ff4fe7712db2ce57cd50d9b8", size = 11924584, upload-time = "2026-02-17T22:19:11.562Z" }, + { url = "https://files.pythonhosted.org/packages/d6/7d/216a1588b65a7aa5f4535570418a599d943c85afb1d95b0876fc00aa1468/pandas-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:9fea306c783e28884c29057a1d9baa11a349bbf99538ec1da44c8476563d1b25", size = 9742769, upload-time = "2026-02-17T22:19:13.926Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cb/810a22a6af9a4e97c8ab1c946b47f3489c5bca5adc483ce0ffc84c9cc768/pandas-3.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:a8d37a43c52917427e897cb2e429f67a449327394396a81034a4449b99afda59", size = 9043855, upload-time = "2026-02-17T22:19:16.09Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/fa/423c89086cca1f039cf1253c3ff5b90f157b5b3757314aa635f6bf3e30aa/pandas-3.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d54855f04f8246ed7b6fc96b05d4871591143c46c0b6f4af874764ed0d2d6f06", size = 10752673, upload-time = "2026-02-17T22:19:18.304Z" }, + { url = "https://files.pythonhosted.org/packages/22/23/b5a08ec1f40020397f0faba72f1e2c11f7596a6169c7b3e800abff0e433f/pandas-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e1b677accee34a09e0dc2ce5624e4a58a1870ffe56fc021e9caf7f23cd7668f", size = 10404967, upload-time = "2026-02-17T22:19:20.726Z" }, + { url = "https://files.pythonhosted.org/packages/5c/81/94841f1bb4afdc2b52a99daa895ac2c61600bb72e26525ecc9543d453ebc/pandas-3.0.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9cabbdcd03f1b6cd254d6dda8ae09b0252524be1592594c00b7895916cb1324", size = 10320575, upload-time = "2026-02-17T22:19:24.919Z" }, + { url = "https://files.pythonhosted.org/packages/0a/8b/2ae37d66a5342a83adadfd0cb0b4bf9c3c7925424dd5f40d15d6cfaa35ee/pandas-3.0.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ae2ab1f166668b41e770650101e7090824fd34d17915dd9cd479f5c5e0065e9", size = 10710921, upload-time = "2026-02-17T22:19:27.181Z" }, + { url = "https://files.pythonhosted.org/packages/a2/61/772b2e2757855e232b7ccf7cb8079a5711becb3a97f291c953def15a833f/pandas-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6bf0603c2e30e2cafac32807b06435f28741135cb8697eae8b28c7d492fc7d76", size = 11334191, upload-time = "2026-02-17T22:19:29.411Z" }, + { url = "https://files.pythonhosted.org/packages/1b/08/b16c6df3ef555d8495d1d265a7963b65be166785d28f06a350913a4fac78/pandas-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c426422973973cae1f4a23e51d4ae85974f44871b24844e4f7de752dd877098", size = 11782256, upload-time = "2026-02-17T22:19:32.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/80/178af0594890dee17e239fca96d3d8670ba0f5ff59b7d0439850924a9c09/pandas-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b03f91ae8c10a85c1613102c7bef5229b5379f343030a3ccefeca8a33414cf35", size = 10485047, upload-time = "2026-02-17T22:19:34.605Z" }, + { url = "https://files.pythonhosted.org/packages/bb/8b/4bb774a998b97e6c2fd62a9e6cfdaae133b636fd1c468f92afb4ae9a447a/pandas-3.0.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:99d0f92ed92d3083d140bf6b97774f9f13863924cf3f52a70711f4e7588f9d0a", size = 10322465, upload-time = "2026-02-17T22:19:36.803Z" }, + { url = "https://files.pythonhosted.org/packages/72/3a/5b39b51c64159f470f1ca3b1c2a87da290657ca022f7cd11442606f607d1/pandas-3.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3b66857e983208654294bb6477b8a63dee26b37bdd0eb34d010556e91261784f", size = 9910632, upload-time = "2026-02-17T22:19:39.001Z" }, + { url = "https://files.pythonhosted.org/packages/4e/f7/b449ffb3f68c11da12fc06fbf6d2fa3a41c41e17d0284d23a79e1c13a7e4/pandas-3.0.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56cf59638bf24dc9bdf2154c81e248b3289f9a09a6d04e63608c159022352749", size = 10440535, upload-time = "2026-02-17T22:19:41.157Z" }, + { url = "https://files.pythonhosted.org/packages/55/77/6ea82043db22cb0f2bbfe7198da3544000ddaadb12d26be36e19b03a2dc5/pandas-3.0.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1a9f55e0f46951874b863d1f3906dcb57df2d9be5c5847ba4dfb55b2c815249", size = 10893940, upload-time = "2026-02-17T22:19:43.493Z" }, + { url = "https://files.pythonhosted.org/packages/03/30/f1b502a72468c89412c1b882a08f6eed8a4ee9dc033f35f65d0663df6081/pandas-3.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1849f0bba9c8a2fb0f691d492b834cc8dadf617e29015c66e989448d58d011ee", size = 11442711, upload-time = "2026-02-17T22:19:46.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/f0/ebb6ddd8fc049e98cabac5c2924d14d1dda26a20adb70d41ea2e428d3ec4/pandas-3.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3d288439e11b5325b02ae6e9cc83e6805a62c40c5a6220bea9beb899c073b1c", size = 11963918, upload-time = "2026-02-17T22:19:48.838Z" }, + { url = "https://files.pythonhosted.org/packages/09/f8/8ce132104074f977f907442790eaae24e27bce3b3b454e82faa3237ff098/pandas-3.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:93325b0fe372d192965f4cca88d97667f49557398bbf94abdda3bf1b591dbe66", size = 9862099, upload-time = "2026-02-17T22:19:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b7/6af9aac41ef2456b768ef0ae60acf8abcebb450a52043d030a65b4b7c9bd/pandas-3.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:97ca08674e3287c7148f4858b01136f8bdfe7202ad25ad04fec602dd1d29d132", size = 9185333, upload-time = "2026-02-17T22:19:53.266Z" }, + { url = "https://files.pythonhosted.org/packages/66/fc/848bb6710bc6061cb0c5badd65b92ff75c81302e0e31e496d00029fe4953/pandas-3.0.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:58eeb1b2e0fb322befcf2bbc9ba0af41e616abadb3d3414a6bc7167f6cbfce32", size = 10772664, upload-time = "2026-02-17T22:19:55.806Z" }, + { url = "https://files.pythonhosted.org/packages/69/5c/866a9bbd0f79263b4b0db6ec1a341be13a1473323f05c122388e0f15b21d/pandas-3.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cd9af1276b5ca9e298bd79a26bda32fa9cc87ed095b2a9a60978d2ca058eaf87", size = 10421286, upload-time = "2026-02-17T22:19:58.091Z" }, + { url = "https://files.pythonhosted.org/packages/51/a4/2058fb84fb1cfbfb2d4a6d485e1940bb4ad5716e539d779852494479c580/pandas-3.0.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f87a04984d6b63788327cd9f79dda62b7f9043909d2440ceccf709249ca988", size = 10342050, upload-time = "2026-02-17T22:20:01.376Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/1b/674e89996cc4be74db3c4eb09240c4bb549865c9c3f5d9b086ff8fcfbf00/pandas-3.0.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85fe4c4df62e1e20f9db6ebfb88c844b092c22cd5324bdcf94bfa2fc1b391221", size = 10740055, upload-time = "2026-02-17T22:20:04.328Z" }, + { url = "https://files.pythonhosted.org/packages/d0/f8/e954b750764298c22fa4614376531fe63c521ef517e7059a51f062b87dca/pandas-3.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:331ca75a2f8672c365ae25c0b29e46f5ac0c6551fdace8eec4cd65e4fac271ff", size = 11357632, upload-time = "2026-02-17T22:20:06.647Z" }, + { url = "https://files.pythonhosted.org/packages/6d/02/c6e04b694ffd68568297abd03588b6d30295265176a5c01b7459d3bc35a3/pandas-3.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:15860b1fdb1973fffade772fdb931ccf9b2f400a3f5665aef94a00445d7d8dd5", size = 11810974, upload-time = "2026-02-17T22:20:08.946Z" }, + { url = "https://files.pythonhosted.org/packages/89/41/d7dfb63d2407f12055215070c42fc6ac41b66e90a2946cdc5e759058398b/pandas-3.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:44f1364411d5670efa692b146c748f4ed013df91ee91e9bec5677fb1fd58b937", size = 10884622, upload-time = "2026-02-17T22:20:11.711Z" }, + { url = "https://files.pythonhosted.org/packages/68/b0/34937815889fa982613775e4b97fddd13250f11012d769949c5465af2150/pandas-3.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:108dd1790337a494aa80e38def654ca3f0968cf4f362c85f44c15e471667102d", size = 9452085, upload-time = "2026-02-17T22:20:14.331Z" }, +] + +[[package]] +name = "parso" +version = "0.8.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/81/76/a1e769043c0c0c9fe391b702539d594731a4362334cdf4dc25d0c09761e7/parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd", size = 401621, upload-time = "2026-02-09T15:45:24.425Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff", size = 106894, upload-time = "2026-02-09T15:45:21.391Z" }, +] + +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = 
"2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pymdown-extensions" +version = "10.21" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "markdown" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/63/06673d1eb6d8f83c0ea1f677d770e12565fb516928b4109c9e2055656a9e/pymdown_extensions-10.21.tar.gz", hash = "sha256:39f4a020f40773f6b2ff31d2cd2546c2c04d0a6498c31d9c688d2be07e1767d5", size = 853363, upload-time = "2026-02-15T20:44:06.748Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/2c/5b079febdc65e1c3fb2729bf958d18b45be7113828528e8a0b5850dd819a/pymdown_extensions-10.21-py3-none-any.whl", hash = "sha256:91b879f9f864d49794c2d9534372b10150e6141096c3908a455e45ca72ad9d3f", size = 268877, upload-time = "2026-02-15T20:44:05.464Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, + { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, + { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, + { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, + { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, + { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = 
"sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/6e/f964e88b3d2abee2a82c1ac8366da848fce1c6d834dc2132c3fda3970290/rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425", size = 370157, upload-time = "2025-11-30T20:21:53.789Z" }, + { url = "https://files.pythonhosted.org/packages/94/ba/24e5ebb7c1c82e74c4e4f33b2112a5573ddc703915b13a073737b59b86e0/rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d", size = 359676, upload-time = "2025-11-30T20:21:55.475Z" }, + { url = "https://files.pythonhosted.org/packages/84/86/04dbba1b087227747d64d80c3b74df946b986c57af0a9f0c98726d4d7a3b/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4", size = 389938, upload-time = "2025-11-30T20:21:57.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/bb/1463f0b1722b7f45431bdd468301991d1328b16cffe0b1c2918eba2c4eee/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f", size = 402932, upload-time = "2025-11-30T20:21:58.47Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/ee/2520700a5c1f2d76631f948b0736cdf9b0acb25abd0ca8e889b5c62ac2e3/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4", size = 525830, upload-time = "2025-11-30T20:21:59.699Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ad/bd0331f740f5705cc555a5e17fdf334671262160270962e69a2bdef3bf76/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97", size = 412033, upload-time = "2025-11-30T20:22:00.991Z" }, + { url = "https://files.pythonhosted.org/packages/f8/1e/372195d326549bb51f0ba0f2ecb9874579906b97e08880e7a65c3bef1a99/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89", size = 390828, upload-time = "2025-11-30T20:22:02.723Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2b/d88bb33294e3e0c76bc8f351a3721212713629ffca1700fa94979cb3eae8/rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d", size = 404683, upload-time = "2025-11-30T20:22:04.367Z" }, + { url = "https://files.pythonhosted.org/packages/50/32/c759a8d42bcb5289c1fac697cd92f6fe01a018dd937e62ae77e0e7f15702/rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038", size = 421583, upload-time = "2025-11-30T20:22:05.814Z" }, + { url = "https://files.pythonhosted.org/packages/2b/81/e729761dbd55ddf5d84ec4ff1f47857f4374b0f19bdabfcf929164da3e24/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7", size = 572496, upload-time = "2025-11-30T20:22:07.713Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/f6/69066a924c3557c9c30baa6ec3a0aa07526305684c6f86c696b08860726c/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed", size = 598669, upload-time = "2025-11-30T20:22:09.312Z" }, + { url = "https://files.pythonhosted.org/packages/5f/48/905896b1eb8a05630d20333d1d8ffd162394127b74ce0b0784ae04498d32/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85", size = 561011, upload-time = "2025-11-30T20:22:11.309Z" }, + { url = "https://files.pythonhosted.org/packages/22/16/cd3027c7e279d22e5eb431dd3c0fbc677bed58797fe7581e148f3f68818b/rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c", size = 221406, upload-time = "2025-11-30T20:22:13.101Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5b/e7b7aa136f28462b344e652ee010d4de26ee9fd16f1bfd5811f5153ccf89/rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825", size = 236024, upload-time = "2025-11-30T20:22:14.853Z" }, + { url = "https://files.pythonhosted.org/packages/14/a6/364bba985e4c13658edb156640608f2c9e1d3ea3c81b27aa9d889fff0e31/rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229", size = 229069, upload-time = "2025-11-30T20:22:16.577Z" }, + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", 
hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = 
"sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, 
+ { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, + { url = "https://files.pythonhosted.org/packages/69/71/3f34339ee70521864411f8b6992e7ab13ac30d8e4e3309e07c7361767d91/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58", size = 372292, upload-time = "2025-11-30T20:24:16.537Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/09/f183df9b8f2d66720d2ef71075c59f7e1b336bec7ee4c48f0a2b06857653/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a", size = 362128, upload-time = "2025-11-30T20:24:18.086Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/5c2594e937253457342e078f0cc1ded3dd7b2ad59afdbf2d354869110a02/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb", size = 391542, upload-time = "2025-11-30T20:24:20.092Z" }, + { url = "https://files.pythonhosted.org/packages/49/5c/31ef1afd70b4b4fbdb2800249f34c57c64beb687495b10aec0365f53dfc4/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c", size = 404004, upload-time = "2025-11-30T20:24:22.231Z" }, + { url = "https://files.pythonhosted.org/packages/e3/63/0cfbea38d05756f3440ce6534d51a491d26176ac045e2707adc99bb6e60a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3", size = 527063, upload-time = "2025-11-30T20:24:24.302Z" }, + { url = "https://files.pythonhosted.org/packages/42/e6/01e1f72a2456678b0f618fc9a1a13f882061690893c192fcad9f2926553a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5", size = 413099, upload-time = "2025-11-30T20:24:25.916Z" }, + { url = "https://files.pythonhosted.org/packages/b8/25/8df56677f209003dcbb180765520c544525e3ef21ea72279c98b9aa7c7fb/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738", size = 392177, upload-time = 
"2025-11-30T20:24:27.834Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b4/0a771378c5f16f8115f796d1f437950158679bcd2a7c68cf251cfb00ed5b/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f", size = 406015, upload-time = "2025-11-30T20:24:29.457Z" }, + { url = "https://files.pythonhosted.org/packages/36/d8/456dbba0af75049dc6f63ff295a2f92766b9d521fa00de67a2bd6427d57a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877", size = 423736, upload-time = "2025-11-30T20:24:31.22Z" }, + { url = "https://files.pythonhosted.org/packages/13/64/b4d76f227d5c45a7e0b796c674fd81b0a6c4fbd48dc29271857d8219571c/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a", size = 573981, upload-time = "2025-11-30T20:24:32.934Z" }, + { url = "https://files.pythonhosted.org/packages/20/91/092bacadeda3edf92bf743cc96a7be133e13a39cdbfd7b5082e7ab638406/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4", size = 599782, upload-time = "2025-11-30T20:24:35.169Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time 
= "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "starlette" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, +] + +[[package]] +name = "tomlkit" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = 
"sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" }, +] + +[[package]] +name = "vega-datasets" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/a0/ce608d9a5b82fce2ebaa2311136b1e1d1dc2807f501bbdfa56bd174fff76/vega_datasets-0.9.0.tar.gz", hash = "sha256:9dbe9834208e8ec32ab44970df315de9102861e4cda13d8e143aab7a80d93fc0", size = 215013, upload-time = "2020-11-26T13:56:59.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/9f/ca52771fe972e0dcc5167fedb609940e01516066938ff2ee28b273ae4f29/vega_datasets-0.9.0-py3-none-any.whl", hash = "sha256:3d7c63917be6ca9b154b565f4779a31fedce57b01b5b9d99d8a34a7608062a1d", size = 210822, upload-time = "2020-11-26T13:56:57.776Z" }, +] + +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/db/de907251b4ff46ae804ad0409809504153b3f30984daf82a1d84a9875830/websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8", size = 177340, upload-time = "2026-01-10T09:22:34.539Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fa/abe89019d8d8815c8781e90d697dec52523fb8ebe308bf11664e8de1877e/websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad", size = 175022, upload-time = "2026-01-10T09:22:36.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/5d/88ea17ed1ded2079358b40d31d48abe90a73c9e5819dbcde1606e991e2ad/websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d", size = 175319, upload-time = "2026-01-10T09:22:37.602Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ae/0ee92b33087a33632f37a635e11e1d99d429d3d323329675a6022312aac2/websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe", size = 184631, upload-time = "2026-01-10T09:22:38.789Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/27178df583b6c5b31b29f526ba2da5e2f864ecc79c99dae630a85d68c304/websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b", size = 185870, upload-time = "2026-01-10T09:22:39.893Z" }, + { url = "https://files.pythonhosted.org/packages/87/05/536652aa84ddc1c018dbb7e2c4cbcd0db884580bf8e95aece7593fde526f/websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5", size = 185361, upload-time = "2026-01-10T09:22:41.016Z" }, + { url = "https://files.pythonhosted.org/packages/6d/e2/d5332c90da12b1e01f06fb1b85c50cfc489783076547415bf9f0a659ec19/websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64", size = 184615, upload-time = "2026-01-10T09:22:42.442Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/d3f9576691cae9253b51555f841bc6600bf0a983a461c79500ace5a5b364/websockets-16.0-cp311-cp311-win32.whl", hash = "sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6", size = 178246, upload-time = "2026-01-10T09:22:43.654Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/67/eaff76b3dbaf18dcddabc3b8c1dba50b483761cccff67793897945b37408/websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac", size = 178684, upload-time = "2026-01-10T09:22:44.941Z" }, + { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" }, + { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" }, + { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" }, + { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" }, + { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/72/07/c98a68571dcf256e74f1f816b8cc5eae6eb2d3d5cfa44d37f801619d9166/websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d", size = 174947, upload-time = "2026-01-10T09:23:36.166Z" }, + { url = "https://files.pythonhosted.org/packages/7e/52/93e166a81e0305b33fe416338be92ae863563fe7bce446b0f687b9df5aea/websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03", size = 175260, upload-time = "2026-01-10T09:23:37.409Z" }, + { url = "https://files.pythonhosted.org/packages/56/0c/2dbf513bafd24889d33de2ff0368190a0e69f37bcfa19009ef819fe4d507/websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da", size = 176071, upload-time = "2026-01-10T09:23:39.158Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8f/aea9c71cc92bf9b6cc0f7f70df8f0b420636b6c96ef4feee1e16f80f75dd/websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c", size = 176968, upload-time = "2026-01-10T09:23:41.031Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3f/f70e03f40ffc9a30d817eef7da1be72ee4956ba8d7255c399a01b135902a/websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767", size = 178735, upload-time = "2026-01-10T09:23:42.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +] diff --git a/altair/index.md b/altair/index.md new file mode 100644 index 0000000000000000000000000000000000000000..8ab1bf2a4b347414482716e438465e0744c19f9f --- /dev/null +++ b/altair/index.md @@ -0,0 +1,14 @@ +--- +title: Learn Altair +description: > + Learn the basics of Altair, a high-performance visualization library, + using lessons developed at the University of Washington. +--- + +## Acknowledgments + +These notebooks were created by Jeffrey Heer, Dominik Moritz, Jake VanderPlas, and Brock Craft +as part of the [Visualization Curriculum](https://uwdata.github.io/visualization-curriculum/intro.html) +at the University of Washington. +Our thanks to the authors for making their work available under an open license: +if we all share a little, we all get a lot. 
diff --git a/assets/styles.css b/assets/styles.css new file mode 100644 index 0000000000000000000000000000000000000000..ca85ec2c8e79d94affa672e8c8f2f51fe1aa8205 --- /dev/null +++ b/assets/styles.css @@ -0,0 +1,51 @@ +:root { + --primary-green: #10B981; + --dark-green: #047857; + --light-green: #D1FAE5; +} +.bg-primary { background-color: var(--primary-green); } +.text-primary { color: var(--primary-green); } +.border-primary { border-color: var(--primary-green); } +.bg-light { background-color: var(--light-green); } +.hover-grow { transition: transform 0.2s ease; } +.hover-grow:hover { transform: scale(1.02); } +.card-shadow { box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05), 0 1px 3px rgba(0, 0, 0, 0.1); } + +/* Prose styles for markdown-generated content */ +.prose h1 { font-size: 1.875rem; font-weight: 700; color: #1f2937; margin: 1.5rem 0 0.75rem; } +.prose h2 { font-size: 1.5rem; font-weight: 700; color: #1f2937; margin: 1.5rem 0 0.75rem; } +.prose h3 { font-size: 1.25rem; font-weight: 600; color: #1f2937; margin: 1.25rem 0 0.5rem; } +.prose h4 { font-size: 1.125rem; font-weight: 600; color: #1f2937; margin: 1rem 0 0.5rem; } +.prose p { color: #4b5563; margin-bottom: 1rem; line-height: 1.75; } +.prose ul { list-style-type: disc; padding-left: 1.25rem; margin-bottom: 1rem; color: #4b5563; } +.prose ol { list-style-type: decimal; padding-left: 1.25rem; margin-bottom: 1rem; color: #4b5563; } +.prose li { margin-bottom: 0.25rem; line-height: 1.75; } +.prose a { color: var(--primary-green); } +.prose a:hover { color: var(--dark-green); } +.prose strong { font-weight: 600; } +.prose code { font-family: ui-monospace, monospace; font-size: 0.875em; + background-color: #f3f4f6; padding: 0.1em 0.3em; border-radius: 0.25rem; } +.prose pre { background-color: #f3f4f6; color: #1f2937; padding: 1rem; + border-radius: 0.5rem; overflow-x: auto; margin-bottom: 1rem; } +.prose pre code { background: none; padding: 0; font-size: 0.875rem; color: inherit; } + +/* Component classes */ 
+.logo-container { background-color: var(--light-green); padding: 0.25rem; border-radius: 0.5rem; } +.card-accent { height: 0.5rem; background-color: var(--primary-green); } +.feature-card { background-color: #ffffff; padding: 1.5rem; border-radius: 0.5rem; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05), 0 1px 3px rgba(0, 0, 0, 0.1); } +.content-card { background-color: #ffffff; border: 1px solid #e5e7eb; border-radius: 0.5rem; + overflow: hidden; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05), 0 1px 3px rgba(0, 0, 0, 0.1); } +.icon-container { width: 3rem; height: 3rem; background-color: var(--light-green); + border-radius: 9999px; display: flex; align-items: center; + justify-content: center; margin-bottom: 1rem; } + +.link-primary { color: var(--primary-green); } +.link-primary:hover { color: var(--dark-green); } + +.btn-primary { background-color: var(--primary-green); color: #ffffff; font-weight: 500; + border-radius: 0.375rem; transition: background-color 300ms ease-in-out; } +.btn-primary:hover { background-color: var(--dark-green); } + +.footer-link { color: #d1d5db; transition: color 300ms ease-in-out; } +.footer-link:hover { color: #ffffff; } diff --git a/bin/build.py b/bin/build.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9cb56f6205291a2064f6dc1774fe155b27efdc --- /dev/null +++ b/bin/build.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +"""Generate a static site from Jinja2 templates and lesson data.""" + +import argparse +import datetime +import json +import re +import shutil +from pathlib import Path + +import frontmatter +import markdown as md +from jinja2 import Environment, FileSystemLoader + +from utils import get_notebook_title + + +def transform_lessons(data: dict, root: Path) -> dict: + """Transform raw lesson data into template-ready form.""" + for course_id, course in data.items(): + desc = course.get("description", "").strip() + course["description_html"] = f"

{desc}

" if desc else "" + course["notebooks"] = [ + { + "title": get_notebook_title(root / course_id / nb) + or re.sub(r"^\d+_", "", nb.replace(".py", "")).replace("_", " ").title(), + "html_path": f"{course_id}/{nb.replace('.py', '.html')}", + "local_html_path": nb.replace(".py", ".html"), + } + for nb in course.get("notebooks", []) + ] + index_md = root / course_id / "index.md" + post = frontmatter.load(index_md) + course["body_html"] = md.markdown(post.content, extensions=["fenced_code", "tables"]) + return data + + +def render(template, path, **kwargs): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(template.render(**kwargs)) + + +def main(): + parser = argparse.ArgumentParser(description="Generate static site from lesson data") + parser.add_argument("--root", required=True, help="Project root directory") + parser.add_argument("--output", required=True, help="Output directory") + parser.add_argument("--data", required=True, help="Path to lessons JSON file") + args = parser.parse_args() + + root = Path(args.root) + output = Path(args.output) + output.mkdir(parents=True, exist_ok=True) + + lessons = transform_lessons(json.loads(Path(args.data).read_text()), root) + env = Environment(loader=FileSystemLoader(root / "templates")) + current_year = datetime.date.today().year + + render( + env.get_template("index.html"), + output / "index.html", + courses=lessons, + current_year=current_year, + root_path="", + ) + + assets_src = root / "assets" + if assets_src.exists(): + shutil.copytree(assets_src, output / "assets", dirs_exist_ok=True) + + for course_id, lesson in lessons.items(): + render( + env.get_template("lesson.html"), + output / course_id / "index.html", + lesson=lesson, + current_year=current_year, + root_path="../", + ) + + page_template = env.get_template("page.html") + for page_src in sorted((root / "pages").glob("*.md")): + post = frontmatter.load(page_src) + render( + page_template, + output / page_src.stem / "index.html", + 
title=post.get("title", page_src.stem), + body_html=md.markdown(post.content, extensions=["fenced_code", "tables"]), + current_year=current_year, + root_path="../", + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/check_empty_cells.py b/bin/check_empty_cells.py similarity index 99% rename from scripts/check_empty_cells.py rename to bin/check_empty_cells.py index d03bf27d5a234d399a44f33fb302399be8e0b14c..90208c51f9f97392051a5f769cb8ffa55e945947 100644 --- a/scripts/check_empty_cells.py +++ b/bin/check_empty_cells.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """ Script to detect empty cells in marimo notebooks. @@ -15,7 +15,6 @@ This script will: """ import os -import re import sys from pathlib import Path from typing import List, Tuple @@ -136,4 +135,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/bin/check_missing_titles.py b/bin/check_missing_titles.py new file mode 100644 index 0000000000000000000000000000000000000000..bea2f33400e50525d2d5a259350d31289aa1df87 --- /dev/null +++ b/bin/check_missing_titles.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +"""Report marimo notebooks that are missing an H1 title.""" + +import sys +from pathlib import Path + +from utils import get_notebook_title + + +def main(): + root = Path(__file__).parent.parent + notebooks = sorted(root.glob("*/[0-9]*.py")) + missing = [nb for nb in notebooks if get_notebook_title(nb) is None] + if missing: + for nb in missing: + print(nb.relative_to(root)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/bin/check_notebook_packages.py b/bin/check_notebook_packages.py new file mode 100644 index 0000000000000000000000000000000000000000..f5787b28d72b2b9057588cf21cd7bb7883af72f3 --- /dev/null +++ b/bin/check_notebook_packages.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +"""Check that marimo notebooks in the same lesson directory agree on package versions. 
+ +It is acceptable for different notebooks in a directory to specify different packages, +but if two or more notebooks specify the same package, their version constraints must +be identical. +""" + +import argparse +import re +import sys +from collections import defaultdict +from pathlib import Path + + +# Regex to extract the inline script metadata block (PEP 723) +SCRIPT_BLOCK_RE = re.compile(r"^# /// script\s*\n((?:#[^\n]*\n)*?)# ///", re.MULTILINE) +DEPENDENCY_LINE_RE = re.compile(r'^#\s+"([^"]+)",?\s*$') + + +def parse_script_header(text: str) -> list[str]: + """Return the list of dependency strings from a PEP 723 script header, or [].""" + match = SCRIPT_BLOCK_RE.search(text) + if not match: + return [] + block = match.group(1) + deps: list[str] = [] + in_deps = False + for raw_line in block.splitlines(): + line = raw_line.lstrip("#").strip() + if line.startswith("dependencies"): + in_deps = True + continue + if in_deps: + if line.startswith("]"): + break + # strip surrounding quotes and comma: e.g. ' "polars==1.0",' -> 'polars==1.0' + stripped = line.strip().strip('"\'').rstrip(",").strip('"\'') + if stripped: + deps.append(stripped) + return deps + + +def package_name(dep: str) -> str: + """Extract the bare package name from a PEP 508 dependency string. 
+ + Examples: + "polars==1.22.0" -> "polars" + "pandas>=2.0,<3" -> "pandas" + "marimo" -> "marimo" + """ + return re.split(r"[><=!;\s\[]", dep, maxsplit=1)[0].lower() + + +def check_directory(lesson_dir: Path, only: set[str]) -> list[str]: + """Return a list of error messages for version inconsistencies among *only* in lesson_dir.""" + # Map package name -> {version_spec: [notebook_path, ...]} + seen: dict[str, dict[str, list[str]]] = defaultdict(lambda: defaultdict(list)) + + for nb in sorted(lesson_dir.glob("*.py")): + if nb.name not in only: + continue + try: + text = nb.read_text(encoding="utf-8") + except IOError: + continue + if "marimo.App" not in text: + continue + for dep in parse_script_header(text): + name = package_name(dep) + seen[name][dep].append(nb.name) + + errors: list[str] = [] + for name, specs in sorted(seen.items()): + if len(specs) > 1: + errors.append(f" Package '{name}' has conflicting specifications:") + for spec, files in sorted(specs.items()): + errors.append(f" {spec!r} in: {', '.join(files)}") + return errors + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("notebooks", nargs="+", metavar="NOTEBOOK", + help="notebook files to check (grouped by directory)") + args = parser.parse_args() + + dir_filter: dict[Path, set[str]] = defaultdict(set) + for nb_path in (Path(p) for p in args.notebooks): + dir_filter[nb_path.parent].add(nb_path.name) + + total_errors = 0 + for lesson_dir, only in sorted(dir_filter.items()): + errors = check_directory(lesson_dir, only=only) + if errors: + print(f"\n{lesson_dir}/") + for msg in errors: + print(msg) + total_errors += len(errors) + + if total_errors: + print(f"\nFound package version inconsistencies in {total_errors} package(s).") + sys.exit(1) + else: + print("All package version specifications are consistent.") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/bin/create_sql_lab.sql b/bin/create_sql_lab.sql new file mode 100644 
index 0000000000000000000000000000000000000000..1a787b94c5cc7c2d149094459ce99274078627c6 --- /dev/null +++ b/bin/create_sql_lab.sql @@ -0,0 +1,22 @@ +create table job ( + name text not null, + credits real not null +); + +create table work ( + person text not null, + job text not null +); + +insert into job values +('calibrate', 1.5), +('clean', 0.5); + +insert into work values +('Amal', 'calibrate'), +('Amal', 'clean'), +('Amal', 'complain'), +('Gita', 'clean'), +('Gita', 'clean'), +('Gita', 'complain'), +('Madhi', 'complain'); diff --git a/bin/create_sql_penguins.py b/bin/create_sql_penguins.py new file mode 100644 index 0000000000000000000000000000000000000000..4f06490d23aac562f95555fdec0da9ec57742eb5 --- /dev/null +++ b/bin/create_sql_penguins.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +import csv +import sqlite3 +import sys + + +SCHEMA = """ +CREATE TABLE penguins ( + species text, + island text, + bill_length_mm real, + bill_depth_mm real, + flipper_length_mm real, + body_mass_g real, + sex text +); +""" + +def main(): + infile = sys.argv[1] + outfile = sys.argv[2] + + con = sqlite3.connect(outfile) + con.execute(SCHEMA) + + with open(infile, newline="") as f: + reader = csv.DictReader(f) + rows = [ + ( + row["species"], + row["island"], + float(row["bill_length_mm"]) if row["bill_length_mm"] else None, + float(row["bill_depth_mm"]) if row["bill_depth_mm"] else None, + float(row["flipper_length_mm"]) if row["flipper_length_mm"] else None, + float(row["body_mass_g"]) if row["body_mass_g"] else None, + row["sex"] if row["sex"] else None, + ) + for row in reader + ] + + con.executemany( + "INSERT INTO penguins VALUES (?, ?, ?, ?, ?, ?, ?)", rows + ) + con.commit() + con.close() + + +if __name__ == "__main__": + main() diff --git a/bin/create_sql_survey.py b/bin/create_sql_survey.py new file mode 100644 index 0000000000000000000000000000000000000000..e0340e694824530d9e8f7f011d1c7c81cb428c1d --- /dev/null +++ b/bin/create_sql_survey.py @@ -0,0 +1,175 @@ 
+#!/usr/bin/env python + +import datetime +import faker +import itertools +import random +import sqlite3 +import sys + + +LOCALE = "es" + +NUM_PERSONS = 6 + +DATE_START = datetime.date(2025, 9, 1) +DATE_END = datetime.date(2025, 12, 31) +DATE_DURATION = 7 + +NUM_MACHINES = 5 + +CREATE_PERSONS = """\ +create table person( + person_id text not null primary key, + personal text not null, + family text not null, + supervisor_id text, + foreign key(supervisor_id) references person(person_id) +); +""" +INSERT_PERSONS = """\ +insert into person values (:person_id, :personal, :family, :supervisor_id); +""" + +CREATE_SURVEYS = """\ +create table survey( + survey_id text not null primary key, + person_id text not null, + start_date text, + end_date text, + foreign key(person_id) references person(person_id) +); +""" +INSERT_SURVEYS = """\ +insert into survey values(:survey_id, :person_id, :start, :end); +""" + +CREATE_MACHINES = """\ +create table machine( + machine_id text not null primary key, + machine_type text not null +); +""" +INSERT_MACHINES = """\ +insert into machine values(:machine_id, :machine_type); +""" + +CREATE_RATINGS = """\ +create table rating( + person_id text not null, + machine_id text not null, + level integer, + foreign key(person_id) references person(person_id), + foreign key(machine_id) references machine(machine_id) +); +""" +INSERT_RATINGS = """\ +insert into rating values(:person_id, :machine_id, :level); +""" + +def main(): + db_name = sys.argv[1] + seed = int(sys.argv[2]) + random.seed(seed) + + persons_counter = itertools.count() + next(persons_counter) + persons = gen_persons(NUM_PERSONS, persons_counter) + + supers = gen_persons(int(NUM_PERSONS / 2), persons_counter) + for p in persons: + p["supervisor_id"] = random.choice(supers)["person_id"] + if len(supers) > 1: + supers[0]["supervisor_id"] = supers[-1]["person_id"] + + surveys = gen_surveys(persons + supers[0:int(len(supers)/2)]) + surveys[int(len(surveys)/2)]["start"] = None + + cnx = 
sqlite3.connect(db_name) + cur = cnx.cursor() + + everyone = persons + supers + random.shuffle(everyone) + cur.execute(CREATE_PERSONS) + cur.executemany(INSERT_PERSONS, everyone) + + cur.execute(CREATE_SURVEYS) + cur.executemany(INSERT_SURVEYS, surveys) + + machines = gen_machines() + cur.execute(CREATE_MACHINES) + cur.executemany(INSERT_MACHINES, machines) + + ratings = gen_ratings(everyone, machines) + cur.execute(CREATE_RATINGS) + cur.executemany(INSERT_RATINGS, ratings) + + cnx.commit() + cnx.close() + + +def gen_machines(): + adjectives = "hydraulic rotary modular industrial automated".split() + nouns = "press conveyor generator actuator compressor".split() + machines = set() + while len(machines) < NUM_MACHINES: + candidate = f"{random.choice(adjectives)} {random.choice(nouns)}" + if candidate not in machines: + machines.add(candidate) + counter = itertools.count() + next(counter) + return [ + {"machine_id": f"M{next(counter):04d}", "machine_type": m} + for m in machines + ] + + +def gen_persons(num, counter): + fake = faker.Faker(LOCALE) + fake.seed_instance(random.randint(0, 1_000_000)) + return [ + { + "person_id": f"P{next(counter):03d}", + "personal": fake.first_name(), + "family": fake.last_name(), + "supervisor_id": None, + } + for _ in range(num) + ] + + +def gen_ratings(persons, machines): + temp = {} + while len(temp) < int(len(persons) * len(machines) / 4): + p = random.choice(persons)["person_id"] + m = random.choice(machines)["machine_id"] + if (p, m) in temp: + continue + temp[(p, m)] = random.choice([None, 1, 2, 3]) + return [ + {"person_id": p, "machine_id": m, "level": v} + for ((p, m), v) in temp.items() + ] + +def gen_surveys(persons): + surveys = [] + counter = itertools.count() + next(counter) + for person in persons: + person_id = person["person_id"] + start = DATE_START + while start <= DATE_END: + survey_id = f"S{next(counter):04d}" + end = start + datetime.timedelta(days=random.randint(1, DATE_DURATION)) + surveys.append({ + 
"survey_id": survey_id, + "person_id": person_id, + "start": start.isoformat(), + "end": end.isoformat() if end <= DATE_END else None + }) + start = end + datetime.timedelta(days=random.randint(1, DATE_DURATION)) + return surveys + + +if __name__ == "__main__": + main() diff --git a/bin/extract.py b/bin/extract.py new file mode 100644 index 0000000000000000000000000000000000000000..44b9134242ebc8da41c52b8d23cd2df32da213bf --- /dev/null +++ b/bin/extract.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +"""Extract lesson metadata and notebook lists into a JSON file.""" + +import argparse +import json +import re +from pathlib import Path + +import frontmatter + + +NOTEBOOK_PATTERN = re.compile(r"^\d{2}_.*\.py$") + + +def extract_lessons(root: Path) -> dict: + lessons = {} + for index_file in sorted(root.glob("*/index.md")): + lesson_dir = index_file.parent + post = frontmatter.load(index_file) + notebooks = sorted( + p.name + for p in lesson_dir.glob("*.py") + if NOTEBOOK_PATTERN.match(p.name) + ) + lessons[lesson_dir.name] = { + **post.metadata, + "notebooks": notebooks, + } + return lessons + + +def main(): + parser = argparse.ArgumentParser(description="Extract lesson metadata to JSON") + parser.add_argument("--root", required=True, help="Project root directory") + parser.add_argument("--data", required=True, help="Output JSON file") + args = parser.parse_args() + + root = Path(args.root) + data = Path(args.data) + data.parent.mkdir(parents=True, exist_ok=True) + + lessons = extract_lessons(root) + data.write_text(json.dumps(lessons, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/scripts/preview.py b/bin/preview.py similarity index 98% rename from scripts/preview.py rename to bin/preview.py index d5dce86167ced55f3744a4506b64e35ddb7224f1..626f688900b1e5e620918d9c12281c2eefdb1c9e 100644 --- a/scripts/preview.py +++ b/bin/preview.py @@ -1,10 +1,9 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import os import subprocess import argparse import 
webbrowser -import time import sys from pathlib import Path diff --git a/bin/run_notebooks.sh b/bin/run_notebooks.sh new file mode 100755 index 0000000000000000000000000000000000000000..a79da7c9575f0476edf25919085e96a8e2fc0535 --- /dev/null +++ b/bin/run_notebooks.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +for nb in $* +do + cd $(dirname $nb) + if ! output=$(uv run $(basename $nb) 2>&1); then + echo "=== $nb ===" + echo "$output" + echo + fi + cd $OLDPWD +done diff --git a/bin/utils.py b/bin/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e5a86e32a052c33ffa90bb4a2b47b20c59b1092b --- /dev/null +++ b/bin/utils.py @@ -0,0 +1,14 @@ +"""Utility functions for working with marimo notebooks.""" + +import re +from pathlib import Path + + +def get_notebook_title(path: Path) -> str | None: + """Return the first H1 Markdown heading in a marimo notebook, or None.""" + text = path.read_text(encoding="utf-8") + for match in re.finditer(r'mo\.md\(r?f?"""(.*?)"""', text, re.DOTALL): + for line in match.group(1).splitlines(): + if line.strip().startswith("# "): + return line.strip()[2:].strip() + return None diff --git a/daft/README.md b/daft/README.md deleted file mode 100644 index e51a66fd2f1991207d7bbe3a52703550a3526b3e..0000000000000000000000000000000000000000 --- a/daft/README.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn Daft - -_🚧 This collection is a work in progress. Please help us add notebooks!_ - -This collection of marimo notebooks is designed to teach you the basics of -Daft, a distributed dataframe engine that unifies data engineering, analytics & ML/AI workflows. - -**Help us build this course! βš’οΈ** - -We're seeking contributors to help us build these notebooks. Every contributor -will be acknowledged as an author in this README and in their contributed -notebooks. 
Head over to the [tracking -issue](https://github.com/marimo-team/learn/issues/43) to sign up for a planned -notebook or propose your own. - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -You can also open notebooks in our online playground by appending marimo.app/ to a notebook's URL. - -**Thanks to all our notebook authors!** - -* [PΓ©ter Gyarmati](https://github.com/peter-gy) \ No newline at end of file diff --git a/daft/_index.md b/daft/_index.md new file mode 100644 index 0000000000000000000000000000000000000000..8d4352604954bdb082da5f9d4014e87ad122956d --- /dev/null +++ b/daft/_index.md @@ -0,0 +1,13 @@ +--- +title: Learn Daft +description: > + These notebooks introduce Daft, a distributed dataframe engine + that unifies data engineering, analysis, and ML/AI workflows. +tracking: 43 +--- + +## Contributors + +Thanks to our notebook authors: + +* [PΓ©ter Gyarmati](https://github.com/peter-gy) diff --git a/data/penguins.csv b/data/penguins.csv new file mode 100644 index 0000000000000000000000000000000000000000..51fd0fe50c4e01e6f42e54063925571c004ef25a --- /dev/null +++ b/data/penguins.csv @@ -0,0 +1,345 @@ +species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex +Adelie,Torgersen,39.1,18.7,181,3750,MALE +Adelie,Torgersen,39.5,17.4,186,3800,FEMALE +Adelie,Torgersen,40.3,18,195,3250,FEMALE +Adelie,Torgersen,,,,, +Adelie,Torgersen,36.7,19.3,193,3450,FEMALE +Adelie,Torgersen,39.3,20.6,190,3650,MALE +Adelie,Torgersen,38.9,17.8,181,3625,FEMALE +Adelie,Torgersen,39.2,19.6,195,4675,MALE +Adelie,Torgersen,34.1,18.1,193,3475, +Adelie,Torgersen,42,20.2,190,4250, +Adelie,Torgersen,37.8,17.1,186,3300, +Adelie,Torgersen,37.8,17.3,180,3700, +Adelie,Torgersen,41.1,17.6,182,3200,FEMALE +Adelie,Torgersen,38.6,21.2,191,3800,MALE +Adelie,Torgersen,34.6,21.1,198,4400,MALE +Adelie,Torgersen,36.6,17.8,185,3700,FEMALE +Adelie,Torgersen,38.7,19,195,3450,FEMALE +Adelie,Torgersen,42.5,20.7,197,4500,MALE 
+Adelie,Torgersen,34.4,18.4,184,3325,FEMALE +Adelie,Torgersen,46,21.5,194,4200,MALE +Adelie,Biscoe,37.8,18.3,174,3400,FEMALE +Adelie,Biscoe,37.7,18.7,180,3600,MALE +Adelie,Biscoe,35.9,19.2,189,3800,FEMALE +Adelie,Biscoe,38.2,18.1,185,3950,MALE +Adelie,Biscoe,38.8,17.2,180,3800,MALE +Adelie,Biscoe,35.3,18.9,187,3800,FEMALE +Adelie,Biscoe,40.6,18.6,183,3550,MALE +Adelie,Biscoe,40.5,17.9,187,3200,FEMALE +Adelie,Biscoe,37.9,18.6,172,3150,FEMALE +Adelie,Biscoe,40.5,18.9,180,3950,MALE +Adelie,Dream,39.5,16.7,178,3250,FEMALE +Adelie,Dream,37.2,18.1,178,3900,MALE +Adelie,Dream,39.5,17.8,188,3300,FEMALE +Adelie,Dream,40.9,18.9,184,3900,MALE +Adelie,Dream,36.4,17,195,3325,FEMALE +Adelie,Dream,39.2,21.1,196,4150,MALE +Adelie,Dream,38.8,20,190,3950,MALE +Adelie,Dream,42.2,18.5,180,3550,FEMALE +Adelie,Dream,37.6,19.3,181,3300,FEMALE +Adelie,Dream,39.8,19.1,184,4650,MALE +Adelie,Dream,36.5,18,182,3150,FEMALE +Adelie,Dream,40.8,18.4,195,3900,MALE +Adelie,Dream,36,18.5,186,3100,FEMALE +Adelie,Dream,44.1,19.7,196,4400,MALE +Adelie,Dream,37,16.9,185,3000,FEMALE +Adelie,Dream,39.6,18.8,190,4600,MALE +Adelie,Dream,41.1,19,182,3425,MALE +Adelie,Dream,37.5,18.9,179,2975, +Adelie,Dream,36,17.9,190,3450,FEMALE +Adelie,Dream,42.3,21.2,191,4150,MALE +Adelie,Biscoe,39.6,17.7,186,3500,FEMALE +Adelie,Biscoe,40.1,18.9,188,4300,MALE +Adelie,Biscoe,35,17.9,190,3450,FEMALE +Adelie,Biscoe,42,19.5,200,4050,MALE +Adelie,Biscoe,34.5,18.1,187,2900,FEMALE +Adelie,Biscoe,41.4,18.6,191,3700,MALE +Adelie,Biscoe,39,17.5,186,3550,FEMALE +Adelie,Biscoe,40.6,18.8,193,3800,MALE +Adelie,Biscoe,36.5,16.6,181,2850,FEMALE +Adelie,Biscoe,37.6,19.1,194,3750,MALE +Adelie,Biscoe,35.7,16.9,185,3150,FEMALE +Adelie,Biscoe,41.3,21.1,195,4400,MALE +Adelie,Biscoe,37.6,17,185,3600,FEMALE +Adelie,Biscoe,41.1,18.2,192,4050,MALE +Adelie,Biscoe,36.4,17.1,184,2850,FEMALE +Adelie,Biscoe,41.6,18,192,3950,MALE +Adelie,Biscoe,35.5,16.2,195,3350,FEMALE +Adelie,Biscoe,41.1,19.1,188,4100,MALE +Adelie,Torgersen,35.9,16.6,190,3050,FEMALE 
+Adelie,Torgersen,41.8,19.4,198,4450,MALE +Adelie,Torgersen,33.5,19,190,3600,FEMALE +Adelie,Torgersen,39.7,18.4,190,3900,MALE +Adelie,Torgersen,39.6,17.2,196,3550,FEMALE +Adelie,Torgersen,45.8,18.9,197,4150,MALE +Adelie,Torgersen,35.5,17.5,190,3700,FEMALE +Adelie,Torgersen,42.8,18.5,195,4250,MALE +Adelie,Torgersen,40.9,16.8,191,3700,FEMALE +Adelie,Torgersen,37.2,19.4,184,3900,MALE +Adelie,Torgersen,36.2,16.1,187,3550,FEMALE +Adelie,Torgersen,42.1,19.1,195,4000,MALE +Adelie,Torgersen,34.6,17.2,189,3200,FEMALE +Adelie,Torgersen,42.9,17.6,196,4700,MALE +Adelie,Torgersen,36.7,18.8,187,3800,FEMALE +Adelie,Torgersen,35.1,19.4,193,4200,MALE +Adelie,Dream,37.3,17.8,191,3350,FEMALE +Adelie,Dream,41.3,20.3,194,3550,MALE +Adelie,Dream,36.3,19.5,190,3800,MALE +Adelie,Dream,36.9,18.6,189,3500,FEMALE +Adelie,Dream,38.3,19.2,189,3950,MALE +Adelie,Dream,38.9,18.8,190,3600,FEMALE +Adelie,Dream,35.7,18,202,3550,FEMALE +Adelie,Dream,41.1,18.1,205,4300,MALE +Adelie,Dream,34,17.1,185,3400,FEMALE +Adelie,Dream,39.6,18.1,186,4450,MALE +Adelie,Dream,36.2,17.3,187,3300,FEMALE +Adelie,Dream,40.8,18.9,208,4300,MALE +Adelie,Dream,38.1,18.6,190,3700,FEMALE +Adelie,Dream,40.3,18.5,196,4350,MALE +Adelie,Dream,33.1,16.1,178,2900,FEMALE +Adelie,Dream,43.2,18.5,192,4100,MALE +Adelie,Biscoe,35,17.9,192,3725,FEMALE +Adelie,Biscoe,41,20,203,4725,MALE +Adelie,Biscoe,37.7,16,183,3075,FEMALE +Adelie,Biscoe,37.8,20,190,4250,MALE +Adelie,Biscoe,37.9,18.6,193,2925,FEMALE +Adelie,Biscoe,39.7,18.9,184,3550,MALE +Adelie,Biscoe,38.6,17.2,199,3750,FEMALE +Adelie,Biscoe,38.2,20,190,3900,MALE +Adelie,Biscoe,38.1,17,181,3175,FEMALE +Adelie,Biscoe,43.2,19,197,4775,MALE +Adelie,Biscoe,38.1,16.5,198,3825,FEMALE +Adelie,Biscoe,45.6,20.3,191,4600,MALE +Adelie,Biscoe,39.7,17.7,193,3200,FEMALE +Adelie,Biscoe,42.2,19.5,197,4275,MALE +Adelie,Biscoe,39.6,20.7,191,3900,FEMALE +Adelie,Biscoe,42.7,18.3,196,4075,MALE +Adelie,Torgersen,38.6,17,188,2900,FEMALE +Adelie,Torgersen,37.3,20.5,199,3775,MALE 
+Adelie,Torgersen,35.7,17,189,3350,FEMALE +Adelie,Torgersen,41.1,18.6,189,3325,MALE +Adelie,Torgersen,36.2,17.2,187,3150,FEMALE +Adelie,Torgersen,37.7,19.8,198,3500,MALE +Adelie,Torgersen,40.2,17,176,3450,FEMALE +Adelie,Torgersen,41.4,18.5,202,3875,MALE +Adelie,Torgersen,35.2,15.9,186,3050,FEMALE +Adelie,Torgersen,40.6,19,199,4000,MALE +Adelie,Torgersen,38.8,17.6,191,3275,FEMALE +Adelie,Torgersen,41.5,18.3,195,4300,MALE +Adelie,Torgersen,39,17.1,191,3050,FEMALE +Adelie,Torgersen,44.1,18,210,4000,MALE +Adelie,Torgersen,38.5,17.9,190,3325,FEMALE +Adelie,Torgersen,43.1,19.2,197,3500,MALE +Adelie,Dream,36.8,18.5,193,3500,FEMALE +Adelie,Dream,37.5,18.5,199,4475,MALE +Adelie,Dream,38.1,17.6,187,3425,FEMALE +Adelie,Dream,41.1,17.5,190,3900,MALE +Adelie,Dream,35.6,17.5,191,3175,FEMALE +Adelie,Dream,40.2,20.1,200,3975,MALE +Adelie,Dream,37,16.5,185,3400,FEMALE +Adelie,Dream,39.7,17.9,193,4250,MALE +Adelie,Dream,40.2,17.1,193,3400,FEMALE +Adelie,Dream,40.6,17.2,187,3475,MALE +Adelie,Dream,32.1,15.5,188,3050,FEMALE +Adelie,Dream,40.7,17,190,3725,MALE +Adelie,Dream,37.3,16.8,192,3000,FEMALE +Adelie,Dream,39,18.7,185,3650,MALE +Adelie,Dream,39.2,18.6,190,4250,MALE +Adelie,Dream,36.6,18.4,184,3475,FEMALE +Adelie,Dream,36,17.8,195,3450,FEMALE +Adelie,Dream,37.8,18.1,193,3750,MALE +Adelie,Dream,36,17.1,187,3700,FEMALE +Adelie,Dream,41.5,18.5,201,4000,MALE +Chinstrap,Dream,46.5,17.9,192,3500,FEMALE +Chinstrap,Dream,50,19.5,196,3900,MALE +Chinstrap,Dream,51.3,19.2,193,3650,MALE +Chinstrap,Dream,45.4,18.7,188,3525,FEMALE +Chinstrap,Dream,52.7,19.8,197,3725,MALE +Chinstrap,Dream,45.2,17.8,198,3950,FEMALE +Chinstrap,Dream,46.1,18.2,178,3250,FEMALE +Chinstrap,Dream,51.3,18.2,197,3750,MALE +Chinstrap,Dream,46,18.9,195,4150,FEMALE +Chinstrap,Dream,51.3,19.9,198,3700,MALE +Chinstrap,Dream,46.6,17.8,193,3800,FEMALE +Chinstrap,Dream,51.7,20.3,194,3775,MALE +Chinstrap,Dream,47,17.3,185,3700,FEMALE +Chinstrap,Dream,52,18.1,201,4050,MALE +Chinstrap,Dream,45.9,17.1,190,3575,FEMALE 
+Chinstrap,Dream,50.5,19.6,201,4050,MALE +Chinstrap,Dream,50.3,20,197,3300,MALE +Chinstrap,Dream,58,17.8,181,3700,FEMALE +Chinstrap,Dream,46.4,18.6,190,3450,FEMALE +Chinstrap,Dream,49.2,18.2,195,4400,MALE +Chinstrap,Dream,42.4,17.3,181,3600,FEMALE +Chinstrap,Dream,48.5,17.5,191,3400,MALE +Chinstrap,Dream,43.2,16.6,187,2900,FEMALE +Chinstrap,Dream,50.6,19.4,193,3800,MALE +Chinstrap,Dream,46.7,17.9,195,3300,FEMALE +Chinstrap,Dream,52,19,197,4150,MALE +Chinstrap,Dream,50.5,18.4,200,3400,FEMALE +Chinstrap,Dream,49.5,19,200,3800,MALE +Chinstrap,Dream,46.4,17.8,191,3700,FEMALE +Chinstrap,Dream,52.8,20,205,4550,MALE +Chinstrap,Dream,40.9,16.6,187,3200,FEMALE +Chinstrap,Dream,54.2,20.8,201,4300,MALE +Chinstrap,Dream,42.5,16.7,187,3350,FEMALE +Chinstrap,Dream,51,18.8,203,4100,MALE +Chinstrap,Dream,49.7,18.6,195,3600,MALE +Chinstrap,Dream,47.5,16.8,199,3900,FEMALE +Chinstrap,Dream,47.6,18.3,195,3850,FEMALE +Chinstrap,Dream,52,20.7,210,4800,MALE +Chinstrap,Dream,46.9,16.6,192,2700,FEMALE +Chinstrap,Dream,53.5,19.9,205,4500,MALE +Chinstrap,Dream,49,19.5,210,3950,MALE +Chinstrap,Dream,46.2,17.5,187,3650,FEMALE +Chinstrap,Dream,50.9,19.1,196,3550,MALE +Chinstrap,Dream,45.5,17,196,3500,FEMALE +Chinstrap,Dream,50.9,17.9,196,3675,FEMALE +Chinstrap,Dream,50.8,18.5,201,4450,MALE +Chinstrap,Dream,50.1,17.9,190,3400,FEMALE +Chinstrap,Dream,49,19.6,212,4300,MALE +Chinstrap,Dream,51.5,18.7,187,3250,MALE +Chinstrap,Dream,49.8,17.3,198,3675,FEMALE +Chinstrap,Dream,48.1,16.4,199,3325,FEMALE +Chinstrap,Dream,51.4,19,201,3950,MALE +Chinstrap,Dream,45.7,17.3,193,3600,FEMALE +Chinstrap,Dream,50.7,19.7,203,4050,MALE +Chinstrap,Dream,42.5,17.3,187,3350,FEMALE +Chinstrap,Dream,52.2,18.8,197,3450,MALE +Chinstrap,Dream,45.2,16.6,191,3250,FEMALE +Chinstrap,Dream,49.3,19.9,203,4050,MALE +Chinstrap,Dream,50.2,18.8,202,3800,MALE +Chinstrap,Dream,45.6,19.4,194,3525,FEMALE +Chinstrap,Dream,51.9,19.5,206,3950,MALE +Chinstrap,Dream,46.8,16.5,189,3650,FEMALE +Chinstrap,Dream,45.7,17,195,3650,FEMALE 
+Chinstrap,Dream,55.8,19.8,207,4000,MALE +Chinstrap,Dream,43.5,18.1,202,3400,FEMALE +Chinstrap,Dream,49.6,18.2,193,3775,MALE +Chinstrap,Dream,50.8,19,210,4100,MALE +Chinstrap,Dream,50.2,18.7,198,3775,FEMALE +Gentoo,Biscoe,46.1,13.2,211,4500,FEMALE +Gentoo,Biscoe,50,16.3,230,5700,MALE +Gentoo,Biscoe,48.7,14.1,210,4450,FEMALE +Gentoo,Biscoe,50,15.2,218,5700,MALE +Gentoo,Biscoe,47.6,14.5,215,5400,MALE +Gentoo,Biscoe,46.5,13.5,210,4550,FEMALE +Gentoo,Biscoe,45.4,14.6,211,4800,FEMALE +Gentoo,Biscoe,46.7,15.3,219,5200,MALE +Gentoo,Biscoe,43.3,13.4,209,4400,FEMALE +Gentoo,Biscoe,46.8,15.4,215,5150,MALE +Gentoo,Biscoe,40.9,13.7,214,4650,FEMALE +Gentoo,Biscoe,49,16.1,216,5550,MALE +Gentoo,Biscoe,45.5,13.7,214,4650,FEMALE +Gentoo,Biscoe,48.4,14.6,213,5850,MALE +Gentoo,Biscoe,45.8,14.6,210,4200,FEMALE +Gentoo,Biscoe,49.3,15.7,217,5850,MALE +Gentoo,Biscoe,42,13.5,210,4150,FEMALE +Gentoo,Biscoe,49.2,15.2,221,6300,MALE +Gentoo,Biscoe,46.2,14.5,209,4800,FEMALE +Gentoo,Biscoe,48.7,15.1,222,5350,MALE +Gentoo,Biscoe,50.2,14.3,218,5700,MALE +Gentoo,Biscoe,45.1,14.5,215,5000,FEMALE +Gentoo,Biscoe,46.5,14.5,213,4400,FEMALE +Gentoo,Biscoe,46.3,15.8,215,5050,MALE +Gentoo,Biscoe,42.9,13.1,215,5000,FEMALE +Gentoo,Biscoe,46.1,15.1,215,5100,MALE +Gentoo,Biscoe,44.5,14.3,216,4100, +Gentoo,Biscoe,47.8,15,215,5650,MALE +Gentoo,Biscoe,48.2,14.3,210,4600,FEMALE +Gentoo,Biscoe,50,15.3,220,5550,MALE +Gentoo,Biscoe,47.3,15.3,222,5250,MALE +Gentoo,Biscoe,42.8,14.2,209,4700,FEMALE +Gentoo,Biscoe,45.1,14.5,207,5050,FEMALE +Gentoo,Biscoe,59.6,17,230,6050,MALE +Gentoo,Biscoe,49.1,14.8,220,5150,FEMALE +Gentoo,Biscoe,48.4,16.3,220,5400,MALE +Gentoo,Biscoe,42.6,13.7,213,4950,FEMALE +Gentoo,Biscoe,44.4,17.3,219,5250,MALE +Gentoo,Biscoe,44,13.6,208,4350,FEMALE +Gentoo,Biscoe,48.7,15.7,208,5350,MALE +Gentoo,Biscoe,42.7,13.7,208,3950,FEMALE +Gentoo,Biscoe,49.6,16,225,5700,MALE +Gentoo,Biscoe,45.3,13.7,210,4300,FEMALE +Gentoo,Biscoe,49.6,15,216,4750,MALE +Gentoo,Biscoe,50.5,15.9,222,5550,MALE 
+Gentoo,Biscoe,43.6,13.9,217,4900,FEMALE +Gentoo,Biscoe,45.5,13.9,210,4200,FEMALE +Gentoo,Biscoe,50.5,15.9,225,5400,MALE +Gentoo,Biscoe,44.9,13.3,213,5100,FEMALE +Gentoo,Biscoe,45.2,15.8,215,5300,MALE +Gentoo,Biscoe,46.6,14.2,210,4850,FEMALE +Gentoo,Biscoe,48.5,14.1,220,5300,MALE +Gentoo,Biscoe,45.1,14.4,210,4400,FEMALE +Gentoo,Biscoe,50.1,15,225,5000,MALE +Gentoo,Biscoe,46.5,14.4,217,4900,FEMALE +Gentoo,Biscoe,45,15.4,220,5050,MALE +Gentoo,Biscoe,43.8,13.9,208,4300,FEMALE +Gentoo,Biscoe,45.5,15,220,5000,MALE +Gentoo,Biscoe,43.2,14.5,208,4450,FEMALE +Gentoo,Biscoe,50.4,15.3,224,5550,MALE +Gentoo,Biscoe,45.3,13.8,208,4200,FEMALE +Gentoo,Biscoe,46.2,14.9,221,5300,MALE +Gentoo,Biscoe,45.7,13.9,214,4400,FEMALE +Gentoo,Biscoe,54.3,15.7,231,5650,MALE +Gentoo,Biscoe,45.8,14.2,219,4700,FEMALE +Gentoo,Biscoe,49.8,16.8,230,5700,MALE +Gentoo,Biscoe,46.2,14.4,214,4650, +Gentoo,Biscoe,49.5,16.2,229,5800,MALE +Gentoo,Biscoe,43.5,14.2,220,4700,FEMALE +Gentoo,Biscoe,50.7,15,223,5550,MALE +Gentoo,Biscoe,47.7,15,216,4750,FEMALE +Gentoo,Biscoe,46.4,15.6,221,5000,MALE +Gentoo,Biscoe,48.2,15.6,221,5100,MALE +Gentoo,Biscoe,46.5,14.8,217,5200,FEMALE +Gentoo,Biscoe,46.4,15,216,4700,FEMALE +Gentoo,Biscoe,48.6,16,230,5800,MALE +Gentoo,Biscoe,47.5,14.2,209,4600,FEMALE +Gentoo,Biscoe,51.1,16.3,220,6000,MALE +Gentoo,Biscoe,45.2,13.8,215,4750,FEMALE +Gentoo,Biscoe,45.2,16.4,223,5950,MALE +Gentoo,Biscoe,49.1,14.5,212,4625,FEMALE +Gentoo,Biscoe,52.5,15.6,221,5450,MALE +Gentoo,Biscoe,47.4,14.6,212,4725,FEMALE +Gentoo,Biscoe,50,15.9,224,5350,MALE +Gentoo,Biscoe,44.9,13.8,212,4750,FEMALE +Gentoo,Biscoe,50.8,17.3,228,5600,MALE +Gentoo,Biscoe,43.4,14.4,218,4600,FEMALE +Gentoo,Biscoe,51.3,14.2,218,5300,MALE +Gentoo,Biscoe,47.5,14,212,4875,FEMALE +Gentoo,Biscoe,52.1,17,230,5550,MALE +Gentoo,Biscoe,47.5,15,218,4950,FEMALE +Gentoo,Biscoe,52.2,17.1,228,5400,MALE +Gentoo,Biscoe,45.5,14.5,212,4750,FEMALE +Gentoo,Biscoe,49.5,16.1,224,5650,MALE +Gentoo,Biscoe,44.5,14.7,214,4850,FEMALE 
+Gentoo,Biscoe,50.8,15.7,226,5200,MALE +Gentoo,Biscoe,49.4,15.8,216,4925,MALE +Gentoo,Biscoe,46.9,14.6,222,4875,FEMALE +Gentoo,Biscoe,48.4,14.4,203,4625,FEMALE +Gentoo,Biscoe,51.1,16.5,225,5250,MALE +Gentoo,Biscoe,48.5,15,219,4850,FEMALE +Gentoo,Biscoe,55.9,17,228,5600,MALE +Gentoo,Biscoe,47.2,15.5,215,4975,FEMALE +Gentoo,Biscoe,49.1,15,228,5500,MALE +Gentoo,Biscoe,47.3,13.8,216,4725, +Gentoo,Biscoe,46.8,16.1,215,5500,MALE +Gentoo,Biscoe,41.7,14.7,210,4700,FEMALE +Gentoo,Biscoe,53.4,15.8,219,5500,MALE +Gentoo,Biscoe,43.3,14,208,4575,FEMALE +Gentoo,Biscoe,48.1,15.1,209,5500,MALE +Gentoo,Biscoe,50.5,15.2,216,5000,FEMALE +Gentoo,Biscoe,49.8,15.9,229,5950,MALE +Gentoo,Biscoe,43.5,15.2,213,4650,FEMALE +Gentoo,Biscoe,51.5,16.3,230,5500,MALE +Gentoo,Biscoe,46.2,14.1,217,4375,FEMALE +Gentoo,Biscoe,55.1,16,230,5850,MALE +Gentoo,Biscoe,44.5,15.7,217,4875, +Gentoo,Biscoe,48.8,16.2,222,6000,MALE +Gentoo,Biscoe,47.2,13.7,214,4925,FEMALE +Gentoo,Biscoe,,,,, +Gentoo,Biscoe,46.8,14.3,215,4850,FEMALE +Gentoo,Biscoe,50.4,15.7,222,5750,MALE +Gentoo,Biscoe,45.2,14.8,212,5200,FEMALE +Gentoo,Biscoe,49.9,16.1,213,5400,MALE diff --git a/duckdb/01_getting_started.py b/duckdb/01_getting_started.py index d6a735f2a793e0ef889b8a9edeb8c262f73617fb..57e1820414747233e061c15bbb03a1f2b2757276 100644 --- a/duckdb/01_getting_started.py +++ b/duckdb/01_getting_started.py @@ -2,14 +2,13 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "duckdb==1.3.2", -# "polars==1.17.1", -# "numpy==2.2.4", -# "pyarrow==19.0.1", -# "pandas==2.2.3", -# "sqlglot==26.12.1", -# "plotly==5.24.1", -# "statsmodels==0.14.4", +# "duckdb==1.4.4", +# "numpy==2.4.3", +# "pandas==2.3.2", +# "plotly[express]==6.3.0", +# "polars[pyarrow]==1.24.0", +# "sqlglot==27.0.0", +# "statsmodels==0.14.5", # ] # /// @@ -32,9 +31,7 @@ def _(mo): @app.cell(hide_code=True) def _(mo): mo.md(rf""" - # πŸ¦† **DuckDB**: An Embeddable Analytical Database System - - ## What is DuckDB? + # What is DuckDB? 
[DuckDB](https://duckdb.org/) is a _high-performance_, in-process, embeddable SQL OLAP (Online Analytical Processing) Database Management System (DBMS) designed for simplicity and speed. It's essentially a fully-featured database that runs directly within your application's process, without needing a separate server. This makes it excellent for complex analytical workloads, offering a robust SQL interface and efficient processing – perfect for learning about databases and data analysis concepts. It's a great alternative to heavier database systems like PostgreSQL or MySQL when you don't need a full-blown server. diff --git a/duckdb/008_loading_parquet.py b/duckdb/08_loading_parquet.py similarity index 98% rename from duckdb/008_loading_parquet.py rename to duckdb/08_loading_parquet.py index ffc0b4f35f0f77fea0d3ecc7b4f0c0e722306d2f..0e9f28e5292afbe7383f8fb30f3123c934ed6278 100644 --- a/duckdb/008_loading_parquet.py +++ b/duckdb/08_loading_parquet.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "duckdb==1.3.2", -# "pyarrow==19.0.1", -# "plotly.express", +# "duckdb==1.4.4", +# "polars[pyarrow]==1.24.0", +# "plotly[express]==6.3.0", # "sqlglot==27.0.0", # ] # /// diff --git a/duckdb/009_loading_json.py b/duckdb/09_loading_json.py similarity index 98% rename from duckdb/009_loading_json.py rename to duckdb/09_loading_json.py index d48cadb5339bf0f69c4d12896e13e2e8e6364d71..efbd982d6cd7b843db83dfdce418e60cf74bd085 100644 --- a/duckdb/009_loading_json.py +++ b/duckdb/09_loading_json.py @@ -2,9 +2,9 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "duckdb==1.3.2", -# "sqlglot==26.11.1", -# "polars[pyarrow]==1.25.2", +# "duckdb==1.4.4", +# "polars[pyarrow]==1.24.0", +# "sqlglot==27.0.0", # ] # /// diff --git a/duckdb/011_working_with_apache_arrow.py b/duckdb/11_working_with_apache_arrow.py similarity index 93% rename from duckdb/011_working_with_apache_arrow.py rename to duckdb/11_working_with_apache_arrow.py index 
7765754b77735a5b8526decb5610584aa63c6215..91136784fc1c0df970fa8fa1af0749bf7e5f377c 100644 --- a/duckdb/011_working_with_apache_arrow.py +++ b/duckdb/11_working_with_apache_arrow.py @@ -2,13 +2,11 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "duckdb==1.3.2", -# "pyarrow==19.0.1", -# "polars[pyarrow]==1.25.2", -# "pandas==2.2.3", +# "altair==6.0.0", +# "duckdb==1.4.4", +# "pandas==2.3.2", +# "polars[pyarrow]==1.24.0", # "sqlglot==27.0.0", -# "psutil==7.0.0", -# "altair", # ] # /// @@ -534,15 +532,8 @@ def _(mo): @app.cell -def _(polars_data, psutil, time): - import os - import pyarrow.compute as pc # Add this import - - # Get current process - process = psutil.Process(os.getpid()) - - # Measure memory before operations - memory_before = process.memory_info().rss / 1024 / 1024 # MB +def _(mo, polars_data, time): + import pyarrow.compute as pc # Perform multiple Arrow-based operations (zero-copy) latest_start_time = time.time() @@ -550,11 +541,9 @@ def _(polars_data, psutil, time): # These operations use Arrow's zero-copy capabilities arrow_table = polars_data.to_arrow() arrow_sliced = arrow_table.slice(0, 100000) - # Use PyArrow compute functions for filtering arrow_filtered = arrow_table.filter(pc.greater(arrow_table['value'], 500000)) arrow_ops_time = time.time() - latest_start_time - memory_after_arrow = process.memory_info().rss / 1024 / 1024 # MB # Compare with traditional copy-based operations latest_start_time = time.time() @@ -565,16 +554,21 @@ def _(polars_data, psutil, time): pandas_filtered = pandas_copy[pandas_copy['value'] > 500000].copy() copy_ops_time = time.time() - latest_start_time - memory_after_copy = process.memory_info().rss / 1024 / 1024 # MB - print("Memory Usage Comparison:") - print(f"Initial memory: {memory_before:.2f} MB") - print(f"After Arrow operations: {memory_after_arrow:.2f} MB (diff: +{memory_after_arrow - memory_before:.2f} MB)") - print(f"After copy operations: {memory_after_copy:.2f} MB (diff: 
+{memory_after_copy - memory_before:.2f} MB)") - print(f"\nTime comparison:") - print(f"Arrow operations: {arrow_ops_time:.3f} seconds") - print(f"Copy operations: {copy_ops_time:.3f} seconds") - print(f"Speedup: {copy_ops_time/arrow_ops_time:.1f}x") + mo.vstack([ + mo.md(f""" +**Time comparison:** + +| Method | Time (s) | +|--------|----------| +| Arrow operations | {arrow_ops_time:.3f} | +| Copy operations | {copy_ops_time:.3f} | +| Speedup | {copy_ops_time/arrow_ops_time:.1f}x | + +> **Note:** Memory usage statistics are not available in this environment. +> Arrow's zero-copy design typically uses 20–40% less memory than Pandas copies. +"""), + ]) return @@ -608,8 +602,7 @@ def _(): import pandas as pd import duckdb import sqlglot - import psutil - return duckdb, mo, pa, pd, pl, psutil + return duckdb, mo, pa, pd, pl if __name__ == "__main__": diff --git a/duckdb/DuckDB_Loading_CSVs.py b/duckdb/DuckDB_Loading_CSVs.py index d7a25a2314a1bfa8ae2b932f5bf8f2f259db0d4d..ac340c2aef2430b4150a65dfc3543396de50124d 100644 --- a/duckdb/DuckDB_Loading_CSVs.py +++ b/duckdb/DuckDB_Loading_CSVs.py @@ -2,12 +2,11 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "plotly.express", +# "duckdb==1.4.4", # "plotly==6.0.1", -# "duckdb==1.3.2", -# "sqlglot==26.11.1", -# "pyarrow==19.0.1", # "polars==1.27.1", +# "pyarrow==19.0.1", +# "sqlglot==27.0.0", # ] # /// diff --git a/duckdb/README.md b/duckdb/README.md deleted file mode 100644 index 8d4b80b21b718dd48ef9af963f065d77e2e749b0..0000000000000000000000000000000000000000 --- a/duckdb/README.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn DuckDB - -_🚧 This collection is a work in progress. Please help us add notebooks!_ - -This collection of marimo notebooks is designed to teach you the basics of -DuckDB, a fast in-memory OLAP engine that can interoperate with Dataframes. -These notebooks also show how marimo gives DuckDB superpowers. - -**Help us build this course! 
βš’οΈ** - -We're seeking contributors to help us build these notebooks. Every contributor -will be acknowledged as an author in this README and in their contributed -notebooks. Head over to the [tracking -issue](https://github.com/marimo-team/learn/issues/48) to sign up for a planned -notebook or propose your own. - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -You can also open notebooks in our online playground by appending marimo.app/ to a notebook's URL. - - -**Authors.** - -Thanks to all our notebook authors! - -* [Mustjaab](https://github.com/Mustjaab) -* [julius383](https://github.com/julius383) -* [thliang01](https://github.com/thliang01) \ No newline at end of file diff --git a/duckdb/index.md b/duckdb/index.md new file mode 100644 index 0000000000000000000000000000000000000000..cb8bfb0e32a156c9fc92a14767a26e53add8154d --- /dev/null +++ b/duckdb/index.md @@ -0,0 +1,16 @@ +--- +title: Learn DuckDB +description: > + These notebooks teach you the basics of DuckDB, + a fast in-memory database engine that can interoperate + with dataframes, and show how marimo gives DuckDB superpowers. +tracking: 48 +--- + +## Contributors + +Thanks to our notebook authors: + +* [Mustjaab](https://github.com/Mustjaab) +* [julius383](https://github.com/julius383) +* [thliang01](https://github.com/thliang01) diff --git a/functional_programming/05_functors.py b/functional/05_functors.py similarity index 99% rename from functional_programming/05_functors.py rename to functional/05_functors.py index cf942c543f8b9ea8a3b87c039e68ae44ec0fa9a3..e28094d6ac66b34d06babcc0d5b3c5c806c08a42 100644 --- a/functional_programming/05_functors.py +++ b/functional/05_functors.py @@ -875,7 +875,7 @@ def _(mo): @app.cell(hide_code=True) def _(mo): - mo.md(""" + mo.md(r""" ## Functor laws, again Once again there are a few axioms that functors have to obey. 
diff --git a/functional_programming/06_applicatives.py b/functional/06_applicatives.py similarity index 99% rename from functional_programming/06_applicatives.py rename to functional/06_applicatives.py index 22e19e0ac3dee560b395ec4c9c41b0ab56bc61ec..a701b626ef864d52bae03ed68e6cd32bac036372 100644 --- a/functional_programming/06_applicatives.py +++ b/functional/06_applicatives.py @@ -14,7 +14,7 @@ app = marimo.App(app_title="Applicative programming with effects") @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Applicative programming with effects + # Applicative Programming with Effects `Applicative Functor` encapsulates certain sorts of *effectful* computations in a functionally pure way, and encourages an *applicative* programming style. diff --git a/functional/_index.md b/functional/_index.md new file mode 100644 index 0000000000000000000000000000000000000000..b2901b5680632852486e8c837426fd0b7dd07d55 --- /dev/null +++ b/functional/_index.md @@ -0,0 +1,25 @@ +--- +title: Learn Functional Programming +description: > + These notebooks introduce powerful ideas from functional programming + in Python, taking inspiration from Haskell and category theory. +tracking: 51 +--- + +Using only Python's standard library, these lessons construct +functional programming concepts from first principles. 
+Topics include: + +- Currying and higher-order functions +- Functors, Applicatives, and Monads +- Category theory fundamentals + +## Contributors + +Thanks to our notebook authors: + +- mΓ©taboulie + +and reviewers: + +- [Srihari Thyagarajan](https://github.com/Haleshot) diff --git a/functional_programming/CHANGELOG.md b/functional_programming/CHANGELOG.md deleted file mode 100644 index 0c8dd2ae71762c1e7b59bd17ebd8ddb19f7e623a..0000000000000000000000000000000000000000 --- a/functional_programming/CHANGELOG.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: Changelog -marimo-version: 0.18.4 ---- - -# Changelog of the functional-programming course - -## 2025-04-16 - -**applicatives.py** - -- replace `return NotImplementedError` with `raise NotImplementedError` - -- add `Either` applicative -- Add `Alternative` - -## 2025-04-11 - -**functors.py** - -- add `Bifunctor` section - -- replace `return NotImplementedError` with `raise NotImplementedError` - -## 2025-04-08 - -**functors.py** - -- restructure the notebook -- replace `f` in the function signatures with `g` to indicate regular functions and - distinguish from functors -- move `Maybe` funtor to section `More Functor instances` - -- add `Either` functor - -- add `unzip` utility function for functors - -## 2025-04-07 - -**applicatives.py** - -- the `apply` method of `Maybe` _Applicative_ should return `None` when `fg` or `fa` is - `None` - -- add `sequenceL` as a classmethod for `Applicative` and add examples for `Wrapper`, - `Maybe`, `List` -- add description for utility functions of `Applicative` - -- refine the implementation of `IO` _Applicative_ -- reimplement `get_chars` with `IO.sequenceL` - -- add an example to show that `ListMonoidal` is equivalent to `List` _Applicative_ - -## 2025-04-06 - -**applicatives.py** - -- remove `sequenceL` from `Applicative` because it should be a classmethod but can't be - generically implemented - -## 2025-04-02 - -**functors.py** - -- Migrate to `python3.13` - - - Replace all 
occurrences of - - ```python - class Functor(Generic[A]) - ``` - - with - - ```python - class Functor[A] - ``` - - for conciseness - -- Use `fa` in function signatures instead of `a` when `fa` is a _Functor_ - -**applicatives.py** - -- `0.1.0` version of notebook `06_applicatives.py` - -## 2025-03-16 - -**functors.py** - -- Use uppercased letters for `Generic` types, e.g. `A = TypeVar("A")` -- Refactor the `Functor` class, changing `fmap` and utility methods to `classmethod` - - For example: - - ```python - @dataclass - class Wrapper(Functor, Generic[A]): - value: A - - @classmethod - def fmap(cls, f: Callable[[A], B], a: "Wrapper[A]") -> "Wrapper[B]": - return Wrapper(f(a.value)) - - >>> Wrapper.fmap(lambda x: x + 1, wrapper) - Wrapper(value=2) - ``` - -- Move the `check_functor_law` method from `Functor` class to a standard function - -- Rename `ListWrapper` to `List` for simplicity -- Remove the `Just` class - -- Rewrite proofs - -## 2025-03-13 - -**functors.py** - -- `0.1.0` version of notebook `05_functors` - -Thank [Akshay](https://github.com/akshayka) and [Haleshot](https://github.com/Haleshot) -for reviewing - -## 2025-03-11 - -**functors.py** - -- Demo version of notebook `05_functors.py` \ No newline at end of file diff --git a/functional_programming/README.md b/functional_programming/README.md deleted file mode 100644 index 72f94a5fc4db533aa1f0b9a845fd768e0d5e3948..0000000000000000000000000000000000000000 --- a/functional_programming/README.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn Functional Programming - -_🚧 This collection is a [work in progress](https://github.com/marimo-team/learn/issues/51)._ - -This series of marimo notebooks introduces the powerful paradigm of functional -programming through Python. Taking inspiration from Haskell and Category -Theory, we'll build a strong foundation in FP concepts that can transform how -you approach software development. 
- -## What You'll Learn - -**Using only Python's standard library**, we'll construct functional -programming concepts from first principles. - -Topics include: - -+ Currying and higher-order functions -+ Functors, Applicatives, and Monads -+ Category theory fundamentals - -## Running Notebooks - -### Locally - -To run a notebook locally, use - -```bash -uvx marimo edit -``` - -For example, run the `Functor` tutorial with - -```bash -uvx marimo edit https://github.com/marimo-team/learn/blob/main/functional_programming/05_functors.py -``` - -### On Our Online Playground - -You can also open notebooks in our online playground by appending `marimo.app/` to a notebook's URL like: - -https://marimo.app/https://github.com/marimo-team/learn/blob/main/functional_programming/05_functors.py - -### On Our Landing Page - -Open the notebooks in our landing page page [here](https://marimo-team.github.io/learn/functional_programming/05_functors.html) - -## Collaboration - -If you're interested in collaborating or have questions, please reach out to me -on Discord (@eugene.hs). - -## Description of notebooks - -Check [here](https://github.com/marimo-team/learn/issues/51) for current series -structure. - -| Notebook | Title | Key Concepts | Prerequisites | -|----------|-------|--------------|---------------| -| [05. Functors](https://github.com/marimo-team/learn/blob/main/functional_programming/05_functors.py) | Category Theory and Functors | Category Theory, Functor, fmap, Bifunctor | Basic Python, Functions | -| [06. Applicatives](https://github.com/marimo-team/learn/blob/main/functional_programming/06_applicatives.py) | Applicative programming with effects | Applicative Functor, pure, apply, Effectful programming, Alternative | Functors | - -**Authors.** - -Thanks to all our notebook authors! - -- [mΓ©taboulie](https://github.com/metaboulie) - -**Reviewers.** - -Thanks to all our notebook reviews! 
- -- [Haleshot](https://github.com/Haleshot) \ No newline at end of file diff --git a/optimization/01_least_squares.py b/optimization/01_least_squares.py index b69d71966f1494f648bb6e466bfdda88c233703f..da6894c0aa6b3a3c87446db82fac26bd4616f4e7 100644 --- a/optimization/01_least_squares.py +++ b/optimization/01_least_squares.py @@ -1,9 +1,9 @@ # /// script # requires-python = ">=3.11" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "numpy==2.2.2", +# "numpy==2.4.3", # ] # /// @@ -22,7 +22,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Least squares + # Least Squares In a least-squares problem, we have measurements $A \in \mathcal{R}^{m \times n}$ (i.e., $m$ rows and $n$ columns) and $b \in \mathcal{R}^m$. We seek a vector diff --git a/optimization/02_linear_program.py b/optimization/02_linear_program.py index 40cdc1f19b9ad84fd86dfab5b53f049d0889bea0..4934cdd815f132f90e396c85f41b2da14da3cbcb 100644 --- a/optimization/02_linear_program.py +++ b/optimization/02_linear_program.py @@ -1,11 +1,11 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.2", -# "wigglystuff==0.1.9", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "wigglystuff==0.2.37", # ] # /// @@ -24,7 +24,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Linear program + # Linear Program A linear program is an optimization problem with a linear objective and affine inequality constraints. 
A common standard form is the following: diff --git a/optimization/03_minimum_fuel_optimal_control.py b/optimization/03_minimum_fuel_optimal_control.py index 916f566208c309d15cd3cff4642b9eecf411389e..184f602760957e44fd7deca871b2e1baae15f8b6 100644 --- a/optimization/03_minimum_fuel_optimal_control.py +++ b/optimization/03_minimum_fuel_optimal_control.py @@ -1,7 +1,11 @@ # /// script # requires-python = ">=3.13" # dependencies = [ +# "cvxpy-base", # "marimo", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "wigglystuff==0.2.37", # ] # /// import marimo @@ -19,7 +23,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Minimal fuel optimal control + # Minimal Fuel Optimal Control This notebook includes an application of linear programming to controlling a physical system, adapted from [Convex @@ -128,14 +132,14 @@ def _(): @app.cell -def _(A, T, b, cp, mo, n, x0, xdes): +def _(A, T, b, cp, mo, n, np, x0, xdes): X, u = cp.Variable(shape=(n, T + 1)), cp.Variable(shape=(1, T)) objective = cp.sum(cp.maximum(cp.abs(u), 2 * cp.abs(u) - 1)) constraints = [ X[:, 1:] == A @ X[:, :-1] + b @ u, - X[:, 0] == x0, - X[:, -1] == xdes, + X[:, 0] == np.array(x0).flatten(), + X[:, -1] == np.array(xdes).flatten(), ] fuel_used = cp.Problem(cp.Minimize(objective), constraints).solve() diff --git a/optimization/04_quadratic_program.py b/optimization/04_quadratic_program.py index b81fa6857c885959e93bd3a815d23c392ddf1205..b0f7692e3b4e6c1b622cf622c01d208caf58ce19 100644 --- a/optimization/04_quadratic_program.py +++ b/optimization/04_quadratic_program.py @@ -1,11 +1,11 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.2", -# "wigglystuff==0.1.9", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "wigglystuff==0.2.37", # ] # /// @@ -24,7 +24,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Quadratic program + # Quadratic Program A quadratic program is an optimization 
problem with a quadratic objective and affine equality and inequality constraints. A common standard form is the diff --git a/optimization/05_portfolio_optimization.py b/optimization/05_portfolio_optimization.py index b3c42476e6f7ae0926ac8e0e216ddea693968f37..36e7cf50865d5732962e5853299c6f8ca98934d0 100644 --- a/optimization/05_portfolio_optimization.py +++ b/optimization/05_portfolio_optimization.py @@ -1,12 +1,12 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.2", -# "scipy==1.15.1", -# "wigglystuff==0.1.9", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", +# "wigglystuff==0.2.37", # ] # /// @@ -25,7 +25,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Portfolio optimization + # Portfolio Optimization """) return @@ -145,7 +145,7 @@ def _(mo, np): def _(mu_widget, np): np.random.seed(1) n = 10 - mu = np.array(mu_widget.matrix) + mu = np.array(mu_widget.matrix).flatten() Sigma = np.random.randn(n, n) Sigma = Sigma.T.dot(Sigma) return Sigma, mu, n @@ -153,7 +153,7 @@ def _(mu_widget, np): @app.cell(hide_code=True) def _(mo): - mo.md(""" + mo.md(r""" Next, we solve the problem for 100 different values of $\gamma$ """) return @@ -187,7 +187,7 @@ def _(cp, gamma, np, prob, ret, risk): @app.cell(hide_code=True) def _(mo): - mo.md(""" + mo.md(r""" Plotted below are the risk return tradeoffs for two values of $\gamma$ (blue squares), and the risk return tradeoffs for investing fully in each asset (red circles) """) return diff --git a/optimization/06_convex_optimization.py b/optimization/06_convex_optimization.py index cbf1f7d74bf6ba6c292e2b9ff2a554a9f0806853..24c70b15cc30d52eda839d43d1fc8c283a393d1c 100644 --- a/optimization/06_convex_optimization.py +++ b/optimization/06_convex_optimization.py @@ -1,9 +1,9 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "numpy==2.2.2", 
+# "numpy==2.4.3", # ] # /// @@ -22,7 +22,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Convex optimization + # Convex Optimization In the previous tutorials, we learned about least squares, linear programming, and quadratic programming, and saw applications of each. We also learned that these problem diff --git a/optimization/07_sdp.py b/optimization/07_sdp.py index 0783ad3a473e4d0a6d0b28ae51cbd1f619576fed..9da845d476c8238d3b0722b89aea123b433a5a58 100644 --- a/optimization/07_sdp.py +++ b/optimization/07_sdp.py @@ -1,10 +1,10 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "cvxpy==1.6.0", +# "cvxpy-base", # "marimo", -# "numpy==2.2.2", -# "wigglystuff==0.1.9", +# "numpy==2.4.3", +# "wigglystuff==0.2.37", # ] # /// @@ -23,7 +23,7 @@ def _(): @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Semidefinite program + # Semidefinite Program """) return diff --git a/optimization/README.md b/optimization/README.md deleted file mode 100644 index edbfa9db0b1974dc235d7b888b6fe3b7df55dd9d..0000000000000000000000000000000000000000 --- a/optimization/README.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn optimization - -This collection of marimo notebooks teaches you the basics of convex -optimization. - -After working through these notebooks, you'll understand how to create -and solve optimization problems using the Python library -[CVXPY](https://github.com/cvxpy/cvxpy), as well as how to apply what you've -learned to real-world problems such as portfolio allocation in finance, -control of vehicles, and more. - -![SpaceX](https://www.debugmind.com/wp-content/uploads/2020/01/spacex-1.jpg) - -_SpaceX solves convex optimization problems onboard to land its rockets, using CVXGEN, a code generator for quadratic programming developed at Stephen Boyd’s Stanford lab. 
Photo by SpaceX, licensed CC BY-NC 2.0._ - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -For example, run the least-squares tutorial with - -```bash -uvx marimo edit https://github.com/marimo-team/learn/blob/main/optimization/01_least_squares.py -``` - -You can also open notebooks in our online playground by appending `marimo.app/` -to a notebook's URL: [marimo.app/github.com/marimo-team/learn/blob/main/optimization/01_least_squares.py](https://marimo.app/https://github.com/marimo-team/learn/blob/main/optimization/01_least_squares.py). - -**Thanks to all our notebook authors!** - -* [Akshay Agrawal](https://github.com/akshayka) \ No newline at end of file diff --git a/optimization/index.md b/optimization/index.md new file mode 100644 index 0000000000000000000000000000000000000000..c8c0c3387c5a821225c726244560b6d552964c6f --- /dev/null +++ b/optimization/index.md @@ -0,0 +1,22 @@ +--- +title: Learn Optimization +description: > + Learn the basics of convex optimization using Python, and + see how to apply these ideas to vehicle control, portfolio + allocation in finance, and other areas. +--- + +After working through these notebooks, you'll understand how to create +and solve optimization problems using Python's +[CVXPY](https://github.com/cvxpy/cvxpy) library, as well as how to +apply what you've learned to real-world problems. + +![SpaceX](https://www.debugmind.com/wp-content/uploads/2020/01/spacex-1.jpg) + +_SpaceX solves convex optimization problems onboard to land its rockets, using CVXGEN, a code generator for quadratic programming developed at Stephen Boyd's Stanford lab. 
Photo by SpaceX, licensed CC BY-NC 2.0._ + +## Contributors + +Thanks to our notebook authors: + +* [Akshay Agrawal](https://github.com/akshayka) diff --git a/pages/contributors.md b/pages/contributors.md new file mode 100644 index 0000000000000000000000000000000000000000..86c29c9150ae68a3070f08b2633988303cd96ab9 --- /dev/null +++ b/pages/contributors.md @@ -0,0 +1,46 @@ +--- +title: Contributing to This Site +--- + +## Introduction + +- what we're trying to achieve +- what we're looking for +- guidance for educators +- where to find community +- licensing +- use of AI +- how we acknowledge contributions + +## How to Contribute + +- setting up environment + - including a quick intro to useful `uv` commands +- WASM + - what it is + - package compatibility (discussed in more detail below) +- formatting and checking with `ruff` +- naming conventions + - `dd_some_title.py` is included in index page + - other Python files aren't (e.g., notebooks under development) + - see note above about WASM file inclusion +- useful `make` targets + - `make install`: install packages required *to build the site* + - `make check`: run all quick checks + - `make check_exec NOTEBOOKS="??_*.py"`: run a set of notebooks to check for runtime errors + - `make check_packages NOTEBOOKS="??_*.py"`: check for inconsistent package versions across notebooks + - `make build`: build website + - `make clean`: clean up stray files + +## Things to Know + +- marimo skills +- underscore-prefixed variables +- returning `mo.markdown()` from code cell +- `mo.show_code()` +- marimo slides +- localizing files for WASM + - the `public` directory and the `marimo_learn` package + - examples of URLs +- WASM package compatibility issues (polars, numba) +- widgets (wigglystuff and https://anywidget.dev/en/community/#widgets-gallery) diff --git a/pages/educators.md b/pages/educators.md new file mode 100644 index 0000000000000000000000000000000000000000..f7025445aa3925c9a01ce11fccc013eec479e672 --- /dev/null +++ 
b/pages/educators.md @@ -0,0 +1,362 @@ +--- +title: marimo for Educators +--- + +## Introduction + +- what *is* a notebook? + - *literate programming* mixes prose and software in a single "runnable paper" + - each *cell* is prose or software + - prose typically written in Markdown + - software written in whatever programming languages the notebook supports + - software's output displayed in the notebook as well +- why notebooks for everyday work? + - easier to understand (think about the way textbooks present material) + - improves reproducibility + - [GVW: if we emphasize embedded AI] keep track of what you asked for as well as what you did +- why notebooks for learning? + - more engaging than static material: learners are active users of material, not passive consumers, can experiment with settings, alter code, etc. + - no installation required: notebooks can be hosted so learners don't have to struggle with the hard bits first (i.e., focus on learning rather than on the tool) + - reproducibility helps collaboration as well [GVW: but we don’t support concurrent editing a la Google Docs, which some people will regard as table stakes] + - less intimidating than jumping straight into scripting + - introduces a real-world tool + - [if we emphasize embedded AI] a natural way to bring LLMs into the classroom +- why notebooks for teaching? + - all of the above… + - create interactive lecture material in a single place +- why the marimo notebook? 
+ - open source + - more than Notebook but not as intimidating as VS Code + - reactivity allows for (encourages) dynamic, interactive elements + - marimo is both a notebook and a library of UI elements + - and AnyWidget makes it relatively easy to extend [GVW: point at [faw](https://github.com/gvwilson/faw)] + - doesn't allow out-of-order execution of cells, which reduces β€œworked for me” complaints + - plays nicely with other Python tools (because a notebook is a Python file) + - plays nicely with version control (same reason) + - helps instructors keep their prose and examples in sync + - configurable interaction with AI tools + - [if we emphasize embedded AI] natural way to teach prompting and review +- why *not* marimo? + - not yet as widely known as Jupyter (i.e., your IT department may not already support it) + - not yet integrated with auto-grading tools ([faw](https://github.com/gvwilson/faw) is a start, but we're waiting to see what you want) + - doesn't yet support multi-notebook books + - some quirks that might not make it the right tool for a CS-101 course (see below) + +## Ways to Teach With marimo + +- high level + - follow along with lesson (code already present) + - workbooks for assignments ("fill in these cells") + - notebooks as apps (play with data rather than write code) + - notebooks as lab reports (models real-world use) +- micro + - scroll through a pre-executed notebook + - step through a notebook by executing the cells in order + - fill out details or values into a mostly complete notebook + - tweak or fill in a notebook with some content + - add content to a completely blank notebook + - ask learners what to add *or* what's going to happen + - ask AI to do something and then explore/correct/improve its output + +## Things to Watch Out For + +- Variable names + - Underscored variable names are different from common usage, and require some understanding of scope + - Solution is functions-early teaching methodology, which has a sound 
pedagogical basis +- Image files + - For security reasons, marimo requires local image files to be in a folder called `public` below the directory the notebook is run from, and to be accessed in Markdown as `[alt text](/public/image.ext)` + - Which means it’s important to launch the notebook from the right place + - Can get around this using `mo.image` but that can’t be embedded in Markdown +- [Using pytest in marimo](https://docs.marimo.io/guides/testing/pytest/#testing-in-notebook) is straightforward as long as the cell *only* contains tests +- marimo uses [KaTeX](https://katex.org/) rather than [MathJax](https://www.mathjax.org/) for rendering math - see the appendix to this document for notes + +## Pedagogical Patterns + +### Shift-Enter + +**Description:** Learner starts with complete notebook, re-executes cells; (possibly) fills in prose cells with analysis/description + +**Use For:** Introduce new topics; check understanding (e.g., warmup exercise) + +**Works For:** Any audience + +**Format:** Synchronous + +**Pro:** Gives learners a complete working example + +**Con:** Low engagement + +### Fill in the blanks + +**Description:** Some code cells filled in, learner must complete + +**Use For:** Reducing cognitive load + +**Works For:** Any audience + +**Format:** Assignments and labs + +**Pro:** Focus attention on a specific concept (e.g., filtering data) + +**Con:** β€œJust get AI to do it”; required work can be too easy or too hard + +### Tweak and twiddle + +**Description:** Learner starts with complete working notebook, is asked to alter parameters to achieve some goal + +**Use For:** Compare and contrast; acquiring domain knowledge + +**Works For:** Learners without programming experience (but requires some domain knowledge) + +**Format:** Fixed-time workshop exercise; pair programming + +**Pro:** Helps learners overcome code anxiety + +**Con:** β€œWhere do I start?” and going down rabbit holes + +### Notebook as app + +**Description:** Use notebook as 
interactive dashboard (note: usually keep prose in a separate document to make the dashboard look like an app) + +**Use For:** Exploring datasets + +**Works For:** Non-programmers + +**Format:** Use instead of slides (but must know where you’re going); have learners suggest alternatives to explore; data analysis after (physical) lab experiment + +**Pro:** Less effort to build than custom UI + +**Con:** Requires testing; does not develop programming skills + +### Top-down delivery + +**Description:** Give learners just enough control to get to a motivating result quickly (β€œday one”) + +**Use For:** Follow-along lectures + +**Works For:** Any audience (but most engaging for people with low programming skills) + +**Format:** Tutorials and workshops (synchronous) + +**Pro:** Student engagement + +**Con:** Hard to get the right level of detail for a mixed-ability audience + +### Coding as translation + +**Description:** Convert prose to code (or vice versa) + +**Use For:** Connect concepts to implementation (and implementation to concepts) + +**Works For:** Learners who understand theory but struggle with coding (or vice versa) + +**Format:** Notebook with scaffolding text and possibly some (scaffolded) code + +**Pro:** Low barrier to entry for learners with limited programming knowledge + +**Con:** Hard to get the level right for mixed-ability audience + +### Symbolic math + +**Description:** Use SymPy for symbolic math in notebook + +**Use For:** Extension of previous exercise: convert math to code or code to math + +**Works For:** STEM students interested in theory + +**Format:** Any + +**Pro:** Introduce another real-world tool + +**Con:** Math in SymPy is yet another thing to learn + +### Numerical methods / simulation + +**Description:** Use calculation or simulation instead of formulaic analysis + +**Use For:** Make concepts tangible before introducing mathematical abstraction + +**Works For:** Learners with some programming skill + +**Format:** Any + +**Pro:** 
Going from specific to general is often more engaging and approachable + +**Con:** Requires programming skill; can be hard to debug + +### Learn an API + +**Description:** Introduce a key API example by example + +**Use For:** Put focus on tools to be used in other places / lessons + +**Works For:** Learners with some programming skill (and patience) + +**Format:** Examples in order of increasing complexity or decreasing frequency of use + +**Pro:** Guide learning in a sensible order (which AI sometimes struggles with) + +**Con:** β€œCan’t see the forest for the trees”; learners may prefer just asking AI as needed + +### Choose your data + +**Description:** Replace the dataset used in a notebook with another one (which may require some modifications to code) + +**Use For:** Engagement + +**Works For:** Learners with specific domain interest (e.g., sports analytics) + +**Format:** Common first half, learners explore on their own for second half; learners create presentations to share with others + +**Pro:** Improves self-efficacy; leverages engagement with personal interests + +**Con:** Can’t find data, data is too messy, learners’ interest don’t overlap + +### Test-driven learning + +**Description:** Instructor provides notebook full of tests; learners must write code to make those tests pass (e.g., handle messy data) + +**Use For:** Think in terms of a spec + +**Works For:** Learners who want firm goalposts + +**Format:** Notebook full of test cases with empty cells (and function stubs) for code; works well for homework exercises + +**Pro:** Helps learners stay focused on well-defined task + +**Con:** Very easy to have AI generate the code without understanding it + +### Bug hunt + +**Description:** Give learners a notebook with one or more bugs (which can include misleading prose) + +**Use For:** Developing critical reading skills (especially important for learners using AI) + +**Works For:** Learners with enough programming experience to be able to debug 
systematically + +**Format:** Works well as homework exercise + +**Pro:** Some learners enjoy playing detective; extremely useful skill to learn + +**Con:** Hard to calibrate bug difficulty to learner level; hard for learners to know when they’re done + +### Adversarial programming + +**Description:** Given a notebook full of code, write tests that break it (reverse of bug hunt) + +**Use For:** Learning critical thinking + +**Works For:** Learners with enough programming experience to be able to debug systematically + +**Format:** Works well as homework exercise + +**Pro:** Helps learners appreciate how hard it is to write robust code; improves their debugging skills + +**Con:** Learners can break code in repetitive ways (e.g., provide several inputs that trigger the same flaw) + +## Acknowledgments + +Much of this is inspired by or taken from +[*Teaching and Learning with Jupyter*](https://jupyter4edu.github.io/jupyter-edu-book/). + +## Appendix: Learner Personas + +### Anya Academic + +**Background:** Biology professor at mid-sized state university; teaches undergrad microbiology and biostatistics classes, both of which emphasize data management and visualization. + +**Relevant Experience:** Used R for 15 years, switched to Python three years ago, mostly self-taught. Frequently remixes teaching material she finds online, particularly examples. + +**Goals** + +1. Wants to equip her students with modern skills, especially AI-related, both because she thinks they’re important and to increase student engagement. + +2. Wants more recognition at her university for her teaching work, which she believes is more likely to come from publishable innovation than from high student evaluations. + +3. Would like to get student engagement back to pre-COVID levels; she feels that today’s cohorts don’t know each other as well and aren’t as excited about material because of the shift to online education. + +**Complications** + +1. 
Is concerned about tool setup and maintenance overheads. Doesn't have time to completely rewrite courses, so will only move over if there's an incremental migration path that allows her to back out if things don't appear to be working. + +2. Anya's department has two overworked IT staff, and nothing at her university is allowed to go beyond the pilot phase if it doesn't integrate with the LMS somehow. + +### Ellis Engineer + +**Background:** Senior undergraduate in mechanical engineering who just returned to school from their third and final co-op placement. They are very excited about drones. + +**Relevant Experience:** Used Jupyter notebooks with Colab in their second semester. They are comfortable with NumPy and Altair and have bumped into Pandas, but have done as many classes with MATLAB and AutoCAD as with Python. + +**Goals** + +1. Ellis wants to create an impressive senior project to secure themself a place in a good graduate program (which they think is essential to doing interesting work with drones). They have seen custom widgets in notebooks, and are willing to invest some time to learn how to build one with AI support. + +2. They also want to explore small-craft aerodynamics, particularly feedback stability problems, out of personal interest and as a way to become part of the β€œserious” drone community. + +**Complications** + +Having spent several months convinced that Lisp was the language of the future, Ellis is leery of investing too much in new technologies just because they’re cool. + +### Nang Newbie + +**Background:** Undergraduate business student; decided not to minor in CS because "AI is going to eat all those jobs". Nang chooses courses, tools, and interests based primarily on what the web tells him potential future employers are going to look for. He routinely uses ChatGPT for help with homework. + +**Relevant Experience:** Used Scratch in middle school and did one CS class in high school that covered HTML and a bit of Python. 
He just finished an intro stats class that used Pandas, which to his surprise he enjoyed enough to sign up for the sequel. + +**Goals** + +1. Nang wants to be able to do homework assignments more quickly and with less effort (hence his interest in ChatGPT). + +2. He wants to learn how to explore and analyze sports statistics for fun (he's a keen basketball fan), and to share what he finds with like-minded fans through online forums. + +**Complications** + +Nang is taking five courses and volunteering with two campus clubs (one for the sake of his CV, and one because of his passion for basketball), so he is chronically over-committed. + +## Appendix: KaTeX vs. MathJax + +marimo uses [KaTeX](https://katex.org/) for rendering math (faster, slightly narrower coverage, silent errors) rather than [MathJax](https://www.mathjax.org/). + +### Use raw strings + +LaTeX lives in Python strings in marimo, so use `r"..."` to preserve backslashes: + +```python +mo.md(r"$\\frac{1}{2}$") # βœ… +mo.md("$\\frac{1}{2}$") # ❌ β€” \\f is a form-feed character +``` + +### MathJax β†’ KaTeX + +| Category | MathJax | KaTeX | +| --- | --- | --- | +| Text | `\\mbox`, `\\bbox` | `\\text{}` | +| Text style | `\\textsc`, `\\textsl` | `\\text{}` | +| Environments | `\\begin{eqnarray}` | `\\begin{align}` | +| | `\\begin{multline}` | `\\begin{gather}` | +| References | `\\label`, `\\eqref`, `\\ref` | `\\tag{}` for manual numbering | +| Arrays | `\\cline`, `\\multicolumn`, `\\hfill`, `\\vline` | β€” | +| Macros | `\\DeclareMathOperator` | `\\operatorname{}` inline | +| | `\\newenvironment` | β€” | +| Spacing | `\\mspace`, `\\setlength`, `\\strut`, `\\rotatebox` | β€” | +| Conditionals | `\\if`, `\\else`, `\\fi`, `\\ifx` | β€” | + +These *do* work in KaTeX (despite outdated claims): `\\newcommand`, `\\def`, `\\hbox`, `\\hskip`, `\\cal`, `\\pmb`, `\\begin{equation}`, `\\begin{split}`, `\\operatorname*`. + +### Shared macros across cells + +`\\newcommand` works inline. 
For cross-cell reuse, use `mo.latex(filename="macros.tex")` in the same cell as `import marimo`. + +### Migration checklist + +1. Find-replace `\\mbox{` β†’ `\\text{` +2. Use raw strings (`r"..."`) +3. Replace `\\begin{eqnarray}` β†’ `\\begin{align}` +4. Replace `\\DeclareMathOperator` β†’ `\\operatorname{}` +5. Remove `\\label`/`\\eqref` β†’ use `\\tag{}` if needed +6. Visually verify β€” KaTeX fails silently + +### References + +- [KaTeX Support Table](https://katex.org/docs/support_table) β€” definitive command lookup +- [KaTeX Unsupported Features](https://github.com/KaTeX/KaTeX/wiki/Things-that-KaTeX-does-not-(yet)-support) \ No newline at end of file diff --git a/polars/01_why_polars.py b/polars/01_why_polars.py index 0ed303c6b0ec806499a754e907aea3cb28ec91fc..1e91084b297e3c5d3d7d09321b97d411342d162f 100644 --- a/polars/01_why_polars.py +++ b/polars/01_why_polars.py @@ -2,8 +2,8 @@ # requires-python = ">=3.12" # dependencies = [ # "marimo", -# "pandas==2.2.3", -# "polars==1.22.0", +# "pandas==2.3.2", +# "polars==1.24.0", # ] # /// diff --git a/polars/02_dataframes.py b/polars/02_dataframes.py index 71ad9658833bab2f4deb8d4857186d7871449945..291e25e8544b1df2fc0c45c3f57f0f5ad2af56e0 100644 --- a/polars/02_dataframes.py +++ b/polars/02_dataframes.py @@ -2,9 +2,9 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "numpy==2.2.5", -# "pandas==2.2.3", -# "polars==1.29.0", +# "numpy==2.4.3", +# "pandas==2.3.2", +# "polars==1.24.0", # ] # /// diff --git a/polars/03_loading_data.py b/polars/03_loading_data.py index ff9ee9885b7fc55dae400e3c0b38b8cdb2d84440..7f2c59d827723550d6813dda0624b5f412ea9623 100644 --- a/polars/03_loading_data.py +++ b/polars/03_loading_data.py @@ -1,14 +1,13 @@ # /// script # requires-python = ">=3.12" # dependencies = [ -# "adbc-driver-sqlite==1.7.0", -# "duckdb==1.4.0", +# "duckdb==1.4.4", # "lxml==6.0.0", # "marimo", # "pandas==2.3.2", -# "polars==1.32.3", -# "pyarrow==21.0.0", -# "sqlalchemy==2.0.43", +# "polars==1.24.0", +# 
"pyarrow==22.0.0", +# "sqlalchemy==2.0.45", # ] # /// @@ -162,20 +161,20 @@ def _(mo): You can also use other libraries with [arrow support](#arrow-support) or [polars plugins](#plugin-support) to read from databases before loading into polars, some of which support lazy reading. - Using the Arrow Database Connectivity SQLite support as an example: + Using DuckDB as an example: """) return @app.cell -def _(df, folder, pl): - URI = "sqlite:///" + f"/{folder.resolve()}/db.sqlite" - df.write_database(table_name="quick_reference", connection=URI, engine="adbc", if_table_exists="replace") +def _(df, duckdb, folder): + conn = duckdb.connect(str(folder / "db.duckdb")) + conn.register("df_polars", df) + conn.execute("CREATE OR REPLACE TABLE quick_reference AS SELECT * FROM df_polars") query = """SELECT * FROM quick_reference WHERE format LIKE '%Database%'""" - - pl.read_database_uri(query=query, uri=URI, engine="adbc") - return + conn.sql(query).pl() + return (conn,) @app.cell(hide_code=True) @@ -234,9 +233,8 @@ def _(mo): @app.cell -def _(duckdb, folder): +def _(conn): # Requires duckdb >= 1.4.0 - conn = duckdb.connect(folder / "db.sqlite") conn.sql("SELECT * FROM quick_reference").pl(lazy=True) return diff --git a/polars/04_basic_operations.py b/polars/04_basic_operations.py index fdcebeabc4d11e2398c43448fce1d3b07c79c11e..88f1dc1d5c67b102d21a23e7cd541b67ddaa10c4 100644 --- a/polars/04_basic_operations.py +++ b/polars/04_basic_operations.py @@ -2,7 +2,7 @@ # requires-python = ">=3.13" # dependencies = [ # "marimo", -# "polars==1.23.0", +# "polars==1.24.0", # ] # /// diff --git a/polars/05_reactive_plots.py b/polars/05_reactive_plots.py index cb4696cb98f900bd053e20bbb256ae7d9bce4c0a..af9ca52fcf2236929f77e4baee05eab5a7f8e1e2 100644 --- a/polars/05_reactive_plots.py +++ b/polars/05_reactive_plots.py @@ -2,10 +2,10 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "numpy==2.2.3", -# "plotly[express]==6.0.0", -# "polars==1.27.1", -# "statsmodels==0.14.4", +# 
"numpy==2.4.3", +# "plotly[express]==6.3.0", +# "polars==1.24.0", +# "statsmodels==0.14.5", # ] # /// diff --git a/polars/06_Dataframe_Transformer.py b/polars/06_dataframe_transformer.py similarity index 99% rename from polars/06_Dataframe_Transformer.py rename to polars/06_dataframe_transformer.py index cb06c1dec90f9815a257a7aa2a9bba24d7de3ea4..748f75ce18dd3c5cec614578f5e6d78181c403b0 100644 --- a/polars/06_Dataframe_Transformer.py +++ b/polars/06_dataframe_transformer.py @@ -2,9 +2,9 @@ # requires-python = ">=3.13" # dependencies = [ # "marimo", -# "numpy==2.2.3", -# "plotly[express]==6.0.0", -# "polars==1.28.1", +# "numpy==2.4.3", +# "plotly[express]==6.3.0", +# "polars==1.24.0", # "requests==2.32.3", # ] # [tool.marimo.runtime] diff --git a/polars/07-querying-with-sql.py b/polars/07_querying_with_sql.py similarity index 94% rename from polars/07-querying-with-sql.py rename to polars/07_querying_with_sql.py index 2776c663ae9ef7f35cf9dc713bb081c4bcaef88d..a5f2e995db98359ec05993c02b158424c1604d55 100644 --- a/polars/07-querying-with-sql.py +++ b/polars/07_querying_with_sql.py @@ -1,10 +1,10 @@ # /// script # requires-python = ">=3.12" # dependencies = [ -# "duckdb==1.4.3", +# "duckdb==1.4.4", # "kagglehub==0.3.13", # "marimo", -# "polars==1.36.1", +# "polars==1.24.0", # "pyarrow==22.0.0", # "sqlalchemy==2.0.45", # "sqlglot==28.3.0", @@ -13,14 +13,14 @@ import marimo -__generated_with = "0.18.4" +__generated_with = "0.20.4" app = marimo.App(width="medium") @app.cell(hide_code=True) def _(mo): mo.md(r""" - ## SQL Features in Marimo and Polars + # SQL Features in Marimo and Polars _By [etrotta](https://github.com/etrotta)_ @@ -36,12 +36,12 @@ def _(mo): @app.cell -def _(mo, sqlite_engine): +def _(mo, reviews, sqlite_engine): _df = mo.sql( f""" SELECT * FROM reviews LIMIT 100 """, - engine=sqlite_engine, + engine=sqlite_engine ) return @@ -92,12 +92,12 @@ def _(mo): @app.cell -def _(mo, sqlite_engine): +def _(hotels, mo, sqlite_engine): _df = mo.sql( f""" SELECT * 
FROM hotels LIMIT 10 """, - engine=sqlite_engine, + engine=sqlite_engine ) return @@ -113,12 +113,12 @@ def _(mo): @app.cell -def _(mo, sqlite_engine): +def _(mo, reviews, sqlite_engine, users): polars_age_groups = mo.sql( f""" SELECT reviews.*, age_group FROM reviews JOIN users ON reviews.user_id = users.user_id LIMIT 1000 """, - engine=sqlite_engine, + engine=sqlite_engine ) return (polars_age_groups,) @@ -140,12 +140,12 @@ def _(mo): @app.cell -def _(mo, sqlite_engine): +def _(mo, reviews, sqlite_engine, users): _df = mo.sql( f""" SELECT age_group, AVG(reviews.score_overall) FROM reviews JOIN users ON reviews.user_id = users.user_id GROUP BY age_group """, - engine=sqlite_engine, + engine=sqlite_engine ) return @@ -159,7 +159,7 @@ def _(mo): @app.cell -def _(mo): +def _(mo, polars_age_groups): _df = mo.sql( f""" SELECT * FROM polars_age_groups LIMIT 10 @@ -262,7 +262,7 @@ def _(mo): @app.cell -def _(duckdb): +def _(duckdb, hotels): duckdb.sql("SELECT * FROM hotels").pl(lazy=True).sort("cleanliness_base", descending=True).limit(5).collect() return @@ -292,36 +292,42 @@ def _(cached_file, delete_file_button, pathlib): @app.cell def _(): import marimo as mo + return (mo,) @app.cell def _(): import polars as pl + return (pl,) @app.cell def _(): import duckdb + return (duckdb,) @app.cell def _(): import sqlalchemy + return (sqlalchemy,) @app.cell def _(): import kagglehub + return (kagglehub,) @app.cell def _(): import pathlib + return (pathlib,) diff --git a/polars/08_working_with_columns.py b/polars/08_working_with_columns.py index 915b7080c48ba9ea7a8d36fcbd3f939d0d2e9f18..1142473d8ff9520e48b4a0bc9afe862d5eb0d8f3 100644 --- a/polars/08_working_with_columns.py +++ b/polars/08_working_with_columns.py @@ -1,8 +1,8 @@ # /// script # requires-python = ">=3.11" # dependencies = [ -# "polars==1.18.0", # "marimo", +# "polars==1.24.0", # ] # /// diff --git a/polars/09_data_types.py b/polars/09_data_types.py index 
c719c0dbb4752ab0252d434ce5f15ae22d059d65..7c9e46e91abc2fa531111f4574e65caee4176b9a 100644 --- a/polars/09_data_types.py +++ b/polars/09_data_types.py @@ -1,8 +1,8 @@ # /// script # requires-python = ">=3.11" # dependencies = [ -# "polars==1.18.0", # "marimo", +# "polars==1.24.0", # ] # /// diff --git a/polars/10_strings.py b/polars/10_strings.py index 9c5b4d8c28db49ac98b7ecbb724946eb598d10bf..fe1b39ca25097152db12f3ba776560756ceb30f9 100644 --- a/polars/10_strings.py +++ b/polars/10_strings.py @@ -1,9 +1,9 @@ # /// script # requires-python = ">=3.12" # dependencies = [ -# "altair==5.5.0", +# "altair==6.0.0", # "marimo", -# "numpy==2.2.3", +# "numpy==2.4.3", # "polars==1.24.0", # ] # /// diff --git a/polars/11_missing_data.py b/polars/11_missing_data.py index 8565122d3e86e570796d25fa150fa05afea6bc3b..6f4ae89ab37ad8d9a9f70cd34dc6a47bc68cf777 100644 --- a/polars/11_missing_data.py +++ b/polars/11_missing_data.py @@ -3,13 +3,13 @@ # dependencies = [ # "marimo", # "plotly[express]==6.3.0", -# "polars==1.33.1", +# "polars==1.24.0", # ] # /// import marimo -__generated_with = "0.18.4" +__generated_with = "0.20.4" app = marimo.App(width="medium") @@ -646,14 +646,12 @@ def _(dirty_weather, pl, rain): @app.cell(hide_code=True) def _(day_perc, mo, perc_col): - mo.md( - f""" + mo.md(f""" It is null for {day_perc.select(perc_col.is_null().mean()).item():.4%} of the rows, but is NaN for {day_perc.select(perc_col.is_nan().mean()).item():.4%} of them. If we use the cleaned weather dataframe to calculate it instead of the dirty_weather, we will have no nulls, but note how for this calculation we can end up with both, with each having a different meaning. In this case it makes sense to fill in NaNs as 0 to indicate there was no rain during that period, but treating the nulls the same could lead to a different interpretation of the data, so remember to handle NaNs and nulls separately. 
- """ - ) + """) return @@ -773,18 +771,21 @@ def _(pl, raw_weather): @app.cell def _(): import marimo as mo + return (mo,) @app.cell def _(): import polars as pl + return (pl,) @app.cell def _(): import plotly.express as px + return (px,) diff --git a/polars/12_aggregations.py b/polars/12_aggregations.py index fe5385e4aa2a65ab20ecd9adacb5d6d77f53dd88..08676009d9a1e565278ef7cef9e1456e7fce8966 100644 --- a/polars/12_aggregations.py +++ b/polars/12_aggregations.py @@ -2,7 +2,7 @@ # requires-python = ">=3.13" # dependencies = [ # "marimo", -# "polars==1.23.0", +# "polars==1.24.0", # ] # /// diff --git a/polars/13_window_functions.py b/polars/13_window_functions.py index c4f3117d48358e1df6f47111584b5b061d237c41..bbcebbd58ef569613c4a4b68e17b9d2b9f8d7fbd 100644 --- a/polars/13_window_functions.py +++ b/polars/13_window_functions.py @@ -1,11 +1,11 @@ # /// script # requires-python = ">=3.13" # dependencies = [ -# "duckdb==1.2.2", +# "duckdb==1.4.4", # "marimo", -# "polars==1.29.0", -# "pyarrow==20.0.0", -# "sqlglot==26.16.4", +# "polars==1.24.0", +# "pyarrow==22.0.0", +# "sqlglot==28.3.0", # ] # /// @@ -154,7 +154,7 @@ def _(mo): def _(df, pl): ( df.with_columns( - is_weekday=pl.col("date").dt.is_business_day(), + is_weekday=pl.col("date").dt.weekday() < 5, ).with_columns( max_rev_by_channel_and_weekday=pl.col("revenue").max().over("is_weekday", "channel"), ) @@ -179,7 +179,7 @@ def _(df, pl): df.with_columns( max_rev_by_channel_and_weekday=pl.col("revenue") .max() - .over((pl.col("date").dt.is_business_day()), "channel") + .over((pl.col("date").dt.weekday() < 5), "channel") ) return diff --git a/polars/14_user_defined_functions.py b/polars/14_user_defined_functions.py index 34e568ce582f86a57ef4fc5a3e85844359bdebdd..5d1b252823471d40dfe494ddd732e15557d17f5e 100644 --- a/polars/14_user_defined_functions.py +++ b/polars/14_user_defined_functions.py @@ -1,13 +1,13 @@ # /// script # requires-python = ">=3.12" # dependencies = [ -# "altair==5.5.0", +# "altair==6.0.0", # 
"beautifulsoup4==4.13.3", # "httpx==0.28.1", # "marimo", # "nest-asyncio==1.6.0", -# "numba==0.61.0", -# "numpy==2.1.3", +# "numba==0.64.0", +# "numpy==2.4.3", # "polars==1.24.0", # ] # /// @@ -239,7 +239,7 @@ def _(parsed_html_df, pl): pl.struct( *( pl.element() - .str.extract(f'(?:"{key}"|{key})\s*:\s*"([^"]*)"') + .str.extract(rf'(?:"{key}"|{key})\s*:\s*"([^"]*)"') .alias(key) for key in ["path", "thumbnail", "title"] ) diff --git a/polars/16_lazy_execution.py b/polars/16_lazy_execution.py index 64842c981a05602e280a9fa5fb7ea1f79c3abccd..ac503124bc99f71c9ddec868f4601e2362abba00 100644 --- a/polars/16_lazy_execution.py +++ b/polars/16_lazy_execution.py @@ -3,13 +3,13 @@ # dependencies = [ # "marimo", # "faker==37.1.0", -# "scipy==1.13.1", -# "numpy==2.0.2", -# "numba==0.60.0", -# "polars==1.26.0", -# "matplotlib==3.9.4", -# "statsmodels", -# "pandas==2.2.3", +# "scipy==1.17.1", +# "numpy==2.4.3", +# "numba==0.64.0", +# "polars==1.24.0", +# "matplotlib==3.10.8", +# "statsmodels==0.14.5", +# "pandas==2.3.2", # ] # /// @@ -22,7 +22,7 @@ app = marimo.App(width="medium") @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Lazy Execution (a.k.a. the Lazy API) + # Lazy Execution Author: [Deb Debnath](https://github.com/debajyotid2) """) diff --git a/polars/README.md b/polars/README.md deleted file mode 100644 index 3035f1897d6e1a7947e147f209cae249aa2304e3..0000000000000000000000000000000000000000 --- a/polars/README.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn Polars - -_🚧 This collection is a work in progress. Please help us add notebooks!_ - -This collection of marimo notebooks is designed to teach you the basics of data wrangling using a Python library called Polars. - -**Help us build this course! βš’οΈ** - -We're seeking contributors to help us build these notebooks. Every contributor will be acknowledged as an author in this README and in their contributed notebooks. 
Head over to the [tracking issue](https://github.com/marimo-team/learn/issues/40) to sign up for a planned notebook or propose your own. - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -You can also open notebooks in our online playground by appending marimo.app/ to a notebook's URL: - -[https://marimo.app/github.com/marimo-team/learn/blob/main/polars/01_why_polars.py](https://marimo.app/github.com/marimo-team/learn/blob/main/polars/01_why_polars.py). - -**Thanks to all our notebook authors!** - -* [Koushik Khan](https://github.com/koushikkhan) -* [PΓ©ter Gyarmati](https://github.com/peter-gy) -* [Joram Mutenge](https://github.com/jorammutenge) -* [etrotta](https://github.com/etrotta) -* [Debajyoti Das](https://github.com/debajyotid2) \ No newline at end of file diff --git a/polars/index.md b/polars/index.md new file mode 100644 index 0000000000000000000000000000000000000000..11ce1d3ceb8bcf94c996df0bfab231b154b9cc15 --- /dev/null +++ b/polars/index.md @@ -0,0 +1,17 @@ +--- +title: Learn Polars +description: > + Learn the basics of data wrangling with a high-performance Python library + called Polars. 
+tracking: 40 +--- + +## Contributors + +Thanks to our notebook authors: + +* [Koushik Khan](https://github.com/koushikkhan) +* [PΓ©ter Gyarmati](https://github.com/peter-gy) +* [Joram Mutenge](https://github.com/jorammutenge) +* [etrotta](https://github.com/etrotta) +* [Debajyoti Das](https://github.com/debajyotid2) diff --git a/probability/02_axioms.py b/probability/02_axioms.py index 71de3dbe909eb941a7eb60ca18c1433fac0ba94d..3fbf464f669d759552c01344d627b9856cb29c21 100644 --- a/probability/02_axioms.py +++ b/probability/02_axioms.py @@ -2,8 +2,8 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.2", +# "matplotlib==3.10.8", +# "numpy==2.4.3", # ] # /// diff --git a/probability/03_probability_of_or.py b/probability/03_probability_of_or.py index 68cf41e30375c6d3e4e3e027613c34227d62fe86..b49ee4ace3f0aaf8611e9d88a76df084b961fa0e 100644 --- a/probability/03_probability_of_or.py +++ b/probability/03_probability_of_or.py @@ -2,8 +2,8 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib", -# "matplotlib-venn" +# "matplotlib==3.10.8", +# "matplotlib-venn==1.1.2" # ] # /// diff --git a/probability/04_conditional_probability.py b/probability/04_conditional_probability.py index ce2e1a8d19cacc9e7da1812fa01d665557f9c173..81992c6055310f0d266f37fa6e0447e696dd373a 100644 --- a/probability/04_conditional_probability.py +++ b/probability/04_conditional_probability.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "matplotlib-venn==1.1.1", -# "numpy==2.2.2", +# "matplotlib==3.10.8", +# "matplotlib-venn==1.1.2", +# "numpy==2.4.3", # ] # /// @@ -333,7 +333,7 @@ def _(mo): @app.cell(hide_code=True) def _(mo): - mo.md(""" + mo.md(r""" ## Summary You've learned: diff --git a/probability/05_independence.py b/probability/05_independence.py index 154635d03f377ad3349ac370ebc38744e14021ac..f76410cc72c25503a25d15bd620e5cc07b7d980d 100644 --- 
a/probability/05_independence.py +++ b/probability/05_independence.py @@ -2,6 +2,8 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", +# "numpy==2.4.3", +# "pandas==2.3.2", # ] # /// @@ -19,7 +21,7 @@ def _(): @app.cell(hide_code=True) def _(mo): - mo.md(""" + mo.md(r""" # Independence in Probability Theory _This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/independence/), by Stanford professor Chris Piech._ diff --git a/probability/06_probability_of_and.py b/probability/06_probability_of_and.py index f3a3308d0ed5eaee1c80fac75cdddd94538a0510..88c6814185ab1503c8aa32073c32fde421b8d6c8 100644 --- a/probability/06_probability_of_and.py +++ b/probability/06_probability_of_and.py @@ -2,8 +2,8 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib", -# "matplotlib-venn" +# "matplotlib==3.10.8", +# "matplotlib-venn==1.1.2" # ] # /// diff --git a/probability/07_law_of_total_probability.py b/probability/07_law_of_total_probability.py index b4b3380e299b15693f455108cd42bb6818d54cd5..94db72e5f1256cd1a3843932a6d4261d4cf20b63 100644 --- a/probability/07_law_of_total_probability.py +++ b/probability/07_law_of_total_probability.py @@ -2,8 +2,8 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib", -# "matplotlib-venn" +# "matplotlib==3.10.8", +# "matplotlib-venn==1.1.2" # ] # /// @@ -220,7 +220,7 @@ def _(mo): # Calculate total probability total_error = total_probability(error_probs, state_probs) - explanation = mo.md(f""" + explanation = mo.md(rf""" ### System Error Analysis Given: @@ -278,7 +278,7 @@ def _(late_given_dry, late_given_rain, mo, plt, venn2, weather_prob): p_late = late_given_rain.value * p_rain + late_given_dry.value * p_dry # Create explanation - explanation_example = mo.md(f""" + explanation_example = mo.md(rf""" ### Weather and Traffic Analysis Given: diff --git 
a/probability/08_bayes_theorem.py b/probability/08_bayes_theorem.py index 3f5c1362e7d65b042cb8a46a66fdfe863537233d..8ca59fbb1e150c1289848de5ea924e1d361e5a6c 100644 --- a/probability/08_bayes_theorem.py +++ b/probability/08_bayes_theorem.py @@ -2,8 +2,8 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", +# "matplotlib==3.10.8", +# "numpy==2.4.3", # ] # /// @@ -452,7 +452,7 @@ def _(p_e_given_h, p_e_given_not_h, p_h): @app.cell(hide_code=True) def _(mo): p_h = mo.ui.slider(0.0, 1, label="$P(H)$", value=0.1, step=0.1) - p_e_given_h = mo.ui.slider(0.0, 1, label="$P(E \mid H)$", value=0.3, step=0.1) + p_e_given_h = mo.ui.slider(0.0, 1, label=r"$P(E \mid H)$", value=0.3, step=0.1) p_e_given_not_h = mo.ui.slider( 0.0, 1, label=r"$P(E \mid \neg H)$", value=0.3, step=0.1 ) diff --git a/probability/09_random_variables.py b/probability/09_random_variables.py index 2158ee8ba0ec70b86804359374d4074858aa40ef..fa3688f21d5fc0d593948febb662a80aaa59cd0c 100644 --- a/probability/09_random_variables.py +++ b/probability/09_random_variables.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", -# "scipy==1.15.2", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", # ] # /// diff --git a/probability/10_probability_mass_function.py b/probability/10_probability_mass_function.py index 2a2d5772601550502d9d35c1819187c00f4ec216..e024e1d70a859364e944ea41d603fb64c2fe5511 100644 --- a/probability/10_probability_mass_function.py +++ b/probability/10_probability_mass_function.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", -# "scipy==1.15.2", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", # ] # /// diff --git a/probability/11_expectation.py b/probability/11_expectation.py index 4faa62e762a47781e13426b21112fda78c65d698..2997e8fad94f3ee5b6f7c75c5cb8a2de5826df9f 100644 --- 
a/probability/11_expectation.py +++ b/probability/11_expectation.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", -# "scipy==1.15.2", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", # ] # /// diff --git a/probability/12_variance.py b/probability/12_variance.py index a5021b05c0ced1931b5369ee17de9a0124408e76..77057d70d4d88585297a15bae8fd914dafb962b2 100644 --- a/probability/12_variance.py +++ b/probability/12_variance.py @@ -2,10 +2,10 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", -# "scipy==1.15.2", -# "wigglystuff==0.1.10", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", +# "wigglystuff==0.2.37", # ] # /// diff --git a/probability/13_bernoulli_distribution.py b/probability/13_bernoulli_distribution.py index 1cf7563d34935cc11680a0ce17735c76c2a281c6..b8dc4ca92e177fbcddb3def506a7a6fbedf3f030 100644 --- a/probability/13_bernoulli_distribution.py +++ b/probability/13_bernoulli_distribution.py @@ -2,9 +2,9 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.3", -# "scipy==1.15.2", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", # ] # /// diff --git a/probability/14_binomial_distribution.py b/probability/14_binomial_distribution.py index 8a421e5e19e20a7eb322de635410335ec2a3460c..3b03959a4a05e9c09f9c309be576ab4415d5eb0a 100644 --- a/probability/14_binomial_distribution.py +++ b/probability/14_binomial_distribution.py @@ -2,12 +2,12 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.4", -# "scipy==1.15.2", -# "altair==5.2.0", -# "wigglystuff==0.1.10", -# "pandas==2.2.3", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", +# "altair==6.0.0", +# "wigglystuff==0.2.37", +# "pandas==2.3.2", # ] # /// diff --git a/probability/15_poisson_distribution.py 
b/probability/15_poisson_distribution.py index 2c1f1c4f84f452e00f79bcce6a85fb84029e0fed..1c0752da99aa603978669299ea2401e0c8f32cf9 100644 --- a/probability/15_poisson_distribution.py +++ b/probability/15_poisson_distribution.py @@ -2,12 +2,12 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.0", -# "numpy==2.2.4", -# "scipy==1.15.2", -# "altair==5.2.0", -# "wigglystuff==0.1.10", -# "pandas==2.2.3", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", +# "altair==6.0.0", +# "wigglystuff==0.2.37", +# "pandas==2.3.2", # ] # /// diff --git a/probability/16_continuous_distribution.py b/probability/16_continuous_distribution.py index a3f094ccc6a0677f6a326bc36c14e7b5b0f377eb..acf548f7c1d688aa07489bcd80a1bcec6515bbf8 100644 --- a/probability/16_continuous_distribution.py +++ b/probability/16_continuous_distribution.py @@ -2,13 +2,13 @@ # requires-python = ">=3.11" # dependencies = [ # "marimo", -# "altair==5.5.0", -# "matplotlib==3.10.1", -# "numpy==2.2.4", -# "scipy==1.15.2", +# "altair==6.0.0", +# "matplotlib==3.10.8", +# "numpy==2.4.3", +# "scipy==1.17.1", # "sympy==1.13.3", -# "wigglystuff==0.1.10", -# "polars==1.26.0", +# "wigglystuff==0.2.37", +# "polars==1.24.0", # ] # /// @@ -253,31 +253,31 @@ def _( # Add appropriate explanation if distribution == "uniform": _explanation = mo.md( - f""" - In the **uniform distribution**, all values between 0 and 5 are equally likely. + rf""" + In the **uniform distribution**, all values between 0 and 5 are equally likely. The probability density is constant at 0.2 (which is 1/5, ensuring the total area is 1). 
- For a uniform distribution, the probability that $X$ is in the interval $[{a:.1f}, {b:.1f}]$ + For a uniform distribution, the probability that $X$ is in the interval $[{a:.1f}, {b:.1f}]$ is simply proportional to the width of the interval: $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$ Note that while the PDF has a constant value of 0.2, this is not a probability but a density! """ ) elif distribution == "triangular": _explanation = mo.md( - f""" - In this **triangular distribution**, the probability density increases linearly from 0 to 2.5, + rf""" + In this **triangular distribution**, the probability density increases linearly from 0 to 2.5, then decreases linearly from 2.5 to 5. The distribution's peak is at x = 2.5, where the value is highest. - The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$ + The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$ is calculated by integrating the PDF over the interval. """ ) else: _explanation = mo.md( - f""" + rf""" The **exponential distribution** (with Ξ» = 0.5) models the time between events in a Poisson process. - Unlike the uniform and triangular distributions, the exponential distribution has infinite support + Unlike the uniform and triangular distributions, the exponential distribution has infinite support (extends from 0 to infinity). The probability density decreases exponentially as x increases. - The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$ + The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$ is calculated by integrating $f(x) = 0.5e^{{-0.5x}}$ over the interval. 
""" ) @@ -848,7 +848,7 @@ def _(np, plt): ax.legend(loc='upper right') # relevant annotations - ax.annotate(f'$P({a:.1f} \leq X \leq {b:.1f}) = {probability:.4f}$', + ax.annotate(rf'$P({a:.1f} \leq X \leq {b:.1f}) = {probability:.4f}$', xy=(0.5, 0.9), xycoords='axes fraction', bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.8), horizontalalignment='center', fontsize=12) diff --git a/probability/17_normal_distribution.py b/probability/17_normal_distribution.py index 83a8aae177664cbf50b19ac58552d1126fa0d004..538a501d3612df07c916255b3d8f1b20f69879b8 100644 --- a/probability/17_normal_distribution.py +++ b/probability/17_normal_distribution.py @@ -2,10 +2,10 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.1", -# "scipy==1.15.2", -# "wigglystuff==0.1.10", -# "numpy==2.2.4", +# "matplotlib==3.10.8", +# "scipy==1.17.1", +# "wigglystuff==0.2.37", +# "numpy==2.4.3", # ] # /// @@ -787,8 +787,8 @@ def _(np, plt, stats): textstr = '\n'.join(( r'Normal (aka Gaussian) Random Variable', r'', - f'Parameter $\mu$: {mu}', - f'Parameter $\sigma$: {sigma}' + rf'Parameter $\mu$: {mu}', + rf'Parameter $\sigma$: {sigma}' )) ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=props) diff --git a/probability/18_central_limit_theorem.py b/probability/18_central_limit_theorem.py index 3ef2f4a8bea6a6d4660ff42b03fe65c33883ecc8..4db1ad1321c1cde7b6f287c68893d6e79b473ab0 100644 --- a/probability/18_central_limit_theorem.py +++ b/probability/18_central_limit_theorem.py @@ -2,11 +2,11 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.1", -# "scipy==1.15.2", -# "numpy==2.2.4", +# "matplotlib==3.10.8", +# "scipy==1.17.1", +# "numpy==2.4.3", # "plotly==5.18.0", -# "wigglystuff==0.1.13", +# "wigglystuff==0.2.37", # ] # /// diff --git a/probability/19_maximum_likelihood_estimation.py b/probability/19_maximum_likelihood_estimation.py index 
45d946e99ab234d0667f3783e60e818926f4d389..022c9d2393e848b13fa71cd2135003c13f9941d3 100644 --- a/probability/19_maximum_likelihood_estimation.py +++ b/probability/19_maximum_likelihood_estimation.py @@ -2,10 +2,10 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.1", -# "scipy==1.15.2", -# "numpy==2.2.4", -# "polars==0.20.2", +# "matplotlib==3.10.8", +# "scipy==1.17.1", +# "numpy==2.4.3", +# "polars==1.24.0", # "plotly==5.18.0", # ] # /// @@ -400,7 +400,7 @@ def _( # relevant markdown for the results normal_explanation = mo.md( - f""" + rf""" ### Normal MLE Results **True parameters**: $\mu = {normal_true_mu:.3f}$, $\sigma^2 = {normal_true_var:.3f}$ diff --git a/probability/20_naive_bayes.py b/probability/20_naive_bayes.py index ec6a39443e2aded36af49d68e6c6ce097a4f4be0..2dd50aad914b686404f76d459d6bb3ca09b467f9 100644 --- a/probability/20_naive_bayes.py +++ b/probability/20_naive_bayes.py @@ -2,10 +2,10 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.1", -# "scipy==1.15.2", -# "numpy==2.2.4", -# "polars==1.26.0", +# "matplotlib==3.10.8", +# "scipy==1.17.1", +# "numpy==2.4.3", +# "polars==1.24.0", # "plotly==5.18.0", # "scikit-learn==1.6.1", # ] diff --git a/probability/21_logistic_regression.py b/probability/21_logistic_regression.py index 9426ac93515396f1e1231549f64f3960144a9590..784661b0f7b6ce82ae8d1b0353bf3832358adf4a 100644 --- a/probability/21_logistic_regression.py +++ b/probability/21_logistic_regression.py @@ -2,11 +2,11 @@ # requires-python = ">=3.10" # dependencies = [ # "marimo", -# "matplotlib==3.10.1", -# "numpy==2.2.4", +# "matplotlib==3.10.8", +# "numpy==2.4.3", # "drawdata==0.3.7", # "scikit-learn==1.6.1", -# "polars==1.26.0", +# "polars==1.24.0", # ] # /// diff --git a/probability/README.md b/probability/README.md deleted file mode 100644 index a446eef3b02b7ec29c4ea055f94c428a8c40f1a7..0000000000000000000000000000000000000000 --- a/probability/README.md +++ /dev/null @@ -1,31 +0,0 
@@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn probability - -This collection of marimo notebooks teaches the fundamentals of probability, -with an emphasis on computation with Python. - -Much of the structure and many explanations here are adapted from Chris Piech's [Probability for Computer Scientists](https://chrispiech.github.io/probabilityForComputerScientists/en/index.html) course reader. - - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -For example, run the numbers tutorial with - -```bash -uvx marimo edit https://github.com/marimo-team/learn/blob/main/probability/01_sets.py -``` - -You can also open notebooks in our online playground by appending `marimo.app/` -to a notebook's URL: [marimo.app/https://github.com/marimo-team/learn/blob/main/probability/01_sets.py](https://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/01_sets.py). - -**Thanks to all our notebook authors!** - -* [Srihari Thyagarajan](https://github.com/Haleshot) \ No newline at end of file diff --git a/probability/index.md b/probability/index.md new file mode 100644 index 0000000000000000000000000000000000000000..8bb764be48146aa1b3412a573fd86818d29f7be4 --- /dev/null +++ b/probability/index.md @@ -0,0 +1,16 @@ +--- +title: Learn Probability +description: > + These marimo notebooks teach the fundamental of probability with + an emphasis on interactive learning and computation in Python. +--- + +Much of the structure and many explanations are adapted from Chris Piech's +[Probability for Computer Scientists](https://chrispiech.github.io/probabilityForComputerScientists/en/index.html) +course reader. 
+ +## Contributors + +Thanks to our notebook authors: + +* [Srihari Thyagarajan](https://github.com/Haleshot) diff --git a/python/001_numbers.py b/python/01_numbers.py similarity index 99% rename from python/001_numbers.py rename to python/01_numbers.py index a51892d4981b0304d5d687d9fbb229c0b00c5e37..2284ea50d0f120431a84a97f0d4102678b3258c8 100644 --- a/python/001_numbers.py +++ b/python/01_numbers.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ”’ Numbers + # Numbers This tutorial provides a brief overview of working with numbers. diff --git a/python/002_strings.py b/python/02_strings.py similarity index 99% rename from python/002_strings.py rename to python/02_strings.py index 444d54bcafcd53241e3116587bcadf6e83d56991..422eebf7b3a77c40f326fd9a16a66e3386ba06b3 100644 --- a/python/002_strings.py +++ b/python/02_strings.py @@ -14,7 +14,7 @@ app = marimo.App(width="medium") @app.cell(hide_code=True) def _(mo): mo.md(""" - # 🎭 Strings + # Strings This notebook introduces **strings**, which are containers for text. diff --git a/python/003_collections.py b/python/03_collections.py similarity index 99% rename from python/003_collections.py rename to python/03_collections.py index 9803db64c56ec5e5f782b675005fabdf1fe2cfa0..169ea5f04ee68ec3f8b6a71537d851719fa1dfb0 100644 --- a/python/003_collections.py +++ b/python/03_collections.py @@ -14,7 +14,7 @@ app = marimo.App(width="medium") @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ“¦ Collections + # Collections A "collection" is a type of variable that holds multiple values. 
diff --git a/python/004_conditional_logic.py b/python/04_conditional_logic.py similarity index 99% rename from python/004_conditional_logic.py rename to python/04_conditional_logic.py index f012008ac8c7a4b01439fa9725497f933d120fbd..bba29824d5f4e9146d3fec43e7e279d0040d1855 100644 --- a/python/004_conditional_logic.py +++ b/python/04_conditional_logic.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ”„ Conditional logic + # Conditional logic This tutorial teaches you how to how to make **decisions** in your code, using Python's conditional statements. diff --git a/python/005_loops.py b/python/05_loops.py similarity index 99% rename from python/005_loops.py rename to python/05_loops.py index d0ecf14b1690e2d53099e74d0f6eaabf94803afe..4a5e06e0b3e42c55090474816357599f45a8aa06 100644 --- a/python/005_loops.py +++ b/python/05_loops.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ”„ Loops + # Loops Let's learn how Python helps us repeat tasks efficiently with loops. diff --git a/python/006_dictionaries.py b/python/06_dictionaries.py similarity index 99% rename from python/006_dictionaries.py rename to python/06_dictionaries.py index 3331b4b117d56b90327b5ca3be2c3e9d7efad342..b9459b72fe3cee468fd52381a05653e902eda9a0 100644 --- a/python/006_dictionaries.py +++ b/python/06_dictionaries.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ“š Dictionaries + # Dictionaries Dictionaries are collections of key-value pairs, with each key associated with a value. The keys are unique, meaning they show up only once. 
diff --git a/python/007_advanced_collections.py b/python/07_advanced_collections.py similarity index 97% rename from python/007_advanced_collections.py rename to python/07_advanced_collections.py index b510772d3ec3a784e8f84a099d40ee9886b74bf9..1d64e8f776480df547a56fe30291f55f19b6f769 100644 --- a/python/007_advanced_collections.py +++ b/python/07_advanced_collections.py @@ -14,9 +14,9 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ”„ Advanced collections + # Advanced Collections - This tutorials hows advanced patterns for working with collections. + This tutorial shows advanced patterns for working with collections. ## Lists of dictionaries diff --git a/python/008_functions.py b/python/08_functions.py similarity index 99% rename from python/008_functions.py rename to python/08_functions.py index ca579bace9f6372f5b41257701fe97a77f5a7064..f80425663c8bc1bb54c4f6e41756d7e3ea42835b 100644 --- a/python/008_functions.py +++ b/python/08_functions.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # 🧩 Functions + # Functions This tutorial is about an important topic: **functions.** diff --git a/python/009_modules.py b/python/09_modules.py similarity index 99% rename from python/009_modules.py rename to python/09_modules.py index bb4062d2d7ea5c6be8fda55908e2dc45bd310746..6383e2044a5948a8ca9f2d4bc9e78f8252f68acb 100644 --- a/python/009_modules.py +++ b/python/09_modules.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # 🧩 Using modules + # Using Modules A `module` in Python is a Python file that defines functions and variables. Modules can be `imported` into other Python files, letting you reuse their functions and variables. 
diff --git a/python/010_exceptions.py b/python/10_exceptions.py similarity index 99% rename from python/010_exceptions.py rename to python/10_exceptions.py index 1761ec770b3210f3e49bfe20bf83fb4ced3eb7f6..c5e328c300bfa06d80921c437f4c238015160899 100644 --- a/python/010_exceptions.py +++ b/python/10_exceptions.py @@ -14,7 +14,7 @@ app = marimo.App() @app.cell(hide_code=True) def _(mo): mo.md(""" - # πŸ›‘οΈ Handling errors + # Handling Errors Sometimes things go wrong in programs. When that happens, Python raises `exceptions` to tell you what went amiss. For example, maybe you divided by 0: """) diff --git a/python/README.md b/python/README.md deleted file mode 100644 index 4927c65636d7fe11fe2affe8ce20441f31b92996..0000000000000000000000000000000000000000 --- a/python/README.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Readme -marimo-version: 0.18.4 ---- - -# Learn Python - -This collection of marimo notebooks is designed to teach you the basics -of the Python programming language. - -**Running notebooks.** To run a notebook locally, use - -```bash -uvx marimo edit -``` - -For example, run the numbers tutorial with - -```bash -uvx marimo edit https://github.com/marimo-team/learn/blob/main/python/001_numbers.py -``` - -You can also open notebooks in our online playground by appending `marimo.app/` -to a notebook's URL: [marimo.app/https://github.com/marimo-team/learn/blob/main/python/001_numbers.py](https://marimo.app/https://github.com/marimo-team/learn/blob/main/python/001_numbers.py). - -**Thanks to all our notebook authors!** - -* [Srihari Thyagarajan](https://github.com/Haleshot) \ No newline at end of file diff --git a/python/index.md b/python/index.md new file mode 100644 index 0000000000000000000000000000000000000000..98488aaf87c093cb81a0bdca72662391a6f9dc9a --- /dev/null +++ b/python/index.md @@ -0,0 +1,12 @@ +--- +title: Learn Python +description: > + These notebooks will help you learn the basics of Python + programming in an easy, interactive way. 
+--- + +## Contributors + +Thanks to our notebook authors: + +* [Srihari Thyagarajan](https://github.com/Haleshot) diff --git a/queueing/01_basic_ideas.py b/queueing/01_basic_ideas.py new file mode 100644 index 0000000000000000000000000000000000000000..65cdc3a25dc58a85cb9e7ba91ea356b68a58ca60 --- /dev/null +++ b/queueing/01_basic_ideas.py @@ -0,0 +1,285 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import math + import random + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, math, mo, pl, random + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Basic Ideas in Queueing Theory + + ## *Arrivals, Servers, and Utilization* + + Three concepts underpin every queueing model. The first is *Poisson arrivals*: when customers arrive independently at a constant average rate $\lambda$, gaps between consecutive arrivals follow an *exponential* distribution with mean $1/\lambda$, and the count of arrivals in any window of width $t$ follows a Poisson distribution with mean $\lambda t$. These two descriptions are equivalent: if one holds, the other must too. + + The second key idea is that the exponential distribution is *memoryless*. Knowing you have already waited $s$ units gives no information about when the next arrival will come. This property makes the math simple, but means that the exponential distribution isn't a good fit for scenarios where events happen in bursts. Some of the later tutorials will explore models that handle this. + + The final concept is *server utilization*. A server completing work at rate $\mu$ has utilization $\rho = \lambda/\mu$, which is the long-run fraction of time it is busy. The system is stable only when $\rho < 1$. 
When $\rho \geq 1$, arrivals outpace service and the queue grows without bound. Even at exact balance ($\rho = 1$), randomness creates bursts that accumulate faster than the server recovers, so the expected queue length is infinite. + + The code below uses a discrete event simulation package called [asimpy](https://asimpy.readthedocs.io/) to model a simple job queue. The technical term for this kind of system is an *[M/M/1 queue](https://en.wikipedia.org/wiki/M/M/1_queue)*: memoryless (Poisson) arrivals, memoryless service times, and one server. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time" + ) + arrival_rate_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=2.0, + label="Arrival rate (Ξ»)" + ) + service_rate_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=2.0, + label="Service rate (ΞΌ)" + ) + window_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=1.0, + label="Counting window" + ) + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed" + ) + run_button = mo.ui.button(label="Run simulation") + mo.vstack([sim_time_slider, arrival_rate_slider, service_rate_slider, window_slider, seed_input, run_button]) + return ( + arrival_rate_slider, + seed_input, + service_rate_slider, + sim_time_slider, + window_slider, + ) + + +@app.cell +def _( + arrival_rate_slider, + random, + seed_input, + service_rate_slider, + sim_time_slider, + window_slider, +): + SIM_TIME = int(sim_time_slider.value) + ARRIVAL_RATE = float(arrival_rate_slider.value) + SERVICE_RATE = float(service_rate_slider.value) + WINDOW = float(window_slider.value) + SEED = int(seed_input.value) + random.seed(SEED) + return ARRIVAL_RATE, SERVICE_RATE, SIM_TIME, WINDOW + + +@app.cell +def _(Process, random): + class ArrivalSource(Process): + """Generates arrivals at a Poisson rate and records inter-arrival gaps.""" + + def 
init(self, rate, gaps): + self.rate = rate + self.gaps = gaps + + async def run(self): + while True: + gap = random.expovariate(self.rate) + await self.timeout(gap) + self.gaps.append(gap) + + return (ArrivalSource,) + + +@app.cell +def _(Process, SERVICE_RATE, random): + class Customer(Process): + def init(self, server, total_service): + self.server = server + self.total_service = total_service + + async def run(self): + async with self.server: + svc = random.expovariate(SERVICE_RATE) + await self.timeout(svc) + self.total_service[0] += svc + + return (Customer,) + + +@app.cell +def _(Customer, Process, random): + class Arrivals(Process): + def init(self, rate, server, total_service): + self.rate = rate + self.server = server + self.total_service = total_service + + async def run(self): + while True: + await self.timeout(random.expovariate(self.rate)) + Customer(self._env, self.server, self.total_service) + + return (Arrivals,) + + +@app.cell +def _( + ARRIVAL_RATE, + ArrivalSource, + Environment, + SIM_TIME, + WINDOW, + alt, + math, + pl, +): + def comparison(): + gaps = [] + env = Environment() + ArrivalSource(env, ARRIVAL_RATE, gaps) + env.run(until=SIM_TIME) + n = int(SIM_TIME / WINDOW) + counts = [0] * n + t = 0.0 + for g in gaps: + t += g + w = int(t / WINDOW) + if w < n: + counts[w] += 1 + freq = {} + for c in counts: + freq[c] = freq.get(c, 0) + 1 + lam_w = ARRIVAL_RATE * WINDOW + return pl.DataFrame([ + { + "k": k, + "observed": freq.get(k, 0) / n, + "theory": (lam_w**k) * math.exp(-lam_w) / math.factorial(k) + } + for k in range(max(counts) + 1) + ]) + + _df = comparison().unpivot( + on=["observed", "theory"], index="k", variable_name="source", value_name="probability" + ) + ( + alt.Chart(_df).mark_bar(opacity=0.8) + .encode( + x=alt.X("k:O", title=f"Arrivals per window (width {WINDOW})"), + y=alt.Y("probability:Q", title="Probability"), + color=alt.Color("source:N", title="Series"), + xOffset="source:N", + tooltip=["k:O", "source:N", "probability:Q"], + 
) + .properties(title=f"Arrival Counts: Observed vs. Poisson(Ξ»={ARRIVAL_RATE})") + ) + return + + +@app.cell +def _(Arrivals, Environment, Resource, SERVICE_RATE, SIM_TIME): + def simulate(rho): + env = Environment() + server = Resource(env, capacity=1) + total_service = [0.0] + Arrivals(env, rho * SERVICE_RATE, server, total_service) + env.run(until=SIM_TIME) + busy = total_service[0] / SIM_TIME + return {"rho_target": rho, "rho_observed": round(busy, 4), "idle_frac": round(1.0 - busy, 4)} + + return (simulate,) + + +@app.cell +def _(pl, simulate): + df_sim = pl.DataFrame([simulate(rho) for rho in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]]) + df_sim + return (df_sim,) + + +@app.cell +def _(alt, df_sim): + _df_plot = df_sim.unpivot( + on=["rho_observed", "idle_frac"], + index="rho_target", + variable_name="metric", + value_name="fraction", + ) + chart = ( + alt.Chart(_df_plot).mark_line(point=True) + .encode( + x=alt.X("rho_target:Q", title="Target utilization (ρ = Ξ»/ΞΌ)"), + y=alt.Y("fraction:Q", title="Fraction of time"), + color=alt.Color("metric:N", title="Metric"), + tooltip=["rho_target:Q", "metric:N", "fraction:Q"], + ) + .properties(title="Server Utilization: Busy and Idle Fractions vs. ρ") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Why inter-arrival gaps are exponential + + Divide $[0, t]$ into $n$ tiny slices of width $\Delta = t/n$. The probability of an arrival in each slice is $\approx \lambda\Delta$; slices are independent. The probability of no arrival across all $n$ slices is $(1 - \lambda t/n)^n \to e^{-\lambda t}$ as $n \to \infty$. This is $P(X > t)$ for the exponential distribution. + + ### Why mean equals standard deviation for the exponential + + If $E[X] = 1/\lambda$, then $E[X^2] = 2/\lambda^2$, so $\text{Var}(X) = 2/\lambda^2 - 1/\lambda^2 = 1/\lambda^2$ and $\text{SD}(X) = 1/\lambda = E[X]$. 
Equal mean and standard deviation means roughly one-third of gaps are longer than the mean. + + ### Why the busy fraction equals $\rho$ + + Over a long run $T$, about $N \approx \lambda T$ customers are served, each occupying the server for mean $1/\mu$. Total service time $\approx N/\mu = \lambda T/\mu = \rho T$. Dividing by $T$ gives busy fraction $= \rho$. + + ### Why $\rho = 1$ is unstable + + At exact balance, the queue length after each service completion performs a symmetric random walk on the non-negative integers. This random walk is *null recurrent*: it returns to zero but with infinite expected return time, so the mean queue length is infinite even when arrivals and service are perfectly matched on average. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/02_queue_formation.py b/queueing/02_queue_formation.py new file mode 100644 index 0000000000000000000000000000000000000000..6fcc4d0c9f352dde581c13030986599b5b30a29b --- /dev/null +++ b/queueing/02_queue_formation.py @@ -0,0 +1,307 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Queue Formation + + ## *Randomness Creates Waiting Even with Spare Capacity* + + We now combine arrivals (Poisson at rate $\lambda$) with a server (exponential service at rate $\mu$) into a complete queue. The system is stable because $\rho = \lambda/\mu < 1$, which means that on average, the server handles more work than arrives. + + Our first question is, how long is the queue? 
The surprising answer is that even when the server has plenty of spare capacity, customers wait. The mean number of customers in the system (both waiting and being served) is: + + $$L = \frac{\rho}{1 - \rho}$$ + + The table below gives some representative values: + + | $\rho$ | $L$ | + |:---:|:---:| + | 0.1 | 0.11 | + | 0.5 | 1.00 | + | 0.8 | 4.00 | + | 0.9 | 9.00 | + + When $\rho = 0.5$, half the server's capacity is idle, but there is on average one customer in the system at any moment. That customer either had to wait for a previous customer, or is currently being served. The queue is *never* consistently empty, even at moderate load. + + The formula also explains why simple queues are so sensitive to utilization: $L$ blows up as $\rho \to 1$. One way to think about this is that the denominator $(1 - \rho)$ is the spare capacity. As spare capacity vanishes, queue length increases. + """) + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Why Queues Form at All + + With deterministic arrivals and service (every customer arrives exactly $1/\lambda$ apart and takes exactly $1/\mu$), a server with $\rho < 1$ would never form a queue: each customer would depart before the next arrived. Randomness changes this. Sometimes three customers arrive close together before the server finishes even one, so the server falls briefly behind. While it recovers, customers wait. These temporary pileups are unavoidable whenever inter-arrival or service times have any variance. + + The probability that exactly $n$ customers are in an M/M/1 system at steady state is: + + $$P(N = n) = (1 - \rho)\,\rho^n \qquad n = 0, 1, 2, \ldots$$ + + This is a *geometric distribution* with success probability $1 - \rho$. The formula says the server is idle (i.e., n=0) with probability $1 - \rho$, which is consistent with the utilization result from the previous scenario. Each additional customer in the system is $\rho$ times less likely than the previous count. 
+ + This formula $L = \rho/(1-\rho)$ is the foundation of the later M/M/1 nonlinearity scenario, which shows the practical consequences of the $(1-\rho)$ denominator. Every queue-length formula in queueing theory has a similar structure: a traffic factor $\rho$ divided by a spare-capacity factor $(1 - \rho)$, possibly multiplied by a variability correction. + """) + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Implementation + + A `Customer` process increments a shared `in_system` counter on arrival and decrements it on departure. A `Monitor` process samples `in_system[0]` every `SAMPLE_INTERVAL` time units. After the simulation, the mean of the samples estimates $L$. The theoretical value $\rho/(1-\rho)$ is computed and compared. By the law of large numbers, this converges to the true steady-state mean as the simulation time grows. + + The simulation sweeps $\rho$ from 0.1 to 0.9, confirming the formula at each load level. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + service_rate_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=2.0, + label="Service rate", + ) + + sample_interval_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=1.0, + value=1.0, + label="Sample interval", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + service_rate_slider, + sample_interval_slider, + seed_input, + run_button, + ]) + return ( + sample_interval_slider, + seed_input, + service_rate_slider, + sim_time_slider, + ) + + +@app.cell +def _( + sample_interval_slider, + seed_input, + service_rate_slider, + sim_time_slider, +): + SIM_TIME = int(sim_time_slider.value) + SERVICE_RATE = float(service_rate_slider.value) + SAMPLE_INTERVAL = float(sample_interval_slider.value) + SEED = int(seed_input.value) + return 
SAMPLE_INTERVAL, SEED, SERVICE_RATE, SIM_TIME + + +@app.cell +def _(Process, SERVICE_RATE, random): + class Customer(Process): + def init(self, server, in_system): + self.server = server + self.in_system = in_system + + async def run(self): + self.in_system[0] += 1 + async with self.server: + await self.timeout(random.expovariate(SERVICE_RATE)) + self.in_system[0] -= 1 + + return (Customer,) + + +@app.cell +def _(Customer, Process, random): + class Arrivals(Process): + def init(self, rate, server, in_system): + self.rate = rate + self.server = server + self.in_system = in_system + + async def run(self): + while True: + await self.timeout(random.expovariate(self.rate)) + Customer(self._env, self.server, self.in_system) + + return (Arrivals,) + + +@app.cell +def _(Process, SAMPLE_INTERVAL): + class Monitor(Process): + """Samples total customers in system at regular intervals.""" + + def init(self, in_system, samples): + self.in_system = in_system + self.samples = samples + + async def run(self): + while True: + self.samples.append(self.in_system[0]) + await self.timeout(SAMPLE_INTERVAL) + + return (Monitor,) + + +@app.cell +def _( + Arrivals, + Environment, + Monitor, + Resource, + SERVICE_RATE, + SIM_TIME, + statistics, +): + def simulate(rho): + arrival_rate = rho * SERVICE_RATE + env = Environment() + server = Resource(env, capacity=1) + in_system = [0] + samples = [] + Arrivals(env, arrival_rate, server, in_system) + Monitor(env, in_system, samples) + env.run(until=SIM_TIME) + sim_L = statistics.mean(samples) + theory_L = rho / (1.0 - rho) + return { + "rho": rho, + "sim_L": round(sim_L, 4), + "theory_L": round(theory_L, 4), + "error_pct": round(100.0 * (sim_L - theory_L) / theory_L, 2), + } + + return (simulate,) + + +@app.cell +def _(SEED, pl, random, simulate): + def sweep(): + rows = [simulate(rho) for rho in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]] + return pl.DataFrame(rows) + + random.seed(SEED) + df = sweep() + df + return (df,) + + +@app.cell +def 
_(alt, df): + df_plot = df.unpivot( + on=["sim_L", "theory_L"], + index="rho", + variable_name="source", + value_name="L", + ) + chart = ( + alt.Chart(df_plot) + .mark_line(point=True) + .encode( + x=alt.X("rho:Q", title="Utilization (ρ)"), + y=alt.Y("L:Q", title="Mean customers in system (L)"), + color=alt.Color("source:N", title="Source"), + tooltip=["rho:Q", "source:N", "L:Q"], + ) + .properties(title="Queue Formation: Simulated vs. Theoretical L = ρ/(1βˆ’Ο)") + ) + chart + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Why is the queue length geometric? + + The M/M/1 queue can be analyzed as a random walk on the non-negative integers. When the server is busy, the queue grows by 1 with each arrival (with rate $\lambda$) and shrinks by 1 with each service completion (with rate $\mu$). The ratio $\lambda/\mu = \rho$ is the probability that the queue grows rather than shrinks at the next event. Under steady state, the probability of being at level $n$ is proportional to $\rho^n$ β€” because reaching level $n$ requires $n$ consecutive "up" steps. Normalizing so the probabilities sum to 1 gives $(1-\rho)\rho^n$. + + ### Deriving $L = \rho/(1-\rho)$ from the geometric distribution + + Given $P(N = n) = (1 - \rho)\rho^n$, the mean is: + + $$L = E[N] = \sum_{n=0}^{\infty} n \cdot (1-\rho)\rho^n = (1-\rho) \sum_{n=0}^{\infty} n\rho^n$$ + + A result from basic calculus is that the geometric series $\sum_{n=0}^{\infty} \rho^n = 1/(1-\rho)$. Differentiating both sides with respect to $\rho$: + + $$\sum_{n=0}^{\infty} n\rho^{n-1} = \frac{1}{(1-\rho)^2}$$ + + Multiply both sides by $\rho$: + + $$\sum_{n=0}^{\infty} n\rho^n = \frac{\rho}{(1-\rho)^2}$$ + + Substituting back: + + $$L = (1-\rho) \cdot \frac{\rho}{(1-\rho)^2} = \frac{\rho}{1-\rho}$$ + + ### Checking the formula at the boundaries + + When $\rho \to 0$: there are almost no arrivals, so $L \to 0$, i.e., the server is nearly always idle. 
+ + When $\rho \to 1$: the spare capacity is $(1-\rho) \to 0$, so $L \to \infty$, i.e., the queue grows without bound. + + Both limits match physical intuition. Note that the formula is exact (not an approximation) for an M/M/1 queue in steady state. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/03_littles_law.py b/queueing/03_littles_law.py new file mode 100644 index 0000000000000000000000000000000000000000..a7f1cfc4d682fd688a374aa1510d7da45514de60 --- /dev/null +++ b/queueing/03_littles_law.py @@ -0,0 +1,361 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + import marimo as mo + import random + import statistics + import altair as alt + import polars as pl + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Little's Law + + ## *The Universal Conservation Law of Queues* + + Little's Law states that in a stable system, L = λW, where: + + - L = mean number of customers in the system + - λ = mean arrival rate + - W = mean time a customer spends in the system + + The law holds regardless of arrival or service distributions, number of + servers, or scheduling discipline. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Why It Is Surprising + + Little's Law holds without any assumptions about the distribution of inter-arrival times or service times. It does not matter whether arrivals are Poisson, deterministic, or correlated, whether service times are exponential, constant, or heavy-tailed, whether there is one server or a hundred, or what scheduling discipline is used (FIFO, LIFO, random, or priority). As long as the system is stable and stationary, $L = \lambda W$. 
This universality is remarkable because almost every other formula in queueing theory *does* depend on distributional assumptions. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Practical Use + + Because $L = \lambda W$ is universal, it can be used to measure hard-to-observe quantities from easy-to-observe ones. For example, the mean number of requests in a web server ($L$) and the observed request rate ($\lambda$) immediately give the mean response time ($W = L/\lambda$) without needing to instrument individual request latencies. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Proof Sketch + + Consider a flow diagram where time runs horizontally and each customer traces a horizontal line from arrival to departure. The area under all lines equals both: + + - $\sum_i W_i$ (sum of individual sojourn times), and + - $\int_0^T L(t)\,dt$ (integral of instantaneous queue length). + + Dividing both sides by $T$ and taking $T \to \infty$: + + $$\bar{L} = \lambda \bar{W}$$ + + The argument is purely combinatorial: no probability is needed. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Simulation Design + + The simulation verifies Little's Law across three configurations: + + - M/M/1 (Poisson arrivals, exponential service, one server) + - M/D/1 (Poisson arrivals, deterministic service, one server) + - M/M/3 (Poisson arrivals, exponential service, three servers) + + For each configuration, $L$ is measured two ways: by direct sampling of the + queue length, and by computing $\lambda W$ from observed throughput and mean + sojourn time. + + The scenarios below sweep arrival rate $\lambda$ over (0.5, 1.0, 1.5, 2.0, 2.5) + at server capacities 2, 3, and 4. The `error_%` column shows how closely + $L_{\text{Little}} = \lambda W$ matches the directly sampled $L_{\text{direct}}$. 
+ """) + return + + +@app.cell +def _(): + SEED = 192 # random seed for reproducibility + SIM_TIME = 1000 # simulated time units per scenario + SAMPLE_INTERVAL = 1 # sim-time units between Monitor samples + SERVICE_RATE = 1.0 # exponential service rate (mu) for random service + + return SAMPLE_INTERVAL, SEED, SERVICE_RATE, SIM_TIME + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Random Processes + """) + return + + +@app.cell +def _(Process, SERVICE_RATE, random): + class RandomCustomer(Process): + def init(self, server, in_system, sojourn_times): + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + + async def run(self): + arrival = self.now + self.in_system[0] += 1 + async with self.server: + await self.timeout(random.expovariate(SERVICE_RATE)) + self.in_system[0] -= 1 + self.sojourn_times.append(self.now - arrival) + + return (RandomCustomer,) + + +@app.cell +def _(Process, RandomCustomer, random): + class RandomArrivals(Process): + def init(self, rate, server, in_system, sojourn_times): + self.rate = rate + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + + async def run(self): + while True: + await self.timeout(random.expovariate(self.rate)) + RandomCustomer(self._env, self.server, self.in_system, self.sojourn_times) + + return (RandomArrivals,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Deterministic Processes + """) + return + + +@app.cell +def _(Process): + DETERMINISTIC_SERVICE = 1.0 # fixed service time for M/D/1 scenarios + + class DeterministicCustomer(Process): + def init(self, server, in_system, sojourn_times): + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + + async def run(self): + arrival = self.now + self.in_system[0] += 1 + async with self.server: + await self.timeout(DETERMINISTIC_SERVICE) + self.in_system[0] -= 1 + self.sojourn_times.append(self.now - arrival) + + return DETERMINISTIC_SERVICE, 
DeterministicCustomer + + +@app.cell +def _(DeterministicCustomer, Process, random): + class DeterministicArrivals(Process): + def init(self, rate, server, in_system, sojourn_times): + self.rate = rate + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + + async def run(self): + while True: + await self.timeout(random.expovariate(self.rate)) + DeterministicCustomer(self._env, self.server, self.in_system, self.sojourn_times) + + return (DeterministicArrivals,) + + +@app.cell +def _(Process, SAMPLE_INTERVAL): + class Monitor(Process): + def init(self, in_system, samples): + self.in_system = in_system + self.samples = samples + + async def run(self): + while True: + self.samples.append(self.in_system[0]) + await self.timeout(SAMPLE_INTERVAL) + + return (Monitor,) + + +@app.cell +def _(Environment, Monitor, Resource, SIM_TIME, statistics): + def run_scenario(lam, capacity, arrivals_cls): + in_system = [0] + sojourns = [] + samples = [] + env = Environment() + server = Resource(env, capacity=capacity) + arrivals_cls(env, lam, server, in_system, sojourns) + Monitor(env, in_system, samples) + env.run(until=SIM_TIME) + L_direct = statistics.mean(samples) + W = statistics.mean(sojourns) + lam_obs = len(sojourns) / SIM_TIME + L_little = lam_obs * W + error = 100.0 * (L_little - L_direct) / L_direct + return { + "lambda": round(lam_obs, 3), + "capacity": capacity, + "W": round(W, 3), + "L_direct": round(L_direct, 3), + "L_little": round(L_little, 3), + "error_%": round(error, 2), + } + + return (run_scenario,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Verification: L = Ξ»W + """) + return + + +@app.cell +def _(RandomArrivals, SEED, pl, random, run_scenario): + random.seed(SEED) + rows = [] + for lam in (0.5, 1.0, 1.5, 2.0, 2.5): + for capacity in (2, 3, 4): + rows.append(run_scenario(lam, capacity, RandomArrivals)) + + df = pl.DataFrame(rows) + df + return (df,) + + +@app.cell +def _(alt, df, pl): + points = ( + 
alt.Chart(df) + .mark_point(size=100, filled=True) + .encode( + x=alt.X("L_direct:Q", title="L (direct sample)"), + y=alt.Y("L_little:Q", title="L = Ξ»W (Little's Law)"), + color=alt.Color("capacity:O", title="Capacity"), + tooltip=["lambda:Q", "capacity:O", "L_direct:Q", "L_little:Q", "error_%:Q"], + ) + ) + max_val = max(df["L_direct"].to_list()) * 1.1 + diagonal = ( + alt.Chart(pl.DataFrame({"x": [0.0, max_val], "y": [0.0, max_val]})) + .mark_line(color="gray", strokeDash=[4, 4]) + .encode(x="x:Q", y="y:Q") + ) + (diagonal + points).properties(title="Little's Law: Direct Sample vs. Ξ»W") + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## The Error Column + + The large error at $\lambda = 2.5$, capacity $= 2$ is a stability problem, not a simulation bug. + Little's Law only holds in steady state. For an M/M/c queue, steady state requires + that the arrival rate $\lambda$ be less than capacity times service rate $\mu$. + With `SERVICE_RATE = 1.0` and `capacity = 2`, + the maximum sustainable throughput is $2 \times 1.0 = 2.0$. + At $\lambda = 2.5$, the load exceeds service capacity, so the queue grows without bound. + By the time the simulation ends, + hundreds of customers are waiting in queue, and their sojourns are never recorded. + + ## Key Points + + 1. `Monitor` samples `in_system[0]` every `SAMPLE_INTERVAL` time units to + estimate $L$ directly without any queueing formula. + + 2. The `error_%` column shows that $L_{\text{direct}}$ and $\lambda W$ agree to within + less than 1% for all stable configurations, even though the service-time + distributions are completely different. + + 3. `DeterministicCustomer` uses the fixed `DETERMINISTIC_SERVICE` constant + rather than a random draw; everything else in the simulation is unchanged. + The law still holds. + + 4. `Resource(env, capacity=3)` creates a three-slot server for M/M/3. + Setting the arrival rate to 2.4 gives utilization 0.8 per server. 
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### The area argument made concrete + + Draw a horizontal time axis from $t = 0$ to $t = T$. Each customer gets a horizontal bar starting at their arrival time and ending at their departure time. The length of their bar is exactly their sojourn time $W_i$ β€” the total time they spend in the system. At any moment $t$, the number of bars that cross that vertical slice is exactly $L(t)$, the instantaneous number of customers in the system. + + Now compute the total area under all the bars in two different ways. First, add up the lengths of all the bars: total area $= \sum_i W_i$. Second, integrate the height of the stack over time: total area $= \int_0^T L(t)\,dt$. These are the same area, so $\sum_i W_i = \int_0^T L(t)\,dt$. + + Divide both sides by $T$. The right side becomes the time-average $\bar{L}$. The left side becomes $(n/T) \cdot \bar{W}$, where $n$ is the total number of customers and $\bar{W}$ is their mean sojourn time. As $T \to \infty$, $n/T \to \lambda$ (the long-run arrival rate). That gives $\bar{L} = \lambda \bar{W}$, which is Little's Law. + + ### No distribution required + + The argument above uses only geometry. There is no probability distribution, no exponential assumption, no Poisson process. The shape of each bar (i.e., how long each customer takes) can be anything. This is why the law applies to M/M/1, M/D/1, M/M/3, and every other configuration equally. + + ### Using it in practice + + Suppose you run a web service. Your monitoring dashboard shows $\lambda = 500$ requests per second and your server logs show a mean response time of $W = 20$ milliseconds. Little's Law immediately tells you that the mean number of active requests in the system is $L = \lambda W = 500 \times 0.02 = 10$ requests. 
Alternatively, if you observe $L$ and $\lambda$ but not individual response times, you get $W = L/\lambda$ without any per-request timing instrumentation. + + ### Units check + + $\lambda$ has units of customers per unit time; $W$ has units of time; so $L = \lambda W$ is dimensionless β€” a pure count of customers. Always verify units when applying Little's Law to a new problem: if your units do not cancel correctly, you have applied the law incorrectly. + + ### Stability condition + + Little's Law requires the system to reach steady state: over the long run, arrivals and departures must balance. If $\lambda > \mu$ (the arrival rate exceeds the service rate), the queue grows without bound. $L = \infty$ and $W = \infty$; the law still holds, but it tells you the system is broken, not that it is well-behaved. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/04_sojourn_time.py b/queueing/04_sojourn_time.py new file mode 100644 index 0000000000000000000000000000000000000000..98c1fd237028ef99583a78a86dcea060c780c079 --- /dev/null +++ b/queueing/04_sojourn_time.py @@ -0,0 +1,306 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Sojourn Time + + ## *How Long Does a Customer Actually Spend in the System?* + + The previous scenario measured $L$, the mean number of customers in the system at any moment. This scenario measures $W$, the mean time a single customer spends from arrival to departure. 
This is called the *sojourn time*, *residence time*, or *response time*, and has two components: + + - $W_q$: time spent waiting in the queue because the server is busy. + - $W_s$: time spent in service while the server is working on this customer. + + $$W = W_q + W_s$$ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## The Surprising Finding + + For an M/M/1 queue, the mean sojourn time is: + + $$W = \frac{1}{\mu(1 - \rho)}$$ + + This blows up as $\rho \to 1$, just like $L$. But the split between waiting and service shifts dramatically as load increases. + + | $\rho$ | $W_q$ (wait) | $W_s$ (service) | $W$ (total) | + |:---:|:---:|:---:|:---:| + | 0.1 | 0.11 | 1.00 | 1.11 | + | 0.5 | 1.00 | 1.00 | 2.00 | + | 0.9 | 9.00 | 1.00 | 10.00 | + + The mean service time $W_s = 1/\mu = 1.0$ is constant: the server always takes the same average time to serve one customer. All the extra delay at high load is pure waiting: $W_q = \rho/(\mu(1-\rho))$ grows without bound while $W_s$ stays fixed. At $\rho = 0.9$, 90% of a customer's time is spent waiting for the server to become free. + + This formula is closely connected to Little's Law: + + $$L = \lambda W$$ + + Plugging in $W = 1/(\mu(1-\rho))$ and $\lambda = \rho\mu$: + + $$L = \rho\mu \cdot \frac{1}{\mu(1-\rho)} = \frac{\rho}{1-\rho}$$ + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + `Customer` records its arrival time, then captures the exact moment it enters service (`service_start = self.now` inside the `async with self.server:` block, which only executes once the resource is acquired). The wait time is `service_start βˆ’ arrival` and the sojourn time is `departure βˆ’ arrival`. + + A `Monitor` samples the `in_system` counter periodically to estimate $L$ independently. The final dataframe reports $W_q$, $W_s$, $W$, the theoretical $W$, $L$ from sampling, and $L$ from Little's Law, allowing all three to be cross-checked. 
+ """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + service_rate_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=1.0, + label="Service rate", + ) + + sample_interval_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=1.0, + value=1.0, + label="Sample interval", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + service_rate_slider, + sample_interval_slider, + seed_input, + run_button, + ]) + return ( + sample_interval_slider, + seed_input, + service_rate_slider, + sim_time_slider, + ) + + +@app.cell +def _( + sample_interval_slider, + seed_input, + service_rate_slider, + sim_time_slider, +): + SIM_TIME = int(sim_time_slider.value) + SERVICE_RATE = float(service_rate_slider.value) + SAMPLE_INTERVAL = float(sample_interval_slider.value) + SEED = int(seed_input.value) + return SAMPLE_INTERVAL, SEED, SERVICE_RATE, SIM_TIME + + +@app.cell +def _(Process, SERVICE_RATE, random): + class Customer(Process): + def init(self, server, in_system, sojourn_times, wait_times): + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + self.wait_times = wait_times + + async def run(self): + arrival = self.now + self.in_system[0] += 1 + async with self.server: + service_start = self.now + await self.timeout(random.expovariate(SERVICE_RATE)) + self.in_system[0] -= 1 + self.sojourn_times.append(self.now - arrival) + self.wait_times.append(service_start - arrival) + + return (Customer,) + + +@app.cell +def _(Customer, Process, random): + class Arrivals(Process): + def init(self, rate, server, in_system, sojourn_times, wait_times): + self.rate = rate + self.server = server + self.in_system = in_system + self.sojourn_times = sojourn_times + self.wait_times = wait_times + + async def run(self): 
+ while True: + await self.timeout(random.expovariate(self.rate)) + Customer(self._env, self.server, self.in_system, self.sojourn_times, self.wait_times) + + return (Arrivals,) + + +@app.cell +def _(Process, SAMPLE_INTERVAL): + class Monitor(Process): + def init(self, in_system, samples): + self.in_system = in_system + self.samples = samples + + async def run(self): + while True: + self.samples.append(self.in_system[0]) + await self.timeout(SAMPLE_INTERVAL) + + return (Monitor,) + + +@app.cell +def _( + Arrivals, + Environment, + Monitor, + Resource, + SERVICE_RATE, + SIM_TIME, + statistics, +): + def simulate(rho): + rate = rho * SERVICE_RATE + env = Environment() + server = Resource(env, capacity=1) + in_system = [0] + sojourn_times = [] + wait_times = [] + samples = [] + Arrivals(env, rate, server, in_system, sojourn_times, wait_times) + Monitor(env, in_system, samples) + env.run(until=SIM_TIME) + mean_W = statistics.mean(sojourn_times) + mean_Wq = statistics.mean(wait_times) + mean_Ws = mean_W - mean_Wq + mean_L = statistics.mean(samples) + lam = len(sojourn_times) / SIM_TIME + return { + "rho": rho, + "mean_Wq": round(mean_Wq, 4), + "mean_Ws": round(mean_Ws, 4), + "mean_W": round(mean_W, 4), + "theory_W": round(1.0 / (SERVICE_RATE * (1.0 - rho)), 4), + "L_sampled": round(mean_L, 4), + "L_little": round(lam * mean_W, 4), + } + + return (simulate,) + + +@app.cell +def _(SEED, pl, random, simulate): + random.seed(SEED) + df = pl.DataFrame([simulate(rho) for rho in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]) + df + return (df,) + + +@app.cell +def _(alt, df): + df_plot = df.select(["rho", "mean_Wq", "mean_Ws"]).unpivot( + on=["mean_Wq", "mean_Ws"], + index="rho", + variable_name="component", + value_name="time", + ) + chart = ( + alt.Chart(df_plot) + .mark_area() + .encode( + x=alt.X("rho:Q", title="Utilization (ρ)"), + y=alt.Y("time:Q", title="Mean time", stack="zero"), + color=alt.Color("component:N", title="Component"), + tooltip=["rho:Q", "component:N", 
"time:Q"], + ) + .properties(title="Sojourn Time Components: Wq (waiting) + Ws (service) = W") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Why $W_s = 1/\mu$ regardless of $\rho$ + + Service time is drawn from an exponential distribution with rate $\mu$, so its mean is $1/\mu$. This is a property of the distribution, not of the queue. No matter how busy the server is, once it starts serving you it takes on average $1/\mu$ time. + + ### Deriving $W_q$ + + Since $W = W_q + W_s$ and $W_s = 1/\mu$: + + $$W_q = W - W_s = \frac{1}{\mu(1-\rho)} - \frac{1}{\mu} + = \frac{1}{\mu}\left(\frac{1}{1-\rho} - 1\right) + = \frac{1}{\mu} \cdot \frac{\rho}{1-\rho} + = \frac{\rho}{\mu(1-\rho)}$$ + + Note that $W_q = \rho \cdot W$: at high load, almost all of $W$ is waiting. + + ### Units check + + $\lambda$ has units of [customers/time]; $W$ has units of [time]; so $L = \lambda W$ has units of [customers/time $\times$ time] $=$ [customers]. This count of people is dimensionless, as it should be. Checking units this way is a quick sanity test whenever you apply Little's Law to a real problem. 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/05_mm1_nonlinearity.py b/queueing/05_mm1_nonlinearity.py new file mode 100644 index 0000000000000000000000000000000000000000..87bf0ca7dcfd89330adb6e0d59d83ad58d185db1 --- /dev/null +++ b/queueing/05_mm1_nonlinearity.py @@ -0,0 +1,255 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # M/M/1 Queue Nonlinearity + + ## *The 90% Utilization Trap* + + A single server handles jobs that arrive randomly and take a random amount of time to process. If both inter-arrival times and service times follow exponential distributions, this is called an *M/M/1 queue*, and is the simplest model in queueing theory. + + Managers often treat utilization linearly: "90% busy is only a little worse than 80% busy." The M/M/1 formula shows this intuition is badly wrong. The mean number of jobs in the system (waiting and being served) is: + + $$L = \frac{\rho}{1 - \rho}$$ + + where $\rho = \lambda / \mu$ is the utilization ratio (arrival rate divided by service rate). The mean time a job spends in the system follows from Little's Law $L = \lambda W$: + + $$W = \frac{1}{\mu - \lambda} = \frac{1}{\mu(1 - \rho)}$$ + + The denominator $(1 - \rho)$ causes both $L$ and $W$ to blow up as $\rho \to 1$. 
+
+    | $\rho$ | $L = \rho/(1-\rho)$ | Marginal $\Delta L$ per 0.1 step |
+    |-------:|--------------------:|--------------------------------:|
+    | 0.50 | 1.00 | β€” |
+    | 0.60 | 1.50 | +0.50 |
+    | 0.70 | 2.33 | +0.83 |
+    | 0.80 | 4.00 | +1.67 |
+    | 0.90 | 9.00 | +5.00 |
+
+    Each equal step in $\rho$ produces a larger jump in queue length than the previous step. Going from 80% to 90% utilization adds more queue length than going from 0% to 80% combined. This happens because the queue is stabilized by the slack in service capacity. When $\rho = 0.9$, only 10% of capacity is slack. Any random burst of arrivals takes far longer to drain than when $\rho = 0.5$ and 50% of capacity is slack. The system spends most of its time recovering from bursts rather than idling.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Implementation
+
+    The simulation uses a single `Resource(capacity=1)` as the server. A generator process creates customers with inter-arrival gaps drawn from $\text{Exp}(\lambda)$. Each customer records its arrival time, waits to acquire the server, receives $\text{Exp}(\mu)$ service, and logs its total sojourn time. The mean queue length $L$ is computed via Little's Law from the observed mean sojourn time. 
+ """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + service_rate_slider = mo.ui.slider( + start=1.0, + stop=5.0, + step=0.01, + value=1.0, + label="Service rate", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + service_rate_slider, + seed_input, + run_button, + ]) + return seed_input, service_rate_slider, sim_time_slider + + +@app.cell +def _(seed_input, service_rate_slider, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + SERVICE_RATE = float(service_rate_slider.value) + SEED = int(seed_input.value) + return SEED, SERVICE_RATE, SIM_TIME + + +@app.cell +def _(Process, random): + class Customer(Process): + def init(self, server, service_rate, sojourn_times): + self.server = server + self.service_rate = service_rate + self.sojourn_times = sojourn_times + + async def run(self): + arrival = self.now + async with self.server: + await self.timeout(random.expovariate(self.service_rate)) + self.sojourn_times.append(self.now - arrival) + + return (Customer,) + + +@app.cell +def _(Customer, Process, random): + class ArrivalStream(Process): + def init(self, arrival_rate, service_rate, server, sojourn_times): + self.arrival_rate = arrival_rate + self.service_rate = service_rate + self.server = server + self.sojourn_times = sojourn_times + + async def run(self): + while True: + await self.timeout(random.expovariate(self.arrival_rate)) + Customer(self._env, self.server, self.service_rate, self.sojourn_times) + + return (ArrivalStream,) + + +@app.cell +def _( + ArrivalStream, + Environment, + Resource, + SERVICE_RATE, + SIM_TIME, + statistics, +): + def simulate(rho): + arrival_rate = rho * SERVICE_RATE + sojourn_times = [] + env = Environment() + server = Resource(env, capacity=1) + ArrivalStream(env, arrival_rate, 
SERVICE_RATE, server, sojourn_times) + env.run(until=SIM_TIME) + mean_W = statistics.mean(sojourn_times) if sojourn_times else 0.0 + sim_L = arrival_rate * mean_W + theory_L = rho / (1.0 - rho) + return sim_L, theory_L + + return (simulate,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(""" + ## Simulated vs. Theoretical Queue Length + """) + return + + +@app.cell +def _(SEED, pl, random, simulate): + def sweep(): + random.seed(SEED) + rhos = [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95] + sweep_rows = [] + for rho in rhos: + sim_L, theory_L = simulate(rho) + pct = 100.0 * (sim_L - theory_L) / theory_L + sweep_rows.append({"rho": rho, "theory_L": theory_L, "sim_L": sim_L, "pct_error": pct}) + return pl.DataFrame(sweep_rows) + + df_sweep = sweep() + df_sweep + return (df_sweep,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(""" + ## Marginal Increase in L per 0.1 Step in ρ (Theory) + """) + return + + +@app.cell +def _(pl): + def marginal(): + marginal_rows = [] + prev_L, prev_rho = None, None + for rho in [0.5, 0.6, 0.7, 0.8, 0.9]: + theory_L = rho / (1.0 - rho) + if prev_L is not None: + marginal_rows.append({"rho_from": prev_rho, "rho_to": rho, "delta_L": round(theory_L - prev_L, 4)}) + prev_L, prev_rho = theory_L, rho + return pl.DataFrame(marginal_rows) + + df_marginal = marginal() + df_marginal + return + + +@app.cell +def _(alt, df_sweep): + df_plot = df_sweep.unpivot( + on=["theory_L", "sim_L"], index="rho", variable_name="source", value_name="L" + ) + chart = ( + alt.Chart(df_plot) + .mark_line(point=True) + .encode( + x=alt.X("rho:Q", title="Utilization (ρ)"), + y=alt.Y("L:Q", title="Mean queue length (L)"), + color=alt.Color("source:N", title="Source"), + tooltip=["rho:Q", "source:N", "L:Q"], + ) + .properties(title="M/M/1 Queue Length vs. 
Utilization") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Key Takeaway + + For any system approximated by an M/M/1 queue, **never target utilization above 80–85%** if low latency matters. The last few percent of throughput come at an enormous cost in queue length and wait time. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/06_pooled_vs_separate.py b/queueing/06_pooled_vs_separate.py new file mode 100644 index 0000000000000000000000000000000000000000..826190a51b06319b6a9733fbb596f4c3cd5e06e1 --- /dev/null +++ b/queueing/06_pooled_vs_separate.py @@ -0,0 +1,287 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Resource + + return Environment, Process, Resource, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Pooled vs. Separate Queues + + ## *Why Airports Switched to Single Lines* + + A facility has two identical servers. Customers arrive as a Poisson process and each needs one server for an exponentially distributed service time. Which queueing discipline should the facility use? + + - Separate queues: each server has its own dedicated line; customers randomly pick a line on arrival and cannot switch. + - Pooled queue: a single shared line feeds whichever server becomes free first. + + It turns out that pooling the queues is always better, even though both systems have identical total arrival rate, identical per-server service rate, and identical utilization $\rho$. The pooled system consistently produces shorter mean wait times, often by a factor of two or more at moderate utilization. 
The reason is that separate queues waste servers' idle time. In separate queues, one server may be idle while customers wait in the other line. Pooling eliminates this mismatch: a free server always serves the next waiting customer. + + ### Why Separate Queues Persist + + Despite being provably worse, separate queues feel fairer because customers can see their progress. Single lines eliminate the anxiety of watching the other queue move faster, but historically customers resisted them until airlines and banks demonstrated the improvement empirically in the 1960s–70s. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + This tutorial explores this finding using two simulations run with identical random seeds: + + 1. Pooled: `Resource(capacity=2)` with one arrival stream. The resource grants access to whichever capacity slot is free. + 2. Separate: two `Resource(capacity=1)` instances. Arrivals call `random.choice` to pick a server and cannot switch even if it is slower. + + The mean sojourn time is collected across a sweep of utilization levels $\rho$. 
+ """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + arrival_rate_slider = mo.ui.slider( + start=0.5, + stop=1.9, + step=0.05, + value=1.8, + label="Arrival rate", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + arrival_rate_slider, + seed_input, + run_button, + ]) + return arrival_rate_slider, seed_input, sim_time_slider + + +@app.cell +def _(arrival_rate_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + ARRIVAL_RATE = float(arrival_rate_slider.value) + SEED = int(seed_input.value) + SERVICE_RATE = 1.0 + N_SERVERS = 2 + RHO = ARRIVAL_RATE / (N_SERVERS * SERVICE_RATE) + return ARRIVAL_RATE, N_SERVERS, RHO, SEED, SERVICE_RATE, SIM_TIME + + +@app.cell +def _(Process, SERVICE_RATE, random): + class Customer(Process): + def init(self, server, sojourn_times): + self.server = server + self.sojourn_times = sojourn_times + + async def run(self): + arrival = self.now + async with self.server: + await self.timeout(random.expovariate(SERVICE_RATE)) + self.sojourn_times.append(self.now - arrival) + + return (Customer,) + + +@app.cell +def _(Customer, Process, random): + class PooledArrivals(Process): + def init(self, arrival_rate, server, sojourn_times): + self.arrival_rate = arrival_rate + self.server = server + self.sojourn_times = sojourn_times + + async def run(self): + while True: + await self.timeout(random.expovariate(self.arrival_rate)) + Customer(self._env, self.server, self.sojourn_times) + + return (PooledArrivals,) + + +@app.cell +def _(Customer, Process, random): + class SeparateArrivals(Process): + def init(self, arrival_rate, servers, sojourn_times): + self.arrival_rate = arrival_rate + self.servers = servers + self.sojourn_times = sojourn_times + + async def run(self): + while True: + await 
self.timeout(random.expovariate(self.arrival_rate)) + server = random.choice(self.servers) + Customer(self._env, server, self.sojourn_times) + + return (SeparateArrivals,) + + +@app.cell +def _( + ARRIVAL_RATE, + Environment, + N_SERVERS, + PooledArrivals, + Resource, + SEED, + SIM_TIME, + random, + statistics, +): + def run_pooled(arrival_rate=ARRIVAL_RATE): + random.seed(SEED) + sojourn_times = [] + env = Environment() + shared_server = Resource(env, capacity=N_SERVERS) + PooledArrivals(env, arrival_rate, shared_server, sojourn_times) + env.run(until=SIM_TIME) + return statistics.mean(sojourn_times) + + return (run_pooled,) + + +@app.cell +def _( + ARRIVAL_RATE, + Environment, + N_SERVERS, + Resource, + SEED, + SIM_TIME, + SeparateArrivals, + random, + statistics, +): + def run_separate(arrival_rate=ARRIVAL_RATE): + random.seed(SEED) + sojourn_times = [] + env = Environment() + servers = [Resource(env, capacity=1) for _ in range(N_SERVERS)] + SeparateArrivals(env, arrival_rate, servers, sojourn_times) + env.run(until=SIM_TIME) + return statistics.mean(sojourn_times) + + return (run_separate,) + + +@app.cell +def _(ARRIVAL_RATE, N_SERVERS, SERVICE_RATE, pl, run_pooled, run_separate): + def sweep(): + sweep_rows = [] + for rho in [0.5, 0.6, 0.7, 0.8, 0.9]: + rate = rho * N_SERVERS * SERVICE_RATE + pw = run_pooled(arrival_rate=rate) + sw = run_separate(arrival_rate=rate) + sweep_rows.append({"rho": rho, "pooled_W": pw, "separate_W": sw, "ratio": sw / pw}) + return pl.DataFrame(sweep_rows) + + df_sweep = sweep() + pooled_W = run_pooled(arrival_rate=ARRIVAL_RATE) + separate_W = run_separate(arrival_rate=ARRIVAL_RATE) + return df_sweep, pooled_W, separate_W + + +@app.cell(hide_code=True) +def _(N_SERVERS, RHO, SERVICE_RATE, mo, pooled_W, separate_W): + mo.md(f""" + ## Results + + {N_SERVERS} servers, service rate {SERVICE_RATE}, utilisation ρ = {RHO:.2f} + + At ρ = {RHO:.2f}: pooled W = {pooled_W:.3f}, separate W = {separate_W:.3f} + β€” separate queues are 
**{separate_W / pooled_W:.2f}Γ—** slower + """) + return + + +@app.cell +def _(df_sweep): + df_sweep + return + + +@app.cell +def _(alt, df_sweep): + df_plot = df_sweep.unpivot( + on=["pooled_W", "separate_W"], index="rho", variable_name="system", value_name="W" + ) + chart = ( + alt.Chart(df_plot) + .mark_line(point=True) + .encode( + x=alt.X("rho:Q", title="Utilization per server (ρ)"), + y=alt.Y("W:Q", title="Mean sojourn time (W)"), + color=alt.Color("system:N", title="Queue type"), + tooltip=["rho:Q", "system:N", "W:Q"], + ) + .properties(title="Pooled vs. Separate Queues: Mean Sojourn Time") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Why pooling always wins + + Two separate M/M/1 queues each running at utilization $\rho$ have mean sojourn time $W_{\text{sep}} = 1/(\mu(1-\rho))$. A pooled M/M/2 queue with the same total arrival rate has strictly lower mean sojourn time for every value of $0 < \rho < 1$. The proof uses the [Erlang-C formula](https://en.wikipedia.org/wiki/Erlang_(unit)#Erlang_C_formula), but the intuition is simpler: pooling converts two independent random processes into one, and the combined queue can exploit any idle capacity instantly. At $\rho = 0.8$, separate queues give roughly twice the mean wait of a pooled queue. + + ### Connection to variance reduction + + Think of the service delivered in a time window by two separate servers as two independent random variables $X_1$ and $X_2$. Their average $(X_1 + X_2)/2$ has variance $\sigma^2/2$, which is half the variance of either component alone. Pooling achieves something similar: by combining demand into one stream served by both servers, the system smooths out random fluctuations. The pooled queue is, in effect, averaging over both servers' idle periods instead of locking each idle period to a single lane. + + ### Rule of thumb + + At $\rho = 0.8$, separate queues produce roughly double the mean wait of a pooled queue. 
This factor grows as $\rho$ increases, because the $(1-\rho)$ term in the denominator amplifies any wasted capacity. The lesson: whenever you can route demand flexibly to a shared resource, do it. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/07_late_merge.py b/queueing/07_late_merge.py new file mode 100644 index 0000000000000000000000000000000000000000..08b2f4c3b45ae78a4c4271e622b808d0c8802b95 --- /dev/null +++ b/queueing/07_late_merge.py @@ -0,0 +1,365 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + + import altair as alt + import polars as pl + + from asimpy import Environment, Event, Process, Queue + + return Environment, Event, Process, Queue, alt, mo, pl, random + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Late Merge + + ## *Courtesy Reduces Throughput* + + A two-lane road narrows to one lane at a construction zone. Drivers face a choice: + + - Early (courtesy) merge: upon seeing the "Lane Ends Ahead" sign, drivers immediately move from the closing lane into the open lane. + - Late (zipper) merge: drivers use both lanes all the way to the merge point, then alternate β€” one car from each lane in turn, like a zipper. + + It turns out that late merging produces higher throughput and shorter queues than early merging, even though it feels less polite. Early merging creates a single long queue that wastes the closing lane's capacity. Late merging fully utilises both lanes up to the bottleneck, then processes cars at the same rate with a zipper pattern. This result is not merely theoretical: the Minnesota Department of Transportation, the German ADAC, and the UK Highway Code all recommend late merging in slow-moving traffic precisely because it is provably more efficient. 
+ + The primary benefit of late merging is higher throughput: more cars complete the merge per unit time. Mean sojourn time for individual cars may actually be slightly longer under late merge, because the larger total buffer admits more cars into the system, increasing average queue occupancy. By Little's Law $L = \lambda W$, if $\lambda$ grows faster than $L$ falls, $W$ rises. This is not a disadvantage: it means more drivers successfully pass through rather than being turned away. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Why Early Merging Hurts + + With early merging: + + - All $N$ cars queue in one lane of capacity $K$. + - When the single queue is full ($K$ cars), arriving cars are turned away (blocking), reducing throughput. + - The merge point processes at rate $\mu$ regardless, but there are fewer cars available to process (the second lane is empty and wasted). + + With late merging: + + - Cars distribute across two lanes, each of capacity $K$ (total $2K$). + - The merge point receives supply from both lanes, reducing starvation. + - Blocking occurs only when *both* lanes are simultaneously full, a rarer event. + + The key metric is the *blocking probability*: the fraction of arriving cars turned away because the pre-merge buffer is full. Let $\rho = \lambda/\mu$ be the utilisation of the merge bottleneck. For a finite-buffer M/M/1/K queue the blocking probability is: + + $$P_{\text{block}} = \frac{(1-\rho)\rho^K}{1 - \rho^{K+1}}$$ + + Early merge has buffer $K$; late merge effectively has buffer $2K$ (spread across two lanes). Since $P_{\text{block}}$ decreases exponentially in $K$, doubling the available buffer dramatically reduces blocking. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + - **Early merge**: one `Queue(max_capacity=LANE_CAPACITY)` feeds a single `MergeServer`. Arrivals that find the lane full are counted as blocked and turned away. 
+ - **Late merge**: two `Queue(max_capacity=LANE_CAPACITY)` instances feed a zipper `MergeServer` that alternates between lanes. Arrivals pick the shorter lane; a car is blocked only if its chosen lane is full. + + Both systems have the same total arrival rate $\lambda$ and merge service rate $\mu$. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + lane_capacity_slider = mo.ui.slider( + start=2, + stop=30, + step=1, + value=10, + label="Lane capacity (K)", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + lane_capacity_slider, + seed_input, + run_button, + ]) + return lane_capacity_slider, seed_input, sim_time_slider + + +@app.cell +def _(lane_capacity_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + LANE_CAPACITY = int(lane_capacity_slider.value) + SEED = int(seed_input.value) + ARRIVAL_RATE = 1.85 + MERGE_RATE = 2.0 + RHO = ARRIVAL_RATE / MERGE_RATE + return ARRIVAL_RATE, LANE_CAPACITY, MERGE_RATE, RHO, SEED, SIM_TIME + + +@app.cell +def _(Event, Process): + class EarlyMergeCar(Process): + def init(self, lane, sojourn_times, blocked): + self.lane = lane + self.sojourn_times = sojourn_times + self.blocked = blocked + + async def run(self): + arrival = self.now + if self.lane.is_full(): + self.blocked.append(1) + return + done = Event(self._env) + await self.lane.put((arrival, done)) + await done + self.sojourn_times.append(self.now - arrival) + + return (EarlyMergeCar,) + + +@app.cell +def _(Event, Process): + class LateMergeCar(Process): + def init(self, lane1, lane2, sojourn_times, blocked): + self.lane1 = lane1 + self.lane2 = lane2 + self.sojourn_times = sojourn_times + self.blocked = blocked + + async def run(self): + arrival = self.now + target = ( + self.lane1 + if len(self.lane1._items) 
<= len(self.lane2._items) + else self.lane2 + ) + if target.is_full(): + self.blocked.append(1) + return + done = Event(self._env) + await target.put((arrival, done)) + await done + self.sojourn_times.append(self.now - arrival) + + return (LateMergeCar,) + + +@app.cell +def _(MERGE_RATE, Process, random): + class MergeServer(Process): + def init(self, lanes, zipper): + self.lanes = lanes + self.zipper = zipper + self._turn = 0 + + async def run(self): + while True: + if self.zipper: + served = False + for _ in range(len(self.lanes)): + idx = self._turn % len(self.lanes) + self._turn += 1 + if not self.lanes[idx].is_empty(): + _, arrival, done = (self.now,) + (await self.lanes[idx].get()) + await self.timeout(random.expovariate(MERGE_RATE)) + done.succeed() + served = True + break + if not served: + await self.timeout(0.05) + else: + _, arrival, done = (self.now,) + (await self.lanes[0].get()) + await self.timeout(random.expovariate(MERGE_RATE)) + done.succeed() + + + return (MergeServer,) + + +@app.cell +def _(ARRIVAL_RATE, EarlyMergeCar, LateMergeCar, Process, random): + class ArrivalStream(Process): + def init(self, lanes, sojourn_times, blocked, zipper): + self.lanes = lanes + self.sojourn_times = sojourn_times + self.blocked = blocked + self.zipper = zipper + + async def run(self): + while True: + await self.timeout(random.expovariate(ARRIVAL_RATE)) + if self.zipper: + LateMergeCar(self._env, self.lanes[0], self.lanes[1], self.sojourn_times, self.blocked) + else: + EarlyMergeCar(self._env, self.lanes[0], self.sojourn_times, self.blocked) + + return (ArrivalStream,) + + +@app.cell +def _(ArrivalStream, Environment, MergeServer, Queue, SEED, SIM_TIME, random): + def run_scenario(zipper, k): + random.seed(SEED) + env = Environment() + sojourn_times = [] + blocked = [] + + if zipper: + lanes = [Queue(env, max_capacity=k), Queue(env, max_capacity=k)] + else: + lanes = [Queue(env, max_capacity=k)] + ArrivalStream(env, lanes, sojourn_times, blocked, zipper) + 
MergeServer(env, lanes, zipper) + env.run(until=SIM_TIME) + total = len(sojourn_times) + len(blocked) + blocked_pct = 100.0 * len(blocked) / total if total else 0.0 + throughput = len(sojourn_times) / SIM_TIME + mean_sojourn = sum(sojourn_times) / len(sojourn_times) if sojourn_times else 0.0 + return { + "throughput": throughput, + "blocked_pct": blocked_pct, + "mean_sojourn": mean_sojourn, + "total_buffer": k * (2 if zipper else 1), + } + + return (run_scenario,) + + +@app.cell(hide_code=True) +def _(ARRIVAL_RATE, MERGE_RATE, RHO, mo): + mo.md(f""" + ## Main Results + + Arrival rate: {ARRIVAL_RATE}/unit, merge service rate: {MERGE_RATE}/unit, + utilisation ρ = {RHO:.3f} + """) + return + + +@app.cell +def _(LANE_CAPACITY, pl, run_scenario): + early = run_scenario(zipper=False, k=LANE_CAPACITY) + late = run_scenario(zipper=True, k=LANE_CAPACITY) + df_main = pl.DataFrame([ + {"strategy": "early", **early}, + {"strategy": "late", **late}, + ]) + df_main + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(""" + ## Effect of Buffer Size on Blocking Rate + """) + return + + +@app.cell +def _(pl, run_scenario): + def sweep(): + sweep_rows = [] + for k in [5, 10, 15, 20, 30]: + ep = run_scenario(zipper=False, k=k)["blocked_pct"] + lp = run_scenario(zipper=True, k=k)["blocked_pct"] + sweep_rows.append({"buffer_k": k, "early_blocked_pct": ep, "late_blocked_pct": lp}) + return pl.DataFrame(sweep_rows) + + df_sweep = sweep() + df_sweep + return (df_sweep,) + + +@app.cell +def _(alt, df_sweep): + df_plot = df_sweep.unpivot( + on=["early_blocked_pct", "late_blocked_pct"], + index="buffer_k", + variable_name="strategy", + value_name="blocked_pct", + ) + chart = ( + alt.Chart(df_plot) + .mark_line(point=True) + .encode( + x=alt.X("buffer_k:Q", title="Buffer size per lane (K)"), + y=alt.Y("blocked_pct:Q", title="Blocked cars (%)"), + color=alt.Color("strategy:N", title="Merge strategy"), + tooltip=["buffer_k:Q", "strategy:N", "blocked_pct:Q"], + ) + .properties(title="Late 
Merge: Blocked Cars vs. Buffer Size")
+    )
+    chart
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Understanding the Math
+
+    ### The finite-buffer formula
+
+    For a queue with random arrivals, exponential service, a single server, and a buffer that holds at most $K$ cars (the M/M/1/K model), the blocking probability is:
+
+    $$P_{\text{block}} = \frac{(1-\rho)\,\rho^K}{1 - \rho^{K+1}}$$
+
+    As usual, $\rho = \lambda/\mu$ is the utilization. Notice that the numerator contains $\rho^K$. Because $\rho < 1$, increasing $K$ by 1 multiplies the numerator by $\rho < 1$, shrinking $P_{\text{block}}$ faster than linearly. Each extra slot in the buffer is more valuable than a simple linear reduction would suggest.
+
+    ### Early vs. late merge in terms of $K$
+
+    Early merging creates a single queue with buffer $K$: one lane's worth of space. Late merging uses both lanes up to the merge point, creating an effective buffer of $2K$ cars total. Plugging $2K$ into the formula instead of $K$ replaces $\rho^K$ with $\rho^{2K} = (\rho^K)^2$. Since $\rho^K < 1$, squaring it makes it much smaller. This is why doubling the buffer dramatically reduces blocking.
+
+    ### Intuition about two lanes
+
+    Here is another way to see it. Under late merge, both lanes must be simultaneously full for a car to be blocked. Suppose each individual lane is full with probability $p$. If the two lanes are roughly independent, the probability both are full at once is approximately $p^2$. For example, if $p = 0.3$, then $p^2 = 0.09$ β€” blocking drops from 30% to 9%. Two lanes are dramatically more forgiving than one.
+
+    ### Connection to throughput
+
+    Throughput is the rate at which cars successfully pass through the merge: $\text{throughput} = \lambda \cdot (1 - P_{\text{block}})$. Every blocked car is a car that does not get through. Reducing $P_{\text{block}}$ by doubling $K$ therefore raises throughput nearly proportionally. 
Late merge does not speed up the bottleneck (the merge point still processes cars at rate $\mu$) but it ensures the bottleneck is never starved of cars to process, maximizing the number of drivers who make it through. + + ### The broader lesson + + The key insight is that the *structure* of a waiting space matters, not just its total size. Two separate lanes of capacity $K$ each are far better than one lane of capacity $2K$ because blocking requires both lanes to fill simultaneously. This logic generalises widely: in computer networks, having multiple independent paths reduces the chance a single congested link stalls all traffic; in hospitals, pooling patients across several triage nurses reduces the chance one idle nurse sits beside an overwhelmed colleague. Wherever there is a finite buffer feeding a shared bottleneck, the late-merge principle applies: spread the waiting space across parallel channels and blocking probability falls dramatically. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/08_inspectors_paradox.py b/queueing/08_inspectors_paradox.py new file mode 100644 index 0000000000000000000000000000000000000000..bda767cf97108c1e62b50151422057ca1e8a4837 --- /dev/null +++ b/queueing/08_inspectors_paradox.py @@ -0,0 +1,293 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process + + return Environment, Process, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # The Inspector's Paradox + + ## *Why the Bus Is Always Late* + + Buses arrive at a stop with some average headway (gap between buses) of $\mu$ minutes. 
A passenger arrives at a uniformly random time and waits for the next bus. How long do they wait? The naive answer is $\mu / 2$: on average you land in the middle of a gap. The correct answer is almost always longerβ€”sometimes much longer. + + The expected wait is not $\mu/2$ but: + + $$E[\text{wait}] = \frac{\mu}{2} + \frac{\sigma^2}{2\mu}$$ + + where $\sigma^2 = \text{Var}[\text{headway}]$. The second term is always non-negative, so higher variance always means longer expected waits, even when the mean headway is unchanged. + + ### Three Bus Schedules with Mean Headway $\mu = 10$ + + | Schedule | $\sigma^2$ | Predicted wait | Naive wait | + |-------------|-----------|----------------|-----------| + | Regular | 0 | 5.0 | 5.0 | + | Exponential | 100 | 10.0 | 5.0 | + | Clustered | 64 | 8.2 | 5.0 | + + For exponentially distributed headways, $\sigma^2 = \mu^2$, so: + + $$E[\text{wait}] = \frac{\mu}{2} + \frac{\mu^2}{2\mu} = \mu$$ + + A passenger waits on average for an *entire* mean headway β€” twice the naive expectation. + + ## Why This Happens: Length-Biased Sampling + + A passenger arriving at a random time is more likely to land inside a *long* gap than a short one, because long gaps occupy more time on the clock. This is called *length-biased sampling*. The interval containing your arrival is not a random headway: it is drawn from the length-biased distribution with density: + + $$f^*(h) = \frac{h \cdot f(h)}{\mu}$$ + + The mean of this biased distribution is $\mu + \sigma^2/\mu$, and you arrive uniformly within it, giving expected wait $(\mu + \sigma^2/\mu)/2$. + + The same phenomenon explains why the average class size experienced by a student exceeds the average class size reported by the university (large classes have more students to report them). + + ## Why "Inspector's Paradox"? + + The name comes from quality control, where an inspector arrives at a random time to sample a production process and systematically encounters longer-than-average intervals. 
The paradox is that a random observer is more likely to land inside a long gap than a short one, so their experienced mean interval exceeds the true mean interval. It feels paradoxical because you'd expect a random arrival to see the average gap, but length-biased sampling guarantees they see worse-than-average gaps whenever there's any variance at all. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + A `BusService` process generates buses under three headway distributions (regular, exponential, clustered bimodal) and records their arrival times. After the simulation, passenger wait times are estimated by sampling $N$ uniformly random arrival times and finding the next bus for each, without needing explicit `Passenger` processes. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + mean_headway_slider = mo.ui.slider( + start=5.0, + stop=30.0, + step=1.0, + value=10.0, + label="Mean headway", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + mean_headway_slider, + seed_input, + run_button, + ]) + return mean_headway_slider, seed_input, sim_time_slider + + +@app.cell +def _(mean_headway_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + MEAN_HEADWAY = float(mean_headway_slider.value) + SEED = int(seed_input.value) + N_PASSENGERS = 20_000 + return MEAN_HEADWAY, N_PASSENGERS, SEED, SIM_TIME + + +@app.cell +def _(MEAN_HEADWAY, Process, random): + class BusService(Process): + def init(self, mode, bus_arrivals): + self.mode = mode + self.bus_arrivals = bus_arrivals + + async def run(self): + while True: + if self.mode == "regular": + headway = MEAN_HEADWAY + elif self.mode == "exponential": + headway = random.expovariate(1.0 / MEAN_HEADWAY) + elif self.mode == 
"clustered": + headway = MEAN_HEADWAY * 0.2 if random.random() < 0.5 else MEAN_HEADWAY * 1.8 + else: + raise ValueError(f"Unknown mode: {self.mode}") + await self.timeout(headway) + self.bus_arrivals.append(self.now) + + return (BusService,) + + +@app.cell +def _(BusService, Environment, SIM_TIME): + def collect_buses(mode): + bus_arrivals = [] + env = Environment() + BusService(env, mode, bus_arrivals) + env.run(until=SIM_TIME) + return bus_arrivals + + return (collect_buses,) + + +@app.cell +def _(N_PASSENGERS, random, statistics): + def expected_wait(bus_arrivals, n=N_PASSENGERS): + max_t = bus_arrivals[-1] + waits = [] + for _ in range(n): + t = random.uniform(0.0, max_t * 0.95) + for b in bus_arrivals: + if b > t: + waits.append(b - t) + break + return statistics.mean(waits) if waits else 0.0 + + return (expected_wait,) + + +@app.cell +def _(statistics): + def headway_variance(bus_arrivals): + headways = [b - a for a, b in zip(bus_arrivals, bus_arrivals[1:])] + return statistics.variance(headways) if len(headways) > 1 else 0.0 + + return (headway_variance,) + + +@app.cell(hide_code=True) +def _(MEAN_HEADWAY, mo): + mu = MEAN_HEADWAY + naive = MEAN_HEADWAY / 2.0 + var_exp = mu ** 2 + var_clustered = 0.5 * (mu * 0.2 - mu) ** 2 + 0.5 * (mu * 1.8 - mu) ** 2 + mo.md(f""" + ## Results + + Mean headway: {MEAN_HEADWAY} β†’ naive expected wait = {naive:.1f} + + - **Exponential** (Var β‰ˆ {var_exp:.1f}): predicted = {mu / 2 + var_exp / (2 * mu):.1f} (= full mean headway!) 
+ - **Clustered** (Var β‰ˆ {var_clustered:.1f}): predicted = {mu / 2 + var_clustered / (2 * mu):.1f} + """) + return (naive,) + + +@app.cell +def _(MEAN_HEADWAY, collect_buses, expected_wait, headway_variance, naive, pl): + def run_models(): + rows = [] + for mode in ["regular", "exponential", "clustered"]: + buses = collect_buses(mode) + var_h = headway_variance(buses) + mean_w = expected_wait(buses) + rows.append({ + "mode": mode, + "var_headway": round(var_h, 4), + "mean_wait": round(mean_w, 4), + "predicted": round(MEAN_HEADWAY / 2.0 + var_h / (2.0 * MEAN_HEADWAY), 4), + "ratio": round(mean_w / naive, 4), + }) + return pl.DataFrame(rows) + + return (run_models,) + + +@app.cell +def _(SEED, random, run_models): + random.seed(SEED) + df = run_models() + df + return (df,) + + +@app.cell +def _(alt, df, naive, pl): + chart = ( + alt.Chart(df) + .mark_bar() + .encode( + x=alt.X("mode:N", title="Bus schedule type"), + y=alt.Y("mean_wait:Q", title="Mean passenger wait"), + color=alt.Color("mode:N", legend=None), + tooltip=["mode:N", "mean_wait:Q", "ratio:Q"], + ) + .properties(title="Inspector's Paradox: Mean Wait by Schedule Type") + ) + naive_line = ( + alt.Chart(pl.DataFrame({"naive": [naive]})) + .mark_rule(strokeDash=[4, 4], color="gray") + .encode(y="naive:Q") + ) + (chart + naive_line) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Length-biased sampling + + Suppose buses run on an irregular schedule where gaps between buses are either 2 minutes or 18 minutes, each with probability 1/2. The mean gap is $\mu = (2 + 18)/2 = 10$ minutes. Now ask: if you arrive at a completely random moment, which gap are you most likely to land inside? + + A 2-minute gap occupies only 2 minutes on the clock, but an 18-minute gap occupies 18. Out of every 20 minutes of clock time on average, 2 minutes belong to a short gap and 18 to a long one. 
So a random arrival lands in a short gap with probability $2/(2+18) = 1/10$ and in a long gap with probability $18/20 = 9/10$. The expected gap length you experience is: + + $$E[\text{gap experienced}] = \frac{1}{10} \cdot 2 + \frac{9}{10} \cdot 18 = 0.2 + 16.2 = 16.4 \text{ minutes}$$ + + That is far above the mean gap of 10 minutes. You are disproportionately likely to land inside a long gap simply because it takes up more time. + + ### The wait formula + + Once you are inside a gap, you arrive uniformly within it, so on average you land in the middle. Your expected wait is half the gap length you experience. The full formula is: + + $$E[\text{wait}] = \frac{\mu}{2} + \frac{\sigma^2}{2\mu}$$ + + Here $\mu$ is the mean gap and $\sigma^2 = \text{Var}[\text{gap}]$ is the variance of gap lengths. The first term, $\mu/2$, is what you would get if every gap were exactly $\mu$ (deterministic buses β€” arrive in the middle every time). The second term, $\sigma^2/(2\mu)$, is the extra waiting from length-biased sampling. It is always non-negative, so irregular buses always make you wait longer than regular buses with the same mean headway. + + ### Why variance matters + + The variance $\sigma^2$ measures how spread out the gap sizes are. A perfectly regular bus schedule has $\sigma^2 = 0$ and gives the naive answer $\mu/2$. An exponentially distributed schedule has $\sigma^2 = \mu^2$, which doubles the expected wait to $\mu$. More irregular buses, higher penalty. + + ### Connecting to expected values + + The formula arises from a standard result: the expected length of the gap containing a random arrival is $\mu + \sigma^2/\mu$. You can think of this as the mean gap plus a correction term proportional to the variance divided by the mean. Dividing by 2 (uniform arrival within the gap) gives the wait formula above. 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/09_convoy_effect.py b/queueing/09_convoy_effect.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f6d28cd7ee8fdf0e5bd7d646245e97cc78f018 --- /dev/null +++ b/queueing/09_convoy_effect.py @@ -0,0 +1,302 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Queue, PriorityQueue + + return Environment, Process, Queue, PriorityQueue, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # The Convoy Effect + + ## *One Slow Job Ruins Everyone's Day* + + A single server processes jobs that arrive randomly (Poisson process). Most jobs are quick (exponential service with small mean), but a rare few are very slow (exponential service with large mean). This *hyperexponential* service distribution has high variance. This tutorial compares the performance of two scheduling disciplines in this situation: + + - FIFO (First In, First Out): jobs are served in the order they arrive. + - SJF (Shortest Job First): the server always picks the shortest queued job next. + + The surprising result is that SJF dramatically outperforms FIFO: not just for the small jobs that directly benefit from skipping ahead, but also for mean sojourn time across *all* jobs. The improvement is most visible at the tail (95th and 99th percentiles) because FIFO creates a *convoy effect*: one long job blocks many short jobs behind it, inflating everyone's wait. + + ### The Convoy Metaphor + + Picture a one-lane road with one slow truck and many fast cars. Every car behind the truck must drive at truck speed; no overtaking allowed. 
The truck is the long job; the cars are the short jobs stuck behind it in FIFO order. SJF is like a passing lane: fast cars jump ahead of the truck and reach their destination much sooner. The truck itself arrives at the same time either way, but the total delay experienced by all vehicles plummets.
+
+    ### Why FIFO Hurts with High Variance
+
+    In FIFO, the server's current job is chosen at arrival time, not at decision time. When a slow job begins service, every subsequent arrival must join the queue and wait. Adding the mean queueing delay (the Pollaczek–Khinchine formula) to the mean service time gives the mean sojourn time under FIFO:
+
+    $$W_{\text{FIFO}} = \frac{\lambda \overline{s^2}}{2(1-\rho)} + \frac{1}{\mu}$$
+
+    where $\overline{s^2}$ is the second moment of service time. High variance inflates $\overline{s^2}$ without changing $\rho$, directly worsening wait time.
+
+    ### SJF Minimises Mean Sojourn Time
+
+    For a single server with non-preemptive SJF, a job of size $x$ waits only behind jobs no larger than $x$ (plus whatever job is already in service), so its mean wait involves the *truncated* load $\rho(x) = \lambda \int_0^x t f(t)\,dt$ rather than the full $\rho$:
+
+    $$W_{\text{SJF}} = \frac{1}{\mu} + \int_0^\infty \frac{\lambda \overline{s^2}}{2\,(1-\rho(x))^2}\, f(x)\, dx$$
+
+    Because $\rho(x) \le \rho$ for every $x$, the short jobs that dominate the arrival stream pay a far smaller denominator penalty than under FIFO, and the averaged sojourn time drops: short jobs that would otherwise be blocked by a long job are promoted ahead, reducing the total waiting work in the system.
+
+    ## Practical Relevance
+
+    Operating system CPU schedulers use time-quanta and priority aging to approximate SJF without knowing job sizes in advance. Database query planners estimate query cost and reorder execution to minimize blocking. The phenomenon reappears as *head-of-line blocking* in HTTP/1.1 (one slow response stalls a connection), motivating HTTP/2 multiplexing and HTTP/3's QUIC stream independence.
+ """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + Jobs are placed in a `PriorityQueue` for SJF (tupled as `(service_time, job_id)` so shorter jobs sort earlier) or a plain `Queue()` for FIFO (tupled as `(job_id, service_time)` to preserve arrival order). The same hyperexponential service-time generator (90% short, 10% long) is used in both runs. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + arrival_rate_slider = mo.ui.slider( + start=0.1, + stop=1.5, + step=0.05, + value=0.7, + label="Arrival rate", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + arrival_rate_slider, + seed_input, + run_button, + ]) + return arrival_rate_slider, seed_input, sim_time_slider + + +@app.cell +def _(arrival_rate_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + ARRIVAL_RATE = float(arrival_rate_slider.value) + SEED = int(seed_input.value) + SHORT_RATE = 4.0 + LONG_RATE = 0.2 + LONG_PROB = 0.10 + return ARRIVAL_RATE, LONG_PROB, LONG_RATE, SEED, SHORT_RATE, SIM_TIME + + +@app.cell +def _(LONG_PROB, LONG_RATE, SHORT_RATE, random): + def service_time(): + if random.random() < LONG_PROB: + return random.expovariate(LONG_RATE) + return random.expovariate(SHORT_RATE) + + return (service_time,) + + +@app.cell +def _(ARRIVAL_RATE, Process, random, service_time): + class JobSource(Process): + def init(self, job_queue, arrivals, sjf): + self.job_queue = job_queue + self.arrivals = arrivals + self.sjf = sjf + self._jid = 0 + + async def run(self): + while True: + await self.timeout(random.expovariate(ARRIVAL_RATE)) + jid = self._jid + self._jid += 1 + svc = service_time() + self.arrivals[jid] = (self.now, svc) + if self.sjf: + await self.job_queue.put((svc, jid)) + else: + 
await self.job_queue.put((jid, svc)) + + return (JobSource,) + + +@app.cell +def _(Process): + class Server(Process): + def init(self, job_queue, arrivals, sojourn_times, sjf): + self.job_queue = job_queue + self.arrivals = arrivals + self.sojourn_times = sojourn_times + self.sjf = sjf + + async def run(self): + while True: + item = await self.job_queue.get() + if self.sjf: + svc, jid = item + else: + jid, svc = item + await self.timeout(svc) + arrival_time, _ = self.arrivals[jid] + self.sojourn_times.append(self.now - arrival_time) + + return (Server,) + + +@app.cell +def _(Environment, JobSource, Queue, PriorityQueue, SIM_TIME, Server, statistics): + def simulate(sjf): + arrivals = {} + sojourn_times = [] + env = Environment() + q = PriorityQueue(env) if sjf else Queue(env) + JobSource(env, q, arrivals, sjf) + Server(env, q, arrivals, sojourn_times, sjf) + env.run(until=SIM_TIME) + return { + "mean": statistics.mean(sojourn_times), + "median": statistics.median(sojourn_times), + "p95": sorted(sojourn_times)[int(0.95 * len(sojourn_times))], + "p99": sorted(sojourn_times)[int(0.99 * len(sojourn_times))], + "n": len(sojourn_times), + } + + return (simulate,) + + +@app.cell +def _(LONG_PROB, LONG_RATE, SEED, SHORT_RATE, pl, random, simulate): + def run_scenarios(): + fifo = simulate(sjf=False) + sjf_res = simulate(sjf=True) + rows = [ + { + "metric": m, + "fifo": fifo[m], + "sjf": sjf_res[m], + "improvement": fifo[m] / sjf_res[m], + } + for m in ("mean", "median", "p95", "p99") + ] + return pl.DataFrame(rows) + + random.seed(SEED) + df = run_scenarios() + mean_svc = (1 - LONG_PROB) / SHORT_RATE + LONG_PROB / LONG_RATE + return df, mean_svc + + +@app.cell(hide_code=True) +def _(ARRIVAL_RATE, LONG_PROB, LONG_RATE, SHORT_RATE, mean_svc, mo): + mo.md(f""" + ## Summary Statistics + + Arrival rate: {ARRIVAL_RATE}, estimated mean service: {mean_svc:.3f} + + Short jobs: {100 * (1 - LONG_PROB):.0f}% (mean {1 / SHORT_RATE:.2f}), + Long jobs: {100 * LONG_PROB:.0f}% (mean {1 / 
LONG_RATE:.1f}) + + > **Note:** SJF is optimal for mean sojourn time but requires knowing job sizes in advance. + """) + return + + +@app.cell +def _(df): + df + return + + +@app.cell +def _(alt, df, pl): + df_plot = df.filter(pl.col("metric") != "n").unpivot( + on=["fifo", "sjf"], + index="metric", + variable_name="policy", + value_name="sojourn_time", + ) + chart = ( + alt.Chart(df_plot) + .mark_bar() + .encode( + x=alt.X("metric:N", title="Metric"), + y=alt.Y("sojourn_time:Q", title="Sojourn time"), + color=alt.Color("policy:N", title="Policy"), + xOffset="policy:N", + tooltip=["metric:N", "policy:N", "sojourn_time:Q"], + ) + .properties(title="Convoy Effect: FIFO vs. Shortest Job First") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### The second moment + + For a random variable $S$ representing service time, the second moment is $E[S^2]$. Recall from your statistics course that variance is $\text{Var}(S) = E[S^2] - (E[S])^2$, which rearranges to: + + $$E[S^2] = \text{Var}(S) + (E[S])^2$$ + + This means high variance inflates $E[S^2]$ even if the mean $E[S]$ stays fixed. Doubling the spread of service times can quadruple $E[S^2]$, even with the same average service time. + + ### Why variance of service time hurts + + Imagine a FIFO server handling jobs that are either 0.1 minutes or 10 minutes long, with 90% being short and 10% being long. The mean service time is $0.9 \times 0.1 + 0.1 \times 10 = 1.09$ minutes, so utilization $\rho = \lambda / \mu$ might be modest. But when a 10-minute job starts, every job arriving during those 10 minutes must join the queue and wait. The longer $E[S^2]$, the more average work sits ahead of each arriving job. 
+ + ### The Pollaczek–Khinchine formula + + The mean time a job spends waiting (not counting its own service time) in a FIFO single-server queue is: + + $$W_q = \frac{\lambda \cdot E[S^2]}{2(1 - \rho)}$$ + + Here $\lambda$ is the arrival rate, $E[S^2]$ is the second moment of service time, and $\rho = \lambda \cdot E[S]$ is the server utilization. Both $\lambda$ and $E[S^2]$ appear in the numerator, so more variance means more waiting even at the same $\rho$. The $(1-\rho)$ denominator is the familiar blow-up term from M/M/1. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/10_priority_starvation.py b/queueing/10_priority_starvation.py new file mode 100644 index 0000000000000000000000000000000000000000..5d093d0d135ca3528be5fe218c77b6d61dca2697 --- /dev/null +++ b/queueing/10_priority_starvation.py @@ -0,0 +1,454 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Queue + + return Environment, Process, Queue, alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Priority Starvation + + ## *When High-Priority Traffic Crowds Out Low-Priority Jobs* + + A single server processes two job classes: + + - High-priority jobs (class H) arrive frequently and are served quickly. + - Low-priority jobs (class L) arrive rarely and take longer to serve. + + The server always picks the highest-priority job available. Total server utilization $\rho = \rho_H + \rho_L < 1$, so the server has spare capacity on average. Yet low-priority jobs can wait far longer than the utilization level suggests they should. 
+
+    ### Static Priority: Starvation at Moderate Load
+
+    With a static priority queue, high-priority jobs *never* yield to low-priority ones. Even when $\rho_H < 1$, high-priority bursts can lock out low-priority jobs for extended periods. The mean wait for low-priority jobs under a static non-preemptive priority queue is:
+
+    $$W_L = \frac{R_0}{(1-\rho_H)(1-\rho_H-\rho_L)}$$
+
+    where $R_0$ is the mean residual work an arriving job finds already in the system (defined precisely in "Understanding the Math" at the end of this lesson). This diverges as $\rho_H \to 1$ independently of $\rho_L$. As $\rho_H$ approaches 100%, low-priority jobs wait arbitrarily long, even if only a few low-priority jobs ever arrive.
+
+    ### Aging: Solving Starvation Creates Oscillation
+
+    The standard remedy for starvation is *priority aging*: a waiting job's priority improves over time until it eventually beats even high-priority arrivals. This guarantees finite wait for all jobs.
+
+    However, aging introduces a new pathology. When aged low-priority jobs finally burst through, they occupy the server and leave a backlog of high-priority jobs waiting. The high-priority queue then drains, and the cycle repeats — producing oscillating bursts rather than smooth, uniform service.
+
+    ### Intuition
+
+    Suppose H jobs arrive in random bursts. During a burst, the server never pauses for L jobs. An L job unlucky enough to arrive at the start of a long burst must wait for every H job in that burst to be served before getting its turn. As bursts grow more frequent (larger $\rho_H$), the expected burst length grows, and with it the expected wait for that unlucky L job. The math confirms: starvation is a real risk at moderate $\rho_H$, not just at extreme loads.
+
+    ### What aging does
+
+    Aging assigns each waiting L job a maximum patience time $T_{\max}$. After waiting $T_{\max}$, the job is promoted to high priority. This caps the worst-case wait: once promoted, an L job waits only for the job in service and any already-promoted work ahead of it, so its total wait is approximately bounded by $T_{\max} + 1/\mu_L$.
+ + ## Practical Implications + + Priority queues appear throughout computing: + + - OS scheduling: interactive processes (high priority) vs. batch jobs (low priority). Linux uses dynamic priority aging (nice values + sleep bonuses) to avoid starvation. + - Network QoS: real-time traffic (VoIP, video) vs. bulk data. Traffic shaping with Deficit Round Robin (DRR) or Weighted Fair Queuing (WFQ) guarantees bandwidth shares without starvation. + - Database query planning: short OLTP queries vs. long OLAP queries. Resource groups and query timeouts implement a form of aging. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + Two runs are compared: + + 1. Static priority: H jobs are inserted as `(0, ...)` and L jobs as `(1, ...)` into a `Queue(priority=True)`. The server always picks the smallest key first, so H jobs are always served before L jobs. + 2. Aging: an `Ager` process wakes up every `AGING_INTERVAL` time units, inspects waiting L jobs, and promotes sufficiently old ones by reducing their priority key until it falls below the H threshold and they move into the server's feed queue. 
+ """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + aging_threshold_slider = mo.ui.slider( + start=1.0, + stop=60.0, + step=1.0, + value=15.0, + label="Aging threshold", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + aging_threshold_slider, + seed_input, + run_button, + ]) + return aging_threshold_slider, seed_input, sim_time_slider + + +@app.cell +def _(aging_threshold_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + AGING_THRESHOLD = float(aging_threshold_slider.value) + SEED = int(seed_input.value) + SERVICE_RATE_HI = 2.0 + SERVICE_RATE_LO = 1.0 + ARRIVAL_RATE_LO = 0.2 + return ( + AGING_THRESHOLD, + ARRIVAL_RATE_LO, + SEED, + SERVICE_RATE_HI, + SERVICE_RATE_LO, + SIM_TIME, + ) + + +@app.cell +def _(Process): + class StaticPriorityServer(Process): + def init(self, hi_q, lo_q, sojourn_hi, sojourn_lo): + self.hi_q = hi_q + self.lo_q = lo_q + self.sojourn_hi = sojourn_hi + self.sojourn_lo = sojourn_lo + + async def _serve(self, arrival, svc, record): + await self.timeout(svc) + record.append(self.now - arrival) + + async def run(self): + while True: + if not self.hi_q.is_empty(): + arrival, svc = await self.hi_q.get() + await self._serve(arrival, svc, self.sojourn_hi) + elif not self.lo_q.is_empty(): + arrival, svc = await self.lo_q.get() + await self._serve(arrival, svc, self.sojourn_lo) + else: + await self.timeout(0.01) + + return (StaticPriorityServer,) + + +@app.cell +def _(AGING_THRESHOLD, Process): + class AgingServer(Process): + def init(self, hi_q, lo_q, sojourn_hi, sojourn_lo): + self.hi_q = hi_q + self.lo_q = lo_q + self.sojourn_hi = sojourn_hi + self.sojourn_lo = sojourn_lo + + async def run(self): + while True: + lo_aged = ( + not self.lo_q.is_empty() + and self.now - 
self.lo_q._items[0][0] >= AGING_THRESHOLD + ) + if lo_aged: + arrival, svc = await self.lo_q.get() + await self.timeout(svc) + self.sojourn_lo.append(self.now - arrival) + elif not self.hi_q.is_empty(): + arrival, svc = await self.hi_q.get() + await self.timeout(svc) + self.sojourn_hi.append(self.now - arrival) + elif not self.lo_q.is_empty(): + arrival, svc = await self.lo_q.get() + await self.timeout(svc) + self.sojourn_lo.append(self.now - arrival) + else: + await self.timeout(0.01) + + return (AgingServer,) + + +@app.cell +def _(Process, SERVICE_RATE_HI, random): + class HiSource(Process): + def init(self, rate, q): + self.rate = rate + self.q = q + + async def run(self): + while True: + await self.timeout(random.expovariate(self.rate)) + svc = random.expovariate(SERVICE_RATE_HI) + await self.q.put((self.now, svc)) + + return (HiSource,) + + +@app.cell +def _(ARRIVAL_RATE_LO, Process, SERVICE_RATE_LO, random): + class LoSource(Process): + def init(self, q): + self.q = q + + async def run(self): + while True: + await self.timeout(random.expovariate(ARRIVAL_RATE_LO)) + svc = random.expovariate(SERVICE_RATE_LO) + await self.q.put((self.now, svc)) + + return (LoSource,) + + +@app.cell +def _( + AgingServer, + Environment, + HiSource, + LoSource, + Queue, + SIM_TIME, + StaticPriorityServer, + statistics, +): + def simulate(arrival_rate_hi, use_aging): + env = Environment() + hi_q = Queue(env) + lo_q = Queue(env) + sojourn_hi = [] + sojourn_lo = [] + HiSource(env, arrival_rate_hi, hi_q) + LoSource(env, lo_q) + if use_aging: + AgingServer(env, hi_q, lo_q, sojourn_hi, sojourn_lo) + else: + StaticPriorityServer(env, hi_q, lo_q, sojourn_hi, sojourn_lo) + env.run(until=SIM_TIME) + return sojourn_hi, sojourn_lo + + def mean_or_none(lst): + return statistics.mean(lst) if lst else None + + def pct_or_none(lst, p): + if not lst: + return None + return sorted(lst)[int(p * len(lst))] + + return mean_or_none, pct_or_none, simulate + + +@app.cell +def _( + ARRIVAL_RATE_LO, + 
SEED, + SERVICE_RATE_HI, + SERVICE_RATE_LO, + mean_or_none, + pl, + random, + simulate, +): + def sweep(): + sweep_rows = [] + for rho_hi in [0.10, 0.20, 0.40, 0.60, 0.70, 0.80]: + rate_hi = rho_hi * SERVICE_RATE_HI + hi, lo = simulate(rate_hi, use_aging=False) + rho_total = rho_hi + ARRIVAL_RATE_LO / SERVICE_RATE_LO + sweep_rows.append({ + "rho_hi": rho_hi, + "rho_total": rho_total, + "mean_W_hi": mean_or_none(hi), + "mean_W_lo": mean_or_none(lo), + }) + return pl.DataFrame(sweep_rows) + + random.seed(SEED) + df_sweep = sweep() + return (df_sweep,) + + +@app.cell +def _( + ARRIVAL_RATE_LO, + SERVICE_RATE_HI, + SERVICE_RATE_LO, + mean_or_none, + pct_or_none, + pl, + simulate, +): + FIXED_RHO_HI = 0.70 + rho_total = FIXED_RHO_HI + ARRIVAL_RATE_LO / SERVICE_RATE_LO + def compare(): + rate_hi = FIXED_RHO_HI * SERVICE_RATE_HI + hi_static, lo_static = simulate(rate_hi, use_aging=False) + hi_aging, lo_aging = simulate(rate_hi, use_aging=True) + + compare_rows = [ + { + "policy": "static", "class": "hi", "n": len(hi_static), + "mean_W": mean_or_none(hi_static), + "p95": pct_or_none(hi_static, 0.95), + "p99": pct_or_none(hi_static, 0.99), + }, + { + "policy": "static", "class": "lo", "n": len(lo_static), + "mean_W": mean_or_none(lo_static), + "p95": pct_or_none(lo_static, 0.95), + "p99": pct_or_none(lo_static, 0.99), + }, + { + "policy": "aging", "class": "hi", "n": len(hi_aging), + "mean_W": mean_or_none(hi_aging), + "p95": pct_or_none(hi_aging, 0.95), + "p99": pct_or_none(hi_aging, 0.99), + }, + { + "policy": "aging", "class": "lo", "n": len(lo_aging), + "mean_W": mean_or_none(lo_aging), + "p95": pct_or_none(lo_aging, 0.95), + "p99": pct_or_none(lo_aging, 0.99), + }, + ] + return pl.DataFrame(compare_rows) + + df_compare = compare() + return (df_compare, FIXED_RHO_HI, rho_total,) + + +@app.cell(hide_code=True) +def _(AGING_THRESHOLD, ARRIVAL_RATE_LO, SERVICE_RATE_LO, mo): + mo.md(f""" + ## Part 1 β€” Static Priority: Effect of Hi-Priority Load on Lo-Priority Wait + + 
Lo-priority: arrival rate {ARRIVAL_RATE_LO}, mean service {1 / SERVICE_RATE_LO:.1f}, + ρ_lo = {ARRIVAL_RATE_LO / SERVICE_RATE_LO:.2f} + + Aging threshold: {AGING_THRESHOLD} time units + """) + return + + +@app.cell +def _(df_sweep): + df_sweep + return + + +@app.cell(hide_code=True) +def _(FIXED_RHO_HI, mo, rho_total): + mo.md(f""" + ## Part 2 β€” Static vs. Aging at ρ_hi = {FIXED_RHO_HI:.2f}, ρ_total = {rho_total:.2f} + """) + return + + +@app.cell +def _(df_compare): + df_compare + return + + +@app.cell +def _(alt, df_compare, df_sweep): + df_plot = df_sweep.unpivot( + on=["mean_W_hi", "mean_W_lo"], + index=["rho_hi", "rho_total"], + variable_name="job_class", + value_name="mean_W", + ) + sweep_chart = ( + alt.Chart(df_plot) + .mark_line(point=True) + .encode( + x=alt.X("rho_hi:Q", title="Hi-priority utilization (ρ_hi)"), + y=alt.Y("mean_W:Q", title="Mean sojourn time (W)"), + color=alt.Color("job_class:N", title="Job class"), + tooltip=["rho_hi:Q", "job_class:N", "mean_W:Q"], + ) + .properties(title="Priority Starvation: Effect of Hi-Priority Load") + ) + compare_chart = ( + alt.Chart(df_compare) + .mark_bar() + .encode( + x=alt.X("class:N", title="Job class"), + y=alt.Y("mean_W:Q", title="Mean sojourn time (W)"), + color=alt.Color("policy:N", title="Policy"), + xOffset="policy:N", + tooltip=["policy:N", "class:N", "mean_W:Q", "p99:Q"], + ) + .properties(title="Static Priority vs. Aging") + ) + (sweep_chart | compare_chart) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Mean wait for two-priority queues + + Let $\lambda_i$, $\mu_i$, and $\rho_i = \lambda_i / \mu_i$ be the arrival rate, service rate, and utilization of class $i \in \{H, L\}$. 
For a non-preemptive priority queue: + + $$W_H = \frac{R_0}{1 - \rho_H}$$ + + $$W_L = \frac{R_0}{(1 - \rho_H)(1 - \rho_H - \rho_L)}$$ + + where $R_0 = \tfrac{1}{2}(\lambda_H \overline{s_H^2} + \lambda_L \overline{s_L^2})$ is the mean residual work seen by an arriving customer. The ratio $W_L / W_H = 1/(1 - \rho_H)$ grows without bound as $\rho_H \to 1$. + + ### Utilization of each class + + Let $\lambda_H$ be the arrival rate of high-priority jobs (H) and $\mu_H$ be their service rate. The utilization contributed by H jobs alone is $\rho_H = \lambda_H / \mu_H$ β€” the fraction of server time that H jobs would consume if they were the only class. Similarly, $\rho_L = \lambda_L / \mu_L$ for low-priority jobs. The total utilization is $\rho = \rho_H + \rho_L$. Requiring $\rho < 1$ means the server has enough capacity for both classes on average. + + ### Why "on average" is not enough + + Even when $\rho < 1$, randomness creates bursts of H arrivals. During a burst, the server is continuously occupied by H jobs, and L jobs must wait in the background. The mean wait for low-priority jobs in a non-preemptive priority queue is: + + $$W_L = \frac{R_0}{(1 - \rho_H)(1 - \rho_H - \rho_L)}$$ + + where $R_0$ is the mean residual work in the system when a job arrives. The critical observation is the factor $(1 - \rho_H)$ in the denominator. As $\rho_H \to 1$, this factor approaches zero and $W_L \to \infty$ β€” even if $\rho_L$ stays small and the total load $\rho$ is comfortably below 1. + + ### The trade-off + + Without aging, $W_L$ can be infinite when $\rho_H$ is large. With aging, $W_L \leq T_{\max} + 1/\mu_L$, but during promotion events the effective $\rho_H$ spikes temporarily, increasing $W_H$. Choosing $T_{\max}$ is a design decision: a small $T_{\max}$ protects L jobs but forces more promotions and penalizes H jobs more often; a large $T_{\max}$ is kinder to H jobs but allows L jobs to wait longer. 
There is no setting that simultaneously minimizes both β€” the trade-off is fundamental. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/11_tandem_queue.py b/queueing/11_tandem_queue.py new file mode 100644 index 0000000000000000000000000000000000000000..3587bc84c18ca3e1060f010f7a4a549a2c92814f --- /dev/null +++ b/queueing/11_tandem_queue.py @@ -0,0 +1,296 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + + import altair as alt + import polars as pl + + from asimpy import Environment, Process, Queue + + return Environment, Process, Queue, alt, mo, pl, random + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Tandem Queue Blocking + + ## *Variability Travels Downstream* + + Two processing stages are arranged in series: Stage 1 feeds work into a bounded buffer, which feeds Stage 2. Both stages have the same mean service rate $\mu$, and the arrival rate $\lambda < \mu$ so neither stage is overloaded on average. However, Stage 1 has high variance (hyperexponential service); Stage 2 has zero variance (deterministic service). + + Even though both stages have identical mean throughput and the system is underloaded, Stage 2 sits idle for a substantial fraction of time when the buffer between them is small. The idle fraction only vanishes as the buffer size $K \to \infty$. + + High service-time variance at Stage 1 produces bursts of outputβ€”many jobs finish close togetherβ€”followed by droughts. With a small buffer, the burst overflows (blocking Stage 1) and the drought starves Stage 2. Both effects reduce system throughput below what we would intuitively expect. 
+ + ## Analysis + + For a two-stage tandem queue with a finite buffer of capacity $K$, the blocking probability at Stage 1 and the starvation probability at Stage 2 depend on the full service-time distributions, not just their means. The Kingman approximation gives the mean wait in a single G/G/1 queue as: + + $$W_q \approx \frac{\rho}{1-\rho} \cdot \frac{c_a^2 + c_s^2}{2} \cdot \frac{1}{\mu}$$ + + where $c_a^2$ and $c_s^2$ are the squared coefficients of variation of inter-arrival and service times respectively. For a hyperexponential service distribution with $c_s^2 \gg 1$, waiting times are far higher than the M/M/1 formula predicts. + + In a tandem network, this extra variability propagates: the departure process of Stage 1 (which is the arrival process for Stage 2) has higher variance than Poisson when Stage 1 has high service variance. This is *Departure Process Variability Propagation* and is a key driver of manufacturing and supply-chain [bullwhip effects](https://en.wikipedia.org/wiki/Bullwhip_effect). + + ## Buffer as a Variability Absorber + + The buffer acts as a shock absorber. Each unit of additional buffer capacity $K$ reduces the starvation probability at Stage 2 by absorbing burst output from Stage 1. The marginal benefit decreases as $K$ grows, leading to a classic diminishing-returns relationship. Practitioners use this to size work-in-progress inventory (WIP) buffers in manufacturing cells. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + - A `Source` process generates jobs with exponential inter-arrival times into an unlimited input queue. + - `Stage1` pulls from the input queue, applies a hyperexponential service delay, and pushes to a bounded `Queue(max_capacity=K)`. If the buffer is full, Stage 1 blocks (back-pressure). + - `Stage2` pulls from the bounded buffer, applies deterministic service, and records completion times. 
+ - Stage 2 idle time is measured as the wait inside `queue.get()` (time spent waiting for work to appear). + + The simulation sweeps $K$ from 1 to 21 and reports Stage 2 idle fraction. + """) + return + + +@app.cell +def _(mo): + sim_time_slider = mo.ui.slider( + start=0, + stop=100_000, + step=1_000, + value=20_000, + label="Simulation time", + ) + + arrival_rate_slider = mo.ui.slider( + start=0.3, + stop=0.95, + step=0.05, + value=0.8, + label="Arrival rate", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + sim_time_slider, + arrival_rate_slider, + seed_input, + run_button, + ]) + return arrival_rate_slider, seed_input, sim_time_slider + + +@app.cell +def _(arrival_rate_slider, seed_input, sim_time_slider): + SIM_TIME = int(sim_time_slider.value) + ARRIVAL_RATE = float(arrival_rate_slider.value) + SEED = int(seed_input.value) + MEAN_SERVICE = 1.0 + return ARRIVAL_RATE, MEAN_SERVICE, SEED, SIM_TIME + + +@app.cell +def _(MEAN_SERVICE, random): + def high_variance_service(): + if random.random() < 0.80: + return random.expovariate(5.0) + return random.expovariate(1.0 / 4.5) + + def low_variance_service(): + return MEAN_SERVICE + + return high_variance_service, low_variance_service + + +@app.cell +def _(ARRIVAL_RATE, Process, random): + class Source(Process): + def init(self, buffer): + self.buffer = buffer + + async def run(self): + jid = 0 + while True: + await self.timeout(random.expovariate(ARRIVAL_RATE)) + await self.buffer.put(jid) + jid += 1 + + return (Source,) + + +@app.cell +def _(Process, high_variance_service): + class Stage1(Process): + def init(self, inp, out, idle_tally): + self.inp = inp + self.out = out + self.idle_tally = idle_tally + + async def run(self): + while True: + idle_start = self.now + job = await self.inp.get() + self.idle_tally.append(self.now - idle_start) + await self.timeout(high_variance_service()) + await self.out.put(job) + 
+    return (Stage1,)
+
+
+@app.cell
+def _(Process, low_variance_service):
+    class Stage2(Process):
+        def init(self, inp, idle_tally, completions):
+            self.inp = inp
+            self.idle_tally = idle_tally
+            self.completions = completions
+
+        async def run(self):
+            while True:
+                idle_start = self.now
+                await self.inp.get()
+                idle = self.now - idle_start
+                self.idle_tally.append(idle)
+                await self.timeout(low_variance_service())
+                self.completions.append(self.now)
+
+    return (Stage2,)
+
+
+@app.cell
+def _(Environment, Queue, SIM_TIME, Source, Stage1, Stage2):
+    def simulate(buffer_capacity):
+        env = Environment()
+        input_q = Queue(env)
+        middle_q = Queue(env, max_capacity=buffer_capacity)
+        s2_idle = []
+        completions = []
+        Source(env, input_q)
+        Stage1(env, input_q, middle_q, [])
+        Stage2(env, middle_q, s2_idle, completions)
+        env.run(until=SIM_TIME)
+        n = len(completions)
+        idle_total = sum(s2_idle)
+        return {
+            "buffer_capacity": buffer_capacity,
+            "throughput": n / SIM_TIME,
+            "stage2_idle_frac": idle_total / SIM_TIME,
+            "n_completed": n,
+        }
+
+    return (simulate,)
+
+
+@app.cell
+def _(SEED, pl, random, simulate):
+    random.seed(SEED)
+    df = pl.DataFrame([simulate(buffer_capacity=k) for k in [1, 2, 3, 5, 8, 13, 21]])
+    return (df,)
+
+
+@app.cell(hide_code=True)
+def _(ARRIVAL_RATE, MEAN_SERVICE, mo):
+    mo.md(f"""
+    ## Results
+
+    Arrival rate: {ARRIVAL_RATE}, mean service per stage: {MEAN_SERVICE}
+
+    - Stage 1: hyperexponential (high variance: 80% short mean=0.2, 20% long mean=4.5)
+    - Stage 2: deterministic (zero variance)
+
+    > As buffer capacity grows, Stage 2 idle fraction falls toward its floor of 1 βˆ’ ρ (about 20% at the default settings), the idleness forced by utilization alone β€” not toward 0%.
+ """) + return + + +@app.cell +def _(df): + df + return + + +@app.cell +def _(alt, df): + throughput_line = ( + alt.Chart(df) + .mark_line(point=True) + .encode( + x=alt.X("buffer_capacity:Q", title="Buffer capacity (K)"), + y=alt.Y("throughput:Q", title="Throughput (jobs/time)"), + tooltip=["buffer_capacity:Q", "throughput:Q"], + ) + .properties(title="Tandem Queue: Throughput vs. Buffer Capacity") + ) + idle_line = ( + alt.Chart(df) + .mark_line(point=True, strokeDash=[4, 4], color="orange") + .encode( + x=alt.X("buffer_capacity:Q"), + y=alt.Y("stage2_idle_frac:Q", title="Stage 2 idle fraction"), + ) + ) + (throughput_line + idle_line).resolve_scale(y="independent") + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Coefficient of variation + + The coefficient of variation (CV) of a random variable $X$ with mean $\mu$ and standard deviation $\sigma$ is defined as $c = \sigma / \mu$. It measures spread relative to the mean. A CV of 0 means the variable is deterministic: every value equals $\mu$. A CV of 1 means the spread equals the mean (the exponential distribution has CV exactly 1). A CV greater than 1 means the distribution is bursty: occasional very large values dominate, even if most values are small. The squared CV $c^2 = \sigma^2/\mu^2$ appears frequently in queueing formulas. + + ### Why high CV at Stage 1 creates bursts and droughts + + With a hyperexponential service distribution ($c_s^2 \gg 1$), Stage 1 sometimes completes several jobs in rapid succession (a burst) and sometimes spends a very long time on a single job (a drought). During a burst, jobs pile up in the buffer between stages. During a drought, Stage 2 exhausts the buffer and has to wait for Stage 1 to finish. This wastes capacity even though the system is underloaded on average. + + ### The buffer as a shock absorber + + A buffer of capacity $K$ can hold at most $K$ jobs between the two stages. 
It absorbs burst output from Stage 1 and releases it steadily to Stage 2. With a small buffer, a burst overflows (blocking Stage 1) or a drought empties the buffer (starving Stage 2). As $K$ grows, both effects weaken and Stage 2 idle time falls. However, the marginal benefit of each extra unit of buffer decreases. Real factories choose $K$ to balance the cost of holding inventory against the cost of machine starvation. + + ### Kingman's approximation + + For a single-stage queue with general service and arrival distributions, the approximate mean waiting time is: + + $$W_q \approx \frac{\rho}{1-\rho} \cdot \frac{c_a^2 + c_s^2}{2} \cdot \frac{1}{\mu}$$ + + Here $c_a^2$ is the squared CV of inter-arrival times and $c_s^2$ is the squared CV of service times. Notice that the formula separates the utilization effect (the $\rho/(1-\rho)$ term) from the variability effect (the $(c_a^2 + c_s^2)/2$ term). When Stage 1 has $c_s^2 \gg 1$, wait time is far higher than the basic M/M/1 formula predicts, even at the same mean throughput. The mean alone does not tell you enough. + + ### Supply-chain connection + + In manufacturing, Stage 1 corresponds to a supplier and Stage 2 to a production line. High variability at the supplier forces the factory to hold large work-in-progress (WIP) buffers, tying up capital and floor space. The [Toyota Production System](https://en.wikipedia.org/wiki/Toyota_Production_System) explicitly targets CV reduction as the primary tool for shrinking necessary WIP by making every process more deterministic through standardized work and small batch sizes. The math here explains exactly why: lower $c_s^2$ directly reduces $W_q$ and the required buffer size $K$. 
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/12_rush_hour.py b/queueing/12_rush_hour.py new file mode 100644 index 0000000000000000000000000000000000000000..93c9167ca73d67f1e6df45e837f93f031928cbeb --- /dev/null +++ b/queueing/12_rush_hour.py @@ -0,0 +1,302 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import random + import statistics + + import altair as alt + import polars as pl + + return alt, mo, pl, random, statistics + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Rush Hour Displacement + + ## *If Everyone Avoids the Rush, It Shifts* + + $N$ commuters all want to leave for work at the same preferred time. The road has a fixed capacity: up to $C$ commuters per time slot travel quickly, but when more than $C$ try to leave in the same slot, everyone in that slot experiences extra delay proportional to the overload. + + Each day, commuters observe yesterday's travel times and shift their departure by one slot toward a less congested option with some probability. Much to their disappointment, the rush hour never disappears. Instead it: + + 1. flattens slightly (spreading across more slots), but + 2. shifts its peak position over successive days, and + 3. reaches a new quasi-equilibrium that may be no less congested than the original, just at a different time. + + The intuition is that any slot that becomes less congested immediately attracts new commuters from adjacent overloaded slots, refilling it. Individual optimization is self-defeating in aggregate. + + The simulation in this tutorial shows emergent dynamics: + + - The arrival distribution begins concentrated at the preferred slot. + - Commuters shift away from congested slots, spreading the peak. 
+ - The spreading creates new local peaks at adjacent slots, which then attract their own shifters. + - Over many days the distribution oscillates or drifts without converging to zero congestion. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## The Vickrey Bottleneck Model + + The classic model (Vickrey 1969) treats the road as a bottleneck with flow rate $s$ vehicles per unit time. At equilibrium, every commuter faces the same *generalized cost*: + + $$c = \alpha \cdot d + \beta \cdot \max(0,\, t^* - t_{\text{arr}}) + \gamma \cdot \max(0,\, t_{\text{arr}} - t^*)$$ + + where $d$ is queuing delay, $t^*$ is the desired arrival time, and $\beta, \gamma$ are schedule-delay costs for early and late arrival respectively. Vickrey showed that at [Nash equilibrium](https://en.wikipedia.org/wiki/Nash_equilibrium) a departure queue forms with length that rises and then falls as commuters spread across time to equalize cost, but total system delay is unchanged. + + This model underlies modern road-pricing schemes: a time-varying toll that exactly offsets the schedule-delay cost eliminates queuing entirely while preserving the total commuting burden. In essence, the toll revenue replaces the wasted queuing time. + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Implementation + + Rather than one continuous DES run, the simulation iterates over days. Each day's commute is computed analytically: the travel time for slot $s$ is $\delta_{\text{base}}$ when occupancy $\leq C$, and increases linearly with overflow otherwise. Commuters update their departure slot between days using a simple best-response rule with noise (they shift to a better neighbour with probability $p$). 
+ """) + return + + +@app.cell +def _(mo): + n_commuters_slider = mo.ui.slider( + start=50, + stop=500, + step=50, + value=200, + label="Number of commuters", + ) + + n_days_slider = mo.ui.slider( + start=10, + stop=100, + step=5, + value=40, + label="Number of days", + ) + + shift_prob_slider = mo.ui.slider( + start=0.05, + stop=0.8, + step=0.05, + value=0.3, + label="Shift probability", + ) + + seed_input = mo.ui.number( + value=192, + step=1, + label="Random seed", + ) + + run_button = mo.ui.button(label="Run simulation") + + mo.vstack([ + n_commuters_slider, + n_days_slider, + shift_prob_slider, + seed_input, + run_button, + ]) + return n_commuters_slider, n_days_slider, seed_input, shift_prob_slider + + +@app.cell +def _(n_commuters_slider, n_days_slider, seed_input, shift_prob_slider): + N_COMMUTERS = int(n_commuters_slider.value) + N_DAYS = int(n_days_slider.value) + SHIFT_PROB = float(shift_prob_slider.value) + SEED = int(seed_input.value) + N_SLOTS = 30 + PREFERRED_SLOT = 15 + ROAD_CAPACITY = 20 + OVERLOAD_DELAY = 3.0 + BASE_DELAY = 1.0 + return ( + BASE_DELAY, + N_COMMUTERS, + N_DAYS, + N_SLOTS, + OVERLOAD_DELAY, + PREFERRED_SLOT, + ROAD_CAPACITY, + SEED, + SHIFT_PROB, + ) + + +@app.cell +def _(BASE_DELAY, OVERLOAD_DELAY, ROAD_CAPACITY): + def simulate_day(departure_slots): + counts = {} + for s in departure_slots: + counts[s] = counts.get(s, 0) + 1 + travel_time = {} + for slot, count in counts.items(): + if count <= ROAD_CAPACITY: + travel_time[slot] = BASE_DELAY + else: + overflow = count - ROAD_CAPACITY + travel_time[slot] = BASE_DELAY + OVERLOAD_DELAY * overflow / ROAD_CAPACITY + return travel_time + + return (simulate_day,) + + +@app.cell +def _(BASE_DELAY, N_SLOTS, SHIFT_PROB, random): + def update_slots(departure_slots, travel_times): + new_slots = departure_slots[:] + for i, s in enumerate(departure_slots): + my_delay = travel_times.get(s, BASE_DELAY) + candidates = [] + if s > 0: + candidates.append(s - 1) + if s < N_SLOTS - 1: + 
candidates.append(s + 1) + better = [c for c in candidates if travel_times.get(c, BASE_DELAY) < my_delay] + if better and random.random() < SHIFT_PROB: + new_slots[i] = random.choice(better) + return new_slots + + return (update_slots,) + + +@app.cell +def _(N_SLOTS): + def slot_distribution(slots): + counts = [0] * N_SLOTS + for s in slots: + counts[s] += 1 + return counts + + return (slot_distribution,) + + +@app.cell +def _( + BASE_DELAY, + N_COMMUTERS, + N_DAYS, + N_SLOTS, + PREFERRED_SLOT, + SEED, + pl, + random, + simulate_day, + slot_distribution, + statistics, + update_slots, +): + def simulate(): + departure_slots = [ + max(0, min(N_SLOTS - 1, PREFERRED_SLOT + round(random.gauss(0, 2)))) + for _ in range(N_COMMUTERS) + ] + + rows = [] + for day in range(N_DAYS): + travel_times = simulate_day(departure_slots) + mean_delay = statistics.mean( + travel_times.get(s, BASE_DELAY) for s in departure_slots + ) + dist = slot_distribution(departure_slots) + max_count = max(dist) + rows.append({"day": day + 1, "mean_delay": mean_delay, "max_slot_count": max_count}) + departure_slots = update_slots(departure_slots, travel_times) + + return departure_slots, rows + + random.seed(SEED) + departure_slots, rows = simulate() + df = pl.DataFrame(rows) + final_dist = slot_distribution(departure_slots) + peak_slot = max(range(N_SLOTS), key=lambda s: final_dist[s]) + return df, peak_slot + + +@app.cell(hide_code=True) +def _(N_COMMUTERS, N_SLOTS, PREFERRED_SLOT, ROAD_CAPACITY, mo, peak_slot): + mo.md(f""" + ## Results + + {N_COMMUTERS} commuters, {N_SLOTS} slots, road capacity {ROAD_CAPACITY}/slot + + - Initial peak slot: {PREFERRED_SLOT} + - Final peak slot: {peak_slot} ({'same' if peak_slot == PREFERRED_SLOT else 'shifted'}) + + > **Observation:** the rush-hour peak flattens and shifts but does not disappear. 
+ """) + return + + +@app.cell +def _(df): + df + return + + +@app.cell +def _(alt, df): + chart = ( + alt.Chart(df) + .mark_line() + .encode( + x=alt.X("day:Q", title="Day"), + y=alt.Y("mean_delay:Q", title="Mean travel delay"), + tooltip=["day:Q", "mean_delay:Q", "max_slot_count:Q"], + ) + .properties(title="Rush Hour Displacement: Mean Delay Over Time") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### What is a congestion game? + + Each commuter (the "player") independently chooses a departure time slot. The delay experienced in any given slot depends on how many other commuters choose the same slot: if the slot is over capacity $C$, delay grows with the number of extra commuters. No central authority coordinates choices. This structure, where each player's cost depends on the collective choices of all players, is called a *congestion game*. + + ### Nash equilibrium in this context + + A Nash equilibrium is a distribution of departure times such that no individual commuter can reduce their own delay by unilaterally switching to a different slot. At equilibrium, every occupied slot has the same congestion-adjusted cost. If slot 15 were cheaper than slot 14, commuters from slot 14 would shift to slot 15 until the costs equalized. The equilibrium is therefore defined by: all slots with commuters in them have equal cost, and all empty slots have cost no lower than the occupied ones. + + ### Why Nash equilibrium is not the social optimum + + The social optimum minimizes total delay summed over all commuters. The Nash equilibrium minimizes each person's individual delay given everyone else's choices. These are generally different objectives. At Nash equilibrium, a commuter choosing a crowded slot ignores the extra delay they impose on every other commuter already in that slot. They feel only their own delay; the cost they impose on others is a negative externality that they do not internalize. 
+ + ### Why the peak shifts but does not vanish + + Suppose slot 15 is heavily congested. Some commuters shift to slot 14, relieving slot 15. But now slot 14 is more congested, so its commuters shift to slot 13. The congestion wave ripples outward in both directions. Meanwhile, commuters who shifted away from slot 15 now observe it as less congested and some drift back. The system never reaches zero congestion: it perpetually redistributes congestion across nearby slots in a slow drift. The Nash equilibrium exists in theory, but the day-by-day best-response dynamics cycle around it rather than converging to it, particularly when commuters respond noisily to yesterday's conditions. + """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/13_braess_paradox.py b/queueing/13_braess_paradox.py new file mode 100644 index 0000000000000000000000000000000000000000..df9a527d2caf1802f9162a008bb247a113c3bfab --- /dev/null +++ b/queueing/13_braess_paradox.py @@ -0,0 +1,303 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "altair", +# "asimpy", +# "marimo", +# "polars==1.24.0", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + import math + import random + + import altair as alt + import polars as pl + + from asimpy import Environment, Process + + return Environment, Process, alt, math, mo, pl, random + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Braess's Paradox + + ## *Adding a Road Makes Traffic Worse* + + A city has two routes from source $S$ to destination $T$: + + - **Top route** $S \to A \to T$: link $SA$ is congestion-dependent; link $AT$ has a fixed travel time. + - **Bottom route** $S \to B \to T$: link $SB$ has a fixed travel time; link $BT$ is congestion-dependent. + + The network is symmetric. 
A city planner proposes adding a new shortcut link $A \to B$ with near-zero travel time, creating a third route $S \to A \to B \to T$. To her surprise, adding the shortcut makes everyone's travel time longer at the selfish-routing Nash equilibrium.
+
+    ### Without the shortcut
+
+    Both routes are symmetric. In equilibrium, traffic splits evenly. If $N/2$ drivers use each route and the congested links have delay $\alpha \cdot n$ (where $n$ is the number of cars):
+
+    $$t_{\text{top}} = \frac{N}{2}\alpha + c = t_{\text{bottom}}$$
+
+    ### With the shortcut $A \to B$
+
+    Each driver thinks, "Link $AB$ is free; I can use $SA$, slip across to $B$, then take $BT$ instead of the slow constant link $AT$." All $N$ drivers make this choice. The Nash equilibrium has everyone on $S \to A \to B \to T$:
+
+    $$t_{\text{shortcut}} = N\alpha + \varepsilon + N\alpha = 2N\alpha + \varepsilon$$
+
+    Since $2N\alpha > \frac{N}{2}\alpha + c$ for typical parameters, travel times *increase* after the road is added. This is the paradox: individually rational decisions produce a collectively worse outcome. The ratio of Nash equilibrium cost to the socially optimal cost is called the *[price of anarchy](https://en.wikipedia.org/wiki/Price_of_anarchy)*.
+
+    [Braess's paradox](https://en.wikipedia.org/wiki/Braess%27s_paradox) is not theoretical. Seoul, Stuttgart, and New York all observed traffic *improvements* after closing roads. Conversely, new roads in highly congested networks have sometimes worsened average travel times.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Implementation
+
+    Rather than tracking individual cars, the simulation keeps aggregate counts of drivers on each route. Each round, a single `RoutingGame` process:
+
+    1. Computes the expected travel time of every route from the current route counts.
+    2. Reassigns drivers across routes with a probabilistic best-response rule.
+    3. Records the updated counts, the per-route travel times, and the round's mean travel time.
+
+    Two runs are compared: one with only the top and bottom routes, one with the $AB$ shortcut added. The simulation uses a probabilistic [logit](https://en.wikipedia.org/wiki/Logit) choice rule so that convergence to Nash equilibrium is smooth rather than instant.
+    """)
+    return
+
+
+@app.cell
+def _(mo):
+    n_rounds_slider = mo.ui.slider(
+        start=10,
+        stop=200,
+        step=10,
+        value=80,
+        label="Number of rounds",
+    )
+
+    beta_slider = mo.ui.slider(
+        start=0.1,
+        stop=2.0,
+        step=0.1,
+        value=0.5,
+        label="Sensitivity (Ξ²)",
+    )
+
+    seed_input = mo.ui.number(
+        value=192,
+        step=1,
+        label="Random seed",
+    )
+
+    run_button = mo.ui.button(label="Run simulation")
+
+    mo.vstack([
+        n_rounds_slider,
+        beta_slider,
+        seed_input,
+        run_button,
+    ])
+    return beta_slider, n_rounds_slider, seed_input
+
+
+@app.cell
+def _(beta_slider, n_rounds_slider, seed_input):
+    N_ROUNDS = int(n_rounds_slider.value)
+    BETA = float(beta_slider.value)
+    SEED = int(seed_input.value)
+    N_DRIVERS = 4000
+    CAPACITY = 100.0
+    CONST_DELAY = 45.0
+    return BETA, CAPACITY, CONST_DELAY, N_DRIVERS, N_ROUNDS, SEED
+
+
+@app.cell
+def _(CAPACITY, CONST_DELAY):
+    def route_times(n_top, n_bot, n_short):
+        n_sa = n_top + n_short
+        n_bt = n_bot + n_short
+        t_top = n_sa / CAPACITY + CONST_DELAY
+        t_bot = CONST_DELAY + n_bt / CAPACITY
+        t_short = n_sa / CAPACITY + n_bt / CAPACITY
+        return t_top, t_bot, t_short
+
+    return (route_times,)
+
+
+@app.cell
+def _(BETA, math):
+    def logit_split(times):
+        vals = [math.exp(-BETA * t) for t in times]
+        total = sum(vals)
+        return [v / total for v in vals]
+
+    return (logit_split,)
+
+
+@app.cell
+def _(N_DRIVERS, N_ROUNDS, Process, logit_split, route_times):
+    class RoutingGame(Process):
+        def init(self, has_shortcut, history):
+            self.has_shortcut = has_shortcut
+            self.history = history
+            self._n_top = N_DRIVERS // 2
+            
self._n_bot = N_DRIVERS - N_DRIVERS // 2 + self._n_short = 0 + + async def run(self): + for _ in range(N_ROUNDS): + await self.timeout(1.0) + t_top, t_bot, t_short = route_times(self._n_top, self._n_bot, self._n_short) + if self.has_shortcut: + probs = logit_split([t_top, t_bot, t_short]) + self._n_top = round(N_DRIVERS * probs[0]) + self._n_bot = round(N_DRIVERS * probs[1]) + self._n_short = N_DRIVERS - self._n_top - self._n_bot + else: + probs = logit_split([t_top, t_bot]) + self._n_top = round(N_DRIVERS * probs[0]) + self._n_bot = N_DRIVERS - self._n_top + self._n_short = 0 + t_top2, t_bot2, t_short2 = route_times(self._n_top, self._n_bot, self._n_short) + mean_t = ( + self._n_top * t_top2 + self._n_bot * t_bot2 + self._n_short * t_short2 + ) / N_DRIVERS + self.history.append({ + "round": self.now, + "n_top": self._n_top, + "n_bot": self._n_bot, + "n_short": self._n_short, + "t_top": t_top2, + "t_bot": t_bot2, + "t_short": t_short2, + "mean": mean_t, + }) + + return (RoutingGame,) + + +@app.cell +def _(Environment, RoutingGame): + def simulate(has_shortcut): + history = [] + env = Environment() + RoutingGame(env, has_shortcut, history) + env.run() + return history + + return (simulate,) + + +@app.cell +def _(CAPACITY, CONST_DELAY, N_DRIVERS, SEED, pl, random, simulate): + random.seed(SEED) + hist_no = simulate(has_shortcut=False) + hist_yes = simulate(has_shortcut=True) + df_no = pl.DataFrame(hist_no) + df_yes = pl.DataFrame(hist_yes) + eq_no = hist_no[-1]["mean"] + eq_yes = hist_yes[-1]["mean"] + n_half = N_DRIVERS / 2 + t_theory_no = n_half / CAPACITY + CONST_DELAY + t_theory_yes = N_DRIVERS / CAPACITY + N_DRIVERS / CAPACITY + return df_no, df_yes, eq_no, eq_yes, t_theory_no, t_theory_yes + + +@app.cell(hide_code=True) +def _( + CAPACITY, + CONST_DELAY, + N_DRIVERS, + eq_no, + eq_yes, + mo, + t_theory_no, + t_theory_yes, +): + mo.md(f""" + ## Results + + - Nash equilibrium **without** shortcut: **{eq_no:.2f}** + - Nash equilibrium **with** shortcut: 
**{eq_yes:.2f}** + - Adding the shortcut increased travel time by **{eq_yes - eq_no:.2f}** units + ({100 * (eq_yes / eq_no - 1):.1f}% worse for every driver) + + Theory without shortcut (50/50 split): {t_theory_no:.2f} + + Theory with shortcut (all on SAβ†’ABβ†’BT): {t_theory_yes:.2f} + + Parameters: {N_DRIVERS} drivers, capacity={CAPACITY:.0f}, constant delay={CONST_DELAY} + """) + return + + +@app.cell +def _(alt, df_no, df_yes, pl): + df_no_plot = df_no.select(["round", "mean"]).with_columns( + pl.lit("without shortcut").alias("scenario") + ) + df_yes_plot = df_yes.select(["round", "mean"]).with_columns( + pl.lit("with shortcut").alias("scenario") + ) + df_plot = pl.concat([df_no_plot, df_yes_plot]) + chart = ( + alt.Chart(df_plot) + .mark_line() + .encode( + x=alt.X("round:Q", title="Round"), + y=alt.Y("mean:Q", title="Mean travel time"), + color=alt.Color("scenario:N", title="Network"), + tooltip=["round:Q", "scenario:N", "mean:Q"], + ) + .properties(title="Braess's Paradox: Convergence to Nash Equilibrium") + ) + chart + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Understanding the Math + + ### Nash equilibrium + + A Nash equilibrium is a situation where every player has chosen a strategy and no single player can improve their own outcome by switching to a different strategy so long as everyone else stays put. Think of it as a stable fixed point: if you woke up one morning in a Nash equilibrium, you would have no reason to change what you are doing. Crucially, a Nash equilibrium need not be the best possible outcome for everyone collectively. + + ### The paradox, step by step + + Label the number of cars $N$ and suppose the congested links have delay $\alpha \cdot n$ where $n$ is the number of cars currently using that link. Without the shortcut, traffic splits evenly: $N/2$ cars use each route. Each driver's travel time is $(N/2)\alpha + c$, where $c$ is the fixed delay on the non-congested link. 
Neither route is faster than the other, so no driver wants to switch β€” that is Nash equilibrium.
+
+    Now add the shortcut $A \to B$ with near-zero travel time $\varepsilon$. A single driver considering a switch reasons: "Link $AB$ is essentially free. If I take $SA$, cross to $B$, and take $BT$, I avoid the fixed cost $c$." If that driver is the only one to switch, it looks cheaper. But every driver makes the same calculation simultaneously. At the new equilibrium, all $N$ drivers pile onto $SA$ and $BT$:
+
+    $$t_{\text{shortcut}} = N\alpha + \varepsilon + N\alpha = 2N\alpha + \varepsilon$$
+
+    Since $2N\alpha > (N/2)\alpha + c$ for typical parameters, everyone is worse off than before the shortcut was built.
+
+    ### The price of anarchy
+
+    The social optimum would split traffic evenly at cost $(N/2)\alpha + c$, but selfish routing delivers $2N\alpha + \varepsilon$. The price of anarchy exceeds 1, meaning individual rationality destroys collective welfare.
+
+    The [Prisoner's Dilemma](https://en.wikipedia.org/wiki/Prisoner%27s_dilemma) is the best-known example of this tension. Two suspects each choose independently to cooperate or defect. Defecting is a dominant strategy: it is better for you regardless of what the other person does. Yet if both defect, both get a worse outcome than if both had cooperated. Braess's paradox is the same logic scaled to $N$ drivers.
+
+    ### The logit model
+
+    The simulation uses a probabilistic choice rule: the probability a driver picks route $r$ is proportional to $\exp(-\beta \cdot t_r)$, where $t_r$ is the expected travel time on route $r$ and $\beta$ is a sensitivity parameter. When $\beta$ is large, drivers strongly prefer the fastest route and the outcome approaches the pure Nash equilibrium. When $\beta$ is small, drivers choose nearly randomly and the paradox weakens. The parameter $\beta$ captures how responsive real drivers are to time differences.
+ """) + return + + +if __name__ == "__main__": + app.run() diff --git a/queueing/index.md b/queueing/index.md new file mode 100644 index 0000000000000000000000000000000000000000..0e2832e7ca069bcceee0c3103ae91f153aeafae1 --- /dev/null +++ b/queueing/index.md @@ -0,0 +1,15 @@ +--- +title: Learn Queueing Theory +description: > + Why is your line always slower than the other one? Why do + traffic jams happen without any apparent cause? These lessons + use a mixture of queueing theory and simulation to explain + these scenarios and others. +tracking: 136 +--- + +## Contributors + +Thanks to our notebook authors: + +* [Greg Wilson](https://github.com/gvwilson) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f345fb130a3c82e59193875801dcf965724e90a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +faker==40.11.0 +jinja2==3.1.6 +linkchecker==10.6.0 +markdown==3.10.2 +marimo==0.20.4 +python-frontmatter==1.1.0 +ruff==0.15.5 diff --git a/scripts/build.py b/scripts/build.py deleted file mode 100644 index c891358a63d0b7e69e5e511db1e022003764f826..0000000000000000000000000000000000000000 --- a/scripts/build.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 - -import os -import subprocess -import argparse -import json -import datetime -import markdown -from datetime import date -from pathlib import Path -from typing import Dict, List, Any, Optional, Tuple - -from jinja2 import Environment, FileSystemLoader - - -def export_html_wasm(notebook_path: str, output_dir: str, as_app: bool = False) -> bool: - """Export a single marimo notebook to HTML format. 
- - Args: - notebook_path: Path to the notebook to export - output_dir: Directory to write the output HTML files - as_app: If True, export as app instead of notebook - - Returns: - bool: True if export succeeded, False otherwise - """ - # Create directory for the output - os.makedirs(output_dir, exist_ok=True) - - # Determine the output path (preserving directory structure) - rel_path = os.path.basename(os.path.dirname(notebook_path)) - if rel_path != os.path.dirname(notebook_path): - # Create subdirectory if needed - os.makedirs(os.path.join(output_dir, rel_path), exist_ok=True) - - # Determine output filename (same as input but with .html extension) - output_filename = os.path.basename(notebook_path).replace(".py", ".html") - output_path = os.path.join(output_dir, rel_path, output_filename) - - # Run marimo export command - mode = "--mode app" if as_app else "--mode edit" - cmd = f"marimo export html-wasm {mode} {notebook_path} -o {output_path} --sandbox" - print(f"Exporting {notebook_path} to {rel_path}/{output_filename} as {'app' if as_app else 'notebook'}") - print(f"Running command: {cmd}") - - try: - result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - print(f"Successfully exported {notebook_path} to {output_path}") - return True - except subprocess.CalledProcessError as e: - print(f"Error exporting {notebook_path}: {e}") - print(f"Command output: {e.output}") - return False - - -def get_course_metadata(course_dir: Path) -> Dict[str, Any]: - """Extract metadata from a course directory. - - Reads the README.md file to extract title and description. 
- - Args: - course_dir: Path to the course directory - - Returns: - Dict: Dictionary containing course metadata (title, description) - """ - readme_path = course_dir / "README.md" - title = course_dir.name.replace("_", " ").title() - description = "" - description_html = "" - - if readme_path.exists(): - with open(readme_path, "r", encoding="utf-8") as f: - content = f.read() - - # Try to extract title from first heading - title_match = content.split("\n")[0] - if title_match.startswith("# "): - title = title_match[2:].strip() - - # Extract description from content after first heading - desc_content = "\n".join(content.split("\n")[1:]).strip() - if desc_content: - # Take first paragraph as description, preserve markdown formatting - description = desc_content.split("\n\n")[0].strip() - # Convert markdown to HTML - description_html = markdown.markdown(description) - - return { - "title": title, - "description": description, - "description_html": description_html - } - - -def organize_notebooks_by_course(all_notebooks: List[str]) -> Dict[str, Dict[str, Any]]: - """Organize notebooks by course. 
- - Args: - all_notebooks: List of paths to notebooks - - Returns: - Dict: A dictionary where keys are course directories and values are - metadata about the course and its notebooks - """ - courses = {} - - for notebook_path in sorted(all_notebooks): - # Parse the path to determine course - # The first directory in the path is the course - path_parts = Path(notebook_path).parts - - if len(path_parts) < 2: - print(f"Skipping notebook with invalid path: {notebook_path}") - continue - - course_id = path_parts[0] - - # If this is a new course, initialize it - if course_id not in courses: - course_metadata = get_course_metadata(Path(course_id)) - - courses[course_id] = { - "id": course_id, - "title": course_metadata["title"], - "description": course_metadata["description"], - "description_html": course_metadata["description_html"], - "notebooks": [] - } - - # Extract the notebook number and name from the filename - filename = Path(notebook_path).name - basename = filename.replace(".py", "") - - # Extract notebook metadata - notebook_title = basename.replace("_", " ").title() - - # Try to extract a sequence number from the start of the filename - # Match patterns like: 01_xxx, 1_xxx, etc. 
- import re - number_match = re.match(r'^(\d+)(?:[_-]|$)', basename) - notebook_number = number_match.group(1) if number_match else None - - # If we found a number, remove it from the title - if number_match: - notebook_title = re.sub(r'^\d+\s*[_-]?\s*', '', notebook_title) - - # Calculate the HTML output path (for linking) - html_path = f"{course_id}/{filename.replace('.py', '.html')}" - - # Add the notebook to the course - courses[course_id]["notebooks"].append({ - "path": notebook_path, - "html_path": html_path, - "title": notebook_title, - "display_name": notebook_title, - "original_number": notebook_number - }) - - # Sort notebooks by number if available, otherwise by title - for course_id, course_data in courses.items(): - # Sort the notebooks list by number and title - course_data["notebooks"] = sorted( - course_data["notebooks"], - key=lambda x: ( - int(x["original_number"]) if x["original_number"] is not None else float('inf'), - x["title"] - ) - ) - - return courses - - -def generate_clean_tailwind_landing_page(courses: Dict[str, Dict[str, Any]], output_dir: str) -> None: - """Generate a clean tailwindcss landing page with green accents. - - This generates a modern, minimal landing page for marimo notebooks using tailwindcss. - The page is designed with clean aesthetics and green color accents using Jinja2 templates. 
- - Args: - courses: Dictionary of courses metadata - output_dir: Directory to write the output index.html file - """ - print("Generating clean tailwindcss landing page") - - index_path = os.path.join(output_dir, "index.html") - os.makedirs(output_dir, exist_ok=True) - - # Load Jinja2 template - current_dir = Path(__file__).parent - templates_dir = current_dir / "templates" - env = Environment(loader=FileSystemLoader(templates_dir)) - template = env.get_template('index.html') - - try: - with open(index_path, "w", encoding="utf-8") as f: - # Render the template with the provided data - rendered_html = template.render( - courses=courses, - current_year=datetime.date.today().year - ) - f.write(rendered_html) - - print(f"Successfully generated clean tailwindcss landing page at {index_path}") - - except IOError as e: - print(f"Error generating clean tailwindcss landing page: {e}") - - -def main() -> None: - parser = argparse.ArgumentParser(description="Build marimo notebooks") - parser.add_argument( - "--output-dir", default="_site", help="Output directory for built files" - ) - parser.add_argument( - "--course-dirs", nargs="+", default=None, - help="Specific course directories to build (default: all directories with .py files)" - ) - args = parser.parse_args() - - # Find all course directories (directories containing .py files) - all_notebooks: List[str] = [] - - # Directories to exclude from course detection - excluded_dirs = ["scripts", "env", "__pycache__", ".git", ".github", "assets"] - - if args.course_dirs: - course_dirs = args.course_dirs - else: - # Automatically detect course directories (any directory with .py files) - course_dirs = [] - for item in os.listdir("."): - if (os.path.isdir(item) and - not item.startswith(".") and - not item.startswith("_") and - item not in excluded_dirs): - # Check if directory contains .py files - if list(Path(item).glob("*.py")): - course_dirs.append(item) - - print(f"Found course directories: {', '.join(course_dirs)}") - - 
for directory in course_dirs: - dir_path = Path(directory) - if not dir_path.exists(): - print(f"Warning: Directory not found: {dir_path}") - continue - - notebooks = [str(path) for path in dir_path.rglob("*.py") - if not path.name.startswith("_") and "/__pycache__/" not in str(path)] - all_notebooks.extend(notebooks) - - if not all_notebooks: - print("No notebooks found!") - return - - # Export notebooks sequentially - successful_notebooks = [] - for nb in all_notebooks: - # Determine if notebook should be exported as app or notebook - # For now, export all as notebooks - if export_html_wasm(nb, args.output_dir, as_app=False): - successful_notebooks.append(nb) - - # Organize notebooks by course (only include successfully exported notebooks) - courses = organize_notebooks_by_course(successful_notebooks) - - # Generate landing page using Tailwind CSS - generate_clean_tailwind_landing_page(courses, args.output_dir) - - # cp assets folder to o/p directory - import shutil - assets_src = "assets" - assets_dst = os.path.join(args.output_dir, "assets") - if os.path.exists(assets_src): - if os.path.exists(assets_dst): - shutil.rmtree(assets_dst) - shutil.copytree(assets_src, assets_dst) - print(f"Copied assets from {assets_src} to {assets_dst}") - - # Save course data as JSON for potential use by other tools - courses_json_path = os.path.join(args.output_dir, "courses.json") - with open(courses_json_path, "w", encoding="utf-8") as f: - json.dump(courses, f, indent=2) - - print(f"Build complete! 
Site generated in {args.output_dir}") - print(f"Successfully exported {len(successful_notebooks)} out of {len(all_notebooks)} notebooks") - - -if __name__ == "__main__": - main() diff --git a/scripts/templates/index.html b/scripts/templates/index.html deleted file mode 100644 index d742e2756d2b4b0fb4ea78aa0d78a928b4ef1007..0000000000000000000000000000000000000000 --- a/scripts/templates/index.html +++ /dev/null @@ -1,175 +0,0 @@ - - - - - - Marimo Learn - Interactive Python Notebooks - - - - - - - -
-
-
-
-

Interactive Python Learning with marimo

-

Explore our collection of interactive notebooks for Python, data science, and machine learning.

- -
-
-
- Marimo Logo -
-
-
-
-
- - -
-
-

Why Learn with Marimo?

-
-
-
- - - -
-

Interactive Learning

-

Learn by doing with interactive notebooks that run directly in your browser.

-
-
-
- - - -
-

Practical Examples

-

Real-world examples and applications to reinforce your understanding.

-
-
-
- - - -
-

Comprehensive Curriculum

-

From Python basics to advanced machine learning concepts.

-
-
-
-
- - -
-
-

Explore Our Courses

-
- {% for course_id, course in courses.items() %} - {% set notebooks = course.get('notebooks', []) %} - {% set notebook_count = notebooks|length %} - - {% if notebook_count > 0 %} - {% set title = course.get('title', course_id|replace('_', ' ')|title) %} - -
-
-
-

{{ title }}

-

- {% if course.get('description_html') %} - {{ course.get('description_html')|safe }} - {% endif %} -

-
- {{ notebook_count }} notebooks: -
    - {% for notebook in notebooks %} - {% set notebook_title = notebook.get('title', notebook.get('path', '').split('/')[-1].replace('.py', '').replace('_', ' ').title()) %} -
  1. - - {{ notebook_title }} - -
  2. - {% endfor %} -
-
-
-
- {% endif %} - {% endfor %} -
-
-
- - -
-
-

Want to Contribute?

-

Help us improve these learning materials by contributing to the GitHub repository. We welcome new content, bug fixes, and improvements!

- - - - - Contribute on GitHub - -
-
- - -
-
-
-
-

© {{ current_year }} marimo. All rights reserved.

-
- -
-
-
- - - - - diff --git a/sql/01_basic_select.py b/sql/01_basic_select.py new file mode 100644 index 0000000000000000000000000000000000000000..43d4ab83dbab6f578460738e6658edc08e5bb84f --- /dev/null +++ b/sql/01_basic_select.py @@ -0,0 +1,382 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// + +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + +with app.setup: + import marimo as mo + import marimo_learn as mol + from marimo_learn import MultipleChoiceWidget, OrderingWidget + import sqlalchemy + + db_path = mol.localize_file("penguins.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Basic Selection + + This tutorial shows how to select values from a single table in a database using SQL. We have already made a connection between this notebook and our `penguins.db` databaseβ€”we'll show you how to do that laterβ€”so let's have a look at the data in the `penguins` table. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select * from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Almost every **query** in SQL starts with the word `select`. The value immediately after it tells the database manager what we want to see. In this case, we use the shorthand `*` to mean "all the columns". We then say `from penguins` to tell the database manager which table we want to get the data from. The semi-colon at the end marks the end of the query. + + Note that the database manager doesn't format the output nicely, draw the little distribution histograms above columns, or give us the page-forward/page-backward controls: all the credit for that belongs to the Marimo notebook. 
+ """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Choosing Columns + + We don't have to select all of the columns every time we get data from a table. If we only want specific columns, we give their names instead of using `*` to mean "all". As the output below shows, the columns are displayed in the order in which we gave their names. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select sex, island, species from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Try editing the SQL in the query cell to change the column order, or to get the `bill_length_mm` column. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Upper and Lower Case + + We can write the query above in any mixture of upper and lower case and get the same result. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + SELECT Sex, island, SPECIES frOM pEnGuInS; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Please don't do this: it makes your queries very hard to read. It *is* common to use upper case for keywords like `SELECT` and `FROM`, and lower case for column names like `penguins` and `island`; whatever you choose, the most important thing is to be consistent. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Sorting + + When we look at a spreadsheet or a printed table, the rows are in a particular order. A database manager, on the other hand, might rearrange rows for the sake of efficiency as data is added or deleted, which means the rows displayed by `select` can be in whatever order it wants. If we want a particular order, we can add `order by` and the names of one or more columns to our query. + + Note that we have split the query below across several lines to make it easier to read. Just as SQL doesn't care about upper and lower case, it doesn't care about line breaks. 
As our queries become larger and more complicated, formatting them like this will make them a lot easier to understand. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select island, species, sex + from penguins + order by island, species; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + If you page through the output from the query above, you'll see that our penguins have been ordered by island: Biscoe before Dream, and Dream before Torgersen. Within each of those groups, the penguins are sub-ordered by species (Adelie, Chinstrap, and then Gentoo). The penguins aren't ordered by sex, but they could be: as with island and species, the sorting goes from left to right. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Try rearranging the order of columns in the `select` while leaving the order in `order by` alone and vice versa. Notice that you don't have to sort in the order in which the columns are displayed (but you usually should to make the output easier to understand). + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > What do you think will happen if you select `island` and `species` but `order by sex`? How can you tell if your prediction is correct? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Limiting Output + + The `penguins` table has 344 rows. If we only want to see the first five, we can add a `limit` clause to our query, which specifies the maximum number of rows we want. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select * from penguins limit 5; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + What if we want the next five? Or the five after that? To get those, we can add an offset, which is the number of rows to skip before selecting as many rows as we've asked for. 
+ """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select * from penguins + limit 5 offset 5; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Selecting one chunk of data after another is called **paging**. Applications frequently do this in order to save memory and bandwidth: people can't look at 100,000 rows at once, so there's usually no point grabbing that many in one gulp. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Add a cell below to get rows 12 through 17 from the `penguins` table. Think carefully about what the `offset` and `limit` need to be to get precisely these rows. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Try changing the query above to be `offset 5 limit 5`. Do you understand the result? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > 1. What happens if you specify a limit that is greater than the number of rows in the table? + > 1. What happens if you specify an offset that is greater than the number of rows in the table? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Suppose your program is paging through a table while another application is adding and deleting rows. What would you want to happen? What do you think will happen? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Removing Duplicates + + Suppose we want to find out which kinds of penguins were seen on which islands. We could scroll through the data, taking note of each unique (species, island) pair we see, but SQL will do this for us if we add the `distinct` keyword to our query. + + Note that the query below includes a comment explaining what it does. While comments in Python start with `#`, comments in SQL start with `--` and run to the end of the line. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + -- Show unique (species, island) pairs. 
+ select distinct species, island + from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Modify the query above to show (island, species) instead of (species, island), and to sort by island name and then by species name. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Doing Calculations + + The `penguins` table records the penguins' masses in grams (at least, that's what we think the `_g` suffix on the column name means). If we want the mass in kilograms, we can divide the given values by 1000. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select species, sex, body_mass_g, body_mass_g / 1000 + from penguins + limit 10; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The query above shows both the mass in grams and the mass in kilograms so that we can check the latter against the former. However, the name that the database manager automatically gives the calculated column isn't particularly readable. Let's use `as` to fix that. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select species, sex, body_mass_g, body_mass_g / 1000 as mass_kg + from penguins + limit 10; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Can you use `as` to select a column from the table but display it with a different name? Should you? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Write a query to calculate the ratio of bill length and bill depth for every penguin. Call the calculated column `bill_ratio`. 
+ """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Check Understanding + + ![concept map](public/01_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(): + _widget = mo.ui.anywidget( + OrderingWidget( + question="Arrange these SQL clauses in the order they must appear in a query.", + items=["SELECT", "FROM", "ORDER BY", "LIMIT"], + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(): + _widget = mo.ui.anywidget( + MultipleChoiceWidget( + question="What does `SELECT *` mean in a SQL query?", + options=[ + "Select only the first row of the table", + "Select all columns from the table", + "Select all rows but only the first column", + "Count the total number of rows", + ], + correct_answer=1, + explanation="`*` is shorthand for 'all columns'. `SELECT *` retrieves every column; the number of rows returned depends on whether you add WHERE, LIMIT, or other clauses.", + ) + ) + _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/02_filter.py b/sql/02_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..4cbe8fa6bf1098d8379dba6eaafea68e14ea9835 --- /dev/null +++ b/sql/02_filter.py @@ -0,0 +1,320 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + import marimo as mo + import marimo_learn as mol + import sqlalchemy + + db_path = mol.localize_file("penguins.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + return engine, mo, mol + + +@app.cell(hide_code=True) +def _(): + from marimo_learn import MatchingWidget, MultipleChoiceWidget + return MatchingWidget, MultipleChoiceWidget + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Filtering + + The previous tutorial showed how to select specific 
columns from a database table, and how to page through the data that a query returns. However, people almost always **filter** data based on its properties rather than on its position in a table. To see how this works, let's look at the combinations of species, island, and sex in the `penguins` table. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct species, island, sex from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Equality + + Suppose we only want to see penguins from Dream island, regardless of their species or sex. To get this, we add a `where` clause to our query. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct species, island, sex + from penguins + where island = "Dream"; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + There are several noteworthy things in this query: + + 1. We don't have to use `distinct`. If we leave it out, we get *all* the penguins on Dream island. (We included it to make the output easier to read without paging.) + 2. The `where` clause *must* come after the `from` clause. SQL is very picky about ordering… + 3. We don't put quotation marks around `island` because it's the name of a column. We *do* put quotes around `"Dream"` because it's an actual literal piece of text. + 4. We use a single equals sign `=` to check for equality. This is different from most programming languages, which use `==`. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Write a query to select all the Chinstrap penguins regardless of what island they're on. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > 1. Change the column name `island` to `ISLAND` and re-run the query: what happens? + > 2. Change the text value `"Dream"` to `"DREAM"`: what happens? 
+ """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Comparisons + + We can do all of the usual comparisons in SQL: + + | name | symbol | example | + | :--- | ------ | :------ | + | less than | `<` | `body_mass_g < 3300` | + | less than or equal | `<=` | `flipper_length_mm < 200.0` | + | equal | `=` | `species = "Gentoo"` | + | not equal | `!=` or `<>` | `species != "Gentoo"` | + | greater than or equal | `>=` | `flipper_length_mm >= 200.0` | + | greater than | `>` | `body_mass_g > 3300` | + + Comparing numbers is straightforward. When we compare text, the comparison uses dictionary order: A is less than B, AA is than AB, and so on. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Find all the penguins that _aren't_ on Torgersen island. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Use `where`, `order by`, and `limit` to find the heaviest penguin. Use it again to find the lightest. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > What happens if we accidentally compare a number to text? For example, what happens if we select penguins where `species` is less than 3000, or where `body_mass_g` is greater than the letter 'M'? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Combining Conditions + + We can combine conditions using `and` and `or`. `and` is the simpler of the two: when we write `where condition_1 and condition_2`, we get the rows where *both* conditions are true. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select * from penguins + where species = 'Gentoo' and body_mass_g > 6000.0; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + If we use `or`, we get rows where *either or both* condition is true. This is different from common English usage: if you tell a child that they can have an ice cream cone or a chocolate bar, you mean "either/or". 
When you use `or` in SQL, on the other hand, it means "if any of the conditions is true". For example, the query below gets all of the penguins on Biscoe island, as well as all of the Adelie penguins. Some penguins satisfy both conditions (the Adelie penguins on Biscoe island), some satisfy just one (the Adelies on Torgersen and the Gentoos on Biscoe). Penguins that don't satisfy either, like Chinstrap penguins on Dream island, don't show up at all. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct species, island from penguins + where species = 'Adelie' or island = 'Biscoe'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + We have written our `where` conditions as we would say them. Many programmers would wrap each condition in parentheses to make them easier to read. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct species, island from penguins + where (species = 'Adelie') or (island = 'Biscoe'); + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The more complex our conditions are, the more important it is to use parentheses to make sure everyone reading the query (including ourselves) understands what it means. The query below shows an example. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct species, island from penguins + where ((species = 'Adelie') and (island = 'Biscoe')) or (species = 'Chinstrap'); + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Explain in simple terms what the condition in the query above is selecting. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > We can put `not` in front of a condition to invert its meaning. Use this to write a query that fetches the same rows as one with the condition `species != 'Chinstrap'`, but which uses `=` instead of `!=`. 
+ """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Does the expression `species not = 'Gentoo'` work? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > 1. Write a query to find all of the penguins whose bill length is greater than their bill depth. + > 2. Write another query to find all of the penguins whose bill length is less than their bill depth. What do you notice about the output of this query? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > The previous tutorial showed how to do calculations on the fly to (for example) produce a column called `mass_kg` showing the body mass of each penguin in kilograms. Can these on-the-fly columns be used in `where` conditions? To find out, write a query that finds all of the penguins that weight more than 4.0 kg. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Check Understanding + + ![concept map](/public/02_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(MatchingWidget, mo): + _widget = mo.ui.anywidget( + MatchingWidget( + question="Match each SQL comparison operator to its meaning.", + left=["<", "!=", ">=", "="], + right=["equal to", "not equal to", "less than", "greater than or equal to"], + correct_matches={0: 2, 1: 1, 2: 3, 3: 0}, + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(MultipleChoiceWidget, mo): + _widget = mo.ui.anywidget( + MultipleChoiceWidget( + question="A query uses `WHERE species = 'Adelie' OR island = 'Biscoe'`. Which rows does it return?", + options=[ + "Only rows where both conditions are true (Adelie penguins on Biscoe)", + "Rows where either condition is true, or both", + "Rows where species is Adelie but island is not Biscoe", + "Rows where island is Biscoe but species is not Adelie", + ], + correct_answer=1, + explanation="In SQL, OR returns every row where at least one condition is true. 
This includes rows satisfying just the first condition, just the second, or both simultaneously.", + ) + ) + _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/03_aggregate_group.py b/sql/03_aggregate_group.py new file mode 100644 index 0000000000000000000000000000000000000000..f0acb80e8558a5ccd8ca9d886d4b388ed38eb5d7 --- /dev/null +++ b/sql/03_aggregate_group.py @@ -0,0 +1,418 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + import marimo as mo + import marimo_learn as mol + import sqlalchemy + + db_path = mol.localize_file("penguins.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + return engine, mo, mol + + +@app.cell(hide_code=True) +def _(): + from marimo_learn import FlashcardWidget, LabelingWidget + return FlashcardWidget, LabelingWidget + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Aggregating and Grouping + + The queries we wrote in the previous two tutorials operated on each row separately. We often want to ask questions about groups of rows, such as "how heavy is the largest penguin we weighed?" or "how many Gentoo penguins did we see?" This tutorial looks first at how to write queries that **aggregate** data, and then at how to calculate aggregate values for several subsets of our data simultaneously. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Aggregation + + Let's start by finding out how heavy the heaviest penguin in our dataset is. To do this, we use a function called `max`, and give it the name of the column it is to get data from. To make the result more readable, we will use `as` to call the result `heaviest`. 
+ """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select max(body_mass_g) as heaviest from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The query below shows the six most commonly used aggregation functions in SQL applied to different columns of the penguins data. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select + avg(flipper_length_mm) as averagest, + count(species) as num_penguins, + max(body_mass_g) as heaviest, + min(flipper_length_mm) as shortest, + sum(body_mass_g) as total_mass + from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > How much do the penguins weigh in total? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > The function `length` calculates the number of characters in a piece of text. Write a query that returns the length of the longest island name in the database. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > The function `round` rounds off a number, e.g., `round(1.234, 1)` produces `1.2`. Use this to display the average flipper length of all the penguins rounded to one decimal place. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Note: rather than writing `count(species)` or `count(island)`, we often write `count(*)` to count the total number of rows. However, as we will see in the next tutorial `count(species)` and `count(*)` can sometimes produce slightly different answers. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Grouping + + The query shown above applies the aggregation function to all of the rows in the table. If we want, we can apply it to just the first ten. 
+ """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select avg(body_mass_g) as avg_mass + from penguins + limit 10; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The order of operations here is important. We aren't asking SQL to calculate an average and then give us the first ten rows of the result. Instead, we are asking it to get the first ten rows and *then* calculate the average of those. This matters more when we use `where` to filter the data: the filtering happens before SQL applies the function, which lets us do things like calculate the average mass of all the Gentoo penguins. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select avg(body_mass_g) as avg_mass + from penguins + where species = 'Gentoo'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + But what if we want to calculate the average mass for all of the species? We could write three queries, one for each species, but (a) that would be annoying and (b) if someone adds Emperor penguins to the data and we don't remember to update our query, we won't get the full picture. + + What we should do instead is tell SQL to group the data based on the values in one or more columns, and then calculate the aggregate value within each group. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select avg(body_mass_g) as avg_mass + from penguins + group by species; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Since there are three species, we get three rows of output. Unfortunately, we don't know which average corresponds to which species. To get that, we add the `species` column to the `select` clause. 
+ """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select species, avg(body_mass_g) as avg_mass + from penguins + group by species; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + And just as we can order data by multiple columns at once, we can group by multiple columns. When we do, we get one bucket for each unique combination of grouping values. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select species, sex, avg(body_mass_g) as avg_mass + from penguins + group by species, sex; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + We will explain what the blanks in the `sex` column mean in the next tutorial. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > How many penguins of each sex were found on each island? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > What is difference in weight between the heaviest female penguin and the lightest female penguin within each species? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Explain what the query below is calculating, and when its result would be useful. + > + > ```sql + > select round(body_mass_g/1000, 1) as weight, count(*) + > from penguins + > group by weight; + > ``` + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Arbitrary Choice in Aggregation + + The query shown below is legal SQL, but probably not what anyone would want. 
+ """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select sex, species, body_mass_g + from penguins + group by species; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The rule that SQL follows is this: if we have created groups using `group by`, and we _don't_ specify how to combine the values in a group for a particular column, then the database picks one of the values for that column in that group arbitrarily. For example, since we only grouped by `species`, but we're asking to show `sex`, the result shows one of the values for `sex` for each species. Similarly, since we didn't specify how to combine the various body masses for each species, the three values shown each come from a penguin of that species, but we don't know (and can't control) which one. + + We used this behavior earlier when we selected `species` and `avg(body_mass_g)` after grouping by `species`. Since all of the penguins within a group are of the same species, it doesn't matter which `species` value the database shows us for that group: they're all the same. If we forget to choose an aggregation function by accident, though, the answer will be plausible (because it's an actual value) but wrong. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Filtering After Aggregation + + Just as we can use `where` to filter individual rows before aggregating (or if we're not aggregating at all), we can use `having` to filter aggregated values. For example, the query below finds those combinations of sex and species whose average weight is 4kg or more. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select sex, species, avg(body_mass_g) as avg_mass + from penguins + group by sex, species + having avg_mass >= 4000.0; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Explain what the query below is calculating. 
+ > + > ```sql + > select max(flipper_length_mm) as long_flipper, species, sex + > from penguins + > where sex = 'FEMALE' + > group by species, sex + > having long_flipper > 210.0; + > ``` + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + What we *can't* do with the tools we've seen so far is compare individual values to aggregates. For example, we can't use a query like the one below to find penguins that are heavier than average. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select * from penguins + where body_mass_g > avg(body_mass_g); + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + We will see how to write this query in a couple of tutorials. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Check Understanding + + ![concept map](/public/03_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(FlashcardWidget, mo): + _widget = mo.ui.anywidget( + FlashcardWidget( + question="SQL Aggregation Functions", + cards=[ + {"front": "avg(column)", "back": "Returns the average of all non-null values in the column"}, + {"front": "count(*)", "back": "Counts the total number of rows, including rows with null values"}, + {"front": "count(column)", "back": "Counts the number of non-null values in the column (rows with null are skipped)"}, + {"front": "max(column)", "back": "Returns the largest non-null value in the column"}, + {"front": "min(column)", "back": "Returns the smallest non-null value in the column"}, + {"front": "sum(column)", "back": "Adds up all non-null values in the column"}, + ], + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(LabelingWidget, mo): + _widget = mo.ui.anywidget( + LabelingWidget( + question="Drag each label to the line of the query it best describes.", + labels=["aggregation function", "alias", "source table", "grouping column"], + text_lines=[ + "select species, avg(body_mass_g) as avg_mass", + 
"from penguins", + "group by species;", + ], + correct_labels={0: [0, 1], 1: [2], 2: [3]}, + ) + ) + _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/04_null.py b/sql/04_null.py new file mode 100644 index 0000000000000000000000000000000000000000..beb11772c941454c95c03a139ddc8710fb16710a --- /dev/null +++ b/sql/04_null.py @@ -0,0 +1,397 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + import marimo as mo + import marimo_learn as mol + import sqlalchemy + + db_path = mol.localize_file("penguins.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + return engine, mo, mol + + +@app.cell(hide_code=True) +def _(): + from marimo_learn import ConceptMapWidget, MatchingWidget + return ConceptMapWidget, MatchingWidget + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Missing Data + + The biggest challenge people facing when using databases isn't remembering the order of clauses in a SQL query. The biggest challenge is handling missing data. This tutorial builds on the filtering introduced in the previous one to show how to manage this in our queries. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Null + + Here are all of the distinct combinations of island, species, and sex in the `penguins` table. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct island, species, sex from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Notice the two blanks in the `sex` column, and the fact that its subtitle says there are 3 unique values. Those blanks show the special value `null`, which SQL uses to mean "I don't know". 
In this case, those values tell us that the scientists who collected the penguins didn't record the sex of some of the Adelie penguins on Dream and Torgersen islands. + + The most important thing about **null values** is that almost any operation that involves a `null` produces `null` as an answer. For example, we can use SQL as a very complicated desk calculator and ask, "What is 1 + 2?" + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select 1 + 2 as result; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + If we ask, "What is 1 + `null`?", the answer is `null`, because one plus "I don't know" is "I don't know". + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select 1 + null as result; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + We get the same thing if we subtract `null`, multiply by it, and so on. (As the saying goes, "Garbage in, garbage out.") We also get the same thing if we do comparisons. Is `null` equal to 3? Again, the answer is `null`. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select null = 3 as result; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + We get the same thing if we ask if `null` is *not* equal to 3, because if we don't know the value, we don't know if it *isn't* 3. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select null != 3 as result; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + What about `null = null`? If we have two numbers, and we don't know what either is, we don't know if they're the same or not, so the answer is once again `null`, *not* `true`. + """) + return + + +@app.cell +def _(): + _df = mo.sql( + f""" + select null = null as result; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > 1. 
Where does SQL put `null` values when sorting: at the start, at the end, or somewhere else? + > 2. Does it follow the same rule for both numbers and text? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Aggregating Nulls + + If 1 + `null` is `null`, then 1 + 2 + `null` should be `null` as well. Continuing this line of thought, the sum of a column that includes one or more nulls ought to be `null`; so should the `max`, `min`, and so on, because if we don't know all of the inputs, we can't know the output. + + SQL isn't this strict because it wouldn't be useful. Instead, its aggregation functions ignore `null` values. If we calculate a sum, for example, we get the sum of all the numbers that we actually know. If we calculate an average, we get the sum of the known values divided by the number of known values (rather than by the total number of known and unknown values), and so on. + + There is one exception to this rule. If we ask for `count(sex)` in the penguins database, we get the number of penguins whose sex is known: + """) + return + + +@app.cell(hide_code=True) +def _(penguins): + _df = mo.sql( + f""" + select count(sex) from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + If we use `count(*)`, on the other hand, we get the total number of rows regardless of whether some values are `null` or not: + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select count(*) from penguins; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Compare `sum(body_mass_g) / count(body_mass_g)` with `sum(body_mass_g) / count(*)` and with `avg(body_mass_g)`. Are the results consistent with the explanation above? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Handling Nulls + + There are only two things we can do with `null` that don't produce `null` as a result: ask if a value is `null`, and ask if it isn't. 
If we're interested in the `sex` column, the first is written `sex is null`, while the second is written `sex is not null`. Note that `is null` and `is not null` are written as multiple words, but are a single test; it's confusing, but we're stuck with it. + + Let's have a look at some practical examples. If we select the distinct values of `sex` from the `penguins` table, we get `"FEMALE"`, `"MALE"`, and `null`. (The first line of output is blank, which is how Marimo shows null values.) + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select distinct sex from penguins order by sex; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + If we want to get all the rows that have a null value for `sex`, we *cannot* do this: + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select sex from penguins + where (sex != 'MALE') and (sex != 'FEMALE'); + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + That doesn't produce any output because the rows with null values for `sex` don't pass the test. If we want the rows with missing sex, we have to ask for them explicitly. This query gives us 11 rows. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select sex from penguins + where sex is null; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + How many times did the scientists fail to record a penguin's mass or flipper length? The answer is "twice", and in both cases they didn't record *any* of the physical measurements. + """) + return + + +@app.cell +def _(penguins): + _df = mo.sql( + f""" + select * from penguins + where (body_mass_g is null) or (flipper_length_mm is null); + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > 1. Write a query to find penguins whose body mass is known but whose sex is not. + > 2. 
Write another query to find penguins whose sex is known but whose body mass is not.
+ """)
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ > Explain why the query shown earlier (and reproduced below) does not produce any rows:
+ >
+ > ```sql
+ > select sex from penguins
+ > where (sex != 'MALE') and (sex != 'FEMALE');
+ > ```
+ """)
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ Some programmers find `null` very annoying. Instead of putting it in their tables, they use marker values like -1 or `"NA"` to signal missing data. Doing this almost always leads to problems. For example, if we are calculating the average age of people who are 17, 19, 21, and an unknown number of years old, the sensible thing to do is add the values we know (the 17, 19, and 21) and then divide by 3. As we will see in the next tutorial, SQL will do this for us automatically _if_ we have used `null` to represent the unknown age. If we use -1, on the other hand, it's all too easy to calculate (17 + 19 + 21 - 1) / 4 and get an average age of 14. We could use `where` to filter out the -1 ages before doing the sum, but (a) we'd have to know to do that and (b) we'd have to know that this programmer used -1 instead of -999999 or something else to mean "I don't know". While it takes a bit of getting used to, it's (almost) always better to use `null` when there are holes in our data.
+ """)
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ ## Ternary Logic
+
+ These tutorials avoid theory when they can, but a little bit will help understand how `null` works. In conventional logic, a statement is either true or false. If we have two statements `A` and `B`, then `A and B` is true when both are true, while `A or B` is true if either or both are true. These rules are sometimes referred to as **binary logic** (also called **Boolean logic**) because there are only two possible values. 
+ + SQL is unusual among programming languages in using **ternary logic**, in which any statement can be true, false, or null. Since `null` is not `true`, `where` drops rows if the filter expression produces `null`. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Checking Understanding + + ![concept map](/public/04_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(MatchingWidget, mo): + _widget = mo.ui.anywidget( + MatchingWidget( + question="Match each SQL expression involving null to its result.", + left=["1 + null", "null = null", "null is null", "null != 3"], + right=[ + "null β€” arithmetic with an unknown is unknown", + "null β€” comparing unknowns yields unknown, not true", + "true β€” the only test that reliably works on null", + "null β€” even inequality checks return unknown for null", + ], + correct_matches={0: 0, 1: 1, 2: 2, 3: 3}, + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(ConceptMapWidget, mo): + _widget = mo.ui.anywidget( + ConceptMapWidget( + question="Connect these null-related concepts by selecting a relationship term and clicking two concepts.", + concepts=["null", "unknown value", "is null", "ternary logic", "aggregation functions"], + terms=["means", "tested with", "uses", "ignore"], + correct_edges=[ + {"from": "null", "to": "unknown value", "label": "means"}, + {"from": "null", "to": "is null", "label": "tested with"}, + {"from": "ternary logic", "to": "null", "label": "uses"}, + {"from": "aggregation functions", "to": "null", "label": "ignore"}, + ], + ) + ) + _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/05_join.py b/sql/05_join.py new file mode 100644 index 0000000000000000000000000000000000000000..162db1bf76787941ca9766e2de35777e080c2da8 --- /dev/null +++ b/sql/05_join.py @@ -0,0 +1,317 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// 
+import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + import marimo as mo + import marimo_learn as mol + import sqlalchemy + + db_path = mol.localize_file("lab.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + return engine, mo, mol + + +@app.cell(hide_code=True) +def _(): + from marimo_learn import LabelingWidget, OrderingWidget + return LabelingWidget, OrderingWidget + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Combining Tables + + Relational databases get their name from the fact that they store the relations between tables. This tutorial shows how to connect and combine information from multiple tables. We will save most of the exercises for the next tutorial, where we start working with our first complex database. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Basic Joins + + The `jobs` database has two tables. The first, called `job`, shows the credits that students can earn doing different kinds of jobs, and has two rows and two columns: + + | name | credits | + | :--- | ------: | + | calibrate | 1.5 | + | clean | 0.5 | + + The other table, `work`, keeps track of who has done which jobs: + + | person | job | + | :----- | :-- | + | Amal | calibrate | + | Amal | clean | + | Amal | complain | + | Gita | clean | + | Gita | clean | + | Gita | complain | + | Madhi | complain | + + We want to know how many credits each student has earned. The first step in answering this is to **join** the tables together. + """) + return + + +@app.cell +def _(job, work): + _df = mo.sql( + f""" + select * + from job join work; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The `join` operation creates a temporary table in memory by combining every row of `job` with every row of `work`. Since `job` has two rows and `work` has seven, the temporary table has 2Γ—7=14 rows. 
+ + Some of these rows are useful: the first, for example, tells us that Amal did some calibration, and that calibrating is worth 1.5 credits. The second, however, combines information about calibrating with the fact that Amal did some cleaning. We can get rid of the rows that aren't useful by filtering with `where`. + """) + return + + +@app.cell +def _(job, work): + _df = mo.sql( + f""" + select * + from job join work + where job.name = work.job; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + This query demonstrates two things: + + 1. When we are working with two or more tables, we refer to columns using `table_name.column_name`, as in `job.name` or `work.job`. We don't absolutely need to do this in this query, since columns' names are all unique, but it's very common to have columns with the same names in different tables. In those cases the two-part names are required to avoid ambiguity; it is therefore good practice to *always* use two-part names when working with multiple tables. + 2. There isn't an entry in `job` for `complain`, so `job.name = work.job` isn't true for any of the combined rows that involve complaining. On the other hand, Gita cleaned up the lab twice, so there are two records in the result for that. This shows that `join` doesn't automatically remove duplicates. + + While we can use `where`, the SQL standard encourages us to use a different keyword `on`: + """) + return + + +@app.cell +def _(job, work): + _df = mo.sql( + f""" + select * + from job join work + on job.name = work.job; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Many years ago, using `on` sometimes gave slightly higher performance. Today, though, the two forms are equivalent from the database manager's point of view. Many people still prefer `on` for readability: it shows how the rows are being combined, while `where` shows how combined rows are being filtered. 
As with almost everything in programming, what matters most is to pick one and stick to it so that your queries are consistent. + + The standard also encourages us to write our join as `inner join`, because as we will see in a moment, other kinds of joins exist. People often skip this and just write `join`, or even use a simple comma between the table names, but from now on we will be pedantic to make what we're doing clearer. + + We are now able to answer our original question: how many credits has each student earned? + """) + return + + +@app.cell +def _(job, work): + _df = mo.sql( + f""" + select work.person, sum(job.credits) as total -- add up the credits for each person + from job inner join work -- notice: inner join + on job.name = work.job + group by work.person; -- put all the credits for each person into a separate bucket + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Left Joins + + The query above shows us how many credits Amal and Gita have earned, but doesn't show anything for Madhi. Ideally, we'd like a row showing that she has earned zero credits. To get this, we need to use a different kind of join called a **left join**. A left join is created by following these rules: + + 1. If the row from the left-hand table matches one or more rows from the right-hand table, combine them as an inner join would. + 2. If the row from the left-hand table _doesn't_ match any rows from the right-hand table, create one row in the result with the values from the left row and `null` where the values from the right-hand table would be. + + An example will make this clearer. + """) + return + + +@app.cell +def _(job, work): + _df = mo.sql( + f""" + select * + from work left join job + on work.job = job.name; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Let's trace this query's execution step by step: + + 1. 
The `(Amal, calibrate)` row from `work` matches the `(calibrate, 1.5)` row from `job`, so that is the first row of output.
+ 2. Similarly, the `(Amal, clean)` row matches the `(clean, 0.5)` row, so we get the second row of output.
+ 3. But `(Amal, complain)` _doesn't_ match anything from `job`, so we get a row with the values from the left table (`Amal` and `complain`) and `null` for `name` and `credits`.
+ 4. We then get two rows for Gita cleaning because there's a match…
+ 5. …and two rows with `null` values for Gita and Madhi complaining because there isn't.
+ """)
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ > What do we get if we invert the order of the tables, i.e., do `job left join work`? Why?
+ """)
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ ## Coalesce
+
+ We can now sum up everyone's credits:
+ """)
+ return
+
+
+@app.cell
+def _(job, work):
+ _df = mo.sql(
+ f"""
+ select work.person, sum(job.credits) as total
+ from work left join job -- notice: left join
+ on work.job = job.name
+ group by work.person;
+ """,
+ engine=engine
+ )
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ This is *almost* what we want: we have a row for Madhi, but her `total` is `null` because that's what `sum` produces when all of the values it's adding up are `null`. We can fix this using a built-in SQL function called `coalesce`:
+ """)
+ return
+
+
+@app.cell
+def _(job, work):
+ _df = mo.sql(
+ f"""
+ select
+ work.person,
+ coalesce(sum(job.credits), 0) as total
+ from
+ work left join job
+ on
+ work.job = job.name
+ group by
+ work.person;
+ """,
+ engine=engine
+ )
+ return
+
+
+@app.cell(hide_code=True)
+def _():
+ mo.md(r"""
+ `coalesce` takes two inputs. If the first is not `null`, `coalesce` returns that. If the first value *is* `null`, on the other hand, `coalesce` returns its second input. In simpler terms, it gives us a value or a default if the value is `null`. 
+ + Note that we have split this query across several lines with the keywords at the left margin and the parts of the query that belong to them indented below them. As our queries become more complex, this style makes them easier to read. As with `join` versus `inner join`, the most important thing is to be consistent so that the reader isn't distracted by stylistic differences. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Check Understanding + + ![concept map](/public/05_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(OrderingWidget, mo): + _widget = mo.ui.anywidget( + OrderingWidget( + question="Arrange the steps SQL follows when executing an INNER JOIN.", + items=[ + "Combine every row from the left table with every row from the right table", + "Apply the ON condition to keep only matching row pairs", + "Apply any WHERE clause to filter the matched rows further", + "Apply SELECT to return only the requested columns", + ], + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(LabelingWidget, mo): + _widget = mo.ui.anywidget( + LabelingWidget( + question="Drag each label to the line of the query it best describes.", + labels=["left table", "right table", "join condition", "fallback for null"], + text_lines=[ + "from work left join job", + "on work.job = job.name", + "coalesce(sum(job.credits), 0) as total", + ], + correct_labels={0: [0, 1], 1: [2], 2: [3]}, + ) + ) + _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/06_keys.py b/sql/06_keys.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6cc150942a857d8ca6258c1dd51912c4a8cb05 --- /dev/null +++ b/sql/06_keys.py @@ -0,0 +1,483 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "marimo-learn>=0.7.0", +# "polars==1.24.0", +# "sqlalchemy", +# ] +# /// +import marimo + +__generated_with = "0.20.4" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(): + 
import marimo as mo + import marimo_learn as mol + import sqlalchemy + + db_path = mol.localize_file("survey.db") + DATABASE_URL = f"sqlite:///{db_path}" + engine = sqlalchemy.create_engine(DATABASE_URL) + return engine, mo, mol + + +@app.cell(hide_code=True) +def _(): + from marimo_learn import ConceptMapWidget, FlashcardWidget + return ConceptMapWidget, FlashcardWidget + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + # Primary and Foreign Keys + + The previous tutorial explained how to combine information from two tables using `inner join` and `left join`. This tutorial will explain how we can tell when it makes sense to do this, and introduce our first complex database. To start, let's look at a diagram showing the four tables in the `survey` database. + + ![survey tables](/public/survey_tables.svg) + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Let's start with `person`, which has four columns: `persond_id`, `personal`, `family`, and `supervisor_id` (which we will discuss in the next section). `person_id` is shown in __*bold italics*__ to indicate that it is the table's **primary key**: each row in the table has a non-`null` `person_id`, and each of those values is unique. These values can therefore be used to uniquely identify specific rows in the table. We can check that by selecting all of the people and inspecting the `person_id` values by eye: + """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select person_id from person; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + A better way is to count the number of rows in the table, the number of non-`null` `person_id` values, and the number of distinct person ID values. Remember, `count(*)` counts rows, while `count(column_name)` counts the number of non-`null` values in that particular column. 
We haven't seen `count(distinct column_name)` before, but as you might guess, it counts the number of distinct values in a particular column. + """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select + count(*) as num_rows, + count(person_id) as num_non_null, + count(distinct person_id) as num_distinct + from person; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Now let's take a look at the `survey` table. Each survey has a survey ID, the ID of the person who did the survey, and the survey's start and end dates. `survey_id` is in __*bold italics*__, which tells us that each survey has a unique ID. `person_id`, on the other hand, is just in *italics*, and there's an arrow connecting it to the `person` table's primary key, which is also called `person_id`. The use of italics and the arrow signals that `survey.person_id` is a **foreign key**, i.e., a value stored in one table that references the primary key of another table. This relationship tells us that: + + 1. It makes sense to use `survey.person_id = person.person_id` as a condition in a join because every `survey.person_id` is guaranteed to refer to an existing `person.person_id`. + 2. Several surveys might refer to the same person (or equivalently, one person might have done several surveys). This is called a **one-to-many relationship**. + + Let's write some queries. Who is in the `person` table? + """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select * from person; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + How many surveys has AscensiΓ³n Sierra done? Her `person_id` is `P001`, so we can answer the question by filtering the `survey` table and then aggregating. 
+ """) + return + + +@app.cell +def _(survey): + _df = mo.sql( + f""" + select count(*) as num_surveys from survey + where person_id = 'P001'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + What if we want AscensiΓ³n's name displayed along with this count? To get that, we need to join the tables. + """) + return + + +@app.cell +def _(person, survey): + _df = mo.sql( + f""" + select person.personal, person.family, count(*) + from person join survey + on person.person_id = survey.person_id + where person.person_id = 'P001'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + What if we want to get AscensiΓ³n's full name in a single column? We can do that by concatenating her personal and family name using the `||` operator (which is sometimes called "glue"). As the output of the query below shows, `||` does for text what `+` does for numbers. + """) + return + + +@app.cell +def _(person, survey): + _df = mo.sql( + f""" + select person.personal || person.family as full_name, count(*) + from person join survey + on person.person_id = survey.person_id + where person.person_id = 'P001'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Whoops: we probably want a space between AscensiΓ³n's personal and family names, so we will glue her personal name to a space and then glue that to her family name (just as we would write 1 + 2 + 3). + """) + return + + +@app.cell +def _(person, survey): + _df = mo.sql( + f""" + select person.personal || ' ' || person.family as full_name, count(*) + from person join survey + on person.person_id = survey.person_id + where person.person_id = 'P001'; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Now, what if we want the number of surveys done by each person ordered by family and personal name? 
+ """) + return + + +@app.cell +def _(person, survey): + _df = mo.sql( + f""" + select person.personal || ' ' || person.family as full_name, count(*) + from person join survey + on person.person_id = survey.person_id + group by person.person_id + order by person.family, person.personal; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Notice that "Águila" (with an acute accent) comes after "Sierra". Correcting this mistake is out of the scope of this tutorial, but can be done by installing the [International Components for Unicode](https://icu.unicode.org/) and writing the query like this: + + ```sql + select * from person order by family, personal collate 'es_ES'; + ``` + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > When did the earliest survey done by each person start? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Which people have done 17 or more surveys? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Just as `sum` adds up all the values in a column, `group_concat` concatenates all the text in a column. For example, if the column is called `name`, then `select group_concat(name, ':')` joins all the values in `name` with colons. Use this to write a query that generates two columns: a person's full name, and a comma-separated list of the IDs of the survey that person has done. + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Explain what the following query produces and why. + > + > ```sql + > select person.personal || ' ' || person.family + > from person left join survey + > on person.person_id = survey.person_id + > where survey.survey_id is null; + > ``` + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Self-Joins + + As a reminder, here's the structure of the survey database. 
+ """) + return + + +@app.cell +def _(): + mo.image(src="survey_tables.svg", alt="table diagram of survey database") + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Notice that the `person` table has a foreign key called `supervisor_id` that refers back to the table's own primary key, `person_id`. This relationship makes sense: supervisors are people, so they're stored in the same table as everyone else. However, if we want to generate a list of people's names and their supervisors' names, we _can't_ just join `person` to `person`. + """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select * + from person inner join person + on person.person_id = person.supervisor_id; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + The problem is that `person.person_id` and `person.supervisor_id` are ambiguous: are we referring to the left-hand use of the `person` table or the right-hand use? To resolve this, we give each copy of the table an **alias** using `as`, just as we gave columns names using `as`. We also have to specify the columns that we want using two-part `table.column` notation. + """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select + pa.personal as pa_personal, + pa.family as pa_family, + pb.personal as pb_personal, + pb.family as pb_family + from person pa join person pb + on pa.person_id = pb.supervisor_id; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + Joining a table to itself is called a **self join**. The hard part is figuring out whether `pa` is the minion and `pb` is the supervisor or vice versa. The logic is that the supervisor of person `pb` is person `pa`, which means `pa` is the supervisor and `pb` is the minion. (Alternatively, we can inspect the first couple of rows, check back against the `person` table, and decide that way.) Let's rewrite the query to show the relationship explicitly. 
+ """) + return + + +@app.cell +def _(person): + _df = mo.sql( + f""" + select + pa.personal || ' ' || pa.family as supervisor, + pb.personal || ' ' || pb.family as minion + from person pa join person pb + on pa.person_id = pb.supervisor_id + order by pa.family, pa.personal; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Write a query that finds the full names of everyone who doesn't have a supervisor. (Hint: you do not need to use a `join`.) + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Write a query to find all the people who supervise someone who supervises someone. (Hint: you will need to join three copies of `person` to get the person, their boss, and their grand-boss.) + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Many-to-Many Relationships + + Each survey is done by one person, which means that people have a one-to-many relationship with surveys. However, any number of people can have ratings for any number of machines and vice versa, which means these two tables have a **many-to-many relationship**. These relationships can be hard to express in a table: if, for example, we knew that people never have ratings for more than three machines, we could add `machine_1`, `machine_2`, and `machine_3` columns to `person`, but (a) we would have to check several columns if we wanted to find a particular machine, and (b) we would have to redesign our table if the rules changed and people could have ratings for four or five machines. + + A better approach is to create another intermediate table that stores the relationship between the two tables we're interested in. Such a table is sometimes called a **join table** because its main purpose is to allow us to join two other tables. The `rating` table in our database is an example of a join table. 
Each row stores a foreign key into `person` and a foreign key into `machine`, which shows that the person has some relationship to the machine. The table also stores `level`, which is the actual rating (or `null`), but it is quite common for join tables to only store pairs of foreign keys. + + So, which people have ratings for which machines? + """) + return + + +@app.cell +def _(machine, person, rating): + _df = mo.sql( + f""" + select + person.personal, person.family, machine.machine_type, rating.level + from + person join rating join machine + on + person.person_id = rating.person_id + and + rating.machine_id = machine.machine_id + where + rating.level is not null + order by + person.family, person.personal, machine.machine_type + ; + """, + engine=engine + ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Which people have a level of 3 or more on at least one machine? + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Write a query that generates a comma-separated list of the machines that Asensio Amaya is rated on, even if the level is `null`. (Hint: use `group_concat`.) + """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + > Many of the `level` values in `rating` are `null`. What do you think this might mean? 
+ """) + return + + +@app.cell(hide_code=True) +def _(): + mo.md(r""" + ## Check Understanding + + ![concept map](/public/06_concepts.svg) + """) + return + + +@app.cell(hide_code=True) +def _(FlashcardWidget, mo): + _widget = mo.ui.anywidget( + FlashcardWidget( + question="Database Key and Relationship Concepts", + cards=[ + {"front": "Primary key", "back": "A column (or set of columns) whose values are unique and non-null for every row, used to uniquely identify each row in a table"}, + {"front": "Foreign key", "back": "A column in one table whose values reference the primary key of another table, establishing a link between the two tables"}, + {"front": "One-to-many relationship", "back": "A relationship where one row in table A can be referenced by many rows in table B β€” e.g., one person can have many surveys"}, + {"front": "Many-to-many relationship", "back": "A relationship where rows in table A can relate to many rows in table B and vice versa β€” requires a join table to represent"}, + {"front": "Join table", "back": "An intermediate table storing pairs of foreign keys to represent a many-to-many relationship between two other tables"}, + {"front": "Self-join", "back": "Joining a table to itself using two aliases, used when rows in a table relate to other rows in the same table (e.g., supervisors and employees)"}, + ], + ) + ) + _widget + return + + +@app.cell(hide_code=True) +def _(ConceptMapWidget, mo): + _widget = mo.ui.anywidget( + ConceptMapWidget( + question="Connect these database design concepts by selecting a relationship term and clicking two concepts.", + concepts=["primary key", "foreign key", "one-to-many", "many-to-many", "join table"], + terms=["referenced by", "implemented with", "requires"], + correct_edges=[ + {"from": "primary key", "to": "foreign key", "label": "referenced by"}, + {"from": "many-to-many", "to": "join table", "label": "implemented with"}, + {"from": "one-to-many", "to": "foreign key", "label": "requires"}, + ], + ) + ) 
+ _widget + return + + +if __name__ == "__main__": + app.run() diff --git a/sql/index.md b/sql/index.md new file mode 100644 index 0000000000000000000000000000000000000000..85ad2de4ccbf63a92a74c0f11ea6ff6df8f8a99f --- /dev/null +++ b/sql/index.md @@ -0,0 +1,14 @@ +--- +title: Learn SQL +description: > + Learn the basics of SQL, the industry standard for interacting + with relational databases. These notebooks also show how easy + it is to work with relational data in marimo. +tracking: 133 +--- + +## Contributors + +Thanks to our notebook authors: + +* [Greg Wilson](https://github.com/gvwilson) diff --git a/sql/public/01_concepts.svg b/sql/public/01_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..2b88ebd9191d488f8273ebd69910aa53d15a5c41 --- /dev/null +++ b/sql/public/01_concepts.svg @@ -0,0 +1,4 @@ + + + +
select
select
rows
rows
columns
columns
table
table
chooses
chooses
from
from
specified
by
specified...
names
names
* (for all)
* (for all)
specified
by
specified...
name
name
can be
paged with
can be...
limit
limit
offset
offset
calculations
calculations
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/02_concepts.svg b/sql/public/02_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..0a4576b374c69132a57ef6d9cbd4982f7885132d --- /dev/null +++ b/sql/public/02_concepts.svg @@ -0,0 +1,4 @@ + + + +
filter
filter
rows
rows
table
table
keeps some
keeps some
from
from
has
has
columns
columns
does not modify
does not m...
condition
condition
specifies
specifies
evaluated
for each
evaluated...
must be
must be
Boolean
Boolean
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/03_concepts.svg b/sql/public/03_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..6374a0d2f8434d997ec52110925b2ede75a2f47a --- /dev/null +++ b/sql/public/03_concepts.svg @@ -0,0 +1,4 @@ + + + +
group by
group by
aggregation
aggregation
usually
used with
usually...
specifies
specifies
column
column
function
function
specifies
specifies
columns
columns
usually not
one of
usually not...
combines
values from
combines...
rows
rows
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/04_concepts.svg b/sql/public/04_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..47e22aa800504ea601c692602eecef1bbf027dd6 --- /dev/null +++ b/sql/public/04_concepts.svg @@ -0,0 +1,4 @@ + + + +
null
null
is not
is not
false
false
empty string
empty string
zero
zero
represents
represents
"I don't know"
"I don't know"
produces
itself in
produces...
arithmetic
arithmetic
comparison
comparison
ignored
in
ignored...
aggregation
aggregation
checked using
checked usi...
is null
is null
is not null
is not null
(un)equal
to itself
(un)equal...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/05_concepts.svg b/sql/public/05_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..cb625864aed67ae1de3a41e04ab4a9ae6769490b --- /dev/null +++ b/sql/public/05_concepts.svg @@ -0,0 +1,4 @@ + + + +
join
join
can be
can be
left join
left join
inner join
inner join
left table
left table
right table
right table
combines
rows from
combines...
to create
to create
temporary
table
temporary...
usually
uses
usually...
condition
condition
to select
rows of
to select...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/06_concepts.svg b/sql/public/06_concepts.svg new file mode 100644 index 0000000000000000000000000000000000000000..8885b7393a466e597d84db8e99a96863aea4417a --- /dev/null +++ b/sql/public/06_concepts.svg @@ -0,0 +1,4 @@ + + + +
tables
tables
can have
can have
primary key
primary key
foreign key
foreign key
is
is
unique
unique
not null
not null
refers to
refers to
used in
used in
join
join
can be
can be
join tables
join tables
capture
capture
one-to-many
one-to-many
many-to-many
many-to-many
express
express
can be
can be
relationship
relationship
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sql/public/lab.db b/sql/public/lab.db new file mode 100644 index 0000000000000000000000000000000000000000..63514ababaf069406baf4b5d2d2b8ad1c8152d9a Binary files /dev/null and b/sql/public/lab.db differ diff --git a/sql/public/penguins.db b/sql/public/penguins.db new file mode 100644 index 0000000000000000000000000000000000000000..2e43ca6e97fd73b6dd287e4ba62f60c26916709d Binary files /dev/null and b/sql/public/penguins.db differ diff --git a/sql/public/survey.db b/sql/public/survey.db new file mode 100644 index 0000000000000000000000000000000000000000..d5f3f859705f958ea1c3a81510c82b0a2b2416d8 Binary files /dev/null and b/sql/public/survey.db differ diff --git a/sql/public/survey_tables.svg b/sql/public/survey_tables.svg new file mode 100644 index 0000000000000000000000000000000000000000..b7ea2d9981aed208c941651542f0bcfcbd799859 --- /dev/null +++ b/sql/public/survey_tables.svg @@ -0,0 +1,4 @@ + + + +
person_id
person_id
supervisor_id
supervisor_id
family
family
personal
personal
person
person
survey_id
survey_id
end_date
end_date
start_date
start_date
person_id
person_id
survey
survey
person_id
person_id
level
level
machine_id
machine_id
rating
rating
machine_id
machine_id
machine_type
machine_type
machine
machine
Text is not SVG - cannot display
\ No newline at end of file diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000000000000000000000000000000000000..c371441c103aabea653568b9b5e5cc67499b9b3b --- /dev/null +++ b/templates/base.html @@ -0,0 +1,47 @@ + + + + + + {% block title %}Marimo Learn{% endblock %} + + + + + + + +{% block content %}{% endblock %} + +{% include "contribute.html" %} + + + + + + diff --git a/templates/contribute.html b/templates/contribute.html new file mode 100644 index 0000000000000000000000000000000000000000..cb2777fe7992e3dcdf56194b12daf51d34c64618 --- /dev/null +++ b/templates/contribute.html @@ -0,0 +1,10 @@ +
+
+

Want to Contribute?

+

Help us improve these learning materials by contributing to the GitHub repository. We welcome new content, bug fixes, and improvements!

+ + {% include "icons/github-20.svg" %} + Contribute on GitHub + +
+
diff --git a/templates/icons/book.svg b/templates/icons/book.svg new file mode 100644 index 0000000000000000000000000000000000000000..b655723d349894db935dae139edfd2a4231453eb --- /dev/null +++ b/templates/icons/book.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/icons/flask.svg b/templates/icons/flask.svg new file mode 100644 index 0000000000000000000000000000000000000000..d513de5effc252a57682c96dcb79e0960d973905 --- /dev/null +++ b/templates/icons/flask.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/icons/github-20.svg b/templates/icons/github-20.svg new file mode 100644 index 0000000000000000000000000000000000000000..715b8866361b3d1b231b6aa0ee665c31ecaccdcf --- /dev/null +++ b/templates/icons/github-20.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/icons/github-24.svg b/templates/icons/github-24.svg new file mode 100644 index 0000000000000000000000000000000000000000..557af1322014df08ba13586bf8fca0d720bdb83d --- /dev/null +++ b/templates/icons/github-24.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/icons/globe.svg b/templates/icons/globe.svg new file mode 100644 index 0000000000000000000000000000000000000000..f34e7f114db21bb3c0bad29a201096bfa11244f4 --- /dev/null +++ b/templates/icons/globe.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/icons/lightning.svg b/templates/icons/lightning.svg new file mode 100644 index 0000000000000000000000000000000000000000..f2cbb4903f7b89e4f55ce28b0e7a235b63c3acf1 --- /dev/null +++ b/templates/icons/lightning.svg @@ -0,0 +1,3 @@ + + + diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..ba717cea2e58a77351aac3e80dd05429173abd02 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,94 @@ +{% extends "base.html" %} + +{% block title %}Marimo Learn - Interactive Python Notebooks{% endblock %} + +{% block content %} +
+
+
+
+

Interactive Python Learning with marimo

+

Explore our collection of interactive notebooks for Python, data science, and machine learning.

+ +
+
+
+ Marimo Logo +
+
+
+
+
+ +
+
+

Why Learn with Marimo?

+
+
+
+ {% include "icons/lightning.svg" %} +
+

Interactive Learning

+

Learn by doing with interactive notebooks that run directly in your browser.

+
+
+
+ {% include "icons/flask.svg" %} +
+

Practical Examples

+

Real-world examples and applications to reinforce your understanding.

+
+
+
+ {% include "icons/book.svg" %} +
+

Comprehensive Curriculum

+

From Python basics to advanced machine learning concepts.

+
+
+
+
+ +
+
+

Explore Our Courses

+
+ {% for course_id, course in courses.items() %} + {% set notebooks = course.get('notebooks', []) %} + {% if notebooks %} +
+
+
+

+ + {{ course.get('title', course_id) }} + +

+

+ {% if course.get('description_html') %} + {{ course.get('description_html')|safe }} + {% endif %} +

+
+
    + {% for notebook in notebooks %} +
  1. + + {{ notebook.title }} + +
  2. + {% endfor %} +
+
+
+
+ {% endif %} + {% endfor %} +
+
+
+ +{% endblock %} diff --git a/templates/lesson.html b/templates/lesson.html new file mode 100644 index 0000000000000000000000000000000000000000..88b453d162ce7f42a073219872c33989804b6d96 --- /dev/null +++ b/templates/lesson.html @@ -0,0 +1,64 @@ +{% extends "base.html" %} + +{% block title %}{{ lesson.title }} - Marimo Learn{% endblock %} + +{% block content %} +
+
+
+
+ ← All Courses +

{{ lesson.title }}

+ {% if lesson.description_html %} +
{{ lesson.description_html|safe }}
+ {% endif %} +
+
+
+ Marimo Logo +
+
+
+
+
+ +
+
+
+ + + +
+ {{ lesson.body_html|safe }} + {% include "running_notebooks.html" %} +
+ +
+
+
+{% endblock %} diff --git a/templates/page.html b/templates/page.html new file mode 100644 index 0000000000000000000000000000000000000000..66e1f532fdaa0090ff2f2c0c91b47d9e522d1f23 --- /dev/null +++ b/templates/page.html @@ -0,0 +1,20 @@ +{% extends "base.html" %} + +{% block title %}{{ title }} - Marimo Learn{% endblock %} + +{% block content %} +
+
+ ← Home +

{{ title }}

+
+
+ +
+
+
+ {{ body_html|safe }} +
+
+
+{% endblock %} diff --git a/templates/running_notebooks.html b/templates/running_notebooks.html new file mode 100644 index 0000000000000000000000000000000000000000..f91a05a97d9e1f66f22f1e11d8a7ab570b8bd3af --- /dev/null +++ b/templates/running_notebooks.html @@ -0,0 +1,4 @@ +

Running Notebooks

+

To run a notebook locally, use:

+
uvx marimo edit <URL>
+

You can also open notebooks in our online playground by adding marimo.app/ to a notebook's URL.