Spaces:
Running
Running
format code
Browse files
daft/01_what_makes_daft_special.py
CHANGED
@@ -48,10 +48,12 @@ def _(daft, mo):
|
|
48 |
|
49 |
@app.cell(hide_code=True)
|
50 |
def _(df_with_discount, discount_slider, mo):
|
51 |
-
mo.vstack(
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
55 |
return
|
56 |
|
57 |
|
@@ -120,7 +122,9 @@ def _(mo):
|
|
120 |
|
121 |
@app.cell(hide_code=True)
|
122 |
def _(mo):
|
123 |
-
mo.md(
|
|
|
|
|
124 |
return
|
125 |
|
126 |
|
@@ -137,19 +141,23 @@ def _(daft):
|
|
137 |
|
138 |
@app.cell(hide_code=True)
|
139 |
def _(mo):
|
140 |
-
mo.md(
|
|
|
|
|
141 |
return
|
142 |
|
143 |
|
144 |
@app.cell(hide_code=True)
|
145 |
def _(mo, trillion_rows_df):
|
146 |
-
mo.mermaid(trillion_rows_df.explain(format=
|
147 |
return
|
148 |
|
149 |
|
150 |
@app.cell(hide_code=True)
|
151 |
def _(mo):
|
152 |
-
mo.md(
|
|
|
|
|
153 |
return
|
154 |
|
155 |
|
@@ -219,13 +227,17 @@ def _(daft):
|
|
219 |
|
220 |
@app.cell(hide_code=True)
|
221 |
def _(mo):
|
222 |
-
mo.md(
|
|
|
|
|
223 |
return
|
224 |
|
225 |
|
226 |
@app.cell(hide_code=True)
|
227 |
def _(mo):
|
228 |
-
mo.md(
|
|
|
|
|
229 |
return
|
230 |
|
231 |
|
@@ -261,21 +273,27 @@ def _(daft):
|
|
261 |
@app.cell
|
262 |
def _(df_simple):
|
263 |
# Pandas-flavored API
|
264 |
-
df_simple.where(
|
|
|
|
|
265 |
return
|
266 |
|
267 |
|
268 |
@app.cell
|
269 |
def _(daft, df_simple):
|
270 |
# Polars-flavored API
|
271 |
-
df_simple.where(
|
|
|
|
|
272 |
return
|
273 |
|
274 |
|
275 |
@app.cell
|
276 |
def _(daft):
|
277 |
# SQL Interface
|
278 |
-
daft.sql(
|
|
|
|
|
279 |
return
|
280 |
|
281 |
|
@@ -283,7 +301,7 @@ def _(daft):
|
|
283 |
def _(mo):
|
284 |
mo.md(
|
285 |
r"""
|
286 |
-
## 🟣
|
287 |
|
288 |
So, what makes Daft special? It's the combination of these design choices:
|
289 |
|
@@ -304,6 +322,7 @@ def _(mo):
|
|
304 |
def _():
|
305 |
import daft
|
306 |
import marimo as mo
|
|
|
307 |
return daft, mo
|
308 |
|
309 |
|
|
|
48 |
|
49 |
@app.cell(hide_code=True)
|
50 |
def _(df_with_discount, discount_slider, mo):
|
51 |
+
mo.vstack(
|
52 |
+
[
|
53 |
+
discount_slider,
|
54 |
+
df_with_discount.collect(),
|
55 |
+
]
|
56 |
+
)
|
57 |
return
|
58 |
|
59 |
|
|
|
122 |
|
123 |
@app.cell(hide_code=True)
|
124 |
def _(mo):
|
125 |
+
mo.md(
|
126 |
+
r"""A cornerstone of Daft's design is **lazy execution**. Imagine defining a DataFrame with a trillion rows on your laptop – usually not a great prospect for your device's memory!"""
|
127 |
+
)
|
128 |
return
|
129 |
|
130 |
|
|
|
141 |
|
142 |
@app.cell(hide_code=True)
|
143 |
def _(mo):
|
144 |
+
mo.md(
|
145 |
+
r"""With Daft, this is perfectly fine. Operations like `with_column` or `filter` don't compute results immediately. Instead, Daft builds a *logical plan* – a blueprint of the transformations you've defined. You can inspect this plan:"""
|
146 |
+
)
|
147 |
return
|
148 |
|
149 |
|
150 |
@app.cell(hide_code=True)
|
151 |
def _(mo, trillion_rows_df):
|
152 |
+
mo.mermaid(trillion_rows_df.explain(format="mermaid").split("\nSet")[0][11:-3])
|
153 |
return
|
154 |
|
155 |
|
156 |
@app.cell(hide_code=True)
|
157 |
def _(mo):
|
158 |
+
mo.md(
|
159 |
+
r"""This plan is only executed (and data materialized) when you explicitly request it (e.g., with `.show()`, `.collect()`, or by writing to a file). Before execution, Daft's optimizer works to make your query run as efficiently as possible. This approach allows you to define complex operations on massive datasets without immediate computational cost or memory overflow."""
|
160 |
+
)
|
161 |
return
|
162 |
|
163 |
|
|
|
227 |
|
228 |
@app.cell(hide_code=True)
|
229 |
def _(mo):
|
230 |
+
mo.md(
|
231 |
+
r"""> Example inspired by the great post [Exploring Art with TypeScript, Jupyter, Polars, and Observable Plot](https://deno.com/blog/exploring-art-with-typescript-and-jupyter) published on Deno's blog."""
|
232 |
+
)
|
233 |
return
|
234 |
|
235 |
|
236 |
@app.cell(hide_code=True)
|
237 |
def _(mo):
|
238 |
+
mo.md(
|
239 |
+
r"""In later chapters, we'll explore in more detail how to work with these image objects and other complex types, including applying User-Defined Functions (UDFs) for custom processing. Until then, you can [take a look at a more complex example](https://blog.getdaft.io/p/we-cloned-over-15000-repos-to-find), in which Daft is used to clone over 15,000 GitHub repos to find the best developers."""
|
240 |
+
)
|
241 |
return
|
242 |
|
243 |
|
|
|
273 |
@app.cell
|
274 |
def _(df_simple):
|
275 |
# Pandas-flavored API
|
276 |
+
df_simple.where(
|
277 |
+
(df_simple["quantity"] > 0) & (df_simple["region"] == "North")
|
278 |
+
).collect()
|
279 |
return
|
280 |
|
281 |
|
282 |
@app.cell
|
283 |
def _(daft, df_simple):
|
284 |
# Polars-flavored API
|
285 |
+
df_simple.where(
|
286 |
+
(daft.col("quantity") > 0) & (daft.col("region") == "North")
|
287 |
+
).collect()
|
288 |
return
|
289 |
|
290 |
|
291 |
@app.cell
|
292 |
def _(daft):
|
293 |
# SQL Interface
|
294 |
+
daft.sql(
|
295 |
+
"SELECT * FROM df_simple WHERE quantity > 0 AND region = 'North'"
|
296 |
+
).collect()
|
297 |
return
|
298 |
|
299 |
|
|
|
301 |
def _(mo):
|
302 |
mo.md(
|
303 |
r"""
|
304 |
+
## 🟣 Daft's Value Proposition
|
305 |
|
306 |
So, what makes Daft special? It's the combination of these design choices:
|
307 |
|
|
|
322 |
def _():
|
323 |
import daft
|
324 |
import marimo as mo
|
325 |
+
|
326 |
return daft, mo
|
327 |
|
328 |
|