petergy committed on
Commit
1b7e9b3
·
1 Parent(s): 8c4fca3

format code

Browse files
Files changed (1) hide show
  1. daft/01_what_makes_daft_special.py +33 -14
daft/01_what_makes_daft_special.py CHANGED
@@ -48,10 +48,12 @@ def _(daft, mo):
48
 
49
  @app.cell(hide_code=True)
50
  def _(df_with_discount, discount_slider, mo):
51
- mo.vstack([
52
- discount_slider,
53
- df_with_discount.collect(),
54
- ])
 
 
55
  return
56
 
57
 
@@ -120,7 +122,9 @@ def _(mo):
120
 
121
  @app.cell(hide_code=True)
122
  def _(mo):
123
- mo.md(r"""A cornerstone of Daft's design is **lazy execution**. Imagine defining a DataFrame with a trillion rows on your laptop – usually not a great prospect for your device's memory!""")
 
 
124
  return
125
 
126
 
@@ -137,19 +141,23 @@ def _(daft):
137
 
138
  @app.cell(hide_code=True)
139
  def _(mo):
140
- mo.md(r"""With Daft, this is perfectly fine. Operations like `with_column` or `filter` don't compute results immediately. Instead, Daft builds a *logical plan* – a blueprint of the transformations you've defined. You can inspect this plan:""")
 
 
141
  return
142
 
143
 
144
  @app.cell(hide_code=True)
145
  def _(mo, trillion_rows_df):
146
- mo.mermaid(trillion_rows_df.explain(format='mermaid').split('\nSet')[0][11:-3])
147
  return
148
 
149
 
150
  @app.cell(hide_code=True)
151
  def _(mo):
152
- mo.md(r"""This plan is only executed (and data materialized) when you explicitly request it (e.g., with `.show()`, `.collect()`, or by writing to a file). Before execution, Daft's optimizer works to make your query run as efficiently as possible. This approach allows you to define complex operations on massive datasets without immediate computational cost or memory overflow.""")
 
 
153
  return
154
 
155
 
@@ -219,13 +227,17 @@ def _(daft):
219
 
220
  @app.cell(hide_code=True)
221
  def _(mo):
222
- mo.md(r"""> Example inspired by the great post [Exploring Art with TypeScript, Jupyter, Polars, and Observable Plot](https://deno.com/blog/exploring-art-with-typescript-and-jupyter) published on Deno's blog.""")
 
 
223
  return
224
 
225
 
226
  @app.cell(hide_code=True)
227
  def _(mo):
228
- mo.md(r"""In later chapters, we'll explore in more detail how to work with these image objects and other complex types, including applying User-Defined Functions (UDFs) for custom processing. Until then, you can [take a look at a more complex example](https://blog.getdaft.io/p/we-cloned-over-15000-repos-to-find), in which Daft is used to clone over 15,000 GitHub repos to find the best developers.""")
 
 
229
  return
230
 
231
 
@@ -261,21 +273,27 @@ def _(daft):
261
  @app.cell
262
  def _(df_simple):
263
  # Pandas-flavored API
264
- df_simple.where((df_simple["quantity"] > 0) & (df_simple["region"] == "North")).collect()
 
 
265
  return
266
 
267
 
268
  @app.cell
269
  def _(daft, df_simple):
270
  # Polars-flavored API
271
- df_simple.where((daft.col("quantity") > 0) & (daft.col("region") == "North")).collect()
 
 
272
  return
273
 
274
 
275
  @app.cell
276
  def _(daft):
277
  # SQL Interface
278
- daft.sql("SELECT * FROM df_simple WHERE quantity > 0 AND region = 'North'").collect()
 
 
279
  return
280
 
281
 
@@ -283,7 +301,7 @@ def _(daft):
283
  def _(mo):
284
  mo.md(
285
  r"""
286
- ## 🟣 The Daft Advantage: Putting It All Together
287
 
288
  So, what makes Daft special? It's the combination of these design choices:
289
 
@@ -304,6 +322,7 @@ def _(mo):
304
  def _():
305
  import daft
306
  import marimo as mo
 
307
  return daft, mo
308
 
309
 
 
48
 
49
  @app.cell(hide_code=True)
50
  def _(df_with_discount, discount_slider, mo):
51
+ mo.vstack(
52
+ [
53
+ discount_slider,
54
+ df_with_discount.collect(),
55
+ ]
56
+ )
57
  return
58
 
59
 
 
122
 
123
  @app.cell(hide_code=True)
124
  def _(mo):
125
+ mo.md(
126
+ r"""A cornerstone of Daft's design is **lazy execution**. Imagine defining a DataFrame with a trillion rows on your laptop – usually not a great prospect for your device's memory!"""
127
+ )
128
  return
129
 
130
 
 
141
 
142
  @app.cell(hide_code=True)
143
  def _(mo):
144
+ mo.md(
145
+ r"""With Daft, this is perfectly fine. Operations like `with_column` or `filter` don't compute results immediately. Instead, Daft builds a *logical plan* – a blueprint of the transformations you've defined. You can inspect this plan:"""
146
+ )
147
  return
148
 
149
 
150
  @app.cell(hide_code=True)
151
  def _(mo, trillion_rows_df):
152
+ mo.mermaid(trillion_rows_df.explain(format="mermaid").split("\nSet")[0][11:-3])
153
  return
154
 
155
 
156
  @app.cell(hide_code=True)
157
  def _(mo):
158
+ mo.md(
159
+ r"""This plan is only executed (and data materialized) when you explicitly request it (e.g., with `.show()`, `.collect()`, or by writing to a file). Before execution, Daft's optimizer works to make your query run as efficiently as possible. This approach allows you to define complex operations on massive datasets without immediate computational cost or memory overflow."""
160
+ )
161
  return
162
 
163
 
 
227
 
228
  @app.cell(hide_code=True)
229
  def _(mo):
230
+ mo.md(
231
+ r"""> Example inspired by the great post [Exploring Art with TypeScript, Jupyter, Polars, and Observable Plot](https://deno.com/blog/exploring-art-with-typescript-and-jupyter) published on Deno's blog."""
232
+ )
233
  return
234
 
235
 
236
  @app.cell(hide_code=True)
237
  def _(mo):
238
+ mo.md(
239
+ r"""In later chapters, we'll explore in more detail how to work with these image objects and other complex types, including applying User-Defined Functions (UDFs) for custom processing. Until then, you can [take a look at a more complex example](https://blog.getdaft.io/p/we-cloned-over-15000-repos-to-find), in which Daft is used to clone over 15,000 GitHub repos to find the best developers."""
240
+ )
241
  return
242
 
243
 
 
273
  @app.cell
274
  def _(df_simple):
275
  # Pandas-flavored API
276
+ df_simple.where(
277
+ (df_simple["quantity"] > 0) & (df_simple["region"] == "North")
278
+ ).collect()
279
  return
280
 
281
 
282
  @app.cell
283
  def _(daft, df_simple):
284
  # Polars-flavored API
285
+ df_simple.where(
286
+ (daft.col("quantity") > 0) & (daft.col("region") == "North")
287
+ ).collect()
288
  return
289
 
290
 
291
  @app.cell
292
  def _(daft):
293
  # SQL Interface
294
+ daft.sql(
295
+ "SELECT * FROM df_simple WHERE quantity > 0 AND region = 'North'"
296
+ ).collect()
297
  return
298
 
299
 
 
301
  def _(mo):
302
  mo.md(
303
  r"""
304
+ ## 🟣 Daft's Value Proposition
305
 
306
  So, what makes Daft special? It's the combination of these design choices:
307
 
 
322
  def _():
323
  import daft
324
  import marimo as mo
325
+
326
  return daft, mo
327
 
328