Azmi-84 committed on
Commit
34f04d3
·
1 Parent(s): e5fc993

Hide code cells in getting started guide for improved user experience

Browse files

This commit updates the DuckDB getting started script by hiding code cells to streamline the user interface. The changes enhance readability and focus on the interactive components, making it easier for users to engage with the content without being distracted by the underlying code.

Files changed (1) hide show
  1. duckdb/01_getting_started.py +400 -260
duckdb/01_getting_started.py CHANGED
@@ -9,6 +9,7 @@
9
  # "pandas==2.2.3",
10
  # "sqlglot==26.12.1",
11
  # "plotly==5.23.1",
 
12
  # ]
13
  # ///
14
 
@@ -104,13 +105,12 @@ def _(mo):
104
  | Performance | Faster for most operations | Slightly slower but provides persistence |
105
  | Creation | duckdb.connect(':memory:') | duckdb.connect('filename.db') |
106
  | Multiple Connection Access | Limited to single connection | Multiple connections can access the same database |
107
-
108
  """
109
  )
110
  return
111
 
112
 
113
- @app.cell
114
  def _(os):
115
  # Remove previous database if it exists
116
  if os.path.exists("example.db"):
@@ -121,7 +121,7 @@ def _(os):
121
  return
122
 
123
 
124
- @app.cell
125
  def _(mo):
126
  _df = mo.sql(
127
  f"""
@@ -149,7 +149,7 @@ def _(mo):
149
  return
150
 
151
 
152
- @app.cell
153
  def _(duckdb):
154
  # Create an in-memory DuckDB connection
155
  memory_db = duckdb.connect(":memory:")
@@ -159,7 +159,7 @@ def _(duckdb):
159
  return file_db, memory_db
160
 
161
 
162
- @app.cell
163
  def _(file_db, memory_db):
164
  # Test both connections
165
  memory_db.execute(
@@ -196,7 +196,7 @@ def _(mo):
196
  return
197
 
198
 
199
- @app.cell
200
  def _(mem_test, memory_db, mo):
201
  _df = mo.sql(
202
  f"""
@@ -207,7 +207,7 @@ def _(mem_test, memory_db, mo):
207
  return
208
 
209
 
210
- @app.cell
211
  def _(file_db, file_test, mo):
212
  _df = mo.sql(
213
  f"""
@@ -226,12 +226,12 @@ def _():
226
 
227
 
228
  @app.cell(hide_code=True)
229
- def _file_query(mo):
230
  mo.md(rf"""## 🔄 Simulating Application Restart...""")
231
  return
232
 
233
 
234
- @app.cell
235
  def _(duckdb):
236
  # Create new connections (simulating restart)
237
  new_memory_db = duckdb.connect(":memory:")
@@ -239,7 +239,7 @@ def _(duckdb):
239
  return new_file_db, new_memory_db
240
 
241
 
242
- @app.cell
243
  def _(new_memory_db):
244
  # Try to query tables in the new memory connection
245
  try:
@@ -252,7 +252,7 @@ def _(new_memory_db):
252
  return memory_data_available, memory_persistence
253
 
254
 
255
- @app.cell
256
  def _(new_file_db):
257
  # Try to query tables in the new file connection
258
  try:
@@ -266,7 +266,7 @@ def _(new_file_db):
266
  return file_data, file_data_available, file_persistence
267
 
268
 
269
- @app.cell
270
  def _(
271
  file_data_available,
272
  file_persistence,
@@ -285,18 +285,23 @@ def _(
285
  ],
286
  }
287
  )
288
-
289
- mo.md("### Persistence Test Results")
290
  return (persistence_results,)
291
 
292
 
293
- @app.cell
294
- def _(persistence_results):
295
- persistence_results
 
 
 
 
 
 
 
296
  return
297
 
298
 
299
- @app.cell
300
  def _(file_data, file_data_available, mo):
301
  if file_data_available:
302
  mo.md("### Persisted File-Based Data:")
@@ -326,8 +331,8 @@ def _(mo):
326
  return
327
 
328
 
329
- @app.cell
330
- def _create_users_tables(file_db, new_memory_db):
331
  # For the memory database
332
  try:
333
  new_memory_db.execute("DROP TABLE IF EXISTS users_memory")
@@ -342,7 +347,7 @@ def _create_users_tables(file_db, new_memory_db):
342
  return
343
 
344
 
345
- @app.cell
346
  def _(file_db, new_memory_db):
347
  # Create advanced users table in memory database with primary key
348
  new_memory_db.execute("""
@@ -372,8 +377,8 @@ def _(file_db, new_memory_db):
372
  return
373
 
374
 
375
- @app.cell
376
- def _(mo, new_memory_db):
377
  # Get table schema information using DuckDB's internal system tables
378
  memory_schema = new_memory_db.execute("""
379
  SELECT column_name, data_type, is_nullable
@@ -381,15 +386,21 @@ def _(mo, new_memory_db):
381
  WHERE table_name = 'users_memory'
382
  ORDER BY ordinal_position
383
  """).df()
384
-
385
- # Display the schema using marimo's UI components
386
- mo.md("### 🔍 Table Schema Information")
387
  return (memory_schema,)
388
 
389
 
390
  @app.cell(hide_code=True)
391
  def _(memory_schema, mo):
392
- mo.ui.table(memory_schema)
 
 
 
 
 
 
 
 
 
393
  return
394
 
395
 
@@ -412,8 +423,8 @@ def _(mo):
412
  return
413
 
414
 
415
- @app.cell
416
- def _insert_user_data(date):
417
  today = date.today()
418
 
419
 
@@ -458,7 +469,7 @@ def _insert_user_data(date):
458
  return (safe_insert,)
459
 
460
 
461
- @app.cell
462
  def _():
463
  # Prepare the data
464
  user_data = [
@@ -520,31 +531,17 @@ def _():
520
  return (user_data,)
521
 
522
 
523
- @app.cell
524
- def _(mo, new_memory_db, safe_insert, user_data):
525
  # Safely insert data into memory database
526
- records_inserted = safe_insert(new_memory_db, "users_memory", user_data)
527
- mo.md(
528
- f"""
529
- Inserted {records_inserted} new records into users_memory.
530
- """
531
- )
532
- return
533
-
534
-
535
- @app.cell
536
- def _(file_db, safe_insert, user_data):
537
- def _():
538
- # Safely insert data into file database
539
- records_inserted = safe_insert(file_db, "users_file", user_data)
540
- return print(f"Inserted {records_inserted} new records into users_file")
541
 
542
-
543
- _()
544
  return
545
 
546
 
547
- @app.cell
548
  def _():
549
  # If you need to add just one record, you can use a similar approach:
550
  new_user = (
@@ -559,7 +556,7 @@ def _():
559
  return (new_user,)
560
 
561
 
562
- @app.cell
563
  def _(new_memory_db, new_user):
564
  # Check if the ID exists before inserting
565
  if not new_memory_db.execute(
@@ -578,7 +575,7 @@ def _(new_memory_db, new_user):
578
  return
579
 
580
 
581
- @app.cell
582
  def _(file_db, new_user):
583
  # Do the same for the file database
584
  if not file_db.execute(
@@ -597,7 +594,7 @@ def _(file_db, new_user):
597
  return
598
 
599
 
600
- @app.cell
601
  def _(new_memory_db):
602
  # First try to update
603
  cursor = new_memory_db.execute(
@@ -620,7 +617,7 @@ def _(new_memory_db):
620
  return (cursor,)
621
 
622
 
623
- @app.cell
624
  def _(cursor, mo, new_memory_db):
625
  # If no rows were updated, perform an insert
626
  if cursor.rowcount == 0:
@@ -649,7 +646,7 @@ def _(cursor, mo, new_memory_db):
649
  return
650
 
651
 
652
- @app.cell
653
  def _(file_db, mo):
654
  # For DuckDB using ON CONFLICT, we need to specify the conflict target column
655
  file_db.execute(
@@ -683,8 +680,8 @@ def _(file_db, mo):
683
  return
684
 
685
 
686
- @app.cell
687
- def _view_tables_after_insert(new_memory_db):
688
  # Display memory data using DuckDB's query capabilities
689
  memory_results = new_memory_db.execute("""
690
  SELECT
@@ -701,7 +698,7 @@ def _view_tables_after_insert(new_memory_db):
701
  return (memory_results,)
702
 
703
 
704
- @app.cell
705
  def _(file_db):
706
  # Display file data with formatting
707
  file_results = file_db.execute("""
@@ -719,17 +716,6 @@ def _(file_db):
719
  return (file_results,)
720
 
721
 
722
- @app.cell
723
- def _(mo):
724
- mo.md(
725
- r"""
726
- <!-- Create an interactive display with tabs using marimo components -->
727
- ## 📊 Database Contents After Insertion
728
- """
729
- )
730
- return
731
-
732
-
733
  @app.cell(hide_code=True)
734
  def _(file_results, memory_results, mo):
735
  tabs = mo.ui.tabs(
@@ -738,7 +724,18 @@ def _(file_results, memory_results, mo):
738
  "File-Based Database": mo.ui.table(file_results),
739
  }
740
  )
741
- tabs
 
 
 
 
 
 
 
 
 
 
 
742
  return
743
 
744
 
@@ -746,12 +743,12 @@ def _(file_results, memory_results, mo):
746
  def _(mo):
747
  mo.md(
748
  r"""
749
- # [4. Using SQL Directly in Marimo](https://duckdb.org/docs/stable/sql/query_syntax/select)
750
 
751
  There are multiple ways to leverage DuckDB's SQL capabilities in marimo:
752
 
753
  1. **Direct execution**: Using DuckDB connections to execute SQL
754
- 2. **Marimo SQL**: Using Marimo's built-in SQL engine
755
  3. **Interactive queries**: Combining UI elements with SQL execution
756
 
757
  Let's explore these approaches:
@@ -760,47 +757,39 @@ def _(mo):
760
  return
761
 
762
 
763
- @app.cell(hide_code=True)
764
- def _sql_with_marimo(mo):
765
- mo.md(
766
- rf"""
767
- <!-- Using Marimo's SQL engine with direct SQL on memory_results DataFrame -->
768
- ## 🔍 Query with Marimo SQL
769
- """
770
- )
771
- return
772
-
773
-
774
  @app.cell(hide_code=True)
775
  def _(mo):
776
- mo.md(
777
- rf"""
778
- ## Marimo has its own built-in SQL engine that can work with DataFrames.
779
- Let's use it to filter our users:
780
- """
 
 
 
 
781
  )
782
  return
783
 
784
 
785
- @app.cell
786
- def _(mo):
787
  # Create a SQL selector for users with age threshold
788
- age_threshold = mo.ui.slider(25, 50, value=30, label="Minimum Age")
789
- return (age_threshold,)
 
790
 
791
 
792
- @app.cell
793
- def _(age_threshold, memory_results, mo):
794
  # Create a function to filter users based on the slider value
795
  def filtered_users():
796
  # Use DuckDB directly instead of mo.sql with users param
797
  filtered_df = memory_results[memory_results["age"] >= age_threshold.value]
798
  filtered_df = filtered_df.sort_values("age")
799
  return mo.ui.table(filtered_df)
800
- return (filtered_users,)
801
 
802
 
803
- @app.cell
804
  def _(age_threshold, filtered_users, mo):
805
  layout = mo.vstack(
806
  [
@@ -809,8 +798,10 @@ def _(age_threshold, filtered_users, mo):
809
  mo.md("### Users meeting age criteria:"),
810
  filtered_users(),
811
  ],
812
- gap=1.5,
 
813
  )
 
814
  layout
815
  return
816
 
@@ -821,8 +812,8 @@ def _(mo):
821
  return
822
 
823
 
824
- @app.cell
825
- def _polars_integration(pl):
826
  # Create a Polars DataFrame
827
  polars_df = pl.DataFrame(
828
  {
@@ -835,24 +826,22 @@ def _polars_integration(pl):
835
  return (polars_df,)
836
 
837
 
838
- @app.cell
839
- def _(mo):
840
- mo.md(
841
- rf"""
842
- <!-- Display the Polars DataFrame -->
843
- ## Original Polars DataFrame:
844
- """
845
- )
846
- return
847
-
848
-
849
- @app.cell
850
  def _(mo, polars_df):
851
- mo.ui.table(polars_df)
 
 
 
 
 
 
 
 
 
852
  return
853
 
854
 
855
- @app.cell
856
  def _(new_memory_db, polars_df):
857
  # Register the Polars DataFrame as a DuckDB table in memory connection
858
  new_memory_db.register("products_polars", polars_df)
@@ -865,24 +854,23 @@ def _(new_memory_db, polars_df):
865
 
866
 
867
  @app.cell(hide_code=True)
868
- def _(mo):
869
- mo.md(
870
- r"""
871
- <!-- Display the query result -->
872
- ## DuckDB Query Result (From Polars Data):
873
- """
874
- )
875
- return
876
-
877
-
878
- @app.cell
879
  def _(mo, polars_query_result):
880
- mo.ui.table(polars_query_result)
 
 
 
 
 
 
 
 
 
 
881
  return
882
 
883
 
884
- @app.cell
885
- def _(mo, new_memory_db):
886
  # Demonstrate a more complex query
887
  complex_query_result = new_memory_db.execute("""
888
  SELECT
@@ -895,14 +883,22 @@ def _(mo, new_memory_db):
895
  GROUP BY category
896
  ORDER BY avg_price DESC
897
  """).df()
898
-
899
- mo.md("## Aggregated Product Data by Category:")
900
  return (complex_query_result,)
901
 
902
 
903
- @app.cell
904
  def _(complex_query_result, mo):
905
- mo.ui.table(complex_query_result)
 
 
 
 
 
 
 
 
 
 
906
  return
907
 
908
 
@@ -912,8 +908,8 @@ def _(mo):
912
  return
913
 
914
 
915
- @app.cell
916
- def _join_operations(new_memory_db):
917
  # Create another table to join with
918
  new_memory_db.execute("""
919
  CREATE TABLE IF NOT EXISTS departments (
@@ -925,7 +921,7 @@ def _join_operations(new_memory_db):
925
  return
926
 
927
 
928
- @app.cell
929
  def _(new_memory_db):
930
  new_memory_db.execute("""
931
  INSERT INTO departments VALUES
@@ -936,7 +932,7 @@ def _(new_memory_db):
936
  return
937
 
938
 
939
- @app.cell
940
  def _(new_memory_db):
941
  # Execute a join query
942
  join_result = new_memory_db.execute("""
@@ -980,7 +976,7 @@ def _(mo):
980
  return
981
 
982
 
983
- @app.cell
984
  def _(new_memory_db):
985
  # Inner join
986
  inner_join = new_memory_db.execute("""
@@ -1002,21 +998,125 @@ def _(new_memory_db):
1002
  FROM users_memory u
1003
  FULL OUTER JOIN departments d ON u.id = d.manager_id
1004
  """).df()
1005
- return full_join, inner_join, right_join
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
 
1007
 
1008
- @app.cell
1009
- def _(full_join, inner_join, join_result, mo, right_join):
1010
  join_tabs = mo.ui.tabs(
1011
  {
1012
  "Left Join": mo.ui.table(join_result),
1013
  "Inner Join": mo.ui.table(inner_join),
1014
  "Right Join": mo.ui.table(right_join),
1015
  "Full Outer Join": mo.ui.table(full_join),
 
 
 
 
1016
  }
1017
  )
 
 
1018
 
1019
- join_tabs
 
 
 
 
 
 
 
 
 
 
 
 
 
1020
  return
1021
 
1022
 
@@ -1026,8 +1126,8 @@ def _(mo):
1026
  return
1027
 
1028
 
1029
- @app.cell
1030
- def _aggregate_operations(new_memory_db):
1031
  # Execute an aggregate query
1032
  agg_result = new_memory_db.execute("""
1033
  SELECT
@@ -1042,34 +1142,21 @@ def _aggregate_operations(new_memory_db):
1042
 
1043
 
1044
  @app.cell(hide_code=True)
1045
- def _(mo):
1046
- mo.md(
1047
- rf"""
1048
- <!-- Display the aggregate result -->
1049
- ## Aggregate Results (All Users):
1050
- """
1051
- )
1052
- return
1053
-
1054
-
1055
- @app.cell
1056
  def _(agg_result, mo):
1057
- mo.ui.table(agg_result)
1058
- return
1059
-
1060
-
1061
- @app.cell(hide_code=True)
1062
- def _(mo):
1063
- mo.md(
1064
- rf"""
1065
- <!-- More complex aggregate query with grouping -->
1066
- ## Aggregate Results (Grouped by Age Range):
1067
- """
1068
  )
1069
  return
1070
 
1071
 
1072
- @app.cell
1073
  def _(new_memory_db):
1074
  age_groups = new_memory_db.execute("""
1075
  SELECT
@@ -1088,25 +1175,25 @@ def _(new_memory_db):
1088
  return (age_groups,)
1089
 
1090
 
1091
- @app.cell
1092
  def _(age_groups, mo):
1093
  mo.ui.table(age_groups)
1094
- return
1095
-
1096
-
1097
- @app.cell
1098
- def _(mo):
1099
- mo.md(
1100
- r"""
1101
- <!-- Window functions demo -->
1102
- ### Window Functions Example:
1103
- """
1104
  )
1105
  return
1106
 
1107
 
1108
- @app.cell
1109
- def _(mo, new_memory_db):
1110
  window_result = new_memory_db.execute("""
1111
  SELECT
1112
  id,
@@ -1119,8 +1206,19 @@ def _(mo, new_memory_db):
1119
  FROM users_memory
1120
  ORDER BY balance_rank
1121
  """).df()
 
1122
 
1123
- mo.ui.table(window_result)
 
 
 
 
 
 
 
 
 
 
1124
  return
1125
 
1126
 
@@ -1130,8 +1228,8 @@ def _(mo):
1130
  return
1131
 
1132
 
1133
- @app.cell
1134
- def _convert_results(new_memory_db):
1135
  polars_result = new_memory_db.execute(
1136
  """SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
1137
  ).pl()
@@ -1139,23 +1237,22 @@ def _convert_results(new_memory_db):
1139
 
1140
 
1141
  @app.cell(hide_code=True)
1142
- def _(mo):
1143
- mo.md(
1144
- r"""
1145
- <!-- Display the converted results -->
1146
- ## Query Result as Polars DataFrame:
1147
- """
1148
- )
1149
- return
1150
-
1151
-
1152
- @app.cell
1153
  def _(mo, polars_result):
1154
- mo.ui.table(polars_result)
 
 
 
 
 
 
 
 
 
 
1155
  return
1156
 
1157
 
1158
- @app.cell
1159
  def _(new_memory_db):
1160
  pandas_result = new_memory_db.execute(
1161
  """SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
@@ -1164,40 +1261,44 @@ def _(new_memory_db):
1164
 
1165
 
1166
  @app.cell(hide_code=True)
1167
- def _(mo):
1168
- mo.md(r"""## Same Query Result as Pandas DataFrame:""")
1169
- return
1170
-
1171
-
1172
- @app.cell
1173
  def _(mo, pandas_result):
1174
- mo.ui.table(pandas_result)
1175
- return
1176
-
1177
-
1178
- @app.cell(hide_code=True)
1179
- def _(mo):
1180
- mo.md(
1181
- r"""
1182
- <!-- Demonstrate the differences in handling -->
1183
- ## Differences in DataFrame Handling
1184
- """
1185
  )
1186
  return
1187
 
1188
 
1189
  @app.cell(hide_code=True)
1190
  def _(mo):
1191
- mo.md(
1192
- r"""
1193
- <!-- Polars operation -->
1194
- ## Polars: Filter users over 35 and calculate average balance
1195
- """
 
 
 
 
 
 
 
 
 
 
 
1196
  )
1197
  return
1198
 
1199
 
1200
- @app.cell
1201
  def _(mo, pl, polars_result):
1202
  def _():
1203
  polars_filtered = polars_result.filter(pl.col("age") > 35)
@@ -1212,7 +1313,7 @@ def _(mo, pl, polars_result):
1212
  mo.md("### Average Account Balance:"),
1213
  mo.ui.table(polars_avg),
1214
  ],
1215
- gap=1.5,
1216
  )
1217
  return layout
1218
 
@@ -1222,30 +1323,30 @@ def _(mo, pl, polars_result):
1222
 
1223
 
1224
  @app.cell(hide_code=True)
1225
- def _(mo):
1226
- mo.md(
1227
- r"""
1228
- <!-- Pandas equivalent (using pandas style) -->
1229
- ## Pandas: Same operation in pandas style
1230
- """
1231
- )
1232
- return
1233
-
1234
-
1235
- @app.cell
1236
  def _(mo, pandas_result):
1237
  pandas_avg = pandas_result[pandas_result["age"] > 35]["account_balance"].mean()
1238
- mo.md(f"Average balance: {pandas_avg:.2f}")
 
 
 
 
 
 
 
 
 
 
 
1239
  return
1240
 
1241
 
1242
  @app.cell(hide_code=True)
1243
  def _(mo):
1244
- mo.md("""## 9. Data Visualization with DuckDB and Plotly""")
1245
  return
1246
 
1247
 
1248
- @app.cell
1249
  def _(age_groups, mo, new_memory_db, plotly_express):
1250
  # User distribution by age group
1251
  fig1 = plotly_express.bar(
@@ -1261,7 +1362,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1261
  text=age_groups["count"],
1262
  textposition="outside",
1263
  )
1264
- fig1.update_layout(height=450, margin=dict(t=50, b=50))
 
 
 
 
 
1265
 
1266
 
1267
  # Average balance by age group
@@ -1278,7 +1384,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1278
  text=[f"${val:.2f}" for val in age_groups["avg_balance"]],
1279
  textposition="outside",
1280
  )
1281
- fig2.update_layout(height=450, margin=dict(t=50, b=50))
 
 
 
 
 
1282
 
1283
 
1284
  # Age vs Account Balance scatter plot
@@ -1305,7 +1416,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1305
  size_max=15,
1306
  )
1307
  fig3.update_traces(marker=dict(size=12))
1308
- fig3.update_layout(height=450, margin=dict(t=50, b=50))
 
 
 
 
 
1309
 
1310
 
1311
  # Distribution of account balances
@@ -1328,7 +1444,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1328
  color_discrete_sequence=plotly_express.colors.qualitative.Pastel,
1329
  )
1330
  fig4.update_traces(textinfo="percent+label", textposition="inside")
1331
- fig4.update_layout(height=450, margin=dict(t=50, b=50))
 
 
 
 
 
1332
 
1333
 
1334
  category_tabs = mo.ui.tabs(
@@ -1341,7 +1462,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1341
  "Average Balance": mo.ui.plotly(fig2),
1342
  }
1343
  )
1344
- ]
 
 
1345
  ),
1346
  "Financial Analysis": mo.vstack(
1347
  [
@@ -1351,7 +1474,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1351
  "Balance Distribution": mo.ui.plotly(fig4),
1352
  }
1353
  )
1354
- ]
 
 
1355
  ),
1356
  },
1357
  lazy=True,
@@ -1359,10 +1484,14 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1359
 
1360
  mo.vstack(
1361
  [
1362
- mo.md("### Select a visualization category:"),
 
 
 
1363
  category_tabs,
1364
  ],
1365
- gap=1.5,
 
1366
  )
1367
  return
1368
 
@@ -1371,7 +1500,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
1371
  def _(mo):
1372
  mo.md(
1373
  r"""
1374
- # [9. Database Management Best Practices]
 
 
1375
 
1376
  ### Closing Connections
1377
 
@@ -1413,12 +1544,12 @@ def _(mo):
1413
 
1414
 
1415
  @app.cell(hide_code=True)
1416
- def _interactive_dashboard(mo):
1417
- mo.md(rf"""## 10. Interactive DuckDB Dashboard with Marimo and Plotly""")
1418
  return
1419
 
1420
 
1421
- @app.cell
1422
  def _(mo):
1423
  # Create an interactive filter for age range
1424
  min_age = mo.ui.slider(20, 50, value=25, label="Minimum Age")
@@ -1426,7 +1557,7 @@ def _(mo):
1426
  return max_age, min_age
1427
 
1428
 
1429
- @app.cell
1430
  def _(max_age, min_age, new_memory_db):
1431
  # Create a function to filter data and update visualizations
1432
  def get_filtered_data(min_val=min_age.value, max_val=max_age.value):
@@ -1449,7 +1580,7 @@ def _(max_age, min_age, new_memory_db):
1449
  return (get_filtered_data,)
1450
 
1451
 
1452
- @app.cell
1453
  def _(get_filtered_data):
1454
  def get_metrics(data=get_filtered_data()):
1455
  return {
@@ -1460,7 +1591,7 @@ def _(get_filtered_data):
1460
  return (get_metrics,)
1461
 
1462
 
1463
- @app.cell
1464
  def _(get_metrics, mo):
1465
  def metrics_display(metrics=get_metrics()):
1466
  return mo.hstack(
@@ -1488,12 +1619,12 @@ def _(get_metrics, mo):
1488
  ),
1489
  ],
1490
  justify="space-between",
1491
- gap=1.5,
1492
  )
1493
  return (metrics_display,)
1494
 
1495
 
1496
- @app.cell
1497
  def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1498
  def create_visualization(
1499
  data=get_filtered_data(), min_val=min_age.value, max_val=max_age.value
@@ -1516,6 +1647,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1516
  height=400,
1517
  xaxis_tickangle=-45,
1518
  margin=dict(t=50, b=70, l=50, r=30),
 
 
1519
  )
1520
  fig1.update_traces(
1521
  textposition="outside",
@@ -1534,6 +1667,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1534
  height=400,
1535
  margin=dict(t=50, b=70, l=50, r=30),
1536
  bargap=0.1,
 
 
1537
  )
1538
 
1539
  fig3 = plotly_express.scatter(
@@ -1551,6 +1686,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1551
  fig3.update_layout(
1552
  height=400,
1553
  margin=dict(t=50, b=70, l=50, r=30),
 
 
1554
  )
1555
 
1556
  return mo.ui.tabs(
@@ -1563,7 +1700,7 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1563
  return (create_visualization,)
1564
 
1565
 
1566
- @app.cell
1567
  def _(
1568
  create_visualization,
1569
  get_filtered_data,
@@ -1573,11 +1710,11 @@ def _(
1573
  mo,
1574
  ):
1575
  def dashboard(
1576
- min_val=min_age.value,
1577
- max_val=max_age.value,
1578
- metrics=metrics_display(),
1579
- data=get_filtered_data(),
1580
- visualization=create_visualization()
1581
  ):
1582
  return mo.vstack(
1583
  [
@@ -1588,14 +1725,17 @@ def _(
1588
  mo.md("### Visualizations"),
1589
  visualization,
1590
  ],
1591
- gap=2
 
1592
  )
 
 
1593
  dashboard()
1594
  return
1595
 
1596
 
1597
  @app.cell(hide_code=True)
1598
- def _conclusion(mo):
1599
  mo.md(
1600
  rf"""
1601
  # Summary and Key Takeaways
@@ -1608,7 +1748,7 @@ def _conclusion(mo):
1608
 
1609
  3. **Data insertion**: We demonstrated different ways to insert data, including single inserts and bulk loading.
1610
 
1611
- 4. **SQL queries**: We executed various SQL queries directly and through Marimo's UI components.
1612
 
1613
  5. **Integration with Polars**: We showed how DuckDB can work seamlessly with Polars DataFrames.
1614
 
@@ -1620,7 +1760,7 @@ def _conclusion(mo):
1620
 
1621
  9. **Best practices**: We reviewed best practices for managing DuckDB connections and transactions.
1622
 
1623
- 10. **Visualization**: We created interactive visualizations and dashboards with Plotly and Marimo.
1624
 
1625
  DuckDB is an excellent tool for data analysis, especially for analytical workloads. Its in-process nature makes it fast and easy to use, while its SQL compatibility makes it accessible for anyone familiar with SQL databases.
1626
 
@@ -1629,7 +1769,7 @@ def _conclusion(mo):
1629
  - Try loading larger datasets into DuckDB
1630
  - Experiment with more complex queries and window functions
1631
  - Use DuckDB's COPY functionality to import/export data from/to files
1632
- - Create more advanced interactive dashboards with Marimo and Plotly
1633
  """
1634
  )
1635
  return
 
9
  # "pandas==2.2.3",
10
  # "sqlglot==26.12.1",
11
  # "plotly==5.23.1",
12
+ # "statsmodels==0.14.4",
13
  # ]
14
  # ///
15
 
 
105
  | Performance | Faster for most operations | Slightly slower but provides persistence |
106
  | Creation | duckdb.connect(':memory:') | duckdb.connect('filename.db') |
107
  | Multiple Connection Access | Limited to single connection | Multiple connections can access the same database |
 
108
  """
109
  )
110
  return
111
 
112
 
113
+ @app.cell(hide_code=True)
114
  def _(os):
115
  # Remove previous database if it exists
116
  if os.path.exists("example.db"):
 
121
  return
122
 
123
 
124
+ @app.cell(hide_code=True)
125
  def _(mo):
126
  _df = mo.sql(
127
  f"""
 
149
  return
150
 
151
 
152
+ @app.cell(hide_code=True)
153
  def _(duckdb):
154
  # Create an in-memory DuckDB connection
155
  memory_db = duckdb.connect(":memory:")
 
159
  return file_db, memory_db
160
 
161
 
162
+ @app.cell(hide_code=True)
163
  def _(file_db, memory_db):
164
  # Test both connections
165
  memory_db.execute(
 
196
  return
197
 
198
 
199
+ @app.cell(hide_code=True)
200
  def _(mem_test, memory_db, mo):
201
  _df = mo.sql(
202
  f"""
 
207
  return
208
 
209
 
210
+ @app.cell(hide_code=True)
211
  def _(file_db, file_test, mo):
212
  _df = mo.sql(
213
  f"""
 
226
 
227
 
228
  @app.cell(hide_code=True)
229
+ def _(mo):
230
  mo.md(rf"""## 🔄 Simulating Application Restart...""")
231
  return
232
 
233
 
234
+ @app.cell(hide_code=True)
235
  def _(duckdb):
236
  # Create new connections (simulating restart)
237
  new_memory_db = duckdb.connect(":memory:")
 
239
  return new_file_db, new_memory_db
240
 
241
 
242
+ @app.cell(hide_code=True)
243
  def _(new_memory_db):
244
  # Try to query tables in the new memory connection
245
  try:
 
252
  return memory_data_available, memory_persistence
253
 
254
 
255
+ @app.cell(hide_code=True)
256
  def _(new_file_db):
257
  # Try to query tables in the new file connection
258
  try:
 
266
  return file_data, file_data_available, file_persistence
267
 
268
 
269
+ @app.cell(hide_code=True)
270
  def _(
271
  file_data_available,
272
  file_persistence,
 
285
  ],
286
  }
287
  )
 
 
288
  return (persistence_results,)
289
 
290
 
291
+ @app.cell(hide_code=True)
292
+ def _(mo, persistence_results):
293
+ mo.vstack(
294
+ [
295
+ mo.vstack([mo.md(f"""## Persistence Test Results""")], align="center"),
296
+ persistence_results,
297
+ ],
298
+ gap=2,
299
+ justify="space-between",
300
+ )
301
  return
302
 
303
 
304
+ @app.cell(hide_code=True)
305
  def _(file_data, file_data_available, mo):
306
  if file_data_available:
307
  mo.md("### Persisted File-Based Data:")
 
331
  return
332
 
333
 
334
+ @app.cell(hide_code=True)
335
+ def _(file_db, new_memory_db):
336
  # For the memory database
337
  try:
338
  new_memory_db.execute("DROP TABLE IF EXISTS users_memory")
 
347
  return
348
 
349
 
350
+ @app.cell(hide_code=True)
351
  def _(file_db, new_memory_db):
352
  # Create advanced users table in memory database with primary key
353
  new_memory_db.execute("""
 
377
  return
378
 
379
 
380
+ @app.cell(hide_code=True)
381
+ def _(new_memory_db):
382
  # Get table schema information using DuckDB's internal system tables
383
  memory_schema = new_memory_db.execute("""
384
  SELECT column_name, data_type, is_nullable
 
386
  WHERE table_name = 'users_memory'
387
  ORDER BY ordinal_position
388
  """).df()
 
 
 
389
  return (memory_schema,)
390
 
391
 
392
  @app.cell(hide_code=True)
393
  def _(memory_schema, mo):
394
+ mo.vstack(
395
+ [
396
+ mo.vstack(
397
+ [mo.md(f"""## 🔍 Table Schema Information """)], align="center"
398
+ ),
399
+ mo.ui.table(memory_schema),
400
+ ],
401
+ gap=2,
402
+ justify="space-between",
403
+ )
404
  return
405
 
406
 
 
423
  return
424
 
425
 
426
+ @app.cell(hide_code=True)
427
+ def _(date):
428
  today = date.today()
429
 
430
 
 
469
  return (safe_insert,)
470
 
471
 
472
+ @app.cell(hide_code=True)
473
  def _():
474
  # Prepare the data
475
  user_data = [
 
531
  return (user_data,)
532
 
533
 
534
+ @app.cell(hide_code=True)
535
+ def _(file_db, new_memory_db, safe_insert, user_data):
536
  # Safely insert data into memory database
537
+ safe_insert(new_memory_db, "users_memory", user_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
 
539
+ # Safely insert data into file database
540
+ safe_insert(file_db, "users_file", user_data)
541
  return
542
 
543
 
544
+ @app.cell(hide_code=True)
545
  def _():
546
  # If you need to add just one record, you can use a similar approach:
547
  new_user = (
 
556
  return (new_user,)
557
 
558
 
559
+ @app.cell(hide_code=True)
560
  def _(new_memory_db, new_user):
561
  # Check if the ID exists before inserting
562
  if not new_memory_db.execute(
 
575
  return
576
 
577
 
578
+ @app.cell(hide_code=True)
579
  def _(file_db, new_user):
580
  # Do the same for the file database
581
  if not file_db.execute(
 
594
  return
595
 
596
 
597
+ @app.cell(hide_code=True)
598
  def _(new_memory_db):
599
  # First try to update
600
  cursor = new_memory_db.execute(
 
617
  return (cursor,)
618
 
619
 
620
+ @app.cell(hide_code=True)
621
  def _(cursor, mo, new_memory_db):
622
  # If no rows were updated, perform an insert
623
  if cursor.rowcount == 0:
 
646
  return
647
 
648
 
649
+ @app.cell(hide_code=True)
650
  def _(file_db, mo):
651
  # For DuckDB using ON CONFLICT, we need to specify the conflict target column
652
  file_db.execute(
 
680
  return
681
 
682
 
683
+ @app.cell(hide_code=True)
684
+ def _(new_memory_db):
685
  # Display memory data using DuckDB's query capabilities
686
  memory_results = new_memory_db.execute("""
687
  SELECT
 
698
  return (memory_results,)
699
 
700
 
701
+ @app.cell(hide_code=True)
702
  def _(file_db):
703
  # Display file data with formatting
704
  file_results = file_db.execute("""
 
716
  return (file_results,)
717
 
718
 
 
 
 
 
 
 
 
 
 
 
 
719
  @app.cell(hide_code=True)
720
  def _(file_results, memory_results, mo):
721
  tabs = mo.ui.tabs(
 
724
  "File-Based Database": mo.ui.table(file_results),
725
  }
726
  )
727
+
728
+ mo.vstack(
729
+ [
730
+ mo.vstack(
731
+ [mo.md(f"""## 📊 Database Contents After Insertion""")],
732
+ align="center",
733
+ ),
734
+ tabs,
735
+ ],
736
+ gap=2,
737
+ justify="space-between",
738
+ )
739
  return
740
 
741
 
 
743
  def _(mo):
744
  mo.md(
745
  r"""
746
+ # [4. Using SQL Directly in marimo](https://duckdb.org/docs/stable/sql/query_syntax/select)
747
 
748
  There are multiple ways to leverage DuckDB's SQL capabilities in marimo:
749
 
750
  1. **Direct execution**: Using DuckDB connections to execute SQL
751
+ 2. **marimo SQL**: Using marimo's built-in SQL engine
752
  3. **Interactive queries**: Combining UI elements with SQL execution
753
 
754
  Let's explore these approaches:
 
757
  return
758
 
759
 
 
 
 
 
 
 
 
 
 
 
 
760
  @app.cell(hide_code=True)
761
  def _(mo):
762
+ mo.vstack(
763
+ [
764
+ mo.vstack([mo.md(f"""## 🔍 Query with marimo SQL""")], align="center"),
765
+ mo.md(
766
+ "### marimo has its own [built-in SQL engine](https://docs.marimo.io/guides/working_with_data/sql/) that can work with DataFrames."
767
+ ),
768
+ ],
769
+ gap=2,
770
+ justify="space-between",
771
  )
772
  return
773
 
774
 
775
+ @app.cell(hide_code=True)
776
+ def _(memory_results, mo):
777
  # Create a SQL selector for users with age threshold
778
+ age_threshold = mo.ui.slider(
779
+ 25, 50, value=30, label="Minimum Age", full_width=True, show_value=True
780
+ )
781
 
782
 
 
 
783
  # Create a function to filter users based on the slider value
784
  def filtered_users():
785
  # Use DuckDB directly instead of mo.sql with users param
786
  filtered_df = memory_results[memory_results["age"] >= age_threshold.value]
787
  filtered_df = filtered_df.sort_values("age")
788
  return mo.ui.table(filtered_df)
789
+ return age_threshold, filtered_users
790
 
791
 
792
+ @app.cell(hide_code=True)
793
  def _(age_threshold, filtered_users, mo):
794
  layout = mo.vstack(
795
  [
 
798
  mo.md("### Users meeting age criteria:"),
799
  filtered_users(),
800
  ],
801
+ gap=2,
802
+ justify="space-between",
803
  )
804
+
805
  layout
806
  return
807
 
 
812
  return
813
 
814
 
815
+ @app.cell(hide_code=True)
816
+ def _(pl):
817
  # Create a Polars DataFrame
818
  polars_df = pl.DataFrame(
819
  {
 
826
  return (polars_df,)
827
 
828
 
829
@app.cell(hide_code=True)
def _(mo, polars_df):
    # Centred heading above a table view of the source Polars DataFrame
    _title = mo.vstack([mo.md("## Original Polars DataFrame:")], align="center")
    _table = mo.ui.table(polars_df)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
842
 
843
 
844
+ @app.cell(hide_code=True)
845
  def _(new_memory_db, polars_df):
846
  # Register the Polars DataFrame as a DuckDB table in memory connection
847
  new_memory_db.register("products_polars", polars_df)
 
854
 
855
 
856
@app.cell(hide_code=True)
def _(mo, polars_query_result):
    # Centred heading above the table of the query result
    _title = mo.vstack(
        [mo.md("## DuckDB Query Result (From Polars Data):")],
        align="center",
    )
    _table = mo.ui.table(polars_query_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
870
 
871
 
872
+ @app.cell(hide_code=True)
873
+ def _(new_memory_db):
874
  # Demonstrate a more complex query
875
  complex_query_result = new_memory_db.execute("""
876
  SELECT
 
883
  GROUP BY category
884
  ORDER BY avg_price DESC
885
  """).df()
 
 
886
  return (complex_query_result,)
887
 
888
 
889
@app.cell(hide_code=True)
def _(complex_query_result, mo):
    # Centred heading above the per-category aggregate table
    _title = mo.vstack(
        [mo.md("## Aggregated Product Data by Category:")],
        align="center",
    )
    _table = mo.ui.table(complex_query_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
903
 
904
 
 
908
  return
909
 
910
 
911
+ @app.cell(hide_code=True)
912
+ def _(new_memory_db):
913
  # Create another table to join with
914
  new_memory_db.execute("""
915
  CREATE TABLE IF NOT EXISTS departments (
 
921
  return
922
 
923
 
924
+ @app.cell(hide_code=True)
925
  def _(new_memory_db):
926
  new_memory_db.execute("""
927
  INSERT INTO departments VALUES
 
932
  return
933
 
934
 
935
+ @app.cell(hide_code=True)
936
  def _(new_memory_db):
937
  # Execute a join query
938
  join_result = new_memory_db.execute("""
 
976
  return
977
 
978
 
979
+ @app.cell(hide_code=True)
980
  def _(new_memory_db):
981
  # Inner join
982
  inner_join = new_memory_db.execute("""
 
998
  FROM users_memory u
999
  FULL OUTER JOIN departments d ON u.id = d.manager_id
1000
  """).df()
1001
+
1002
+ # Cross join
1003
+ cross_join = new_memory_db.execute("""
1004
+ SELECT u.id, u.name, d.department_name
1005
+ FROM users_memory u
1006
+ CROSS JOIN departments d
1007
+ """).df()
1008
+
1009
+ # Self join (Joining user table with itself to find users with the same age)
1010
+ self_join = new_memory_db.execute("""
1011
+ SELECT u1.id, u1.name, u2.name AS same_age_user
1012
+ FROM users_memory u1
1013
+ JOIN users_memory u2 ON u1.age = u2.age AND u1.id <> u2.id
1014
+ """).df()
1015
+
1016
+ # Semi join (Finding users who are also managers)
1017
+ semi_join = new_memory_db.execute("""
1018
+ SELECT u.id, u.name, u.age
1019
+ FROM users_memory u
1020
+ WHERE EXISTS (
1021
+ SELECT 1 FROM departments d
1022
+ WHERE u.id = d.manager_id
1023
+ )
1024
+ """).df()
1025
+
1026
+ # Anti join (Finding users who are not managers)
1027
+ anti_join = new_memory_db.execute("""
1028
+ SELECT u.id, u.name, u.age
1029
+ FROM users_memory u
1030
+ WHERE NOT EXISTS (
1031
+ SELECT 1 FROM departments d
1032
+ WHERE u.id = d.manager_id
1033
+ )
1034
+ """).df()
1035
+ return (
1036
+ anti_join,
1037
+ cross_join,
1038
+ full_join,
1039
+ inner_join,
1040
+ right_join,
1041
+ self_join,
1042
+ semi_join,
1043
+ )
1044
+
1045
+
1046
@app.cell(hide_code=True)
def _(mo, new_memory_db):
    # Fetch every row of both tables and show them in separate tabs
    users = new_memory_db.execute("SELECT * FROM users_memory").df()
    departments = new_memory_db.execute("SELECT * FROM departments").df()

    _title = mo.vstack([mo.md("# Base Tables")], align="center")
    _table_tabs = mo.ui.tabs(
        {
            "User Table": mo.ui.table(users),
            "Departments Table": mo.ui.table(departments),
        }
    )
    base_tables = mo.vstack([_title, _table_tabs])
    base_tables
    return
1065
+
1066
+
1067
@app.cell(hide_code=True)
def _(
    anti_join,
    cross_join,
    full_join,
    inner_join,
    join_result,
    mo,
    right_join,
    self_join,
    semi_join,
):
    # One-sentence explanation per join type, keyed by the tab label
    join_description = {
        "Left Join": "Shows all records from the left table and matching records from the right table. Non-matches filled with NULL.",
        "Inner Join": "Shows only the records where there's a match in both tables.",
        "Right Join": "Shows all records from the right table and matching records from the left table. Non-matches filled with NULL.",
        "Full Outer Join": "Shows all records from both tables, with NULL values where there's no match.",
        "Cross Join": "Returns the Cartesian product - all possible combinations of rows from both tables.",
        "Self Join": "Joins a table with itself, used to compare rows within the same table.",
        "Semi Join": "Returns rows from the first table where one or more matches exist in the second table.",
        "Anti Join": "Returns rows from the first table where no matches exist in the second table.",
    }

    # Result frame behind each tab, in display order; the labels are the same
    # keys used in join_description.
    _frames_by_label = {
        "Left Join": join_result,
        "Inner Join": inner_join,
        "Right Join": right_join,
        "Full Outer Join": full_join,
        "Cross Join": cross_join,
        "Self Join": self_join,
        "Semi Join": semi_join,
        "Anti Join": anti_join,
    }
    join_tabs = mo.ui.tabs(
        {
            _label: mo.ui.table(_frame)
            for _label, _frame in _frames_by_label.items()
        }
    )
    return join_description, join_tabs
1104
+
1105
 
1106
@app.cell(hide_code=True)
def _(join_description, join_tabs, mo):
    # The tabs' current value doubles as the key into join_description
    _selected = join_tabs.value
    _explanation = mo.md(f"**{_selected}**: {join_description[_selected]}")

    join_display = mo.vstack(
        [
            mo.vstack([mo.md("# SQL Join Operations")], align="center"),
            _explanation,
            mo.md("## Join Results"),
            join_tabs,
        ],
        gap=2,
        justify="space-between",
    )

    join_display
    return
1121
 
1122
 
 
1126
  return
1127
 
1128
 
1129
+ @app.cell(hide_code=True)
1130
+ def _(new_memory_db):
1131
  # Execute an aggregate query
1132
  agg_result = new_memory_db.execute("""
1133
  SELECT
 
1142
 
1143
 
1144
@app.cell(hide_code=True)
def _(agg_result, mo):
    # Centred heading above the table of aggregates
    _title = mo.vstack(
        [mo.md("## Aggregate Results (All Users):")], align="center"
    )
    _table = mo.ui.table(agg_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
1157
 
1158
 
1159
+ @app.cell(hide_code=True)
1160
  def _(new_memory_db):
1161
  age_groups = new_memory_db.execute("""
1162
  SELECT
 
1175
  return (age_groups,)
1176
 
1177
 
1178
@app.cell(hide_code=True)
def _(age_groups, mo):
    # Centred heading above the table of aggregates grouped by age range.
    # The table widget is created exactly once, as part of the layout; the
    # layout is the cell's final expression.
    mo.vstack(
        [
            mo.vstack(
                [mo.md("## Aggregate Results (Grouped by Age Range):")],
                align="center",
            ),
            mo.ui.table(age_groups),
        ],
        gap=2,
        justify="space-between",
    )
    return
1193
 
1194
 
1195
+ @app.cell(hide_code=True)
1196
+ def _(new_memory_db):
1197
  window_result = new_memory_db.execute("""
1198
  SELECT
1199
  id,
 
1206
  FROM users_memory
1207
  ORDER BY balance_rank
1208
  """).df()
1209
+ return (window_result,)
1210
 
1211
+
1212
@app.cell(hide_code=True)
def _(mo, window_result):
    # Centred heading above the table of the window-function query result
    _title = mo.vstack([mo.md("## Window Functions Example")], align="center")
    _table = mo.ui.table(window_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
1223
 
1224
 
 
1228
  return
1229
 
1230
 
1231
+ @app.cell(hide_code=True)
1232
+ def _(new_memory_db):
1233
  polars_result = new_memory_db.execute(
1234
  """SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
1235
  ).pl()
 
1237
 
1238
 
1239
@app.cell(hide_code=True)
def _(mo, polars_result):
    # Centred heading above a table view of the Polars query result
    _title = mo.vstack(
        [mo.md("## Query Result as Polars DataFrame:")],
        align="center",
    )
    _table = mo.ui.table(polars_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
1253
 
1254
 
1255
+ @app.cell(hide_code=True)
1256
  def _(new_memory_db):
1257
  pandas_result = new_memory_db.execute(
1258
  """SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
 
1261
 
1262
 
1263
@app.cell(hide_code=True)
def _(mo, pandas_result):
    # Centred heading above a table view of the pandas query result
    _title = mo.vstack(
        [mo.md("## Same Query Result as Pandas DataFrame:")],
        align="center",
    )
    _table = mo.ui.table(pandas_result)
    mo.vstack([_title, _table], gap=2, justify="space-between")
    return
1277
 
1278
 
1279
@app.cell(hide_code=True)
def _(mo):
    # Two headings: a centred section title, then a left-aligned lead-in for
    # the Polars example.
    _section_title = mo.vstack(
        [mo.md("## Differences in DataFrame Handling")],
        align="center",
    )
    _polars_lead_in = mo.vstack(
        [mo.md("## Polars: Filter users over 35 and calculate average balance")],
        align="start",
    )
    mo.vstack([_section_title, _polars_lead_in], gap=2, justify="space-between")
    return
1299
 
1300
 
1301
+ @app.cell(hide_code=True)
1302
  def _(mo, pl, polars_result):
1303
  def _():
1304
  polars_filtered = polars_result.filter(pl.col("age") > 35)
 
1313
  mo.md("### Average Account Balance:"),
1314
  mo.ui.table(polars_avg),
1315
  ],
1316
+ gap=2,
1317
  )
1318
  return layout
1319
 
 
1323
 
1324
 
1325
@app.cell(hide_code=True)
def _(mo, pandas_result):
    # Mean of account_balance over the rows with age > 35, selected with a
    # boolean mask.
    _older_than_35 = pandas_result["age"] > 35
    pandas_avg = pandas_result[_older_than_35]["account_balance"].mean()

    _title = mo.vstack(
        [mo.md("## Pandas: Same operation in pandas style")],
        align="center",
    )
    # The average is formatted to two decimal places
    _summary = mo.vstack(
        [mo.md(f"### Average balance: {pandas_avg:.2f}")],
        align="start",
    )
    mo.vstack([_title, _summary])
    return
1341
 
1342
 
1343
@app.cell(hide_code=True)
def _(mo):
    # Top-level heading for the visualization section
    mo.md("# 9. Data Visualization with DuckDB and Plotly")
    return
1347
 
1348
 
1349
+ @app.cell(hide_code=True)
1350
  def _(age_groups, mo, new_memory_db, plotly_express):
1351
  # User distribution by age group
1352
  fig1 = plotly_express.bar(
 
1362
  text=age_groups["count"],
1363
  textposition="outside",
1364
  )
1365
+ fig1.update_layout(
1366
+ height=450,
1367
+ margin=dict(t=50, b=50, l=50, r=25),
1368
+ hoverlabel=dict(bgcolor="white", font_size=12),
1369
+ template="plotly_white",
1370
+ )
1371
 
1372
 
1373
  # Average balance by age group
 
1384
  text=[f"${val:.2f}" for val in age_groups["avg_balance"]],
1385
  textposition="outside",
1386
  )
1387
+ fig2.update_layout(
1388
+ height=450,
1389
+ margin=dict(t=50, b=50, l=50, r=25),
1390
+ hoverlabel=dict(bgcolor="white", font_size=12),
1391
+ template="plotly_white",
1392
+ )
1393
 
1394
 
1395
  # Age vs Account Balance scatter plot
 
1416
  size_max=15,
1417
  )
1418
  fig3.update_traces(marker=dict(size=12))
1419
+ fig3.update_layout(
1420
+ height=450,
1421
+ margin=dict(t=50, b=50, l=50, r=25),
1422
+ hoverlabel=dict(bgcolor="white", font_size=12),
1423
+ template="plotly_white",
1424
+ )
1425
 
1426
 
1427
  # Distribution of account balances
 
1444
  color_discrete_sequence=plotly_express.colors.qualitative.Pastel,
1445
  )
1446
  fig4.update_traces(textinfo="percent+label", textposition="inside")
1447
+ fig4.update_layout(
1448
+ height=450,
1449
+ margin=dict(t=50, b=50, l=50, r=25),
1450
+ hoverlabel=dict(bgcolor="white", font_size=12),
1451
+ template="plotly_white",
1452
+ )
1453
 
1454
 
1455
  category_tabs = mo.ui.tabs(
 
1462
  "Average Balance": mo.ui.plotly(fig2),
1463
  }
1464
  )
1465
+ ],
1466
+ gap=2,
1467
+ justify="space-between",
1468
  ),
1469
  "Financial Analysis": mo.vstack(
1470
  [
 
1474
  "Balance Distribution": mo.ui.plotly(fig4),
1475
  }
1476
  )
1477
+ ],
1478
+ gap=2,
1479
+ justify="space-between",
1480
  ),
1481
  },
1482
  lazy=True,
 
1484
 
1485
  mo.vstack(
1486
  [
1487
+ mo.vstack(
1488
+ [mo.md(f"""## Select a visualization category:""")],
1489
+ align="start",
1490
+ ),
1491
  category_tabs,
1492
  ],
1493
+ gap=2,
1494
+ justify="space-between",
1495
  )
1496
  return
1497
 
 
1500
  def _(mo):
1501
  mo.md(
1502
  r"""
1503
+ /// admonition |
1504
+ ## Database Management Best Practices
1505
+ ///
1506
 
1507
  ### Closing Connections
1508
 
 
1544
 
1545
 
1546
@app.cell(hide_code=True)
def _(mo):
    # Top-level heading for the dashboard section. It uses a single '#' so it
    # sits at the same level as the other numbered section headings (e.g.
    # "# 9. Data Visualization ..."); the text has no placeholders or
    # backslashes, so a plain string literal is sufficient.
    mo.md("# 10. Interactive DuckDB Dashboard with marimo and Plotly")
    return
1550
 
1551
 
1552
+ @app.cell(hide_code=True)
1553
  def _(mo):
1554
  # Create an interactive filter for age range
1555
  min_age = mo.ui.slider(20, 50, value=25, label="Minimum Age")
 
1557
  return max_age, min_age
1558
 
1559
 
1560
+ @app.cell(hide_code=True)
1561
  def _(max_age, min_age, new_memory_db):
1562
  # Create a function to filter data and update visualizations
1563
  def get_filtered_data(min_val=min_age.value, max_val=max_age.value):
 
1580
  return (get_filtered_data,)
1581
 
1582
 
1583
+ @app.cell(hide_code=True)
1584
  def _(get_filtered_data):
1585
  def get_metrics(data=get_filtered_data()):
1586
  return {
 
1591
  return (get_metrics,)
1592
 
1593
 
1594
+ @app.cell(hide_code=True)
1595
  def _(get_metrics, mo):
1596
  def metrics_display(metrics=get_metrics()):
1597
  return mo.hstack(
 
1619
  ),
1620
  ],
1621
  justify="space-between",
1622
+ gap=2,
1623
  )
1624
  return (metrics_display,)
1625
 
1626
 
1627
+ @app.cell(hide_code=True)
1628
  def _(get_filtered_data, max_age, min_age, mo, plotly_express):
1629
  def create_visualization(
1630
  data=get_filtered_data(), min_val=min_age.value, max_val=max_age.value
 
1647
  height=400,
1648
  xaxis_tickangle=-45,
1649
  margin=dict(t=50, b=70, l=50, r=30),
1650
+ hoverlabel=dict(bgcolor="white", font_size=12),
1651
+ template="plotly_white",
1652
  )
1653
  fig1.update_traces(
1654
  textposition="outside",
 
1667
  height=400,
1668
  margin=dict(t=50, b=70, l=50, r=30),
1669
  bargap=0.1,
1670
+ hoverlabel=dict(bgcolor="white", font_size=12),
1671
+ template="plotly_white",
1672
  )
1673
 
1674
  fig3 = plotly_express.scatter(
 
1686
  fig3.update_layout(
1687
  height=400,
1688
  margin=dict(t=50, b=70, l=50, r=30),
1689
+ hoverlabel=dict(bgcolor="white", font_size=12),
1690
+ template="plotly_white",
1691
  )
1692
 
1693
  return mo.ui.tabs(
 
1700
  return (create_visualization,)
1701
 
1702
 
1703
+ @app.cell(hide_code=True)
1704
  def _(
1705
  create_visualization,
1706
  get_filtered_data,
 
1710
  mo,
1711
  ):
1712
  def dashboard(
1713
+ min_val=min_age.value,
1714
+ max_val=max_age.value,
1715
+ metrics=metrics_display(),
1716
+ data=get_filtered_data(),
1717
+ visualization=create_visualization(),
1718
  ):
1719
  return mo.vstack(
1720
  [
 
1725
  mo.md("### Visualizations"),
1726
  visualization,
1727
  ],
1728
+ gap=2,
1729
+ justify="space-between",
1730
  )
1731
+
1732
+
1733
  dashboard()
1734
  return
1735
 
1736
 
1737
  @app.cell(hide_code=True)
1738
+ def _(mo):
1739
  mo.md(
1740
  rf"""
1741
  # Summary and Key Takeaways
 
1748
 
1749
  3. **Data insertion**: We demonstrated different ways to insert data, including single inserts and bulk loading.
1750
 
1751
+ 4. **SQL queries**: We executed various SQL queries directly and through marimo's UI components.
1752
 
1753
  5. **Integration with Polars**: We showed how DuckDB can work seamlessly with Polars DataFrames.
1754
 
 
1760
 
1761
  9. **Best practices**: We reviewed best practices for managing DuckDB connections and transactions.
1762
 
1763
+ 10. **Visualization**: We created interactive visualizations and dashboards with Plotly and marimo.
1764
 
1765
  DuckDB is an excellent tool for data analysis, especially for analytical workloads. Its in-process nature makes it fast and easy to use, while its SQL compatibility makes it accessible for anyone familiar with SQL databases.
1766
 
 
1769
  - Try loading larger datasets into DuckDB
1770
  - Experiment with more complex queries and window functions
1771
  - Use DuckDB's COPY functionality to import/export data from/to files
1772
+ - Create more advanced interactive dashboards with marimo and Plotly
1773
  """
1774
  )
1775
  return