Spaces:
Running
Running
Azmi-84
committed on
Commit
·
34f04d3
1
Parent(s):
e5fc993
Hide code cells in getting started guide for improved user experience
Browse files
This commit updates the DuckDB getting started script by hiding code cells to streamline the user interface. The changes enhance readability and focus on the interactive components, making it easier for users to engage with the content without being distracted by the underlying code.
- duckdb/01_getting_started.py +400 -260
duckdb/01_getting_started.py
CHANGED
@@ -9,6 +9,7 @@
|
|
9 |
# "pandas==2.2.3",
|
10 |
# "sqlglot==26.12.1",
|
11 |
# "plotly==5.23.1",
|
|
|
12 |
# ]
|
13 |
# ///
|
14 |
|
@@ -104,13 +105,12 @@ def _(mo):
|
|
104 |
| Performance | Faster for most operations | Slightly slower but provides persistence |
|
105 |
| Creation | duckdb.connect(':memory:') | duckdb.connect('filename.db') |
|
106 |
| Multiple Connection Access | Limited to single connection | Multiple connections can access the same database |
|
107 |
-
|
108 |
"""
|
109 |
)
|
110 |
return
|
111 |
|
112 |
|
113 |
-
@app.cell
|
114 |
def _(os):
|
115 |
# Remove previous database if it exists
|
116 |
if os.path.exists("example.db"):
|
@@ -121,7 +121,7 @@ def _(os):
|
|
121 |
return
|
122 |
|
123 |
|
124 |
-
@app.cell
|
125 |
def _(mo):
|
126 |
_df = mo.sql(
|
127 |
f"""
|
@@ -149,7 +149,7 @@ def _(mo):
|
|
149 |
return
|
150 |
|
151 |
|
152 |
-
@app.cell
|
153 |
def _(duckdb):
|
154 |
# Create an in-memory DuckDB connection
|
155 |
memory_db = duckdb.connect(":memory:")
|
@@ -159,7 +159,7 @@ def _(duckdb):
|
|
159 |
return file_db, memory_db
|
160 |
|
161 |
|
162 |
-
@app.cell
|
163 |
def _(file_db, memory_db):
|
164 |
# Test both connections
|
165 |
memory_db.execute(
|
@@ -196,7 +196,7 @@ def _(mo):
|
|
196 |
return
|
197 |
|
198 |
|
199 |
-
@app.cell
|
200 |
def _(mem_test, memory_db, mo):
|
201 |
_df = mo.sql(
|
202 |
f"""
|
@@ -207,7 +207,7 @@ def _(mem_test, memory_db, mo):
|
|
207 |
return
|
208 |
|
209 |
|
210 |
-
@app.cell
|
211 |
def _(file_db, file_test, mo):
|
212 |
_df = mo.sql(
|
213 |
f"""
|
@@ -226,12 +226,12 @@ def _():
|
|
226 |
|
227 |
|
228 |
@app.cell(hide_code=True)
|
229 |
-
def
|
230 |
mo.md(rf"""## 🔄 Simulating Application Restart...""")
|
231 |
return
|
232 |
|
233 |
|
234 |
-
@app.cell
|
235 |
def _(duckdb):
|
236 |
# Create new connections (simulating restart)
|
237 |
new_memory_db = duckdb.connect(":memory:")
|
@@ -239,7 +239,7 @@ def _(duckdb):
|
|
239 |
return new_file_db, new_memory_db
|
240 |
|
241 |
|
242 |
-
@app.cell
|
243 |
def _(new_memory_db):
|
244 |
# Try to query tables in the new memory connection
|
245 |
try:
|
@@ -252,7 +252,7 @@ def _(new_memory_db):
|
|
252 |
return memory_data_available, memory_persistence
|
253 |
|
254 |
|
255 |
-
@app.cell
|
256 |
def _(new_file_db):
|
257 |
# Try to query tables in the new file connection
|
258 |
try:
|
@@ -266,7 +266,7 @@ def _(new_file_db):
|
|
266 |
return file_data, file_data_available, file_persistence
|
267 |
|
268 |
|
269 |
-
@app.cell
|
270 |
def _(
|
271 |
file_data_available,
|
272 |
file_persistence,
|
@@ -285,18 +285,23 @@ def _(
|
|
285 |
],
|
286 |
}
|
287 |
)
|
288 |
-
|
289 |
-
mo.md("### Persistence Test Results")
|
290 |
return (persistence_results,)
|
291 |
|
292 |
|
293 |
-
@app.cell
|
294 |
-
def _(persistence_results):
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
return
|
297 |
|
298 |
|
299 |
-
@app.cell
|
300 |
def _(file_data, file_data_available, mo):
|
301 |
if file_data_available:
|
302 |
mo.md("### Persisted File-Based Data:")
|
@@ -326,8 +331,8 @@ def _(mo):
|
|
326 |
return
|
327 |
|
328 |
|
329 |
-
@app.cell
|
330 |
-
def
|
331 |
# For the memory database
|
332 |
try:
|
333 |
new_memory_db.execute("DROP TABLE IF EXISTS users_memory")
|
@@ -342,7 +347,7 @@ def _create_users_tables(file_db, new_memory_db):
|
|
342 |
return
|
343 |
|
344 |
|
345 |
-
@app.cell
|
346 |
def _(file_db, new_memory_db):
|
347 |
# Create advanced users table in memory database with primary key
|
348 |
new_memory_db.execute("""
|
@@ -372,8 +377,8 @@ def _(file_db, new_memory_db):
|
|
372 |
return
|
373 |
|
374 |
|
375 |
-
@app.cell
|
376 |
-
def _(
|
377 |
# Get table schema information using DuckDB's internal system tables
|
378 |
memory_schema = new_memory_db.execute("""
|
379 |
SELECT column_name, data_type, is_nullable
|
@@ -381,15 +386,21 @@ def _(mo, new_memory_db):
|
|
381 |
WHERE table_name = 'users_memory'
|
382 |
ORDER BY ordinal_position
|
383 |
""").df()
|
384 |
-
|
385 |
-
# Display the schema using marimo's UI components
|
386 |
-
mo.md("### 🔍 Table Schema Information")
|
387 |
return (memory_schema,)
|
388 |
|
389 |
|
390 |
@app.cell(hide_code=True)
|
391 |
def _(memory_schema, mo):
|
392 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
return
|
394 |
|
395 |
|
@@ -412,8 +423,8 @@ def _(mo):
|
|
412 |
return
|
413 |
|
414 |
|
415 |
-
@app.cell
|
416 |
-
def
|
417 |
today = date.today()
|
418 |
|
419 |
|
@@ -458,7 +469,7 @@ def _insert_user_data(date):
|
|
458 |
return (safe_insert,)
|
459 |
|
460 |
|
461 |
-
@app.cell
|
462 |
def _():
|
463 |
# Prepare the data
|
464 |
user_data = [
|
@@ -520,31 +531,17 @@ def _():
|
|
520 |
return (user_data,)
|
521 |
|
522 |
|
523 |
-
@app.cell
|
524 |
-
def _(
|
525 |
# Safely insert data into memory database
|
526 |
-
|
527 |
-
mo.md(
|
528 |
-
f"""
|
529 |
-
Inserted {records_inserted} new records into users_memory.
|
530 |
-
"""
|
531 |
-
)
|
532 |
-
return
|
533 |
-
|
534 |
-
|
535 |
-
@app.cell
|
536 |
-
def _(file_db, safe_insert, user_data):
|
537 |
-
def _():
|
538 |
-
# Safely insert data into file database
|
539 |
-
records_inserted = safe_insert(file_db, "users_file", user_data)
|
540 |
-
return print(f"Inserted {records_inserted} new records into users_file")
|
541 |
|
542 |
-
|
543 |
-
|
544 |
return
|
545 |
|
546 |
|
547 |
-
@app.cell
|
548 |
def _():
|
549 |
# If you need to add just one record, you can use a similar approach:
|
550 |
new_user = (
|
@@ -559,7 +556,7 @@ def _():
|
|
559 |
return (new_user,)
|
560 |
|
561 |
|
562 |
-
@app.cell
|
563 |
def _(new_memory_db, new_user):
|
564 |
# Check if the ID exists before inserting
|
565 |
if not new_memory_db.execute(
|
@@ -578,7 +575,7 @@ def _(new_memory_db, new_user):
|
|
578 |
return
|
579 |
|
580 |
|
581 |
-
@app.cell
|
582 |
def _(file_db, new_user):
|
583 |
# Do the same for the file database
|
584 |
if not file_db.execute(
|
@@ -597,7 +594,7 @@ def _(file_db, new_user):
|
|
597 |
return
|
598 |
|
599 |
|
600 |
-
@app.cell
|
601 |
def _(new_memory_db):
|
602 |
# First try to update
|
603 |
cursor = new_memory_db.execute(
|
@@ -620,7 +617,7 @@ def _(new_memory_db):
|
|
620 |
return (cursor,)
|
621 |
|
622 |
|
623 |
-
@app.cell
|
624 |
def _(cursor, mo, new_memory_db):
|
625 |
# If no rows were updated, perform an insert
|
626 |
if cursor.rowcount == 0:
|
@@ -649,7 +646,7 @@ def _(cursor, mo, new_memory_db):
|
|
649 |
return
|
650 |
|
651 |
|
652 |
-
@app.cell
|
653 |
def _(file_db, mo):
|
654 |
# For DuckDB using ON CONFLICT, we need to specify the conflict target column
|
655 |
file_db.execute(
|
@@ -683,8 +680,8 @@ def _(file_db, mo):
|
|
683 |
return
|
684 |
|
685 |
|
686 |
-
@app.cell
|
687 |
-
def
|
688 |
# Display memory data using DuckDB's query capabilities
|
689 |
memory_results = new_memory_db.execute("""
|
690 |
SELECT
|
@@ -701,7 +698,7 @@ def _view_tables_after_insert(new_memory_db):
|
|
701 |
return (memory_results,)
|
702 |
|
703 |
|
704 |
-
@app.cell
|
705 |
def _(file_db):
|
706 |
# Display file data with formatting
|
707 |
file_results = file_db.execute("""
|
@@ -719,17 +716,6 @@ def _(file_db):
|
|
719 |
return (file_results,)
|
720 |
|
721 |
|
722 |
-
@app.cell
|
723 |
-
def _(mo):
|
724 |
-
mo.md(
|
725 |
-
r"""
|
726 |
-
<!-- Create an interactive display with tabs using marimo components -->
|
727 |
-
## 📊 Database Contents After Insertion
|
728 |
-
"""
|
729 |
-
)
|
730 |
-
return
|
731 |
-
|
732 |
-
|
733 |
@app.cell(hide_code=True)
|
734 |
def _(file_results, memory_results, mo):
|
735 |
tabs = mo.ui.tabs(
|
@@ -738,7 +724,18 @@ def _(file_results, memory_results, mo):
|
|
738 |
"File-Based Database": mo.ui.table(file_results),
|
739 |
}
|
740 |
)
|
741 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
742 |
return
|
743 |
|
744 |
|
@@ -746,12 +743,12 @@ def _(file_results, memory_results, mo):
|
|
746 |
def _(mo):
|
747 |
mo.md(
|
748 |
r"""
|
749 |
-
# [4. Using SQL Directly in
|
750 |
|
751 |
There are multiple ways to leverage DuckDB's SQL capabilities in marimo:
|
752 |
|
753 |
1. **Direct execution**: Using DuckDB connections to execute SQL
|
754 |
-
2. **
|
755 |
3. **Interactive queries**: Combining UI elements with SQL execution
|
756 |
|
757 |
Let's explore these approaches:
|
@@ -760,47 +757,39 @@ def _(mo):
|
|
760 |
return
|
761 |
|
762 |
|
763 |
-
@app.cell(hide_code=True)
|
764 |
-
def _sql_with_marimo(mo):
|
765 |
-
mo.md(
|
766 |
-
rf"""
|
767 |
-
<!-- Using Marimo's SQL engine with direct SQL on memory_results DataFrame -->
|
768 |
-
## 🔍 Query with Marimo SQL
|
769 |
-
"""
|
770 |
-
)
|
771 |
-
return
|
772 |
-
|
773 |
-
|
774 |
@app.cell(hide_code=True)
|
775 |
def _(mo):
|
776 |
-
mo.
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
|
|
|
|
|
|
|
|
781 |
)
|
782 |
return
|
783 |
|
784 |
|
785 |
-
@app.cell
|
786 |
-
def _(mo):
|
787 |
# Create a SQL selector for users with age threshold
|
788 |
-
age_threshold = mo.ui.slider(
|
789 |
-
|
|
|
790 |
|
791 |
|
792 |
-
@app.cell
|
793 |
-
def _(age_threshold, memory_results, mo):
|
794 |
# Create a function to filter users based on the slider value
|
795 |
def filtered_users():
|
796 |
# Use DuckDB directly instead of mo.sql with users param
|
797 |
filtered_df = memory_results[memory_results["age"] >= age_threshold.value]
|
798 |
filtered_df = filtered_df.sort_values("age")
|
799 |
return mo.ui.table(filtered_df)
|
800 |
-
return
|
801 |
|
802 |
|
803 |
-
@app.cell
|
804 |
def _(age_threshold, filtered_users, mo):
|
805 |
layout = mo.vstack(
|
806 |
[
|
@@ -809,8 +798,10 @@ def _(age_threshold, filtered_users, mo):
|
|
809 |
mo.md("### Users meeting age criteria:"),
|
810 |
filtered_users(),
|
811 |
],
|
812 |
-
gap=
|
|
|
813 |
)
|
|
|
814 |
layout
|
815 |
return
|
816 |
|
@@ -821,8 +812,8 @@ def _(mo):
|
|
821 |
return
|
822 |
|
823 |
|
824 |
-
@app.cell
|
825 |
-
def
|
826 |
# Create a Polars DataFrame
|
827 |
polars_df = pl.DataFrame(
|
828 |
{
|
@@ -835,24 +826,22 @@ def _polars_integration(pl):
|
|
835 |
return (polars_df,)
|
836 |
|
837 |
|
838 |
-
@app.cell
|
839 |
-
def _(mo):
|
840 |
-
mo.md(
|
841 |
-
rf"""
|
842 |
-
<!-- Display the Polars DataFrame -->
|
843 |
-
## Original Polars DataFrame:
|
844 |
-
"""
|
845 |
-
)
|
846 |
-
return
|
847 |
-
|
848 |
-
|
849 |
-
@app.cell
|
850 |
def _(mo, polars_df):
|
851 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
852 |
return
|
853 |
|
854 |
|
855 |
-
@app.cell
|
856 |
def _(new_memory_db, polars_df):
|
857 |
# Register the Polars DataFrame as a DuckDB table in memory connection
|
858 |
new_memory_db.register("products_polars", polars_df)
|
@@ -865,24 +854,23 @@ def _(new_memory_db, polars_df):
|
|
865 |
|
866 |
|
867 |
@app.cell(hide_code=True)
|
868 |
-
def _(mo):
|
869 |
-
mo.md(
|
870 |
-
r"""
|
871 |
-
<!-- Display the query result -->
|
872 |
-
## DuckDB Query Result (From Polars Data):
|
873 |
-
"""
|
874 |
-
)
|
875 |
-
return
|
876 |
-
|
877 |
-
|
878 |
-
@app.cell
|
879 |
def _(mo, polars_query_result):
|
880 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
881 |
return
|
882 |
|
883 |
|
884 |
-
@app.cell
|
885 |
-
def _(
|
886 |
# Demonstrate a more complex query
|
887 |
complex_query_result = new_memory_db.execute("""
|
888 |
SELECT
|
@@ -895,14 +883,22 @@ def _(mo, new_memory_db):
|
|
895 |
GROUP BY category
|
896 |
ORDER BY avg_price DESC
|
897 |
""").df()
|
898 |
-
|
899 |
-
mo.md("## Aggregated Product Data by Category:")
|
900 |
return (complex_query_result,)
|
901 |
|
902 |
|
903 |
-
@app.cell
|
904 |
def _(complex_query_result, mo):
|
905 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
906 |
return
|
907 |
|
908 |
|
@@ -912,8 +908,8 @@ def _(mo):
|
|
912 |
return
|
913 |
|
914 |
|
915 |
-
@app.cell
|
916 |
-
def
|
917 |
# Create another table to join with
|
918 |
new_memory_db.execute("""
|
919 |
CREATE TABLE IF NOT EXISTS departments (
|
@@ -925,7 +921,7 @@ def _join_operations(new_memory_db):
|
|
925 |
return
|
926 |
|
927 |
|
928 |
-
@app.cell
|
929 |
def _(new_memory_db):
|
930 |
new_memory_db.execute("""
|
931 |
INSERT INTO departments VALUES
|
@@ -936,7 +932,7 @@ def _(new_memory_db):
|
|
936 |
return
|
937 |
|
938 |
|
939 |
-
@app.cell
|
940 |
def _(new_memory_db):
|
941 |
# Execute a join query
|
942 |
join_result = new_memory_db.execute("""
|
@@ -980,7 +976,7 @@ def _(mo):
|
|
980 |
return
|
981 |
|
982 |
|
983 |
-
@app.cell
|
984 |
def _(new_memory_db):
|
985 |
# Inner join
|
986 |
inner_join = new_memory_db.execute("""
|
@@ -1002,21 +998,125 @@ def _(new_memory_db):
|
|
1002 |
FROM users_memory u
|
1003 |
FULL OUTER JOIN departments d ON u.id = d.manager_id
|
1004 |
""").df()
|
1005 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1006 |
|
1007 |
|
1008 |
-
@app.cell
|
1009 |
-
def _(full_join, inner_join, join_result, mo, right_join):
|
1010 |
join_tabs = mo.ui.tabs(
|
1011 |
{
|
1012 |
"Left Join": mo.ui.table(join_result),
|
1013 |
"Inner Join": mo.ui.table(inner_join),
|
1014 |
"Right Join": mo.ui.table(right_join),
|
1015 |
"Full Outer Join": mo.ui.table(full_join),
|
|
|
|
|
|
|
|
|
1016 |
}
|
1017 |
)
|
|
|
|
|
1018 |
|
1019 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1020 |
return
|
1021 |
|
1022 |
|
@@ -1026,8 +1126,8 @@ def _(mo):
|
|
1026 |
return
|
1027 |
|
1028 |
|
1029 |
-
@app.cell
|
1030 |
-
def
|
1031 |
# Execute an aggregate query
|
1032 |
agg_result = new_memory_db.execute("""
|
1033 |
SELECT
|
@@ -1042,34 +1142,21 @@ def _aggregate_operations(new_memory_db):
|
|
1042 |
|
1043 |
|
1044 |
@app.cell(hide_code=True)
|
1045 |
-
def _(mo):
|
1046 |
-
mo.md(
|
1047 |
-
rf"""
|
1048 |
-
<!-- Display the aggregate result -->
|
1049 |
-
## Aggregate Results (All Users):
|
1050 |
-
"""
|
1051 |
-
)
|
1052 |
-
return
|
1053 |
-
|
1054 |
-
|
1055 |
-
@app.cell
|
1056 |
def _(agg_result, mo):
|
1057 |
-
mo.
|
1058 |
-
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
-
|
1066 |
-
## Aggregate Results (Grouped by Age Range):
|
1067 |
-
"""
|
1068 |
)
|
1069 |
return
|
1070 |
|
1071 |
|
1072 |
-
@app.cell
|
1073 |
def _(new_memory_db):
|
1074 |
age_groups = new_memory_db.execute("""
|
1075 |
SELECT
|
@@ -1088,25 +1175,25 @@ def _(new_memory_db):
|
|
1088 |
return (age_groups,)
|
1089 |
|
1090 |
|
1091 |
-
@app.cell
|
1092 |
def _(age_groups, mo):
|
1093 |
mo.ui.table(age_groups)
|
1094 |
-
|
1095 |
-
|
1096 |
-
|
1097 |
-
|
1098 |
-
|
1099 |
-
|
1100 |
-
|
1101 |
-
|
1102 |
-
|
1103 |
-
|
1104 |
)
|
1105 |
return
|
1106 |
|
1107 |
|
1108 |
-
@app.cell
|
1109 |
-
def _(
|
1110 |
window_result = new_memory_db.execute("""
|
1111 |
SELECT
|
1112 |
id,
|
@@ -1119,8 +1206,19 @@ def _(mo, new_memory_db):
|
|
1119 |
FROM users_memory
|
1120 |
ORDER BY balance_rank
|
1121 |
""").df()
|
|
|
1122 |
|
1123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1124 |
return
|
1125 |
|
1126 |
|
@@ -1130,8 +1228,8 @@ def _(mo):
|
|
1130 |
return
|
1131 |
|
1132 |
|
1133 |
-
@app.cell
|
1134 |
-
def
|
1135 |
polars_result = new_memory_db.execute(
|
1136 |
"""SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
|
1137 |
).pl()
|
@@ -1139,23 +1237,22 @@ def _convert_results(new_memory_db):
|
|
1139 |
|
1140 |
|
1141 |
@app.cell(hide_code=True)
|
1142 |
-
def _(mo):
|
1143 |
-
mo.md(
|
1144 |
-
r"""
|
1145 |
-
<!-- Display the converted results -->
|
1146 |
-
## Query Result as Polars DataFrame:
|
1147 |
-
"""
|
1148 |
-
)
|
1149 |
-
return
|
1150 |
-
|
1151 |
-
|
1152 |
-
@app.cell
|
1153 |
def _(mo, polars_result):
|
1154 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1155 |
return
|
1156 |
|
1157 |
|
1158 |
-
@app.cell
|
1159 |
def _(new_memory_db):
|
1160 |
pandas_result = new_memory_db.execute(
|
1161 |
"""SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
|
@@ -1164,40 +1261,44 @@ def _(new_memory_db):
|
|
1164 |
|
1165 |
|
1166 |
@app.cell(hide_code=True)
|
1167 |
-
def _(mo):
|
1168 |
-
mo.md(r"""## Same Query Result as Pandas DataFrame:""")
|
1169 |
-
return
|
1170 |
-
|
1171 |
-
|
1172 |
-
@app.cell
|
1173 |
def _(mo, pandas_result):
|
1174 |
-
mo.
|
1175 |
-
|
1176 |
-
|
1177 |
-
|
1178 |
-
|
1179 |
-
|
1180 |
-
|
1181 |
-
|
1182 |
-
|
1183 |
-
|
1184 |
-
"""
|
1185 |
)
|
1186 |
return
|
1187 |
|
1188 |
|
1189 |
@app.cell(hide_code=True)
|
1190 |
def _(mo):
|
1191 |
-
mo.
|
1192 |
-
|
1193 |
-
|
1194 |
-
|
1195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
)
|
1197 |
return
|
1198 |
|
1199 |
|
1200 |
-
@app.cell
|
1201 |
def _(mo, pl, polars_result):
|
1202 |
def _():
|
1203 |
polars_filtered = polars_result.filter(pl.col("age") > 35)
|
@@ -1212,7 +1313,7 @@ def _(mo, pl, polars_result):
|
|
1212 |
mo.md("### Average Account Balance:"),
|
1213 |
mo.ui.table(polars_avg),
|
1214 |
],
|
1215 |
-
gap=
|
1216 |
)
|
1217 |
return layout
|
1218 |
|
@@ -1222,30 +1323,30 @@ def _(mo, pl, polars_result):
|
|
1222 |
|
1223 |
|
1224 |
@app.cell(hide_code=True)
|
1225 |
-
def _(mo):
|
1226 |
-
mo.md(
|
1227 |
-
r"""
|
1228 |
-
<!-- Pandas equivalent (using pandas style) -->
|
1229 |
-
## Pandas: Same operation in pandas style
|
1230 |
-
"""
|
1231 |
-
)
|
1232 |
-
return
|
1233 |
-
|
1234 |
-
|
1235 |
-
@app.cell
|
1236 |
def _(mo, pandas_result):
|
1237 |
pandas_avg = pandas_result[pandas_result["age"] > 35]["account_balance"].mean()
|
1238 |
-
mo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1239 |
return
|
1240 |
|
1241 |
|
1242 |
@app.cell(hide_code=True)
|
1243 |
def _(mo):
|
1244 |
-
mo.md("""
|
1245 |
return
|
1246 |
|
1247 |
|
1248 |
-
@app.cell
|
1249 |
def _(age_groups, mo, new_memory_db, plotly_express):
|
1250 |
# User distribution by age group
|
1251 |
fig1 = plotly_express.bar(
|
@@ -1261,7 +1362,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1261 |
text=age_groups["count"],
|
1262 |
textposition="outside",
|
1263 |
)
|
1264 |
-
fig1.update_layout(
|
|
|
|
|
|
|
|
|
|
|
1265 |
|
1266 |
|
1267 |
# Average balance by age group
|
@@ -1278,7 +1384,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1278 |
text=[f"${val:.2f}" for val in age_groups["avg_balance"]],
|
1279 |
textposition="outside",
|
1280 |
)
|
1281 |
-
fig2.update_layout(
|
|
|
|
|
|
|
|
|
|
|
1282 |
|
1283 |
|
1284 |
# Age vs Account Balance scatter plot
|
@@ -1305,7 +1416,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1305 |
size_max=15,
|
1306 |
)
|
1307 |
fig3.update_traces(marker=dict(size=12))
|
1308 |
-
fig3.update_layout(
|
|
|
|
|
|
|
|
|
|
|
1309 |
|
1310 |
|
1311 |
# Distribution of account balances
|
@@ -1328,7 +1444,12 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1328 |
color_discrete_sequence=plotly_express.colors.qualitative.Pastel,
|
1329 |
)
|
1330 |
fig4.update_traces(textinfo="percent+label", textposition="inside")
|
1331 |
-
fig4.update_layout(
|
|
|
|
|
|
|
|
|
|
|
1332 |
|
1333 |
|
1334 |
category_tabs = mo.ui.tabs(
|
@@ -1341,7 +1462,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1341 |
"Average Balance": mo.ui.plotly(fig2),
|
1342 |
}
|
1343 |
)
|
1344 |
-
]
|
|
|
|
|
1345 |
),
|
1346 |
"Financial Analysis": mo.vstack(
|
1347 |
[
|
@@ -1351,7 +1474,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1351 |
"Balance Distribution": mo.ui.plotly(fig4),
|
1352 |
}
|
1353 |
)
|
1354 |
-
]
|
|
|
|
|
1355 |
),
|
1356 |
},
|
1357 |
lazy=True,
|
@@ -1359,10 +1484,14 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1359 |
|
1360 |
mo.vstack(
|
1361 |
[
|
1362 |
-
mo.
|
|
|
|
|
|
|
1363 |
category_tabs,
|
1364 |
],
|
1365 |
-
gap=
|
|
|
1366 |
)
|
1367 |
return
|
1368 |
|
@@ -1371,7 +1500,9 @@ def _(age_groups, mo, new_memory_db, plotly_express):
|
|
1371 |
def _(mo):
|
1372 |
mo.md(
|
1373 |
r"""
|
1374 |
-
|
|
|
|
|
1375 |
|
1376 |
### Closing Connections
|
1377 |
|
@@ -1413,12 +1544,12 @@ def _(mo):
|
|
1413 |
|
1414 |
|
1415 |
@app.cell(hide_code=True)
|
1416 |
-
def
|
1417 |
-
mo.md(rf"""## 10. Interactive DuckDB Dashboard with
|
1418 |
return
|
1419 |
|
1420 |
|
1421 |
-
@app.cell
|
1422 |
def _(mo):
|
1423 |
# Create an interactive filter for age range
|
1424 |
min_age = mo.ui.slider(20, 50, value=25, label="Minimum Age")
|
@@ -1426,7 +1557,7 @@ def _(mo):
|
|
1426 |
return max_age, min_age
|
1427 |
|
1428 |
|
1429 |
-
@app.cell
|
1430 |
def _(max_age, min_age, new_memory_db):
|
1431 |
# Create a function to filter data and update visualizations
|
1432 |
def get_filtered_data(min_val=min_age.value, max_val=max_age.value):
|
@@ -1449,7 +1580,7 @@ def _(max_age, min_age, new_memory_db):
|
|
1449 |
return (get_filtered_data,)
|
1450 |
|
1451 |
|
1452 |
-
@app.cell
|
1453 |
def _(get_filtered_data):
|
1454 |
def get_metrics(data=get_filtered_data()):
|
1455 |
return {
|
@@ -1460,7 +1591,7 @@ def _(get_filtered_data):
|
|
1460 |
return (get_metrics,)
|
1461 |
|
1462 |
|
1463 |
-
@app.cell
|
1464 |
def _(get_metrics, mo):
|
1465 |
def metrics_display(metrics=get_metrics()):
|
1466 |
return mo.hstack(
|
@@ -1488,12 +1619,12 @@ def _(get_metrics, mo):
|
|
1488 |
),
|
1489 |
],
|
1490 |
justify="space-between",
|
1491 |
-
gap=
|
1492 |
)
|
1493 |
return (metrics_display,)
|
1494 |
|
1495 |
|
1496 |
-
@app.cell
|
1497 |
def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
1498 |
def create_visualization(
|
1499 |
data=get_filtered_data(), min_val=min_age.value, max_val=max_age.value
|
@@ -1516,6 +1647,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
|
1516 |
height=400,
|
1517 |
xaxis_tickangle=-45,
|
1518 |
margin=dict(t=50, b=70, l=50, r=30),
|
|
|
|
|
1519 |
)
|
1520 |
fig1.update_traces(
|
1521 |
textposition="outside",
|
@@ -1534,6 +1667,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
|
1534 |
height=400,
|
1535 |
margin=dict(t=50, b=70, l=50, r=30),
|
1536 |
bargap=0.1,
|
|
|
|
|
1537 |
)
|
1538 |
|
1539 |
fig3 = plotly_express.scatter(
|
@@ -1551,6 +1686,8 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
|
1551 |
fig3.update_layout(
|
1552 |
height=400,
|
1553 |
margin=dict(t=50, b=70, l=50, r=30),
|
|
|
|
|
1554 |
)
|
1555 |
|
1556 |
return mo.ui.tabs(
|
@@ -1563,7 +1700,7 @@ def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
|
1563 |
return (create_visualization,)
|
1564 |
|
1565 |
|
1566 |
-
@app.cell
|
1567 |
def _(
|
1568 |
create_visualization,
|
1569 |
get_filtered_data,
|
@@ -1573,11 +1710,11 @@ def _(
|
|
1573 |
mo,
|
1574 |
):
|
1575 |
def dashboard(
|
1576 |
-
min_val=min_age.value,
|
1577 |
-
max_val=max_age.value,
|
1578 |
-
metrics=metrics_display(),
|
1579 |
-
data=get_filtered_data(),
|
1580 |
-
visualization=create_visualization()
|
1581 |
):
|
1582 |
return mo.vstack(
|
1583 |
[
|
@@ -1588,14 +1725,17 @@ def _(
|
|
1588 |
mo.md("### Visualizations"),
|
1589 |
visualization,
|
1590 |
],
|
1591 |
-
gap=2
|
|
|
1592 |
)
|
|
|
|
|
1593 |
dashboard()
|
1594 |
return
|
1595 |
|
1596 |
|
1597 |
@app.cell(hide_code=True)
|
1598 |
-
def
|
1599 |
mo.md(
|
1600 |
rf"""
|
1601 |
# Summary and Key Takeaways
|
@@ -1608,7 +1748,7 @@ def _conclusion(mo):
|
|
1608 |
|
1609 |
3. **Data insertion**: We demonstrated different ways to insert data, including single inserts and bulk loading.
|
1610 |
|
1611 |
-
4. **SQL queries**: We executed various SQL queries directly and through
|
1612 |
|
1613 |
5. **Integration with Polars**: We showed how DuckDB can work seamlessly with Polars DataFrames.
|
1614 |
|
@@ -1620,7 +1760,7 @@ def _conclusion(mo):
|
|
1620 |
|
1621 |
9. **Best practices**: We reviewed best practices for managing DuckDB connections and transactions.
|
1622 |
|
1623 |
-
10. **Visualization**: We created interactive visualizations and dashboards with Plotly and
|
1624 |
|
1625 |
DuckDB is an excellent tool for data analysis, especially for analytical workloads. Its in-process nature makes it fast and easy to use, while its SQL compatibility makes it accessible for anyone familiar with SQL databases.
|
1626 |
|
@@ -1629,7 +1769,7 @@ def _conclusion(mo):
|
|
1629 |
- Try loading larger datasets into DuckDB
|
1630 |
- Experiment with more complex queries and window functions
|
1631 |
- Use DuckDB's COPY functionality to import/export data from/to files
|
1632 |
-
- Create more advanced interactive dashboards with
|
1633 |
"""
|
1634 |
)
|
1635 |
return
|
|
|
9 |
# "pandas==2.2.3",
|
10 |
# "sqlglot==26.12.1",
|
11 |
# "plotly==5.23.1",
|
12 |
+
# "statsmodels==0.14.4",
|
13 |
# ]
|
14 |
# ///
|
15 |
|
|
|
105 |
| Performance | Faster for most operations | Slightly slower but provides persistence |
|
106 |
| Creation | duckdb.connect(':memory:') | duckdb.connect('filename.db') |
|
107 |
| Multiple Connection Access | Limited to single connection | Multiple connections can access the same database |
|
|
|
108 |
"""
|
109 |
)
|
110 |
return
|
111 |
|
112 |
|
113 |
+
@app.cell(hide_code=True)
|
114 |
def _(os):
|
115 |
# Remove previous database if it exists
|
116 |
if os.path.exists("example.db"):
|
|
|
121 |
return
|
122 |
|
123 |
|
124 |
+
@app.cell(hide_code=True)
|
125 |
def _(mo):
|
126 |
_df = mo.sql(
|
127 |
f"""
|
|
|
149 |
return
|
150 |
|
151 |
|
152 |
+
@app.cell(hide_code=True)
|
153 |
def _(duckdb):
|
154 |
# Create an in-memory DuckDB connection
|
155 |
memory_db = duckdb.connect(":memory:")
|
|
|
159 |
return file_db, memory_db
|
160 |
|
161 |
|
162 |
+
@app.cell(hide_code=True)
|
163 |
def _(file_db, memory_db):
|
164 |
# Test both connections
|
165 |
memory_db.execute(
|
|
|
196 |
return
|
197 |
|
198 |
|
199 |
+
@app.cell(hide_code=True)
|
200 |
def _(mem_test, memory_db, mo):
|
201 |
_df = mo.sql(
|
202 |
f"""
|
|
|
207 |
return
|
208 |
|
209 |
|
210 |
+
@app.cell(hide_code=True)
|
211 |
def _(file_db, file_test, mo):
|
212 |
_df = mo.sql(
|
213 |
f"""
|
|
|
226 |
|
227 |
|
228 |
@app.cell(hide_code=True)
|
229 |
+
def _(mo):
|
230 |
mo.md(rf"""## 🔄 Simulating Application Restart...""")
|
231 |
return
|
232 |
|
233 |
|
234 |
+
@app.cell(hide_code=True)
|
235 |
def _(duckdb):
|
236 |
# Create new connections (simulating restart)
|
237 |
new_memory_db = duckdb.connect(":memory:")
|
|
|
239 |
return new_file_db, new_memory_db
|
240 |
|
241 |
|
242 |
+
@app.cell(hide_code=True)
|
243 |
def _(new_memory_db):
|
244 |
# Try to query tables in the new memory connection
|
245 |
try:
|
|
|
252 |
return memory_data_available, memory_persistence
|
253 |
|
254 |
|
255 |
+
@app.cell(hide_code=True)
|
256 |
def _(new_file_db):
|
257 |
# Try to query tables in the new file connection
|
258 |
try:
|
|
|
266 |
return file_data, file_data_available, file_persistence
|
267 |
|
268 |
|
269 |
+
@app.cell(hide_code=True)
|
270 |
def _(
|
271 |
file_data_available,
|
272 |
file_persistence,
|
|
|
285 |
],
|
286 |
}
|
287 |
)
|
|
|
|
|
288 |
return (persistence_results,)
|
289 |
|
290 |
|
291 |
+
@app.cell(hide_code=True)
|
292 |
+
def _(mo, persistence_results):
|
293 |
+
mo.vstack(
|
294 |
+
[
|
295 |
+
mo.vstack([mo.md(f"""## Persistence Test Results""")], align="center"),
|
296 |
+
persistence_results,
|
297 |
+
],
|
298 |
+
gap=2,
|
299 |
+
justify="space-between",
|
300 |
+
)
|
301 |
return
|
302 |
|
303 |
|
304 |
+
@app.cell(hide_code=True)
|
305 |
def _(file_data, file_data_available, mo):
|
306 |
if file_data_available:
|
307 |
mo.md("### Persisted File-Based Data:")
|
|
|
331 |
return
|
332 |
|
333 |
|
334 |
+
@app.cell(hide_code=True)
|
335 |
+
def _(file_db, new_memory_db):
|
336 |
# For the memory database
|
337 |
try:
|
338 |
new_memory_db.execute("DROP TABLE IF EXISTS users_memory")
|
|
|
347 |
return
|
348 |
|
349 |
|
350 |
+
@app.cell(hide_code=True)
|
351 |
def _(file_db, new_memory_db):
|
352 |
# Create advanced users table in memory database with primary key
|
353 |
new_memory_db.execute("""
|
|
|
377 |
return
|
378 |
|
379 |
|
380 |
+
@app.cell(hide_code=True)
|
381 |
+
def _(new_memory_db):
|
382 |
# Get table schema information using DuckDB's internal system tables
|
383 |
memory_schema = new_memory_db.execute("""
|
384 |
SELECT column_name, data_type, is_nullable
|
|
|
386 |
WHERE table_name = 'users_memory'
|
387 |
ORDER BY ordinal_position
|
388 |
""").df()
|
|
|
|
|
|
|
389 |
return (memory_schema,)
|
390 |
|
391 |
|
392 |
@app.cell(hide_code=True)
|
393 |
def _(memory_schema, mo):
|
394 |
+
mo.vstack(
|
395 |
+
[
|
396 |
+
mo.vstack(
|
397 |
+
[mo.md(f"""## 🔍 Table Schema Information """)], align="center"
|
398 |
+
),
|
399 |
+
mo.ui.table(memory_schema),
|
400 |
+
],
|
401 |
+
gap=2,
|
402 |
+
justify="space-between",
|
403 |
+
)
|
404 |
return
|
405 |
|
406 |
|
|
|
423 |
return
|
424 |
|
425 |
|
426 |
+
@app.cell(hide_code=True)
|
427 |
+
def _(date):
|
428 |
today = date.today()
|
429 |
|
430 |
|
|
|
469 |
return (safe_insert,)
|
470 |
|
471 |
|
472 |
+
@app.cell(hide_code=True)
|
473 |
def _():
|
474 |
# Prepare the data
|
475 |
user_data = [
|
|
|
531 |
return (user_data,)
|
532 |
|
533 |
|
534 |
+
@app.cell(hide_code=True)
|
535 |
+
def _(file_db, new_memory_db, safe_insert, user_data):
|
536 |
# Safely insert data into memory database
|
537 |
+
safe_insert(new_memory_db, "users_memory", user_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
539 |
+
# Safely insert data into file database
|
540 |
+
safe_insert(file_db, "users_file", user_data)
|
541 |
return
|
542 |
|
543 |
|
544 |
+
@app.cell(hide_code=True)
|
545 |
def _():
|
546 |
# If you need to add just one record, you can use a similar approach:
|
547 |
new_user = (
|
|
|
556 |
return (new_user,)
|
557 |
|
558 |
|
559 |
+
@app.cell(hide_code=True)
|
560 |
def _(new_memory_db, new_user):
|
561 |
# Check if the ID exists before inserting
|
562 |
if not new_memory_db.execute(
|
|
|
575 |
return
|
576 |
|
577 |
|
578 |
+
@app.cell(hide_code=True)
|
579 |
def _(file_db, new_user):
|
580 |
# Do the same for the file database
|
581 |
if not file_db.execute(
|
|
|
594 |
return
|
595 |
|
596 |
|
597 |
+
@app.cell(hide_code=True)
|
598 |
def _(new_memory_db):
|
599 |
# First try to update
|
600 |
cursor = new_memory_db.execute(
|
|
|
617 |
return (cursor,)
|
618 |
|
619 |
|
620 |
+
@app.cell(hide_code=True)
|
621 |
def _(cursor, mo, new_memory_db):
|
622 |
# If no rows were updated, perform an insert
|
623 |
if cursor.rowcount == 0:
|
|
|
646 |
return
|
647 |
|
648 |
|
649 |
+
@app.cell(hide_code=True)
|
650 |
def _(file_db, mo):
|
651 |
# For DuckDB using ON CONFLICT, we need to specify the conflict target column
|
652 |
file_db.execute(
|
|
|
680 |
return
|
681 |
|
682 |
|
683 |
+
@app.cell(hide_code=True)
|
684 |
+
def _(new_memory_db):
|
685 |
# Display memory data using DuckDB's query capabilities
|
686 |
memory_results = new_memory_db.execute("""
|
687 |
SELECT
|
|
|
698 |
return (memory_results,)
|
699 |
|
700 |
|
701 |
+
@app.cell(hide_code=True)
|
702 |
def _(file_db):
|
703 |
# Display file data with formatting
|
704 |
file_results = file_db.execute("""
|
|
|
716 |
return (file_results,)
|
717 |
|
718 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
719 |
@app.cell(hide_code=True)
|
720 |
def _(file_results, memory_results, mo):
|
721 |
tabs = mo.ui.tabs(
|
|
|
724 |
"File-Based Database": mo.ui.table(file_results),
|
725 |
}
|
726 |
)
|
727 |
+
|
728 |
+
mo.vstack(
|
729 |
+
[
|
730 |
+
mo.vstack(
|
731 |
+
[mo.md(f"""## 📊 Database Contents After Insertion""")],
|
732 |
+
align="center",
|
733 |
+
),
|
734 |
+
tabs,
|
735 |
+
],
|
736 |
+
gap=2,
|
737 |
+
justify="space-between",
|
738 |
+
)
|
739 |
return
|
740 |
|
741 |
|
|
|
743 |
def _(mo):
|
744 |
mo.md(
|
745 |
r"""
|
746 |
+
# [4. Using SQL Directly in marimo](https://duckdb.org/docs/stable/sql/query_syntax/select)
|
747 |
|
748 |
There are multiple ways to leverage DuckDB's SQL capabilities in marimo:
|
749 |
|
750 |
1. **Direct execution**: Using DuckDB connections to execute SQL
|
751 |
+
2. **marimo SQL**: Using marimo's built-in SQL engine
|
752 |
3. **Interactive queries**: Combining UI elements with SQL execution
|
753 |
|
754 |
Let's explore these approaches:
|
|
|
757 |
return
|
758 |
|
759 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
760 |
@app.cell(hide_code=True)
def _(mo):
    # Intro banner for the marimo SQL part: a centered heading stacked above a
    # one-line pointer to marimo's SQL guide.
    # Underscore-prefixed names follow the file's `_df` convention for
    # cell-private temporaries.
    _heading = mo.vstack([mo.md("## 🔍 Query with marimo SQL")], align="center")
    _pointer = mo.md(
        "### marimo has its own [built-in SQL engine](https://docs.marimo.io/guides/working_with_data/sql/) that can work with DataFrames."
    )
    mo.vstack([_heading, _pointer], gap=2, justify="space-between")
    return
|
773 |
|
774 |
|
775 |
+
@app.cell(hide_code=True)
def _(memory_results, mo):
    # Slider selecting the minimum age (range 25-50, initial value 30) that
    # `filtered_users` applies to the user data.
    age_threshold = mo.ui.slider(
        25, 50, value=30, label="Minimum Age", full_width=True, show_value=True
    )

    def filtered_users():
        """Return a table of users at or above the slider's age, sorted by age.

        The filtering is done with DataFrame operations on ``memory_results``
        (boolean mask followed by ``sort_values``); no SQL query is issued.
        NOTE(review): this presumes ``memory_results`` is a pandas DataFrame
        with an "age" column - confirm against the cell that produces it.
        """
        meets_minimum = memory_results["age"] >= age_threshold.value
        return mo.ui.table(memory_results[meets_minimum].sort_values("age"))

    return age_threshold, filtered_users
|
790 |
|
791 |
|
792 |
+
@app.cell(hide_code=True)
|
793 |
def _(age_threshold, filtered_users, mo):
|
794 |
layout = mo.vstack(
|
795 |
[
|
|
|
798 |
mo.md("### Users meeting age criteria:"),
|
799 |
filtered_users(),
|
800 |
],
|
801 |
+
gap=2,
|
802 |
+
justify="space-between",
|
803 |
)
|
804 |
+
|
805 |
layout
|
806 |
return
|
807 |
|
|
|
812 |
return
|
813 |
|
814 |
|
815 |
+
@app.cell(hide_code=True)
|
816 |
+
def _(pl):
|
817 |
# Create a Polars DataFrame
|
818 |
polars_df = pl.DataFrame(
|
819 |
{
|
|
|
826 |
return (polars_df,)
|
827 |
|
828 |
|
829 |
+
@app.cell(hide_code=True)
def _(mo, polars_df):
    # Show the source Polars frame as a table under a centered heading.
    _heading = mo.vstack([mo.md("## Original Polars DataFrame:")], align="center")
    mo.vstack(
        [_heading, mo.ui.table(polars_df)],
        gap=2,
        justify="space-between",
    )
    return
|
842 |
|
843 |
|
844 |
+
@app.cell(hide_code=True)
|
845 |
def _(new_memory_db, polars_df):
|
846 |
# Register the Polars DataFrame as a DuckDB table in memory connection
|
847 |
new_memory_db.register("products_polars", polars_df)
|
|
|
854 |
|
855 |
|
856 |
@app.cell(hide_code=True)
def _(mo, polars_query_result):
    # Show the result of querying the registered Polars data as a table under
    # a centered heading.
    _heading = mo.vstack(
        [mo.md("## DuckDB Query Result (From Polars Data):")],
        align="center",
    )
    mo.vstack(
        [_heading, mo.ui.table(polars_query_result)],
        gap=2,
        justify="space-between",
    )
    return
|
870 |
|
871 |
|
872 |
+
@app.cell(hide_code=True)
|
873 |
+
def _(new_memory_db):
|
874 |
# Demonstrate a more complex query
|
875 |
complex_query_result = new_memory_db.execute("""
|
876 |
SELECT
|
|
|
883 |
GROUP BY category
|
884 |
ORDER BY avg_price DESC
|
885 |
""").df()
|
|
|
|
|
886 |
return (complex_query_result,)
|
887 |
|
888 |
|
889 |
+
@app.cell(hide_code=True)
def _(complex_query_result, mo):
    # Show the per-category aggregation as a table under a centered heading.
    _heading = mo.vstack(
        [mo.md("## Aggregated Product Data by Category:")],
        align="center",
    )
    mo.vstack(
        [_heading, mo.ui.table(complex_query_result)],
        gap=2,
        justify="space-between",
    )
    return
|
903 |
|
904 |
|
|
|
908 |
return
|
909 |
|
910 |
|
911 |
+
@app.cell(hide_code=True)
|
912 |
+
def _(new_memory_db):
|
913 |
# Create another table to join with
|
914 |
new_memory_db.execute("""
|
915 |
CREATE TABLE IF NOT EXISTS departments (
|
|
|
921 |
return
|
922 |
|
923 |
|
924 |
+
@app.cell(hide_code=True)
|
925 |
def _(new_memory_db):
|
926 |
new_memory_db.execute("""
|
927 |
INSERT INTO departments VALUES
|
|
|
932 |
return
|
933 |
|
934 |
|
935 |
+
@app.cell(hide_code=True)
|
936 |
def _(new_memory_db):
|
937 |
# Execute a join query
|
938 |
join_result = new_memory_db.execute("""
|
|
|
976 |
return
|
977 |
|
978 |
|
979 |
+
@app.cell(hide_code=True)
|
980 |
def _(new_memory_db):
|
981 |
# Inner join
|
982 |
inner_join = new_memory_db.execute("""
|
|
|
998 |
FROM users_memory u
|
999 |
FULL OUTER JOIN departments d ON u.id = d.manager_id
|
1000 |
""").df()
|
1001 |
+
|
1002 |
+
# Cross join
|
1003 |
+
cross_join = new_memory_db.execute("""
|
1004 |
+
SELECT u.id, u.name, d.department_name
|
1005 |
+
FROM users_memory u
|
1006 |
+
CROSS JOIN departments d
|
1007 |
+
""").df()
|
1008 |
+
|
1009 |
+
# Self join (Joining user table with itself to find users with the same age)
|
1010 |
+
self_join = new_memory_db.execute("""
|
1011 |
+
SELECT u1.id, u1.name, u2.name AS same_age_user
|
1012 |
+
FROM users_memory u1
|
1013 |
+
JOIN users_memory u2 ON u1.age = u2.age AND u1.id <> u2.id
|
1014 |
+
""").df()
|
1015 |
+
|
1016 |
+
# Semi join (Finding users who are also managers)
|
1017 |
+
semi_join = new_memory_db.execute("""
|
1018 |
+
SELECT u.id, u.name, u.age
|
1019 |
+
FROM users_memory u
|
1020 |
+
WHERE EXISTS (
|
1021 |
+
SELECT 1 FROM departments d
|
1022 |
+
WHERE u.id = d.manager_id
|
1023 |
+
)
|
1024 |
+
""").df()
|
1025 |
+
|
1026 |
+
# Anti join (Finding users who are not managers)
|
1027 |
+
anti_join = new_memory_db.execute("""
|
1028 |
+
SELECT u.id, u.name, u.age
|
1029 |
+
FROM users_memory u
|
1030 |
+
WHERE NOT EXISTS (
|
1031 |
+
SELECT 1 FROM departments d
|
1032 |
+
WHERE u.id = d.manager_id
|
1033 |
+
)
|
1034 |
+
""").df()
|
1035 |
+
return (
|
1036 |
+
anti_join,
|
1037 |
+
cross_join,
|
1038 |
+
full_join,
|
1039 |
+
inner_join,
|
1040 |
+
right_join,
|
1041 |
+
self_join,
|
1042 |
+
semi_join,
|
1043 |
+
)
|
1044 |
+
|
1045 |
+
|
1046 |
+
@app.cell(hide_code=True)
def _(mo, new_memory_db):
    # Load both base tables in full so the join results that follow can be
    # compared against their inputs.
    users = new_memory_db.execute("SELECT * FROM users_memory").df()
    departments = new_memory_db.execute("SELECT * FROM departments").df()

    # One tab per base table, under a centered heading.
    _heading = mo.vstack([mo.md("# Base Tables")], align="center")
    _table_tabs = mo.ui.tabs(
        {
            "User Table": mo.ui.table(users),
            "Departments Table": mo.ui.table(departments),
        }
    )
    base_tables = mo.vstack([_heading, _table_tabs])
    base_tables
    return
|
1065 |
+
|
1066 |
+
|
1067 |
+
@app.cell(hide_code=True)
def _(
    anti_join,
    cross_join,
    full_join,
    inner_join,
    join_result,
    mo,
    right_join,
    self_join,
    semi_join,
):
    # One-sentence explanation per join type, keyed by the same labels that
    # are used for the tabs below.
    join_description = {
        "Left Join": "Shows all records from the left table and matching records from the right table. Non-matches filled with NULL.",
        "Inner Join": "Shows only the records where there's a match in both tables.",
        "Right Join": "Shows all records from the right table and matching records from the left table. Non-matches filled with NULL.",
        "Full Outer Join": "Shows all records from both tables, with NULL values where there's no match.",
        "Cross Join": "Returns the Cartesian product - all possible combinations of rows from both tables.",
        "Self Join": "Joins a table with itself, used to compare rows within the same table.",
        "Semi Join": "Returns rows from the first table where one or more matches exist in the second table.",
        "Anti Join": "Returns rows from the first table where no matches exist in the second table.",
    }

    # Pair every tab label with its query result, then wrap each result in a
    # table widget; dict order fixes the tab order.
    _results_by_label = {
        "Left Join": join_result,
        "Inner Join": inner_join,
        "Right Join": right_join,
        "Full Outer Join": full_join,
        "Cross Join": cross_join,
        "Self Join": self_join,
        "Semi Join": semi_join,
        "Anti Join": anti_join,
    }
    join_tabs = mo.ui.tabs(
        {
            _label: mo.ui.table(_result)
            for _label, _result in _results_by_label.items()
        }
    )
    return join_description, join_tabs
|
1104 |
+
|
1105 |
|
1106 |
+
@app.cell(hide_code=True)
def _(join_description, join_tabs, mo):
    # Label of the tab that is currently selected; it doubles as the key into
    # `join_description`, so it is read once and reused.
    _selected = join_tabs.value

    join_display = mo.vstack(
        [
            mo.vstack([mo.md("# SQL Join Operations")], align="center"),
            mo.md(f"**{_selected}**: {join_description[_selected]}"),
            mo.md("## Join Results"),
            join_tabs,
        ],
        gap=2,
        justify="space-between",
    )

    join_display
    return
|
1121 |
|
1122 |
|
|
|
1126 |
return
|
1127 |
|
1128 |
|
1129 |
+
@app.cell(hide_code=True)
|
1130 |
+
def _(new_memory_db):
|
1131 |
# Execute an aggregate query
|
1132 |
agg_result = new_memory_db.execute("""
|
1133 |
SELECT
|
|
|
1142 |
|
1143 |
|
1144 |
@app.cell(hide_code=True)
def _(agg_result, mo):
    # Show the whole-table aggregate as a table under a centered heading.
    _heading = mo.vstack(
        [mo.md("## Aggregate Results (All Users):")], align="center"
    )
    mo.vstack(
        [_heading, mo.ui.table(agg_result)],
        gap=2,
        justify="space-between",
    )
    return
|
1157 |
|
1158 |
|
1159 |
+
@app.cell(hide_code=True)
|
1160 |
def _(new_memory_db):
|
1161 |
age_groups = new_memory_db.execute("""
|
1162 |
SELECT
|
|
|
1175 |
return (age_groups,)
|
1176 |
|
1177 |
|
1178 |
+
@app.cell(hide_code=True)
def _(age_groups, mo):
    # Show the per-age-range aggregate as a table under a centered heading.
    # The table widget is created exactly once, as part of the stack.
    _heading = mo.vstack(
        [mo.md("## Aggregate Results (Grouped by Age Range):")],
        align="center",
    )
    mo.vstack(
        [_heading, mo.ui.table(age_groups)],
        gap=2,
        justify="space-between",
    )
    return
|
1193 |
|
1194 |
|
1195 |
+
@app.cell(hide_code=True)
|
1196 |
+
def _(new_memory_db):
|
1197 |
window_result = new_memory_db.execute("""
|
1198 |
SELECT
|
1199 |
id,
|
|
|
1206 |
FROM users_memory
|
1207 |
ORDER BY balance_rank
|
1208 |
""").df()
|
1209 |
+
return (window_result,)
|
1210 |
|
1211 |
+
|
1212 |
+
@app.cell(hide_code=True)
def _(mo, window_result):
    # Show the window-function query result as a table under a centered heading.
    _heading = mo.vstack([mo.md("## Window Functions Example")], align="center")
    mo.vstack(
        [_heading, mo.ui.table(window_result)],
        gap=2,
        justify="space-between",
    )
    return
|
1223 |
|
1224 |
|
|
|
1228 |
return
|
1229 |
|
1230 |
|
1231 |
+
@app.cell(hide_code=True)
|
1232 |
+
def _(new_memory_db):
|
1233 |
polars_result = new_memory_db.execute(
|
1234 |
"""SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
|
1235 |
).pl()
|
|
|
1237 |
|
1238 |
|
1239 |
@app.cell(hide_code=True)
def _(mo, polars_result):
    # Show the query result fetched as a Polars frame, under a centered heading.
    _heading = mo.vstack(
        [mo.md("## Query Result as Polars DataFrame:")],
        align="center",
    )
    mo.vstack(
        [_heading, mo.ui.table(polars_result)],
        gap=2,
        justify="space-between",
    )
    return
|
1253 |
|
1254 |
|
1255 |
+
@app.cell(hide_code=True)
|
1256 |
def _(new_memory_db):
|
1257 |
pandas_result = new_memory_db.execute(
|
1258 |
"""SELECT * FROM users_memory WHERE age > 25 ORDER BY age"""
|
|
|
1261 |
|
1262 |
|
1263 |
@app.cell(hide_code=True)
def _(mo, pandas_result):
    # Show the same query result fetched as a pandas frame, under a centered
    # heading.
    _heading = mo.vstack(
        [mo.md("## Same Query Result as Pandas DataFrame:")],
        align="center",
    )
    mo.vstack(
        [_heading, mo.ui.table(pandas_result)],
        gap=2,
        justify="space-between",
    )
    return
|
1277 |
|
1278 |
|
1279 |
@app.cell(hide_code=True)
def _(mo):
    # Header for the Polars-vs-pandas comparison: a centered section title
    # followed by a left-aligned caption for the Polars example.
    _title = mo.vstack(
        [mo.md("## Differences in DataFrame Handling")],
        align="center",
    )
    _polars_caption = mo.vstack(
        [mo.md("## Polars: Filter users over 35 and calculate average balance")],
        align="start",
    )
    mo.vstack(
        [_title, _polars_caption],
        gap=2,
        justify="space-between",
    )
    return
|
1299 |
|
1300 |
|
1301 |
+
@app.cell(hide_code=True)
|
1302 |
def _(mo, pl, polars_result):
|
1303 |
def _():
|
1304 |
polars_filtered = polars_result.filter(pl.col("age") > 35)
|
|
|
1313 |
mo.md("### Average Account Balance:"),
|
1314 |
mo.ui.table(polars_avg),
|
1315 |
],
|
1316 |
+
gap=2,
|
1317 |
)
|
1318 |
return layout
|
1319 |
|
|
|
1323 |
|
1324 |
|
1325 |
@app.cell(hide_code=True)
def _(mo, pandas_result):
    # Mean of "account_balance" over the rows with age > 35, selected with a
    # single boolean-mask `.loc` lookup.
    pandas_avg = pandas_result.loc[
        pandas_result["age"] > 35, "account_balance"
    ].mean()

    # Centered heading, then the average formatted to two decimals,
    # left-aligned.
    _heading = mo.vstack(
        [mo.md("## Pandas: Same operation in pandas style")],
        align="center",
    )
    _average_line = mo.vstack(
        [mo.md(f"### Average balance: {pandas_avg:.2f}")],
        align="start",
    )
    mo.vstack([_heading, _average_line])
    return
|
1341 |
|
1342 |
|
1343 |
@app.cell(hide_code=True)
def _(mo):
    # Top-level heading that opens the visualization section.
    mo.md("# 9. Data Visualization with DuckDB and Plotly")
    return
|
1347 |
|
1348 |
|
1349 |
+
@app.cell(hide_code=True)
|
1350 |
def _(age_groups, mo, new_memory_db, plotly_express):
|
1351 |
# User distribution by age group
|
1352 |
fig1 = plotly_express.bar(
|
|
|
1362 |
text=age_groups["count"],
|
1363 |
textposition="outside",
|
1364 |
)
|
1365 |
+
fig1.update_layout(
|
1366 |
+
height=450,
|
1367 |
+
margin=dict(t=50, b=50, l=50, r=25),
|
1368 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1369 |
+
template="plotly_white",
|
1370 |
+
)
|
1371 |
|
1372 |
|
1373 |
# Average balance by age group
|
|
|
1384 |
text=[f"${val:.2f}" for val in age_groups["avg_balance"]],
|
1385 |
textposition="outside",
|
1386 |
)
|
1387 |
+
fig2.update_layout(
|
1388 |
+
height=450,
|
1389 |
+
margin=dict(t=50, b=50, l=50, r=25),
|
1390 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1391 |
+
template="plotly_white",
|
1392 |
+
)
|
1393 |
|
1394 |
|
1395 |
# Age vs Account Balance scatter plot
|
|
|
1416 |
size_max=15,
|
1417 |
)
|
1418 |
fig3.update_traces(marker=dict(size=12))
|
1419 |
+
fig3.update_layout(
|
1420 |
+
height=450,
|
1421 |
+
margin=dict(t=50, b=50, l=50, r=25),
|
1422 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1423 |
+
template="plotly_white",
|
1424 |
+
)
|
1425 |
|
1426 |
|
1427 |
# Distribution of account balances
|
|
|
1444 |
color_discrete_sequence=plotly_express.colors.qualitative.Pastel,
|
1445 |
)
|
1446 |
fig4.update_traces(textinfo="percent+label", textposition="inside")
|
1447 |
+
fig4.update_layout(
|
1448 |
+
height=450,
|
1449 |
+
margin=dict(t=50, b=50, l=50, r=25),
|
1450 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1451 |
+
template="plotly_white",
|
1452 |
+
)
|
1453 |
|
1454 |
|
1455 |
category_tabs = mo.ui.tabs(
|
|
|
1462 |
"Average Balance": mo.ui.plotly(fig2),
|
1463 |
}
|
1464 |
)
|
1465 |
+
],
|
1466 |
+
gap=2,
|
1467 |
+
justify="space-between",
|
1468 |
),
|
1469 |
"Financial Analysis": mo.vstack(
|
1470 |
[
|
|
|
1474 |
"Balance Distribution": mo.ui.plotly(fig4),
|
1475 |
}
|
1476 |
)
|
1477 |
+
],
|
1478 |
+
gap=2,
|
1479 |
+
justify="space-between",
|
1480 |
),
|
1481 |
},
|
1482 |
lazy=True,
|
|
|
1484 |
|
1485 |
mo.vstack(
|
1486 |
[
|
1487 |
+
mo.vstack(
|
1488 |
+
[mo.md(f"""## Select a visualization category:""")],
|
1489 |
+
align="start",
|
1490 |
+
),
|
1491 |
category_tabs,
|
1492 |
],
|
1493 |
+
gap=2,
|
1494 |
+
justify="space-between",
|
1495 |
)
|
1496 |
return
|
1497 |
|
|
|
1500 |
def _(mo):
|
1501 |
mo.md(
|
1502 |
r"""
|
1503 |
+
/// admonition |
|
1504 |
+
## Database Management Best Practices
|
1505 |
+
///
|
1506 |
|
1507 |
### Closing Connections
|
1508 |
|
|
|
1544 |
|
1545 |
|
1546 |
@app.cell(hide_code=True)
def _(mo):
    # Heading that opens the interactive dashboard section.
    mo.md("## 10. Interactive DuckDB Dashboard with marimo and Plotly")
    return
|
1550 |
|
1551 |
|
1552 |
+
@app.cell(hide_code=True)
|
1553 |
def _(mo):
|
1554 |
# Create an interactive filter for age range
|
1555 |
min_age = mo.ui.slider(20, 50, value=25, label="Minimum Age")
|
|
|
1557 |
return max_age, min_age
|
1558 |
|
1559 |
|
1560 |
+
@app.cell(hide_code=True)
|
1561 |
def _(max_age, min_age, new_memory_db):
|
1562 |
# Create a function to filter data and update visualizations
|
1563 |
def get_filtered_data(min_val=min_age.value, max_val=max_age.value):
|
|
|
1580 |
return (get_filtered_data,)
|
1581 |
|
1582 |
|
1583 |
+
@app.cell(hide_code=True)
|
1584 |
def _(get_filtered_data):
|
1585 |
def get_metrics(data=get_filtered_data()):
|
1586 |
return {
|
|
|
1591 |
return (get_metrics,)
|
1592 |
|
1593 |
|
1594 |
+
@app.cell(hide_code=True)
|
1595 |
def _(get_metrics, mo):
|
1596 |
def metrics_display(metrics=get_metrics()):
|
1597 |
return mo.hstack(
|
|
|
1619 |
),
|
1620 |
],
|
1621 |
justify="space-between",
|
1622 |
+
gap=2,
|
1623 |
)
|
1624 |
return (metrics_display,)
|
1625 |
|
1626 |
|
1627 |
+
@app.cell(hide_code=True)
|
1628 |
def _(get_filtered_data, max_age, min_age, mo, plotly_express):
|
1629 |
def create_visualization(
|
1630 |
data=get_filtered_data(), min_val=min_age.value, max_val=max_age.value
|
|
|
1647 |
height=400,
|
1648 |
xaxis_tickangle=-45,
|
1649 |
margin=dict(t=50, b=70, l=50, r=30),
|
1650 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1651 |
+
template="plotly_white",
|
1652 |
)
|
1653 |
fig1.update_traces(
|
1654 |
textposition="outside",
|
|
|
1667 |
height=400,
|
1668 |
margin=dict(t=50, b=70, l=50, r=30),
|
1669 |
bargap=0.1,
|
1670 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1671 |
+
template="plotly_white",
|
1672 |
)
|
1673 |
|
1674 |
fig3 = plotly_express.scatter(
|
|
|
1686 |
fig3.update_layout(
|
1687 |
height=400,
|
1688 |
margin=dict(t=50, b=70, l=50, r=30),
|
1689 |
+
hoverlabel=dict(bgcolor="white", font_size=12),
|
1690 |
+
template="plotly_white",
|
1691 |
)
|
1692 |
|
1693 |
return mo.ui.tabs(
|
|
|
1700 |
return (create_visualization,)
|
1701 |
|
1702 |
|
1703 |
+
@app.cell(hide_code=True)
|
1704 |
def _(
|
1705 |
create_visualization,
|
1706 |
get_filtered_data,
|
|
|
1710 |
mo,
|
1711 |
):
|
1712 |
def dashboard(
|
1713 |
+
min_val=min_age.value,
|
1714 |
+
max_val=max_age.value,
|
1715 |
+
metrics=metrics_display(),
|
1716 |
+
data=get_filtered_data(),
|
1717 |
+
visualization=create_visualization(),
|
1718 |
):
|
1719 |
return mo.vstack(
|
1720 |
[
|
|
|
1725 |
mo.md("### Visualizations"),
|
1726 |
visualization,
|
1727 |
],
|
1728 |
+
gap=2,
|
1729 |
+
justify="space-between",
|
1730 |
)
|
1731 |
+
|
1732 |
+
|
1733 |
dashboard()
|
1734 |
return
|
1735 |
|
1736 |
|
1737 |
@app.cell(hide_code=True)
|
1738 |
+
def _(mo):
|
1739 |
mo.md(
|
1740 |
rf"""
|
1741 |
# Summary and Key Takeaways
|
|
|
1748 |
|
1749 |
3. **Data insertion**: We demonstrated different ways to insert data, including single inserts and bulk loading.
|
1750 |
|
1751 |
+
4. **SQL queries**: We executed various SQL queries directly and through marimo's UI components.
|
1752 |
|
1753 |
5. **Integration with Polars**: We showed how DuckDB can work seamlessly with Polars DataFrames.
|
1754 |
|
|
|
1760 |
|
1761 |
9. **Best practices**: We reviewed best practices for managing DuckDB connections and transactions.
|
1762 |
|
1763 |
+
10. **Visualization**: We created interactive visualizations and dashboards with Plotly and marimo.
|
1764 |
|
1765 |
DuckDB is an excellent tool for data analysis, especially for analytical workloads. Its in-process nature makes it fast and easy to use, while its SQL compatibility makes it accessible for anyone familiar with SQL databases.
|
1766 |
|
|
|
1769 |
- Try loading larger datasets into DuckDB
|
1770 |
- Experiment with more complex queries and window functions
|
1771 |
- Use DuckDB's COPY functionality to import/export data from/to files
|
1772 |
+
- Create more advanced interactive dashboards with marimo and Plotly
|
1773 |
"""
|
1774 |
)
|
1775 |
return
|