{ "cells": [ { "cell_type": "code", "execution_count": 26, "id": "36b5fea9-940a-4f92-9cd9-7c8ce4d0f59b", "metadata": {}, "outputs": [], "source": [ "import os\n", "from collections import Counter\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.feature_extraction.text import TfidfTransformer\n", "from sklearn.decomposition import TruncatedSVD\n", "from tqdm.notebook import tqdm\n", "from openTSNE import TSNE\n", "import datashader as ds\n", "import colorcet as cc\n", "\n", "from dask.distributed import Client, LocalCluster\n", "import dask.dataframe as dd\n", "import dask_ml.feature_extraction.text\n", "import dask.bag as db\n", "\n", "from transformers import AutoTokenizer\n", "from huggingface_hub import notebook_login, HfApi, hf_hub_download, Repository\n", "from datasets import load_dataset\n", "from datasets.utils.py_utils import convert_file_size_to_int" ] },
{ "cell_type": "code", "execution_count": 2, "id": "6acce76a-5b79-4ee0-ad92-08b819d268e8", "metadata": {}, "outputs": [], "source": [
 "# Load the BLOOM tokenizer and keep both directions of its vocabulary mapping:\n",
 "# `vocab` is later used as the fixed CountVectorizer vocabulary, and\n",
 "# `reverse_vocab` maps a column index back to its token.\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"bigscience/bloom\")\n",
 "vocab = tokenizer.vocab\n",
 "reverse_vocab = {token_id: token for token, token_id in vocab.items()}" ] },
{ "cell_type": "code", "execution_count": 1, "id": "b8811f08-4b4c-4e13-86b6-43ee6a4e6e07", "metadata": {}, "outputs": [], "source": [
 "def batch_tokenize(batch):\n",
 "    \"\"\"Tokenize a batch of documents into space-joined token strings.\n",
 "\n",
 "    Runs the notebook-level ``tokenizer`` over ``batch['text']`` and, for each\n",
 "    resulting encoding, joins its tokens with a single space.\n",
 "\n",
 "    Returns a dict with one key, ``'tokenized'``, holding the list of joined\n",
 "    strings (one per input document).\n",
 "    \"\"\"\n",
 "    encodings = tokenizer(batch['text']).encodings\n",
 "    joined = []\n",
 "    for encoding in encodings:\n",
 "        joined.append(' '.join(encoding.tokens))\n",
 "    return {'tokenized': joined}" ] },
{ "cell_type": "code", "execution_count": 5, "id": "23af3f6c-34bb-4011-863d-d8bea08192a7", "metadata": {}, "outputs": [], "source": [ "dset_name = \"bigscience-catalogue-lm-data/roots_fr_uncorpus\"" ] },
{ "cell_type": "code", "execution_count": 9, "id": "52054ce2-7f33-49c5-a5a9-fe91548b8fae", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using custom data configuration bigscience-catalogue-lm-data--cleaned_lm_fr_uncorpus-df68e40301b12c79\n", "Reusing dataset parquet 
(/media/ssd/BIGSCIENCE/cache/bigscience-catalogue-lm-data___parquet/bigscience-catalogue-lm-data--cleaned_lm_fr_uncorpus-df68e40301b12c79/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" ] } ], "source": [
 "# The cache location is machine-specific: honour HF_DATASETS_CACHE when it is set\n",
 "# and fall back to the path this notebook was originally run with.\n",
 "cache_dir = os.environ.get(\"HF_DATASETS_CACHE\", \"/media/ssd/BIGSCIENCE/cache\")\n",
 "dset = load_dataset(dset_name, split=\"train\", cache_dir=cache_dir)" ] },
{ "cell_type": "code", "execution_count": 12, "id": "bd31288f-82f1-4752-aeb1-d1ec6cc6e00c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['text', 'meta'],\n", " num_rows: 145876\n", "})" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dset" ] },
{ "cell_type": "code", "execution_count": 15, "id": "f59e42a7-0cc0-4425-b4e8-40446a1d187f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " " ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "32ed568f9c304dc8a03249663aeb2de4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "#0: 0%| | 0/82 [00:00\n", "
\n", "
\n", "

Client

\n", "

Client-e038e83a-221b-11ed-a525-40b0760fea7a

\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", " Dashboard: http://127.0.0.1:8787/status\n", "
\n", "\n", " \n", "
\n", "

Cluster Info

\n", "
\n", "
\n", "
\n", "
\n", "

LocalCluster

\n", "

45d34c47

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", "
\n", " Dashboard: http://127.0.0.1:8787/status\n", " \n", " Workers: 7\n", "
\n", " Total threads: 28\n", " \n", " Total memory: 62.60 GiB\n", "
Status: runningUsing processes: True
\n", "\n", "
\n", " \n", "

Scheduler Info

\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "

Scheduler

\n", "

Scheduler-b9de7467-0242-4807-ad9d-9241afa2c2a1

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Comm: tcp://127.0.0.1:34657\n", " \n", " Workers: 7\n", "
\n", " Dashboard: http://127.0.0.1:8787/status\n", " \n", " Total threads: 28\n", "
\n", " Started: Just now\n", " \n", " Total memory: 62.60 GiB\n", "
\n", "
\n", "
\n", "\n", "
\n", " \n", "

Workers

\n", "
\n", "\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 0

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:37977\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:44675/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:35529\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-f4t43chg\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 1

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:34541\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:44713/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:36837\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-d7xhhp6y\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 2

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:41035\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:41835/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:37267\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-r1hlyoa0\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 3

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:43453\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:39007/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:35059\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-5u2rmzie\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 4

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:43103\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:45759/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:37227\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-r1c7349j\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 5

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:35613\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:34293/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:37127\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-6f6k7cqw\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 6

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:39423\n", " \n", " Total threads: 4\n", "
\n", " Dashboard: http://127.0.0.1:39111/status\n", " \n", " Memory: 8.94 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:38503\n", "
\n", " Local directory: /media/ssd/BIGSCIENCE/dask-worker-space/worker-ba89br89\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client = Client()\n", "client" ] },
{ "cell_type": "code", "execution_count": 6, "id": "dd26792f-ad76-4928-b94b-70f7d9dad6eb", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "distributed.utils_perf - WARNING - full garbage collections took 27% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 27% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 32% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n",
 "distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 27% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 29% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 30% CPU time recently (threshold: 10%)\n", "distributed.utils_perf - WARNING - full garbage collections took 31% CPU time recently (threshold: 10%)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 9.24 s, sys: 742 ms, total: 9.98 s\n", "Wall time: 23 s\n" ] } ], "source": [
 "%%time\n",
 "# Lazily read the tokenized corpus; its `tokenized` column is expected to hold\n",
 "# space-joined tokens in the format produced by batch_tokenize.\n",
 "# Named `tokenized_ddf` (not `df`) because `df` is reused later for a pandas frame.\n",
 "tokenized_ddf = dd.read_parquet(f'{dset_name}/tokenized/')\n",
 "\n",
 "# lowercase=False is essential here. CountVectorizer lower-cases every document\n",
 "# *before* calling the tokenizer by default, which rewrites byte-level BPE symbols\n",
 "# (e.g. 'Ġ' -> 'ġ', 'Ã' -> 'ã'); the altered tokens no longer match the fixed\n",
 "# vocabulary and are silently dropped from the counts.\n",
 "vect = dask_ml.feature_extraction.text.CountVectorizer(\n",
 "    tokenizer=str.split,\n",
 "    token_pattern=None,\n",
 "    lowercase=False,\n",
 "    vocabulary=vocab,\n",
 ")\n",
 "tokenized_bag = tokenized_ddf['tokenized'].to_bag()\n",
 "# Lazy document-term matrix; it is materialised by X.compute() in the next cell.\n",
 "X = vect.transform(tokenized_bag)" ] },
{ "cell_type": "code", "execution_count": 10, "id": "8811759e-f44d-48ed-b1f6-86befa2cd8a9", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 
39.8 s, sys: 5.9 s, total: 45.7 s\n", "Wall time: 2min 39s\n" ] } ], "source": [ "%%time\n", "counts = X.compute()\n", "client.shutdown()" ] }, { "cell_type": "code", "execution_count": 14, "id": "a4a8bff0-68c2-4f63-9330-bcab28e2cd02", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<145876x250680 sparse matrix of type '<class 'numpy.int64'>'\n", "\twith 43446986 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "counts" ] }, { "cell_type": "code", "execution_count": 11, "id": "acb19280-e554-4d92-8706-f843a299217c", "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "[(',', 34814464),\n", " ('.', 21948181),\n", " (')', 4949061),\n", " (';', 2740524),\n", " ('ant', 1231074),\n", " ('1', 1134577),\n", " ('),', 1117959),\n", " ('le', 1042373),\n", " ('e', 996400),\n", " ('ation', 976949),\n", " ('2', 965098),\n", " ('/', 901092),\n", " ('b', 810668),\n", " ('a', 801115),\n", " ('s', 752845),\n", " ('ent', 749895),\n", " ('3', 714091),\n", " ('c', 675251),\n", " ('er', 653847),\n", " ('ement', 650192),\n", " (':', 646632),\n", " ('ie', 639917),\n", " ('la', 623993),\n", " ('4', 607453),\n", " ('ations', 604552),\n", " ('es', 563981),\n", " ('m', 559806),\n", " ('d', 554653),\n", " ('i', 544907),\n", " ('les', 541236),\n", " ('is', 514949),\n", " (').', 501087),\n", " ('5', 494491),\n", " ('isation', 488711),\n", " ('en', 487799),\n", " ('\"', 473312),\n", " ('un', 462088),\n", " ('6', 418736),\n", " ('aient', 409955),\n", " ('f', 409463),\n", " ('g', 399877),\n", " ('ait', 391162),\n", " ('r', 387126),\n", " ('ants', 384791),\n", " ('>>', 376997),\n", " ('us', 366909),\n", " ('ique', 359072),\n", " ('n', 343549),\n", " ('de', 342141),\n", " ('7', 339226),\n", " ('-', 338144),\n", " ('aux', 336640),\n", " ('v', 330524),\n", " ('ements', 303551),\n", " (\"l'\", 302513),\n", " ('il', 302115),\n", " ('ies', 300399),\n", " ('it', 299106),\n", " ('al', 298741),\n", " 
('ale', 296358),\n", " ('if', 294240),\n", " ('8', 292746),\n", " ('era', 290015),\n", " ('p', 285288),\n", " ('par', 282921),\n", " ('ur', 280345),\n", " ('t', 279981),\n", " ('ante', 278659),\n", " (']', 274980),\n", " ('an', 273027),\n", " ('u', 272306),\n", " ('iques', 270463),\n", " ('in', 269539),\n", " ('9', 263244),\n", " ('aire', 262021),\n", " ('et', 261216),\n", " ('age', 261030),\n", " ('h', 255820),\n", " ('l', 255274),\n", " ('10', 255160),\n", " ('au', 255134),\n", " ('ateur', 253419),\n", " ('on', 251010),\n", " ('ues', 250799),\n", " ('ir', 244181),\n", " ('voir', 235858),\n", " ('at', 231829),\n", " ('ales', 228747),\n", " ('con', 217926),\n", " ('iser', 215583),\n", " ('ont', 212434),\n", " ('ires', 211954),\n", " ('antes', 205935),\n", " ('erait', 205901),\n", " ('ons', 203324),\n", " ('ateurs', 197845),\n", " ('ue', 197308),\n", " ('mr', 196911),\n", " ('des', 195245),\n", " ('12', 193578)]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "freq = counts.sum(axis=0).A1\n", "frequencies = Counter({reverse_vocab[i]:freq[i] for i in freq.nonzero()[0]})\n", "frequencies.most_common(100)" ] },
{ "cell_type": "code", "execution_count": 17, "id": "9c8b98af-097f-4bf1-9bb1-2f0a130751ca", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 994 ms, sys: 116 ms, total: 1.11 s\n", "Wall time: 1.06 s\n" ] } ], "source": [
 "%%time\n",
 "# Create the transformer in this cell: no other cell in the notebook defines\n",
 "# `tfidf_transformer`, so a fresh kernel would otherwise fail with NameError.\n",
 "# NOTE(review): default TfidfTransformer settings are assumed - confirm they match the original run.\n",
 "tfidf_transformer = TfidfTransformer()\n",
 "tfidf = tfidf_transformer.fit_transform(counts)" ] },
{ "cell_type": "code", "execution_count": 21, "id": "c6a122c2-bce7-413c-b488-2faaa2296d4f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3min 37s, sys: 1min, total: 4min 38s\n", "Wall time: 1min 7s\n" ] } ], "source": [
 "%%time\n",
 "# TruncatedSVD uses a randomized solver by default; seed it (same seed as the\n",
 "# t-SNE step) so the reduced features and the final map are reproducible.\n",
 "svd = TruncatedSVD(n_components=160, random_state=42)\n",
 "# X is rebound here from the lazy Dask matrix to the dense SVD-reduced features.\n",
 "X = svd.fit_transform(tfidf)" ] },
{ "cell_type": "code", "execution_count": 24, "id": "df3fd243-87d9-4b25-bcc3-cbf3f779630d", "metadata": {}, "outputs": [],
"source": [ "tsne = TSNE(\n", " perplexity=30,\n", " metric=\"cosine\",\n", " n_jobs=28,\n", " random_state=42,\n", " verbose=True,\n", ")" ] },
{ "cell_type": "code", "execution_count": 25, "id": "498ae603-f853-41bd-a75e-7854c1464e7e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--------------------------------------------------------------------------------\n", "TSNE(metric='cosine', n_jobs=28, random_state=42, verbose=True)\n", "--------------------------------------------------------------------------------\n", "===> Finding 90 nearest neighbors using Annoy approximate search using cosine distance...\n", " --> Time elapsed: 31.83 seconds\n", "===> Calculating affinity matrix...\n", " --> Time elapsed: 1.01 seconds\n", "===> Calculating PCA-based initialization...\n", " --> Time elapsed: 0.62 seconds\n", "===> Running optimization with exaggeration=12.00, lr=12156.33 for 250 iterations...\n", "Iteration 50, KL divergence 8.0172, 50 iterations in 3.3109 sec\n", "Iteration 100, KL divergence 7.6925, 50 iterations in 3.2977 sec\n", "Iteration 150, KL divergence 7.5762, 50 iterations in 3.3080 sec\n", "Iteration 200, KL divergence 7.5796, 50 iterations in 3.3637 sec\n", "Iteration 250, KL divergence 7.5440, 50 iterations in 3.3075 sec\n", " --> Time elapsed: 16.59 seconds\n", "===> Running optimization with exaggeration=1.00, lr=12156.33 for 500 iterations...\n", "Iteration 50, KL divergence 5.4392, 50 iterations in 3.0292 sec\n", "Iteration 100, KL divergence 4.9245, 50 iterations in 3.0565 sec\n", "Iteration 150, KL divergence 4.6700, 50 iterations in 3.6400 sec\n", "Iteration 200, KL divergence 4.5170, 50 iterations in 4.5099 sec\n", "Iteration 250, KL divergence 4.4161, 50 iterations in 5.1544 sec\n", "Iteration 300, KL divergence 4.3463, 50 iterations in 5.7396 sec\n", "Iteration 350, KL divergence 4.2947, 50 iterations in 7.1917 sec\n", "Iteration 400, KL divergence 4.2536, 50 iterations in 7.5439 sec\n", "Iteration 
450, KL divergence 4.2217, 50 iterations in 8.4501 sec\n", "Iteration 500, KL divergence 4.1947, 50 iterations in 8.8987 sec\n", " --> Time elapsed: 57.21 seconds\n", "CPU times: user 22min 46s, sys: 12.8 s, total: 22min 59s\n", "Wall time: 1min 52s\n" ] } ], "source": [
 "%%time \n",
 "# Fit t-SNE on the SVD-reduced features held in X.\n",
 "tsne_embedding = tsne.fit(X)" ] },
{ "cell_type": "code", "execution_count": 28, "id": "97915b89-2844-4ada-ae28-82377ccb21a0", "metadata": {}, "outputs": [], "source": [
 "# Wrap the embedding in a DataFrame with named columns so the plotting cell can refer to 'x' and 'y'.\n",
 "embedding_columns = ['x', 'y']\n",
 "df = pd.DataFrame(tsne_embedding, columns=embedding_columns)" ] },
{ "cell_type": "code", "execution_count": 31, "id": "4fe427bd-6326-4442-ab32-b47990439c47", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 44.5 ms, sys: 16 µs, total: 44.5 ms\n", "Wall time: 40.2 ms\n" ] }, { "data": { "image/png": "\n", "text/html": [ "" ], "text/plain": [ "\n", "array([[4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080],\n", " [4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080],\n", " [4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080],\n", " ...,\n", " [4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080],\n", " [4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080],\n", " [4278190080, 4278190080, 4278190080, ..., 4278190080, 4278190080,\n", " 4278190080]], dtype=uint32)\n", "Coordinates:\n", " * x (x) float64 -71.24 -71.0 -70.75 -70.5 ... 75.77 76.01 76.26 76.51\n", " * y (y) float64 -65.28 -65.06 -64.83 -64.6 ... 
69.14 69.36 69.59 69.81" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "%%time\n",
 "\n",
 "# Rasterise the embedding on a 600x600 canvas, colour the aggregate with the\n",
 "# `fire` colormap (histogram-equalised), and display it on a black background.\n",
 "canvas = ds.Canvas(plot_width=600, plot_height=600)\n",
 "agg = canvas.points(df, 'x', 'y')\n",
 "img = ds.tf.shade(agg, cmap=cc.fire, how='eq_hist')\n",
 "ds.tf.set_background(img, \"black\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }