{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "730ba509",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the value of every top-level expression in a cell,\n",
    "# not only the last one.\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d9acd4b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import sys\n",
    "\n",
    "# Parent of the current working directory (presumably the project root,\n",
    "# going by the name `proj_dir`; confirm against the repository layout).\n",
    "proj_dir = Path.cwd().parent\n",
    "\n",
    "# Make that directory importable. The membership check keeps the cell\n",
    "# idempotent: re-running it does not pile up duplicate sys.path entries.\n",
    "if str(proj_dir) not in sys.path:\n",
    "    sys.path.append(str(proj_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "62452860",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00affc9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset from the Hugging Face Hub, reusing a cached copy if present.\n",
    "# `ignore_verifications=True` is deprecated in `datasets` in favour of\n",
    "# `verification_mode`; \"no_checks\" is the equivalent setting (no checksum,\n",
    "# size or split verification).\n",
    "dataset = load_dataset(\n",
    "    'derek-thomas/dataset-creator-askreddit',\n",
    "    download_mode=\"reuse_cache_if_exists\",\n",
    "    verification_mode=\"no_checks\",\n",
    ")"
   ]
  },
  { "cell_type": "code", "execution_count": 29, "id": "ba84be68", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | score | \n", "num_comments | \n", "title | \n", "permalink | \n", "selftext | \n", "url | \n", "created_utc | \n", "author | \n", "id | \n", "downs | \n", "ups | \n", "date | \n", "time | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2 | \n", "4 | \n", "Reddit, if someone had to describe you to a st... | \n", "/r/AskReddit/comments/15sn6y/reddit_if_someone... | \n", "They would be talking about you without your p... | \n", "http://www.reddit.com/r/AskReddit/comments/15s... | \n", "2013-01-01 23:59:40+00:00 | \n", "[deleted] | \n", "15sn6y | \n", "0 | \n", "2 | \n", "2013-01-01 | \n", "23:59:40 | \n", "
1 | \n", "5 | \n", "24 | \n", "What kind of car does the average \\nRedditor d... | \n", "/r/AskReddit/comments/15sn6m/what_kind_of_car_... | \n", "I've always wanted to know what kind of car th... | \n", "http://www.reddit.com/r/AskReddit/comments/15s... | \n", "2013-01-01 23:59:31+00:00 | \n", "PaytonAdams | \n", "15sn6m | \n", "0 | \n", "5 | \n", "2013-01-01 | \n", "23:59:31 | \n", "
2 | \n", "1 | \n", "5 | \n", "What movies have made you go back to the theat... | \n", "/r/AskReddit/comments/15sn6b/what_movies_have_... | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/15s... | \n", "2013-01-01 23:59:20+00:00 | \n", "[deleted] | \n", "15sn6b | \n", "0 | \n", "1 | \n", "2013-01-01 | \n", "23:59:20 | \n", "
3 | \n", "0 | \n", "18 | \n", "Worst fear(s)? | \n", "/r/AskReddit/comments/15sn4u/worst_fears/ | \n", "So what is your worst fear, reddit? | \n", "http://www.reddit.com/r/AskReddit/comments/15s... | \n", "2013-01-01 23:58:37+00:00 | \n", "[deleted] | \n", "15sn4u | \n", "0 | \n", "0 | \n", "2013-01-01 | \n", "23:58:37 | \n", "
4 | \n", "11 | \n", "29 | \n", "If there was a type of ink that lasted only fo... | \n", "/r/AskReddit/comments/15sn44/if_there_was_a_ty... | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/15s... | \n", "2013-01-01 23:58:15+00:00 | \n", "Honeybeard | \n", "15sn44 | \n", "0 | \n", "11 | \n", "2013-01-01 | \n", "23:58:15 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
3267 | \n", "0 | \n", "11 | \n", "Smokers of Reddit- What are your reasons for s... | \n", "/r/AskReddit/comments/15qzen/smokers_of_reddit... | \n", "I'm very curious as to what causes someone to ... | \n", "http://www.reddit.com/r/AskReddit/comments/15q... | \n", "2013-01-01 00:01:36+00:00 | \n", "kelsofb | \n", "15qzen | \n", "0 | \n", "0 | \n", "2013-01-01 | \n", "00:01:36 | \n", "
3268 | \n", "1 | \n", "4 | \n", "Hi | \n", "/r/AskReddit/comments/15qzei/hi/ | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/15q... | \n", "2013-01-01 00:01:34+00:00 | \n", "ImJE5US | \n", "15qzei | \n", "0 | \n", "1 | \n", "2013-01-01 | \n", "00:01:34 | \n", "
3269 | \n", "1 | \n", "2 | \n", "At the stroke of midnight I was writing this p... | \n", "/r/AskReddit/comments/15qzdx/at_the_stroke_of_... | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/15q... | \n", "2013-01-01 00:01:15+00:00 | \n", "Sangfroid_Sonder | \n", "15qzdx | \n", "0 | \n", "1 | \n", "2013-01-01 | \n", "00:01:15 | \n", "
3270 | \n", "1 | \n", "2 | \n", "With all the rape stories in the news, why don... | \n", "/r/AskReddit/comments/15qzdc/with_all_the_rape... | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/15q... | \n", "2013-01-01 00:00:58+00:00 | \n", "[deleted] | \n", "15qzdc | \n", "0 | \n", "1 | \n", "2013-01-01 | \n", "00:00:58 | \n", "
3271 | \n", "0 | \n", "3 | \n", "Do beautiful people have low entropy? | \n", "/r/AskReddit/comments/15qzd3/do_beautiful_peop... | \n", "I have been reading about entropy and arrows o... | \n", "http://www.reddit.com/r/AskReddit/comments/15q... | \n", "2013-01-01 00:00:53+00:00 | \n", "[deleted] | \n", "15qzd3 | \n", "0 | \n", "0 | \n", "2013-01-01 | \n", "00:00:53 | \n", "
3272 rows × 13 columns
\n", "