Commit
•
9de4dba
1
Parent(s):
fc00c85
Updates for datetime format and correcting most_recent_date
Browse files- main.py +15 -11
- notebooks/validate.ipynb +545 -0
- utilities/pushshift_data.py +3 -2
main.py
CHANGED
@@ -59,7 +59,7 @@ def main(date_to_fetch):
|
|
59 |
Runs the main data processing function to fetch and process subreddit data for the specified date.
|
60 |
|
61 |
Args:
|
62 |
-
date_to_fetch (
|
63 |
|
64 |
Returns:
|
65 |
most_recent_date (str): Most recent date in dataset
|
@@ -67,7 +67,7 @@ def main(date_to_fetch):
|
|
67 |
|
68 |
# Load the existing dataset from the Hugging Face hub or create a new one
|
69 |
try:
|
70 |
-
dataset = load_dataset(dataset_name)
|
71 |
logger.info("Loading existing dataset")
|
72 |
if "__index_level_0__" in dataset["all_days"].column_names:
|
73 |
dataset = dataset.remove_columns(["__index_level_0__"])
|
@@ -76,11 +76,11 @@ def main(date_to_fetch):
|
|
76 |
dataset = DatasetDict()
|
77 |
|
78 |
# Call get_subreddit_day with the calculated date
|
79 |
-
logger.info(f"Fetching data for {date_to_fetch}")
|
80 |
-
submissions = scrape_submissions_by_day(subreddit, date_to_fetch)
|
81 |
df = submissions_to_dataframe(submissions)
|
82 |
-
logger.info(f"Data fetched for {date_to_fetch}")
|
83 |
-
most_recent_date =
|
84 |
|
85 |
# Append DataFrame to split 'all_days' or create new split
|
86 |
if "all_days" in dataset:
|
@@ -93,10 +93,14 @@ def main(date_to_fetch):
|
|
93 |
|
94 |
# Drop duplicates just in case
|
95 |
new_data = new_data.drop_duplicates(subset=['id'], keep="first")
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
most_recent_date = max(
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# Convert back to dataset
|
102 |
dataset["all_days"] = Dataset.from_pandas(new_data)
|
@@ -133,7 +137,7 @@ def run_main_continuously():
|
|
133 |
|
134 |
if start_date <= two_days_ago:
|
135 |
logger.info(f"Running main function for date: {start_date}")
|
136 |
-
most_recent_date = main(
|
137 |
start_date = most_recent_date + timedelta(days=1)
|
138 |
else:
|
139 |
tomorrow = today + timedelta(days=1)
|
|
|
59 |
Runs the main data processing function to fetch and process subreddit data for the specified date.
|
60 |
|
61 |
Args:
|
62 |
+
date_to_fetch (datetime.date): The date to fetch subreddit data for
|
63 |
|
64 |
Returns:
|
65 |
most_recent_date (str): Most recent date in dataset
|
|
|
67 |
|
68 |
# Load the existing dataset from the Hugging Face hub or create a new one
|
69 |
try:
|
70 |
+
dataset = load_dataset(dataset_name, download_mode="reuse_cache_if_exists", ignore_verifications=True)
|
71 |
logger.info("Loading existing dataset")
|
72 |
if "__index_level_0__" in dataset["all_days"].column_names:
|
73 |
dataset = dataset.remove_columns(["__index_level_0__"])
|
|
|
76 |
dataset = DatasetDict()
|
77 |
|
78 |
# Call get_subreddit_day with the calculated date
|
79 |
+
logger.info(f"Fetching data for {str(date_to_fetch)}")
|
80 |
+
submissions = scrape_submissions_by_day(subreddit, str(date_to_fetch))
|
81 |
df = submissions_to_dataframe(submissions)
|
82 |
+
logger.info(f"Data fetched for {str(date_to_fetch)}")
|
83 |
+
most_recent_date = start_date
|
84 |
|
85 |
# Append DataFrame to split 'all_days' or create new split
|
86 |
if "all_days" in dataset:
|
|
|
93 |
|
94 |
# Drop duplicates just in case
|
95 |
new_data = new_data.drop_duplicates(subset=['id'], keep="first")
|
96 |
+
|
97 |
+
# Figure out dates when we restart
|
98 |
+
old_data_most_recent_date = old_data['date'].max()
|
99 |
+
most_recent_date = max(old_data_most_recent_date, most_recent_date)
|
100 |
+
|
101 |
+
if len(old_data) == len(new_data):
|
102 |
+
logger.warning("Data in hub is much more recent, using that next!")
|
103 |
+
return most_recent_date
|
104 |
|
105 |
# Convert back to dataset
|
106 |
dataset["all_days"] = Dataset.from_pandas(new_data)
|
|
|
137 |
|
138 |
if start_date <= two_days_ago:
|
139 |
logger.info(f"Running main function for date: {start_date}")
|
140 |
+
most_recent_date = main(start_date)
|
141 |
start_date = most_recent_date + timedelta(days=1)
|
142 |
else:
|
143 |
tomorrow = today + timedelta(days=1)
|
notebooks/validate.ipynb
ADDED
@@ -0,0 +1,545 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "730ba509",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"from IPython.core.interactiveshell import InteractiveShell\n",
|
11 |
+
"InteractiveShell.ast_node_interactivity = \"all\""
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 2,
|
17 |
+
"id": "d9acd4b6",
|
18 |
+
"metadata": {},
|
19 |
+
"outputs": [],
|
20 |
+
"source": [
|
21 |
+
"from pathlib import Path\n",
|
22 |
+
"import sys\n",
|
23 |
+
"proj_dir = Path.cwd().parent\n",
|
24 |
+
"\n",
|
25 |
+
"sys.path.append(str(proj_dir))\n"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "code",
|
30 |
+
"execution_count": 4,
|
31 |
+
"id": "62452860",
|
32 |
+
"metadata": {},
|
33 |
+
"outputs": [],
|
34 |
+
"source": [
|
35 |
+
"from datasets import load_dataset"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 10,
|
41 |
+
"id": "9264a232",
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [
|
44 |
+
{
|
45 |
+
"name": "stderr",
|
46 |
+
"output_type": "stream",
|
47 |
+
"text": [
|
48 |
+
"Using custom data configuration derek-thomas--dataset-creator-askreddit-806417599346c17a\n"
|
49 |
+
]
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"name": "stdout",
|
53 |
+
"output_type": "stream",
|
54 |
+
"text": [
|
55 |
+
"Downloading and preparing dataset None/None to /Users/derekthomas/.cache/huggingface/datasets/derek-thomas___parquet/derek-thomas--dataset-creator-askreddit-806417599346c17a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...\n"
|
56 |
+
]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"data": {
|
60 |
+
"application/vnd.jupyter.widget-view+json": {
|
61 |
+
"model_id": "b65ec8c7f33a40eeac5d15e6a527f830",
|
62 |
+
"version_major": 2,
|
63 |
+
"version_minor": 0
|
64 |
+
},
|
65 |
+
"text/plain": [
|
66 |
+
"Downloading data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
67 |
+
]
|
68 |
+
},
|
69 |
+
"metadata": {},
|
70 |
+
"output_type": "display_data"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"data": {
|
74 |
+
"application/vnd.jupyter.widget-view+json": {
|
75 |
+
"model_id": "2d93949f1f0144779349c73c58a68ca9",
|
76 |
+
"version_major": 2,
|
77 |
+
"version_minor": 0
|
78 |
+
},
|
79 |
+
"text/plain": [
|
80 |
+
"Extracting data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
81 |
+
]
|
82 |
+
},
|
83 |
+
"metadata": {},
|
84 |
+
"output_type": "display_data"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"data": {
|
88 |
+
"application/vnd.jupyter.widget-view+json": {
|
89 |
+
"model_id": "",
|
90 |
+
"version_major": 2,
|
91 |
+
"version_minor": 0
|
92 |
+
},
|
93 |
+
"text/plain": [
|
94 |
+
"Generating all_days split: 0%| | 0/2468888 [00:00<?, ? examples/s]"
|
95 |
+
]
|
96 |
+
},
|
97 |
+
"metadata": {},
|
98 |
+
"output_type": "display_data"
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"name": "stdout",
|
102 |
+
"output_type": "stream",
|
103 |
+
"text": [
|
104 |
+
"Dataset parquet downloaded and prepared to /Users/derekthomas/.cache/huggingface/datasets/derek-thomas___parquet/derek-thomas--dataset-creator-askreddit-806417599346c17a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.\n"
|
105 |
+
]
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"data": {
|
109 |
+
"application/vnd.jupyter.widget-view+json": {
|
110 |
+
"model_id": "0e62c7e8b3c74aa5af3b87ab17e6cb1f",
|
111 |
+
"version_major": 2,
|
112 |
+
"version_minor": 0
|
113 |
+
},
|
114 |
+
"text/plain": [
|
115 |
+
" 0%| | 0/1 [00:00<?, ?it/s]"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
"metadata": {},
|
119 |
+
"output_type": "display_data"
|
120 |
+
}
|
121 |
+
],
|
122 |
+
"source": [
|
123 |
+
"dataset = load_dataset('derek-thomas/dataset-creator-askreddit', download_mode=\"reuse_cache_if_exists\", ignore_verifications=True)"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"cell_type": "code",
|
128 |
+
"execution_count": 12,
|
129 |
+
"id": "ba84be68",
|
130 |
+
"metadata": {},
|
131 |
+
"outputs": [
|
132 |
+
{
|
133 |
+
"data": {
|
134 |
+
"text/html": [
|
135 |
+
"<div>\n",
|
136 |
+
"<style scoped>\n",
|
137 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
138 |
+
" vertical-align: middle;\n",
|
139 |
+
" }\n",
|
140 |
+
"\n",
|
141 |
+
" .dataframe tbody tr th {\n",
|
142 |
+
" vertical-align: top;\n",
|
143 |
+
" }\n",
|
144 |
+
"\n",
|
145 |
+
" .dataframe thead th {\n",
|
146 |
+
" text-align: right;\n",
|
147 |
+
" }\n",
|
148 |
+
"</style>\n",
|
149 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
150 |
+
" <thead>\n",
|
151 |
+
" <tr style=\"text-align: right;\">\n",
|
152 |
+
" <th></th>\n",
|
153 |
+
" <th>score</th>\n",
|
154 |
+
" <th>num_comments</th>\n",
|
155 |
+
" <th>title</th>\n",
|
156 |
+
" <th>permalink</th>\n",
|
157 |
+
" <th>selftext</th>\n",
|
158 |
+
" <th>url</th>\n",
|
159 |
+
" <th>created_utc</th>\n",
|
160 |
+
" <th>author</th>\n",
|
161 |
+
" <th>id</th>\n",
|
162 |
+
" <th>downs</th>\n",
|
163 |
+
" <th>ups</th>\n",
|
164 |
+
" </tr>\n",
|
165 |
+
" </thead>\n",
|
166 |
+
" <tbody>\n",
|
167 |
+
" <tr>\n",
|
168 |
+
" <th>0</th>\n",
|
169 |
+
" <td>2</td>\n",
|
170 |
+
" <td>4</td>\n",
|
171 |
+
" <td>Reddit, if someone had to describe you to a st...</td>\n",
|
172 |
+
" <td>/r/AskReddit/comments/15sn6y/reddit_if_someone...</td>\n",
|
173 |
+
" <td>They would be talking about you without your p...</td>\n",
|
174 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
|
175 |
+
" <td>2013-01-01 23:59:40</td>\n",
|
176 |
+
" <td>[deleted]</td>\n",
|
177 |
+
" <td>15sn6y</td>\n",
|
178 |
+
" <td>0</td>\n",
|
179 |
+
" <td>2</td>\n",
|
180 |
+
" </tr>\n",
|
181 |
+
" <tr>\n",
|
182 |
+
" <th>1</th>\n",
|
183 |
+
" <td>5</td>\n",
|
184 |
+
" <td>24</td>\n",
|
185 |
+
" <td>What kind of car does the average \\nRedditor d...</td>\n",
|
186 |
+
" <td>/r/AskReddit/comments/15sn6m/what_kind_of_car_...</td>\n",
|
187 |
+
" <td>I've always wanted to know what kind of car th...</td>\n",
|
188 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
|
189 |
+
" <td>2013-01-01 23:59:31</td>\n",
|
190 |
+
" <td>PaytonAdams</td>\n",
|
191 |
+
" <td>15sn6m</td>\n",
|
192 |
+
" <td>0</td>\n",
|
193 |
+
" <td>5</td>\n",
|
194 |
+
" </tr>\n",
|
195 |
+
" <tr>\n",
|
196 |
+
" <th>2</th>\n",
|
197 |
+
" <td>1</td>\n",
|
198 |
+
" <td>5</td>\n",
|
199 |
+
" <td>What movies have made you go back to the theat...</td>\n",
|
200 |
+
" <td>/r/AskReddit/comments/15sn6b/what_movies_have_...</td>\n",
|
201 |
+
" <td></td>\n",
|
202 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
|
203 |
+
" <td>2013-01-01 23:59:20</td>\n",
|
204 |
+
" <td>[deleted]</td>\n",
|
205 |
+
" <td>15sn6b</td>\n",
|
206 |
+
" <td>0</td>\n",
|
207 |
+
" <td>1</td>\n",
|
208 |
+
" </tr>\n",
|
209 |
+
" <tr>\n",
|
210 |
+
" <th>3</th>\n",
|
211 |
+
" <td>0</td>\n",
|
212 |
+
" <td>18</td>\n",
|
213 |
+
" <td>Worst fear(s)?</td>\n",
|
214 |
+
" <td>/r/AskReddit/comments/15sn4u/worst_fears/</td>\n",
|
215 |
+
" <td>So what is your worst fear, reddit?</td>\n",
|
216 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
|
217 |
+
" <td>2013-01-01 23:58:37</td>\n",
|
218 |
+
" <td>[deleted]</td>\n",
|
219 |
+
" <td>15sn4u</td>\n",
|
220 |
+
" <td>0</td>\n",
|
221 |
+
" <td>0</td>\n",
|
222 |
+
" </tr>\n",
|
223 |
+
" <tr>\n",
|
224 |
+
" <th>4</th>\n",
|
225 |
+
" <td>11</td>\n",
|
226 |
+
" <td>29</td>\n",
|
227 |
+
" <td>If there was a type of ink that lasted only fo...</td>\n",
|
228 |
+
" <td>/r/AskReddit/comments/15sn44/if_there_was_a_ty...</td>\n",
|
229 |
+
" <td></td>\n",
|
230 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
|
231 |
+
" <td>2013-01-01 23:58:15</td>\n",
|
232 |
+
" <td>Honeybeard</td>\n",
|
233 |
+
" <td>15sn44</td>\n",
|
234 |
+
" <td>0</td>\n",
|
235 |
+
" <td>11</td>\n",
|
236 |
+
" </tr>\n",
|
237 |
+
" <tr>\n",
|
238 |
+
" <th>...</th>\n",
|
239 |
+
" <td>...</td>\n",
|
240 |
+
" <td>...</td>\n",
|
241 |
+
" <td>...</td>\n",
|
242 |
+
" <td>...</td>\n",
|
243 |
+
" <td>...</td>\n",
|
244 |
+
" <td>...</td>\n",
|
245 |
+
" <td>...</td>\n",
|
246 |
+
" <td>...</td>\n",
|
247 |
+
" <td>...</td>\n",
|
248 |
+
" <td>...</td>\n",
|
249 |
+
" <td>...</td>\n",
|
250 |
+
" </tr>\n",
|
251 |
+
" <tr>\n",
|
252 |
+
" <th>3293628</th>\n",
|
253 |
+
" <td>1</td>\n",
|
254 |
+
" <td>1</td>\n",
|
255 |
+
" <td>Help me get an idea of cost of living</td>\n",
|
256 |
+
" <td>/r/AskReddit/comments/2cjj63/help_me_get_an_id...</td>\n",
|
257 |
+
" <td></td>\n",
|
258 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
|
259 |
+
" <td>2014-08-04 00:01:20</td>\n",
|
260 |
+
" <td>bbent4698</td>\n",
|
261 |
+
" <td>2cjj63</td>\n",
|
262 |
+
" <td>0</td>\n",
|
263 |
+
" <td>1</td>\n",
|
264 |
+
" </tr>\n",
|
265 |
+
" <tr>\n",
|
266 |
+
" <th>3293629</th>\n",
|
267 |
+
" <td>2</td>\n",
|
268 |
+
" <td>0</td>\n",
|
269 |
+
" <td>If you used a prism to separate light and then...</td>\n",
|
270 |
+
" <td>/r/AskReddit/comments/2cjj5v/if_you_used_a_pri...</td>\n",
|
271 |
+
" <td></td>\n",
|
272 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
|
273 |
+
" <td>2014-08-04 00:01:19</td>\n",
|
274 |
+
" <td>Ajmb_88</td>\n",
|
275 |
+
" <td>2cjj5v</td>\n",
|
276 |
+
" <td>0</td>\n",
|
277 |
+
" <td>2</td>\n",
|
278 |
+
" </tr>\n",
|
279 |
+
" <tr>\n",
|
280 |
+
" <th>3293630</th>\n",
|
281 |
+
" <td>0</td>\n",
|
282 |
+
" <td>11</td>\n",
|
283 |
+
" <td>Reddit, what was it like the first time you go...</td>\n",
|
284 |
+
" <td>/r/AskReddit/comments/2cjj4s/reddit_what_was_i...</td>\n",
|
285 |
+
" <td></td>\n",
|
286 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
|
287 |
+
" <td>2014-08-04 00:01:01</td>\n",
|
288 |
+
" <td>da-gonzo</td>\n",
|
289 |
+
" <td>2cjj4s</td>\n",
|
290 |
+
" <td>0</td>\n",
|
291 |
+
" <td>0</td>\n",
|
292 |
+
" </tr>\n",
|
293 |
+
" <tr>\n",
|
294 |
+
" <th>3293631</th>\n",
|
295 |
+
" <td>1452</td>\n",
|
296 |
+
" <td>3140</td>\n",
|
297 |
+
" <td>People who refuse to be organ donors, why do y...</td>\n",
|
298 |
+
" <td>/r/AskReddit/comments/2cjj31/people_who_refuse...</td>\n",
|
299 |
+
" <td>R.I.P my inbox</td>\n",
|
300 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
|
301 |
+
" <td>2014-08-04 00:00:36</td>\n",
|
302 |
+
" <td>JohnnySniperr</td>\n",
|
303 |
+
" <td>2cjj31</td>\n",
|
304 |
+
" <td>0</td>\n",
|
305 |
+
" <td>1452</td>\n",
|
306 |
+
" </tr>\n",
|
307 |
+
" <tr>\n",
|
308 |
+
" <th>3293632</th>\n",
|
309 |
+
" <td>2</td>\n",
|
310 |
+
" <td>9</td>\n",
|
311 |
+
" <td>What always happens when you travel abroad?</td>\n",
|
312 |
+
" <td>/r/AskReddit/comments/2cjj2a/what_always_happe...</td>\n",
|
313 |
+
" <td></td>\n",
|
314 |
+
" <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
|
315 |
+
" <td>2014-08-04 00:00:23</td>\n",
|
316 |
+
" <td>Nicopip</td>\n",
|
317 |
+
" <td>2cjj2a</td>\n",
|
318 |
+
" <td>0</td>\n",
|
319 |
+
" <td>2</td>\n",
|
320 |
+
" </tr>\n",
|
321 |
+
" </tbody>\n",
|
322 |
+
"</table>\n",
|
323 |
+
"<p>3293633 rows × 11 columns</p>\n",
|
324 |
+
"</div>"
|
325 |
+
],
|
326 |
+
"text/plain": [
|
327 |
+
" score num_comments \\\n",
|
328 |
+
"0 2 4 \n",
|
329 |
+
"1 5 24 \n",
|
330 |
+
"2 1 5 \n",
|
331 |
+
"3 0 18 \n",
|
332 |
+
"4 11 29 \n",
|
333 |
+
"... ... ... \n",
|
334 |
+
"3293628 1 1 \n",
|
335 |
+
"3293629 2 0 \n",
|
336 |
+
"3293630 0 11 \n",
|
337 |
+
"3293631 1452 3140 \n",
|
338 |
+
"3293632 2 9 \n",
|
339 |
+
"\n",
|
340 |
+
" title \\\n",
|
341 |
+
"0 Reddit, if someone had to describe you to a st... \n",
|
342 |
+
"1 What kind of car does the average \\nRedditor d... \n",
|
343 |
+
"2 What movies have made you go back to the theat... \n",
|
344 |
+
"3 Worst fear(s)? \n",
|
345 |
+
"4 If there was a type of ink that lasted only fo... \n",
|
346 |
+
"... ... \n",
|
347 |
+
"3293628 Help me get an idea of cost of living \n",
|
348 |
+
"3293629 If you used a prism to separate light and then... \n",
|
349 |
+
"3293630 Reddit, what was it like the first time you go... \n",
|
350 |
+
"3293631 People who refuse to be organ donors, why do y... \n",
|
351 |
+
"3293632 What always happens when you travel abroad? \n",
|
352 |
+
"\n",
|
353 |
+
" permalink \\\n",
|
354 |
+
"0 /r/AskReddit/comments/15sn6y/reddit_if_someone... \n",
|
355 |
+
"1 /r/AskReddit/comments/15sn6m/what_kind_of_car_... \n",
|
356 |
+
"2 /r/AskReddit/comments/15sn6b/what_movies_have_... \n",
|
357 |
+
"3 /r/AskReddit/comments/15sn4u/worst_fears/ \n",
|
358 |
+
"4 /r/AskReddit/comments/15sn44/if_there_was_a_ty... \n",
|
359 |
+
"... ... \n",
|
360 |
+
"3293628 /r/AskReddit/comments/2cjj63/help_me_get_an_id... \n",
|
361 |
+
"3293629 /r/AskReddit/comments/2cjj5v/if_you_used_a_pri... \n",
|
362 |
+
"3293630 /r/AskReddit/comments/2cjj4s/reddit_what_was_i... \n",
|
363 |
+
"3293631 /r/AskReddit/comments/2cjj31/people_who_refuse... \n",
|
364 |
+
"3293632 /r/AskReddit/comments/2cjj2a/what_always_happe... \n",
|
365 |
+
"\n",
|
366 |
+
" selftext \\\n",
|
367 |
+
"0 They would be talking about you without your p... \n",
|
368 |
+
"1 I've always wanted to know what kind of car th... \n",
|
369 |
+
"2 \n",
|
370 |
+
"3 So what is your worst fear, reddit? \n",
|
371 |
+
"4 \n",
|
372 |
+
"... ... \n",
|
373 |
+
"3293628 \n",
|
374 |
+
"3293629 \n",
|
375 |
+
"3293630 \n",
|
376 |
+
"3293631 R.I.P my inbox \n",
|
377 |
+
"3293632 \n",
|
378 |
+
"\n",
|
379 |
+
" url \\\n",
|
380 |
+
"0 http://www.reddit.com/r/AskReddit/comments/15s... \n",
|
381 |
+
"1 http://www.reddit.com/r/AskReddit/comments/15s... \n",
|
382 |
+
"2 http://www.reddit.com/r/AskReddit/comments/15s... \n",
|
383 |
+
"3 http://www.reddit.com/r/AskReddit/comments/15s... \n",
|
384 |
+
"4 http://www.reddit.com/r/AskReddit/comments/15s... \n",
|
385 |
+
"... ... \n",
|
386 |
+
"3293628 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
|
387 |
+
"3293629 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
|
388 |
+
"3293630 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
|
389 |
+
"3293631 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
|
390 |
+
"3293632 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
|
391 |
+
"\n",
|
392 |
+
" created_utc author id downs ups \n",
|
393 |
+
"0 2013-01-01 23:59:40 [deleted] 15sn6y 0 2 \n",
|
394 |
+
"1 2013-01-01 23:59:31 PaytonAdams 15sn6m 0 5 \n",
|
395 |
+
"2 2013-01-01 23:59:20 [deleted] 15sn6b 0 1 \n",
|
396 |
+
"3 2013-01-01 23:58:37 [deleted] 15sn4u 0 0 \n",
|
397 |
+
"4 2013-01-01 23:58:15 Honeybeard 15sn44 0 11 \n",
|
398 |
+
"... ... ... ... ... ... \n",
|
399 |
+
"3293628 2014-08-04 00:01:20 bbent4698 2cjj63 0 1 \n",
|
400 |
+
"3293629 2014-08-04 00:01:19 Ajmb_88 2cjj5v 0 2 \n",
|
401 |
+
"3293630 2014-08-04 00:01:01 da-gonzo 2cjj4s 0 0 \n",
|
402 |
+
"3293631 2014-08-04 00:00:36 JohnnySniperr 2cjj31 0 1452 \n",
|
403 |
+
"3293632 2014-08-04 00:00:23 Nicopip 2cjj2a 0 2 \n",
|
404 |
+
"\n",
|
405 |
+
"[3293633 rows x 11 columns]"
|
406 |
+
]
|
407 |
+
},
|
408 |
+
"execution_count": 12,
|
409 |
+
"metadata": {},
|
410 |
+
"output_type": "execute_result"
|
411 |
+
}
|
412 |
+
],
|
413 |
+
"source": [
|
414 |
+
"df = dataset['all_days'].to_pandas()\n",
|
415 |
+
"df"
|
416 |
+
]
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"cell_type": "code",
|
420 |
+
"execution_count": 16,
|
421 |
+
"id": "b5bbfa15",
|
422 |
+
"metadata": {},
|
423 |
+
"outputs": [
|
424 |
+
{
|
425 |
+
"data": {
|
426 |
+
"text/plain": [
|
427 |
+
"score Int64\n",
|
428 |
+
"num_comments Int64\n",
|
429 |
+
"title string\n",
|
430 |
+
"permalink string\n",
|
431 |
+
"selftext string\n",
|
432 |
+
"url string\n",
|
433 |
+
"created_utc string\n",
|
434 |
+
"author string\n",
|
435 |
+
"id string\n",
|
436 |
+
"downs Int64\n",
|
437 |
+
"ups Int64\n",
|
438 |
+
"dtype: object"
|
439 |
+
]
|
440 |
+
},
|
441 |
+
"execution_count": 16,
|
442 |
+
"metadata": {},
|
443 |
+
"output_type": "execute_result"
|
444 |
+
}
|
445 |
+
],
|
446 |
+
"source": [
|
447 |
+
"df.convert_dtypes().dtypes"
|
448 |
+
]
|
449 |
+
},
|
450 |
+
{
|
451 |
+
"cell_type": "code",
|
452 |
+
"execution_count": 18,
|
453 |
+
"id": "c4292c7c",
|
454 |
+
"metadata": {},
|
455 |
+
"outputs": [],
|
456 |
+
"source": [
|
457 |
+
"import pandas as pd"
|
458 |
+
]
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"cell_type": "code",
|
462 |
+
"execution_count": 21,
|
463 |
+
"id": "5a516c19",
|
464 |
+
"metadata": {},
|
465 |
+
"outputs": [],
|
466 |
+
"source": [
|
467 |
+
"df['created_utc'] = pd.to_datetime(df['created_utc'])\n",
|
468 |
+
"df['date'] = df['created_utc'].dt.date\n",
|
469 |
+
"df['time'] = df['created_utc'].dt.time"
|
470 |
+
]
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"cell_type": "code",
|
474 |
+
"execution_count": 25,
|
475 |
+
"id": "22d87986",
|
476 |
+
"metadata": {},
|
477 |
+
"outputs": [
|
478 |
+
{
|
479 |
+
"data": {
|
480 |
+
"text/plain": [
|
481 |
+
"<Axes: >"
|
482 |
+
]
|
483 |
+
},
|
484 |
+
"execution_count": 25,
|
485 |
+
"metadata": {},
|
486 |
+
"output_type": "execute_result"
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"data": {
|
490 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGdCAYAAAA7VYb2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJX0lEQVR4nO3dfVhUdf4//ueAM4MoiKaAFCpp3isqppJluiqI/CzKj21iZa7K6hfdlNbMMgT5bJql5idxzau8aYPV3CvxBj/oiOtdjpkEmbJ6pXnTroLbjZCQwwjv3x995uzMMLdwhpkDz8d1zYXzPu95n+d5z5kzL8/cqYQQAkRERETUKH7eDkBERETUHLCoIiIiIpIBiyoiIiIiGbCoIiIiIpIBiyoiIiIiGbCoIiIiIpIBiyoiIiIiGbCoIiIiIpJBK28H8Ka6ujrcuHEDQUFBUKlU3o5DRERELhBC4Oeff0ZERAT8/Hzn/FCLLqpu3LiByMhIb8cgIiKiBvjuu+/wwAMPeDuGpEUXVUFBQQB+vVOCg4NlG9doNOLgwYOIi4uDWq2WbVxPU2puQNnZAeb3JiVnB5SbX6m5AWVnB5pH/ry8PMyaNUt6HvcVLbqoMr3kFxwcLHtRFRgYiODgYEXtsErNDSg7O8D83qTk7IBy8ys1N6Ds7EDzyQ/A59664zsvRBIREREpGIsqIiIiIhmwqCIiIiKSAYsqIiIiIhmwqCIiIiKSgVtF1YoVK/Dwww8jKCgIoaGhSEpKwsWLFy363L17F6mpqbjvvvvQtm1bTJ48GeXl5RZ9rl+/jsTERAQGBiI0NBSLFi3CvXv3LPocOXIEQ4YMgVarRY8ePbB169Z6ebKzs9GtWzcEBARg+PDhOH36tDubQ0RERCQbt4qqo0ePIjU1FadOnYJOp4PRaERcXByqqqqkPgsXLsTevXuxc+dOHD16FDdu3MDTTz8tLa+trUViYiJqampw8uRJbNu2DVu3bkV6errU58qVK0hMTMSYMWNQUlKCBQsWYNasWThw4IDUZ8eOHUhLS8OyZcvw5ZdfIjo6GvHx8bh161Zj5oOIiIioQdz6nqqCggKL61u3bkVoaCiKioowatQoVFRU4MMPP0Rubi5+85vfAAC2bNmCPn364NSpUxgxYgQOHjyI0tJSHDp0CGFhYRg0aBCysrKwePFiZGRkQKPRYOPGjYiKisLq1asBAH369MGJEyewdu1axMfHAwDWrFmD2bNnY8aMGQCAjRs3Ij8/H5s3b8arr77a6IkhIiIickejvvyzoqICANChQwcAQFFREYxGI8aNGyf16d27N7p06QK9Xo8RI0ZAr9djwIABCAsLk/rEx8dj7ty5OH/+PAYPHgy9Xm8xhqnPggULAAA1NTUoKirCkiVLpOV+fn4YN24c9Hq93bwGgwEGg0G6XllZCeDXLxIzGo0NnIX6TGPJOWZTUGpuQNnZAeb3JiVnB5SbX6m5AWVnB5pPfl/U4KKqrq4OCxYswMiRI9G/f38AQFlZGTQaDUJCQiz6hoWFoaysTOpjXlCZlpuWOepTWVmJX375BT/99BNqa2tt9rlw4YLdzCtWrEBmZma99oMHD0rfzionnU4n+5hNQam5AWVnB5jfm5ScHVBufqXmBpSdHVB+fl/U4KIqNTUV586dw4kTJ+TM41FLlixBWlqadL2yshKRkZGIi4uT/WdqdDodxo8fr6ifAFBqbkDZ2QHm9yYlZweUm1+puQFlZweaR/7du3d7O4ZNDSqq5s2bh3379uHYsWMWvw4dHh6Ompoa3L592+JsVXl5OcLDw6U+1p/SM3060LyP9ScGy8vLERwcjNatW8Pf3x/+/v42+5jGsEWr1UKr1dZrV6vVHtmxPDWupyk1N6Ds7ADze5OSswPKza/U3ICyswPKz++L3Pr0nxAC8+bNw65du3D48GFERUVZLI+JiYFarUZhYaHUdvHiRVy/fh2xsbEAgNjYWHz99dcWn9LT6XQIDg5G3759pT7mY5j6mMbQaDSIiYmx6FNXV4fCwkKpDxEREVFTcutMVWpqKnJzc7F7924EBQVJ74Fq164dWrdujXbt2mHmzJlIS0tDhw4dEBwcjPnz5yM2NhYjRowAAMTFxaFv3754/vnnsWrVKpSVlWHp0qVITU2VziLNmTMH69evxyuvvILf/e53OHz4MD755BPk5+dLWdLS0jB9+nQMHToUw4YNw7vvvouqqirp04BERERETcmtourPf/4zAGD06NEW7Vu2bMGLL74IAFi7di38/PwwefJkGAwGxMfHY8OGDVJff39/7Nu3D3PnzkVsbCzatGmD6dOnY/ny5VKfqKgo5OfnY+HChVi3bh0eeOABfPDBB9LXKQDAb3/7W/z73/9Geno6ysrKMGjQIBQUFNR78zr5lm6v5uPqykRvxyAiIpKdW0WVEMJpn4CAAGRnZyM7O9tun65du2L//v0Oxxk9ejSKi4sd9pk3bx7mzZvnNBMRERGRp/G3/4iIiIhkwKKKiIiISAYsqoiIiIhkwKKKiIiISAYsqoiIiIhkwKKKfEa3V/OddyIiIvJRLKrIK1hAERFRc8OiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiinxat1fz+UWhRESkCCyqiIiIiGTAooqIiIhIBiyqiIiIiGTAosqH8L1D3sX3bxERUWOwqCIiIiKSAYsqIiIiIhmwqCIiIiKSAYsqIiIiIhmwqCJZ8A3eRETU0rGoIq9jQUZERM0BiypyCwsgIiIi21hUkeKwsCMiIl/EooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqyCG+f4mIiMg1LKqIiIiIZMCiioiIiEgGbhdVx44dw6RJkxAREQGVSoW8vDyL5SqVyubl7bfflvp069at3vKVK1dajHP27Fk89thjCAgIQGRkJFatWlUvy86dO9G7d28EBARgwIAB2L9/v7ubQy0YX9okIiI5uV1UVVVVITo6GtnZ2TaX37x50+KyefNmqFQqTJ482aLf8uXLLfrNnz9fWlZZWYm4uDh07doVRUVFePvtt5GRkYFNmzZJfU6ePImpU6di5syZKC4uRlJSEpKSknDu3Dl3N4nI57EAJCLyfa3cvUFCQgISEhLsLg8PD7e4vnv3bowZMwYPPvigRXtQUFC9viY5OTmoqanB5s2bodFo0K9fP5SUlGDNmjVISUkBAKxbtw4TJkzAokWLAABZWVnQ6XRYv349Nm7c6O5mUTPU7dV8XF2Z6O0YHtGct42ISKncLqrcUV5ejvz8fGzbtq3espUrVyIrKwtdunRBcnIyFi5ciFatfo2j1+sxatQoaDQaqX98fDzeeust/PTTT2jfvj30ej3S0tIsxoyPj6/3cqQ5g8EAg8EgXa+srAQAGI1GGI3GxmyqBdNY9sbsn3EA5zLi67Vr/YXFbez18xRbua0zWV931m6vn3l/W23m/Z1lcjW7s8y21gU0zf3gbJ9x9X7wFmf5fZmSswPKza/U3ICyswPNJ78vUgkhRINvrFJh165dSEpKsrl81apVWLlyJW7cuIGAgACpfc2aNRgyZAg6dOiAkydPYsmSJZgxYwbWrFkDAIiLi0NUVBTef/996TalpaXo168fSktL0adPH2g0Gmzbtg1Tp06V+mzYsAGZmZkoLy+3mScjIwOZmZn12nNzcxEYGNiQKSAiIqImVl1djeTkZFRUVCA4ONjbcSQePVO1efNmTJs2zaKgAmBxhmngwIHQaDT4/e9/jxUrVkCr1Xosz5IlSyzWXVlZicjISMTFxcl6pxiNRuh0OowfPx5qtbrecntnPqzbvXGmyjq3q5lczWrqZ97fVpt5fwBOM7iS3VlmW+tyZ9saw919pqn3DWec5fdlSs4OKDe/UnMDys4ONI/8u3fv9nYMmzxWVB0/fhwXL17Ejh07nPYdPnw47t27h6tXr6JXr14IDw+vd7bJdN30Pix7fey9TwsAtFqtzaJNrVZ7ZMeyN66hVuVSu71+nmae29VMrmY19TPvb6vNvL8pkyvrcpTdWWZb67I3jqfe0+TqPuOtfcMZTz2WmoKSswPKza/U3ICyswPKz++LPPY9VR9++CFiYmIQHR3ttG9JSQn8/PwQGhoKAIiNjcWxY8csXjfV6XTo1asX2rdvL/UpLCy0GEen0yE2NlbGrSAiIiJyjdtF1Z07d1BSUoKSkhIAwJUrV1BSUoLr169LfSorK7Fz507MmjWr3u31ej3effddfPXVV/j222+Rk5ODhQsX4rnnnpMKpuTkZGg0GsycORPnz5/Hjh07sG7dOouX7l566SUUFBRg9erVuHDhAjIyMnDmzBnMmzfP3U1SPH7cnoiIyPvcfvnvzJkzGDNmjHTdVOhMnz4dW7duBQBs374dQgiLN5GbaLVabN++HRkZGTAYDIiKisLChQstCqZ27drh4MGDSE1NRUxMDDp27Ij09HTp6xQA4JFHHkFubi6WLl2K1157DQ899BDy8vLQv39/dzeJiIiIqNHcLqpGjx4NZx8YTElJsSiAzA0ZMgSnTp1yup6BAwfi+PHjDvtMmTIFU6ZMcToWKQ+/h4mIiJSGv/1HHtHt1Xy+LElERC0KiyoiIiIiGbCoIiIiIpIBi6pmii+9ERERNS0WVUREREQyYFFF5EU8o0hE1HywqFIgPhE3DuePiIg8gUUVERERkQxYVBE1MX6HFxFR88SiiiT9Mw54OwIREZFisahqQjw74Zt4vxARkRxYVBERERHJgEUVERERkQxYVBERERHJgEUVERERkQxYVBE1Ab4Znoio+WNR1UI5epLvn3GARQAREZGbWFQRyYSFKBFRy8aiipo9FjtERNQUWFR5EZ/siYiImg8WVS0IizgiIiLPYVFFREREJAMWVUREREQyYFHlg/gyHRERkfKwqCIiIiKSAYsqanZ4po+IiLyBRRURERGRDFhUEXkQz5oREbUcLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiisgH9M844O0IRETUSCyqiBSOb4YnIvINbhdVx44dw6RJkxAREQGVSoW8vDyL5S+++CJUKpXFZcKECRZ9fvzxR0ybNg3BwcEICQnBzJkzcefOHYs+Z8+exWOPPYaAgABERkZi1apV9bLs3LkTvXv3RkBAAAYMGID9+/e7uzlEREREsnC7qKqqqkJ0dDSys7Pt9pkwYQJu3rwpXf76179aLJ82bRrOnz8PnU6Hffv24dixY0hJSZGWV1ZWIi4uDl27dkVRURHefvttZGRkYNOmTVKfkydPYurUqZg5cyaKi4uRlJSEpKQknDt3zt1NoiYgx9kUnpEhIiJf1srdGyQkJCAhIcFhH61Wi/DwcJvL/vGPf6CgoABffPEFhg4dCgB47733MHHiRLzzzjuIiIhATk4OampqsHnzZmg0GvTr1w8lJSVYs2aNVHytW7cOEyZMwKJFiwAAWVlZ0Ol0WL9+PTZu3OjuZhERERE1ikfeU3XkyBGEhoaiV69emDt3Ln744QdpmV6vR0hIiFRQAcC4cePg5+eHzz//XOozatQoaDQaqU98fDwuXryIn376Seozbtw4i/XGx8dDr9d7YpOIiIiIHHL7TJUzEyZMwNNPP42oqChcvnwZr732GhISEqDX6+Hv74+ysjKEhoZahmjVCh06dEBZWRkAoKysDFFRURZ9wsLCpGXt27dHWVmZ1GbexzSGLQaDAQaDQbpeWVkJADAajTAajQ3faCumsazH1PoLGI3Gen9dXW7dz951V9psLvcTFn8dZXQlk/kcONo2Z23WWWxlMJ/zhs6zdebGzK+9dVnPjbTcz/a6Hc2vo/uyqdnb55VAydkB5eZXam5A2dmB5pPfF6mEEKLBN1apsGvXLiQlJdnt8+2336J79+44dOgQxo4dizfffBPbtm3DxYsXLfqFhoYiMzMTc+fORVxcHKKiovD+++9Ly0tLS9GvXz+UlpaiT58+0Gg02LZtG6ZOnSr12bBhAzIzM1FeXm4zS0ZGBjIzM+u15+bmIjAw0M2tJyIiIm+orq5GcnIyKioqEBwc7O04/yEaAYDYtWuX034dO3YUGzduFEII8eGHH4qQkBCL5UajUfj7+4tPP/1UCCHE888/L5588kmLPocPHxYAxI8//iiEECIyMlKsXbvWok96eroYOHCg3Rx3794VFRUV0uW7774TAMT3338vampqZLtUVVWJvLw8UVVVZdHe87W9Nv+6uty6n73rrrTZWj5g6R6Rl5cnBizd4zSjK5lsrc/WuM7aXJkX8zlv6Dw7y9yYObce17rdNPfW+4wr97m9+6QpL/b2eSVclJxdyfmVmlvp2ZtL/tzcXAFAVFRUOK1BmpLsL/9Z++c//4kffvgBnTt3BgDExsbi9u3bKCoqQkxMDADg8OHDqKurw/Dhw6U+r7/+OoxGI9RqNQBAp9OhV69eaN++vdSnsLAQCxYskNal0+kQGxtrN4tWq4VWq63XrlarpfXIyXpcQ60KarW63l9Xl1v3s3fdlTaby+tU0l9nGe1l6vZqPq6uTIShViXNgbNtc9ZmGsfZvNjr5+o8O8vsaHudzbn1ttXL+n9zP/hPh3HxT/+f3fm1dd3RfDQ1Tz2WmoKSswPKza/U3ICyswPKz++L3H6j+p07d1BSUoKSkhIAwJUrV1BSUoLr16/jzp07WLRoEU6dOoWrV6+isLAQTz75JHr06IH4+HgAQJ8+fTBhwgTMnj0bp0+fxmeffYZ58+bh2WefRUREBAAgOTkZGo0GM2fOxPnz57Fjxw6sW7cOaWlpUo6XXnoJBQUFWL16NS5cuICMjAycOXMG8+bNk2FaiIiIiNzjdlF15swZDB48GIMHDwYApKWlYfDgwUhPT4e/vz/Onj2LJ554Aj179sTMmTMRExOD48ePW5whysnJQe/evTF27FhMnDgRjz76qMV3ULVr1w4HDx7ElStXEBMTg5dffhnp6ekW32X1yCOPIDc3F5s2bUJ0dDT+9re/IS8vD/3792/MfJCb+N1RREREv3L75b/Ro0dDOHhv+4EDzn/DrEOHDsjNzXXYZ+DAgTh+/LjDPlOmTMGUKVOcrs+bTEWH6SUiIiIiap74239EREREMmBR1Yx0ezWfL8cRERF5CYuqFo5FGBERkTxYVBERERHJgEUVkYv48ioRETnCooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqSNH4aTwiIvIVLKrIJSxeiIiIHGNRReQBLEKJiFoeFlVEREREMmBR1QLwrAkREZHnsagiIiIikgGLKiIiIiIZsKgiIiIikgGLKiIF4fvjiIh8F4sqBeATKRERke9jUUVEREQkAxZVRERERDJgUaVgfFmw+er2aj7vXyIihWFRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDt4uqY8eOYdKkSYiIiIBKpUJeXp60zGg0YvHixRgwYADatGmDiIgIvPDCC7hx44bFGN26dYNKpbK4rFy50qLP2bNn8dhjjyEgIACRkZFYtWpVvSw7d+5E7969ERAQgAEDBmD//v3ubg4RERGRLNwuqqqqqhAdHY3s7Ox6y6qrq/Hll1/ijTfewJdffolPP/0UFy9exBNPPFGv7/Lly3Hz5k3pMn/+fGlZZWUl4uLi0LVrVxQVFeHtt99GRkYGNm3aJPU5efIkpk6dipkzZ6K4uBhJSUlISkrCuXPn3N0kIiIiokZr5e4NEhISkJCQYHNZu3btoNPpLNrWr1+PYcOG4fr16+jSpYvUHhQUhPDwcJvj5OTkoKamBps3b4ZGo0G/fv1QUlKCNWvWICUlBQCwbt06TJgwAYsWLQIAZGVlQafTYf369di4caO7m0VERETUKG4XVe6qqKiASqVCSEiIRfvKlSuRlZWFLl26IDk5GQsXLkSrVr/G0ev1GDVqFDQajdQ/Pj4eb731Fn766Se0b98eer0eaWlpFmPGx8dbvBxpzWAwwGAwSNcrKysB/PqypdFobOSW/odpLKPRCK2/sPi3rb8m9pa70896Xbb6223zExZ/5criqJ8r+exlMe9nPeeeyuxKFle2t944ZnPf0PnzJvP5VxolZweUm1+puQFlZweaT35fpBJCiAbfWKXCrl27kJSUZHP53bt3MXLkSPTu3Rs5OTlS+5o1azBkyBB06NABJ0+exJIlSzBjxgysWbMGABAXF4eoqCi8//770m1KS0vRr18/lJaWok+fPtBoNNi2bRumTp0q9dmwYQMyMzNRXl5uM09GRgYyMzPrtefm5iIwMLAhU0BERERNrLq6GsnJyaioqEBwcLC34/yHaAQAYteuXTaX1dTUiEmTJonBgweLiooKh+N8+OGHolWrVuLu3btCCCHGjx8vUlJSLPqcP39eABClpaVCCCHUarXIzc216JOdnS1CQ0Ptrufu3buioqJCunz33XcCgPj+++9FTU2NbJeqqiqRl5cnqqqqRM/X9oqer+0VNTU1dv+aLnL0s3UbV9sGLN0j8vLyxICle2TN4mwcZ22u3NZ6zj2V2ZUsrmyvdbv53Dd0/rx5MZ9/b2dpSdmVnF+puZWevbnkz83NFQCc1hdNzSMv/xmNRjzzzDO4du0aDh8+7LSKHD58OO7du4erV6+iV69eCA8Pr3e2yXTd9D4se33svU8LALRaLbRabb12tVoNtVrt0ra5Q61Ww1Crsvi3rb8m9pa70896Xbb6222rU0l/5cziqJ8r+ezNn3k/6zn3VGZXszjb3nrjmM19Q+fPF3jqsdQUlJwdUG5+peYGlJ0dUH5+XyT791SZCqpvvvkGhw4dwn333ef0NiUlJfDz80NoaCgAIDY2FseOHbN43VSn06FXr15o37691KewsNBiHJ1Oh9jYWBm3hoiIiMg1bp+punPnDi5duiRdv3LlCkpKStChQwd07twZ//Vf/4Uvv/wS+/btQ21tLcrKygAAHTp0gEajgV6vx+eff44xY8YgKCgIer0eCxcuxHPPPScVTMnJycjMzMTMmTOxePFinDt3DuvWrcPatWul9b700kt4/PHHsXr1aiQmJmL79u04c+aMxdcuEBERETUVt4uqM2fOYMyYMdJ10yfwpk+fjoyMDOzZswcAMGjQIIvb/f3vf8fo0aOh1Wqxfft2ZGRkwGAwICoqCgsXLrT4JF+7du1w8OBBpKamIiYmBh07dkR6err0dQoA8MgjjyA3NxdLly7Fa6+9hoceegh5eXno37+/u5tERERE1GhuF1WjR4+GcPCBQUfLAGDIkCE4deqU0/UMHDgQx48fd9hnypQpmDJlitOxiIiIiDyNv/1HREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREROSWbq/mezuCT2JRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREQ2dHs139sRSGFYVBEREZFNLCzdw6KKiIiISAYsqoiIiNzQP+OAtyOQj2JRRURERCQDFlVEREREMmBRRURERCQDFlVERF7UHD5d1Ry2gUgOLKqIiIiIZOB2UXXs2DFMmjQJERERUKlUyMvLs1guhEB6ejo6d+6M1q1bY9y4cfjmm28s+vz444+YNm0agoODERISgpkzZ+LOnTsWfc6ePYvHHnsMAQEBiIyMxKpVq+pl2blzJ3r37o2AgAAMGDAA+/fvd3dziIiIqJG6vZrPM5ZoQFFVVVWF6OhoZGdn21y+atUq/M///A82btyIzz//HG3atEF8fDzu3r0r9Zk2bRrOnz8PnU6Hffv24dixY0hJSZGWV1ZWIi4uDl27dkVRURHefvttZGRkYNOmTVKfkydPYurUqZg5cyaKi4uRlJSEpKQknDt3zt1NIiIiImq0Vu7eICEhAQkJCTaXCSHw7rvvYunSpXjyyScBAB999BHCwsKQl5eHZ599Fv/4xz9QUFCAL774AkOHDgUAvPfee5g4cSLeeecdREREICcnBzU1Ndi8eTM0Gg369euHkpISrFmzRiq+1q1bhwkTJmDRokUAgKysLOh0Oqxfvx4bN25s0GQQETV3prMJV1cmejkJUfPjdlHlyJUrV1BWVoZx48ZJbe3atcPw4cOh1+vx7LPPQq/XIyQkRCqoAGDcuHHw8/PD559/jqeeegp6vR6jRo2CRqOR+sTHx+Ott97CTz/9hPbt20Ov1yMtLc1i/fHx8fVejjRnMBhgMBik65WVlQAAo9EIo9HY2M2XmMYyGo3Q+guLf9v6a2JvuTv9rNdlq7/dNj9h8VeuLI76uZLPXhbzftZz7qnMrmRxZXvrjWM29w2dP28yn3+l8Xb2xt5/7ua33rfk0JBt8Pa8u8LedpkfI5XInbm3Owc2jquujtlYvjzvKiGEaPCNVSrs2rULSUlJAH59SW7kyJG4ceMGOnfuLPV75plnoFKpsGPHDrz55pvYtm0bLl68aDFWaGgoMjMzMXfuXMTFxSEqKgrvv/++tLy0tBT9+vVDaWkp+vTpA41Gg23btmHq1KlSnw0bNiAzMxPl5eU282ZkZCAzM7Nee25uLgIDAxs6DURERNSEqqurkZycjIqKCgQHB3s7zn+IRgAgdu3aJV3/7LPPBABx48YNi35TpkwRzzzzjBBCiD/96U+iZ8+e9cbq1KmT2LBhgxBCiPHjx4uUlBSL5efPnxcARGlpqRBCCLVaLXJzcy36ZGdni9DQULt57969KyoqKqTLd999JwCI77//XtTU1Mh2qaqqEnl5eaKqqkr0fG2v6PnaXlFTU2P3r+kiRz9bt3G1bcDSPSIvL08MWLpH1izOxnHW5sptrefcU5ldyeLK9lq3m899Q+fPmxfz+fd2FqVlb+z95yi/rbGt9y1vbUND570p93d76zI9XpW4v7s79/bmwNZxtanum6qqKpGbmysAiIqKCteLliYg68t/4eHhAIDy8nKLM1Xl5eUYNGiQ1OfWrVsWt7t37x5+/PFH6fbh4eH1zjaZrjvrY1pui1arhVarrdeuVquhVqtd2US3qNVqGGpVFv+29dfE3nJ3+lmvy1Z/u211KumvnFkc9XMln735M+9nPeeeyuxqFmfbW28cs7lv6Pz5Ak89lpqCs+zdXs33yPuQ5Lr/bOW3Nbb1viWHxmyDKber82u9Lk/dL7bWJbXX/WcOXdluT2ZsDFfy250DG8dV05i+ur1NQdbvqYqKikJ4eDgKCwultsrKSnz++eeIjY0FAMTGxuL27dsoKiqS+hw+fBh1dXUYPny41OfYsWMWr5vqdDr06tUL7du3l/qYr8fUx7QeUhZ+FJeaO9M+7q193dF6+fgjkofbRdWdO3dQUlKCkpISAL++Ob2kpATXr1+HSqXCggUL8N///d/Ys2cPvv76a7zwwguIiIiQ3nfVp08fTJgwAbNnz8bp06fx2WefYd68eXj22WcREREBAEhOToZGo8HMmTNx/vx57NixA+vWrbN4Y/pLL72EgoICrF69GhcuXEBGRgbOnDmDefPmNX5WiIioRfHE9yyxWG153C6qzpw5g8GDB2Pw4MEAgLS0NAwePBjp6ekAgFdeeQXz589HSkoKHn74Ydy5cwcFBQUICAiQxsjJyUHv3r0xduxYTJw4EY8++qjFd1C1a9cOBw8exJUrVxATE4OXX34Z6enpFt9l9cgjjyA3NxebNm1CdHQ0/va3vyEvLw/9+/dv8GQQETUHcj2ZsyjwDH5RZvPl9nuqRo8eDeHgA4MqlQrLly/H8uXL7fbp0KEDcnNzHa5n4MCBOH78uMM+U6ZMwZQpUxwHJiLyEH7nEwG++56phnB3W5rTtsuBv/1HREQ+iWdzSGlYVBERkUtY5CgL76+mx6KKFK/bq/non3HA2zGIyEW+8GTvCxms2crkiznJPhZVRERERDJgUUVE5IN4hqJpcJ5JTiyqSJF4ICRSBj5WlYH3kzxYVHkZd2RLvvht09TycH8gooZgUUUexScnIvfwMUPNRUvcl1lU+QB+u658+ClAIu/j8cyzOL++i0UVtUg8KJG3cR8kOXF/8g0sqoiIqNlisfErzkPTYFFFPosHAcdsvWzMOfM+3ge2cV6oJWBRRY3WlAfLlnZgbmnbS0S+i8cj51hUNWONfQC4e3tX+/OB6R7OF/kafriGyDYWVQrjiQNZcz44erKwVMK88cmPHOG+QSQvFlUtDJ9kiYi8g8fe5o9FFQFQ/hkZIiJH+B9KagosqoiImgF+8S2R97GoIvIwX/7fsS9nk1Nz287mtj1EzQWLKh/lywdNnkaXh9LmUGl5iYiaGosqchmfVKk54stmzROPV+QNLKo8iAfr5okHa9/VEu8bHmeIfAeLqibmqYN+S3wyaYnb3BQ4r9SUlFwUKjm7MzwONAyLKiIrjTmY8P1mTYtzTda8vU/wGNCysahq5vjgbj54X7ZsLeXJ2hd+7qolzDN5BosqohZCyU8UcmVX8hyQd3HfIVewqCIiImohWBx6FosqImqQ5nhwbuz76ZSuOWwDkTexqKIG84X3PhCR5/ExTOQaFlVEMuCTDnkK9y2yp6V8eEFJWFQpBB88RMrSEh+vLXGbicyxqCJF8PbB2tvrb2k430SkRCyqfFxDnlz4hES+xLQ/Nqezrc1lO5SEc05KIHtR1a1bN6hUqnqX1NRUAMDo0aPrLZszZ47FGNevX0diYiICAwMRGhqKRYsW4d69exZ9jhw5giFDhkCr1aJHjx7YunWr3JsiKx4QiIiImrdWcg/4xRdfoLa2Vrp+7tw5jB8/HlOmTJHaZs+ejeXLl0vXAwMDpX/X1tYiMTER4eHhOHnyJG7evIkXXngBarUab775JgDgypUrSExMxJw5c5CTk4PCwkLMmjULnTt3Rnx8vNybRKQ43V7Nx9WVid6OQUTUosheVHXq1Mni+sqVK9G9e3c8/vjjUltgYCDCw8Nt3v7gwYMoLS3FoUOHEBYWhkGDBiErKwuLFy9GRkYGNBoNNm7ciKioKKxevRoA0KdPH5w4cQJr165lUUVEREReIXtRZa6mpgYff/wx0tLSoFKppPacnBx8/PHHCA8Px6RJk/DGG29IZ6v0ej0GDBiAsLAwqX98fDzmzp2L8+fPY/DgwdDr9Rg3bpzFuuLj47FgwQKHeQwGAwwGg3S9srISAGA0GmE0Ghu7uRLTWFo/4bCP1r/+cnvt9vr1en0ftP6uZbIe17rNlNdRble4um3SPDnJ5Qrr7PbWZb2+xt4P7ozhcHvN8jtbt6373Hxs0+3NxzFvM+dqm6128+vWf23dzjyLM47W1Zh+NrfXr/4cmffrn3EA5zLiLdocza+9ddlb7k4/63WZ5zftO/a2V+7MpjEc9XOWxTy3034yzp+9fo72A0dzbt3P2fxaZ7G1j1lvj602dx7ftrbJ3f3T2f3haP7k5Ikx5aISQjTuGdSBTz75BMnJybh+/ToiIiIAAJs2bULXrl0RERGBs2fPYvHixRg2bBg+/fRTAEBKSgquXbuGAwcOSONUV1ejTZs22L9/PxISEtCzZ0/MmDEDS5Yskfrs378fiYmJqK6uRuvWrW3mycjIQGZmZr323Nxci5cgiYiIyHdVV1cjOTkZFRUVCA4O9nYciUfPVH344YdISEiQCirg16LJZMCAAejcuTPGjh2Ly5cvo3v37p6MgyVLliAtLU26XllZicjISMTFxcl6pxiNRuh0Orxxxg+GOpXNPucy4tE/44DNZZ5ga33WbVo/gayhdQ5zN3Rd9voBkGUerLNbZzCty3p99rK6c/+4Ooa97T2XEY+Y5QVS/qL0CW7PifnYpvWar9+8zZyrbbbaza+b9vnx48dDrVbbvJ15FnczNCSTvQzWc2Ga+/Hjx2Pwnw7XW27rNo7m11Z/Rxnc6We9LvP8pn2nqTKb35eOxrHXVvz6b6TjpGmft3dbuefPXj9782Kdxdbj1frx7ShrQ+bP3v5sa5+w7md9G9Pcmz9eXZlfZ/eHo/mTk9FoxO7du2UfVw4eK6quXbuGQ4cOSWeg7Bk+fDgA4NKlS+jevTvCw8Nx+vRpiz7l5eUAIL0PKzw8XGoz7xMcHGz3LBUAaLVaaLXaeu1qtbreE4EcDHUqGGptFycPvXEQQMMLF3ep1ep6WWy1AY5zN3Rd9voBaNS6rJmyW2cwv3+t222t39VtcGcMe9urVqulItZQVz+7qxlMY5tubz6OeZs58zbTm9tt9bPua+u6aT321mGexdG4rq6rIf2s/wKQ5t7ucidt9m5rL1tj+lmvyzy/ad9pqszm96XD+XXQZp7baT8Z589eP1f2A1tzbt3P2fw2ZP6A+o9Rdx7ftsY2f7y6Mr/O7g9H89dSeOx7qrZs2YLQ0FAkJjr+BFJJSQkAoHPnzgCA2NhYfP3117h165bUR6fTITg4GH379pX6FBYWWoyj0+kQGxsr4xaQkvErLOzj3BAReYZHiqq6ujps2bIF06dPR6tW/zkZdvnyZWRlZaGoqAhXr17Fnj178MILL2DUqFEYOHAgACAuLg59+/bF888/j6+++goHDhzA0qVLkZqaKp1lmjNnDr799lu88soruHDhAjZs2IBPPvkECxcu9MTmEBERETnlkaLq0KFDuH79On73u99ZtGs0Ghw6dAhxcXHo3bs3Xn75ZUyePBl79+6V+vj7+2Pfvn3w9/dHbGwsnnvuObzwwgsW32sVFRWF/Px86HQ6REdHY/Xq1fjggw/4dQo+gmdCWq6mfJ8gEZGv8ch7quLi4mDrQ4WRkZE4evSo09t37doV+/fvd9hn9OjRKC4ubnBGIrmwiHQfv5yUiJoj/vYfEfkEFqdEpHQsqqhF4RN3y8L7m4iakke/p4rIkZb4hNcSt5mIqKXgmSoiL2uuhVZz3S4iIntYVBERERHJgEUVtQjdXs23eebEF8+m+GImR5SWl4jIU1hUEXmIdbHhyeKDhQ0RkfexqCIim5RUqCkpKxE1XyyqWgg+6biH80VERO5iUUVEREQkAxZVRERERDJgUUVEPosvwxKRkrCoIiIiIpIBiyoicgvPHhER2caiioiIiEgGLKqISFHsfTs+EZG3sahqwfjEREREJB8WVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFRETUTPA3Xb2LRRURERGRDFhUERERkct4Nsw+FlVEREREMmBRRUTURPg/fKLmjUUVUTPBJ2xyhvsIkWfJXlRlZGRApVJZXHr37i0tv3v3LlJTU3Hfffehbdu2mDx5MsrLyy3GuH79OhITExEYGIjQ0FAsWrQI9+7ds+hz5MgRDBkyBFqtFj169MDWrVvl3hQiIiKyg0V6fR45U9WvXz/cvHlTupw4cUJatnDhQuzduxc7d+7E0aNHcePGDTz99NPS8traWiQmJqKmpgYnT57Etm3bsHXrVqSnp0t9rly5gsTERIwZMwYlJSVYsGABZs2ahQMHDnhic4iIPIJPSkTNSyuPDNqqFcLDw+u1V1RU4MMPP0Rubi5+85vfAAC2bNmCPn364NSpUxgxYgQOHjyI0tJSHDp0CGFhYRg0aBCysrKwePFiZGRkQKPRYOPGjYiKisLq1asBAH369MGJEyewdu1axMfHe2KTiIiajBzFFgs2oqbnkaLqm2++QUREBAICAhAbG4sVK1agS5cuKCoqgtFoxLhx46S+vXv3RpcuXaDX6zFixAjo9XoMGDAAYWFhUp/4+HjMnTsX58+fx+DBg6HX6y3GMPVZsGCBw1wGgwEGg0G6XllZCQAwGo0wGo0ybDmk8QBA6ydkG7MpmPIqLTeg7OyAvPmNRiO0/kL6a6tN2kdt/NtWP0fLLcbwczyedRZb/ZxlcbRNDc3syhzZa3Mni/V9ZH7dUT+H82e271jPkfnY5m1yzJ/12A2ZP/Pc5v16vb4P5zLiXdonzPv0zzgArb/9bXOW2ZX9wNacu7t/NmT+Gp3ZxnJX7+ter+/7vz71913z9QNAr9f31bsP5OaJMeWiEkLI+iz0v//7v7hz5w569eqFmzdvIjMzE//6179w7tw57N27FzNmzLAobABg2LBhGDNmDN566y2kpKTg2rVrFi/lVVdXo02bNti/fz8SEhLQs2dPzJgxA0uWLJH67N+/H4mJiaiurkbr1q1tZsvIyEBmZma99tzcXAQGBso0A0RERORJ1dXVSE5ORkVFBYKDg70dRyL7maqEhATp3wMHDsTw4cPRtWtXfPLJJ3aLnaayZMkSpKWlSdcrKysRGRmJuLg4We8Uo9EInU6HN874wVCnkm1cT9P6CWQNrVNcbkDZ2QF585/LiEf/jAPSX1tt5zJ+fZnc1r9t9XO0vH/GARS//htpny9Kn2C3v3UWW/2cZXG0Te5kNvWLWV6ArKF1GD9+PAb/6bDb+dzJYn0fmV931M/R/JnyW8+99djmbfYyuDN/1mM728es26z3GVf2E3vz4uq+Yc1Wu6M2e3PuKIOz+9LV+bPXz9l82Lqtae7Hjx8PtVrt0n3d2DmTk9FoxO7du2UfVw4eefnPXEhICHr27IlLly5h/PjxqKmpwe3btxESEiL1KS8vl96DFR4ejtOnT1uMYfp0oHkf608MlpeXIzg42GHhptVqodVq67Wr1Wppx5KToU4FQ63ynuCVmhtQdnZAnvxqtRqGWpX011abaX+39W9b/RwttxijzvF41lls9XOWxdE2NTSzK3NkqFXhoTcO4urKxHrb9NAbBwE4z2J9H5lfd9TP4fz9X37rubce23q/ss5gvg2O5s/Uz3ps2/1gMS/W82ee29bYtu53W/Ntvi5796Wtx5WtdkdtpvVo/S3n3HrbbI3j7vy5+hi1t73Obmu6nb39HLDM2tA588Rzqy/z+PdU3blzB5cvX0bnzp0RExMDtVqNwsJCafnFixdx/fp1xMbGAgBiY2Px9ddf49atW1IfnU6H4OBg9O3bV+pjPoapj2kMIiIl6fZqvs+9sdzX8hApgexnqv74xz9i0qRJ6Nq1K27cuIFly5bB398fU6dORbt27TBz5kykpaWhQ4cOCA4Oxvz58xEbG4sRI0YAAOLi4tC3b188//zzWLVqFcrKyrB06VKkpqZKZ5nmzJmD9evX45VXXsHvfvc7HD58GJ988gny83kQIKKm0VyLjoZuV3OZj8Zsh5xz4I35bC73oTfJXlT985//xNSpU/HDDz+gU6dOePTRR3Hq1Cl06tQJALB27Vr4+flh8uTJMBgMiI+Px4YNG6Tb+/v7Y9++fZg7dy5iY2PRpk0bTJ8+HcuXL5f6REVFIT8/HwsXLsS6devwwAMP4IMPPuDXKRCRx/nSE0+3V/Oh9fd2Cvf40vwRyU32omr79u0OlwcEBCA7OxvZ2dl2+3Tt2hX79+93OM7o0aNRXFzcoIxERM7wyf9X3poHX5r/psjiaB3dXs3H1ZWJDRrDl+axJeBv/xG1ADywkqdw3yL6DxZVRM0Mn+TkxzklX8N90jexqCIiIq9igeA6X5wrX8zkLR7/nioiIiXxxd/d84UnLb63qmn4+vb6ej5v45kqohaqpR8cffG7oaz5ej5qHN6/zQ+LKiLyuKZ88uATFfkC7octE4sqohaoJR7wm9s2u7M9ntr25janRI3F91QRETUT/J6i5ov3ozKwqCIih3gwJyJ3tdTjBosqIpK4eyD01QOnr+aipmG6/7kf2NY/4wBWDfN2iuaJ76kiIkWwfoJ09QnT20+sSviUITU/3Oe8g2eqiEh2DT2gt5QnAnvb6cr2+/octbRPevpCce8L80C/4pkqIqqnMU/6RErR0vfn/hkHvB2h2eGZKiLyCrn+h+/LT4y+nK0pcR6opeCZKiIimSipeFBSViKlYFFFRLKS68maT/pEDcOX772HRRURETU5vp+HmiMWVUREREQy4BvVicguvoxA5Fnefix5e/3NDYsqohZOSQdVJWUl38H9hpoKX/4jaqb4REJE1LRYVBGRx7CwI6KWhEUVERERkQxYVBG1IJ46c+QLH49v7Lb5wjYQkbKxqCKiJsWXBImouWJRRUQtCos6IvIUFlVEREREMmBRRURERCQDFlVE5HV8SY6ImgMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAPZi6oVK1bg4YcfRlBQEEJDQ5GUlISLFy9a9Bk9ejRUKpXFZc6cORZ9rl+/jsTERAQGBiI0NBSLFi3CvXv3LPocOXIEQ4YMgVarRY8ePbB161a5N4eIiIjIJbIXVUePHkVqaipOnToFnU4Ho9GIuLg4VFVVWfSbPXs2bt68KV1WrVolLautrUViYiJqampw8uRJbNu2DVu3bkV6errU58qVK0hMTMSYMWNQUlKCBQsWYNasWThwgD81QURERE2vldwDFhQUWFzfunUrQkNDUVRUhFGjRkntgYGBCA8PtznGwYMHUVpaikOHDiEsLAyDBg1CVlYWFi9ejIyMDGg0GmzcuBFRUVFYvXo1AKBPnz44ceIE1q5di/j4eLk3i4iIiMghj7+nqqKiAgDQoUMHi/acnBx07NgR/fv3x5IlS1BdXS0t0+v1GDBgAMLCwqS2+Ph4VFZW4vz581KfcePGWYwZHx8PvV7vqU0hIiIiskv2M1Xm6urqsGDBAowcORL9+/eX2pOTk9G1a1dERETg7NmzWLx4MS5evIhPP/0UAFBWVmZRUAGQrpeVlTnsU1lZiV9++QWtW7eul8dgMMBgMEjXKysrAQBGoxFGo1GGLYY0HgBo/YRsYzYFU16l5QaUnR1gfm9ScnZAufmVmhtQdnagafPL+dzqyTHl4tGiKjU1FefOncOJEycs2lNSUqR/DxgwAJ07d8bYsWNx+fJldO/e3WN5VqxYgczMzHrtBw8eRGBgoOzryxpaJ/uYTUGpuQFlZweY35uUnB1Qbn6l5gaUnR1omvz79+/3+Dp8iceKqnnz5mHfvn04duwYHnjgAYd9hw8fDgC4dOkSunfvjvDwcJw+fdqiT3l5OQBI78MKDw+X2sz7BAcH2zxLBQBLlixBWlqadL2yshKRkZGIi4tDcHCwexvogNFohE6nwxtn/GCoU8k2rqdp/QSyhtYpLjeg7OwA83uTkrMDys2v1NyAsrMDTZv/XIb873E2Go3YvXu37OPKQfaiSgiB+fPnY9euXThy5AiioqKc3qakpAQA0LlzZwBAbGws/vSnP+HWrVsIDQ0FAOh0OgQHB6Nv375SH+sKWKfTITY21u56tFottFptvXa1Wg21Wu3S9rnDUKeCoVZ5Dzil5gaUnR1gfm9ScnZAufmVmhtQdnagafJ74rnVl8n+RvXU1FR8/PHHyM3NRVBQEMrKylBWVoZffvkFAHD58mVkZWWhqKgIV69exZ49e/DCCy9g1KhRGDhwIAAgLi4Offv2xfPPP4+vvvoKBw4cwNKlS5GamioVRXPmzMG3336LV155BRcuXMCGDRvwySefYOHChXJvEhEREZFTshdVf/7zn1FRUYHRo0ejc+fO0mXHjh0AAI1Gg0OHDiEuLg69e/fGyy+/jMmTJ2Pv3r3SGP7+/ti3bx/8/f0RGxuL5557Di+88AKWL18u9YmKikJ+fj50Oh2io6OxevVqfPDBB/w6BSIiIvIKj7z850hkZCSOHj3qdJyuXbs6fYPb6NGjUVxc7FY+IiIiIk/gb/8RERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMFF9UZWdno1u3bggICMDw4cNx+vRpb0ciIiKiFkjRRdWOHTuQlpaGZcuW4csvv0R0dDTi4+Nx69Ytb0cjIiKiFkbRRdWaNWswe/ZszJgxA3379sXGjRsRGBiIzZs3ezsaERERtTCtvB2goWpqalBUVIQlS5ZIbX5+fhg3bhz0er3N2xgMBhgMBul6RUUFAODHH3+E0WiULZvRaER1dTVaGf1QW6eSbVxPa1UnUF1dp7jcgLKzA8zvTUrODig3v1JzA8rODjRt/h9++EH2MU3PsQAghJB9/EYRCvWvf/1LABAnT560aF+0aJEYNmyYzdssW7ZMAOCFF1544YUXXprB5bvvvmuKksNlij1T1RBLlixBWlqadL2urg4//vgj7rvvPqhU8lXrlZWViIyMxHfffYfg4GDZxvU0peYGlJ0dYH5vUnJ2QLn5lZobUHZ2oPnkLy0tRUREhLfjWFBsUdWxY0f4+/ujvLzcor28vBzh4eE2b6PVaqHVai3aQkJCPBURwcHBitxhlZobUHZ2gPm9ScnZAeXmV2puQNnZAeXnv//+++Hn51tvDfetNG7QaDSIiYlBYWGh1FZXV4fCwkLExsZ6MRkRERG1RIo9UwUAaWlpmD59OoYOHYphw4bh3XffRVVVFWbMmOHtaERERNTCKLqo+u1vf4t///vfSE9PR1lZGQYNGoSCggKEhYV5NZdWq8WyZcvqvdTo65SaG1B2doD5vUnJ2QHl5ldqbkDZ2QHm9ySVEL72eUQiIiIi5VHse6qIiIiIfAmLKiIiIiIZsKgiIiIikgGLKiIiIiIZtJiiasWKFXj44YcRFBSE0NBQJCUl4eLFixZ97t69i9TUVNx3331o27YtJk+eXO/LRf/whz8gJiYGWq0WgwYNqreeixcvYsyYMQgLC0NAQAAefPBBLF261KXfFszOzka3bt0QEBCA4cOH4/Tp0xa5g4OD0bFjR7Rt2xYqlQq3b9/22dwmK1asQGhoKPz8/KBSqaDVajF27FhcuHBB6uOr+U1z36pVK6hUKovLnDlzfDo7ALzyyiv1cpsuO3fu9Pn8K1aswMCBA6FWq+Hn5we1Wo2EhASLfN7Mf+zYMUyaNAkRERFQqVTIy8uzyP7www+jdevW0Gg00Gg0UKlUKCkpkTW7uUuXLiEoKMjlLzT25vFG7tzAf+Zco9HA398f/v7+aN++PZ588knpeOOr2U35g4OD7R5rfD1/Ux1vPJUfAC5fvoynnnoKnTp1QnBwMJ555pl6+Zzy9u/kNJX4+HixZcsWce7cOVFSUiImTpwounTpIu7cuSP1mTNnjoiMjBSFhYXizJkzYsSIEeKRRx6xGGf+/Pli/fr14vnnnxfR0dH11nP58mWxefNmUVJSIq5evSp2794tQkNDxZIlSxzm2759u9BoNGLz5s3i/PnzYvbs2SIkJESMGTNGyv3HP/5R9OrVS4SEhAgA4qeffvLZ3OXl5dK8T58+XWzdulXk5+eLkSNHitatW4v7779f3Lt3z6fzm/aZoUOHiqefflqMHTtW3H///eLy5cuioqLCp7MLIURcXJx49913xZEjR8ShQ4fE2LFjRbt27USbNm3Ezz//7PP5x40bJzp16iTGjh0rdu7cKR577DHRunVrMWTIEFFbW+v1/Pv37xevv/66+PTTTwUAsWvXLmmZad958803xdy5c8WAAQMEYPlbpXJkN6mpqRFDhw4VCQkJol27dg5zC+H9443cuc0fr8uWLRMffvihGDNmjAgPDxcJCQkiMjJS3Lt3z2ezC/HrPtOrVy8xefJk6fF6//33ixs3bkhj+HL+pjreeCr/nTt3xIMPPiieeuopcfbsWXH27Fnx5JNPiocfflg63riixRRV1m7duiUAiKNHjwohhLh9+7ZQq9Vi586dUp9//OMfAoDQ6/X1br9s2TKHd7i5hQsXikcffdRhn2HDhonU1FTpem1trYiIiBArVqywmdt0kFNKblv5L126pIj8jz/+uHjppZcUu8+YmPJPnDhREfkPHDgg/Pz8pALWfN/R6XRez2/OuqiydubMGQFAfPDBB0II+ef+lVdeEc8995zYsmWLS08wvnK88VRu8+ybN28WAERxcbHPZzcda8zze+p448m5N8/vqeON3PmtjzemzCqVSuh0Oqfjm7SYl/+sVVRUAAA6dOgAACgqKoLRaMS4ceOkPr1790aXLl2g1+sbvJ5Lly6hoKAAjz/+uN0+NTU1KCoqsli3n58fxo0bV2/dptwmSsltcvPmTQC//mZTZGSkYvLn5OSgZ8+eAICPPvoI1dXVislu8tlnnwEApk6dCsD39x2DwSC9ZAz8Z9/38/PDiRMnvJrfXT///DMAoF27dgDknfvDhw9j586dyM7Odqm/rxxvPJnbPPvhw4cRFRWF8vJyRWTPyclBx44d8cgjjwAAWrduDUBZcw949njjifzWxxsACAgIkI43rmqRRVVdXR0WLFiAkSNHon///gCAsrIyaDSaeq/NhoWFoayszO11PPLIIwgICMBDDz2Exx57DMuXL7fb9/vvv0dtbW29b4K3XrcptymzUnIDwIYNG9CmTRtER0ejdevWOHr0KDQajSLyJycn46OPPsLAgQPRs2dPFBQU4LnnnlNEdhPTvtO6dWs899xzAHx/3xkxYgTatGmDxYsX486dO5g/fz46d+6Muro63Lx506v53VFXV4esrCwAQI8ePQDIN/c//PADXnzxRWzdutXlH8b1heONJ3MDwPr169G7d28AwBdffAGdTocffvjB57MnJyfj448/RmFhIdq2bQuNRoMVK1YAUM7cA5493ngqv/nxprq6GlVVVfjjH/+I2tpa6WSAK1pkUZWamopz585h+/btHlvHjh078OWXXyI3Nxf5+fl45513AADHjx9H27ZtpUtOTo7LY5pyp6eneyq2R3IDwLRp0/Dkk08iPDwcI0eOxDPPPIO7d+8qIn9KSgr27NmDa9euobCwEB999BF27drl/hsYvZDd5Pe//z2uX7+Ol19+WdbM5uTO36lTJ+zcuRN79+5FUFAQCgoKMGLECAwZMsQjv0zvqflPTU2t96EYucyePRvJyckYNWqUzeW+erzxZG4AKC4uRnh4OHbu3ImePXvimWeeQU1NjRzRPZo9JSUF8fHx2LhxI3766SfpWHP58mVZsgOen3vAs8cbT+U3P960bdsW7dq1w+3bt90+3ij6t/8aYt68edi3bx+OHTuGBx54QGoPDw9HTU0Nbt++bVFJl5eXIzw83O31REZGAgD69u2L2tpapKSk4OWXX8bQoUMtPgEUFhYGrVYLf3//ek/S5us2z33t2jXF5DZ5/fXXcfz4cZw8eRL3338/2rdvj127dikiv/U+0759ewC/nlL29eym/J9++in8/f3xhz/8QWpXwtzHxcUhISEBu3btwv79+xEdHY3w8HA8+OCDXs3vKtO+89e//tXiSUCu7IcPH8aePXukAlAIgbq6OrRq1QqbNm3C1KlTffJ446ncpuwHDx7E8ePHERUVhSeeeALt27fHpUuXfD67Kb9p7kNDQwH8+pK0EubelN+TxxtP5o+Li8Ply5fx/fffo1WrVggJCZGON65qMWeqhBCYN28edu3aJb3Gbi4mJgZqtRqFhYVS28WLF3H9+nXExsY2at11dXUwGo2oq6tD69at0aNHD+kSFBQEjUaDmJgYi3XX1dWhsLAQI0aMUGRu07ptzbv49QMSMBgMPp3f3j5jesA++uijPpsdsJz77t2748knn0SnTp2k/r4899b5jxw5gujoaBw+fBi3bt3CE0884dX8zljvO6aCzUSu7Hq9HiUlJdJl+fLlCAoKQklJCZ566imfPd7IndvR49V0vImIiPDZ7Kac1vlNx5rOnTv79Nxb5/fk8cZT+c117NgRISEhFscbl7n8lnaFmzt3rmjXrp04cuSIuHnzpnSprq6W+syZM0d06dJFHD58WJw5c0bExsaK2NhYi3G++eYbUVxcLH7/+9+Lnj17iuLiYlFcXCwMBoMQQoiPP/5Y7NixQ5SWlorLly+LHTt2iIiICDFt2jSH+bZv3y60Wq3YunWrKC0tFSkpKSIkJERMnz5dyv3VV18JnU4n3nnnHQFAHDt2TBQXF4sXX3zR53KXlZUJIYSYNm2aCAgIEO+//7744osvxJ49e8T48eNF+/btpY+y+uK8l5WViblz54qgoCAxY8YMUVBQIE6fPi22bt0qunXrJkaNGuXT2YX4zz7/8ccfCwAiJydHEfu8ef7WrVuL7OxsodfrxXvvvSdCQkLE/PnzfSL/zz//LI0FQKxZs0YUFxeLa9euSXO/Z88eodPpxF/+8hcBQHz00UeiuLhY3Lx5U5bs1lz9JJS3jzdy5zZ/vM6aNUsUFBRIx5uJEyeKDh06iPLycp+cc9P+npycLB0rTceaLl26iJEjR0pj+HL+pjreeCq/EEJs3rxZ6PV6cenSJfGXv/xFdOjQQaSlpTkd21yLKarwfx8Ltr5s2bJF6vPLL7+I//f//p9o3769CAwMFE899ZS4efOmxTiPP/64zXGuXLkihPj1jhsyZIho27ataNOmjejbt6948803xS+//OI043vvvSe6dOkiNBqNGDZsmDh16pTd3OaXTZs2+VxuZ/P+5ptv+vS8O8qekJAgfezWV7M7yu/r+7yz/Js3b/aJ/H//+99tjjt9+nSnj9lly5bJkt2aq08w9ua+qY43cucWwv7+MmLECHHhwgUhhDz7iyeyO8q/YcMGqY8S88t9vPFUfiGEWLx4sQgLCxNqtVo89NBDYvXq1aKurs6lsU1UQggBIiIiImqUFvOeKiIiIiJPYlFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJIP/H6/lA+KJfyx/AAAAAElFTkSuQmCC\n",
|
491 |
+
"text/plain": [
|
492 |
+
"<Figure size 640x480 with 1 Axes>"
|
493 |
+
]
|
494 |
+
},
|
495 |
+
"metadata": {},
|
496 |
+
"output_type": "display_data"
|
497 |
+
}
|
498 |
+
],
|
499 |
+
"source": [
|
500 |
+
"df.date.hist(bins=400)"
|
501 |
+
]
|
502 |
+
},
|
503 |
+
{
|
504 |
+
"cell_type": "code",
|
505 |
+
"execution_count": 26,
|
506 |
+
"id": "19d6539b",
|
507 |
+
"metadata": {},
|
508 |
+
"outputs": [],
|
509 |
+
"source": [
|
510 |
+
"new_df = df.drop_duplicates(subset=['id'], keep=\"first\")"
|
511 |
+
]
|
512 |
+
},
|
513 |
+
{
|
514 |
+
"cell_type": "code",
|
515 |
+
"execution_count": null,
|
516 |
+
"id": "466cd2c7",
|
517 |
+
"metadata": {},
|
518 |
+
"outputs": [],
|
519 |
+
"source": [
|
520 |
+
"new_df.date.hist(bins-)"
|
521 |
+
]
|
522 |
+
}
|
523 |
+
],
|
524 |
+
"metadata": {
|
525 |
+
"kernelspec": {
|
526 |
+
"display_name": "Python 3 (ipykernel)",
|
527 |
+
"language": "python",
|
528 |
+
"name": "python3"
|
529 |
+
},
|
530 |
+
"language_info": {
|
531 |
+
"codemirror_mode": {
|
532 |
+
"name": "ipython",
|
533 |
+
"version": 3
|
534 |
+
},
|
535 |
+
"file_extension": ".py",
|
536 |
+
"mimetype": "text/x-python",
|
537 |
+
"name": "python",
|
538 |
+
"nbconvert_exporter": "python",
|
539 |
+
"pygments_lexer": "ipython3",
|
540 |
+
"version": "3.10.8"
|
541 |
+
}
|
542 |
+
},
|
543 |
+
"nbformat": 4,
|
544 |
+
"nbformat_minor": 5
|
545 |
+
}
|
utilities/pushshift_data.py
CHANGED
@@ -148,8 +148,9 @@ def submissions_to_dataframe(submissions: List[Dict[str, Any]]) -> pd.DataFrame:
|
|
148 |
df = df.convert_dtypes()
|
149 |
df = df[cols]
|
150 |
# Convert the "created_utc" column to a datetime column with timezone information
|
151 |
-
df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC')
|
152 |
-
|
|
|
153 |
return df
|
154 |
|
155 |
|
|
|
148 |
df = df.convert_dtypes()
|
149 |
df = df[cols]
|
150 |
# Convert the "created_utc" column to a datetime column with timezone information
|
151 |
+
df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC')
|
152 |
+
df['date'] = df['created_utc'].dt.date
|
153 |
+
df['time'] = df['created_utc'].dt.time
|
154 |
return df
|
155 |
|
156 |
|