derek-thomas HF staff committed on
Commit
9de4dba
1 Parent(s): fc00c85

Updates for datetime format and correcting most_recent_date

Browse files
Files changed (3) hide show
  1. main.py +15 -11
  2. notebooks/validate.ipynb +545 -0
  3. utilities/pushshift_data.py +3 -2
main.py CHANGED
@@ -59,7 +59,7 @@ def main(date_to_fetch):
59
  Runs the main data processing function to fetch and process subreddit data for the specified date.
60
 
61
  Args:
62
- date_to_fetch (str): The date to fetch subreddit data for, in the format "YYYY-MM-DD".
63
 
64
  Returns:
65
  most_recent_date (str): Most recent date in dataset
@@ -67,7 +67,7 @@ def main(date_to_fetch):
67
 
68
  # Load the existing dataset from the Hugging Face hub or create a new one
69
  try:
70
- dataset = load_dataset(dataset_name)
71
  logger.info("Loading existing dataset")
72
  if "__index_level_0__" in dataset["all_days"].column_names:
73
  dataset = dataset.remove_columns(["__index_level_0__"])
@@ -76,11 +76,11 @@ def main(date_to_fetch):
76
  dataset = DatasetDict()
77
 
78
  # Call get_subreddit_day with the calculated date
79
- logger.info(f"Fetching data for {date_to_fetch}")
80
- submissions = scrape_submissions_by_day(subreddit, date_to_fetch)
81
  df = submissions_to_dataframe(submissions)
82
- logger.info(f"Data fetched for {date_to_fetch}")
83
- most_recent_date = datetime.strptime(date_to_fetch, '%Y-%m-%d').date()
84
 
85
  # Append DataFrame to split 'all_days' or create new split
86
  if "all_days" in dataset:
@@ -93,10 +93,14 @@ def main(date_to_fetch):
93
 
94
  # Drop duplicates just in case
95
  new_data = new_data.drop_duplicates(subset=['id'], keep="first")
96
- new_data_most_recent_date_raw = new_data['created_utc'].max()
97
- new_data_most_recent_date_dt = datetime.strptime(new_data_most_recent_date_raw.split(' ')[0], '%Y-%m-%d').date()
98
- # Adding timedelta in case there is rounding error
99
- most_recent_date = max(new_data_most_recent_date_dt - timedelta(days=1), most_recent_date)
 
 
 
 
100
 
101
  # Convert back to dataset
102
  dataset["all_days"] = Dataset.from_pandas(new_data)
@@ -133,7 +137,7 @@ def run_main_continuously():
133
 
134
  if start_date <= two_days_ago:
135
  logger.info(f"Running main function for date: {start_date}")
136
- most_recent_date = main(str(start_date))
137
  start_date = most_recent_date + timedelta(days=1)
138
  else:
139
  tomorrow = today + timedelta(days=1)
 
59
  Runs the main data processing function to fetch and process subreddit data for the specified date.
60
 
61
  Args:
62
+ date_to_fetch (datetime.date): The date to fetch subreddit data for
63
 
64
  Returns:
65
  most_recent_date (datetime.date): Most recent date in dataset
 
67
 
68
  # Load the existing dataset from the Hugging Face hub or create a new one
69
  try:
70
+ dataset = load_dataset(dataset_name, download_mode="reuse_cache_if_exists", ignore_verifications=True)
71
  logger.info("Loading existing dataset")
72
  if "__index_level_0__" in dataset["all_days"].column_names:
73
  dataset = dataset.remove_columns(["__index_level_0__"])
 
76
  dataset = DatasetDict()
77
 
78
  # Call scrape_submissions_by_day with the requested date
79
+ logger.info(f"Fetching data for {str(date_to_fetch)}")
80
+ submissions = scrape_submissions_by_day(subreddit, str(date_to_fetch))
81
  df = submissions_to_dataframe(submissions)
82
+ logger.info(f"Data fetched for {str(date_to_fetch)}")
83
+ most_recent_date = start_date
84
 
85
  # Append DataFrame to split 'all_days' or create new split
86
  if "all_days" in dataset:
 
93
 
94
  # Drop duplicates just in case
95
  new_data = new_data.drop_duplicates(subset=['id'], keep="first")
96
+
97
+ # Figure out dates when we restart
98
+ old_data_most_recent_date = old_data['date'].max()
99
+ most_recent_date = max(old_data_most_recent_date, most_recent_date)
100
+
101
+ if len(old_data) == len(new_data):
102
+ logger.warning("Data in hub is much more recent, using that next!")
103
+ return most_recent_date
104
 
105
  # Convert back to dataset
106
  dataset["all_days"] = Dataset.from_pandas(new_data)
 
137
 
138
  if start_date <= two_days_ago:
139
  logger.info(f"Running main function for date: {start_date}")
140
+ most_recent_date = main(start_date)
141
  start_date = most_recent_date + timedelta(days=1)
142
  else:
143
  tomorrow = today + timedelta(days=1)
notebooks/validate.ipynb ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "730ba509",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from IPython.core.interactiveshell import InteractiveShell\n",
11
+ "InteractiveShell.ast_node_interactivity = \"all\""
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "id": "d9acd4b6",
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "from pathlib import Path\n",
22
+ "import sys\n",
23
+ "proj_dir = Path.cwd().parent\n",
24
+ "\n",
25
+ "sys.path.append(str(proj_dir))\n"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 4,
31
+ "id": "62452860",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "from datasets import load_dataset"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 10,
41
+ "id": "9264a232",
42
+ "metadata": {},
43
+ "outputs": [
44
+ {
45
+ "name": "stderr",
46
+ "output_type": "stream",
47
+ "text": [
48
+ "Using custom data configuration derek-thomas--dataset-creator-askreddit-806417599346c17a\n"
49
+ ]
50
+ },
51
+ {
52
+ "name": "stdout",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "Downloading and preparing dataset None/None to /Users/derekthomas/.cache/huggingface/datasets/derek-thomas___parquet/derek-thomas--dataset-creator-askreddit-806417599346c17a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...\n"
56
+ ]
57
+ },
58
+ {
59
+ "data": {
60
+ "application/vnd.jupyter.widget-view+json": {
61
+ "model_id": "b65ec8c7f33a40eeac5d15e6a527f830",
62
+ "version_major": 2,
63
+ "version_minor": 0
64
+ },
65
+ "text/plain": [
66
+ "Downloading data files: 0%| | 0/1 [00:00<?, ?it/s]"
67
+ ]
68
+ },
69
+ "metadata": {},
70
+ "output_type": "display_data"
71
+ },
72
+ {
73
+ "data": {
74
+ "application/vnd.jupyter.widget-view+json": {
75
+ "model_id": "2d93949f1f0144779349c73c58a68ca9",
76
+ "version_major": 2,
77
+ "version_minor": 0
78
+ },
79
+ "text/plain": [
80
+ "Extracting data files: 0%| | 0/1 [00:00<?, ?it/s]"
81
+ ]
82
+ },
83
+ "metadata": {},
84
+ "output_type": "display_data"
85
+ },
86
+ {
87
+ "data": {
88
+ "application/vnd.jupyter.widget-view+json": {
89
+ "model_id": "",
90
+ "version_major": 2,
91
+ "version_minor": 0
92
+ },
93
+ "text/plain": [
94
+ "Generating all_days split: 0%| | 0/2468888 [00:00<?, ? examples/s]"
95
+ ]
96
+ },
97
+ "metadata": {},
98
+ "output_type": "display_data"
99
+ },
100
+ {
101
+ "name": "stdout",
102
+ "output_type": "stream",
103
+ "text": [
104
+ "Dataset parquet downloaded and prepared to /Users/derekthomas/.cache/huggingface/datasets/derek-thomas___parquet/derek-thomas--dataset-creator-askreddit-806417599346c17a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.\n"
105
+ ]
106
+ },
107
+ {
108
+ "data": {
109
+ "application/vnd.jupyter.widget-view+json": {
110
+ "model_id": "0e62c7e8b3c74aa5af3b87ab17e6cb1f",
111
+ "version_major": 2,
112
+ "version_minor": 0
113
+ },
114
+ "text/plain": [
115
+ " 0%| | 0/1 [00:00<?, ?it/s]"
116
+ ]
117
+ },
118
+ "metadata": {},
119
+ "output_type": "display_data"
120
+ }
121
+ ],
122
+ "source": [
123
+ "dataset = load_dataset('derek-thomas/dataset-creator-askreddit', download_mode=\"reuse_cache_if_exists\", ignore_verifications=True)"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 12,
129
+ "id": "ba84be68",
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "data": {
134
+ "text/html": [
135
+ "<div>\n",
136
+ "<style scoped>\n",
137
+ " .dataframe tbody tr th:only-of-type {\n",
138
+ " vertical-align: middle;\n",
139
+ " }\n",
140
+ "\n",
141
+ " .dataframe tbody tr th {\n",
142
+ " vertical-align: top;\n",
143
+ " }\n",
144
+ "\n",
145
+ " .dataframe thead th {\n",
146
+ " text-align: right;\n",
147
+ " }\n",
148
+ "</style>\n",
149
+ "<table border=\"1\" class=\"dataframe\">\n",
150
+ " <thead>\n",
151
+ " <tr style=\"text-align: right;\">\n",
152
+ " <th></th>\n",
153
+ " <th>score</th>\n",
154
+ " <th>num_comments</th>\n",
155
+ " <th>title</th>\n",
156
+ " <th>permalink</th>\n",
157
+ " <th>selftext</th>\n",
158
+ " <th>url</th>\n",
159
+ " <th>created_utc</th>\n",
160
+ " <th>author</th>\n",
161
+ " <th>id</th>\n",
162
+ " <th>downs</th>\n",
163
+ " <th>ups</th>\n",
164
+ " </tr>\n",
165
+ " </thead>\n",
166
+ " <tbody>\n",
167
+ " <tr>\n",
168
+ " <th>0</th>\n",
169
+ " <td>2</td>\n",
170
+ " <td>4</td>\n",
171
+ " <td>Reddit, if someone had to describe you to a st...</td>\n",
172
+ " <td>/r/AskReddit/comments/15sn6y/reddit_if_someone...</td>\n",
173
+ " <td>They would be talking about you without your p...</td>\n",
174
+ " <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
175
+ " <td>2013-01-01 23:59:40</td>\n",
176
+ " <td>[deleted]</td>\n",
177
+ " <td>15sn6y</td>\n",
178
+ " <td>0</td>\n",
179
+ " <td>2</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>1</th>\n",
183
+ " <td>5</td>\n",
184
+ " <td>24</td>\n",
185
+ " <td>What kind of car does the average \\nRedditor d...</td>\n",
186
+ " <td>/r/AskReddit/comments/15sn6m/what_kind_of_car_...</td>\n",
187
+ " <td>I've always wanted to know what kind of car th...</td>\n",
188
+ " <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
189
+ " <td>2013-01-01 23:59:31</td>\n",
190
+ " <td>PaytonAdams</td>\n",
191
+ " <td>15sn6m</td>\n",
192
+ " <td>0</td>\n",
193
+ " <td>5</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>2</th>\n",
197
+ " <td>1</td>\n",
198
+ " <td>5</td>\n",
199
+ " <td>What movies have made you go back to the theat...</td>\n",
200
+ " <td>/r/AskReddit/comments/15sn6b/what_movies_have_...</td>\n",
201
+ " <td></td>\n",
202
+ " <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
203
+ " <td>2013-01-01 23:59:20</td>\n",
204
+ " <td>[deleted]</td>\n",
205
+ " <td>15sn6b</td>\n",
206
+ " <td>0</td>\n",
207
+ " <td>1</td>\n",
208
+ " </tr>\n",
209
+ " <tr>\n",
210
+ " <th>3</th>\n",
211
+ " <td>0</td>\n",
212
+ " <td>18</td>\n",
213
+ " <td>Worst fear(s)?</td>\n",
214
+ " <td>/r/AskReddit/comments/15sn4u/worst_fears/</td>\n",
215
+ " <td>So what is your worst fear, reddit?</td>\n",
216
+ " <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
217
+ " <td>2013-01-01 23:58:37</td>\n",
218
+ " <td>[deleted]</td>\n",
219
+ " <td>15sn4u</td>\n",
220
+ " <td>0</td>\n",
221
+ " <td>0</td>\n",
222
+ " </tr>\n",
223
+ " <tr>\n",
224
+ " <th>4</th>\n",
225
+ " <td>11</td>\n",
226
+ " <td>29</td>\n",
227
+ " <td>If there was a type of ink that lasted only fo...</td>\n",
228
+ " <td>/r/AskReddit/comments/15sn44/if_there_was_a_ty...</td>\n",
229
+ " <td></td>\n",
230
+ " <td>http://www.reddit.com/r/AskReddit/comments/15s...</td>\n",
231
+ " <td>2013-01-01 23:58:15</td>\n",
232
+ " <td>Honeybeard</td>\n",
233
+ " <td>15sn44</td>\n",
234
+ " <td>0</td>\n",
235
+ " <td>11</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>...</th>\n",
239
+ " <td>...</td>\n",
240
+ " <td>...</td>\n",
241
+ " <td>...</td>\n",
242
+ " <td>...</td>\n",
243
+ " <td>...</td>\n",
244
+ " <td>...</td>\n",
245
+ " <td>...</td>\n",
246
+ " <td>...</td>\n",
247
+ " <td>...</td>\n",
248
+ " <td>...</td>\n",
249
+ " <td>...</td>\n",
250
+ " </tr>\n",
251
+ " <tr>\n",
252
+ " <th>3293628</th>\n",
253
+ " <td>1</td>\n",
254
+ " <td>1</td>\n",
255
+ " <td>Help me get an idea of cost of living</td>\n",
256
+ " <td>/r/AskReddit/comments/2cjj63/help_me_get_an_id...</td>\n",
257
+ " <td></td>\n",
258
+ " <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
259
+ " <td>2014-08-04 00:01:20</td>\n",
260
+ " <td>bbent4698</td>\n",
261
+ " <td>2cjj63</td>\n",
262
+ " <td>0</td>\n",
263
+ " <td>1</td>\n",
264
+ " </tr>\n",
265
+ " <tr>\n",
266
+ " <th>3293629</th>\n",
267
+ " <td>2</td>\n",
268
+ " <td>0</td>\n",
269
+ " <td>If you used a prism to separate light and then...</td>\n",
270
+ " <td>/r/AskReddit/comments/2cjj5v/if_you_used_a_pri...</td>\n",
271
+ " <td></td>\n",
272
+ " <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
273
+ " <td>2014-08-04 00:01:19</td>\n",
274
+ " <td>Ajmb_88</td>\n",
275
+ " <td>2cjj5v</td>\n",
276
+ " <td>0</td>\n",
277
+ " <td>2</td>\n",
278
+ " </tr>\n",
279
+ " <tr>\n",
280
+ " <th>3293630</th>\n",
281
+ " <td>0</td>\n",
282
+ " <td>11</td>\n",
283
+ " <td>Reddit, what was it like the first time you go...</td>\n",
284
+ " <td>/r/AskReddit/comments/2cjj4s/reddit_what_was_i...</td>\n",
285
+ " <td></td>\n",
286
+ " <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
287
+ " <td>2014-08-04 00:01:01</td>\n",
288
+ " <td>da-gonzo</td>\n",
289
+ " <td>2cjj4s</td>\n",
290
+ " <td>0</td>\n",
291
+ " <td>0</td>\n",
292
+ " </tr>\n",
293
+ " <tr>\n",
294
+ " <th>3293631</th>\n",
295
+ " <td>1452</td>\n",
296
+ " <td>3140</td>\n",
297
+ " <td>People who refuse to be organ donors, why do y...</td>\n",
298
+ " <td>/r/AskReddit/comments/2cjj31/people_who_refuse...</td>\n",
299
+ " <td>R.I.P my inbox</td>\n",
300
+ " <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
301
+ " <td>2014-08-04 00:00:36</td>\n",
302
+ " <td>JohnnySniperr</td>\n",
303
+ " <td>2cjj31</td>\n",
304
+ " <td>0</td>\n",
305
+ " <td>1452</td>\n",
306
+ " </tr>\n",
307
+ " <tr>\n",
308
+ " <th>3293632</th>\n",
309
+ " <td>2</td>\n",
310
+ " <td>9</td>\n",
311
+ " <td>What always happens when you travel abroad?</td>\n",
312
+ " <td>/r/AskReddit/comments/2cjj2a/what_always_happe...</td>\n",
313
+ " <td></td>\n",
314
+ " <td>http://www.reddit.com/r/AskReddit/comments/2cj...</td>\n",
315
+ " <td>2014-08-04 00:00:23</td>\n",
316
+ " <td>Nicopip</td>\n",
317
+ " <td>2cjj2a</td>\n",
318
+ " <td>0</td>\n",
319
+ " <td>2</td>\n",
320
+ " </tr>\n",
321
+ " </tbody>\n",
322
+ "</table>\n",
323
+ "<p>3293633 rows × 11 columns</p>\n",
324
+ "</div>"
325
+ ],
326
+ "text/plain": [
327
+ " score num_comments \\\n",
328
+ "0 2 4 \n",
329
+ "1 5 24 \n",
330
+ "2 1 5 \n",
331
+ "3 0 18 \n",
332
+ "4 11 29 \n",
333
+ "... ... ... \n",
334
+ "3293628 1 1 \n",
335
+ "3293629 2 0 \n",
336
+ "3293630 0 11 \n",
337
+ "3293631 1452 3140 \n",
338
+ "3293632 2 9 \n",
339
+ "\n",
340
+ " title \\\n",
341
+ "0 Reddit, if someone had to describe you to a st... \n",
342
+ "1 What kind of car does the average \\nRedditor d... \n",
343
+ "2 What movies have made you go back to the theat... \n",
344
+ "3 Worst fear(s)? \n",
345
+ "4 If there was a type of ink that lasted only fo... \n",
346
+ "... ... \n",
347
+ "3293628 Help me get an idea of cost of living \n",
348
+ "3293629 If you used a prism to separate light and then... \n",
349
+ "3293630 Reddit, what was it like the first time you go... \n",
350
+ "3293631 People who refuse to be organ donors, why do y... \n",
351
+ "3293632 What always happens when you travel abroad? \n",
352
+ "\n",
353
+ " permalink \\\n",
354
+ "0 /r/AskReddit/comments/15sn6y/reddit_if_someone... \n",
355
+ "1 /r/AskReddit/comments/15sn6m/what_kind_of_car_... \n",
356
+ "2 /r/AskReddit/comments/15sn6b/what_movies_have_... \n",
357
+ "3 /r/AskReddit/comments/15sn4u/worst_fears/ \n",
358
+ "4 /r/AskReddit/comments/15sn44/if_there_was_a_ty... \n",
359
+ "... ... \n",
360
+ "3293628 /r/AskReddit/comments/2cjj63/help_me_get_an_id... \n",
361
+ "3293629 /r/AskReddit/comments/2cjj5v/if_you_used_a_pri... \n",
362
+ "3293630 /r/AskReddit/comments/2cjj4s/reddit_what_was_i... \n",
363
+ "3293631 /r/AskReddit/comments/2cjj31/people_who_refuse... \n",
364
+ "3293632 /r/AskReddit/comments/2cjj2a/what_always_happe... \n",
365
+ "\n",
366
+ " selftext \\\n",
367
+ "0 They would be talking about you without your p... \n",
368
+ "1 I've always wanted to know what kind of car th... \n",
369
+ "2 \n",
370
+ "3 So what is your worst fear, reddit? \n",
371
+ "4 \n",
372
+ "... ... \n",
373
+ "3293628 \n",
374
+ "3293629 \n",
375
+ "3293630 \n",
376
+ "3293631 R.I.P my inbox \n",
377
+ "3293632 \n",
378
+ "\n",
379
+ " url \\\n",
380
+ "0 http://www.reddit.com/r/AskReddit/comments/15s... \n",
381
+ "1 http://www.reddit.com/r/AskReddit/comments/15s... \n",
382
+ "2 http://www.reddit.com/r/AskReddit/comments/15s... \n",
383
+ "3 http://www.reddit.com/r/AskReddit/comments/15s... \n",
384
+ "4 http://www.reddit.com/r/AskReddit/comments/15s... \n",
385
+ "... ... \n",
386
+ "3293628 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
387
+ "3293629 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
388
+ "3293630 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
389
+ "3293631 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
390
+ "3293632 http://www.reddit.com/r/AskReddit/comments/2cj... \n",
391
+ "\n",
392
+ " created_utc author id downs ups \n",
393
+ "0 2013-01-01 23:59:40 [deleted] 15sn6y 0 2 \n",
394
+ "1 2013-01-01 23:59:31 PaytonAdams 15sn6m 0 5 \n",
395
+ "2 2013-01-01 23:59:20 [deleted] 15sn6b 0 1 \n",
396
+ "3 2013-01-01 23:58:37 [deleted] 15sn4u 0 0 \n",
397
+ "4 2013-01-01 23:58:15 Honeybeard 15sn44 0 11 \n",
398
+ "... ... ... ... ... ... \n",
399
+ "3293628 2014-08-04 00:01:20 bbent4698 2cjj63 0 1 \n",
400
+ "3293629 2014-08-04 00:01:19 Ajmb_88 2cjj5v 0 2 \n",
401
+ "3293630 2014-08-04 00:01:01 da-gonzo 2cjj4s 0 0 \n",
402
+ "3293631 2014-08-04 00:00:36 JohnnySniperr 2cjj31 0 1452 \n",
403
+ "3293632 2014-08-04 00:00:23 Nicopip 2cjj2a 0 2 \n",
404
+ "\n",
405
+ "[3293633 rows x 11 columns]"
406
+ ]
407
+ },
408
+ "execution_count": 12,
409
+ "metadata": {},
410
+ "output_type": "execute_result"
411
+ }
412
+ ],
413
+ "source": [
414
+ "df = dataset['all_days'].to_pandas()\n",
415
+ "df"
416
+ ]
417
+ },
418
+ {
419
+ "cell_type": "code",
420
+ "execution_count": 16,
421
+ "id": "b5bbfa15",
422
+ "metadata": {},
423
+ "outputs": [
424
+ {
425
+ "data": {
426
+ "text/plain": [
427
+ "score Int64\n",
428
+ "num_comments Int64\n",
429
+ "title string\n",
430
+ "permalink string\n",
431
+ "selftext string\n",
432
+ "url string\n",
433
+ "created_utc string\n",
434
+ "author string\n",
435
+ "id string\n",
436
+ "downs Int64\n",
437
+ "ups Int64\n",
438
+ "dtype: object"
439
+ ]
440
+ },
441
+ "execution_count": 16,
442
+ "metadata": {},
443
+ "output_type": "execute_result"
444
+ }
445
+ ],
446
+ "source": [
447
+ "df.convert_dtypes().dtypes"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "code",
452
+ "execution_count": 18,
453
+ "id": "c4292c7c",
454
+ "metadata": {},
455
+ "outputs": [],
456
+ "source": [
457
+ "import pandas as pd"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": 21,
463
+ "id": "5a516c19",
464
+ "metadata": {},
465
+ "outputs": [],
466
+ "source": [
467
+ "df['created_utc'] = pd.to_datetime(df['created_utc'])\n",
468
+ "df['date'] = df['created_utc'].dt.date\n",
469
+ "df['time'] = df['created_utc'].dt.time"
470
+ ]
471
+ },
472
+ {
473
+ "cell_type": "code",
474
+ "execution_count": 25,
475
+ "id": "22d87986",
476
+ "metadata": {},
477
+ "outputs": [
478
+ {
479
+ "data": {
480
+ "text/plain": [
481
+ "<Axes: >"
482
+ ]
483
+ },
484
+ "execution_count": 25,
485
+ "metadata": {},
486
+ "output_type": "execute_result"
487
+ },
488
+ {
489
+ "data": {
490
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGdCAYAAAA7VYb2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJX0lEQVR4nO3dfVhUdf4//ueAM4MoiKaAFCpp3isqppJluiqI/CzKj21iZa7K6hfdlNbMMgT5bJql5idxzau8aYPV3CvxBj/oiOtdjpkEmbJ6pXnTroLbjZCQwwjv3x995uzMMLdwhpkDz8d1zYXzPu95n+d5z5kzL8/cqYQQAkRERETUKH7eDkBERETUHLCoIiIiIpIBiyoiIiIiGbCoIiIiIpIBiyoiIiIiGbCoIiIiIpIBiyoiIiIiGbCoIiIiIpJBK28H8Ka6ujrcuHEDQUFBUKlU3o5DRERELhBC4Oeff0ZERAT8/Hzn/FCLLqpu3LiByMhIb8cgIiKiBvjuu+/wwAMPeDuGpEUXVUFBQQB+vVOCg4NlG9doNOLgwYOIi4uDWq2WbVxPU2puQNnZAeb3JiVnB5SbX6m5AWVnB5pH/ry8PMyaNUt6HvcVLbqoMr3kFxwcLHtRFRgYiODgYEXtsErNDSg7O8D83qTk7IBy8ys1N6Ds7EDzyQ/A59664zsvRBIREREpGIsqIiIiIhmwqCIiIiKSAYsqIiIiIhmwqCIiIiKSgVtF1YoVK/Dwww8jKCgIoaGhSEpKwsWLFy363L17F6mpqbjvvvvQtm1bTJ48GeXl5RZ9rl+/jsTERAQGBiI0NBSLFi3CvXv3LPocOXIEQ4YMgVarRY8ePbB169Z6ebKzs9GtWzcEBARg+PDhOH36tDubQ0RERCQbt4qqo0ePIjU1FadOnYJOp4PRaERcXByqqqqkPgsXLsTevXuxc+dOHD16FDdu3MDTTz8tLa+trUViYiJqampw8uRJbNu2DVu3bkV6errU58qVK0hMTMSYMWNQUlKCBQsWYNasWThw4IDUZ8eOHUhLS8OyZcvw5ZdfIjo6GvHx8bh161Zj5oOIiIioQdz6nqqCggKL61u3bkVoaCiKioowatQoVFRU4MMPP0Rubi5+85vfAAC2bNmCPn364NSpUxgxYgQOHjyI0tJSHDp0CGFhYRg0aBCysrKwePFiZGRkQKPRYOPGjYiKisLq1asBAH369MGJEyewdu1axMfHAwDWrFmD2bNnY8aMGQCAjRs3Ij8/H5s3b8arr77a6IkhIiIickejvvyzoqICANChQwcAQFFREYxGI8aNGyf16d27N7p06QK9Xo8RI0ZAr9djwIABCAsLk/rEx8dj7ty5OH/+PAYPHgy9Xm8xhqnPggULAAA1NTUoKirCkiVLpOV+fn4YN24c9Hq93bwGgwEGg0G6XllZCeDXLxIzGo0NnIX6TGPJOWZTUGpuQNnZAeb3JiVnB5SbX6m5AWVnB5pPfl/U4KKqrq4OCxYswMiRI9G/f38AQFlZGTQaDUJCQiz6hoWFoaysTOpjXlCZlpuWOepTWVmJX375BT/99BNqa2tt9rlw4YLdzCtWrEBmZma99oMHD0rfzionnU4n+5hNQam5AWVnB5jfm5ScHVBufqXmBpSdHVB+fl/U4KIqNTUV586dw4kTJ+TM41FLlixBWlqadL2yshKRkZGIi4uT/WdqdDodxo8fr6ifAFBqbkDZ2QHm9yYlZweUm1+puQFlZweaR/7du3d7O4ZNDSqq5s2bh3379uHYsWMWvw4dHh6Ompoa3L592+JsVXl5OcLDw6U+1p/SM3060LyP9ScGy8vLERwcjNatW8Pf3x/+/v42+5jGsEWr1UKr1dZrV6vVHtmxPDWupyk1N6Ds7ADze5OSswPKza/U3ICyswPKz++L3Pr0nxAC8+bNw65du3D48GFERUVZLI+JiYFarUZhYaHUdvHiRVy/fh2xsbEAgNjYWHz99dcW
n9LT6XQIDg5G3759pT7mY5j6mMbQaDSIiYmx6FNXV4fCwkKpDxEREVFTcutMVWpqKnJzc7F7924EBQVJ74Fq164dWrdujXbt2mHmzJlIS0tDhw4dEBwcjPnz5yM2NhYjRowAAMTFxaFv3754/vnnsWrVKpSVlWHp0qVITU2VziLNmTMH69evxyuvvILf/e53OHz4MD755BPk5+dLWdLS0jB9+nQMHToUw4YNw7vvvouqqirp04BERERETcmtourPf/4zAGD06NEW7Vu2bMGLL74IAFi7di38/PwwefJkGAwGxMfHY8OGDVJff39/7Nu3D3PnzkVsbCzatGmD6dOnY/ny5VKfqKgo5OfnY+HChVi3bh0eeOABfPDBB9LXKQDAb3/7W/z73/9Geno6ysrKMGjQIBQUFNR78zr5lm6v5uPqykRvxyAiIpKdW0WVEMJpn4CAAGRnZyM7O9tun65du2L//v0Oxxk9ejSKi4sd9pk3bx7mzZvnNBMRERGRp/G3/4iIiIhkwKKKiIiISAYsqoiIiIhkwKKKiIiISAYsqoiIiIhkwKKKfEa3V/OddyIiIvJRLKrIK1hAERFRc8OiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiinxat1fz+UWhRESkCCyqiIiIiGTAooqIiIhIBiyqiIiIiGTAosqH8L1D3sX3bxERUWOwqCIiIiKSAYsqIiIiIhmwqCIiIiKSAYsqIiIiIhmwqCJZ8A3eRETU0rGoIq9jQUZERM0BiypyCwsgIiIi21hUkeKwsCMiIl/EooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqyCG+f4mIiMg1LKqIiIiIZMCiioiIiEgGbhdVx44dw6RJkxAREQGVSoW8vDyL5SqVyubl7bfflvp069at3vKVK1dajHP27Fk89thjCAgIQGRkJFatWlUvy86dO9G7d28EBARgwIAB2L9/v7ubQy0YX9okIiI5uV1UVVVVITo6GtnZ2TaX37x50+KyefNmqFQqTJ482aLf8uXLLfrNnz9fWlZZWYm4uDh07doVRUVFePvtt5GRkYFNmzZJfU6ePImpU6di5syZKC4uRlJSEpKSknDu3Dl3N4nI57EAJCLyfa3cvUFCQgISEhLsLg8PD7e4vnv3bowZMwYPPvigRXtQUFC9viY5OTmoqanB5s2bodFo0K9fP5SUlGDNmjVISUkBAKxbtw4TJkzAokWLAABZWVnQ6XRYv349Nm7c6O5mUTPU7dV8XF2Z6O0YHtGct42ISKncLqrcUV5ejvz8fGzbtq3espUrVyIrKwtdunRBcnIyFi5ciFatfo2j1+sxatQoaDQaqX98fDzeeust/PTTT2jfvj30ej3S0tIsxoyPj6/3cqQ5g8EAg8EgXa+srAQAGI1GGI3GxmyqBdNY9sbsn3EA5zLi67Vr/YXFbez18xRbua0zWV931m6vn3l/W23m/Z1lcjW7s8y21gU0zf3gbJ9x9X7wFmf5fZmSswPKza/U3ICyswPNJ78vUgkhRINvrFJh165dSEpKsrl81apVWLlyJW7cuIGAgACpfc2aNRgyZAg6dOiAkydPYsmSJZgxYwbWrFkDAIiLi0NUVBTef/996TalpaXo168fSktL0adPH2g0Gmzbtg1Tp06V+mzYsAGZmZkoLy+3mScjIwOZmZn12nNzcxEYGNiQKSAiIqImVl1djeTkZFRUVCA4ONjbcSQePVO1efNmTJs2zaKgAmBxhmngwIHQaDT4/e9/jxUrVkCr1Xosz5IlSyzWXVlZicjISMTFxcl6pxiNRuh0OowfPx5qtbrecntnPqzbvXGmyjq3q5lczWrqZ97fVpt5fwBOM7iS3VlmW+tyZ9saw919pqn3DWec5fdlSs4OKDe/UnMDys4ONI/8u3fv9nYMmzxWVB0/fhwXL17Ejh07nPYdPnw47t27h6tXr6JX
r14IDw+vd7bJdN30Pix7fey9TwsAtFqtzaJNrVZ7ZMeyN66hVuVSu71+nmae29VMrmY19TPvb6vNvL8pkyvrcpTdWWZb67I3jqfe0+TqPuOtfcMZTz2WmoKSswPKza/U3ICyswPKz++LPPY9VR9++CFiYmIQHR3ttG9JSQn8/PwQGhoKAIiNjcWxY8csXjfV6XTo1asX2rdvL/UpLCy0GEen0yE2NlbGrSAiIiJyjdtF1Z07d1BSUoKSkhIAwJUrV1BSUoLr169LfSorK7Fz507MmjWr3u31ej3effddfPXVV/j222+Rk5ODhQsX4rnnnpMKpuTkZGg0GsycORPnz5/Hjh07sG7dOouX7l566SUUFBRg9erVuHDhAjIyMnDmzBnMmzfP3U1SPH7cnoiIyPvcfvnvzJkzGDNmjHTdVOhMnz4dW7duBQBs374dQgiLN5GbaLVabN++HRkZGTAYDIiKisLChQstCqZ27drh4MGDSE1NRUxMDDp27Ij09HTp6xQA4JFHHkFubi6WLl2K1157DQ899BDy8vLQv39/dzeJiIiIqNHcLqpGjx4NZx8YTElJsSiAzA0ZMgSnTp1yup6BAwfi+PHjDvtMmTIFU6ZMcToWKQ+/h4mIiJSGv/1HHtHt1Xy+LElERC0KiyoiIiIiGbCoIiIiIpIBi6pmii+9ERERNS0WVUREREQyYFFF5EU8o0hE1HywqFIgPhE3DuePiIg8gUUVERERkQxYVBE1MX6HFxFR88SiiiT9Mw54OwIREZFisahqQjw74Zt4vxARkRxYVBERERHJgEUVERERkQxYVBERERHJgEUVERERkQxYVBE1Ab4Znoio+WNR1UI5epLvn3GARQAREZGbWFQRyYSFKBFRy8aiipo9FjtERNQUWFR5EZ/siYiImg8WVS0IizgiIiLPYVFFREREJAMWVUREREQyYFHlg/gyHRERkfKwqCIiIiKSAYsqanZ4po+IiLyBRRURERGRDFhUEXkQz5oREbUcLKqIiIiIZMCiioiIiEgGLKqIiIiIZMCiisgH9M844O0IRETUSCyqiBSOb4YnIvINbhdVx44dw6RJkxAREQGVSoW8vDyL5S+++CJUKpXFZcKECRZ9fvzxR0ybNg3BwcEICQnBzJkzcefOHYs+Z8+exWOPPYaAgABERkZi1apV9bLs3LkTvXv3RkBAAAYMGID9+/e7uzlEREREsnC7qKqqqkJ0dDSys7Pt9pkwYQJu3rwpXf76179aLJ82bRrOnz8PnU6Hffv24dixY0hJSZGWV1ZWIi4uDl27dkVRURHefvttZGRkYNOmTVKfkydPYurUqZg5cyaKi4uRlJSEpKQknDt3zt1NoiYgx9kUnpEhIiJf1srdGyQkJCAhIcFhH61Wi/DwcJvL/vGPf6CgoABffPEFhg4dCgB47733MHHiRLzzzjuIiIhATk4OampqsHnzZmg0GvTr1w8lJSVYs2aNVHytW7cOEyZMwKJFiwAAWVlZ0Ol0WL9+PTZu3OjuZhERERE1ikfeU3XkyBGEhoaiV69emDt3Ln744QdpmV6vR0hIiFRQAcC4cePg5+eHzz//XOozatQoaDQaqU98fDwuXryIn376Seozbtw4i/XGx8dDr9d7YpOIiIiIHHL7TJUzEyZMwNNPP42oqChcvnwZr732GhISEqDX6+Hv74+ysjKEhoZahmjVCh06dEBZWRkAoKysDFFRURZ9wsLCpGXt27dHWVmZ1GbexzSGLQaDAQaDQbpeWVkJADAajTAajQ3faCumsazH1PoLGI3Gen9dXW7dz951V9psLvcTFn8dZXQlk/kcONo2Z23WWWxlMJ/zhs6zdebGzK+9dVnPjbTcz/a6Hc2vo/uyqdnb55VAydkB5eZXam5A2dmB5pPfF6mEEKLBN1apsGvXLiQlJdnt8+2336J79+44dOgQxo4dizfffBPbtm3DxYsXLfqFhoYiMzMTc+fORVxcHKKiovD+++9Ly0tL
S9GvXz+UlpaiT58+0Gg02LZtG6ZOnSr12bBhAzIzM1FeXm4zS0ZGBjIzM+u15+bmIjAw0M2tJyIiIm+orq5GcnIyKioqEBwc7O04/yEaAYDYtWuX034dO3YUGzduFEII8eGHH4qQkBCL5UajUfj7+4tPP/1UCCHE888/L5588kmLPocPHxYAxI8//iiEECIyMlKsXbvWok96eroYOHCg3Rx3794VFRUV0uW7774TAMT3338vampqZLtUVVWJvLw8UVVVZdHe87W9Nv+6uty6n73rrrTZWj5g6R6Rl5cnBizd4zSjK5lsrc/WuM7aXJkX8zlv6Dw7y9yYObce17rdNPfW+4wr97m9+6QpL/b2eSVclJxdyfmVmlvp2ZtL/tzcXAFAVFRUOK1BmpLsL/9Z++c//4kffvgBnTt3BgDExsbi9u3bKCoqQkxMDADg8OHDqKurw/Dhw6U+r7/+OoxGI9RqNQBAp9OhV69eaN++vdSnsLAQCxYskNal0+kQGxtrN4tWq4VWq63XrlarpfXIyXpcQ60KarW63l9Xl1v3s3fdlTaby+tU0l9nGe1l6vZqPq6uTIShViXNgbNtc9ZmGsfZvNjr5+o8O8vsaHudzbn1ttXL+n9zP/hPh3HxT/+f3fm1dd3RfDQ1Tz2WmoKSswPKza/U3ICyswPKz++L3H6j+p07d1BSUoKSkhIAwJUrV1BSUoLr16/jzp07WLRoEU6dOoWrV6+isLAQTz75JHr06IH4+HgAQJ8+fTBhwgTMnj0bp0+fxmeffYZ58+bh2WefRUREBAAgOTkZGo0GM2fOxPnz57Fjxw6sW7cOaWlpUo6XXnoJBQUFWL16NS5cuICMjAycOXMG8+bNk2FaiIiIiNzjdlF15swZDB48GIMHDwYApKWlYfDgwUhPT4e/vz/Onj2LJ554Aj179sTMmTMRExOD48ePW5whysnJQe/evTF27FhMnDgRjz76qMV3ULVr1w4HDx7ElStXEBMTg5dffhnp6ekW32X1yCOPIDc3F5s2bUJ0dDT+9re/IS8vD/3792/MfJCb+N1RREREv3L75b/Ro0dDOHhv+4EDzn/DrEOHDsjNzXXYZ+DAgTh+/LjDPlOmTMGUKVOcrs+bTEWH6SUiIiIiap74239EREREMmBR1Yx0ezWfL8cRERF5CYuqFo5FGBERkTxYVBERERHJgEUVkYv48ioRETnCooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqiIiIiGTAooqIiIhIBiyqSNH4aTwiIvIVLKrIJSxeiIiIHGNRReQBLEKJiFoeFlVEREREMmBR1QLwrAkREZHnsagiIiIikgGLKiIiIiIZsKgiIiIikgGLKiIF4fvjiIh8F4sqBeATKRERke9jUUVEREQkAxZVRERERDJgUaVgfFmw+er2aj7vXyIihWFRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDt4uqY8eOYdKkSYiIiIBKpUJeXp60zGg0YvHixRgwYADatGmDiIgIvPDCC7hx44bFGN26dYNKpbK4rFy50qLP2bNn8dhjjyEgIACRkZFYtWpVvSw7d+5E7969ERAQgAEDBmD//v3ubg4RERGRLNwuqqqqqhAdHY3s7Ox6y6qrq/Hll1/ijTfewJdffolPP/0UFy9exBNPPFGv7/Lly3Hz5k3pMn/+fGlZZWUl4uLi0LVrVxQVFeHtt99GRkYGNm3aJPU5efIkpk6dipkzZ6K4uBhJSUlISkrCuXPn3N0kIiIiokZr5e4NEhISkJCQYHNZu3btoNPpLNrWr1+PYcOG4fr16+jSpYvUHhQUhPDwcJvj5OTkoKamBps3b4ZGo0G/fv1QUlKCNWvWICUlBQCwbt06TJgwAYsWLQIAZGVlQafTYf369di4caO7m0VERETUKG4XVe6qqKiA
SqVCSEiIRfvKlSuRlZWFLl26IDk5GQsXLkSrVr/G0ev1GDVqFDQajdQ/Pj4eb731Fn766Se0b98eer0eaWlpFmPGx8dbvBxpzWAwwGAwSNcrKysB/PqypdFobOSW/odpLKPRCK2/sPi3rb8m9pa70896Xbb6223zExZ/5criqJ8r+exlMe9nPeeeyuxKFle2t944ZnPf0PnzJvP5VxolZweUm1+puQFlZweaT35fpBJCiAbfWKXCrl27kJSUZHP53bt3MXLkSPTu3Rs5OTlS+5o1azBkyBB06NABJ0+exJIlSzBjxgysWbMGABAXF4eoqCi8//770m1KS0vRr18/lJaWok+fPtBoNNi2bRumTp0q9dmwYQMyMzNRXl5uM09GRgYyMzPrtefm5iIwMLAhU0BERERNrLq6GsnJyaioqEBwcLC34/yHaAQAYteuXTaX1dTUiEmTJonBgweLiooKh+N8+OGHolWrVuLu3btCCCHGjx8vUlJSLPqcP39eABClpaVCCCHUarXIzc216JOdnS1CQ0Ptrufu3buioqJCunz33XcCgPj+++9FTU2NbJeqqiqRl5cnqqqqRM/X9oqer+0VNTU1dv+aLnL0s3UbV9sGLN0j8vLyxICle2TN4mwcZ22u3NZ6zj2V2ZUsrmyvdbv53Dd0/rx5MZ9/b2dpSdmVnF+puZWevbnkz83NFQCc1hdNzSMv/xmNRjzzzDO4du0aDh8+7LSKHD58OO7du4erV6+iV69eCA8Pr3e2yXTd9D4se33svU8LALRaLbRabb12tVoNtVrt0ra5Q61Ww1Crsvi3rb8m9pa70896Xbb6222rU0l/5cziqJ8r+ezNn3k/6zn3VGZXszjb3nrjmM19Q+fPF3jqsdQUlJwdUG5+peYGlJ0dUH5+XyT791SZCqpvvvkGhw4dwn333ef0NiUlJfDz80NoaCgAIDY2FseOHbN43VSn06FXr15o37691KewsNBiHJ1Oh9jYWBm3hoiIiMg1bp+punPnDi5duiRdv3LlCkpKStChQwd07twZ//Vf/4Uvv/wS+/btQ21tLcrKygAAHTp0gEajgV6vx+eff44xY8YgKCgIer0eCxcuxHPPPScVTMnJycjMzMTMmTOxePFinDt3DuvWrcPatWul9b700kt4/PHHsXr1aiQmJmL79u04c+aMxdcuEBERETUVt4uqM2fOYMyYMdJ10yfwpk+fjoyMDOzZswcAMGjQIIvb/f3vf8fo0aOh1Wqxfft2ZGRkwGAwICoqCgsXLrT4JF+7du1w8OBBpKamIiYmBh07dkR6err0dQoA8MgjjyA3NxdLly7Fa6+9hoceegh5eXno37+/u5tERERE1GhuF1WjR4+GcPCBQUfLAGDIkCE4deqU0/UMHDgQx48fd9hnypQpmDJlitOxiIiIiDyNv/1HREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREROSWbq/mezuCT2JRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREREMmBRRURERCQDFlVEREQ2dHs139sRSGFYVBEREZFNLCzdw6KKiIiISAYsqoiIiNzQP+OAtyOQj2JRRURERCQDFlVEREREMmBRRURERCQDFlVERF7UHD5d1Ry2gUgOLKqIiIiIZOB2UXXs2DFMmjQJERERUKlUyMvLs1guhEB6ejo6d+6M1q1bY9y4cfjmm28s+vz444+YNm0agoODERISgpkzZ+LOnTsWfc6ePYvHHnsMAQEBiIyMxKpVq+pl2blzJ3r37o2AgAAMGDAA+/fvd3dziIiIqJG6vZrPM5ZoQFFVVVWF6OhoZGdn21y+atUq/M//
/A82btyIzz//HG3atEF8fDzu3r0r9Zk2bRrOnz8PnU6Hffv24dixY0hJSZGWV1ZWIi4uDl27dkVRURHefvttZGRkYNOmTVKfkydPYurUqZg5cyaKi4uRlJSEpKQknDt3zt1NIiIiImq0Vu7eICEhAQkJCTaXCSHw7rvvYunSpXjyyScBAB999BHCwsKQl5eHZ599Fv/4xz9QUFCAL774AkOHDgUAvPfee5g4cSLeeecdREREICcnBzU1Ndi8eTM0Gg369euHkpISrFmzRiq+1q1bhwkTJmDRokUAgKysLOh0Oqxfvx4bN25s0GQQETV3prMJV1cmejkJUfPjdlHlyJUrV1BWVoZx48ZJbe3atcPw4cOh1+vx7LPPQq/XIyQkRCqoAGDcuHHw8/PD559/jqeeegp6vR6jRo2CRqOR+sTHx+Ott97CTz/9hPbt20Ov1yMtLc1i/fHx8fVejjRnMBhgMBik65WVlQAAo9EIo9HY2M2XmMYyGo3Q+guLf9v6a2JvuTv9rNdlq7/dNj9h8VeuLI76uZLPXhbzftZz7qnMrmRxZXvrjWM29w2dP28yn3+l8Xb2xt5/7ua33rfk0JBt8Pa8u8LedpkfI5XInbm3Owc2jquujtlYvjzvKiGEaPCNVSrs2rULSUlJAH59SW7kyJG4ceMGOnfuLPV75plnoFKpsGPHDrz55pvYtm0bLl68aDFWaGgoMjMzMXfuXMTFxSEqKgrvv/++tLy0tBT9+vVDaWkp+vTpA41Gg23btmHq1KlSnw0bNiAzMxPl5eU282ZkZCAzM7Nee25uLgIDAxs6DURERNSEqqurkZycjIqKCgQHB3s7zn+IRgAgdu3aJV3/7LPPBABx48YNi35TpkwRzzzzjBBCiD/96U+iZ8+e9cbq1KmT2LBhgxBCiPHjx4uUlBSL5efPnxcARGlpqRBCCLVaLXJzcy36ZGdni9DQULt57969KyoqKqTLd999JwCI77//XtTU1Mh2qaqqEnl5eaKqqkr0fG2v6PnaXlFTU2P3r+kiRz9bt3G1bcDSPSIvL08MWLpH1izOxnHW5sptrefcU5ldyeLK9lq3m899Q+fPmxfz+fd2FqVlb+z95yi/rbGt9y1vbUND570p93d76zI9XpW4v7s79/bmwNZxtanum6qqKpGbmysAiIqKCteLliYg68t/4eHhAIDy8nKLM1Xl5eUYNGiQ1OfWrVsWt7t37x5+/PFH6fbh4eH1zjaZrjvrY1pui1arhVarrdeuVquhVqtd2US3qNVqGGpVFv+29dfE3nJ3+lmvy1Z/u211KumvnFkc9XMln735M+9nPeeeyuxqFmfbW28cs7lv6Pz5Ak89lpqCs+zdXs33yPuQ5Lr/bOW3Nbb1viWHxmyDKber82u9Lk/dL7bWJbXX/WcOXdluT2ZsDFfy250DG8dV05i+ur1NQdbvqYqKikJ4eDgKCwultsrKSnz++eeIjY0FAMTGxuL27dsoKiqS+hw+fBh1dXUYPny41OfYsWMWr5vqdDr06tUL7du3l/qYr8fUx7QeUhZ+FJeaO9M+7q193dF6+fgjkofbRdWdO3dQUlKCkpISAL++Ob2kpATXr1+HSqXCggUL8N///d/Ys2cPvv76a7zwwguIiIiQ3nfVp08fTJgwAbNnz8bp06fx2WefYd68eXj22WcREREBAEhOToZGo8HMmTNx/vx57NixA+vWrbN4Y/pLL72EgoICrF69GhcuXEBGRgbOnDmDefPmNX5WiIioRfHE9yyxWG153C6qzpw5g8GDB2Pw4MEAgLS0NAwePBjp6ekAgFdeeQXz589HSkoKHn74Ydy5cwcFBQUICAiQxsjJyUHv3r0xduxYTJw4EY8++qjFd1C1a9cOBw8exJUrVxATE4OXX34Z6enpFt9l9cgjjyA3NxebNm1CdHQ0/va3vyEvLw/9+/dv8GQQETUHcj2ZsyjwDH5RZvPl9nuqRo8eDeHgA4MqlQrLly/H8uXL
7fbp0KEDcnNzHa5n4MCBOH78uMM+U6ZMwZQpUxwHJiLyEH7nEwG++56phnB3W5rTtsuBv/1HREQ+iWdzSGlYVBERkUtY5CgL76+mx6KKFK/bq/non3HA2zGIyEW+8GTvCxms2crkiznJPhZVRERERDJgUUVE5IN4hqJpcJ5JTiyqSJF4ICRSBj5WlYH3kzxYVHkZd2RLvvht09TycH8gooZgUUUexScnIvfwMUPNRUvcl1lU+QB+u658+ClAIu/j8cyzOL++i0UVtUg8KJG3cR8kOXF/8g0sqoiIqNlisfErzkPTYFFFPosHAcdsvWzMOfM+3ge2cV6oJWBRRY3WlAfLlnZgbmnbS0S+i8cj51hUNWONfQC4e3tX+/OB6R7OF/kafriGyDYWVQrjiQNZcz44erKwVMK88cmPHOG+QSQvFlUtDJ9kiYi8g8fe5o9FFQFQ/hkZIiJH+B9KagosqoiImgF+8S2R97GoIvIwX/7fsS9nk1Nz287mtj1EzQWLKh/lywdNnkaXh9LmUGl5iYiaGosqchmfVKk54stmzROPV+QNLKo8iAfr5okHa9/VEu8bHmeIfAeLqibmqYN+S3wyaYnb3BQ4r9SUlFwUKjm7MzwONAyLKiIrjTmY8P1mTYtzTda8vU/wGNCysahq5vjgbj54X7ZsLeXJ2hd+7qolzDN5BosqohZCyU8UcmVX8hyQd3HfIVewqCIiImohWBx6FosqImqQ5nhwbuz76ZSuOWwDkTexqKIG84X3PhCR5/ExTOQaFlVEMuCTDnkK9y2yp6V8eEFJWFQpBB88RMrSEh+vLXGbicyxqCJF8PbB2tvrb2k430SkRCyqfFxDnlz4hES+xLQ/Nqezrc1lO5SEc05KIHtR1a1bN6hUqnqX1NRUAMDo0aPrLZszZ47FGNevX0diYiICAwMRGhqKRYsW4d69exZ9jhw5giFDhkCr1aJHjx7YunWr3JsiKx4QiIiImrdWcg/4xRdfoLa2Vrp+7tw5jB8/HlOmTJHaZs+ejeXLl0vXAwMDpX/X1tYiMTER4eHhOHnyJG7evIkXXngBarUab775JgDgypUrSExMxJw5c5CTk4PCwkLMmjULnTt3Rnx8vNybRKQ43V7Nx9WVid6OQUTUosheVHXq1Mni+sqVK9G9e3c8/vjjUltgYCDCw8Nt3v7gwYMoLS3FoUOHEBYWhkGDBiErKwuLFy9GRkYGNBoNNm7ciKioKKxevRoA0KdPH5w4cQJr165lUUVEREReIXtRZa6mpgYff/wx0tLSoFKppPacnBx8/PHHCA8Px6RJk/DGG29IZ6v0ej0GDBiAsLAwqX98fDzmzp2L8+fPY/DgwdDr9Rg3bpzFuuLj47FgwQKHeQwGAwwGg3S9srISAGA0GmE0Ghu7uRLTWFo/4bCP1r/+cnvt9vr1en0ftP6uZbIe17rNlNdRble4um3SPDnJ5Qrr7PbWZb2+xt4P7ozhcHvN8jtbt6373Hxs0+3NxzFvM+dqm6128+vWf23dzjyLM47W1Zh+NrfXr/4cmffrn3EA5zLiLdocza+9ddlb7k4/63WZ5zftO/a2V+7MpjEc9XOWxTy3034yzp+9fo72A0dzbt3P2fxaZ7G1j1lvj602dx7ftrbJ3f3T2f3haP7k5Ikx5aISQjTuGdSBTz75BMnJybh+/ToiIiIAAJs2bULXrl0RERGBs2fPYvHixRg2bBg+/fRTAEBKSgquXbuGAwcOSONUV1ejTZs22L9/PxISEtCzZ0/MmDEDS5Yskfrs378fiYmJqK6uRuvWrW3mycjIQGZmZr323Nxci5cgiYiIyHdVV1cjOTkZFRUVCA4O9nYciUfPVH344YdISEiQCirg16LJZMCAAejcuTPGjh2Ly5cvo3v37p6MgyVLliAtLU26XllZicjISMTFxcl6pxiNRuh0Orxxxg+GOpXNPucy4tE/44DNZZ5ga33WbVo/gayhdQ5zN3Rd9voBkGUerLNbZzCt
y3p99rK6c/+4Ooa97T2XEY+Y5QVS/qL0CW7PifnYpvWar9+8zZyrbbbaza+b9vnx48dDrVbbvJ15FnczNCSTvQzWc2Ga+/Hjx2Pwnw7XW27rNo7m11Z/Rxnc6We9LvP8pn2nqTKb35eOxrHXVvz6b6TjpGmft3dbuefPXj9782Kdxdbj1frx7ShrQ+bP3v5sa5+w7md9G9Pcmz9eXZlfZ/eHo/mTk9FoxO7du2UfVw4eK6quXbuGQ4cOSWeg7Bk+fDgA4NKlS+jevTvCw8Nx+vRpiz7l5eUAIL0PKzw8XGoz7xMcHGz3LBUAaLVaaLXaeu1qtbreE4EcDHUqGGptFycPvXEQQMMLF3ep1ep6WWy1AY5zN3Rd9voBaNS6rJmyW2cwv3+t222t39VtcGcMe9urVqulItZQVz+7qxlMY5tubz6OeZs58zbTm9tt9bPua+u6aT321mGexdG4rq6rIf2s/wKQ5t7ucidt9m5rL1tj+lmvyzy/ad9pqszm96XD+XXQZp7baT8Z589eP1f2A1tzbt3P2fw2ZP6A+o9Rdx7ftsY2f7y6Mr/O7g9H89dSeOx7qrZs2YLQ0FAkJjr+BFJJSQkAoHPnzgCA2NhYfP3117h165bUR6fTITg4GH379pX6FBYWWoyj0+kQGxsr4xaQkvErLOzj3BAReYZHiqq6ujps2bIF06dPR6tW/zkZdvnyZWRlZaGoqAhXr17Fnj178MILL2DUqFEYOHAgACAuLg59+/bF888/j6+++goHDhzA0qVLkZqaKp1lmjNnDr799lu88soruHDhAjZs2IBPPvkECxcu9MTmEBERETnlkaLq0KFDuH79On73u99ZtGs0Ghw6dAhxcXHo3bs3Xn75ZUyePBl79+6V+vj7+2Pfvn3w9/dHbGwsnnvuObzwwgsW32sVFRWF/Px86HQ6REdHY/Xq1fjggw/4dQo+gmdCWq6mfJ8gEZGv8ch7quLi4mDrQ4WRkZE4evSo09t37doV+/fvd9hn9OjRKC4ubnBGIrmwiHQfv5yUiJoj/vYfEfkEFqdEpHQsqqhF4RN3y8L7m4iakke/p4rIkZb4hNcSt5mIqKXgmSoiL2uuhVZz3S4iIntYVBERERHJgEUVtQjdXs23eebEF8+m+GImR5SWl4jIU1hUEXmIdbHhyeKDhQ0RkfexqCIim5RUqCkpKxE1XyyqWgg+6biH80VERO5iUUVEREQkAxZVRERERDJgUUVEPosvwxKRkrCoIiIiIpIBiyoicgvPHhER2caiioiIiEgGLKqISFHsfTs+EZG3sahqwfjEREREJB8WVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFRETUTPA3Xb2LRRURERGRDFhUERERkct4Nsw+FlVEREREMmBRRUTURPg/fKLmjUUVUTPBJ2xyhvsIkWfJXlRlZGRApVJZXHr37i0tv3v3LlJTU3Hfffehbdu2mDx5MsrLyy3GuH79OhITExEYGIjQ0FAsWrQI9+7ds+hz5MgRDBkyBFqtFj169MDWrVvl3hQiIiKyg0V6fR45U9WvXz/cvHlTupw4cUJatnDhQuzduxc7d+7E0aNHcePGDTz99NPS8traWiQmJqKmpgYnT57Etm3bsHXrVqSnp0t9rly5gsTERIwZMwYlJSVYsGABZs2ahQMHDnhic4iIPIJPSkTNSyuPDNqqFcLDw+u1V1RU4MMPP0Rubi5+85vfAAC2bNmCPn364NSpUxgxYgQOHjyI0tJSHDp0CGFhYRg0aBCysrKwePFiZGRkQKPRYOPGjYiKisLq1asBAH369MGJEyewdu1axMfHe2KTiIiajBzFFgs2oqbnkaLqm2++QUREBAICAhAbG4sVK1agS5cuKCoqgtFoxLhx46S+vXv3RpcuXaDX6zFixAjo9XoMGDAA
YWFhUp/4+HjMnTsX58+fx+DBg6HX6y3GMPVZsGCBw1wGgwEGg0G6XllZCQAwGo0wGo0ybDmk8QBA6ydkG7MpmPIqLTeg7OyAvPmNRiO0/kL6a6tN2kdt/NtWP0fLLcbwczyedRZb/ZxlcbRNDc3syhzZa3Mni/V9ZH7dUT+H82e271jPkfnY5m1yzJ/12A2ZP/Pc5v16vb4P5zLiXdonzPv0zzgArb/9bXOW2ZX9wNacu7t/NmT+Gp3ZxnJX7+ter+/7vz71913z9QNAr9f31bsP5OaJMeWiEkLI+iz0v//7v7hz5w569eqFmzdvIjMzE//6179w7tw57N27FzNmzLAobABg2LBhGDNmDN566y2kpKTg2rVrFi/lVVdXo02bNti/fz8SEhLQs2dPzJgxA0uWLJH67N+/H4mJiaiurkbr1q1tZsvIyEBmZma99tzcXAQGBso0A0RERORJ1dXVSE5ORkVFBYKDg70dRyL7maqEhATp3wMHDsTw4cPRtWtXfPLJJ3aLnaayZMkSpKWlSdcrKysRGRmJuLg4We8Uo9EInU6HN874wVCnkm1cT9P6CWQNrVNcbkDZ2QF585/LiEf/jAPSX1tt5zJ+fZnc1r9t9XO0vH/GARS//htpny9Kn2C3v3UWW/2cZXG0Te5kNvWLWV6ArKF1GD9+PAb/6bDb+dzJYn0fmV931M/R/JnyW8+99djmbfYyuDN/1mM728es26z3GVf2E3vz4uq+Yc1Wu6M2e3PuKIOz+9LV+bPXz9l82Lqtae7Hjx8PtVrt0n3d2DmTk9FoxO7du2UfVw4eefnPXEhICHr27IlLly5h/PjxqKmpwe3btxESEiL1KS8vl96DFR4ejtOnT1uMYfp0oHkf608MlpeXIzg42GHhptVqodVq67Wr1Wppx5KToU4FQ63ynuCVmhtQdnZAnvxqtRqGWpX011abaX+39W9b/RwttxijzvF41lls9XOWxdE2NTSzK3NkqFXhoTcO4urKxHrb9NAbBwE4z2J9H5lfd9TP4fz9X37rubce23q/ss5gvg2O5s/Uz3ps2/1gMS/W82ee29bYtu53W/Ntvi5796Wtx5WtdkdtpvVo/S3n3HrbbI3j7vy5+hi1t73Obmu6nb39HLDM2tA588Rzqy/z+PdU3blzB5cvX0bnzp0RExMDtVqNwsJCafnFixdx/fp1xMbGAgBiY2Px9ddf49atW1IfnU6H4OBg9O3bV+pjPoapj2kMIiIl6fZqvs+9sdzX8hApgexnqv74xz9i0qRJ6Nq1K27cuIFly5bB398fU6dORbt27TBz5kykpaWhQ4cOCA4Oxvz58xEbG4sRI0YAAOLi4tC3b188//zzWLVqFcrKyrB06VKkpqZKZ5nmzJmD9evX45VXXsHvfvc7HD58GJ988gny83kQIKKm0VyLjoZuV3OZj8Zsh5xz4I35bC73oTfJXlT985//xNSpU/HDDz+gU6dOePTRR3Hq1Cl06tQJALB27Vr4+flh8uTJMBgMiI+Px4YNG6Tb+/v7Y9++fZg7dy5iY2PRpk0bTJ8+HcuXL5f6REVFIT8/HwsXLsS6devwwAMP4IMPPuDXKRCRx/nSE0+3V/Oh9fd2Cvf40vwRyU32omr79u0OlwcEBCA7OxvZ2dl2+3Tt2hX79+93OM7o0aNRXFzcoIxERM7wyf9X3poHX5r/psjiaB3dXs3H1ZWJDRrDl+axJeBv/xG1ADywkqdw3yL6DxZVRM0Mn+TkxzklX8N90jexqCIiIq9igeA6X5wrX8zkLR7/nioiIiXxxd/d84UnLb63qmn4+vb6ej5v45kqohaqpR8cffG7oaz5ej5qHN6/zQ+LKiLyuKZ88uATFfkC7octE4sqohaoJR7wm9s2u7M9ntr25janRI3F91QRETUT/J6i5ov3ozKwqCIih3gwJyJ3tdTjBosqIpK4eyD01QOnr+aipmG6/7kf2NY/4wBWDfN2iuaJ76kiIkWwfoJ09QnT
20+sSviUITU/3Oe8g2eqiEh2DT2gt5QnAnvb6cr2+/octbRPevpCce8L80C/4pkqIqqnMU/6RErR0vfn/hkHvB2h2eGZKiLyCrn+h+/LT4y+nK0pcR6opeCZKiIimSipeFBSViKlYFFFRLKS68maT/pEDcOX772HRRURETU5vp+HmiMWVUREREQy4BvVicguvoxA5Fnefix5e/3NDYsqohZOSQdVJWUl38H9hpoKX/4jaqb4REJE1LRYVBGRx7CwI6KWhEUVERERkQxYVBG1IJ46c+QLH49v7Lb5wjYQkbKxqCKiJsWXBImouWJRRUQtCos6IvIUFlVEREREMmBRRURERCQDFlVE5HV8SY6ImgMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAPZi6oVK1bg4YcfRlBQEEJDQ5GUlISLFy9a9Bk9ejRUKpXFZc6cORZ9rl+/jsTERAQGBiI0NBSLFi3CvXv3LPocOXIEQ4YMgVarRY8ePbB161a5N4eIiIjIJbIXVUePHkVqaipOnToFnU4Ho9GIuLg4VFVVWfSbPXs2bt68KV1WrVolLautrUViYiJqampw8uRJbNu2DVu3bkV6errU58qVK0hMTMSYMWNQUlKCBQsWYNasWThwgD81QURERE2vldwDFhQUWFzfunUrQkNDUVRUhFGjRkntgYGBCA8PtznGwYMHUVpaikOHDiEsLAyDBg1CVlYWFi9ejIyMDGg0GmzcuBFRUVFYvXo1AKBPnz44ceIE1q5di/j4eLk3i4iIiMghj7+nqqKiAgDQoUMHi/acnBx07NgR/fv3x5IlS1BdXS0t0+v1GDBgAMLCwqS2+Ph4VFZW4vz581KfcePGWYwZHx8PvV7vqU0hIiIiskv2M1Xm6urqsGDBAowcORL9+/eX2pOTk9G1a1dERETg7NmzWLx4MS5evIhPP/0UAFBWVmZRUAGQrpeVlTnsU1lZiV9++QWtW7eul8dgMMBgMEjXKysrAQBGoxFGo1GGLYY0HgBo/YRsYzYFU16l5QaUnR1gfm9ScnZAufmVmhtQdnagafPL+dzqyTHl4tGiKjU1FefOncOJEycs2lNSUqR/DxgwAJ07d8bYsWNx+fJldO/e3WN5VqxYgczMzHrtBw8eRGBgoOzryxpaJ/uYTUGpuQFlZweY35uUnB1Qbn6l5gaUnR1omvz79+/3+Dp8iceKqnnz5mHfvn04duwYHnjgAYd9hw8fDgC4dOkSunfvjvDwcJw+fdqiT3l5OQBI78MKDw+X2sz7BAcH2zxLBQBLlixBWlqadL2yshKRkZGIi4tDcHCwexvogNFohE6nwxtn/GCoU8k2rqdp/QSyhtYpLjeg7OwA83uTkrMDys2v1NyAsrMDTZv/XIb873E2Go3YvXu37OPKQfaiSgiB+fPnY9euXThy5AiioqKc3qakpAQA0LlzZwBAbGws/vSnP+HWrVsIDQ0FAOh0OgQHB6Nv375SH+sKWKfTITY21u56tFottFptvXa1Wg21Wu3S9rnDUKeCoVZ5Dzil5gaUnR1gfm9ScnZAufmVmhtQdnagafJ74rnVl8n+RvXU1FR8/PHHyM3NRVBQEMrKylBWVoZffvkFAHD58mVkZWWhqKgIV69exZ49e/DCCy9g1KhRGDhwIAAgLi4Offv2xfPPP4+vvvoKBw4cwNKlS5GamioVRXPmzMG3336LV155BRcuXMCGDRvwySefYOHChXJvEhEREZFTshdVf/7zn1FRUYHRo0ejc+fO0mXHjh0AAI1Gg0OHDiEuLg69e/fGyy+/jMmTJ2Pv3r3SGP7+/ti3bx/8/f0RGxuL5557Di+88AKWL18u9YmKikJ+fj50Oh2io6OxevVqfPDBB/w6BSIiIvIKj7z850hkZCSOHj3qdJyuXbs6fYPb6NGjUVxc7FY+IiIiIk/gb/8RERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEM
WFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMWFQRERERyYBFFREREZEMFF9UZWdno1u3bggICMDw4cNx+vRpb0ciIiKiFkjRRdWOHTuQlpaGZcuW4csvv0R0dDTi4+Nx69Ytb0cjIiKiFkbRRdWaNWswe/ZszJgxA3379sXGjRsRGBiIzZs3ezsaERERtTCtvB2goWpqalBUVIQlS5ZIbX5+fhg3bhz0er3N2xgMBhgMBul6RUUFAODHH3+E0WiULZvRaER1dTVaGf1QW6eSbVxPa1UnUF1dp7jcgLKzA8zvTUrODig3v1JzA8rODjRt/h9++EH2MU3PsQAghJB9/EYRCvWvf/1LABAnT560aF+0aJEYNmyYzdssW7ZMAOCFF1544YUXXprB5bvvvmuKksNlij1T1RBLlixBWlqadL2urg4//vgj7rvvPqhU8lXrlZWViIyMxHfffYfg4GDZxvU0peYGlJ0dYH5vUnJ2QLn5lZobUHZ2oPnkLy0tRUREhLfjWFBsUdWxY0f4+/ujvLzcor28vBzh4eE2b6PVaqHVai3aQkJCPBURwcHBitxhlZobUHZ2gPm9ScnZAeXmV2puQNnZAeXnv//+++Hn51tvDfetNG7QaDSIiYlBYWGh1FZXV4fCwkLExsZ6MRkRERG1RIo9UwUAaWlpmD59OoYOHYphw4bh3XffRVVVFWbMmOHtaERERNTCKLqo+u1vf4t///vfSE9PR1lZGQYNGoSCggKEhYV5NZdWq8WyZcvqvdTo65SaG1B2doD5vUnJ2QHl5ldqbkDZ2QHm9ySVEL72eUQiIiIi5VHse6qIiIiIfAmLKiIiIiIZsKgiIiIikgGLKiIiIiIZtJiiasWKFXj44YcRFBSE0NBQJCUl4eLFixZ97t69i9TUVNx3331o27YtJk+eXO/LRf/whz8gJiYGWq0WgwYNqreeixcvYsyYMQgLC0NAQAAefPBBLF261KXfFszOzka3bt0QEBCA4cOH4/Tp0xa5g4OD0bFjR7Rt2xYqlQq3b9/22dwmK1asQGhoKPz8/KBSqaDVajF27FhcuHBB6uOr+U1z36pVK6hUKovLnDlzfDo7ALzyyiv1cpsuO3fu9Pn8K1aswMCBA6FWq+Hn5we1Wo2EhASLfN7Mf+zYMUyaNAkRERFQqVTIy8uzyP7www+jdevW0Gg00Gg0UKlUKCkpkTW7uUuXLiEoKMjlLzT25vFG7tzAf+Zco9HA398f/v7+aN++PZ588knpeOOr2U35g4OD7R5rfD1/Ux1vPJUfAC5fvoynnnoKnTp1QnBwMJ555pl6+Zzy9u/kNJX4+HixZcsWce7cOVFSUiImTpwounTpIu7cuSP1mTNnjoiMjBSFhYXizJkzYsSIEeKRRx6xGGf+/Pli/fr14vnnnxfR0dH11nP58mWxefNmUVJSIq5evSp2794tQkNDxZIlSxzm2759u9BoNGLz5s3i/PnzYvbs2SIkJESMGTNGyv3HP/5R9OrVS4SEhAgA4qeffvLZ3OXl5dK8T58+XWzdulXk5+eLkSNHitatW4v7779f3Lt3z6fzm/aZoUOHiqefflqMHTtW3H///eLy5cuioqLCp7MLIURcXJx49913xZEjR8ShQ4fE2LFjRbt27USbNm3Ezz//7PP5x40bJzp16iTGjh0rdu7cKR577DHRunVrMWTIEFFbW+v1/Pv37xevv/66+PTTTwUAsWvXLmmZad958803xdy5c8WAAQMEYPlbpXJkN6mpqRFDhw4VCQkJ
ol27dg5zC+H9443cuc0fr8uWLRMffvihGDNmjAgPDxcJCQkiMjJS3Lt3z2ezC/HrPtOrVy8xefJk6fF6//33ixs3bkhj+HL+pjreeCr/nTt3xIMPPiieeuopcfbsWXH27Fnx5JNPiocfflg63riixRRV1m7duiUAiKNHjwohhLh9+7ZQq9Vi586dUp9//OMfAoDQ6/X1br9s2TKHd7i5hQsXikcffdRhn2HDhonU1FTpem1trYiIiBArVqywmdt0kFNKblv5L126pIj8jz/+uHjppZcUu8+YmPJPnDhREfkPHDgg/Pz8pALWfN/R6XRez2/OuqiydubMGQFAfPDBB0II+ef+lVdeEc8995zYsmWLS08wvnK88VRu8+ybN28WAERxcbHPZzcda8zze+p448m5N8/vqeON3PmtjzemzCqVSuh0Oqfjm7SYl/+sVVRUAAA6dOgAACgqKoLRaMS4ceOkPr1790aXLl2g1+sbvJ5Lly6hoKAAjz/+uN0+NTU1KCoqsli3n58fxo0bV2/dptwmSsltcvPmTQC//mZTZGSkYvLn5OSgZ8+eAICPPvoI1dXVislu8tlnnwEApk6dCsD39x2DwSC9ZAz8Z9/38/PDiRMnvJrfXT///DMAoF27dgDknfvDhw9j586dyM7Odqm/rxxvPJnbPPvhw4cRFRWF8vJyRWTPyclBx44d8cgjjwAAWrduDUBZcw949njjifzWxxsACAgIkI43rmqRRVVdXR0WLFiAkSNHon///gCAsrIyaDSaeq/NhoWFoayszO11PPLIIwgICMBDDz2Exx57DMuXL7fb9/vvv0dtbW29b4K3XrcptymzUnIDwIYNG9CmTRtER0ejdevWOHr0KDQajSLyJycn46OPPsLAgQPRs2dPFBQU4LnnnlNEdhPTvtO6dWs899xzAHx/3xkxYgTatGmDxYsX486dO5g/fz46d+6Muro63Lx506v53VFXV4esrCwAQI8ePQDIN/c//PADXnzxRWzdutXlH8b1heONJ3MDwPr169G7d28AwBdffAGdTocffvjB57MnJyfj448/RmFhIdq2bQuNRoMVK1YAUM7cA5493ngqv/nxprq6GlVVVfjjH/+I2tpa6WSAK1pkUZWamopz585h+/btHlvHjh078OWXXyI3Nxf5+fl45513AADHjx9H27ZtpUtOTo7LY5pyp6eneyq2R3IDwLRp0/Dkk08iPDwcI0eOxDPPPIO7d+8qIn9KSgr27NmDa9euobCwEB999BF27drl/hsYvZDd5Pe//z2uX7+Ol19+WdbM5uTO36lTJ+zcuRN79+5FUFAQCgoKMGLECAwZMsQjv0zvqflPTU2t96EYucyePRvJyckYNWqUzeW+erzxZG4AKC4uRnh4OHbu3ImePXvimWeeQU1NjRzRPZo9JSUF8fHx2LhxI3766SfpWHP58mVZsgOen3vAs8cbT+U3P960bdsW7dq1w+3bt90+3ij6t/8aYt68edi3bx+OHTuGBx54QGoPDw9HTU0Nbt++bVFJl5eXIzw83O31REZGAgD69u2L2tpapKSk4OWXX8bQoUMtPgEUFhYGrVYLf3//ek/S5us2z33t2jXF5DZ5/fXXcfz4cZw8eRL3338/2rdvj127dikiv/U+0759ewC/nlL29eym/J9++in8/f3xhz/8QWpXwtzHxcUhISEBu3btwv79+xEdHY3w8HA8+OCDXs3vKtO+89e//tXiSUCu7IcPH8aePXukAlAIgbq6OrRq1QqbNm3C1KlTffJ446ncpuwHDx7E8ePHERUVhSeeeALt27fHpUuXfD67Kb9p7kNDQwH8+pK0EubelN+TxxtP5o+Li8Ply5fx/fffo1WrVggJCZGON65qMWeqhBCYN28edu3aJb3Gbi4mJgZqtRqFhYVS28WLF3H9+nXExsY2at11dXUwGo2oq6tD69at0aNHD+kSFBQEjUaDmJgYi3XX1dWhsLAQI0aMUGRu07pt
zbv49QMSMBgMPp3f3j5jesA++uijPpsdsJz77t2748knn0SnTp2k/r4899b5jxw5gujoaBw+fBi3bt3CE0884dX8zljvO6aCzUSu7Hq9HiUlJdJl+fLlCAoKQklJCZ566imfPd7IndvR49V0vImIiPDZ7Kac1vlNx5rOnTv79Nxb5/fk8cZT+c117NgRISEhFscbl7n8lnaFmzt3rmjXrp04cuSIuHnzpnSprq6W+syZM0d06dJFHD58WJw5c0bExsaK2NhYi3G++eYbUVxcLH7/+9+Lnj17iuLiYlFcXCwMBoMQQoiPP/5Y7NixQ5SWlorLly+LHTt2iIiICDFt2jSH+bZv3y60Wq3YunWrKC0tFSkpKSIkJERMnz5dyv3VV18JnU4n3nnnHQFAHDt2TBQXF4sXX3zR53KXlZUJIYSYNm2aCAgIEO+//7744osvxJ49e8T48eNF+/btpY+y+uK8l5WViblz54qgoCAxY8YMUVBQIE6fPi22bt0qunXrJkaNGuXT2YX4zz7/8ccfCwAiJydHEfu8ef7WrVuL7OxsodfrxXvvvSdCQkLE/PnzfSL/zz//LI0FQKxZs0YUFxeLa9euSXO/Z88eodPpxF/+8hcBQHz00UeiuLhY3Lx5U5bs1lz9JJS3jzdy5zZ/vM6aNUsUFBRIx5uJEyeKDh06iPLycp+cc9P+npycLB0rTceaLl26iJEjR0pj+HL+pjreeCq/EEJs3rxZ6PV6cenSJfGXv/xFdOjQQaSlpTkd21yLKarwfx8Ltr5s2bJF6vPLL7+I//f//p9o3769CAwMFE899ZS4efOmxTiPP/64zXGuXLkihPj1jhsyZIho27ataNOmjejbt6948803xS+//OI043vvvSe6dOkiNBqNGDZsmDh16pTd3OaXTZs2+VxuZ/P+5ptv+vS8O8qekJAgfezWV7M7yu/r+7yz/Js3b/aJ/H//+99tjjt9+nSnj9lly5bJkt2aq08w9ua+qY43cucWwv7+MmLECHHhwgUhhDz7iyeyO8q/YcMGqY8S88t9vPFUfiGEWLx4sQgLCxNqtVo89NBDYvXq1aKurs6lsU1UQggBIiIiImqUFvOeKiIiIiJPYlFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJAMWVUREREQyYFFFREREJIP/H6/lA+KJfyx/AAAAAElFTkSuQmCC\n",
491
+ "text/plain": [
492
+ "<Figure size 640x480 with 1 Axes>"
493
+ ]
494
+ },
495
+ "metadata": {},
496
+ "output_type": "display_data"
497
+ }
498
+ ],
499
+ "source": [
500
+ "df.date.hist(bins=400)"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "code",
505
+ "execution_count": 26,
506
+ "id": "19d6539b",
507
+ "metadata": {},
508
+ "outputs": [],
509
+ "source": [
510
+ "new_df = df.drop_duplicates(subset=['id'], keep=\"first\")"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": null,
516
+ "id": "466cd2c7",
517
+ "metadata": {},
518
+ "outputs": [],
519
+ "source": [
520
+ "new_df.date.hist(bins-)"
521
+ ]
522
+ }
523
+ ],
524
+ "metadata": {
525
+ "kernelspec": {
526
+ "display_name": "Python 3 (ipykernel)",
527
+ "language": "python",
528
+ "name": "python3"
529
+ },
530
+ "language_info": {
531
+ "codemirror_mode": {
532
+ "name": "ipython",
533
+ "version": 3
534
+ },
535
+ "file_extension": ".py",
536
+ "mimetype": "text/x-python",
537
+ "name": "python",
538
+ "nbconvert_exporter": "python",
539
+ "pygments_lexer": "ipython3",
540
+ "version": "3.10.8"
541
+ }
542
+ },
543
+ "nbformat": 4,
544
+ "nbformat_minor": 5
545
+ }
utilities/pushshift_data.py CHANGED
@@ -148,8 +148,9 @@ def submissions_to_dataframe(submissions: List[Dict[str, Any]]) -> pd.DataFrame:
148
  df = df.convert_dtypes()
149
  df = df[cols]
150
  # Convert the "created_utc" column to a datetime column with timezone information
151
- df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC').dt.strftime(
152
- '%Y-%m-%d %H:%M:%S')
 
153
  return df
154
 
155
 
 
148
  df = df.convert_dtypes()
149
  df = df[cols]
150
  # Convert the "created_utc" column to a datetime column with timezone information
151
+ df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC')
152
+ df['date'] = df['created_utc'].dt.date
153
+ df['time'] = df['created_utc'].dt.time
154
  return df
155
 
156