diff --git "a/notebooks/data_processing.ipynb" "b/notebooks/data_processing.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/data_processing.ipynb" @@ -0,0 +1,1444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "553ac4eb-285f-494e-b477-eecfe9a13d72", + "metadata": {}, + "source": [ + "# Build Historical Data\n", + "\n", + "My data strategy is to get as much as I can from the torrent below, and add as much as I can with PRAW. I can only get 1000 submissions so Ill sort by new and by top. This is tricky since Ill probably have some dates missing, but its the best I can think of. \n", + "\n", + "Pushshift is dead, and thats the only other major solution I know of. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5debf5fe-4556-4963-a3c5-302ce4857d74", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "from typing import List, Dict\n", + "\n", + "from datasets import Dataset\n", + "import pandas as pd\n", + "from tqdm.autonotebook import tqdm\n", + "\n", + "proj_dir_path = Path().cwd().parent\n", + "proj_dir = str(proj_dir_path)" + ] + }, + { + "cell_type": "markdown", + "id": "52ee0cba-1ea5-4e24-bc6b-b97dd7f89134", + "metadata": {}, + "source": [ + "## Get Torrent Data\n", + "\n", + "Download the subreddit data and put it in [data/torrent](data/torrent) as described here: https://www.reddit.com/r/pushshift/comments/11ef9if/separate_dump_files_for_the_top_20k_subreddits/\n", + "\n", + "Install zstd, or decompress in the best way you can find. Its a little tricky. Here, Im putting the `ndsjon` in [data/processed](data/processed)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "83393808-04a7-4f43-95e6-efe922d3fd00", + "metadata": {}, + "outputs": [], + "source": [ + "!zstd -f -d \"$proj_dir\"/data/torrents/BestofRedditorUpdates_submissions.zst -o \"$proj_dir\"/data/torrents/BestofRedditorUpdates_submissions.ndjson > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "id": "93413e24-b2dd-4e55-8c48-57e3e8244460", + "metadata": {}, + "source": [ + "Use `ls` to make sure it worked." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "505af3b4-94d3-44b5-9c7f-c886b553980f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r--@ 1 derekthomas staff 76007896 Oct 20 10:48 /Users/derekthomas/projects/spaces/dataset-creator-reddit/data/torrents/BestofRedditorUpdates_submissions.ndjson\n" + ] + } + ], + "source": [ + "%ls -alF \"$proj_dir\"/data/torrents/BestofRedditorUpdates_submissions.ndjson" + ] + }, + { + "cell_type": "markdown", + "id": "7d158d41-c8b9-4746-859c-fc09bd25c8bb", + "metadata": {}, + "source": [ + "## Read from torrent file" + ] + }, + { + "cell_type": "markdown", + "id": "6e837989-ae3d-4e49-a23b-310e765ab678", + "metadata": {}, + "source": [ + "Read in our ndjson line by line." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9b9f4cfa-bf1f-4c1e-92db-977f5ef7b74b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "data = []\n", + "with open(proj_dir + '/data/torrents/BestofRedditorUpdates_submissions.ndjson', 'r') as file:\n", + " for line in file:\n", + " item = json.loads(line)\n", + " data.append(item)" + ] + }, + { + "cell_type": "markdown", + "id": "a420607a-26dd-41c2-8ca8-5307670d9f5f", + "metadata": {}, + "source": [ + "We need to choose which keys to keep." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cd1ae551-5e72-4968-9640-d9580473f2e3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'all_awardings': [],\n", + " 'allow_live_comments': False,\n", + " 'archived': False,\n", + " 'author': '[deleted]',\n", + " 'author_flair_background_color': '',\n", + " 'author_flair_css_class': None,\n", + " 'author_flair_template_id': None,\n", + " 'author_flair_text': None,\n", + " 'author_flair_text_color': 'dark',\n", + " 'awarders': [],\n", + " 'can_gild': False,\n", + " 'can_mod_post': False,\n", + " 'category': None,\n", + " 'content_categories': None,\n", + " 'contest_mode': False,\n", + " 'created_utc': 1580499608,\n", + " 'crosspost_parent': 't3_etguwh',\n", + " 'crosspost_parent_list': [{'all_awardings': [{'award_sub_type': 'GLOBAL',\n", + " 'award_type': 'global',\n", + " 'coin_price': 1800,\n", + " 'coin_reward': 0,\n", + " 'count': 4,\n", + " 'days_of_drip_extension': 31,\n", + " 'days_of_premium': 31,\n", + " 'description': 'Gives the author a month of Reddit Premium, which includes %{coin_symbol}700 Coins for that month, and shows a Platinum Award.',\n", + " 'end_date': None,\n", + " 'giver_coin_reward': None,\n", + " 'icon_format': None,\n", + " 'icon_height': 512,\n", + " 'icon_url': 'https://www.redditstatic.com/gold/awards/icon/platinum_512.png',\n", + " 'icon_width': 512,\n", + " 'id': 'gid_3',\n", + " 'is_enabled': True,\n", + " 'is_new': False,\n", + " 'name': 'Platinum',\n", + " 'penny_donate': None,\n", + " 'penny_price': None,\n", + " 'resized_icons': [{'height': 16,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/platinum_16.png',\n", + " 'width': 16},\n", + " {'height': 32,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/platinum_32.png',\n", + " 'width': 32},\n", + " {'height': 48,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/platinum_48.png',\n", + " 'width': 48},\n", + " {'height': 64,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/platinum_64.png',\n", + " 'width': 64},\n", + " {'height': 128,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/platinum_128.png',\n", + " 'width': 128}],\n", + " 'start_date': None,\n", + " 'subreddit_coin_reward': 0,\n", + " 'subreddit_id': None},\n", + " {'award_sub_type': 'GLOBAL',\n", + " 'award_type': 'global',\n", + " 'coin_price': 500,\n", + " 'coin_reward': 100,\n", + " 'count': 6,\n", + " 'days_of_drip_extension': 0,\n", + " 'days_of_premium': 7,\n", + " 'description': 'Gives the author a week of Reddit Premium, %{coin_symbol}100 Coins to do with as they please, and shows a Gold Award.',\n", + " 'end_date': None,\n", + " 'giver_coin_reward': None,\n", + " 'icon_format': None,\n", + " 'icon_height': 512,\n", + " 'icon_url': 'https://www.redditstatic.com/gold/awards/icon/gold_512.png',\n", + " 'icon_width': 512,\n", + " 'id': 'gid_2',\n", + " 'is_enabled': True,\n", + " 'is_new': False,\n", + " 'name': 'Gold',\n", + " 'penny_donate': None,\n", + " 'penny_price': None,\n", + " 'resized_icons': [{'height': 16,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/gold_16.png',\n", + " 'width': 16},\n", + " {'height': 32,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/gold_32.png',\n", + " 'width': 32},\n", + " {'height': 48,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/gold_48.png',\n", + " 'width': 48},\n", + " {'height': 64,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/gold_64.png',\n", + " 'width': 64},\n", + " {'height': 128,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/gold_128.png',\n", + " 'width': 128}],\n", + " 'start_date': None,\n", + " 'subreddit_coin_reward': 0,\n", + " 'subreddit_id': None},\n", + " {'award_sub_type': 'GLOBAL',\n", + " 'award_type': 'global',\n", + " 'coin_price': 100,\n", + " 'coin_reward': 0,\n", + " 'count': 32,\n", + " 'days_of_drip_extension': 0,\n", + " 'days_of_premium': 0,\n", + " 'description': \"Shows the Silver Award... and that's it.\",\n", + " 'end_date': None,\n", + " 'giver_coin_reward': None,\n", + " 'icon_format': None,\n", + " 'icon_height': 512,\n", + " 'icon_url': 'https://www.redditstatic.com/gold/awards/icon/silver_512.png',\n", + " 'icon_width': 512,\n", + " 'id': 'gid_1',\n", + " 'is_enabled': True,\n", + " 'is_new': False,\n", + " 'name': 'Silver',\n", + " 'penny_donate': None,\n", + " 'penny_price': None,\n", + " 'resized_icons': [{'height': 16,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/silver_16.png',\n", + " 'width': 16},\n", + " {'height': 32,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/silver_32.png',\n", + " 'width': 32},\n", + " {'height': 48,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/silver_48.png',\n", + " 'width': 48},\n", + " {'height': 64,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/silver_64.png',\n", + " 'width': 64},\n", + " {'height': 128,\n", + " 'url': 'https://www.redditstatic.com/gold/awards/icon/silver_128.png',\n", + " 'width': 128}],\n", + " 'start_date': None,\n", + " 'subreddit_coin_reward': 0,\n", + " 'subreddit_id': None}],\n", + " 'allow_live_comments': True,\n", + " 'approved_at_utc': None,\n", + " 'approved_by': None,\n", + " 'archived': False,\n", + " 'author': 'AmINotTheAsshole',\n", + " 'author_flair_background_color': None,\n", + " 'author_flair_css_class': None,\n", + " 'author_flair_richtext': [],\n", + " 'author_flair_template_id': None,\n", + " 'author_flair_text': None,\n", + " 'author_flair_text_color': None,\n", + " 'author_flair_type': 'text',\n", + " 'author_fullname': 't2_5br24m60',\n", + " 'author_patreon_flair': False,\n", + " 'author_premium': True,\n", + " 'awarders': [],\n", + " 'banned_at_utc': None,\n", + " 'banned_by': None,\n", + " 'can_gild': True,\n", + " 'can_mod_post': False,\n", + " 'category': None,\n", + " 'clicked': False,\n", + " 'content_categories': None,\n", + " 'contest_mode': False,\n", + " 'created': 1579929833.0,\n", + " 'created_utc': 1579901033.0,\n", + " 'discussion_type': None,\n", + " 'distinguished': None,\n", + " 'domain': 'self.AmItheAsshole',\n", + " 'downs': 0,\n", + " 'edited': 1579901489.0,\n", + " 'gilded': 6,\n", + " 'gildings': {'gid_1': 32, 'gid_2': 6, 'gid_3': 4},\n", + " 'hidden': False,\n", + " 'hide_score': False,\n", + " 'id': 'etguwh',\n", + " 'is_crosspostable': True,\n", + " 'is_meta': False,\n", + " 'is_original_content': False,\n", + " 'is_reddit_media_domain': False,\n", + " 'is_robot_indexable': True,\n", + " 'is_self': True,\n", + " 'is_video': False,\n", + " 'likes': None,\n", + " 'link_flair_background_color': '#ffd635',\n", + " 'link_flair_css_class': None,\n", + " 'link_flair_richtext': [],\n", + " 'link_flair_template_id': 'ca4006b8-f14a-11e9-9b18-0e179a5854dc',\n", + " 'link_flair_text': 'UPDATE',\n", + " 'link_flair_text_color': 'dark',\n", + " 'link_flair_type': 'text',\n", + " 'locked': True,\n", + " 'media': None,\n", + " 'media_embed': {},\n", + " 'media_only': False,\n", + " 'mod_note': None,\n", + " 'mod_reason_by': None,\n", + " 'mod_reason_title': None,\n", + " 'mod_reports': [],\n", + " 'name': 't3_etguwh',\n", + " 'no_follow': False,\n", + " 'num_comments': 713,\n", + " 'num_crossposts': 1,\n", + " 'num_reports': None,\n", + " 'over_18': False,\n", + " 'parent_whitelist_status': 'all_ads',\n", + " 'permalink': '/r/AmItheAsshole/comments/etguwh/update_aita_for_letting_my_brother_call_me_dad/',\n", + " 'pinned': False,\n", + " 'pwls': 6,\n", + " 'quarantine': False,\n", + " 'removal_reason': None,\n", + " 'removed_by': None,\n", + " 'removed_by_category': None,\n", + " 'report_reasons': None,\n", + " 'saved': False,\n", + " 'score': 37507,\n", + " 'secure_media': None,\n", + " 'secure_media_embed': {},\n", + " 'selftext': 'Original post: https://www.reddit.com/r/AmItheAsshole/comments/ehmsme/aita_for_letting_my_brother_call_me_dad_and/?utm_medium=android_app&utm_source=share\\n\\nHey guys! So many people wanted me to update on my previous post and wanted me to seek professional advice first before I take matters into my own hands. Apologies if it is long. I\\'ll try to make it as detailed as possible while making this short.\\n\\nI went to my local therapist and told him about my situation and asked what to do. To keep it short, he said he\\'s heard similar recounts from before and said it is best if I tell him as soon as possible for multiple reasons and to **make sure that my bio-children are present** (multiple reasons). I asked a few of my closest friends and the majority said more or less the same thing.\\n\\nMy wife and I decided to sit the kids down and burst the big bubble. I asked my brother Josh to come closer and I made sure I held him close and make him feel comfortable. He asked \"What\\'s going on?\" but I started by telling us how much we cared and loved for him, then told him everything about my parents (I put them in a bright light in hopes of a reunion) and who I am to him, then quickly hugged him and my other two kids together and told him that I love all my children the same and NOTHING is going to change my love for him. He was shocked and asked if I was joking, but I was starting to cry a little at this point, so he knew I was serious. My bio-children were very surprised too. He was in tears and asked me why I didn\\'t tell him sooner. I didn\\'t know what to say and said \"I was just trying to protect you, I\\'m sorry and I hope you can forgive me\", but unfortunately and understandably, he left.\\n\\nHe didn\\'t talk to me as much, again, understandably so. I continuously offered him to go out to the park and play a bit of football (he loves that) and all his favourite things, but he just outright declined and even got a little angry sometimes for me even talking to him. I thought I messed up big-time, until one day while my wife and two children were out doing shopping and we were alone, he came up to me and said \"I know you\\'re not my real father, but I want to let you know you\\'re the best dad in the whole world. Sorry for before.\" I hugged him and things got pretty emotional. It would be a big lie to say my house is normal now (far from that), but things are slowly - ever so slowly - starting to brighten up. There\\'s no longer anything to hide anymore and it feels like we are born again.\\n\\nJosh is a tough kid, and he handled this far better than I believed he would. I\\'ll be looking into therapy for him to help him recover just incase it doesn\\'t go well in the long run. I\\'ll strive and continue to be a great dad to my kids, and a great dad to my brother/son. Thank you Reddit for pushing me towards this happy ending. Thank you for all the advice and judgements I got (excluding the rude ones about my uncles and aunts -- eeek!) I love you all. Good night.',\n", + " 'selftext_html': '<!-- SC_OFF --><div class=\"md\"><p>Original post: <a href=\"https://www.reddit.com/r/AmItheAsshole/comments/ehmsme/aita_for_letting_my_brother_call_me_dad_and/?utm_medium=android_app&amp;utm_source=share\">https://www.reddit.com/r/AmItheAsshole/comments/ehmsme/aita_for_letting_my_brother_call_me_dad_and/?utm_medium=android_app&amp;utm_source=share</a></p>\\n\\n<p>Hey guys! So many people wanted me to update on my previous post and wanted me to seek professional advice first before I take matters into my own hands. Apologies if it is long. I&#39;ll try to make it as detailed as possible while making this short.</p>\\n\\n<p>I went to my local therapist and told him about my situation and asked what to do. To keep it short, he said he&#39;s heard similar recounts from before and said it is best if I tell him as soon as possible for multiple reasons and to <strong>make sure that my bio-children are present</strong> (multiple reasons). I asked a few of my closest friends and the majority said more or less the same thing.</p>\\n\\n<p>My wife and I decided to sit the kids down and burst the big bubble. I asked my brother Josh to come closer and I made sure I held him close and make him feel comfortable. He asked &quot;What&#39;s going on?&quot; but I started by telling us how much we cared and loved for him, then told him everything about my parents (I put them in a bright light in hopes of a reunion) and who I am to him, then quickly hugged him and my other two kids together and told him that I love all my children the same and NOTHING is going to change my love for him. He was shocked and asked if I was joking, but I was starting to cry a little at this point, so he knew I was serious. My bio-children were very surprised too. He was in tears and asked me why I didn&#39;t tell him sooner. I didn&#39;t know what to say and said &quot;I was just trying to protect you, I&#39;m sorry and I hope you can forgive me&quot;, but unfortunately and understandably, he left.</p>\\n\\n<p>He didn&#39;t talk to me as much, again, understandably so. I continuously offered him to go out to the park and play a bit of football (he loves that) and all his favourite things, but he just outright declined and even got a little angry sometimes for me even talking to him. I thought I messed up big-time, until one day while my wife and two children were out doing shopping and we were alone, he came up to me and said &quot;I know you&#39;re not my real father, but I want to let you know you&#39;re the best dad in the whole world. Sorry for before.&quot; I hugged him and things got pretty emotional. It would be a big lie to say my house is normal now (far from that), but things are slowly - ever so slowly - starting to brighten up. There&#39;s no longer anything to hide anymore and it feels like we are born again.</p>\\n\\n<p>Josh is a tough kid, and he handled this far better than I believed he would. I&#39;ll be looking into therapy for him to help him recover just incase it doesn&#39;t go well in the long run. I&#39;ll strive and continue to be a great dad to my kids, and a great dad to my brother/son. Thank you Reddit for pushing me towards this happy ending. Thank you for all the advice and judgements I got (excluding the rude ones about my uncles and aunts -- eeek!) I love you all. Good night.</p>\\n</div><!-- SC_ON -->',\n", + " 'send_replies': True,\n", + " 'spoiler': False,\n", + " 'stickied': False,\n", + " 'subreddit': 'AmItheAsshole',\n", + " 'subreddit_id': 't5_2xhvq',\n", + " 'subreddit_name_prefixed': 'r/AmItheAsshole',\n", + " 'subreddit_subscribers': 1884468,\n", + " 'subreddit_type': 'public',\n", + " 'suggested_sort': None,\n", + " 'thumbnail': 'self',\n", + " 'thumbnail_height': None,\n", + " 'thumbnail_width': None,\n", + " 'title': 'UPDATE: AITA for letting my brother call me \"dad\" and refusing to tell him the ugly truth?',\n", + " 'total_awards_received': 42,\n", + " 'treatment_tags': [],\n", + " 'ups': 37507,\n", + " 'url': 'https://www.reddit.com/r/AmItheAsshole/comments/etguwh/update_aita_for_letting_my_brother_call_me_dad/',\n", + " 'user_reports': [],\n", + " 'view_count': None,\n", + " 'visited': False,\n", + " 'whitelist_status': 'all_ads',\n", + " 'wls': 6}],\n", + " 'discussion_type': None,\n", + " 'distinguished': None,\n", + " 'domain': 'self.AmItheAsshole',\n", + " 'edited': False,\n", + " 'gilded': 0,\n", + " 'gildings': {},\n", + " 'hidden': False,\n", + " 'id': 'ewtopo',\n", + " 'is_crosspostable': False,\n", + " 'is_meta': False,\n", + " 'is_original_content': False,\n", + " 'is_reddit_media_domain': False,\n", + " 'is_robot_indexable': False,\n", + " 'is_self': False,\n", + " 'is_video': False,\n", + " 'link_flair_background_color': '',\n", + " 'link_flair_css_class': None,\n", + " 'link_flair_richtext': [],\n", + " 'link_flair_text': None,\n", + " 'link_flair_text_color': 'dark',\n", + " 'link_flair_type': 'text',\n", + " 'locked': False,\n", + " 'media': None,\n", + " 'media_embed': {},\n", + " 'media_only': False,\n", + " 'no_follow': True,\n", + " 'num_comments': 0,\n", + " 'num_crossposts': 0,\n", + " 'over_18': False,\n", + " 'parent_whitelist_status': None,\n", + " 'permalink': '/r/BestofRedditorUpdates/comments/ewtopo/update_aita_for_letting_my_brother_call_me_dad/',\n", + " 'pinned': False,\n", + " 'pwls': None,\n", + " 'quarantine': False,\n", + " 'removal_reason': None,\n", + " 'removed_by': None,\n", + " 'removed_by_category': 'deleted',\n", + " 'retrieved_on': 1587263887,\n", + " 'score': 1,\n", + " 'secure_media': None,\n", + " 'secure_media_embed': {},\n", + " 'selftext': '[deleted]',\n", + " 'send_replies': False,\n", + " 'spoiler': False,\n", + " 'stickied': False,\n", + " 'subreddit': 'BestofRedditorUpdates',\n", + " 'subreddit_id': 't5_2ea6kj',\n", + " 'subreddit_name_prefixed': 'r/BestofRedditorUpdates',\n", + " 'subreddit_subscribers': 1519,\n", + " 'subreddit_type': 'public',\n", + " 'suggested_sort': None,\n", + " 'thumbnail': 'default',\n", + " 'thumbnail_height': None,\n", + " 'thumbnail_width': None,\n", + " 'title': 'UPDATE: AITA for letting my brother call me \"dad\" and refusing to tell him the ugly truth?',\n", + " 'total_awards_received': 0,\n", + " 'treatment_tags': [],\n", + " 'url': '/r/AmItheAsshole/comments/etguwh/update_aita_for_letting_my_brother_call_me_dad/',\n", + " 'whitelist_status': None,\n", + " 'wls': None}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "92677aaa-38ee-4b19-84b2-c16e3b076f5e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "keys_to_keep = ['created_utc', 'title', 'score', 'flair', 'link_flair_text', 'selftext', 'author', 'permalink', 'selftext']\n", + "filtered_data = [{k: d[k] for k in keys_to_keep if k in d} for d in data]" + ] + }, + { + "cell_type": "markdown", + "id": "5ef7768e-d74b-4291-8009-5e37731e1d7d", + "metadata": {}, + "source": [ + "Verify our filtered_data looks correct." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2f3ea667-0203-4231-82a0-054b22e41be2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'created_utc': 1580499608,\n", + " 'title': 'UPDATE: AITA for letting my brother call me \"dad\" and refusing to tell him the ugly truth?',\n", + " 'score': 1,\n", + " 'link_flair_text': None,\n", + " 'selftext': '[deleted]',\n", + " 'author': '[deleted]',\n", + " 'permalink': '/r/BestofRedditorUpdates/comments/ewtopo/update_aita_for_letting_my_brother_call_me_dad/'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_data[0]" + ] + }, + { + "cell_type": "markdown", + "id": "4df81203-a94e-48ed-801c-0a11e9a4604d", + "metadata": {}, + "source": [ + "Put it in a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0d7e6d3a-5430-4efd-b19a-e861aba9d67a", + "metadata": {}, + "outputs": [], + "source": [ + "key_conversion = {\n", + " 'submission_content': 'content',\n", + " 'selftext': 'content',\n", + " 'author': 'poster',\n", + " 'date': 'date_utc',\n", + " 'created_utc': 'date_utc',\n", + " 'link_flair_text': 'flair',\n", + " 'upvotes': 'score',\n", + " 'link': 'permalink'\n", + " # 'poster_link': (No direct match)\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "76778131-1b0a-4863-8319-49283d871964", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_utctitlescoreflaircontentposterpermalinkid
02020-01-31 19:40:08UPDATE: AITA for letting my brother call me \"d...1None[deleted][deleted]/r/BestofRedditorUpdates/comments/ewtopo/updat...ewtopo
12020-01-31 19:46:30Younger brother thinks older brother is his da...7AITAOriginal post: https://www.reddit.com/r/AmIthe...register2014/r/BestofRedditorUpdates/comments/ewts8z/young...ewts8z
22020-01-31 19:53:11Stressed Father who wanted to cancel Christmas...9AITA[Original: AITA if I \"cancel\" Christmas becaus...register2014/r/BestofRedditorUpdates/comments/ewtvnr/stres...ewtvnr
32020-01-31 20:37:14Man refuses to take girlfriend out to dinner b...23AITA**Original Post:** https://www.reddit.com/r/Am...register2014/r/BestofRedditorUpdates/comments/ewuj3d/man_r...ewuj3d
42020-01-31 20:41:16He pesters neighbor to cook for him for $515AITA[Original] (https://www.reddit.com/r/AmItheAss...register2014/r/BestofRedditorUpdates/comments/ewulam/he_pe...ewulam
...........................
83882022-12-31 15:56:56AITA for making my daughter share her presents...1CONCLUDED[removed]anonziee/r/BestofRedditorUpdates/comments/zzx36a/aita_...zzx36a
83892022-12-31 18:16:22To All BORU contributors, Thank you :)1CONCLUDED[removed]IsItAcOnSeQuEnCe/r/BestofRedditorUpdates/comments/10004zw/to_a...10004zw
83902022-12-31 19:52:50Car was mistakenly towed from my own parking s...5743INCONCLUSIVE**I am not the original poster. Originally pos...Celany/r/BestofRedditorUpdates/comments/10025jy/car_...10025jy
83912022-12-31 21:47:44My (29F) husband (30M) has been going out with...1CONCLUDED[removed][deleted]/r/BestofRedditorUpdates/comments/1004j93/my_2...1004j93
83922022-12-31 23:04:03AITA for reporting my best friend to the HR be...1CONCLUDED[removed][deleted]/r/BestofRedditorUpdates/comments/10061th/aita...10061th
\n", + "

8393 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " date_utc title \\\n", + "0 2020-01-31 19:40:08 UPDATE: AITA for letting my brother call me \"d... \n", + "1 2020-01-31 19:46:30 Younger brother thinks older brother is his da... \n", + "2 2020-01-31 19:53:11 Stressed Father who wanted to cancel Christmas... \n", + "3 2020-01-31 20:37:14 Man refuses to take girlfriend out to dinner b... \n", + "4 2020-01-31 20:41:16 He pesters neighbor to cook for him for $5 \n", + "... ... ... \n", + "8388 2022-12-31 15:56:56 AITA for making my daughter share her presents... \n", + "8389 2022-12-31 18:16:22 To All BORU contributors, Thank you :) \n", + "8390 2022-12-31 19:52:50 Car was mistakenly towed from my own parking s... \n", + "8391 2022-12-31 21:47:44 My (29F) husband (30M) has been going out with... \n", + "8392 2022-12-31 23:04:03 AITA for reporting my best friend to the HR be... \n", + "\n", + " score flair content \\\n", + "0 1 None [deleted] \n", + "1 7 AITA Original post: https://www.reddit.com/r/AmIthe... \n", + "2 9 AITA [Original: AITA if I \"cancel\" Christmas becaus... \n", + "3 23 AITA **Original Post:** https://www.reddit.com/r/Am... \n", + "4 15 AITA [Original] (https://www.reddit.com/r/AmItheAss... \n", + "... ... ... ... \n", + "8388 1 CONCLUDED [removed] \n", + "8389 1 CONCLUDED [removed] \n", + "8390 5743 INCONCLUSIVE **I am not the original poster. Originally pos... \n", + "8391 1 CONCLUDED [removed] \n", + "8392 1 CONCLUDED [removed] \n", + "\n", + " poster permalink \\\n", + "0 [deleted] /r/BestofRedditorUpdates/comments/ewtopo/updat... \n", + "1 register2014 /r/BestofRedditorUpdates/comments/ewts8z/young... \n", + "2 register2014 /r/BestofRedditorUpdates/comments/ewtvnr/stres... \n", + "3 register2014 /r/BestofRedditorUpdates/comments/ewuj3d/man_r... \n", + "4 register2014 /r/BestofRedditorUpdates/comments/ewulam/he_pe... \n", + "... ... ... \n", + "8388 anonziee /r/BestofRedditorUpdates/comments/zzx36a/aita_... \n", + "8389 IsItAcOnSeQuEnCe /r/BestofRedditorUpdates/comments/10004zw/to_a... \n", + "8390 Celany /r/BestofRedditorUpdates/comments/10025jy/car_... \n", + "8391 [deleted] /r/BestofRedditorUpdates/comments/1004j93/my_2... \n", + "8392 [deleted] /r/BestofRedditorUpdates/comments/10061th/aita... \n", + "\n", + " id \n", + "0 ewtopo \n", + "1 ewts8z \n", + "2 ewtvnr \n", + "3 ewuj3d \n", + "4 ewulam \n", + "... ... \n", + "8388 zzx36a \n", + "8389 10004zw \n", + "8390 10025jy \n", + "8391 1004j93 \n", + "8392 10061th \n", + "\n", + "[8393 rows x 8 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torrent_df = pd.DataFrame(filtered_data)\n", + "torrent_df['created_utc'] = pd.to_datetime(torrent_df['created_utc'], unit='s')\n", + "torrent_df = torrent_df.rename(columns=key_conversion)\n", + "torrent_df['id'] = torrent_df.permalink.str.split('/').str[4]\n", + "\n", + "torrent_df" + ] + }, + { + "cell_type": "markdown", + "id": "5796777c-d75f-416e-91ed-13253134aae3", + "metadata": {}, + "source": [ + "## Read PRAW Downloads\n", + "\n", + "Basically I downloaded 1000 posts from praw in 2 ways, the 1000 most recent from `new` and the 1000 `top` posts. I needlessly saved it to file, but here is me adding that to the data I already have." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9dc1ae61-4320-4053-bee7-189de46b103b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9db2bf5ee3b745cf9828a4399cfc49ca", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "submissions = []\n", + "for submission_path in tqdm((proj_dir_path/'data'/'BestofRedditorUpdates').glob('*/*.json')):\n", + " date, post_id = submission_path.stem.split('_')\n", + " with open(submission_path) as file:\n", + " submission = json.load(file)\n", + " submissions.append(submission)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d5ed21a3-43f0-4cea-9e8d-656fc85b096a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'submission_content': '**I am NOT the Original Poster. That is** u/throwaway970012390. He posted in r/AmItheAsshole, r/Advice, r/TrueOffMyChest and his own profile.\\n\\nThis is a **long, dark post.** Please read the trigger warnings and mood spoiler.\\n\\n**Trigger Warning:** >!drug use; suicide attempt; addiction; misandry; infidelity!<\\n\\n**Mood Spoiler:** >!depressing and frustrating!<\\n\\n**Original** [Post](https://www.reddit.com/r/AmItheAsshole/comments/12towpj/aita_38m_for_cutting_back_on_work_to_prove_a/)**: April 20, 2023**\\n\\nMy wife is usually an angel of a woman, but has recently gotten into a friendship with a woman whom I personally believe is a bad influence on her, not in a patronizing way, more of a \"lay down with dogs, get up with fleas\" type of situation.\\n\\nI never said anything about her childishness or her very radical misandry, because frankly it doesn\\'t effect me.\\n\\nUntil it did. A few months ago my wife began pressuring me to do more around the house. Before I get an instant YTA. We already split chores and child care, admittedly, she had a bigger cut than I because she is a SAHM, but I do most of the cooking, breakfasts and Dinners, Lunch is her responsibility for her and the boys. I take out the garbage and I do laundry, and I deep clean the bathrooms once a week. I do also help with our boys homework and such.\\n\\nShe insists that I am not doing enough and that I should be doing more around the house. I tried having discussions with her asking what she expected from me (namely all chores and child rearing should be my duty it seems) and for months it seemed to be going no where. She used the D word more than once when speaking on this which felt manipulative.\\n\\nIt boiled over when we were out with friends one night, and she began talking about how I never helped out and how I use her as a house slave (her words). I will admit I saw red.\\n\\nThis next part is where I may be the asshole. I didn\\'t say anything that night but the next day I asked my boss to be given reduced hours for the next little bit, due to stress.\\n\\nAnd I took over everything in the house. I cooked Breakfast, and made lunch for the boys before I drove them to school, I cleaned the house top to bottom, I did every dish we had twice and so on. My wife was blindingly happy, and bragged to her friend that she finally had me \"worn in\".\\n\\nShe Didn\\'t lift a finger for around a month. Then she began asking why we never went on dates anymore and Complaining that she wanted to get her nails done as they were growing in. I explained that I had to take that out of our budget so we could continue to afford everything else, but we could absolutely have a movie night in, and I could paint her nails for her. She was unhappy with that solution, So I asked her if she would want to get a part time job to pay for either luxury\\'s. You would have thought I asked if she wanted to join a cult.\\n\\nShe then asked if I could Just pick up more shifts at work to cover her other expenses, and used the phrase \"be a man\". Which I found more than a little insulting. I then asked her if she would be willing to go back to splitting the chores and such? Which is when she began to catch on that the two were related.\\n\\nShe yelled at me that I was being a manipulative asshole for doing this and even claimed it was financial \"a word\". I stood strong for a while but now I am questioning my methods, because even I feel what I did was a bit underhanded. so AITA?\\n\\n***Relevant Comments:***\\n\\n*Why are you still with her when she treats you like that?*\\n\\n\"I love her. With everything I got. She’s an excellent mother, and honestly before she met this friend we were both blissfully happy to the best of my knowledge.\"\\n\\n*How did she suddenly notice that she wasn\\'t getting her nails done or going out on dates? Did you block the credit card from everything but the grocery store?*\\n\\n\"No, for one she has her own card, though we do only have one bank account. I set her nail and hair appointments, because she hates making phone calls, and she asked why I didn\\'t take her out anymore. She could have spent from the card without saying anything I suppose but upon budgeting we would have been in trouble if 300+ was gone from a night out with friends.\"\\n\\n*More about his wife and their relationship:*\\n\\n\"She had always wanted to be a SAHM before we got together, I try not to say this part because while her two boys are not mine biologically, they are my sons, but being a single mother was incredibly taxing for her, because working in the public was too much. I had a bit of experience with being a single father myself, I have a son of my own, but I was looking to advance my career, and was more than happy to take over the bills for a lessened load at home.\"\\n\\n*You\\'re paying to raise another man\\'s children:*\\n\\n\"I do not like this comment. Those are my kids. No one else’s, and regardless of what happens with their mother ever, I hope those boys know that.\"\\n\\n*More about how things have changed:*\\n\\n\"Oh god, I can tell you but it may be a bit mundane. When we first got married, she would give me shoulder and back massages everyday after work, and have my favorite music playing when I opened the door, even though she hates bluegrass. She would make my coffee while I was getting dressed. She made sure to pick up extra crunchy peanut butter from the store even though I’m the only person who likes it. We would have movie night twice a week with the kids and a date night to ourselves once a week. I have always had trouble sleeping, and I don’t want to take pills for it, so she always had the bed ready for me, a heating pad already turned on, and my pajamas on the bed. She would run her fingers through my hair until I fell asleep, and would wake me up herself instead of the alarm because she knew it put me in a better mood. None of which I asked for. She’s a good wife and wanted to because she knew that that’s what I liked, and she did it. She hasn’t done a 180, some of this is still true, af least it was until I cut down my hours, that was really when she stopped doing anything at all. And right now she’s pissed so I’m on the couch. Awake and regretful. Personally I think she’s stubborn. I don’t think she even really wants it. She just wants to prove that I would do it if she asks. She has a troubled history with men, and that’s why I tend to be forgiving when things do happen.\"\\n\\n*Troubled history with men?*\\n\\n\"It’s not something I’m comfortable talking about. But believe me when I say. What happened, was not her fault.\"\\n\\n***OOP is voted NTA***\\n\\n**Update** [Post](https://www.reddit.com/r/Advice/comments/12znu7j/how_do_i_38m_explain_tohelp_my_sons_to_understand/)**: April 26, 2023 (6 days later)**\\n\\n**Title: How do I (38M) explain to/help my sons to understand their mother (30F) is going to Rehab.**\\n\\nI have never been in a situation like this. I am a former addict myself, but I didn\\'t have children then.\\n\\nSee my last post for more clarification, ***(editor\\'s note- I tried several different engines and search tactics, but couldn\\'t find any other \"last post\" besides the AITA one)*** but the gist of it is that my wife and I recently had a blowout argument where she admitted to using two substances for several months, and has agreed to get checked into rehab, which we are currently setting up now.\\n\\nHow the HELL do I bring this up to them, without them being judgmental or hateful to their mother? Or worse, falling into the same mental space I am in? I don\\'t want to lie to them, which is what my wife wants, but I am failing to see an alternative that won\\'t destroy them or the respect they have for their mother. I am swimming blind here, and I have barely slept since this all came out. Any and all advice is appreciated. Thank you in advance.\\n\\n***Relevant Comments:***\\n\\n*I think it depends. What was she taking?*\\n\\n\"klonopin and adipex that she was buying from her friend. She also admitted to having tried coke and several other prescription narcotics, but those were the only two she did often.\"\\n\\n*Was this the friend that turned her on to radical misandry?*\\n\\n\"Yes. Though it’s come to light that it wasn’t misandry she was being taught but flagrant drug usage. I have told her that she goes no contact with this friend or else she will be facing divorce along with everything else.\"\\n\\n*How old are the kids?*\\n\\n9, 12, and 14.\\n\\n**Update** [Post](https://www.reddit.com/user/throwaway970012390/comments/133xprp/i_yelled_at_my_wife/) **2: April 30, 2023 (10 days from OG post)**\\n\\n**Title: I Yelled at my wife**\\n\\nSee my profile for details. But I was driving my wife to the rehabilitation center we decided on. On the way she was screaming at me. About how she can’t believe I’m humiliating her like this (explaining what was happening to the boys, and making her message her dealer/friend that they would not be hanging out or using together anymore)\\n\\nAbout how she doesn’t want to go, and that I am a controlling monster, and how threatening her with divorce and taking primary custody of the boys was too far and I was insane, and I just took it, and took it and took it, until I just couldn’t.\\n\\nAnd I screamed at her. I screamed that the woman I met would have rather died than had a pillhead junkie around her sons, and how she disgusted me, and that I don’t know if she knew how much I was considering leaving her not because of the addiction but the way she was fucking acting, like she hadn’t brought drugs into our home. Around me, a former addict myself, and around OUR BOYS. That I am beginning to hate her for doing that. That she was becoming exactly what she always cried about her mother being, and that she was lucky I was here to see it before what happened to her happened to her goddamned sons.\\n\\nIt makes me sick to say but watching it sink in just how far she had spiraled felt good. Watching her realize that her actions have consequences was nice. She yelled a few more times, that I was an abusive asshole, or whatever, but she was still crying so I felt her heart wasn’t in it.\\n\\nI plan on speaking to a lawyer. I don’t want to divorce her, but I don’t know how healthy our relationship could possibly be after this. I know yelling like that was wrong, but I don’t feel bad. And that is the part that makes me think that maybe I shouldn’t be married to her anymore. For her sake and my own.\\n\\nI don’t know what else to do, and I’m so pissed that she detonated c-4 in every bit of our life.\\n\\n***Relevant Comments:***\\n\\n\"I believe I’m going to have to divorce her. And it’s. Wrecking me. I don’t want to. I still love her, but I don’t know if I trust myself around her, and also not to use myself. I have been closer to relapsing this week than I ever have been.\"\\n\\n**Update** [Post](https://www.reddit.com/user/throwaway970012390/comments/13fk2ll/she_was_cheating/) **3: May 12, 2023 (12 days from last post, 3 weeks from first post)**\\n\\n**Title: She was cheating**\\n\\nBefore you read, please know this is a vent post. I normally would never be like this but I am beyond okay and need to get this poison out of my head before I go anywhere else with it.\\n\\nShe was fucking cheating. The drug dealing friend sent me fucking videos of her dancing and grinding on this ugly hick looking bastard.\\n\\nI am goddamned destroyed. The boys are staying with my mother for a few days, and I’m taking the next week off work.\\n\\nI am so done. I have never been so angry in my goddamned life.\\n\\nShe was so goddamned smug sending it, “in case you don’t realize you’re replaceable to her.” well the free ride stops here. I hope she can get on Medicaid for her suboxone LMFAO. I\\'m done.\\n\\nI save the video immediately and I’m going to see a lawyer asap. I can’t tell anyone yet because I want to do this shit right.\\n\\nThankful as FUCK my parents insisted on a prenup with what I at the time thought was an inhumane cheating clause. Never been cheated on before and I feel like tearing my goddamned hair out. I genuinely never thought she would turn out to be such a scummy piece of shit. I can not handle this. I am not physically able to handle this. I haven\\'t been able to keep food down and I drank for the first time in over a decade last night.\\n\\nThen I woke up and had to pour the rest down the drain because I am about to spiral, and my boys don\\'t need both mom and dad in rehab right now. I am so close to losing my goddamned mind.\\n\\nAlso, believe what you want, but stop sending me private messages about how I should take down the posts or that posting about my personal relationship with my wife is wrong- please. Leave me be it will not work. This is the only place I can talk about this shit.\\n\\n**Latest Update** [Post](https://www.reddit.com/r/TrueOffMyChest/comments/153p4xk/my_soon_to_be_exwife_is_in_the_hospital_after_a/)**: July 19, 2023 (3 months from OG post)**\\n\\n**Title: My Soon to be ex-wife is in the hospital after a suicide attempt, and I feel like a monster.**\\n\\nYou can read my other posts for more context on what happened to get here, if you like, but the short of it is, I was blind to my wife’s addiction until she admitted it, and went to rehab, while she was in rehab, I was sent evidence that she had been cheating, often, and with more than one person.\\n\\nI have been working on filing for divorce, while she’s in rehab, not just for the cheating, but because with that on top of everything else, and myself nearly sinking back into my own addiction due to the stress of the situation, I couldn’t stand to even think of her anymore, and there’s no healthy relationship that has room for that mind set.\\n\\nI honestly didn’t want to be in a room with her again, to try mediation or counseling due to the fact that the last time I was alone with her I raised my voice, and at the time even felt she deserved it. (I of course now know that me doing that was terrible, and could be considered abuse, yet another reason I should not be in a relationship with this woman.)\\n\\nI moved all of her belongings to our guest room, minus the pills I found hidden in her beside table. I took pictures of those in their hiding spot then flushed them.\\n\\nI also removed her from my Bank account and credit cards.\\n\\nI spoke to my boys, explaining the situation without demonizing their mother to the best of my ability, and they seemed to understand I have no intention of abandoning them, and blood or not, they were my sons.\\n\\nThen she came home. The boys were, and still are away at camp, a birthday present paid for by my mother. She was quiet. Eyes on the ground after the moment I picked her up at the facility all the way home. Once we got home, I led her to the guest room silently, and she didn’t take it well, crying before she could even take the first step.\\n\\nThroughout the next couple weeks, I let her get settled, and though I stayed carefully neutral, I know she could tell something was coming, but I wanted to do be as fair as possible, and try to let her get used to being out before I said anything, as that was one thing I myself hated about when I left rehab, everything was flying at me so fast, I didn’t have time to breathe.\\n\\nFinally, I asked her to sit on the couch and I began explaining to her that I do not believe I can continue being married to her, and that I wanted divorce.\\n\\nI should have known her reaction was all wrong, she didn’t say anything at all, she only nodded, and cried quietly as I spoke, I explained that I did not intend to hurt her, but I could not be married to her anymore, and that maybe both of us should focus on being the best parents we can be.\\n\\nI told her I had no intentions of kicking her out, and that because of our prenup the divorce should be cut and dry, and she should be safe to begin looking for employment now, and once she has a job I will help her find an apartment.\\n\\nAt this, she stood and walked to her room. I let her, because I thought she must have been overwhelmed, and this talk could wait. She didn’t come out at dinner time, and I weighed whether I should leave her alone or not. Eventually, I decided to knock on the door, and ask if she was hungry.\\n\\nLong story short. She had smuggled pills into my house somehow (or she had a stash I was unaware of), and had an overdose, and was dead for several minutes in the ambulance, and she’s in a medically induced coma, because the doctors aren’t sure exactly how much damage she’s done to her brain, from what they’ve said.\\n\\nI feel like an absolute monster. Like I am the scum of the earth. Like I should have just said nothing. Like I should have just dealt with it. Just. Held it in, and stayed.\\n\\nI am responsible for this and it kills me. I may not have the same love for her as I did, but I do feel so very sorry for everything she’s been through. It’s killing me. I haven’t told my sons yet, and I am debating waiting until they’re back from camp, so they can have a little more time without this on their minds on top of everything else.\\n\\nI am sorry for the grammar and such, I don’t have the energy to edit this, but wanted to get this off my chest.\\n\\n***Relevant Comments:***\\n\\n*Where\\'s her family?*\\n\\n\"Not my story to tell but she doesn’t have much family alive, and the ones who are she’s no contact with. She has other friends, but I don’t know which ones were enabling. They all know what’s going on, I messaged all her friends, except the dealer, though she knows now I know from messages she sent me. She hasn’t shown up to the hospital though, possibly because she thinks I would throw her out, which I would be tempted to do, to be entirely honest. A couple of her other friends visit all the time.\"\\n\\n\"Yes, she’s no contact with her mother ironically because of her mothers addiction, and bad treatment of her. The rest constantly insisted she should see her mother, and two times even took her boys to her mothers house without her permission\"*One more clarification on the kids:*\\n\\n\"Yes, because all of our children are from previous relationships. I have adopted the two eldest, who aren’t mine biologically.\"\\n\\n*Why he did it at that point:*\\n\\n\"I wanted to do it while she’s in rehab, but my therapist told me to reconsider so I did. I was so angry when I found out about the cheating I wanted to take her belongings to her dealers house and leave them there, but I knew that was wrong. I knew that once my anger wore off I would regret it. So now all I want to get the divorce started and overwith as soon as possible, so that I can begin trying to pick up the pieces and move on with my life. On top of that? I didn’t want to lead her on, and I could tell that she knew something was coming because I can’t even stand her touching me anymore, it makes me physically ill. If I had known she was going to kill herself I could have closed my eyes and grit my teeth, and let her do whatever, but honestly even now, after what she did, I know that isn’t feasible for me. I still found myself wanting to start fights, to yell, and I know that I am not a strong enough person to be in a relationship with someone who hurt me that much, who disrespected me, my home, and my children that much, who took my own past experiences with drugs into account so little that she brought them into my home, directly under the nose of myself and my children (pun intended). This is as much kindness as I can afford to extend to her anymore for my own mental healths sake.\"\\n\\n***One more comment from OOP as of July 24 (not really an update but including it here)***\\n\\n\"Not awake yet, I took the advice of some of the commenters, and went to go get my boys, to see if they wanted to see their mother, I explained the situation to the best of my ability, age-appropriate, and asked if they wanted to see her, they all agreed to see her eventually, but the eldest only wanted to go to support his brothers. I\\'m concerned about the anger he is building towards his mother, and I do intend to talk to him about it, but I also don\\'t want to tell him how to feel, or tell him that his feelings are bad and wrong, I was already working to get them into therapy, but I\\'m going to expedite that.\"\\n\\n**Edited to add a bit more info from the comments:**\\n\\n\"Two of my three sons are stepsons, but I adopted them, (they never had a father due to their bio dad being an absolute piece of shit) My biological son is the youngest and was born to a girlfriend who is not in the picture and doesn’t want to be. My sons are 9, 12, and 14. She had two jobs when I met her, though though were both shit jobs, and I had been looking into finding her a better one. When it comes to the dealer, she was getting the drugs from her friend who is a woman, and a few of the men who she cheated with.\"\\n\\n\"There have been multiple pictures/videos of her dancing on/being inappropriate with men since that post. I haven’t blocked the friend because she sent me a large amount of proof of infidelity, for the divorce. And in that post the hick she was dancing on was a man, perhaps you are confused because her woman friend sent the video? Perhaps my wording was bad, I apologize. And yes. I do know that trauma is the gateway to addiction, as I am an addict and my own CSA from my uncle and general abuse from both my parents (we went to counseling during my rehab and our relationship is much better now, but growing up was very bad). I cant say I know what made her start using, but I can say I know she had a rough childhood and even worse teen years. Editing to add; regardless of sexual identity if I found out my wife was engaging in sexual acts with women I would also consider it cheating?\"\\n\\n\"She was doing other sexually inappropriate things. But no, I didn\\'t get any straight up sex tapes. I would rather not go into it further, but I think you can get my drift.\" \\n',\n", + " 'poster': 'LucyAriaRose',\n", + " 'date': '2023-07-27 05:07:37',\n", + " 'flair': 'ONGOING',\n", + " 'title': 'AITA (38M) For Cutting Back On work To Prove A Point To My Wife (30F)?',\n", + " 'upvotes': 7091,\n", + " 'link': '/r/BestofRedditorUpdates/comments/15asjag/aita_38m_for_cutting_back_on_work_to_prove_a/',\n", + " 'poster_link': 'https://www.reddit.com/user/LucyAriaRose'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "submission" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3ebd9204-4f28-4bb6-9573-e0a7fcbab336", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['submission_content', 'poster', 'date', 'flair', 'title', 'upvotes', 'link', 'poster_link'])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "submission.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e27f0dff-b86d-4181-a9ea-4fce5c446787", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def preprocess_praw_data(submissions: List[Dict], key_conversion: Dict[str, str]) -> pd.DataFrame:\n", + " \"\"\"\n", + " Preprocesses praw data into a DataFrame.\n", + " \n", + " Parameters:\n", + " - submissions: List of submission dictionaries.\n", + " - key_conversion: Mapping of original to new column names.\n", + " \n", + " Returns:\n", + " - pd.DataFrame: Preprocessed DataFrame.\n", + " \"\"\"\n", + " \n", + " # Convert the submissions list to a DataFrame\n", + " praw_df = pd.DataFrame(submissions)\n", + " \n", + " # Convert 'date' column to datetime format\n", + " praw_df.date = pd.to_datetime(praw_df.date)\n", + " \n", + " # Rename columns based on the provided mapping\n", + " praw_df = praw_df.rename(columns=key_conversion)\n", + " \n", + " # Remove 'poster_link' column if it exists\n", + " if 'poster_link' in praw_df.columns:\n", + " del praw_df['poster_link']\n", + " \n", + " # Extract the 4th element from 'permalink' as 'id'\n", + " praw_df['id'] = praw_df.permalink.str.split('/').str[4]\n", + " \n", + " return praw_df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "007f168b-98c7-4123-b60e-c7e96ed2e31d", + "metadata": {}, + "outputs": [], + "source": [ + "praw_df = preprocess_praw_data(submissions=submissions, key_conversion=key_conversion)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d66f5777-5758-4062-87ab-d2daa968a118", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_utctitlescoreflaircontentposterpermalinkid
02020-01-31 19:40:08UPDATE: AITA for letting my brother call me \"d...1None[deleted][deleted]/r/BestofRedditorUpdates/comments/ewtopo/updat...ewtopo
12020-01-31 19:46:30Younger brother thinks older brother is his da...7AITAOriginal post: https://www.reddit.com/r/AmIthe...register2014/r/BestofRedditorUpdates/comments/ewts8z/young...ewts8z
22020-01-31 19:53:11Stressed Father who wanted to cancel Christmas...9AITA[Original: AITA if I \"cancel\" Christmas becaus...register2014/r/BestofRedditorUpdates/comments/ewtvnr/stres...ewtvnr
32020-01-31 20:37:14Man refuses to take girlfriend out to dinner b...23AITA**Original Post:** https://www.reddit.com/r/Am...register2014/r/BestofRedditorUpdates/comments/ewuj3d/man_r...ewuj3d
42020-01-31 20:41:16He pesters neighbor to cook for him for $515AITA[Original] (https://www.reddit.com/r/AmItheAss...register2014/r/BestofRedditorUpdates/comments/ewulam/he_pe...ewulam
...........................
106042023-10-19 10:59:57My (30M) fiance (38F) is angry that I got my d...5808CONCLUDEDI am not the OOP. This was originally posted b...rickysayshey/r/BestofRedditorUpdates/comments/17bgfuc/my_3...17bgfuc
106052023-10-20 00:22:42My parents invited their ‘friends’ on a family...2391CONCLUDED**I am not The OOP, OOP is** u/Relative-Young9...Direct-Caterpillar77/r/BestofRedditorUpdates/comments/17by1dt/my_p...17by1dt
106062023-10-20 02:04:15(New Update) My(f21) fiancé's(m22) parents out...217INCONCLUSIVEI am not OP; that would be u/throwraalerting\\n...ThrowRA3837374/r/BestofRedditorUpdates/comments/17c02y1/new_...17c02y1
106072023-10-20 04:00:18AITA for telling my ex that his children are n...1ONGOING**I am NOT OOP. OOP is** u/No_Bumblebee8165\\n\\...Choice_Evidence1983/r/BestofRedditorUpdates/comments/17c298a/aita...17c298a
106082023-10-20 04:00:40Little sister calling me a 'parasitic leech'.....15ONGOING**I am not The OOP, OOP is** u/PatientCurrency...Direct-Caterpillar77/r/BestofRedditorUpdates/comments/17c29i7/litt...17c29i7
\n", + "

10609 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " date_utc title \\\n", + "0 2020-01-31 19:40:08 UPDATE: AITA for letting my brother call me \"d... \n", + "1 2020-01-31 19:46:30 Younger brother thinks older brother is his da... \n", + "2 2020-01-31 19:53:11 Stressed Father who wanted to cancel Christmas... \n", + "3 2020-01-31 20:37:14 Man refuses to take girlfriend out to dinner b... \n", + "4 2020-01-31 20:41:16 He pesters neighbor to cook for him for $5 \n", + "... ... ... \n", + "10604 2023-10-19 10:59:57 My (30M) fiance (38F) is angry that I got my d... \n", + "10605 2023-10-20 00:22:42 My parents invited their ‘friends’ on a family... \n", + "10606 2023-10-20 02:04:15 (New Update) My(f21) fiancé's(m22) parents out... \n", + "10607 2023-10-20 04:00:18 AITA for telling my ex that his children are n... \n", + "10608 2023-10-20 04:00:40 Little sister calling me a 'parasitic leech'..... \n", + "\n", + " score flair content \\\n", + "0 1 None [deleted] \n", + "1 7 AITA Original post: https://www.reddit.com/r/AmIthe... \n", + "2 9 AITA [Original: AITA if I \"cancel\" Christmas becaus... \n", + "3 23 AITA **Original Post:** https://www.reddit.com/r/Am... \n", + "4 15 AITA [Original] (https://www.reddit.com/r/AmItheAss... \n", + "... ... ... ... \n", + "10604 5808 CONCLUDED I am not the OOP. This was originally posted b... \n", + "10605 2391 CONCLUDED **I am not The OOP, OOP is** u/Relative-Young9... \n", + "10606 217 INCONCLUSIVE I am not OP; that would be u/throwraalerting\\n... \n", + "10607 1 ONGOING **I am NOT OOP. OOP is** u/No_Bumblebee8165\\n\\... \n", + "10608 15 ONGOING **I am not The OOP, OOP is** u/PatientCurrency... \n", + "\n", + " poster \\\n", + "0 [deleted] \n", + "1 register2014 \n", + "2 register2014 \n", + "3 register2014 \n", + "4 register2014 \n", + "... ... \n", + "10604 rickysayshey \n", + "10605 Direct-Caterpillar77 \n", + "10606 ThrowRA3837374 \n", + "10607 Choice_Evidence1983 \n", + "10608 Direct-Caterpillar77 \n", + "\n", + " permalink id \n", + "0 /r/BestofRedditorUpdates/comments/ewtopo/updat... ewtopo \n", + "1 /r/BestofRedditorUpdates/comments/ewts8z/young... ewts8z \n", + "2 /r/BestofRedditorUpdates/comments/ewtvnr/stres... ewtvnr \n", + "3 /r/BestofRedditorUpdates/comments/ewuj3d/man_r... ewuj3d \n", + "4 /r/BestofRedditorUpdates/comments/ewulam/he_pe... ewulam \n", + "... ... ... \n", + "10604 /r/BestofRedditorUpdates/comments/17bgfuc/my_3... 17bgfuc \n", + "10605 /r/BestofRedditorUpdates/comments/17by1dt/my_p... 17by1dt \n", + "10606 /r/BestofRedditorUpdates/comments/17c02y1/new_... 17c02y1 \n", + "10607 /r/BestofRedditorUpdates/comments/17c298a/aita... 17c298a \n", + "10608 /r/BestofRedditorUpdates/comments/17c29i7/litt... 17c29i7 \n", + "\n", + "[10609 rows x 8 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([torrent_df, praw_df]).sort_values(by='date_utc').reset_index(drop=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "506f177b-6306-45b9-81c1-43ff6fa236b2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def filter_redundant_ids(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Removes rows with redundant ids, retaining the one with the longest content.\n", + "\n", + " Parameters:\n", + " - df (pd.DataFrame): The input DataFrame with columns 'id' and 'content'.\n", + "\n", + " Returns:\n", + " - pd.DataFrame: A filtered DataFrame with unique ids, where each id is associated \n", + " with the longest content available.\n", + " \"\"\"\n", + " \n", + " # Create a column for content length\n", + " df['content_length'] = df['content'].str.len()\n", + "\n", + " # Use groupby to get the index of the row with the longest content for each 'id'\n", + " idx_to_keep = df.groupby('id')['content_length'].idxmax().values\n", + "\n", + " # Filter the DataFrame to only keep those rows\n", + " df_filtered = df.loc[idx_to_keep]\n", + "\n", + " # Drop the 'content_length' column\n", + " df_filtered = df_filtered.drop(columns=['content_length'])\n", + "\n", + " return df_filtered" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7f5c3657-f2f9-4e4c-8d36-1a3f84859419", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Before filtering: 10609\n", + "After filtering: 9840\n" + ] + } + ], + "source": [ + "print(\"Before filtering: \", len(df))\n", + "df = filter_redundant_ids(df)\n", + "print(\"After filtering: \", len(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "130b56fd-4645-46ac-9a14-1d6bc2b241e7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "date_utc datetime64[ns]\n", + "title object\n", + "score int64\n", + "flair object\n", + "content object\n", + "poster object\n", + "permalink object\n", + "id object\n", + "dtype: object" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "9d2b1553-a985-4054-a3a1-e22144d2c0c4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['date_utc', 'title', 'score', 'flair', 'content', 'poster', 'permalink', 'id'],\n", + " num_rows: 9840\n", + "})" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = Dataset.from_pandas(df.reset_index(drop=True))\n", + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "289d5623-3f4b-4026-ad04-3cc99b920e01", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'date_utc': Value(dtype='timestamp[ns]', id=None),\n", + " 'title': Value(dtype='string', id=None),\n", + " 'score': Value(dtype='int64', id=None),\n", + " 'flair': Value(dtype='string', id=None),\n", + " 'content': Value(dtype='string', id=None),\n", + " 'poster': Value(dtype='string', id=None),\n", + " 'permalink': Value(dtype='string', id=None),\n", + " 'id': Value(dtype='string', id=None)}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.features" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e6d3d16e-551a-4d72-a9ac-e6c14a7dcfff", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ab295e4136ed4c5d99657f707ce61935", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Creating parquet from Arrow format: 0%| | 0/10 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "ax = df.content.apply(lambda x: len(x.split(' '))).hist(bins=150)\n", + "\n", + "# Adding titles and labels\n", + "ax.set_title('Number of words in a post')\n", + "ax.set_xlabel('Number of words')\n", + "ax.set_ylabel('Number of posts')\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "b57f2119-b3e7-4c21-8176-c81b79cd05dd", + "metadata": {}, + "outputs": [], + "source": [ + "df1 = dataset.to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "5449a923-fff9-4765-86bb-e9cb32fa1db0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['date_utc', 'title', 'score', 'flair', 'content', 'poster', 'permalink',\n", + " 'id'],\n", + " dtype='object')" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5f25e791-2f67-46c2-80e6-072c0c5dbb23", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['date_utc', 'title', 'score', 'flair', 'content', 'poster', 'permalink', 'id'],\n", + " num_rows: 9840\n", + "})" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds1 = Dataset.from_pandas(df1)\n", + "ds1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d3fee8-39a9-4faa-8150-0b484bef95ab", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}