# slackdemo / reddit_collect.py
# svummidi's picture
# PAN subreddit 200 threads added
# 218f81b
# raw
# history blame contribute delete
# No virus
# 2.01 kB
import csv
import json
import os
import time

import praw
from prawcore import RequestException
from prawcore import ResponseException

from data_models import GenericMessage
# Reddit API client used by the rest of this script.
# Credentials are taken from the ``praw_client_id`` / ``praw_client_secret``
# environment variables when they are set; the literals are only a fallback
# so the script keeps working exactly as before without any environment setup.
# NOTE(review): these secrets are committed in source -- rotate them and drop
# the fallbacks once the environment variables are provisioned.
reddit = praw.Reddit(
    client_id=os.environ.get('praw_client_id', 'LV2nS-xiWYIEn6YpwOhWpg'),
    client_secret=os.environ.get('praw_client_secret', 'PhC4AYKkL0OUR8miVIuZF45Iz_saiA'),
    user_agent='PythonScript:com.example.passive_monitoring:v0.0.1 (by /u/vvsatya)',
)
# Retry policy for the collection loop: number of retries already used,
# the cap on retries, and the pause between them.
max_retries = 5
retry_delay = 5  # in seconds
retry_count = 0

# Handle for the subreddit whose threads are collected.
subreddit = reddit.subreddit('paloaltonetworks')
# Dump up to 1000 "hot" submissions of `subreddit` into a CSV file, one row
# per thread: a synthetic thread id plus a JSON list holding the submission
# itself followed by every comment in its tree.
#
# Each attempt rewrites the file from scratch. An HTTP 429 response pauses
# for `retry_delay` seconds and starts over, at most `max_retries` times; any
# other request/response error propagates to the caller.

# The output directory is not guaranteed to exist on a fresh checkout.
os.makedirs('csv', exist_ok=True)
thread_messages_file = f'csv/{subreddit}_messages.csv'

while retry_count < max_retries:
    try:
        with open(thread_messages_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['thread_ts', 'messages_json'])
            for index, submission in enumerate(subreddit.hot(limit=1000), 1):
                # `author` is None when the account is gone; use the same
                # placeholder the comment loop below uses.
                op_author = submission.author.id if submission.author else 'unknown'
                messages = [
                    GenericMessage(
                        op_author,
                        submission.created,
                        f"Title: {submission.title}\n Body: {submission.selftext}",
                    )
                ]
                # Expand every "load more comments" placeholder so the
                # flattened list below contains the whole comment tree.
                submission.comments.replace_more(limit=None)
                for comment in submission.comments.list():
                    author = comment.author.id if comment.author else 'unknown'
                    messages.append(GenericMessage(author, comment.created, comment.body))
                message_dicts = [msg.__dict__ for msg in messages]
                writer.writerow([f'{subreddit}-{index}', json.dumps(message_dicts)])
                if index % 10 == 0:
                    print("Fetched threads : ", index)
    except (RequestException, ResponseException) as e:
        # Only ResponseException (and subclasses) carries `.response`;
        # RequestException wraps transport failures and has none.
        response = getattr(e, 'response', None)
        if response is not None and response.status_code == 429:
            print(f"Rate limit exceeded. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
            retry_count += 1
            continue
        raise
    else:
        # Completed without error: stop instead of re-running the harvest
        # and overwriting the file again.
        break
else:
    # The loop condition failed, i.e. every retry was consumed by 429s.
    print(f"Giving up after {max_retries} rate-limited attempts; "
          f"{thread_messages_file} may be incomplete.")