import csv
import json
import time

import praw
from prawcore import RequestException

from data_models import GenericMessage

reddit = praw.Reddit(
    client_id='LV2nS-xiWYIEn6YpwOhWpg',
    client_secret='PhC4AYKkL0OUR8miVIuZF45Iz_saiA',
    user_agent='PythonScript:com.example.passive_monitoring:v0.0.1 (by /u/vvsatya)',
)

subreddit = 'paloaltonetworks'

# Access the subreddit (the Subreddit object formats to its display name in f-strings)
subreddit = reddit.subreddit(subreddit)

retry_count = 0
max_retries = 5
retry_delay = 5  # in seconds

while retry_count < max_retries:
    try:
        thread_messages_file = f'csv/{subreddit}_messages.csv'
        with open(thread_messages_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['thread_ts', 'messages_json'])

            for index, submission in enumerate(subreddit.hot(limit=1000), 1):
                messages = []
                # The submission author may be deleted, so guard against None
                author = submission.author.id if submission.author else 'unknown'
                messages.append(GenericMessage(
                    author,
                    submission.created,
                    f"Title: {submission.title}\n Body: {submission.selftext}",
                ))

                # Collect comments; resolve "load more" placeholders first
                submission.comments.replace_more(limit=None)
                for comment in submission.comments.list():
                    author = comment.author.id if comment.author else 'unknown'
                    messages.append(GenericMessage(author, comment.created, comment.body))

                message_dicts = [msg.__dict__ for msg in messages]
                writer.writerow([f'{subreddit}-{index}', json.dumps(message_dicts)])

                if index % 10 == 0:
                    print("Fetched threads:", index)

        # All threads were written successfully; stop retrying
        break
    except RequestException as e:
        if hasattr(e, 'response') and e.response is not None:
            if e.response.status_code == 429:
                print(f"Rate limit exceeded. Retrying in {retry_delay} seconds.")
                time.sleep(retry_delay)
                retry_count += 1
                continue
        raise
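
# The script imports GenericMessage from a local data_models module that is not
# shown here. Below is a minimal sketch of what that class could look like, based
# only on how the script uses it: it is constructed with an author id, a created
# timestamp, and a text body, and msg.__dict__ is serialized to JSON. The field
# names here are assumptions, not the original definitions.
from dataclasses import dataclass


@dataclass
class GenericMessage:
    # A plain dataclass keeps its fields in __dict__, so
    # json.dumps(msg.__dict__) in the script above serializes it cleanly.
    author: str
    timestamp: float
    text: str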