import csv
import json
import os
import time

import praw
from prawcore import RequestException

from data_models import GenericMessage

# Reddit API client.
#
# SECURITY: the literal credentials below are committed to source control and
# should be treated as compromised -- rotate them and drop the fallbacks once
# every environment sets REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET. The literals
# are kept only so existing runs without those variables behave as before.
reddit = praw.Reddit(
    client_id=os.environ.get('REDDIT_CLIENT_ID', 'LV2nS-xiWYIEn6YpwOhWpg'),
    client_secret=os.environ.get(
        'REDDIT_CLIENT_SECRET', 'PhC4AYKkL0OUR8miVIuZF45Iz_saiA'
    ),
    user_agent='PythonScript:com.example.passive_monitoring:v0.0.1 (by /u/vvsatya)',
)

# Keep the plain name separate from the API handle; the rest of the script
# uses `subreddit` (the handle) both for fetching and inside f-strings.
subreddit_name = 'paloaltonetworks'
subreddit = reddit.subreddit(subreddit_name)

def _author_id(item):
    """Return the id of *item*'s author, or 'unknown' when it has no author.

    Used for submissions and comments alike so that an item whose ``author``
    is falsy (presumably a deleted account -- see the PRAW docs) does not
    raise AttributeError.
    """
    return item.author.id if item.author else 'unknown'


# Retry policy for rate-limited runs.
retry_count = 0
max_retries = 5
retry_delay = 5  # seconds to sleep before the next attempt

# The output path is the same for every attempt, so build it once and make
# sure its directory exists before the first open().
thread_messages_file = f'csv/{subreddit}_messages.csv'
os.makedirs(os.path.dirname(thread_messages_file), exist_ok=True)

while retry_count < max_retries:
    try:
        # Mode 'w' truncates, so each attempt restarts the export from scratch.
        # UTF-8 is explicit because post/comment text is arbitrary Unicode and
        # the platform default encoding may not be able to represent it.
        with open(thread_messages_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['thread_ts', 'messages_json'])

            for index, submission in enumerate(subreddit.hot(limit=1000), 1):
                # The first message of every thread is the submission itself.
                messages = [
                    GenericMessage(
                        _author_id(submission),
                        submission.created,
                        f"Title: {submission.title}\n Body: {submission.selftext}",
                    )
                ]

                # Expand the comment tree before flattening it with .list().
                submission.comments.replace_more(limit=None)
                messages.extend(
                    GenericMessage(_author_id(comment), comment.created, comment.body)
                    for comment in submission.comments.list()
                )

                # One CSV row per thread: a synthetic key plus the JSON-encoded
                # attribute dicts of all its messages.
                message_dicts = [msg.__dict__ for msg in messages]
                writer.writerow([f'{subreddit}-{index}', json.dumps(message_dicts)])

                if index % 10 == 0:
                    print("Fetched threads : ", index)
    except RequestException as e:
        # NOTE(review): prawcore may surface HTTP 429 through a different
        # exception class than RequestException -- confirm against the
        # installed prawcore version that this handler can actually fire.
        response = getattr(e, 'response', None)
        if response is not None and response.status_code == 429:
            print(f"Rate limit exceeded. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
            retry_count += 1
            continue
        # Anything other than a rate limit is not retried.
        raise
    else:
        # The export finished; without this break the loop would start over
        # and re-fetch everything indefinitely.
        break
else:
    # Reached only when every attempt was rate limited (no break happened).
    print(
        f"Giving up after {max_retries} rate-limited attempts; "
        f"{thread_messages_file} may be incomplete."
    )