"""Gradio app that scores subreddit comments with NLTK's VADER analyzer.

Reddit API credentials are read from the environment:

* ``REDDIT_CLIENT_ID``      (required)
* ``REDDIT_CLIENT_SECRET``  (required)
* ``REDDIT_USER_AGENT``     (optional)
"""

import logging
import os
from typing import List, Optional, Tuple

import gradio as gr
import matplotlib.pyplot as plt  # noqa: F401  (pre-existing file-level import, kept)
import nltk
import pandas as pd
import praw
from matplotlib.figure import Figure
from nltk.sentiment import SentimentIntensityAnalyzer

logger = logging.getLogger(__name__)

# Download the VADER lexicon
nltk.download('vader_lexicon', quiet=True)


class RedditSentimentAgent:
    """Fetch comments from a subreddit and score them with VADER."""

    # Compound scores at or beyond these bounds are labelled negative /
    # positive; anything strictly between them is neutral.
    NEGATIVE_THRESHOLD = -0.05
    POSITIVE_THRESHOLD = 0.05

    def __init__(self, client_id, client_secret, user_agent):
        """Create the VADER analyzer and the PRAW Reddit client.

        Args:
            client_id: Reddit API client id.
            client_secret: Reddit API client secret.
            user_agent: User-agent string sent with Reddit API requests.
        """
        # Initialize VADER sentiment analyzer
        self.sia = SentimentIntensityAnalyzer()

        # Initialize Reddit API client
        self.reddit = praw.Reddit(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=user_agent,
        )

    def get_comments(self, subreddit_name: str, limit: int = 1000) -> List[str]:
        """Return the body text of up to *limit* comments from a subreddit.

        Args:
            subreddit_name: Subreddit name, without the ``r/`` prefix.
            limit: Maximum number of comments requested from the listing.
        """
        subreddit = self.reddit.subreddit(subreddit_name)
        return [comment.body for comment in subreddit.comments(limit=limit)]

    def analyze_sentiment(self, text: str) -> float:
        """Return the VADER ``compound`` polarity score for *text*."""
        return self.sia.polarity_scores(text)['compound']

    def categorize_sentiment(self, score: float) -> str:
        """Map a compound score to ``'Negative'``, ``'Positive'`` or ``'Neutral'``."""
        if score <= self.NEGATIVE_THRESHOLD:
            return 'Negative'
        if score >= self.POSITIVE_THRESHOLD:
            return 'Positive'
        return 'Neutral'

    def analyze_subreddit(self, subreddit_name: str, limit: int = 1000) -> pd.DataFrame:
        """Fetch comments and return a frame with their scores and labels.

        Returns:
            A DataFrame with columns ``comment``, ``sentiment`` (compound
            score) and ``sentiment_category``. It has zero rows when no
            comments were returned.
        """
        comments = self.get_comments(subreddit_name, limit)
        sentiments = [self.analyze_sentiment(comment) for comment in comments]
        df = pd.DataFrame({'comment': comments, 'sentiment': sentiments})
        df['sentiment_category'] = df['sentiment'].apply(self.categorize_sentiment)
        return df

    def visualize_results(self, df: pd.DataFrame, subreddit_name: str) -> Tuple[Figure, Figure]:
        """Build a score histogram and a per-category bar chart.

        The figures are created with ``matplotlib.figure.Figure`` rather than
        ``plt.subplots`` so they are not registered with pyplot's global
        figure manager; in a long-running server those registered figures
        would otherwise accumulate on every request.

        Returns:
            ``(histogram_figure, category_bar_figure)``.
        """
        # Histogram of sentiment scores
        fig1 = Figure(figsize=(10, 6))
        ax1 = fig1.subplots()
        ax1.hist(df['sentiment'], bins=20, edgecolor='black')
        ax1.set_title(f'Sentiment Distribution in r/{subreddit_name} using VADER')
        ax1.set_xlabel('Sentiment Polarity')
        ax1.set_ylabel('Frequency')

        # Bar chart of sentiment categories
        sentiment_counts = df['sentiment_category'].value_counts()
        fig2 = Figure(figsize=(8, 6))
        ax2 = fig2.subplots()
        sentiment_counts.plot(kind='bar', ax=ax2)
        ax2.set_title(f'Sentiment Distribution in r/{subreddit_name}')
        ax2.set_xlabel('Sentiment Category')
        ax2.set_ylabel('Number of Comments')

        return fig1, fig2

    def get_statistics(self, df: pd.DataFrame) -> str:
        """Return a human-readable summary of the scored comments.

        Args:
            df: Frame with ``comment``, ``sentiment`` and
                ``sentiment_category`` columns.

        Returns:
            A multi-line string with the mean, median, the most positive and
            most negative comments, and the per-category counts; or a short
            notice when *df* has no rows.
        """
        # idxmax()/idxmin() raise on an empty Series, so bail out early.
        if df.empty:
            return "No comments found to analyze."

        scores = df['sentiment']
        sentiment_counts = df['sentiment_category'].value_counts()
        lines = [
            f"Average sentiment: {scores.mean():.2f}",
            f"Median sentiment: {scores.median():.2f}",
            f"Most positive comment: {df.loc[scores.idxmax(), 'comment']}",
            f"Most negative comment: {df.loc[scores.idxmin(), 'comment']}",
            "",
            "Sentiment Distribution:",
            # to_string() omits the "Name: ..., dtype: ..." footer that the
            # plain Series repr would leak into the UI text.
            sentiment_counts.to_string(),
        ]
        return "\n".join(lines)

    def analyze(self, subreddit_name: str, limit: int) -> Tuple[Optional[Figure], Optional[Figure], str]:
        """Run the full pipeline for the UI and never raise.

        Args:
            subreddit_name: Subreddit name entered by the user.
            limit: Maximum number of comments to fetch.

        Returns:
            ``(histogram_figure, category_figure, statistics_text)``. On a
            blank name, an empty result or any error, both figures are
            ``None`` and the text explains what happened.
        """
        subreddit_name = (subreddit_name or "").strip()
        if not subreddit_name:
            return None, None, "Please enter a subreddit name."

        # Broad catch is deliberate: this is the UI boundary, and any failure
        # (network, auth, unknown subreddit) should surface as a message.
        try:
            df = self.analyze_subreddit(subreddit_name, limit)
            if df.empty:
                return None, None, f"No comments found in r/{subreddit_name}."
            fig1, fig2 = self.visualize_results(df, subreddit_name)
            stats = self.get_statistics(df)
            return fig1, fig2, stats
        except Exception as e:
            logger.exception("Sentiment analysis failed for r/%s", subreddit_name)
            return None, None, f"An error occurred: {str(e)}"


def _require_env(name: str) -> str:
    """Return the non-empty value of environment variable *name* or raise."""
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "it is required to authenticate with the Reddit API."
        )
    return value


# Reddit API credentials come from the environment and must never be
# hard-coded in source. Any secret that was previously committed to this file
# should be treated as compromised and rotated.
REDDIT_CLIENT_ID = _require_env("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = _require_env("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT") or "IcyTruth4022"

agent = RedditSentimentAgent(REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT)


def gradio_interface(subreddit_name, limit):
    """Gradio callback: coerce the slider value to ``int`` and run the analysis."""
    return agent.analyze(subreddit_name, int(limit))


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Subreddit Name"),
        gr.Slider(minimum=10, maximum=1000, step=10, label="Number of Comments", value=100),
    ],
    outputs=[
        gr.Plot(label="Sentiment Distribution"),
        gr.Plot(label="Sentiment Categories"),
        gr.Textbox(label="Statistics", lines=10),
    ],
    title="Reddit Sentiment Analysis",
    description="Analyze the sentiment of comments in a subreddit using VADER.",
)

iface.launch()