Spaces:

yashwantpatilyup
/

RedditPulse

Sleeping

App Files Files Community

RedditPulse / app.py

yashwantpatilyup

Update app.py

1bb8d98 verified 23 days ago

raw

history blame contribute delete

4.12 kB



	# Required third-party packages: praw, pandas, matplotlib, nltk, gradio


	import praw
	import pandas as pd
	import matplotlib.pyplot as plt
	import nltk
	from nltk.sentiment import SentimentIntensityAnalyzer
	import os
	import gradio as gr

	# Download the VADER lexicon at import time so it is available before any
	# SentimentIntensityAnalyzer is constructed; quiet=True asks nltk's
	# downloader not to print its progress output.
	nltk.download('vader_lexicon', quiet=True)

	class RedditSentimentAgent:
	    """Fetch comments from a subreddit and score them with VADER.

	    The agent owns a PRAW client (``self.reddit``) and an NLTK
	    ``SentimentIntensityAnalyzer`` (``self.sia``). ``analyze`` is the
	    all-in-one entry point; it never raises and reports problems through
	    its text result instead.
	    """

	    def __init__(self, client_id, client_secret, user_agent):
	        """Create the sentiment analyzer and the Reddit API client.

	        Args:
	            client_id: Reddit application id, forwarded to ``praw.Reddit``.
	            client_secret: Reddit application secret, forwarded to
	                ``praw.Reddit``.
	            user_agent: User-agent string, forwarded to ``praw.Reddit``.
	        """
	        self.sia = SentimentIntensityAnalyzer()
	        self.reddit = praw.Reddit(
	            client_id=client_id,
	            client_secret=client_secret,
	            user_agent=user_agent,
	        )

	    @staticmethod
	    def _normalize_subreddit_name(subreddit_name):
	        """Return the bare subreddit name for user-typed input.

	        Surrounding whitespace, surrounding slashes and a leading ``r/``
	        (any case) are removed, so ``"r/python"``, ``" /r/python/ "`` and
	        ``"python"`` all yield ``"python"``. ``None`` yields ``""``.
	        """
	        name = (subreddit_name or "").strip().strip("/")
	        if name[:2].lower() == "r/":
	            name = name[2:]
	        return name.strip()

	    def get_comments(self, subreddit_name, limit=1000):
	        """Return the bodies of up to ``limit`` comments from a subreddit.

	        Args:
	            subreddit_name: Name of the subreddit, without the ``r/`` prefix.
	            limit: Maximum number of comments requested from PRAW's
	                subreddit comment listing.

	        Returns:
	            A list of comment body strings, in the order PRAW yields them.
	        """
	        subreddit = self.reddit.subreddit(subreddit_name)
	        return [comment.body for comment in subreddit.comments(limit=limit)]

	    def analyze_sentiment(self, text):
	        """Return the VADER ``compound`` polarity score for ``text``."""
	        return self.sia.polarity_scores(text)['compound']

	    def categorize_sentiment(self, score):
	        """Map a compound score to ``'Negative'``, ``'Positive'`` or ``'Neutral'``.

	        Scores at or below -0.05 are negative, scores at or above 0.05 are
	        positive, and everything in between is neutral.
	        """
	        if score <= -0.05:
	            return 'Negative'
	        elif score >= 0.05:
	            return 'Positive'
	        else:
	            return 'Neutral'

	    def analyze_subreddit(self, subreddit_name, limit=1000):
	        """Fetch comments and return them with their sentiment scores.

	        Returns:
	            A DataFrame with one row per comment and the columns
	            ``comment`` (body text), ``sentiment`` (compound score) and
	            ``sentiment_category`` (label from ``categorize_sentiment``).
	        """
	        comments = self.get_comments(subreddit_name, limit)
	        sentiments = [self.analyze_sentiment(comment) for comment in comments]

	        df = pd.DataFrame({'comment': comments, 'sentiment': sentiments})
	        df['sentiment_category'] = df['sentiment'].apply(self.categorize_sentiment)

	        return df

	    def visualize_results(self, df, subreddit_name):
	        """Build the two matplotlib figures for an analyzed DataFrame.

	        Args:
	            df: Frame produced by ``analyze_subreddit``.
	            subreddit_name: Name interpolated into the plot titles.

	        Returns:
	            ``(fig1, fig2)``: a 20-bin histogram of the sentiment scores and
	            a bar chart of the per-category comment counts. Both figures
	            are created through pyplot, so the caller should close them
	            (``plt.close(fig)``) once they are no longer needed.
	        """
	        # Histogram of sentiment scores
	        fig1, ax1 = plt.subplots(figsize=(10, 6))
	        ax1.hist(df['sentiment'], bins=20, edgecolor='black')
	        ax1.set_title(f'Sentiment Distribution in r/{subreddit_name} using VADER')
	        ax1.set_xlabel('Sentiment Polarity')
	        ax1.set_ylabel('Frequency')

	        # Bar chart of sentiment categories
	        sentiment_counts = df['sentiment_category'].value_counts()
	        fig2, ax2 = plt.subplots(figsize=(8, 6))
	        sentiment_counts.plot(kind='bar', ax=ax2)
	        ax2.set_title(f'Sentiment Distribution in r/{subreddit_name}')
	        ax2.set_xlabel('Sentiment Category')
	        ax2.set_ylabel('Number of Comments')

	        return fig1, fig2

	    def get_statistics(self, df):
	        """Return a multi-line text summary of an analyzed DataFrame.

	        The summary lists the mean and median score, the comments with the
	        highest and lowest score, and the per-category counts. An empty
	        frame yields a short notice instead, because ``idxmax``/``idxmin``
	        raise on an empty Series.
	        """
	        if df.empty:
	            return "No comments to summarize."

	        scores = df['sentiment']
	        sentiment_counts = df['sentiment_category'].value_counts()
	        lines = [
	            f"Average sentiment: {scores.mean():.2f}",
	            f"Median sentiment: {scores.median():.2f}",
	            f"Most positive comment: {df.loc[scores.idxmax(), 'comment']}",
	            f"Most negative comment: {df.loc[scores.idxmin(), 'comment']}",
	            "",
	            f"Sentiment Distribution:\n{sentiment_counts}",
	        ]
	        return "\n".join(lines)

	    def analyze(self, subreddit_name, limit):
	        """Run the full pipeline for one subreddit; never raises.

	        Args:
	            subreddit_name: Subreddit as typed by the user; whitespace and
	                an ``r/`` prefix are tolerated.
	            limit: Maximum number of comments to fetch.

	        Returns:
	            ``(fig1, fig2, stats)`` on success. On any failure, blank input
	            or an empty result the figures are ``None`` and ``stats`` holds
	            a human-readable message.
	        """
	        try:
	            name = self._normalize_subreddit_name(subreddit_name)
	            if not name:
	                return None, None, "Please enter a subreddit name."

	            df = self.analyze_subreddit(name, limit)
	            if df.empty:
	                # Plotting and idxmax/idxmin cannot handle an empty frame.
	                return None, None, f"No comments found in r/{name}."

	            # Compute the text first so a failure here cannot strand
	            # already-created figures in pyplot's registry.
	            stats = self.get_statistics(df)
	            fig1, fig2 = self.visualize_results(df, name)

	            # pyplot keeps every open figure alive; in a long-running server
	            # that grows without bound. Closing only unregisters them, the
	            # Figure objects returned below can still be rendered.
	            plt.close(fig1)
	            plt.close(fig2)
	            return fig1, fig2, stats
	        except Exception as e:
	            # UI boundary: surface the problem as text instead of crashing.
	            return None, None, f"An error occurred: {str(e)}"

	# Reddit API credentials. Values from the environment (for example secrets
	# configured on the hosting platform) take precedence, so a deployment does
	# not need real credentials in source control.
	# NOTE(security): the literal fallbacks are kept only so existing setups keep
	# working. They are exposed in this file and should be rotated, then removed.
	REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID") or "rDCXZ2gSgcW-GVwC01MlUw"
	REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET") or "7_7u6nwF8Ok-il14ek2yt6rv3vg0gQ"
	REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT") or "IcyTruth4022"

	# Single shared agent used by the Gradio callback below.
	agent = RedditSentimentAgent(REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT)

	def gradio_interface(subreddit_name, limit):
	    """Gradio callback: analyze a subreddit with the module-level agent.

	    The comment count is converted to ``int`` before it is handed to
	    ``agent.analyze``, whose result is returned unchanged.
	    """
	    comment_count = int(limit)
	    result = agent.analyze(subreddit_name, comment_count)
	    return result

	# Gradio UI: a subreddit name and a comment count go in; two plots and a
	# text summary come out, in the order returned by gradio_interface.
	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=[
	        gr.Textbox(label="Subreddit Name"),
	        gr.Slider(minimum=10, maximum=1000, step=10, label="Number of Comments", value=100),
	    ],
	    outputs=[
	        gr.Plot(label="Sentiment Distribution"),
	        gr.Plot(label="Sentiment Categories"),
	        gr.Textbox(label="Statistics", lines=10),
	    ],
	    title="Reddit Sentiment Analysis",
	    description="Analyze the sentiment of comments in a subreddit using VADER.",
	)

	# Start the server only when this file is executed as a script, so that
	# importing the module (e.g. from a test) does not launch the app.
	if __name__ == "__main__":
	    iface.launch()