import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import praw
import datetime as dt
from wordcloud import WordCloud, STOPWORDS
# NOTE: hardcoding credentials like this exposes them to anyone who can read
# the source; prefer environment variables or st.secrets in a real deployment.
# The client is initialized here but not used by the dashboard code below.
reddit = praw.Reddit(client_id='w0cDom4nIf5druip4y9zSw',
                     client_secret='mtCul8hEucwNky7hLwgkewlLPzH0sg',
                     user_agent='Profile extractor',
                     username='CarelessSwordfish541',
                     password='Testing@2022')
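
# A minimal sketch (an assumption, not the original scraper) of how a CSV like
# subreddit_data_v1.csv could be produced with the client above. The subreddit
# list and the use of .hot() are illustrative, but the column names match the
# ones the dashboard reads: subreddit, title, score, num_comments, created.
# This helper is never called by the app itself.
def scrape_subreddits(names, limit=100):
    rows = []
    for name in names:
        # Iterate over the "hot" listing of each subreddit and keep the
        # fields the dashboard needs.
        for post in reddit.subreddit(name).hot(limit=limit):
            rows.append({'subreddit': name,
                         'title': post.title,
                         'score': post.score,
                         'num_comments': post.num_comments,
                         'created': dt.date.fromtimestamp(post.created_utc)})
    pd.DataFrame(rows).to_csv('subreddit_data_v1.csv', index=False)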
st.title('Just Reddit as it is πŸ‘€')
st.write('This is a simple web app to extract data from Reddit and analyze it.')
DATA_URL = 'subreddit_data_v1.csv'
@st.cache
def load_data():
    data = pd.read_csv(DATA_URL)
    # Normalize all column names to lowercase.
    data.rename(lambda x: str(x).lower(), axis='columns', inplace=True)
    return data
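
# Note: newer Streamlit releases deprecate st.cache in favor of
# st.cache_data / st.cache_resource; on those versions the decorator
# above would become @st.cache_data.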
data_load_state = st.text('Loading data...')
data = load_data()
data_load_state.text("Done! (using st.cache)")
if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)
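
# The subreddit chosen here drives everything below: the word cloud, the
# summary statistics, and the per-day charts all filter on this selection.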
subreddit = st.selectbox('Select a subreddit', data['subreddit'].unique())
st.subheader('Wordcloud of the most common words in the subreddit')
comment_words = ''
stopwords = set(STOPWORDS)

# Build the word-cloud corpus from the titles of the selected subreddit.
for val in data[data['subreddit'] == subreddit]['title']:
    # Cast each title to a string, split it into tokens, and lowercase them.
    val = str(val)
    tokens = val.split()
    for i in range(len(tokens)):
        tokens[i] = tokens[i].lower()
    comment_words += " ".join(tokens) + " "
wordcloud = WordCloud(width=800, height=800,
                      background_color='white',
                      stopwords=stopwords,
                      min_font_size=10).generate(comment_words)

# Plot the word cloud and hand the figure to Streamlit explicitly; this
# avoids the deprecated global-pyplot usage that required
# st.set_option('deprecation.showPyplotGlobalUse', False).
fig = plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
st.pyplot(fig)
# Summary statistics for the selected subreddit
st.subheader('Statistics of the subreddit')
st.write(data[data['subreddit'] == subreddit].describe())

# Number of posts per day for the selected subreddit
# (assumes the 'created' column already holds one value per day)
st.subheader('Number of posts per day')
st.write(data[data['subreddit'] == subreddit].groupby('created')['title'].count())

# Number of comments per day for the selected subreddit
st.subheader('Number of comments per day')
st.write(data[data['subreddit'] == subreddit].groupby('created')['num_comments'].sum())
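
# If 'created' instead holds full timestamps, a true per-day grouping would
# first need a conversion; a sketch under that assumption:
# daily = data[data['subreddit'] == subreddit].copy()
# daily['day'] = pd.to_datetime(daily['created']).dt.date
# st.line_chart(daily.groupby('day')['num_comments'].sum())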
# Bar chart of the scores of the posts in the selected subreddit
st.subheader('Score of the posts')
st.bar_chart(data[data['subreddit'] == subreddit]['score'])
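# Note: st.bar_chart uses the DataFrame index as the x-axis, so bars appear
# in row order; sorting first would make the distribution easier to read, e.g.
# st.bar_chart(data[data['subreddit'] == subreddit]['score']
#              .sort_values(ascending=False).reset_index(drop=True))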
# Leftover example code from the Streamlit "Uber pickups" tutorial; it
# references an undefined DATE_COLUMN, so it is kept commented out.
# st.subheader('Number of pickups by hour')
# hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
# st.bar_chart(hist_values)
# # Some number in the range 0-23
# hour_to_filter = st.slider('hour', 0, 23, 17)
# filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
# st.subheader('Map of all pickups at %s:00' % hour_to_filter)
# st.map(filtered_data)