SDC_test / app.py
RJuro's picture
Update app.py
ad809c8 verified
raw
history blame contribute delete
No virus
1.02 kB
import pandas as pd
import streamlit as st
# Page heading shown at the top of the app.
st.title('Udemy Courses Analysis')

# Load the two CSV extracts and join them on their shared 'course_id' column.
info_frame = pd.read_csv('udemy_courses_info.csv')
year_frame = pd.read_csv('udemy_courses_year.csv')
courses = info_frame.merge(year_frame, on='course_id')

# Subject picker: the distinct subjects are passed both as the available
# choices and as the default selection.
all_subjects = courses['subject'].unique()
chosen_subjects = st.multiselect(
    "Please select subjects",
    all_subjects,
    all_subjects,
)

# Keep only the courses whose subject is currently selected.
selected_courses = courses[courses['subject'].isin(chosen_subjects)]

# Ranking pipeline:
#   1. total 'num_subscribers' per (subject, course_title) pair,
#   2. flatten the grouped index back into ordinary columns,
#   3. order by subject ascending, then subscribers descending,
#   4. keep the first five rows of every subject.
top_courses = (
    selected_courses
    .groupby(['subject', 'course_title'])['num_subscribers']
    .sum()
    .reset_index()
    .sort_values(['subject', 'num_subscribers'], ascending=[True, False])
    .groupby('subject')
    .head(5)
)

# Render the resulting table.
st.dataframe(top_courses)