RJuro committed on
Commit
ad809c8
1 Parent(s): 0fafc69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -22
app.py CHANGED
@@ -1,31 +1,30 @@
1
- import streamlit as st
2
  import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
 
 
 
 
5
 
6
- st.title('My first app')
7
- st.write("Here's our first app with Streamlit")
8
 
9
- df_courses_year = pd.read_csv('udemy_courses_year.csv')
 
 
 
10
 
11
- # Extract unique years and sort them
12
- years = sorted(df_courses_year['year'].unique())
13
 
14
- # Sidebar for selecting year(s)
15
- selected_years = st.sidebar.multiselect('Select Year(s)', years, default=years)
16
 
17
- # Filter data based on selected year(s)
18
- filtered_data = df_courses_year[df_courses_year['year'].isin(selected_years)]
19
 
20
- # Set style for better visuals
21
- sns.set(style="whitegrid")
22
 
23
- # Create the plot
24
- plt.figure(figsize=(10, 6))
25
- ax = sns.countplot(x='year', data=filtered_data, palette='viridis')
26
- ax.set_title('Count of Observations per Year')
27
- ax.set_xlabel('Year')
28
- ax.set_ylabel('Count')
29
- plt.xticks(rotation=45)
30
 
31
- st.pyplot(plt)
 
 
1
  import pandas as pd
2
+ import streamlit as st
3
+
4
+ st.title('Udemy Courses Analysis')
5
+
6
+ courses_info = pd.read_csv('udemy_courses_info.csv')
7
+ courses_year = pd.read_csv('udemy_courses_year.csv')
8
 
9
+ couses_df = pd.merge(courses_info, courses_year, on='course_id')
 
10
 
11
+ options = st.multiselect(
12
+ "Please select subjects",
13
+ couses_df.subject.unique(),
14
+ couses_df.subject.unique())
15
 
16
+ couses_df = couses_df[couses_df['subject'].isin(options)]
 
17
 
18
+ # Group the DataFrame by 'subject' and 'course_title', and calculate the sum of 'num_subscribers'
19
+ grouped = couses_df.groupby(['subject', 'course_title'])['num_subscribers'].sum()
20
 
21
+ # Reset the index so 'subject' and 'course_title' become regular columns of a flat DataFrame
22
+ grouped = grouped.reset_index()
23
 
24
+ # Sort by 'subject' (ascending), then by 'num_subscribers' (descending) within each subject
25
+ grouped = grouped.sort_values(['subject', 'num_subscribers'], ascending=[True, False])
26
 
27
+ # Group the sorted DataFrame by 'subject' and keep the first 5 rows of each group (the 5 most-subscribed courses, given the sort order)
28
+ top_courses = grouped.groupby('subject').head(5)
 
 
 
 
 
29
 
30
+ st.dataframe(top_courses)