File size: 2,796 Bytes
faaf200
 
ce4edbc
faaf200
307bf4d
faaf200
 
2bd80f0
 
 
 
 
 
 
 
 
faaf200
89efc52
ce4edbc
38f5758
52131e0
faaf200
2bd80f0
faaf200
2bd80f0
52131e0
 
ce4edbc
2bd80f0
 
 
 
 
8cbe5e9
 
 
 
2bd80f0
 
 
ce4edbc
8cbe5e9
5d0fab7
8cbe5e9
 
 
5d0fab7
ce4edbc
 
 
 
 
 
 
 
 
 
 
 
c678fad
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

# Read the rankings table from the CSV file that ships next to this script.
df = pd.read_csv("TIMES_WorldUniversityRankings_2024.csv")

# The three score columns are shown in both search views.
_score_columns = ['scores_teaching', 'scores_research', 'scores_citations']

# Columns shown per view; both include 'rank'. The country view leaves out
# 'location' because the country is already the search key.
display_columns_university = ['name', 'location', 'rank', *_score_columns]
display_columns_country = ['name', 'rank', *_score_columns]

# Page heading.
st.title('University Data Search')

# One tab per view; the returned containers are bound to tab1..tab4.
tab_labels = ["University Search", "Country Search", "Rankings", "Top 10 Country Chart"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

with tab1:
    # Let the user pick one of the distinct university names, then show the
    # columns listed in display_columns_university for the matching row(s).
    university_name = st.selectbox(
        'Select University',
        df['name'].unique(),
    )
    uni_mask = df['name'] == university_name
    selected_uni = df.loc[uni_mask, display_columns_university]
    # Nothing is rendered when no row matches the selection.
    if not selected_uni.empty:
        st.table(selected_uni.reset_index(drop=True))

with tab2:
    # Let the user pick one of the distinct 'location' values, then list every
    # university in it using the columns in display_columns_country.
    country_name = st.selectbox(
        'Select Country',
        df['location'].unique(),
    )
    country_mask = df['location'] == country_name
    selected_country = df.loc[country_mask, display_columns_country]
    # Nothing is rendered when no row matches the selection.
    if not selected_country.empty:
        st.table(selected_country.reset_index(drop=True))

with tab3:
    # Rank universities by one user-selected score column and summarise which
    # countries the listed universities belong to.
    metric = st.selectbox(
        'Select a metric for ranking:',
        ['scores_teaching', 'scores_research', 'scores_citations'],
        key='metric_select',
    )
    num_results = st.slider(
        'Number of results to display:',
        min_value=5,
        max_value=50,
        value=10,
        key='num_results_slider',
    )

    # Rows with no value for the chosen metric cannot be ranked, so drop them
    # before sorting rather than letting them pad the bottom of the list.
    ranking_df = (
        df.dropna(subset=[metric])
        .sort_values(by=metric, ascending=False)[['name', 'location', metric]]
        .head(num_results)
    )
    # Number the rows 1..N so the table shows list position instead of the
    # leftover CSV row labels carried through the sort.
    ranking_df.index = range(1, len(ranking_df) + 1)

    st.write(f"Top {num_results} Universities by {metric.replace('scores_', '').title()}:")
    st.table(ranking_df)

    # Country distribution of the listed universities, shown inside this tab
    # (not the sidebar), most frequent country first.
    country_count = ranking_df['location'].value_counts()
    st.subheader('Country Distribution')
    for country, count in country_count.items():
        st.write(f"{country}: {count}")

with tab4:
    # Bar chart of the ten countries whose universities have the highest mean
    # of the three score columns.

    # Per-university average of the teaching, research and citation scores,
    # stored as a new 'average_score' column on df.
    df['average_score'] = (df['scores_teaching'] + df['scores_research'] + df['scores_citations']) / 3

    # Mean of 'average_score' per country ('location').
    average_scores_by_country = df.groupby('location')['average_score'].mean().reset_index()

    # Keep the ten countries with the highest mean.
    top_countries = average_scores_by_country.nlargest(10, 'average_score')

    # Plotting
    fig, ax = plt.subplots()
    ax.bar(top_countries['location'], top_countries['average_score'], color='blue')
    ax.set_xlabel('Country')
    ax.set_ylabel('Average Score')
    ax.set_title('Top 10 Countries by Average University Score')
    # Restyle the tick labels the bar plot already placed. Calling
    # set_xticklabels without first fixing tick positions is discouraged by
    # Matplotlib because the labels can end up detached from their ticks.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    st.pyplot(fig)
    # The figure has been rendered; close it so pyplot does not keep one more
    # open figure alive every time the script is executed again.
    plt.close(fig)