Spaces:

Karlsen
/

university_ranking_app

Runtime error

File size: 2,796 Bytes

faaf200
 
ce4edbc
faaf200
307bf4d
faaf200
 
2bd80f0
 
 
 
 
 
 
 
 
faaf200
89efc52
ce4edbc
38f5758
52131e0
faaf200
2bd80f0
faaf200
2bd80f0
52131e0
 
ce4edbc
2bd80f0
 
 
 
 
8cbe5e9
 
 
 
2bd80f0
 
 
ce4edbc
8cbe5e9
5d0fab7
8cbe5e9
 
 
5d0fab7
ce4edbc
 
 
 
 
 
 
 
 
 
 
 
c678fad

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

# Read the rankings table from the CSV file; the relative path is resolved
# against the directory the app is started from.
df = pd.read_csv("TIMES_WorldUniversityRankings_2024.csv")

# Columns rendered for a single university: name, location, rank and the
# three score columns.
display_columns_university = ['name', 'location', 'rank']
display_columns_university += ['scores_teaching', 'scores_research', 'scores_citations']

# The per-country table shows the same columns minus 'location', in the same
# relative order.
display_columns_country = [
    column for column in display_columns_university if column != 'location'
]

st.title('University Data Search')

# One tab per view, listed in display order and unpacked into the handles
# that the `with` sections further down enter.
tab_labels = ["University Search", "Country Search", "Rankings", "Top 10 Country Chart"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

with tab1:
    # Offer every distinct value of the 'name' column as a choice.
    university_name = st.selectbox('Select University', df['name'].unique())

    # Keep only the rows whose name equals the choice, restricted to the
    # university display columns.
    name_matches = df['name'] == university_name
    selected_uni = df.loc[name_matches, display_columns_university]

    # Nothing is rendered when no row matches; otherwise the table is shown
    # with a fresh 0-based index.
    if not selected_uni.empty:
        st.table(selected_uni.reset_index(drop=True))

with tab2:
    # Offer every distinct value of the 'location' column as a choice.
    country_name = st.selectbox('Select Country', df['location'].unique())

    # Keep only the rows located in the chosen country, restricted to the
    # country display columns.
    location_matches = df['location'] == country_name
    selected_country = df.loc[location_matches, display_columns_country]

    # Nothing is rendered when no row matches; otherwise the table is shown
    # with a fresh 0-based index.
    if not selected_country.empty:
        st.table(selected_country.reset_index(drop=True))

with tab3:
    # Pickers for the score column to rank by and the number of rows to show;
    # each widget is given an explicit key.
    metric = st.selectbox(
        'Select a metric for ranking:',
        ['scores_teaching', 'scores_research', 'scores_citations'],
        key='metric_select',
    )
    num_results = st.slider(
        'Number of results to display:',
        min_value=5,
        max_value=50,
        value=10,
        key='num_results_slider',
    )

    # Order by the chosen metric, highest first, then keep the requested
    # number of rows with only the name, location and metric columns.
    sorted_by_metric = df.sort_values(by=metric, ascending=False)
    ranking_columns = ['name', 'location', metric]
    ranking_df = sorted_by_metric[ranking_columns].head(num_results)

    st.write(f"Top {num_results} Universities by {metric.replace('scores_', '').title()}:")
    st.table(ranking_df)

    # Tally how often each location appears among the displayed rows and list
    # the counts inside this tab.
    country_count = ranking_df['location'].value_counts()
    st.subheader('Country Distribution')
    for country, count in country_count.items():
        st.write(f"{country}: {count}")

with tab4:
    # Per-university average of the three score columns. The result is stored
    # on the shared DataFrame as 'average_score'.
    df['average_score'] = (df['scores_teaching'] + df['scores_research'] + df['scores_citations']) / 3

    # Mean of 'average_score' per location, flattened back into a two-column
    # frame so it can be ranked and plotted.
    average_scores_by_country = df.groupby('location')['average_score'].mean().reset_index()

    # The ten locations with the highest mean, in descending order.
    top_countries = average_scores_by_country.nlargest(10, 'average_score')

    # Bar chart of the selected locations.
    fig, ax = plt.subplots()
    ax.bar(top_countries['location'], top_countries['average_score'], color='blue')
    ax.set_xlabel('Country')
    ax.set_ylabel('Average Score')
    ax.set_title('Top 10 Countries by Average University Score')

    # The bar call already labelled the ticks with the location names, so only
    # restyle those labels. Re-assigning them with set_xticklabels() without
    # fixed ticks triggers a matplotlib warning.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    st.pyplot(fig)

    # pyplot keeps every figure it created alive until it is closed. Streamlit
    # re-executes this script on each interaction, so close the figure once it
    # has been rendered to stop figures piling up in memory.
    plt.close(fig)