Spaces:
Runtime error
Runtime error
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# The three score columns that appear in both result tables.
_score_columns = ['scores_teaching', 'scores_research', 'scores_citations']

# Columns shown for a single university, including its 'location' and 'rank'.
display_columns_university = ['name', 'location', 'rank', *_score_columns]

# Columns shown when listing the universities of one country ('location' is left out).
display_columns_country = ['name', 'rank', *_score_columns]

# Read the rankings dataset from a CSV file given by a relative path.
df = pd.read_csv("TIMES_WorldUniversityRankings_2024.csv")
# Page heading.
st.title('University Data Search')

# One tab per view; the order of the labels fixes which tab object is which.
tab_labels = ["University Search", "Country Search", "Rankings", "Top 10 Country Chart"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    # Offer each distinct value of the 'name' column once in the dropdown.
    university_choices = df['name'].unique()
    university_name = st.selectbox('Select University', university_choices)

    # Keep the rows whose name matches the selection, limited to the display columns.
    is_selected_university = df['name'] == university_name
    selected_uni = df.loc[is_selected_university, display_columns_university]

    # Render a table only when at least one row matched.
    if len(selected_uni) > 0:
        # Discard the original row labels so the table is numbered from 0.
        st.table(selected_uni.reset_index(drop=True))
with tab2:
    # Offer each distinct value of the 'location' column once in the dropdown.
    country_choices = df['location'].unique()
    country_name = st.selectbox('Select Country', country_choices)

    # Keep the rows located in the chosen country, limited to the display columns.
    is_selected_country = df['location'] == country_name
    selected_country = df.loc[is_selected_country, display_columns_country]

    # Render a table only when at least one row matched.
    if len(selected_country) > 0:
        # Discard the original row labels so the table is numbered from 0.
        st.table(selected_country.reset_index(drop=True))
with tab3:
    # Let the user pick which score column drives the ranking.
    metric_choices = ['scores_teaching', 'scores_research', 'scores_citations']
    metric = st.selectbox(
        'Select a metric for ranking:',
        metric_choices,
        key='metric_select',
    )

    # Number of rows to show: between 5 and 50, starting at 10.
    num_results = st.slider(
        'Number of results to display:',
        min_value=5,
        max_value=50,
        value=10,
        key='num_results_slider',
    )

    # Highest values of the chosen metric first, truncated to the requested length.
    ranking_columns = ['name', 'location', metric]
    ranking_df = (
        df[ranking_columns]
        .sort_values(by=metric, ascending=False)
        .head(num_results)
    )

    # Heading text: the metric name without its 'scores_' part, title-cased.
    metric_label = metric.replace('scores_', '').title()
    st.write(f"Top {num_results} Universities by {metric_label}:")
    st.table(ranking_df)

    # Count how many of the displayed rows belong to each country; shown in
    # this tab rather than in the sidebar.
    country_count = ranking_df['location'].value_counts()
    st.subheader('Country Distribution')
    for location_name, occurrences in country_count.items():
        st.write(f"{location_name}: {occurrences}")
with tab4:
    # Per-university average of the three score columns, stored on df as
    # 'average_score'. A missing value in any of the three yields NaN.
    df['average_score'] = (
        df['scores_teaching'] + df['scores_research'] + df['scores_citations']
    ) / 3

    # Mean of 'average_score' per 'location', one row per country.
    average_scores_by_country = (
        df.groupby('location')['average_score'].mean().reset_index()
    )

    # The ten countries with the highest mean, in descending order.
    top_countries = average_scores_by_country.nlargest(10, 'average_score')

    # Bar chart: one bar per country.
    fig, ax = plt.subplots()
    ax.bar(top_countries['location'], top_countries['average_score'], color='blue')
    ax.set_xlabel('Country')
    ax.set_ylabel('Average Score')
    ax.set_title('Top 10 Countries by Average University Score')

    # Rotate the labels the bar plot already produced instead of calling
    # set_xticklabels(): that method is only meant to be used after the tick
    # positions have been fixed, and otherwise emits a warning.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    st.pyplot(fig)

    # pyplot keeps every figure it creates alive until it is closed; this
    # script re-runs on each interaction, so release the figure once rendered.
    plt.close(fig)