Spaces:
Runtime error
Runtime error
File size: 2,796 Bytes
faaf200 ce4edbc faaf200 307bf4d faaf200 2bd80f0 faaf200 89efc52 ce4edbc 38f5758 52131e0 faaf200 2bd80f0 faaf200 2bd80f0 52131e0 ce4edbc 2bd80f0 8cbe5e9 2bd80f0 ce4edbc 8cbe5e9 5d0fab7 8cbe5e9 5d0fab7 ce4edbc c678fad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
# Rankings dataset backing every tab of the app; the path is relative, so the
# CSV is looked up from the directory the app is started in.
RANKINGS_CSV_PATH = "TIMES_WorldUniversityRankings_2024.csv"
df = pd.read_csv(RANKINGS_CSV_PATH)
# Columns shown in the result tables. Both views share the same three score
# columns; the university view additionally shows the university's location.
_score_columns = ['scores_teaching', 'scores_research', 'scores_citations']
display_columns_university = ['name', 'location', 'rank', *_score_columns]
display_columns_country = ['name', 'rank', *_score_columns]
st.title('University Data Search')

# One tab per view of the data; the tab objects are unpacked in the same
# order as the labels are listed.
tab_labels = ["University Search", "Country Search", "Rankings", "Top 10 Country Chart"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    # University search: choose one of the distinct names in the dataset.
    university_options = df['name'].unique()
    university_name = st.selectbox('Select University', university_options)

    # Rows for the chosen university, limited to the display columns.
    is_chosen_university = df['name'] == university_name
    selected_uni = df.loc[is_chosen_university, display_columns_university]

    # Nothing is rendered when no row matches the selection.
    if not selected_uni.empty:
        st.table(selected_uni.reset_index(drop=True))
with tab2:
    # Country search: choose one of the distinct 'location' values.
    country_options = df['location'].unique()
    country_name = st.selectbox('Select Country', country_options)

    # All universities located in the chosen country, limited to the
    # country-view display columns.
    is_chosen_country = df['location'] == country_name
    selected_country = df.loc[is_chosen_country, display_columns_country]

    # Nothing is rendered when no row matches the selection.
    if not selected_country.empty:
        st.table(selected_country.reset_index(drop=True))
with tab3:
    # Rankings: the user picks which score column to rank by and how many
    # rows to show.
    metric = st.selectbox(
        'Select a metric for ranking:',
        ['scores_teaching', 'scores_research', 'scores_citations'],
        key='metric_select',
    )
    num_results = st.slider(
        'Number of results to display:',
        min_value=5,
        max_value=50,
        value=10,
        key='num_results_slider',
    )

    # Highest values of the chosen metric first, then keep the first
    # num_results rows of the three columns being displayed.
    sorted_by_metric = df.sort_values(by=metric, ascending=False)
    ranking_columns = ['name', 'location', metric]
    ranking_df = sorted_by_metric[ranking_columns].head(num_results)

    # Heading uses the metric name without its 'scores_' prefix, title-cased.
    metric_label = metric.replace('scores_', '').title()
    st.write(f"Top {num_results} Universities by {metric_label}:")
    st.table(ranking_df)

    # How many of the displayed universities fall in each country; shown
    # inside this tab, one line per country.
    country_count = ranking_df['location'].value_counts()
    st.subheader('Country Distribution')
    for country, count in country_count.items():
        distribution_line = f"{country}: {count}"
        st.write(distribution_line)
with tab4:
    # Top-10 chart: countries ranked by the mean of their universities'
    # average score.

    # Per-university average of the three score columns, stored on df as
    # 'average_score'. A missing value in any of the three columns makes
    # that university's average missing as well.
    df['average_score'] = (
        df['scores_teaching'] + df['scores_research'] + df['scores_citations']
    ) / 3

    # Mean of 'average_score' per country, as a two-column frame
    # ('location', 'average_score').
    average_scores_by_country = (
        df.groupby('location')['average_score'].mean().reset_index()
    )

    # The ten countries with the highest mean average score.
    top_countries = average_scores_by_country.nlargest(10, 'average_score')

    # Plotting
    fig, ax = plt.subplots()
    ax.bar(top_countries['location'], top_countries['average_score'], color='blue')
    ax.set_xlabel('Country')
    ax.set_ylabel('Average Score')
    ax.set_title('Top 10 Countries by Average University Score')

    # Rotate the tick labels the bar chart already produced instead of
    # overwriting them with set_xticklabels(): replacing labels without
    # first fixing the tick positions triggers a Matplotlib warning and can
    # mislabel bars if the tick count ever differs from the label count.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    st.pyplot(fig)

    # Streamlit reruns this script on every interaction; close the figure
    # once it has been rendered so pyplot does not accumulate open figures.
    plt.close(fig)
|