# Captured from a Hugging Face Space (viewer status at capture time: "Sleeping").
import os

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import load_dataset
# Access token for the Hugging Face Hub, read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "CohereForAI/mmlu-translations-results"


def extract_languages(metadata: pd.Series) -> pd.Series:
    """Return the ``language`` entry of every metadata cell.

    Cells that are not mapping-like (e.g. ``None``/NaN for a record without
    metadata) yield ``None`` instead of raising, so one incomplete record
    cannot take the whole dashboard down.
    """

    def language_of(cell):
        try:
            return cell.get("language")
        except AttributeError:
            return None

    return metadata.apply(language_of)


def extract_user_ids(responses: pd.Series) -> pd.Series:
    """Collect the ``user_id`` of every ``is_edit_required`` answer.

    Assumes each cell maps a question name to a list-like of answer dicts
    carrying a ``user_id`` key (this is how the column is indexed here --
    confirm against the dataset schema).  Records with no responses, without
    the ``is_edit_required`` question, or with an empty answer list
    contribute nothing.  An answer dict lacking ``user_id`` still raises
    ``KeyError``, as before.

    Returns:
        An object-dtype Series with one entry per answer.
    """
    user_ids = []
    for record in responses:
        try:
            answers = record["is_edit_required"]
        except (KeyError, TypeError, IndexError):
            # Missing cell (None/NaN) or question not answered for this record.
            continue
        if not pd.api.types.is_list_like(answers):
            continue
        user_ids.extend(answer["user_id"] for answer in answers)
    return pd.Series(user_ids, dtype=object)


def counts_table(counts: pd.Series, label: str) -> pd.DataFrame:
    """Turn a ``value_counts`` result into a two-column table ``[label, 'Count']``."""
    table = counts.reset_index()
    table.columns = [label, "Count"]
    return table


def plot_language_counts(language_counts: pd.Series):
    """Build and return a bar-chart figure of completed tasks per language."""
    fig, ax = plt.subplots()
    language_counts.plot(kind="bar", ax=ax)
    ax.set_title("Number of Completed Tasks for Each Language")
    ax.set_xlabel("Language")
    ax.set_ylabel("Count")
    return fig


def main() -> None:
    """Load the results dataset and render the progress dashboard."""
    ds = load_dataset(DATASET_NAME, split="train", token=HF_TOKEN)
    df = ds.to_pandas()

    st.title("MMLU Translations Progress")

    # Keep the extracted language as a column so it also shows in the raw table below.
    df["language"] = extract_languages(df["metadata"])
    language_counts = df["language"].value_counts()
    st.table(counts_table(language_counts, "Language"))

    if language_counts.empty:
        # Plotting an empty Series raises; show a notice instead of crashing.
        st.info("No completed tasks to plot yet.")
    else:
        fig = plot_language_counts(language_counts)
        st.pyplot(fig)
        # Streamlit re-runs the script on every interaction; close the figure
        # so pyplot does not accumulate open figures across reruns.
        plt.close(fig)

    user_id_counts = extract_user_ids(df["responses"]).value_counts()
    st.table(counts_table(user_id_counts, "User ID"))

    st.dataframe(df)


# Streamlit executes the script as ``__main__``, so this guard runs the app
# under ``streamlit run`` while keeping the helpers importable.
if __name__ == "__main__":
    main()