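# NOTE: the lines originally here read "Spaces: Sleeping Sleeping"; they are
# page residue captured with this file, not Python, and are kept as a comment.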
from datasets import load_dataset | |
import streamlit as st | |
from ast import literal_eval | |
import pandas as pd | |
# Task names grouped by modality. ``modality()`` looks a row's ``pipeline``
# value up in these lists to decide which modality label the row gets.
nlp_tasks = [
    "text-classification",
    "text-generation",
    "text2text-generation",
    "token-classification",
    "fill-mask",
    "question-answering",
    "translation",
    "conversational",
    "sentence-similarity",
    "summarization",
    "multiple-choice",
    "zero-shot-classification",
    "table-question-answering",
]

audio_tasks = [
    "automatic-speech-recognition",
    "audio-classification",
    "text-to-speech",
    "audio-to-audio",
    "voice-activity-detection",
]

cv_tasks = [
    "image-classification",
    "image-segmentation",
    "zero-shot-image-classification",
    "image-to-image",
    "unconditional-image-generation",
    "object-detection",
]

multimodal = [
    "feature-extraction",
    "text-to-image",
    "visual-question-answering",
    "image-to-text",
    "document-question-answering",
]

tabular = [
    "tabular-classification",
    "tabular-regression",
]

# Modality label -> list of task names filed under it (insertion order is the
# lookup order used when scanning for a match).
modalities = dict(
    nlp=nlp_tasks,
    audio=audio_tasks,
    cv=cv_tasks,
    multimodal=multimodal,
    tabular=tabular,
    rl=["reinforcement-learning"],
)
def modality(row):
    """Return the modality label for a row's ``pipeline`` value.

    Args:
        row: Mapping-like object (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) with a ``"pipeline"`` entry.

    Returns:
        The key of ``modalities`` whose task list contains the pipeline;
        ``"unk_modality"`` when the pipeline is a string that is not listed
        under any modality; ``None`` when the pipeline is not a string
        (for example a missing value).
    """
    pipeline = row["pipeline"]
    # The loop variable is deliberately not called ``modality`` so it does not
    # shadow this function's own name.
    for name, tasks in modalities.items():
        if pipeline in tasks:
            return name
    # ``type(pipeline) == "str"`` compared a type object with the string "str"
    # and was therefore always False, so unknown string pipelines fell through
    # to ``None``. ``isinstance`` performs the intended check.
    if isinstance(pipeline, str):
        return "unk_modality"
    return None
# The leading ``@`` matters: as a bare ``st.cache(...)`` statement the decorator
# object was created and immediately discarded, so ``process_dataset`` was never
# cached and the dataset was reloaded on every call.
# NOTE(review): recent Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_data`` / ``st.cache_resource`` — confirm the pinned version before
# migrating.
@st.cache(allow_output_mutation=True)
def process_dataset(version):
    """Load the model-repo stats at a revision and add derived columns.

    Args:
        version: Revision of the ``open-source-metrics/model-repos-stats``
            dataset, forwarded to ``load_dataset`` as ``revision``.

    Returns:
        A pandas DataFrame built from the ``train`` split with two extra
        columns: ``modality`` (result of ``modality`` applied row-wise) and
        ``length_bins`` (``text_length`` bucketed with ``pd.cut``).
    """
    # Load dataset at specified revision
    dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)

    # Convert to pandas dataframe
    data = dataset["train"].to_pandas()

    # Add modality column
    data["modality"] = data.apply(modality, axis=1)

    # Bin the model card length into some bins. With pd.cut's defaults the
    # intervals are right-closed, and values outside the outer edges (including
    # a length of exactly 0) become NaN.
    length_edges = [0, 200, 1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000]
    data["length_bins"] = pd.cut(data["text_length"], length_edges)

    return data
def eval_tags(row):
    """Parse a row's ``tags`` value into a Python list.

    Args:
        row: Mapping-like object with a ``"tags"`` entry. The value is
            presumably a string (a list literal such as ``"['a', 'b']"``, a
            single tag, or a placeholder) — TODO confirm against the dataset.

    Returns:
        ``[]`` when the value is ``None``, ``""``, ``"none"``, ``[]`` or
        ``"{}"``; the evaluated list when the value starts with ``"["``;
        otherwise a one-element list holding the raw value.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when a
            value starting with ``"["`` is not a valid Python literal.
    """
    tags = row["tags"]

    # ``None`` and the empty string carry no tags. Without this guard the
    # ``tags[0]`` lookup below raised TypeError / IndexError for them.
    if tags is None or tags == "":
        return []
    if tags == "none" or tags == [] or tags == "{}":
        return []

    if tags[0] != "[":
        # Not a list literal: wrap the raw value so it parses back as a
        # single-element list.
        tags = str([tags])

    val = literal_eval(tags)

    # NOTE(review): for string input the value parsed above always comes back
    # as a list, so this dict guard looks unreachable — confirm the intent
    # before removing it.
    if isinstance(val, dict):
        return []
    return val
def change_pct(old, new):
    """Return the change from ``old`` to ``new`` as a percentage of ``new``.

    Note that the denominator is ``new`` (not ``old``).

    Args:
        old: Previous value.
        new: Current value.

    Returns:
        ``100 * (new - old) / new`` rounded to 3 decimals, or the sentinel
        ``-10000000`` when ``new`` is 0 and the ratio cannot be computed.
    """
    if new != 0:
        difference = new - old
        return round(100 * difference / new, 3)
    # Division by zero is impossible; signal it with the sentinel value.
    return -10000000
def change_and_delta(old_old, old, new):
    """Return the latest percentage change and how it moved, as strings.

    Args:
        old_old: Value two periods back.
        old: Value one period back.
        new: Current value.

    Returns:
        A ``(curr_change, delta)`` tuple of strings, both ending in ``%``.
        ``curr_change`` is ``change_pct(old, new)``; ``delta`` is that value
        minus ``change_pct(old_old, old)``, rounded to 3 decimals and prefixed
        with ``+`` when positive.
    """
    curr_change = change_pct(old, new)
    prev_change = change_pct(old_old, old)
    delta = round(curr_change - prev_change, 3)

    # Always format delta as a percent string. Previously only positive deltas
    # were formatted ("+x%"), while zero/negative ones were returned as bare
    # numbers without the "%" suffix, giving callers an inconsistent type.
    sign = "+" if delta > 0 else ""
    delta = f"{sign}{delta}%"

    curr_change = f"{curr_change}%"
    return curr_change, delta