models-explorer / language.py
osanseviero's picture
osanseviero HF staff
Release v2
78f7e42
raw history blame
No virus
1.88 kB
from ast import literal_eval
def make_lang_list(row):
    """Return the languages stored in ``row["languages"]`` as a list.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``"languages"`` entry is the placeholder string ``"none"``, a
            string-encoded Python literal such as ``"['en', 'fr']"``, or an
            already-parsed list/tuple/set.

    Returns:
        ``[]`` for the ``"none"`` placeholder; otherwise the parsed value.
        A single string literal is wrapped in a one-element list and
        tuples/sets are converted to lists, so ``len()`` of the result counts
        languages rather than characters. Any other literal (e.g. a dict) is
        returned unchanged.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when the
            string is not a valid Python literal.
    """
    languages = row["languages"]

    # Already parsed (e.g. the row was processed before): nothing to evaluate.
    if isinstance(languages, (list, tuple, set)):
        return list(languages)

    if languages == "none":
        return []

    parsed = literal_eval(languages)
    if isinstance(parsed, str):
        # A lone quoted tag such as "'en'" is one language, not a sequence
        # of characters.
        return [parsed]
    if isinstance(parsed, (tuple, set)):
        return list(parsed)
    return parsed
def language_count(row):
    """Return how many entries the ``"languages"`` value of *row* holds."""
    languages = row["languages"]
    return len(languages)
def process_for_lang(data, modality):
    """Prepare the models table for language statistics.

    The input frame is never modified; all work happens on a copy.

    Args:
        data: DataFrame with a ``modality`` column and a ``languages`` column.
            ``languages`` entries are expected to be string-encoded Python
            literals (e.g. ``"['en', 'fr']"``); the string ``"False"``, an
            empty dict and missing values all mean "no languages".
        modality: ``"NLP"``, ``"Audio"`` or ``"Multimodal"`` to keep only the
            rows of that modality; any other value keeps every row.

    Returns:
        A 4-tuple ``(data, no_lang_count, total_langs, unique_langs)``:

        * ``data`` - the filtered copy, with ``languages`` parsed into Python
          objects and two added integer columns: ``language_count`` and
          ``multilingual`` (1 when the tag ``"multilingual"`` is present).
        * ``no_lang_count`` - number of kept rows without languages.
        * ``total_langs`` - number of distinct language tags.
        * ``unique_langs`` - array of those distinct tags.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when a
            ``languages`` entry is not a valid Python literal.
    """
    # Filter by modality; unrecognised values keep all rows.
    modality_values = {"NLP": "nlp", "Audio": "audio", "Multimodal": "multimodal"}
    if modality in modality_values:
        data = data[data["modality"] == modality_values[modality]]

    # The steps below overwrite and add columns. Work on a private copy so the
    # caller's frame (or the filtered slice of it) is left untouched.
    data = data.copy()

    # Remove placeholder values that stand for "no languages". The test is
    # done element by element so it does not depend on the column dtype.
    is_placeholder = (
        data["languages"].map(lambda value: value == "False" or value == {}).astype(bool)
    )
    data.loc[is_placeholder, "languages"] = None

    # Count of rows that have no languages
    no_lang_count = data["languages"].isna().sum()

    # As the languages column might hold multiple languages, convert each
    # entry to a list and count its items. Column-wise map (rather than
    # row-wise DataFrame.apply) also works when the filter left no rows.
    data["languages"] = data["languages"].fillna("none")
    data["languages"] = data["languages"].map(
        lambda value: [] if value == "none" else literal_eval(value)
    )
    # astype keeps the column integer-typed even when the frame is empty.
    data["language_count"] = data["languages"].map(len).astype("int64")

    # Just keep the models with at least one language
    models_with_langs = data[data["language_count"] > 0]
    langs = models_with_langs["languages"].explode()
    langs = langs[langs.map(lambda tag: tag != {}).astype(bool)]
    unique_langs = langs.unique()
    total_langs = len(unique_langs)

    data["multilingual"] = (
        data["languages"].map(lambda tags: int("multilingual" in tags)).astype("int64")
    )
    return data, no_lang_count, total_langs, unique_langs
def filter_multilinguality(data, linguality):
    """Select rows of *data* according to the chosen multilinguality option.

    ``"Just Multilingual"`` keeps rows whose ``multilingual`` column equals 1
    or whose ``language_count`` is greater than 1.
    ``"Three or more languages"`` keeps rows with ``language_count`` >= 3.
    Any other option returns *data* unchanged.
    """
    if linguality == "Three or more languages":
        return data[data["language_count"] >= 3]

    if linguality != "Just Multilingual":
        return data

    # Either an explicit "multilingual" tag or several language tags qualify.
    has_tag = data["multilingual"] == 1
    has_several = data["language_count"] > 1
    keep = has_tag | has_several
    return data[keep]