Spaces:
Build error
Build error
import pandas as pd | |
# Metadata table for the texts of the "dev10" split, loaded once at import time.
# The first CSV column becomes the index, and "text_id" is forced to str
# (presumably so that numeric-looking IDs keep their exact form -- confirm).
texts_meta = pd.read_csv(
    "output/crashes/split_data/split_dev10.texts.meta.csv",
    index_col=0,
    dtype={"text_id": str},
)
def is_a_dutch_text(doc_id, exclude_frisian=True, *, meta=None):
    """Return whether the text with ID *doc_id* is labelled as Dutch.

    The decision is based on the first metadata row whose ``text_id`` equals
    ``doc_id``; any further rows with the same ID are ignored.

    Args:
        doc_id: Identifier of the text. It is converted with ``str()`` before
            the lookup, because ``text_id`` is stored as a string column; a
            non-string ID (e.g. an ``int``) would otherwise never match.
        exclude_frisian: When true, texts from the provider
            ``omropfryslan.nl`` are rejected regardless of their language
            label.
        meta: Optional metadata DataFrame with string ``text_id`` plus
            ``provider`` and ``language`` columns. Defaults to the
            module-level ``texts_meta`` table.

    Returns:
        ``True`` if a matching row exists, passes the provider filter and has
        ``language == "nl"``; ``False`` otherwise (including when no row
        matches ``doc_id``).
    """
    table = texts_meta if meta is None else meta
    rows_for_doc = table[table["text_id"] == str(doc_id)]

    # Unknown document: nothing to inspect.
    if rows_for_doc.empty:
        return False

    # Exclude news providers publishing mainly in Frisian.
    # (NB these texts are recognized as Dutch by langdetect, hence the need
    # for a provider filter.)
    if exclude_frisian and rows_for_doc["provider"].iloc[0] == "omropfryslan.nl":
        return False

    return bool(rows_for_doc["language"].iloc[0] == "nl")