Spaces:

responsibility-framing
/

sociofillmore_public

Build error

Gosse Minnema

Add sociofillmore code, load dataset via private dataset repo

b11ac48 8 months ago

711 Bytes

	import pandas as pd

	# Per-text metadata table used by the lookups below. The first CSV column
	# becomes the index, and "text_id" is kept as str instead of being parsed
	# as a number. The path is relative, so it resolves against the current
	# working directory.
	texts_meta = pd.read_csv(
	    "output/crashes/split_data/split_dev10.texts.meta.csv",
	    index_col=0,
	    dtype={"text_id": str},
	)


	def is_a_dutch_text(doc_id, exclude_frisian=True):
	    """Return True if the metadata marks document *doc_id* as Dutch.

	    The document is looked up in the module-level ``texts_meta`` table and
	    only the first matching row is inspected.

	    Args:
	        doc_id: Identifier matched against the ``text_id`` column. That
	            column is loaded as ``str``, so the id is converted with
	            ``str()`` before comparing (an int id would otherwise never
	            match any row).
	        exclude_frisian: When true, texts whose ``provider`` is
	            ``omropfryslan.nl`` are rejected regardless of their
	            ``language`` value.

	    Returns:
	        bool: ``True`` if a row for ``doc_id`` exists, passes the provider
	        filter, and has ``language == "nl"``; ``False`` otherwise,
	        including when ``doc_id`` is not present in the table.
	    """
	    rows = texts_meta[texts_meta["text_id"] == str(doc_id)]
	    if rows.empty:
	        # Unknown document: nothing to base a decision on.
	        return False

	    # Exclude news providers publishing mainly in Frisian.
	    # (NB these texts are recognized as Dutch by langdetect, hence the need
	    # for a provider filter.)
	    if exclude_frisian and rows["provider"].iloc[0] == "omropfryslan.nl":
	        return False

	    # bool() so callers always get a plain Python bool, not a numpy scalar.
	    return bool(rows["language"].iloc[0] == "nl")