import os
import json

import streamlit as st
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import fasttext
from huggingface_hub import hf_hub_download

# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
HF_TOKEN = os.getenv('HF_TOKEN')
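# NOTE: HF_TOKEN must be available in the environment (e.g. as a Space secret)
# so that hf_hub_download can fetch the private test set
# 'atlasia/Darija-LID-private' below; without it the download will fail.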

def load_leaderboard():
    # Read the persisted leaderboard, or start fresh if none exists yet
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []

def save_leaderboard(leaderboard):
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)
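
# Each leaderboard entry is a flat dict, roughly:
#   {"name": "...", "model_type": "...", "model_id": "...",
#    "accuracy": 0.95, "precision": 0.95, "recall": 0.95, "f1": 0.95}
# (see the entry built in main() below).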

def load_test_data() -> list[str]:
    # Create cache directory if it doesn't exist
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset',
    )
    with open(path, 'r') as f:
        lines = f.readlines()
    # Drop trailing newlines; keep one raw labeled line per sample
    samples = [line.rstrip('\n') for line in lines]
    return samples
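
# benchmark.txt is expected in fastText supervised format, one sample per line:
#   __label__ary <text of the sample>
#   __label__other <text of the sample>
# main() below splits each line back into (label, text) on this convention.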

def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    accuracy = accuracy_score(y_true, y_pred)
    # pos_label is ignored when average='weighted', so it is omitted here;
    # metrics are averaged over both classes, weighted by their support
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted')
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
    }
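
# Worked example (sketch): with y_true = ['ary', 'ary', 'other', 'other'] and
# y_pred = ['ary', 'other', 'other', 'other'], this returns accuracy 0.75,
# weighted precision 5/6 ~ 0.833, weighted recall 0.75, weighted f1 ~ 0.733.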

def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    # For a list input, fasttext's predict returns (labels, probabilities),
    # where labels holds one sequence of predicted labels per text,
    # e.g. something like ('__label__ary',); strip the '__label__' prefix
    labels, _ = model.predict(texts)
    y_hat = [x[0].split('__label__')[1] for x in labels]
    return y_hat

def load_hf_fasttext_model(model_id):
    model_path = hf_hub_download(repo_id=model_id, filename='model.bin', cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    # The model is fully loaded into memory, so the cached file can be removed
    os.remove(model_path)
    return model

def load_local_fasttext_model(model_path):
    return fasttext.load_model(model_path)

def load_predictions(uploaded_file):
    predictions_df = pd.read_csv(uploaded_file)
    assert 'prediction' in predictions_df.columns, "Predictions file must contain a 'prediction' column"
    y_pred = list(predictions_df['prediction'].values)
    # Accept files that happen to predict only one of the two classes
    assert set(y_pred) <= {'ary', 'other'}, "Predictions must contain only 'ary' or 'other'"
    return y_pred
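
# A valid predictions file is a CSV with a single 'prediction' column, e.g.:
#   prediction
#   ary
#   other
#   ary
# with exactly one row per test-set line, in the same order, since the
# evaluation compares predictions to gold labels positionally.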

def main():
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set (atlasia/Darija-LID-benchmark).")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions.")

    # Load test data and split each line into its text and its gold label
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio("Select model type:", ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])

    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            # fasttext can only load from a path, so write the upload to disk first
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            model = load_local_fasttext_model("temp_model.bin")
            y_pred = predict_with_fasttext(model, texts)
            os.remove("temp_model.bin")
    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)
    else:
        uploaded_file = st.file_uploader("Upload predictions file (CSV with 'prediction' column containing either 'ary' or 'other')", type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            assert len(y_pred) == len(labels), "Predictions and labels must have the same length. Make sure the predictions are for the test set."

    # Evaluation section (y_pred is only bound once a model or file was provided)
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)

        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")

    # Display leaderboard, best F1 first
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")

if __name__ == "__main__":
    main()
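
# Usage (sketch, assuming this file is saved as app.py):
#   HF_TOKEN=<your token> streamlit run app.py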