import os
import json

import streamlit as st
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import fasttext
from huggingface_hub import hf_hub_download

# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
HF_TOKEN = os.getenv('HF_TOKEN')
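# NOTE: HF_TOKEN must be available in the environment (e.g. as a Space secret)
# so that hf_hub_download can fetch the private test set
# 'atlasia/Darija-LID-private' below; without it the download will fail.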

def load_leaderboard():
    # Read the persisted leaderboard, or start fresh if none exists yet
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []

def save_leaderboard(leaderboard):
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)
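
# Each leaderboard entry is a flat dict, roughly:
#   {"name": "...", "model_type": "...", "model_id": "...",
#    "accuracy": 0.95, "precision": 0.95, "recall": 0.95, "f1": 0.95}
# (see the entry built in main() below).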

def load_test_data() -> list[str]:
    # Create cache directory if it doesn't exist
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset',
    )
    with open(path, 'r') as f:
        lines = f.readlines()
    # Drop trailing newlines; keep one raw labeled line per sample
    samples = [line.rstrip('\n') for line in lines]
    return samples
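
# benchmark.txt is expected in fastText supervised format, one sample per line:
#   __label__ary <text of the sample>
#   __label__other <text of the sample>
# main() below splits each line back into (label, text) on this convention.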

def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    accuracy = accuracy_score(y_true, y_pred)
    # pos_label is ignored when average='weighted', so it is omitted here;
    # metrics are averaged over both classes, weighted by their support
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted')
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
    }
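
# Worked example (sketch): with y_true = ['ary', 'ary', 'other', 'other'] and
# y_pred = ['ary', 'other', 'other', 'other'], this returns accuracy 0.75,
# weighted precision 5/6 ~ 0.833, weighted recall 0.75, weighted f1 ~ 0.733.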

def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    # For a list input, fasttext's predict returns (labels, probabilities),
    # where labels holds one sequence of predicted labels per text,
    # e.g. something like ('__label__ary',); strip the '__label__' prefix
    labels, _ = model.predict(texts)
    y_hat = [x[0].split('__label__')[1] for x in labels]
    return y_hat

def load_hf_fasttext_model(model_id):
    model_path = hf_hub_download(repo_id=model_id, filename='model.bin', cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    # The model is fully loaded into memory, so the cached file can be removed
    os.remove(model_path)
    return model

def load_local_fasttext_model(model_path):
    return fasttext.load_model(model_path)

def load_predictions(uploaded_file):
    predictions_df = pd.read_csv(uploaded_file)
    assert 'prediction' in predictions_df.columns, "Predictions file must contain a 'prediction' column"
    y_pred = list(predictions_df['prediction'].values)
    # Accept files that happen to predict only one of the two classes
    assert set(y_pred) <= {'ary', 'other'}, "Predictions must contain only 'ary' or 'other'"
    return y_pred
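
# A valid predictions file is a CSV with a single 'prediction' column, e.g.:
#   prediction
#   ary
#   other
#   ary
# with exactly one row per test-set line, in the same order, since the
# evaluation compares predictions to gold labels positionally.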

def main():
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set (atlasia/Darija-LID-benchmark).")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions.")

    # Load test data and split each line into its text and its gold label
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio("Select model type:", ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])

    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            # fasttext can only load from a path, so write the upload to disk first
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            model = load_local_fasttext_model("temp_model.bin")
            y_pred = predict_with_fasttext(model, texts)
            os.remove("temp_model.bin")
    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)
    else:
        uploaded_file = st.file_uploader("Upload predictions file (CSV with 'prediction' column containing either 'ary' or 'other')", type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            assert len(y_pred) == len(labels), "Predictions and labels must have the same length. Make sure the predictions are for the test set."

    # Evaluation section (y_pred is only bound once a model or file was provided)
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)

        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")

    # Display leaderboard, best F1 first
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")

if __name__ == "__main__":
    main()
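
# Usage (sketch, assuming this file is saved as app.py):
#   HF_TOKEN=<your token> streamlit run app.py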