from pathlib import Path

# Relative directory "requested_models". Presumably where submitted model
# requests are stored -- confirm against the code that consumes this constant.
DIR_OUTPUT_REQUESTS = Path("requested_models")

# Relative directory "eval_requests". Presumably holds evaluation request
# files -- confirm against the code that consumes this constant.
EVAL_REQUESTS_PATH = Path("eval_requests")

# URL of the banner image (a file in a Hugging Face dataset repository).
banner_url = "https://huggingface.co/datasets/vargha/persian_asr_leaderboard/resolve/main/banner.png"

# HTML snippet: a flex <div> wrapping the banner <img>. The image width is
# 40vw, bounded to the 300px-600px range. Split across adjacent literals only
# for readability; the resulting value is a single line of HTML.
BANNER = (
    '<div style="display: flex; justify-content: space-around;">'
    f'<img src="{banner_url}" alt="Banner" '
    'style="width: 40vw; min-width: 300px; max-width: 600px;"> '
    "</div>"
)

# Markdown introduction for the leaderboard.
# Built from adjacent string literals inside parentheses instead of
# backslash continuations inside one literal, so stray trailing characters or
# blank lines cannot break the string. The explicit "\n" escapes are the only
# line breaks in the resulting value.
INTRODUCTION_TEXT = (
    "📐 The 🤗 Persian ASR Leaderboard ranks and evaluates speech recognition models "
    "on the Hugging Face Hub using the Persian Common Voice dataset. "
    "\nWe report the [WER](https://huggingface.co/spaces/evaluate-metric/wer) and "
    "[CER](https://huggingface.co/spaces/evaluate-metric/cer) metrics (⬇️ lower the better). "
    "Models are ranked based on their WER, from lowest to highest. "
    "Check the 📈 Metrics tab to understand how the models are evaluated. "
    "\nIf you want results for a model that is not listed here, "
    "you can submit a request for it to be included ✉️✨."
)

# BibTeX entry for citing the leaderboard.
# The doubled backslash yields a literal "\url{...}" in the resulting string.
# TODO: "Your Name" and "your-username" look like unfilled template
# placeholders -- replace them with the real author and Space URL.
CITATION_TEXT = """@misc{persian-asr-leaderboard,
title = {Persian Automatic Speech Recognition Leaderboard},
author = {Your Name},
year = 2024,
publisher = {Hugging Face},
howpublished = "\\url{https://huggingface.co/spaces/your-username/persian_asr_leaderboard}"
}
"""

# Markdown text describing the evaluation metrics (WER and CER), the
# evaluation dataset, and how to submit a model. Judging by its name, it is
# rendered in the "Metrics" tab -- confirm against the UI code.
METRICS_TAB_TEXT = """
# Metrics and Dataset

## Metrics

We evaluate models using the Word Error Rate (WER) and Character Error Rate (CER) metrics. Both metrics are used to measure the accuracy of automatic speech recognition systems.

- **Word Error Rate (WER)**: Calculates the percentage of words that were incorrectly predicted. A lower WER indicates better performance.
- **Character Error Rate (CER)**: Similar to WER but operates at the character level, which can be more informative for languages with rich morphology like Persian.

## Dataset

We use the [Persian Common Voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0) dataset for evaluation. The dataset consists of diverse speech recordings from various speakers, making it a good benchmark for Persian ASR models.

## How to Submit Your Model

To submit your model for evaluation, go to the "✉️✨ Request a model here!" tab and enter your model's name in the format `username/model_name`. Your model should be hosted on the Hugging Face Hub.

"""