alielfilali01 committed on
Commit
7206088
·
verified ·
1 Parent(s): f5920ad

Update app.py

Browse files

Filter out any entries in results.json whose only key is '_last_sync_timestamp'

Files changed (1) hide show
  1. app.py +22 -18
app.py CHANGED
@@ -11,48 +11,42 @@ DATASET_REPO_ID = f"{OWNER}/requests-dataset"
11
 
12
  HEADER = """
13
  <center>
 
14
  <h1>AraGen Leaderboard: Generative Tasks Evaluation of Arabic LLMs</h1>
15
  </center>
16
 
17
  <br></br>
18
 
19
- <p>This leaderboard introduces generative tasks evaluation for Arabic Large Language Models (LLMs). Powered by the new <strong>3C3H</strong> evaluation measure, this framework delivers a transparent, robust, and holistic evaluation system that balances factual accuracy and usability assessment for a production ready setting.</p>
20
 
21
- <p>For more details, please consider going through the technical blogpost <a href="https://huggingface.co/blog/leaderboard-3c3h-aragen">here</a>.</p>
22
  """
23
 
24
  ABOUT_SECTION = """
25
- ## About
26
 
27
  The AraGen Leaderboard is designed to evaluate and compare the performance of Chat Arabic Large Language Models (LLMs) on a set of generative tasks. By leveraging the new **3C3H** evaluation measure which evaluates the model's output across six dimensions —Correctness, Completeness, Conciseness, Helpfulness, Honesty, and Harmlessness— the leaderboard provides a comprehensive and holistic evaluation of a model's performance in generating human-like and ethically responsible content.
28
 
29
  ### Why Focus on Chat Models?
30
 
31
- AraGen Leaderboard —And 3C3H in general— is specifically designed to assess **chat models**, which interact in conversational settings, intended for end user interaction and require a blend of factual accuracy and user-centric dialogue capabilities. While it is technically possible to submit foundational models, we kindly ask users to refrain from doing so. For evaluations of foundational models using likelihood accuracy based benchmarks, please refer to the [Open Arabic LLM Leaderboard (OALL)](https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard).
32
 
33
  ### How to Submit Your Model?
34
 
35
- Navigate to the submission section below to submit your open chat model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submmited metadata (precision, revision, #params) is accurate.
36
 
37
  ### Contact
38
 
39
- For any inquiries or assistance, feel free to reach out through the community tab at [Inception AraGen Community](https://huggingface.co/spaces/inceptionai/AraGen-Leaderboard/discussions) or via [email](mailto:ali.filali@inceptionai.ai).
40
- """
41
-
42
- CITATION_BUTTON_LABEL = """
43
- Copy the following snippet to cite these results
44
  """
45
 
46
  CITATION_BUTTON_TEXT = """
47
- @misc{AraGen,
48
- author = {El Filali, Ali and Sengupta, Neha and Abouelseoud, Arwa and Nakov, Preslav and Fourrier, Clémentine},
49
- title = {Rethinking LLM Evaluation with 3C3H: AraGen Benchmark and Leaderboard},
50
- year = {2024},
51
- publisher = {Inception},
52
- howpublished = "url{https://huggingface.co/spaces/inceptionai/AraGen-Leaderboard}"
53
- }
54
  """
55
 
 
 
 
56
 
57
  def load_results():
58
  # Get the current directory of the script and construct the path to results.json
@@ -63,6 +57,16 @@ def load_results():
63
  with open(results_file, 'r') as f:
64
  data = json.load(f)
65
 
 
 
 
 
 
 
 
 
 
 
66
  # Lists to collect data
67
  data_3c3h = []
68
  data_tasks = []
@@ -654,7 +658,7 @@ def main():
654
  citation_button = gr.Textbox(
655
  value=CITATION_BUTTON_TEXT,
656
  label=CITATION_BUTTON_LABEL,
657
- lines=10,
658
  elem_id="citation-button",
659
  show_copy_button=True,
660
  )
 
11
 
12
  HEADER = """
13
  <center>
14
+ <h1>This space is experimental and should always stay private!</h1><br></br>
15
  <h1>AraGen Leaderboard: Generative Tasks Evaluation of Arabic LLMs</h1>
16
  </center>
17
 
18
  <br></br>
19
 
20
+ <p>This leaderboard is designed to redefine the evaluation of Arabic Large Language Models (LLMs). Powered by the new <strong>3C3H</strong> evaluation measure, this framework delivers a transparent, robust, and holistic evaluation system that balances factual accuracy and usability assessment for a production ready setting.</p>
21
 
22
+ <p>For more details, please consider going through the technical blogpost <a href="https://huggingface.co/blog/">here</a>.</p>
23
  """
24
 
25
  ABOUT_SECTION = """
26
+ ## About:
27
 
28
  The AraGen Leaderboard is designed to evaluate and compare the performance of Chat Arabic Large Language Models (LLMs) on a set of generative tasks. By leveraging the new **3C3H** evaluation measure which evaluates the model's output across six dimensions —Correctness, Completeness, Conciseness, Helpfulness, Honesty, and Harmlessness— the leaderboard provides a comprehensive and holistic evaluation of a model's performance in generating human-like and ethically responsible content.
29
 
30
  ### Why Focus on Chat Models?
31
 
32
+ AraGen —And 3C3H in general— is specifically tailored to assess **chat models**, which interact in conversational settings, intended for end user interaction and require a blend of factual accuracy and user-centric dialogue capabilities. While it is technically possible to submit foundational models, we kindly ask users to refrain from doing so. For evaluations of foundational models using likelihood accuracy based benchmarks, please refer to the [Open Arabic LLM Leaderboard (OALL)](https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard).
33
 
34
  ### How to Submit Your Model?
35
 
36
+ Navigate to the submission section below to submit your open chat model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submitted metadata (precision, revision, #params) is accurate.
37
 
38
  ### Contact
39
 
40
+ For any inquiries or assistance, please reach out through the community tab at [Inception AraGen Community](https://huggingface.co/spaces/inceptionai/AraGen-Leaderboard/discussions) or via [email](mailto:ali.filali@inceptionai.ai).
 
 
 
 
41
  """
42
 
43
  CITATION_BUTTON_TEXT = """
44
+ CITATION
 
 
 
 
 
 
45
  """
46
 
47
+ CITATION_BUTTON_LABEL = """
48
+ CITATION_BUTTON_LABEL
49
+ """
50
 
51
  def load_results():
52
  # Get the current directory of the script and construct the path to results.json
 
57
  with open(results_file, 'r') as f:
58
  data = json.load(f)
59
 
60
+ # Filter out any entries that only contain '_last_sync_timestamp'
61
+ filtered_data = []
62
+ for entry in data:
63
+ # If '_last_sync_timestamp' is the only key, skip it
64
+ if len(entry.keys()) == 1 and "_last_sync_timestamp" in entry:
65
+ continue
66
+ filtered_data.append(entry)
67
+
68
+ data = filtered_data
69
+
70
  # Lists to collect data
71
  data_3c3h = []
72
  data_tasks = []
 
658
  citation_button = gr.Textbox(
659
  value=CITATION_BUTTON_TEXT,
660
  label=CITATION_BUTTON_LABEL,
661
+ lines=20,
662
  elem_id="citation-button",
663
  show_copy_button=True,
664
  )