Weyaxi committed on
Commit
364e449
1 Parent(s): 416aee5

Update app.py

Files changed (1)
  1. app.py +47 -83
app.py CHANGED
@@ -1,58 +1,15 @@
-import os
-os.system("wget https://raw.githubusercontent.com/Weyaxi/scrape-open-llm-leaderboard/main/openllm.py")
 from openllm import *
 import requests
 import pandas as pd
 from bs4 import BeautifulSoup
 from tqdm import tqdm
-from huggingface_hub import HfApi, CommitOperationAdd, create_commit
+from huggingface_hub import HfApi
 import gradio as gr
 import datetime

 api = HfApi()


-HF_TOKEN = os.getenv('HF_TOKEN')
-
-
-headers_models = ["🔢 Serial Number", "👤 Author Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models",
-                  "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard",
-                  "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model",
-                  "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model",
-                  "👑 Best Rank at Trending Models", "🏷️ Type"]
-
-headers_datasets = ["🔢 Serial Number", "👤 Author Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets",
-                    "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset",
-                    "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset",
-                    "👑 Best Rank at Trending Datasets", "🏷️ Type"]
-
-headers_spaces = ["🔢 Serial Number", "👤 Author Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space",
-                  "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces",
-                  "🏷️ Type"]
-
-
-def apply_headers(df, headers):
-    tmp = df.copy()
-    tmp.columns = headers
-
-    return tmp
-
-
-def get_time():
-    return datetime.datetime.now().strftime("%d-%m-%Y %H-%M")
-
-
-def upload_datasets(dfs):
-
-    time = get_time()
-
-    operations = [CommitOperationAdd(path_in_repo=f"{time}/models_df.csv", path_or_fileobj=(dfs[0].to_csv()).encode()),
-                  CommitOperationAdd(path_in_repo=f"{time}/datasets_df.csv", path_or_fileobj=(dfs[1].to_csv()).encode()),
-                  CommitOperationAdd(path_in_repo=f"{time}/spaces_df.csv", path_or_fileobj=(dfs[2].to_csv()).encode())]
-
-    return (create_commit(repo_id="Weyaxi/huggingface-leaderboard-history", operations=operations, commit_message=f"Uploading history of {time}", repo_type="dataset", token=HF_TOKEN))
-
-
 def get_most(df_for_most_function):
     download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
     most_downloaded = download_sorted_df.iloc[0]
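For context, the helper removed above snapshots the three leaderboard dataframes into the `Weyaxi/huggingface-leaderboard-history` dataset by batching three CSVs into one commit. A minimal standalone sketch of that pattern (the function name `snapshot_leaderboard` is illustrative; the repo id, file paths, and the `HF_TOKEN` environment variable come from the removed lines):

```python
import datetime
import os

import pandas as pd
from huggingface_hub import CommitOperationAdd, create_commit

HF_TOKEN = os.getenv("HF_TOKEN")  # assumed to be configured as a Space secret


def snapshot_leaderboard(models_df: pd.DataFrame, datasets_df: pd.DataFrame, spaces_df: pd.DataFrame):
    """Upload the three dataframes as CSVs under a timestamped folder, in a single commit."""
    time = datetime.datetime.now().strftime("%d-%m-%Y %H-%M")
    operations = [
        CommitOperationAdd(path_in_repo=f"{time}/models_df.csv", path_or_fileobj=models_df.to_csv().encode()),
        CommitOperationAdd(path_in_repo=f"{time}/datasets_df.csv", path_or_fileobj=datasets_df.to_csv().encode()),
        CommitOperationAdd(path_in_repo=f"{time}/spaces_df.csv", path_or_fileobj=spaces_df.to_csv().encode()),
    ]
    return create_commit(
        repo_id="Weyaxi/huggingface-leaderboard-history",
        operations=operations,
        commit_message=f"Uploading history of {time}",
        repo_type="dataset",
        token=HF_TOKEN,
    )
```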
@@ -73,14 +30,10 @@ def get_sum(df_for_sum_function):


 def get_openllm_leaderboard():
-    try:
-        data = get_json_format_data()
-        finished_models = get_datas(data)
-        df = pd.DataFrame(finished_models)
-        return df['Model'].tolist()
-    except Exception as e: # something is wrong about the leaderboard so return empty list
-        print(e)
-        return []
+    data = get_json_format_data()
+    finished_models = get_datas(data)
+    df = pd.DataFrame(finished_models)
+    return df['Model'].tolist()


 def get_ranking(model_list, target_org):
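With the `try`/`except` removed, a failure while scraping the Open LLM Leaderboard now propagates instead of yielding an empty list. If the earlier fail-soft behaviour is still wanted at a call site, a small guard along these lines would restore it (a sketch only, not part of this commit; it reuses `get_openllm_leaderboard` from this file):

```python
def safe_openllm_models() -> list:
    """Return Open LLM Leaderboard model names, or an empty list if the scrape fails."""
    try:
        return get_openllm_leaderboard()
    except Exception as e:  # the scraped page can change shape or be temporarily unreachable
        print(f"Open LLM Leaderboard scrape failed: {e}")
        return []
```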
@@ -259,21 +212,21 @@ def get_ranking_trend(json_data, org_name):
     return {"id": "Not Found", "rank": "Not Found"}


-
 def fetch_data_from_url(url):
     response = requests.get(url)
     if response.status_code == 200:
         data = response.text.splitlines()
-        return [line.rstrip("\n") for line in data]
+        return [line.strip() for line in data]
     else:
         print(f"Failed to fetch data from URL: {url}")
         return []

-user_names_url = "https://huggingface.co/datasets/Weyaxi/user-orgs-huggingface-leaderboard/raw/main/user_names.txt"
-org_names_url = "https://huggingface.co/datasets/Weyaxi/user-orgs-huggingface-leaderboard/raw/main/org_names.txt"

-user_names_in_list = fetch_data_from_url(user_names_url)
-org_names_in_list = fetch_data_from_url(org_names_url)
+user_names_url = "https://huggingface.co/datasets/PulsarAI/user-orgs-huggingface-leaderboard/raw/main/user_names.txt"
+org_names_url = "https://huggingface.co/datasets/PulsarAI/user-orgs-huggingface-leaderboard/raw/main/org_names.txt"
+
+user_names_in_list = fetch_data_from_url(user_names_url)
+org_names_in_list = fetch_data_from_url(org_names_url)

 datetime_now = str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
 INTRODUCTION_TEXT = f"""
@@ -291,9 +244,9 @@ INTRODUCTION_TEXT = f"""

 🛠️ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).

-📒 **Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
+**📝 Note:** The models dataframe includes some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.

-📒 **Note:** In trending models/datasets/spaces, first 300 models/datasets/spaces is being retrieved from huggingface.
+**📝 Note:** For trending models/datasets/spaces, only the first 300 entries are retrieved from Hugging Face.

 ## 🔍 Searching Organizations and Users

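The intro text above notes that the backend mainly uses the Hugging Face Hub API and that only the first 300 trending models/datasets/spaces are considered. As a rough illustration of that kind of capped listing and per-author grouping (a sketch only: the sort key, limit, and grouping are assumptions, not the app's actual calls):

```python
from huggingface_hub import HfApi

api = HfApi()

# Fetch a capped, sorted listing of models; "downloads" is just an example sort key.
top_models = list(api.list_models(sort="downloads", direction=-1, limit=300))

# Group repo ids by their namespace (user or organization), mirroring the per-author aggregation.
per_author: dict[str, list[str]] = {}
for model in top_models:
    author = model.id.split("/")[0] if "/" in model.id else "(no namespace)"
    per_author.setdefault(author, []).append(model.id)

print(f"{len(top_models)} models across {len(per_author)} authors")
```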
@@ -346,12 +299,12 @@ def update_table(orgs, users, how_much=400, return_all=False):
         filtered_df = dataFrame[(dataFrame['Type'] == 'Organization') | (dataFrame['Type'] == 'User')]

     else:
-        return apply_headers(dataFrame.head(0), headers_models)
+        return dataFrame.head(0)

     if return_all:
-        return apply_headers(filtered_df, headers_models)
+        return filtered_df
     else:
-        return apply_headers(filtered_df, headers_models).head(how_much)
+        return filtered_df.head(how_much)


 def update_table_datasets(orgs, users, how_much=250, return_all=False):
@@ -367,12 +320,12 @@ def update_table_datasets(orgs, users, how_much=250, return_all=False):
         filtered_df = dataFrame[(dataFrame['Type'] == 'Organization') | (dataFrame['Type'] == 'User')]

     else:
-        return apply_headers(dataFrame, headers_datasets).head(0)
+        return dataFrame.head(0)

     if return_all:
-        return apply_headers(filtered_df, headers_datasets)
+        return filtered_df
     else:
-        return apply_headers(filtered_df, headers_datasets).head(how_much)
+        return filtered_df.head(how_much)


 def update_table_spaces(orgs, users, how_much=200, return_all=False):
@@ -388,12 +341,12 @@ def update_table_spaces(orgs, users, how_much=200, return_all=False):
         filtered_df = dataFrame[(dataFrame['Type'] == 'Organization') | (dataFrame['Type'] == 'User')]

     else:
-        return apply_headers(dataFrame, headers_spaces).head(0)
+        return dataFrame.head(0)

     if return_all:
-        return apply_headers(filtered_df, headers_spaces)
+        return filtered_df
     else:
-        return apply_headers(filtered_df, headers_spaces).head(how_much)
+        return filtered_df.head(how_much)



@@ -503,6 +456,7 @@ def search_df(author):
     return markdown_text


+
 with gr.Blocks() as demo:
     gr.Markdown("""<h1 align="center" id="space-title">🤗 Huggingface Leaderboard</h1>""")
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
@@ -522,7 +476,13 @@ with gr.Blocks() as demo:
         models_df = make_leaderboard(org_names_in_list, user_names_in_list, "models", group_models_by_author(all_models))
         models_df = models_df_to_clickable(models_df, columns_to_convert, "models")

-        gr_models = gr.Dataframe(apply_headers(models_df, headers_models).head(400), headers=headers_models, interactive=True,
+        headers = ["🔢 Serial Number", "👤 Author Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models",
+                   "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard",
+                   "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model",
+                   "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model",
+                   "👑 Best Rank at Trending Models", "🏷️ Type"]
+
+        gr_models = gr.Dataframe(models_df.head(400), headers=headers, interactive=True,
                                  datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str",
                                            "markdown", "str", "markdown", "str", "markdown", "str", "str"])

@@ -531,7 +491,12 @@ with gr.Blocks() as demo:
         dataset_df = make_leaderboard(org_names_in_list, user_names_in_list, "datasets", group_models_by_author(all_datasets))
         dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")

-        gr_datasets = gr.Dataframe(apply_headers(dataset_df, headers_datasets).head(250), headers=headers_datasets, interactive=False,
+        headers = ["🔢 Serial Number", "👤 Author Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets",
+                   "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset",
+                   "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset",
+                   "👑 Best Rank at Trending Datasets", "🏷️ Type"]
+
+        gr_datasets = gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False,
                                    datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str",
                                              "markdown", "str", "markdown", "str", "str"])

@@ -541,11 +506,14 @@ with gr.Blocks() as demo:
         spaces_df = make_leaderboard(org_names_in_list, user_names_in_list, "spaces", group_models_by_author(all_spaces))
         spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")

-        gr_spaces = gr.Dataframe(apply_headers(spaces_df, headers_spaces).head(200), headers=headers_spaces, interactive=False,
+        headers = ["🔢 Serial Number", "👤 Author Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space",
+                   "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces",
+                   "🏷️ Type"]
+
+        gr_spaces = gr.Dataframe(spaces_df.head(200), headers=headers, interactive=False,
                                  datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str",
                                            "str"])

-
     with gr.TabItem("🔍 Search", id=4):
         with gr.Column(min_width=320):
             search_bar = gr.Textbox(
@@ -557,9 +525,6 @@ with gr.Blocks() as demo:
             search_bar.submit(fn=search_df, inputs=search_bar, outputs=yazi)


-    commit = upload_datasets([models_df, dataset_df, spaces_df])
-    print(commit)
-
     orgs.change(fn=update_table, inputs=[orgs, users], outputs=gr_models)

     orgs.change(fn=update_table_datasets, inputs=[orgs, users], outputs=gr_datasets)
@@ -573,14 +538,13 @@ with gr.Blocks() as demo:
     users.change(fn=update_table_spaces, inputs=[orgs, users], outputs=gr_spaces)


-    filtered_model_users = update_table(orgs=False, users=True, return_all=True)['👤 Author Name'].tolist()
-    filtered_model_orgs = update_table(orgs=True, users=False, return_all=True)['👤 Author Name'].tolist()
+    filtered_model_users = update_table(orgs=False, users=True, return_all=True)['Author Name'].tolist()
+    filtered_model_orgs = update_table(orgs=True, users=False, return_all=True)['Author Name'].tolist()

-    filtered_datasets_users = update_table_datasets(orgs=False, users=True, return_all=True)['👤 Author Name'].tolist()
-    filtered_datasets_orgs = update_table_datasets(orgs=True, users=False, return_all=True)['👤 Author Name'].tolist()
+    filtered_datasets_users = update_table_datasets(orgs=False, users=True, return_all=True)['Author Name'].tolist()
+    filtered_datasets_orgs = update_table_datasets(orgs=True, users=False, return_all=True)['Author Name'].tolist()

-    filtered_spaces_users = update_table_spaces(orgs=False, users=True, return_all=True)['👤 Author Name'].tolist()
-    filtered_spaces_orgs = update_table_spaces(orgs=True, users=False, return_all=True)['👤 Author Name'].tolist()
+    filtered_spaces_users = update_table_spaces(orgs=False, users=True, return_all=True)['Author Name'].tolist()
+    filtered_spaces_orgs = update_table_spaces(orgs=True, users=False, return_all=True)['Author Name'].tolist()

 demo.launch(debug=True)
-