Santosh Sanjeev committed on
Commit
cc52786
·
1 Parent(s): 8d6809c

pushed changes app.py

Browse files
Files changed (2) hide show
  1. app.py +43 -3
  2. datasetcards_new.parquet +2 -2
app.py CHANGED
@@ -536,7 +536,47 @@ columns = list(initial_df.columns)
536
 
537
  # --- Gradio App ---
538
  with gr.Blocks() as demo:
539
- gr.Markdown("## Dataset Insight Portal")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  # Pagination
542
  with gr.Row():
@@ -652,7 +692,7 @@ with gr.Blocks() as demo:
652
  )
653
 
654
  # --- Auto-push ---
655
- def auto_push_loop(interval=300):
656
  while True:
657
  try:
658
  hf_token = os.environ.get("HF_TOKEN")
@@ -674,5 +714,5 @@ def auto_push_loop(interval=300):
674
  print("⚠️ Push failed:", e)
675
  time.sleep(interval)
676
 
677
- threading.Thread(target=auto_push_loop, args=(300,), daemon=True).start()
678
  demo.launch()
 
536
 
537
  # --- Gradio App ---
538
  with gr.Blocks() as demo:
539
+ gr.Markdown("""
540
+ # Dataset Insight Portal
541
+ Welcome! This portal helps you explore and manage datasets from our Hugging Face organization.
542
+ ## What is this space for?
543
+ This space provides a table of datasets along with metadata. You can:
544
+ - Browse datasets with pagination.
545
+ - Search datasets by various fields.
546
+ - Assign responsibility for reviewing datasets (`assigned_to`).
547
+ - Track progress using `status`.
548
+ ## Why the table?
549
+ The table gives a structured view of all datasets, making it easy to sort, filter, and update information for each dataset. It consists of all datasets until 20-09-2025.
550
+ ## What does the table contain?
551
+ Each row represents a dataset. Columns include:
552
+ - **dataset_id**: Unique identifier of the dataset.
553
+ - **dataset_url**: Link to the dataset page on Hugging Face.
554
+ - **downloads**: Number of downloads.
555
+ - **author**: Dataset author.
556
+ - **license**: License type.
557
+ - **tags**: Tags describing the dataset. Obtained from the dataset card.
558
+ - **task_categories**: Categories of tasks the dataset is useful for. Obtained from the dataset card.
559
+ - **last_modified**: Date of last update.
560
+ - **field, keyword**: Metadata columns describing dataset purpose based on heuristics. Use the `field` and `keyword` to filter for science based datasets.
561
+ - **category**: Category of the dataset (`rich` means it is good dataset card. `minimal` means it needs improvement for the reasons below).
562
+ - **reason**: Reason why the dataset is classified as `minimal`. Options: `Failed to load card`, `No metadata and no description`, `No metadata and has description`, `Short description`.
563
+ - **usedStorage**: Storage used by the dataset (bytes).
564
+ - **assigned_to**: Person responsible for the dataset (editable).
565
+ - **status**: Progress status (editable). Options: `todo`, `inprogress`, `PR submitted`, `PR merged`.
566
+ ## How to use search
567
+ - Select a **column** from the dropdown.
568
+ - If the column is textual, type your query in the text box.
569
+ - If the column is a dropdown (like `assigned_to` or `status`), select the value from the dropdown.
570
+ - Click **Search** to filter the table.
571
+ ## How to add or update `assigned_to` and `status`
572
+ 1. Search for the **dataset_id** initially.
573
+ 2. Then, select the **dataset_id** from the dropdown below the table.
574
+ 3. Choose the person responsible in **Assigned To**. If you are a member of the organization, your username should appear in the list. Else refresh and try again.
575
+ 4. Select the current status in **Status**.
576
+ 5. Click **Save Changes** to update the table and persist the changes.
577
+ 6. Use **Refresh All** to reload the table and the latest members list.
578
+ This portal makes it easy to keep track of dataset reviews, assignments, and progress all in one place.
579
+ """)
580
 
581
  # Pagination
582
  with gr.Row():
 
692
  )
693
 
694
  # --- Auto-push ---
695
+ def auto_push_loop(interval=3600):
696
  while True:
697
  try:
698
  hf_token = os.environ.get("HF_TOKEN")
 
714
  print("⚠️ Push failed:", e)
715
  time.sleep(interval)
716
 
717
+ threading.Thread(target=auto_push_loop, args=(3600,), daemon=True).start()
718
  demo.launch()
datasetcards_new.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6f8cbcec66be439c4824865f51129d42c538c2554cad2e87ccd6b7c489c8ba
3
- size 27585820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d3770a3024eaf459d5c12d2c4a9d0d5a5043660d0a15c062a387595602eacf
3
+ size 38347730