Santosh Sanjeev
committed on
Commit
·
cc52786
1
Parent(s):
8d6809c
pushed changes app.py
Browse files
- app.py +43 -3
- datasetcards_new.parquet +2 -2
app.py
CHANGED
|
@@ -536,7 +536,47 @@ columns = list(initial_df.columns)
|
|
| 536 |
|
| 537 |
# --- Gradio App ---
|
| 538 |
with gr.Blocks() as demo:
|
| 539 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
|
| 541 |
# Pagination
|
| 542 |
with gr.Row():
|
|
@@ -652,7 +692,7 @@ with gr.Blocks() as demo:
|
|
| 652 |
)
|
| 653 |
|
| 654 |
# --- Auto-push ---
|
| 655 |
-
def auto_push_loop(interval=
|
| 656 |
while True:
|
| 657 |
try:
|
| 658 |
hf_token = os.environ.get("HF_TOKEN")
|
|
@@ -674,5 +714,5 @@ def auto_push_loop(interval=300):
|
|
| 674 |
print("⚠️ Push failed:", e)
|
| 675 |
time.sleep(interval)
|
| 676 |
|
| 677 |
-
threading.Thread(target=auto_push_loop, args=(
|
| 678 |
demo.launch()
|
|
|
|
| 536 |
|
| 537 |
# --- Gradio App ---
|
| 538 |
with gr.Blocks() as demo:
|
| 539 |
+
gr.Markdown("""
|
| 540 |
+
# Dataset Insight Portal
|
| 541 |
+
Welcome! This portal helps you explore and manage datasets from our Hugging Face organization.
|
| 542 |
+
## What is this space for?
|
| 543 |
+
This space provides a table of datasets along with metadata. You can:
|
| 544 |
+
- Browse datasets with pagination.
|
| 545 |
+
- Search datasets by various fields.
|
| 546 |
+
- Assign responsibility for reviewing datasets (`assigned_to`).
|
| 547 |
+
- Track progress using `status`.
|
| 548 |
+
## Why the table?
|
| 549 |
+
The table gives a structured view of all datasets, making it easy to sort, filter, and update information for each dataset. It includes all datasets up to 20-09-2025.
|
| 550 |
+
## What does the table contain?
|
| 551 |
+
Each row represents a dataset. Columns include:
|
| 552 |
+
- **dataset_id**: Unique identifier of the dataset.
|
| 553 |
+
- **dataset_url**: Link to the dataset page on Hugging Face.
|
| 554 |
+
- **downloads**: Number of downloads.
|
| 555 |
+
- **author**: Dataset author.
|
| 556 |
+
- **license**: License type.
|
| 557 |
+
- **tags**: Tags describing the dataset. Obtained from the dataset card.
|
| 558 |
+
- **task_categories**: Categories of tasks the dataset is useful for. Obtained from the dataset card.
|
| 559 |
+
- **last_modified**: Date of last update.
|
| 560 |
+
- **field, keyword**: Metadata columns describing dataset purpose based on heuristics. Use the `field` and `keyword` columns to filter for science-based datasets.
|
| 561 |
+
- **category**: Category of the dataset (`rich` means it has a good dataset card; `minimal` means the card needs improvement for the reasons below).
|
| 562 |
+
- **reason**: Reason why the dataset is classified as `minimal`. Options: `Failed to load card`, `No metadata and no description`, `No metadata and has description`, `Short description`.
|
| 563 |
+
- **usedStorage**: Storage used by the dataset (bytes).
|
| 564 |
+
- **assigned_to**: Person responsible for the dataset (editable).
|
| 565 |
+
- **status**: Progress status (editable). Options: `todo`, `inprogress`, `PR submitted`, `PR merged`.
|
| 566 |
+
## How to use search
|
| 567 |
+
- Select a **column** from the dropdown.
|
| 568 |
+
- If the column is textual, type your query in the text box.
|
| 569 |
+
- If the column is a dropdown (like `assigned_to` or `status`), select the value from the dropdown.
|
| 570 |
+
- Click **Search** to filter the table.
|
| 571 |
+
## How to add or update `assigned_to` and `status`
|
| 572 |
+
1. First, search for the **dataset_id**.
|
| 573 |
+
2. Then, select the **dataset_id** from the dropdown below the table.
|
| 574 |
+
3. Choose the person responsible in **Assigned To**. If you are a member of the organization, your username should appear in the list. If it does not, refresh and try again.
|
| 575 |
+
4. Select the current status in **Status**.
|
| 576 |
+
5. Click **Save Changes** to update the table and persist the changes.
|
| 577 |
+
6. Use **Refresh All** to reload the table and the latest members list.
|
| 578 |
+
This portal makes it easy to keep track of dataset reviews, assignments, and progress all in one place.
|
| 579 |
+
""")
|
| 580 |
|
| 581 |
# Pagination
|
| 582 |
with gr.Row():
|
|
|
|
| 692 |
)
|
| 693 |
|
| 694 |
# --- Auto-push ---
|
| 695 |
+
def auto_push_loop(interval=3600):
|
| 696 |
while True:
|
| 697 |
try:
|
| 698 |
hf_token = os.environ.get("HF_TOKEN")
|
|
|
|
| 714 |
print("⚠️ Push failed:", e)
|
| 715 |
time.sleep(interval)
|
| 716 |
|
| 717 |
+
threading.Thread(target=auto_push_loop, args=(3600,), daemon=True).start()
|
| 718 |
demo.launch()
|
datasetcards_new.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0d3770a3024eaf459d5c12d2c4a9d0d5a5043660d0a15c062a387595602eacf
|
| 3 |
+
size 38347730
|