import os import gradio as gr from huggingface_hub import get_collection import re def extract_collection_id(input_text): # Check if input is a full URL url_match = re.match(r'https://huggingface\.co/collections/(.+)$', input_text) if url_match: return url_match.group(1) # Check if input is already in the correct format if re.match(r'^[\w-]+/[\w-]+$', input_text): return input_text return None def load_collection(): collection_input = os.getenv('COLLECTION_SLUG_OR_URL') if not collection_input: raise ValueError("COLLECTION_SLUG_OR_URL environment variable is not set.") collection_id = extract_collection_id(collection_input) if not collection_id: raise ValueError("Invalid collection ID or URL in COLLECTION_SLUG_OR_URL environment variable.") collection = get_collection(collection_id) dataset_ids = [item.item_id for item in collection.items if item.item_type == 'dataset'] if not dataset_ids: raise ValueError("No datasets found in this collection.") return dataset_ids, collection_id def display_dataset(dataset_ids, index): dataset_id = dataset_ids[index] return gr.HTML(f"""""") def navigate_dataset(dataset_ids, index, direction): new_index = (index + direction) % len(dataset_ids) return new_index, f"Dataset {new_index + 1} of {len(dataset_ids)}: {dataset_ids[new_index]}" try: dataset_ids, collection_id = load_collection() with gr.Blocks() as demo: gr.Markdown(f"