File size: 4,785 Bytes
44f6f5f
c5ed223
44f6f5f
 
 
 
 
 
 
 
9646ea8
44f6f5f
 
9646ea8
44f6f5f
 
9646ea8
 
 
62e25b3
 
 
 
 
 
44f6f5f
 
62e25b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44f6f5f
62e25b3
44f6f5f
f1c52f2
 
 
44f6f5f
 
 
 
f1c52f2
44f6f5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376619b
44f6f5f
 
62e25b3
44f6f5f
 
 
 
 
 
62e25b3
 
44f6f5f
62e25b3
44f6f5f
c5ed223
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from typing import List, Dict
import httpx
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, ModelCard

def search_hub(query: str, search_type: str) -> pd.DataFrame:
    """
    Search the Hugging Face Hub and return the hits as a DataFrame.

    :param query: Free-text search string passed to the Hub listing call
    :param search_type: One of "Models", "Datasets" or "Spaces"; any other
        value yields an empty DataFrame
    :return: DataFrame with one row per hit. Columns are ``id``, ``author``,
        ``downloads`` (Models and Datasets only), ``link``, ``number``
        (1-based position) and ``formatted_link`` (HTML from ``format_link``)
    """
    hub = HfApi()

    if search_type == "Models":
        rows = [
            {
                "id": model.modelId,
                "author": model.author,
                "downloads": model.downloads,
                "link": f"https://huggingface.co/{model.modelId}",
            }
            for model in hub.list_models(search=query)
        ]
    elif search_type == "Datasets":
        rows = [
            {
                "id": dataset.id,
                "author": dataset.author,
                "downloads": dataset.downloads,
                "link": f"https://huggingface.co/datasets/{dataset.id}",
            }
            for dataset in hub.list_datasets(search=query)
        ]
    elif search_type == "Spaces":
        # Space records are built without a "downloads" entry.
        rows = [
            {
                "id": space.id,
                "author": space.author,
                "link": f"https://huggingface.co/spaces/{space.id}",
            }
            for space in hub.list_spaces(search=query)
        ]
    else:
        rows = []

    # Attach the 1-based position and the rendered HTML snippet to every row.
    for position, row in enumerate(rows, start=1):
        row['number'] = position
        row['formatted_link'] = format_link(row, position, search_type)

    return pd.DataFrame(rows)

def format_link(item: Dict, number: int, search_type: str) -> str:
    """
    Render one search result as an HTML snippet.

    Every interpolated value is HTML-escaped, so text coming back from the
    Hub cannot inject markup or break out of an ``href`` attribute. Values
    without special characters render exactly as before.

    :param item: Result record; must contain ``link``, ``id`` and ``author``,
        and may contain ``downloads``
    :param number: 1-based position shown in front of the id
    :param search_type: Plural type label ("Models", "Datasets", "Spaces");
        its last character is dropped to build the "View ..." link text
    :return: HTML ``<div>`` with the title, a link to the item, a link to its
        README and a metadata line
    :raises KeyError: If ``link``, ``id`` or ``author`` is missing from ``item``
    """
    from html import escape

    raw_link = str(item['link'])
    link = escape(raw_link)
    readme_link = escape(f"{raw_link}/blob/main/README.md")
    title = escape(f"{number}. {item['id']}")

    metadata = f"Author: {item['author']}"
    if 'downloads' in item:
        metadata += f", Downloads: {item['downloads']}"
    metadata = escape(metadata)

    # "Models" -> "Model", "Datasets" -> "Dataset", "Spaces" -> "Space".
    singular = escape(search_type[:-1])

    snippet = f"""
    <div style="margin-bottom: 10px;">
        <strong>{title}</strong><br>
        <a href="{link}" target="_blank" style="color: #4a90e2; text-decoration: none;">View {singular}</a> | 
        <a href="{readme_link}" target="_blank" style="color: #4a90e2; text-decoration: none;">View README</a><br>
        <small>{metadata}</small>
    </div>
    """
    return snippet

def display_results(df: pd.DataFrame):
    """
    Build the HTML shown in the results panel.

    :param df: Search results with a ``formatted_link`` column holding one
        HTML snippet per row; may be ``None`` or empty
    :return: The snippets concatenated inside a scrollable ``<div>``, or a
        "No results found." paragraph when there is nothing to show
    """
    if df is None or df.empty:
        return "<p>No results found.</p>"

    pieces = ["<div style='max-height: 400px; overflow-y: auto;'>"]
    pieces.extend(record['formatted_link'] for _, record in df.iterrows())
    pieces.append("</div>")
    return "".join(pieces)

def load_metadata(evt: gr.SelectData, df: pd.DataFrame, search_type: str):
    """
    Fetch and stringify the Hub metadata for the row a user selected.

    :param evt: Gradio selection event; ``evt.index`` identifies the row
    :param df: Search results containing an ``id`` column; may be ``None``
        or empty
    :param search_type: "Models", "Datasets" or "Spaces"
    :return: ``str()`` of the model card / dataset info / space info, an
        "Error loading ..." message if the lookup fails, or an empty string
        when there is nothing to load (no data, row out of range, unknown
        search type)
    """
    if df is None or df.empty:
        return ""

    # Gradio reports the selection either as a plain int or as a
    # (row, column) sequence depending on the component; accept both.
    selected = evt.index
    row = selected[0] if isinstance(selected, (list, tuple)) else selected
    if row >= len(df):
        return ""

    item_id = df.iloc[row]['id']

    if search_type == "Models":
        label, fetch = "model card", ModelCard.load
    elif search_type == "Datasets":
        label, fetch = "dataset info", HfApi().dataset_info
    elif search_type == "Spaces":
        label, fetch = "space info", HfApi().space_info
    else:
        return ""

    # This is a UI callback: report any lookup failure as text for all three
    # types (previously only model lookups were guarded) instead of letting
    # the exception escape the handler.
    try:
        return str(fetch(item_id))
    except Exception as e:
        return f"Error loading {label}: {str(e)}"

def _is_missing(value) -> bool:
    """Return True for ``None`` and for NaN (the only value unequal to itself)."""
    return value is None or value != value


def _item_type(item: Dict) -> str:
    """Classify one search-result record as "Models", "Datasets" or "Spaces"."""
    if "modelId" in item:
        return "Models"

    # Records built by search_hub carry a link whose prefix identifies the
    # repo type; model links have no type segment.
    link = item.get("link")
    if isinstance(link, str):
        if link.startswith("https://huggingface.co/datasets/"):
            return "Datasets"
        if link.startswith("https://huggingface.co/spaces/"):
            return "Spaces"
        return "Models"

    # No link available: fall back to the original id-based heuristic.
    if "dataset" in str(item.get("id", "")):
        return "Datasets"
    return "Spaces"


def SwarmyTime(data: List[Dict]) -> Dict:
    """
    Aggregates summary statistics over the given search results.

    Missing values are tolerated: an absent, ``None`` or NaN author counts as
    "Unknown", and an absent, ``None`` or NaN download count contributes 0
    (records that went through a DataFrame may carry NaN in place of None).

    :param data: List of dictionaries containing the search results
    :return: Dictionary with ``total_items``, ``unique_authors`` (a count),
        ``total_downloads`` and ``item_types`` (per-type counts keyed by
        "Models", "Datasets" and "Spaces")
    """
    authors = set()
    total_downloads = 0
    item_types = {"Models": 0, "Datasets": 0, "Spaces": 0}

    for item in data:
        author = item.get("author")
        authors.add("Unknown" if _is_missing(author) else author)

        downloads = item.get("downloads")
        if not _is_missing(downloads):
            total_downloads += downloads

        item_types[_item_type(item)] += 1

    return {
        "total_items": len(data),
        "unique_authors": len(authors),
        "total_downloads": total_downloads,
        "item_types": item_types,
    }

# Build the Gradio UI. Components are created in display order: a heading,
# a row with the query box / type selector / search button, then the three
# output areas.
with gr.Blocks() as demo:
    gr.Markdown("## Search the Hugging Face Hub")
    with gr.Row():
        search_query = gr.Textbox(label="Search Query", value="awacke1")
        search_type = gr.Radio(["Models", "Datasets", "Spaces"], label="Search Type", value="Models")
        search_button = gr.Button("Search")
    results_html = gr.HTML(label="Search Results")
    # NOTE(review): no handler in the visible code writes to metadata_output,
    # and load_metadata is not attached to any event here - confirm whether
    # the wiring lives elsewhere or is still to be done.
    metadata_output = gr.Textbox(label="Metadata", lines=10)
    aggregated_output = gr.JSON(label="Aggregated Content")

    def search_and_aggregate(query, search_type):
        """Run the search and return (results HTML, aggregated statistics)."""
        df = search_hub(query, search_type)
        # SwarmyTime works on plain dicts, so hand it the rows as records.
        aggregated = SwarmyTime(df.to_dict('records'))
        html_results = display_results(df)
        return html_results, aggregated

    # Clicking "Search" fills the results panel and the aggregated JSON view.
    search_button.click(search_and_aggregate, inputs=[search_query, search_type], outputs=[results_html, aggregated_output])

# Runs at module level, so the app starts as soon as this file is executed.
demo.launch(debug=True)