Model-Drops-Tracker-Full

Sleeping

File size: 3,382 Bytes

import html
from datetime import datetime, timedelta, timezone

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi

# Shared Hugging Face Hub client, created once at import time with default
# settings (no arguments are passed) and reused by get_recent_models().
api = HfApi()

def _split_terms(raw):
    """Split a ``;``-separated string into a set of lower-cased, non-empty terms."""
    return {term.strip().lower() for term in (raw or "").split(";") if term.strip()}


def get_recent_models(min_likes, days_ago, filter_string, search_string):
    """Return recently created Hub models that have at least ``min_likes`` likes.

    Models are requested from the Hub sorted by likes in descending order, so
    iteration stops at the first model that falls below ``min_likes``.

    Args:
        min_likes: Minimum number of likes a model needs to be listed.
        days_ago: Size of the look-back window in days, counted back from the
            current UTC time.
        filter_string: ``;``-separated substrings. A model whose ID contains
            any of them (case-insensitive) is excluded. Empty or ``None``
            disables the filter.
        search_string: ``;``-separated substrings. When non-empty, only models
            whose ID contains at least one of them (case-insensitive) are
            kept. Empty or ``None`` disables the search.

    Returns:
        A ``pandas.DataFrame`` with the columns "Model ID" (an HTML link to
        the model page), "Likes", "Creation Date" (UTC, ``%Y-%m-%d %H:%M``)
        and "Task". The columns are present even when no model matches.
    """
    columns = ["Model ID", "Likes", "Creation Date", "Task"]

    # Timezone-aware "now": datetime.utcnow() is deprecated and returns a
    # naive value that cannot be compared with aware timestamps.
    start_date = datetime.now(timezone.utc) - timedelta(days=days_ago)

    filter_substrings = _split_terms(filter_string)
    search_substrings = _split_terms(search_string)

    recent_models = []

    for model in api.list_models(sort="likes", direction=-1):
        # A missing like count is treated as zero instead of raising TypeError.
        likes = model.likes if model.likes is not None else 0
        if likes < min_likes:
            # Results are sorted by likes (descending): nothing further down
            # the listing can satisfy the threshold.
            break

        created_at = model.created_at
        if created_at is None:
            continue
        if created_at.tzinfo is None:
            # NOTE(review): naive timestamps are assumed to already be UTC.
            created_at = created_at.replace(tzinfo=timezone.utc)
        else:
            # Convert rather than merely dropping tzinfo, so a non-UTC offset
            # would not shift the comparison or the displayed date.
            created_at = created_at.astimezone(timezone.utc)
        if created_at < start_date:
            continue

        model_id = model.modelId
        model_id_lower = model_id.lower()
        if search_substrings and not any(term in model_id_lower for term in search_substrings):
            continue
        if any(sub in model_id_lower for sub in filter_substrings):
            continue

        # The attribute may exist but be None; fall back to "N/A" in both cases.
        task = getattr(model, "pipeline_tag", None) or "N/A"

        # Escape the ID before embedding it in markup rendered by the UI.
        safe_id = html.escape(model_id, quote=True)
        recent_models.append({
            "Model ID": f'<a href="https://huggingface.co/{safe_id}" target="_blank">{safe_id}</a>',
            "Likes": likes,
            "Creation Date": created_at.strftime("%Y-%m-%d %H:%M"),
            "Task": task,
        })

    # Passing ``columns`` keeps the header row when the result list is empty.
    return pd.DataFrame(recent_models, columns=columns)

# Gradio UI: two sliders and two text boxes feed get_recent_models(); the
# returned DataFrame is rendered in the table below the "Run" button.
with gr.Blocks() as demo:
    gr.Markdown("# Model Drops Tracker 🚀")
    gr.Markdown(
        "Overwhelmed by the rapid pace of model releases? 😅 You're not alone! "
        "That's exactly why I built this tool. Easily filter recent models from the Hub "
        "by setting a minimum number of likes and the number of days since their release. "
        "Click on a model to see its card. Use `;` to split filter and search terms."
    )

    # Numeric criteria: like threshold and look-back window in days.
    with gr.Row():
        likes_slider = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Minimum Likes")
        days_slider = gr.Slider(minimum=1, maximum=30, step=1, value=3, label="Days Ago")

    # Text criteria: `;`-separated exclude / include terms matched on the model ID.
    with gr.Row():
        filter_text = gr.Text(label="Filter", max_lines=1, placeholder="Exclude models containing these terms (separate by `;`)")
        search_text = gr.Text(label="Search", max_lines=1, placeholder="Include only models containing these terms (separate by `;`)")

    btn = gr.Button("Run")

    with gr.Column():
        df = gr.DataFrame(
            headers=["Model ID", "Likes", "Creation Date", "Task"],
            wrap=True,
            # One datatype per header; "html" lets the model link render as a
            # clickable anchor. The original list had only three entries for
            # four columns, leaving "Task" without a declared type.
            datatype=["html", "number", "str", "str"],
        )

    # Input order must match the parameter order of get_recent_models().
    btn.click(fn=get_recent_models, inputs=[likes_slider, days_slider, filter_text, search_text], outputs=df)

# Start the app only when the file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()