import os
import random
import base64
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException, TimeoutException
from PIL import Image
from io import BytesIO
from datetime import datetime
import gradio as gr
from typing import Optional, Tuple
import time
from pathlib import Path

# On-disk screenshot cache directory (created at import time if missing).
CACHE_DIR = Path("screenshot_cache")
CACHE_DIR.mkdir(exist_ok=True)

# In-memory screenshot cache: normalized URL -> base64-encoded PNG.
SCREENSHOT_CACHE = {}


def get_cached_screenshot(url: str) -> Optional[str]:
    """Return a base64-encoded PNG screenshot of *url*, using the disk cache.

    The cache file is looked up in ``CACHE_DIR``. On a miss the screenshot is
    taken with ``take_screenshot`` and, when that succeeds, written to the
    cache so later calls can reuse it.

    Args:
        url: Page address, exactly as the caller supplies it.

    Returns:
        The base64 text of the PNG, or ``None`` when the capture failed.
    """
    # URL-safe alphabet: standard base64 can emit "/", which would be
    # interpreted as a path separator in the file name.
    file_stem = base64.urlsafe_b64encode(url.encode()).decode()
    cache_file = CACHE_DIR / f"{file_stem}.png"

    if cache_file.exists():
        return base64.b64encode(cache_file.read_bytes()).decode()

    encoded = take_screenshot(url)
    if encoded is not None:
        try:
            cache_file.write_bytes(base64.b64decode(encoded))
        except OSError as exc:
            # Caching is best-effort; the screenshot itself is still usable.
            print(f"Could not write screenshot cache for {url}: {exc}")
    return encoded


def take_screenshot(url):
    """Capture a screenshot of a web page with headless Chrome.

    Args:
        url: Page address; ``https://`` is prepended when it does not start
            with ``http``.

    Returns:
        The base64 text of a PNG screenshot, or ``None`` if the browser
        could not be started or the capture failed.
    """
    # Normalize first so the cache lookup and the cache store use the same key.
    if not url.startswith('http'):
        url = f"https://{url}"

    if url in SCREENSHOT_CACHE:
        return SCREENSHOT_CACHE[url]

    options = webdriver.ChromeOptions()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--window-size=1080,720',
    ):
        options.add_argument(flag)

    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)

        # Explicit wait: up to 10 seconds for the <body> element to exist.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
        except TimeoutException:
            print(f"페이지 로딩 타임아웃: {url}")

        # Extra settle time for late-rendering content.
        time.sleep(2)

        # Actually wait for the document to finish loading; a bare comparison
        # of readyState would be evaluated once and discarded.
        try:
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            print(f"페이지 로딩 타임아웃: {url}")

        # Take the screenshot and re-encode it as PNG through PIL.
        screenshot = driver.get_screenshot_as_png()
        img = Image.open(BytesIO(screenshot))
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        base64_image = base64.b64encode(buffered.getvalue()).decode()

        SCREENSHOT_CACHE[url] = base64_image
        return base64_image

    except WebDriverException as e:
        print(f"스크린샷 촬영 실패: {str(e)} for URL: {url}")
        return None
    except Exception as e:
        print(f"예상치 못한 오류: {str(e)} for URL: {url}")
        return None
    finally:
        # driver stays None when webdriver.Chrome() itself raised.
        if driver is not None:
            driver.quit()
from datetime import datetime, timedelta


# Inclusive upper bounds of the combined score for 1, 2, 3 and 4 stars;
# anything above the last bound earns 5 stars.
_STAR_THRESHOLDS = (100, 200, 300, 400)

# (minimum total score, grade) pairs, highest first — 18 steps.
_GRADE_TABLE = (
    (9000, "AAA+"), (8500, "AAA"), (8000, "AAA-"),
    (7500, "AA+"), (7000, "AA"), (6500, "AA-"),
    (6000, "A+"), (5500, "A"), (5000, "A-"),
    (4500, "BBB+"), (4000, "BBB"), (3500, "BBB-"),
    (3000, "BB+"), (2500, "BB"), (2000, "BB-"),
    (1500, "B+"), (1000, "B"), (500, "B-"),
)


def calculate_rising_rate(created_date: str, rank: int) -> int:
    """Compute the "AI Rising Rate" star count (1-5).

    The score is the sum of a recency score (300 minus the age in days) and a
    rank score (300 minus the rank), each clamped to the range 0-300.

    Args:
        created_date: Creation timestamp; only the ``YYYY-MM-DD`` part before
            any ``T`` is used. A missing or unparsable value contributes a
            recency score of 0 instead of raising.
        rank: Position in the listing.

    Returns:
        Number of stars, from 1 to 5.
    """
    try:
        created = datetime.strptime((created_date or '').split('T')[0], '%Y-%m-%d')
    except ValueError:
        # One bad record should not break sorting or rendering of a listing.
        date_score = 0
    else:
        age_days = (datetime.now() - created).days
        # Upper clamp covers dates that are slightly in the future.
        date_score = min(300, max(0, 300 - age_days))

    rank_score = min(300, max(0, 300 - rank))
    total_score = date_score + rank_score

    for stars, upper_bound in enumerate(_STAR_THRESHOLDS, start=1):
        if total_score <= upper_bound:
            return stars
    return 5


def get_popularity_grade(likes: int, stars: int) -> tuple:
    """Compute the "AI Popularity Score" grade.

    Args:
        likes: Like count; capped at 10000 points.
        stars: Star count; each star adds 500 points.

    Returns:
        ``(grade, total_score)`` where ``grade`` is taken from the 18-step
        table. Scores below the lowest threshold also get ``"B-"``.
    """
    total_score = min(likes, 10000) + stars * 500

    for threshold, grade in _GRADE_TABLE:
        if total_score >= threshold:
            return grade, total_score
    return "B-", total_score


def get_rating_info(item: dict, index: int) -> str:
    """Build the rating snippet (rising-rate stars and popularity grade).

    Args:
        item: Record with optional ``createdAt`` and ``likes`` entries;
            ``likes`` may be a number or a string with thousands separators.
        index: Zero-based position of the item; its rank is ``index + 1``.

    Returns:
        The formatted rating text.
    """
    created = (item.get('createdAt') or '').split('T')[0]
    likes = int(str(item.get('likes') or 0).replace(',', ''))

    # Rising rate rendered as filled stars followed by empty ones.
    stars = calculate_rising_rate(created, index + 1)
    star_html = "★" * stars + "☆" * (5 - stars)

    grade, score = get_popularity_grade(likes, stars)

    # Colour per grade family (suffix "+" / "-" stripped).
    grade_colors = {
        'AAA': '#FFD700',
        'AA': '#FFA500',
        'A': '#FF4500',
        'BBB': '#4169E1',
        'BB': '#1E90FF',
        'B': '#00BFFF'
    }
    # NOTE(review): grade_color is not interpolated by the template visible
    # here; the markup that presumably consumed it seems to be missing from
    # this copy of the file — confirm before removing the lookup.
    grade_color = grade_colors.get(grade.rstrip('+-'), '#666666')

    return (
        "\n"
        f"AI Rising Rate: {star_html}\n"
        f"AI Popularity Score: {grade} ({score:,})\n"
    )
def get_hardware_info(item: dict) -> tuple:
    """Extract hardware details from an item record.

    Args:
        item: Record with optional ``runtime``, ``sdk`` and ``sdk_version``
            entries.

    Returns:
        ``(cpu_info, gpu_info, sdk)``. Any error while reading the record
        yields the defaults ``('Standard', 'None', 'N/A')``.
    """
    try:
        # A present-but-null runtime is treated like a missing one.
        runtime = item.get('runtime') or {}

        cpu_info = runtime.get('cpu', 'Standard')

        gpu_info = "None"
        if runtime.get('accelerator') == "gpu":
            gpu = runtime.get('gpu', {})
            gpu_type = gpu.get('name', '')
            gpu_memory = gpu.get('memory', '')
            if gpu_type:
                gpu_info = f"{gpu_type}"
                if gpu_memory:
                    gpu_info += f" ({gpu_memory}GB)"

        # Fall back to a generic label when the spaces GPU marker is present
        # but no concrete GPU was reported.
        if '@spaces.GPU' in str(item.get('sdk_version', '')) and gpu_info == "None":
            gpu_info = "GPU Enabled"

        sdk = item.get('sdk', 'N/A')

        print(f"Debug - Runtime Info: {runtime}")
        print(f"Debug - GPU Info: {gpu_info}")

        return cpu_info, gpu_info, sdk

    except Exception as e:
        # Best-effort parsing: report and return the defaults.
        print(f"Error parsing hardware info: {str(e)}")
        return 'Standard', 'None', 'N/A'


def get_card(item: dict, index: int, card_type: str = "space") -> str:
    """Build the card text for one space, model or dataset.

    Args:
        item: Record with ``id`` and optional ``likes``, ``createdAt``,
            ``tags`` and ``author`` entries.
        index: Zero-based position; displayed as ``index + 1``.
        card_type: ``"space"``, ``"model"``, or anything else for a dataset.

    Returns:
        The card as a string. API-supplied title, author and tags are
        HTML-escaped. For spaces, ``get_cached_screenshot`` is called once
        for the card background.
    """
    from html import escape  # local import: only this function needs it

    item_id = item.get('id', '')
    # Ids without a namespace (no "/") must not crash the whole gallery.
    author, separator, title = item_id.partition('/')
    if not separator:
        # NOTE(review): falls back to an 'author' entry if the record has one.
        author, title = item.get('author') or '', item_id
    author, title = escape(author), escape(title)

    # Accept both numeric likes and strings with thousands separators.
    likes = f"{int(str(item.get('likes') or 0).replace(',', '')):,}"
    created = (item.get('createdAt') or '').split('T')[0]
    tags = item.get('tags') or []

    rating_info = get_rating_info(item, index)

    # Per-type URL, gradient, background and label.
    if card_type == "space":
        url = f"https://huggingface.co/spaces/{item_id}"
        gradient_colors = """
            rgba(255, 182, 193, 0.7), /* 파스텔 핑크 */
            rgba(173, 216, 230, 0.7), /* 파스텔 블루 */
            rgba(255, 218, 185, 0.7) /* 파스텔 피치 */
        """
        # Single call: a failed capture is not retried for the same card.
        screenshot = get_cached_screenshot(url) or ''
        bg_content = f"""
            background-image: url(data:image/png;base64,{screenshot});
            background-size: cover;
            background-position: center;
        """
        type_icon = "🎯"
        type_label = "SPACE"
    elif card_type == "model":
        url = f"https://huggingface.co/{item_id}"
        gradient_colors = """
            rgba(110, 142, 251, 0.7), /* 모델 블루 */
            rgba(130, 158, 251, 0.7),
            rgba(150, 174, 251, 0.7)
        """
        bg_content = """
            background: linear-gradient(135deg, #6e8efb, #4a6cf7);
            padding: 15px;
        """
        type_icon = "🤖"
        type_label = "MODEL"
    else:  # dataset
        url = f"https://huggingface.co/datasets/{item_id}"
        gradient_colors = """
            rgba(255, 107, 107, 0.7), /* 데이터셋 레드 */
            rgba(255, 127, 127, 0.7),
            rgba(255, 147, 147, 0.7)
        """
        bg_content = """
            background: linear-gradient(135deg, #ff6b6b, #ff8787);
            padding: 15px;
        """
        type_icon = "📊"
        type_label = "DATASET"

    # NOTE(review): bg_content and content_bg are built but not interpolated
    # by the template visible here; the surrounding markup appears to be
    # missing from this copy of the file — confirm before removing them.
    content_bg = f"""
        background: linear-gradient(135deg, {gradient_colors});
        backdrop-filter: blur(10px);
    """

    # Tag list is shown for models and datasets only (first five tags).
    tags_html = ""
    if card_type != "space":
        tag_text = ' '.join(f" #{escape(str(tag))} " for tag in tags[:5])
        tags_html = f"\n{tag_text}\n"

    card_lines = [
        "",
        f"#{index + 1}",
        f"{type_icon} {type_label}",
        tags_html,
        "",
        title,
        "",
        f"👤 {author}",
        f"❤️ {likes}",
        f"📅 {created}",
        rating_info,
        "",
    ]
    return "\n".join(card_lines)
def get_trending_spaces(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch spaces from the Hugging Face API and render them as cards.

    Args:
        search_query: Optional case-insensitive substring matched against each
            space's id and title. When given, up to 1000 spaces are fetched
            and the first 300 matches are kept.
        sort_by: ``"rank"`` keeps the API order; ``"rising_rate"`` and
            ``"popularity"`` sort in descending order of the respective score.
        progress: Gradio progress callback.

    Returns:
        ``(gallery_text, status_message)``. On any error both elements carry
        the error message instead.
    """
    url = "https://huggingface.co/api/spaces"

    try:
        progress(0, desc="Fetching spaces data...")

        # One request only: the larger page is needed just for searching.
        # NOTE(review): the non-search limit is 10 although the original
        # comment spoke of 300 — confirm the intended size.
        params = {
            'full': 'true',
            'limit': 1000 if search_query else 10
        }
        # A timeout keeps a stalled connection from hanging the UI forever.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        spaces = response.json()

        if search_query:
            needle = search_query.lower()
            # "or ''" guards against null id/title values in the payload.
            spaces = [
                space for space in spaces
                if needle in ((space.get('id') or '') + (space.get('title') or '')).lower()
            ][:300]

        # "rank" (and any unknown value) keeps the order returned by the API.
        if sort_by == "rising_rate":
            spaces.sort(
                key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0),
                reverse=True,
            )
        elif sort_by == "popularity":
            spaces.sort(
                key=lambda x: get_popularity_grade(
                    int(str(x.get('likes', '0')).replace(',', '')),
                    calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True,
            )

        progress(0.1, desc="Creating gallery...")
        total = len(spaces)
        cards = []
        for idx, space in enumerate(spaces):
            cards.append(get_card(space, idx, "space"))
            progress((0.1 + 0.9 * idx / total), desc=f"Loading space {idx+1}/{total}...")
        html_content = "\n" + "".join(cards) + "\n"

        progress(1.0, desc="Complete!")
        return html_content, f"Found {total} spaces"

    except Exception as e:
        # UI boundary: show the failure in the page instead of raising.
        error_html = f"\nError: {str(e)}\n"
        return error_html, f"Error: {str(e)}"
def get_models(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch models from the Hugging Face API and render them as cards.

    Args:
        search_query: Optional case-insensitive substring matched against each
            model's id and title. When given, up to 1000 models are fetched
            and the first 300 matches are kept; otherwise 300 are fetched.
        sort_by: ``"rank"`` keeps the API order; ``"rising_rate"`` and
            ``"popularity"`` sort in descending order of the respective score.
        progress: Gradio progress callback.

    Returns:
        ``(gallery_text, status_message)``. On any error both elements carry
        the error message instead.
    """
    url = "https://huggingface.co/api/models"

    try:
        progress(0, desc="Fetching models data...")

        # One request only: the larger page is needed just for searching.
        params = {
            'full': 'true',
            'limit': 1000 if search_query else 300
        }
        # A timeout keeps a stalled connection from hanging the UI forever.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        models = response.json()

        if search_query:
            needle = search_query.lower()
            # "or ''" guards against null id/title values in the payload.
            models = [
                model for model in models
                if needle in ((model.get('id') or '') + (model.get('title') or '')).lower()
            ][:300]

        # "rank" (and any unknown value) keeps the order returned by the API.
        if sort_by == "rising_rate":
            models.sort(
                key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0),
                reverse=True,
            )
        elif sort_by == "popularity":
            models.sort(
                key=lambda x: get_popularity_grade(
                    int(str(x.get('likes', '0')).replace(',', '')),
                    calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True,
            )

        progress(0.1, desc="Creating gallery...")
        total = len(models)
        cards = []
        for idx, model in enumerate(models):
            cards.append(get_card(model, idx, "model"))
            progress((0.1 + 0.9 * idx / total), desc=f"Loading model {idx+1}/{total}...")
        html_content = "\n" + "".join(cards) + "\n"

        progress(1.0, desc="Complete!")
        return html_content, f"Found {total} models"

    except Exception as e:
        # UI boundary: show the failure in the page instead of raising.
        error_html = f"\nError: {str(e)}\n"
        return error_html, f"Error: {str(e)}"
def get_datasets(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch datasets from the Hugging Face API and render them as cards.

    Args:
        search_query: Optional case-insensitive substring matched against each
            dataset's id and title. When given, up to 1000 datasets are
            fetched and the first 300 matches are kept; otherwise 300 are
            fetched.
        sort_by: ``"rank"`` keeps the API order; ``"rising_rate"`` and
            ``"popularity"`` sort in descending order of the respective score.
        progress: Gradio progress callback.

    Returns:
        ``(gallery_text, status_message)``. On any error both elements carry
        the error message instead.
    """
    url = "https://huggingface.co/api/datasets"

    try:
        progress(0, desc="Fetching datasets data...")

        # One request only: the larger page is needed just for searching.
        params = {
            'full': 'true',
            'limit': 1000 if search_query else 300
        }
        # A timeout keeps a stalled connection from hanging the UI forever.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        datasets = response.json()

        if search_query:
            needle = search_query.lower()
            # "or ''" guards against null id/title values in the payload.
            datasets = [
                dataset for dataset in datasets
                if needle in ((dataset.get('id') or '') + (dataset.get('title') or '')).lower()
            ][:300]

        # "rank" (and any unknown value) keeps the order returned by the API.
        if sort_by == "rising_rate":
            datasets.sort(
                key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0),
                reverse=True,
            )
        elif sort_by == "popularity":
            datasets.sort(
                key=lambda x: get_popularity_grade(
                    int(str(x.get('likes', '0')).replace(',', '')),
                    calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True,
            )

        progress(0.1, desc="Creating gallery...")
        total = len(datasets)
        cards = []
        for idx, dataset in enumerate(datasets):
            cards.append(get_card(dataset, idx, "dataset"))
            progress((0.1 + 0.9 * idx / total), desc=f"Loading dataset {idx+1}/{total}...")
        html_content = "\n" + "".join(cards) + "\n"

        progress(1.0, desc="Complete!")
        return html_content, f"Found {total} datasets"

    except Exception as e:
        # UI boundary: show the failure in the page instead of raising.
        error_html = f"\nError: {str(e)}\n"
        return error_html, f"Error: {str(e)}"
def sort_items(items, sort_by):
    """Order *items* according to *sort_by*.

    Args:
        items: List of item records (dicts with optional ``createdAt`` and
            ``likes`` entries).
        sort_by: ``"rank"`` returns the list unchanged; ``"rising_rate"`` and
            ``"popularity"`` return a new list sorted in descending order of
            the respective score. Any other value returns the list unchanged.

    Returns:
        The ordered list.
    """
    if sort_by == "rising_rate":
        return sorted(
            items,
            key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0),
            reverse=True,
        )
    if sort_by == "popularity":
        return sorted(
            items,
            key=lambda x: get_popularity_grade(
                int(str(x.get('likes', '0')).replace(',', '')),
                calculate_rising_rate(x.get('createdAt', ''), 0))[1],
            reverse=True,
        )
    # "rank" (already in rank order) and unknown keys: leave as is.
    return items


def fetch_items(item_type, search_query="", sort_by="rank", limit=1000):
    """Fetch items (spaces/models/datasets) from the Hugging Face API.

    Args:
        item_type: API collection name inserted into the URL.
        search_query: Optional search text; sent to the API and also applied
            locally as a case-insensitive substring match on id and title.
        sort_by: Ordering key passed to ``sort_items``.
        limit: Maximum number of items requested from the API.

    Returns:
        At most 300 items; an empty list when the request or parsing fails.
    """
    base_url = f"https://huggingface.co/api/{item_type}"
    params = {
        'full': 'true',
        'limit': limit,
    }
    # Only send the search parameter when there is something to search for.
    if search_query:
        params['search'] = search_query

    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        items = response.json()

        if search_query:
            needle = search_query.lower()
            # "or ''" guards against null id/title values in the payload.
            items = [
                item for item in items
                if needle in ((item.get('id') or '') + (item.get('title') or '')).lower()
            ]

        return sort_items(items, sort_by)[:300]
    except Exception as e:
        # Best-effort fetch: report and return nothing.
        print(f"Error fetching items: {e}")
        return []


def create_interface():
    """Build the Gradio Blocks UI with Spaces, Models and Datasets tabs.

    Each tab has a search box, a sort selector, a refresh button, a gallery
    and a status line. Clicking refresh, editing the search text or changing
    the sort order re-runs the tab's loader function.

    Returns:
        The ``gr.Blocks`` interface.
    """
    # (tab title, search box label, loader function)
    tab_specs = [
        ("🎯 Trending Spaces", "Search Spaces", get_trending_spaces),
        ("🤖 Trending Models", "Search Models", get_models),
        ("📊 Trending Datasets", "Search Datasets", get_datasets),
    ]

    with gr.Blocks(title="HuggingFace Trending Board") as interface:
        gr.Markdown("# 🤗 HuggingFace Trending TOP 300 Board")

        tabs_built = []
        with gr.Tabs():
            for tab_title, search_label, loader in tab_specs:
                with gr.Tab(tab_title):
                    with gr.Row():
                        search_box = gr.Textbox(
                            label=search_label,
                            placeholder="Enter search terms..."
                        )
                        sort_radio = gr.Radio(
                            choices=["rank", "rising_rate", "popularity"],
                            value="rank",
                            label="Sort by",
                            interactive=True
                        )
                        refresh_btn = gr.Button("Refresh", variant="primary")
                    gallery = gr.HTML()
                    status = gr.Markdown("Ready")
                tabs_built.append((loader, search_box, sort_radio, refresh_btn, gallery, status))

        # Register handlers in the same order as before: all refresh clicks,
        # then all search changes, then all sort changes.
        for loader, search_box, sort_radio, refresh_btn, gallery, status in tabs_built:
            refresh_btn.click(
                fn=loader,
                inputs=[search_box, sort_radio],
                outputs=[gallery, status]
            )
        for loader, search_box, sort_radio, refresh_btn, gallery, status in tabs_built:
            search_box.change(
                fn=loader,
                inputs=[search_box, sort_radio],
                outputs=[gallery, status]
            )
        for loader, search_box, sort_radio, refresh_btn, gallery, status in tabs_built:
            sort_radio.change(
                fn=loader,
                inputs=[search_box, sort_radio],
                outputs=[gallery, status]
            )

    return interface


if __name__ == "__main__":
    try:
        demo = create_interface()
        demo.launch(
            share=True,
            inbrowser=True,
            show_api=False
        )
    except Exception as e:
        print(f"Error launching app: {e}")