|
|
"""
|
|
|
Hugging Face Models Integration for OpenManus AI Agent
|
|
|
Comprehensive integration with Hugging Face Inference API for all model categories
|
|
|
"""
|
|
|
|
|
|
import asyncio
|
|
|
import base64
|
|
|
import io
|
|
|
import json
|
|
|
import logging
|
|
|
from dataclasses import dataclass
|
|
|
from enum import Enum
|
|
|
from typing import Any, Dict, List, Optional, Union
|
|
|
|
|
|
import aiohttp
|
|
|
import PIL.Image
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
class ModelCategory(Enum):
    """Categories of Hugging Face models available.

    Each member's value is a lowercase, hyphen-separated string identifier
    (for example ``"text-generation"``), so a member can be looked up from
    its string form with ``ModelCategory("text-generation")``.

    Members are grouped below by theme purely for readability; the grouping
    has no runtime meaning. Declaration order is preserved because iteration
    over the enum follows it.
    """

    # General NLP, vision and audio tasks
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"

    # Video and motion
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"

    # Code and software development
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"

    # 3D, AR and VR content
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    # Python identifiers cannot start with a digit, hence THREE_D_ for "3d-".
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"

    # Document processing
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"

    # Multimodal
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"

    # Specialized media generation and restoration
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"

    # Creative writing
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"

    # Game development
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"

    # Science and research
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"

    # Business and productivity
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"

    # Education and tutoring
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"

    # Image and face editing
    IMAGE_EDITING = "image-editing"
    FACE_SWAP = "face-swap"
    FACE_ENHANCEMENT = "face-enhancement"
    FACE_GENERATION = "face-generation"
    PORTRAIT_EDITING = "portrait-editing"
    PHOTO_RESTORATION = "photo-restoration"
    IMAGE_UPSCALING = "image-upscaling"
    COLOR_CORRECTION = "color-correction"
    ARTISTIC_FILTER = "artistic-filter"

    # Speech and audio
    ADVANCED_TTS = "advanced-tts"
    ADVANCED_STT = "advanced-stt"
    VOICE_CONVERSION = "voice-conversion"
    SPEECH_ENHANCEMENT = "speech-enhancement"
    AUDIO_GENERATION = "audio-generation"
    MULTILINGUAL_TTS = "multilingual-tts"
    MULTILINGUAL_STT = "multilingual-stt"
    REAL_TIME_TRANSLATION = "real-time-translation"

    # Avatars and animation
    TALKING_AVATAR = "talking-avatar"
    AVATAR_GENERATION = "avatar-generation"
    LIP_SYNC = "lip-sync"
    FACIAL_ANIMATION = "facial-animation"
    GESTURE_GENERATION = "gesture-generation"
    VIRTUAL_PRESENTER = "virtual-presenter"
    AI_ANCHOR = "ai-anchor"

    # Chat variants
    INTERACTIVE_CHAT = "interactive-chat"
    BILINGUAL_CONVERSATION = "bilingual-conversation"
    CULTURAL_ADAPTATION = "cultural-adaptation"
    CONTEXT_AWARE_CHAT = "context-aware-chat"
    PERSONALITY_CHAT = "personality-chat"
    ROLE_PLAY_CHAT = "role-play-chat"
    DOMAIN_SPECIFIC_CHAT = "domain-specific-chat"

    # Qwen-specific categories
    QWEN_REASONING = "qwen-reasoning"
    QWEN_MATH = "qwen-math"
    QWEN_CODE = "qwen-code"
    QWEN_VISION = "qwen-vision"
    QWEN_AUDIO = "qwen-audio"

    # DeepSeek-specific categories
    DEEPSEEK_CODING = "deepseek-coding"
    DEEPSEEK_REASONING = "deepseek-reasoning"
    DEEPSEEK_MATH = "deepseek-math"
    DEEPSEEK_RESEARCH = "deepseek-research"
|
|
|
|
|
|
|
|
|
@dataclass
class HFModel:
    """Hugging Face model definition.

    A plain record describing one catalogue entry. Field order is part of
    the interface: entries elsewhere in this file are built positionally,
    e.g. ``HFModel(name, model_id, category, description, True, False)``,
    so fields must not be reordered.

    Attributes:
        name: Human-readable display name of the model.
        model_id: Model identifier string, typically ``"owner/repo"``.
        category: The ``ModelCategory`` this entry is filed under.
        description: Short free-text description of the model.
        endpoint_compatible: Defaults to ``False``.
            NOTE(review): presumably marks models usable through the
            inference endpoint -- confirm against the client code.
        requires_auth: Defaults to ``False``.
            NOTE(review): presumably marks models that need an access
            token -- confirm against the client code.
        max_tokens: Optional token limit; ``None`` (the default) when no
            limit is recorded for the entry.
        supports_streaming: Defaults to ``False``.
            NOTE(review): presumably marks models whose output can be
            streamed -- confirm against the client code.
    """

    # Required fields (no defaults) must come first in a dataclass.
    name: str
    model_id: str
    category: ModelCategory
    description: str

    # Optional capability flags and limits.
    endpoint_compatible: bool = False
    requires_auth: bool = False
    max_tokens: Optional[int] = None
    supports_streaming: bool = False
|
|
|
|
|
|
|
|
|
class HuggingFaceModels:
|
|
|
"""Comprehensive collection of Hugging Face models for all categories"""
|
|
|
|
|
|
|
|
|
TEXT_GENERATION_MODELS = [
|
|
|
HFModel(
|
|
|
"MiniMax-M2",
|
|
|
"MiniMaxAI/MiniMax-M2",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Latest high-performance text generation model",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Kimi Linear 48B",
|
|
|
"moonshotai/Kimi-Linear-48B-A3B-Instruct",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Large instruction-tuned model with linear attention",
|
|
|
True,
|
|
|
False,
|
|
|
8192,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GPT-OSS 20B",
|
|
|
"openai/gpt-oss-20b",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Open-source GPT model by OpenAI",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GPT-OSS 120B",
|
|
|
"openai/gpt-oss-120b",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Large open-source GPT model",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Granite 4.0 1B",
|
|
|
"ibm-granite/granite-4.0-1b",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"IBM's enterprise-grade small language model",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GLM-4.6",
|
|
|
"zai-org/GLM-4.6",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Multilingual conversational model",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Llama 3.1 8B Instruct",
|
|
|
"meta-llama/Llama-3.1-8B-Instruct",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Meta's instruction-tuned Llama model",
|
|
|
True,
|
|
|
True,
|
|
|
8192,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Tongyi DeepResearch 30B",
|
|
|
"Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"Alibaba's research-focused large language model",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"EuroLLM 9B",
|
|
|
"utter-project/EuroLLM-9B",
|
|
|
ModelCategory.TEXT_GENERATION,
|
|
|
"European multilingual language model",
|
|
|
True,
|
|
|
False,
|
|
|
4096,
|
|
|
True,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
TEXT_TO_IMAGE_MODELS = [
|
|
|
HFModel(
|
|
|
"FIBO",
|
|
|
"briaai/FIBO",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Advanced text-to-image generation model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"FLUX.1 Dev",
|
|
|
"black-forest-labs/FLUX.1-dev",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"State-of-the-art image generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"FLUX.1 Schnell",
|
|
|
"black-forest-labs/FLUX.1-schnell",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Fast high-quality image generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Qwen Image",
|
|
|
"Qwen/Qwen-Image",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Multilingual text-to-image model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Stable Diffusion XL",
|
|
|
"stabilityai/stable-diffusion-xl-base-1.0",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Popular high-resolution image generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Stable Diffusion 3.5 Large",
|
|
|
"stabilityai/stable-diffusion-3.5-large",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Latest Stable Diffusion model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"HunyuanImage 3.0",
|
|
|
"tencent/HunyuanImage-3.0",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Tencent's advanced image generation model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Nitro-E",
|
|
|
"amd/Nitro-E",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"AMD's efficient image generation model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Qwen Image Lightning",
|
|
|
"lightx2v/Qwen-Image-Lightning",
|
|
|
ModelCategory.TEXT_TO_IMAGE,
|
|
|
"Fast distilled image generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
ASR_MODELS = [
|
|
|
HFModel(
|
|
|
"Whisper Large v3",
|
|
|
"openai/whisper-large-v3",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"OpenAI's best multilingual speech recognition",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Whisper Large v3 Turbo",
|
|
|
"openai/whisper-large-v3-turbo",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"Faster version of Whisper Large v3",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Parakeet TDT 0.6B v3",
|
|
|
"nvidia/parakeet-tdt-0.6b-v3",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"NVIDIA's multilingual ASR model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Canary Qwen 2.5B",
|
|
|
"nvidia/canary-qwen-2.5b",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"NVIDIA's advanced ASR with Qwen integration",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Canary 1B v2",
|
|
|
"nvidia/canary-1b-v2",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"Compact multilingual ASR model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Whisper Small",
|
|
|
"openai/whisper-small",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"Lightweight multilingual ASR",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Speaker Diarization 3.1",
|
|
|
"pyannote/speaker-diarization-3.1",
|
|
|
ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
|
|
"Advanced speaker identification and diarization",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
TTS_MODELS = [
|
|
|
HFModel(
|
|
|
"SoulX Podcast 1.7B",
|
|
|
"Soul-AILab/SoulX-Podcast-1.7B",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"High-quality podcast-style speech synthesis",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"NeuTTS Air",
|
|
|
"neuphonic/neutts-air",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Advanced neural text-to-speech",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Kokoro 82M",
|
|
|
"hexgrad/Kokoro-82M",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Lightweight high-quality TTS",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Kani TTS 400M EN",
|
|
|
"nineninesix/kani-tts-400m-en",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"English-focused text-to-speech model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"XTTS v2",
|
|
|
"coqui/XTTS-v2",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Zero-shot voice cloning TTS",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Chatterbox",
|
|
|
"ResembleAI/chatterbox",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Multilingual voice cloning",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"VibeVoice 1.5B",
|
|
|
"microsoft/VibeVoice-1.5B",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Microsoft's advanced TTS model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"OpenAudio S1 Mini",
|
|
|
"fishaudio/openaudio-s1-mini",
|
|
|
ModelCategory.TEXT_TO_SPEECH,
|
|
|
"Compact multilingual TTS",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
IMAGE_CLASSIFICATION_MODELS = [
|
|
|
HFModel(
|
|
|
"NSFW Image Detection",
|
|
|
"Falconsai/nsfw_image_detection",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Content safety image classification",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"ViT Base Patch16",
|
|
|
"google/vit-base-patch16-224",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Google's Vision Transformer",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Deepfake Detection",
|
|
|
"dima806/deepfake_vs_real_image_detection",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Detect AI-generated vs real images",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Facial Emotions Detection",
|
|
|
"dima806/facial_emotions_image_detection",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Recognize facial emotions",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"SDXL Detector",
|
|
|
"Organika/sdxl-detector",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Detect Stable Diffusion XL generated images",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"ViT NSFW Detector",
|
|
|
"AdamCodd/vit-base-nsfw-detector",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"NSFW content detection with ViT",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"ResNet 101",
|
|
|
"microsoft/resnet-101",
|
|
|
ModelCategory.IMAGE_CLASSIFICATION,
|
|
|
"Microsoft's ResNet for classification",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
FEATURE_EXTRACTION_MODELS = [
|
|
|
HFModel(
|
|
|
"Sentence Transformers All MiniLM",
|
|
|
"sentence-transformers/all-MiniLM-L6-v2",
|
|
|
ModelCategory.FEATURE_EXTRACTION,
|
|
|
"Lightweight sentence embeddings",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"BGE Large EN",
|
|
|
"BAAI/bge-large-en-v1.5",
|
|
|
ModelCategory.FEATURE_EXTRACTION,
|
|
|
"High-quality English embeddings",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"E5 Large v2",
|
|
|
"intfloat/e5-large-v2",
|
|
|
ModelCategory.FEATURE_EXTRACTION,
|
|
|
"Multilingual text embeddings",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
TRANSLATION_MODELS = [
|
|
|
HFModel(
|
|
|
"M2M100 1.2B",
|
|
|
"facebook/m2m100_1.2B",
|
|
|
ModelCategory.TRANSLATION,
|
|
|
"Multilingual machine translation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"NLLB 200 3.3B",
|
|
|
"facebook/nllb-200-3.3B",
|
|
|
ModelCategory.TRANSLATION,
|
|
|
"No Language Left Behind translation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"mBART Large 50",
|
|
|
"facebook/mbart-large-50-many-to-many-mmt",
|
|
|
ModelCategory.TRANSLATION,
|
|
|
"Multilingual BART for translation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
SUMMARIZATION_MODELS = [
|
|
|
HFModel(
|
|
|
"PEGASUS XSum",
|
|
|
"google/pegasus-xsum",
|
|
|
ModelCategory.SUMMARIZATION,
|
|
|
"Abstractive summarization model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"BART Large CNN",
|
|
|
"facebook/bart-large-cnn",
|
|
|
ModelCategory.SUMMARIZATION,
|
|
|
"CNN/DailyMail summarization",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"T5 Base",
|
|
|
"t5-base",
|
|
|
ModelCategory.SUMMARIZATION,
|
|
|
"Text-to-Text Transfer Transformer",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
VIDEO_GENERATION_MODELS = [
|
|
|
HFModel(
|
|
|
"Stable Video Diffusion",
|
|
|
"stabilityai/stable-video-diffusion-img2vid",
|
|
|
ModelCategory.TEXT_TO_VIDEO,
|
|
|
"Image-to-video generation model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"AnimateDiff",
|
|
|
"guoyww/animatediff",
|
|
|
ModelCategory.VIDEO_GENERATION,
|
|
|
"Text-to-video animation generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"VideoCrafter",
|
|
|
"videogen/VideoCrafter",
|
|
|
ModelCategory.TEXT_TO_VIDEO,
|
|
|
"High-quality text-to-video generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Video ChatGPT",
|
|
|
"mbzuai-oryx/Video-ChatGPT-7B",
|
|
|
ModelCategory.VIDEO_TO_TEXT,
|
|
|
"Video understanding and description",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Video-BLIP",
|
|
|
"salesforce/video-blip-opt-2.7b",
|
|
|
ModelCategory.VIDEO_CLASSIFICATION,
|
|
|
"Video content analysis and classification",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
CODE_GENERATION_MODELS = [
|
|
|
HFModel(
|
|
|
"CodeLlama 34B Instruct",
|
|
|
"codellama/CodeLlama-34b-Instruct-hf",
|
|
|
ModelCategory.CODE_GENERATION,
|
|
|
"Large instruction-tuned code generation model",
|
|
|
True,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"StarCoder2 15B",
|
|
|
"bigcode/starcoder2-15b",
|
|
|
ModelCategory.CODE_GENERATION,
|
|
|
"Advanced code generation and completion",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"DeepSeek Coder V2",
|
|
|
"deepseek-ai/deepseek-coder-6.7b-instruct",
|
|
|
ModelCategory.CODE_GENERATION,
|
|
|
"Specialized coding assistant",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"WizardCoder 34B",
|
|
|
"WizardLM/WizardCoder-Python-34B-V1.0",
|
|
|
ModelCategory.CODE_GENERATION,
|
|
|
"Python-focused code generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Phind CodeLlama",
|
|
|
"Phind/Phind-CodeLlama-34B-v2",
|
|
|
ModelCategory.CODE_GENERATION,
|
|
|
"Optimized for code explanation and debugging",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Code T5+",
|
|
|
"Salesforce/codet5p-770m",
|
|
|
ModelCategory.CODE_COMPLETION,
|
|
|
"Code understanding and generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"InCoder",
|
|
|
"facebook/incoder-6B",
|
|
|
ModelCategory.CODE_COMPLETION,
|
|
|
"Bidirectional code generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
THREE_D_MODELS = [
|
|
|
HFModel(
|
|
|
"Shap-E",
|
|
|
"openai/shap-e",
|
|
|
ModelCategory.TEXT_TO_3D,
|
|
|
"Text-to-3D shape generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Point-E",
|
|
|
"openai/point-e",
|
|
|
ModelCategory.TEXT_TO_3D,
|
|
|
"Text-to-3D point cloud generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"DreamFusion",
|
|
|
"google/dreamfusion",
|
|
|
ModelCategory.IMAGE_TO_3D,
|
|
|
"Image-to-3D mesh generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Magic3D",
|
|
|
"nvidia/magic3d",
|
|
|
ModelCategory.THREE_D_GENERATION,
|
|
|
"High-quality 3D content creation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GET3D",
|
|
|
"nvidia/get3d",
|
|
|
ModelCategory.MESH_GENERATION,
|
|
|
"3D mesh generation from text",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
DOCUMENT_PROCESSING_MODELS = [
|
|
|
HFModel(
|
|
|
"TrOCR Large",
|
|
|
"microsoft/trocr-large-printed",
|
|
|
ModelCategory.OCR,
|
|
|
"Transformer-based OCR for printed text",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"TrOCR Handwritten",
|
|
|
"microsoft/trocr-large-handwritten",
|
|
|
ModelCategory.HANDWRITING_RECOGNITION,
|
|
|
"Handwritten text recognition",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"LayoutLMv3",
|
|
|
"microsoft/layoutlmv3-large",
|
|
|
ModelCategory.DOCUMENT_ANALYSIS,
|
|
|
"Document layout analysis and understanding",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Donut",
|
|
|
"naver-clova-ix/donut-base",
|
|
|
ModelCategory.DOCUMENT_ANALYSIS,
|
|
|
"OCR-free document understanding",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"TableTransformer",
|
|
|
"microsoft/table-transformer-structure-recognition",
|
|
|
ModelCategory.TABLE_EXTRACTION,
|
|
|
"Table structure recognition",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"FormNet",
|
|
|
"microsoft/formnet",
|
|
|
ModelCategory.FORM_PROCESSING,
|
|
|
"Form understanding and processing",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
MULTIMODAL_MODELS = [
|
|
|
HFModel(
|
|
|
"BLIP-2",
|
|
|
"Salesforce/blip2-opt-2.7b",
|
|
|
ModelCategory.VISION_LANGUAGE,
|
|
|
"Vision-language understanding and generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"InstructBLIP",
|
|
|
"Salesforce/instructblip-vicuna-7b",
|
|
|
ModelCategory.MULTIMODAL_REASONING,
|
|
|
"Instruction-following multimodal model",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"LLaVA",
|
|
|
"liuhaotian/llava-v1.5-7b",
|
|
|
ModelCategory.VISUAL_QUESTION_ANSWERING,
|
|
|
"Large Language and Vision Assistant",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GPT-4V",
|
|
|
"openai/gpt-4-vision-preview",
|
|
|
ModelCategory.MULTIMODAL_CHAT,
|
|
|
"Advanced multimodal conversational AI",
|
|
|
True,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Flamingo",
|
|
|
"deepmind/flamingo-9b",
|
|
|
ModelCategory.CROSS_MODAL_GENERATION,
|
|
|
"Few-shot learning for vision and language",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
SPECIALIZED_AI_MODELS = [
|
|
|
HFModel(
|
|
|
"MusicGen",
|
|
|
"facebook/musicgen-medium",
|
|
|
ModelCategory.MUSIC_GENERATION,
|
|
|
"Text-to-music generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"AudioCraft",
|
|
|
"facebook/audiocraft_musicgen_melody",
|
|
|
ModelCategory.MUSIC_GENERATION,
|
|
|
"Melody-conditioned music generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Real-ESRGAN",
|
|
|
"xinntao/realesrgan-x4plus",
|
|
|
ModelCategory.SUPER_RESOLUTION,
|
|
|
"Image super-resolution",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"GFPGAN",
|
|
|
"TencentARC/GFPGAN",
|
|
|
ModelCategory.FACE_RESTORATION,
|
|
|
"Face restoration and enhancement",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"LaMa",
|
|
|
"advimman/lama",
|
|
|
ModelCategory.IMAGE_INPAINTING,
|
|
|
"Large Mask Inpainting",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Background Remover",
|
|
|
"briaai/RMBG-1.4",
|
|
|
ModelCategory.BACKGROUND_REMOVAL,
|
|
|
"Automatic background removal",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Voice Cloner",
|
|
|
"coqui/XTTS-v2",
|
|
|
ModelCategory.VOICE_CLONING,
|
|
|
"Multilingual voice cloning",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
CREATIVE_CONTENT_MODELS = [
|
|
|
HFModel(
|
|
|
"GPT-3.5 Creative",
|
|
|
"openai/gpt-3.5-turbo-instruct",
|
|
|
ModelCategory.CREATIVE_WRITING,
|
|
|
"Creative writing and storytelling",
|
|
|
True,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Novel AI",
|
|
|
"novelai/genji-python-6b",
|
|
|
ModelCategory.STORY_GENERATION,
|
|
|
"Interactive story generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Poet Assistant",
|
|
|
"gpt2-poetry",
|
|
|
ModelCategory.POETRY_GENERATION,
|
|
|
"Poetry generation and analysis",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Blog Writer",
|
|
|
"google/flan-t5-large",
|
|
|
ModelCategory.BLOG_WRITING,
|
|
|
"Blog content creation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Marketing Copy AI",
|
|
|
"microsoft/DialoGPT-large",
|
|
|
ModelCategory.MARKETING_COPY,
|
|
|
"Marketing content generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
GAME_DEVELOPMENT_MODELS = [
|
|
|
HFModel(
|
|
|
"Character AI",
|
|
|
"character-ai/character-generator",
|
|
|
ModelCategory.CHARACTER_GENERATION,
|
|
|
"Game character generation and design",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Level Designer",
|
|
|
"unity/level-generator",
|
|
|
ModelCategory.LEVEL_GENERATION,
|
|
|
"Game level and environment generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Dialogue Writer",
|
|
|
"bioware/dialogue-generator",
|
|
|
ModelCategory.DIALOGUE_GENERATION,
|
|
|
"Game dialogue and narrative generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Asset Creator",
|
|
|
"epic/asset-generator",
|
|
|
ModelCategory.GAME_ASSET_GENERATION,
|
|
|
"Game asset and texture generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
SCIENCE_RESEARCH_MODELS = [
|
|
|
HFModel(
|
|
|
"AlphaFold",
|
|
|
"deepmind/alphafold2",
|
|
|
ModelCategory.PROTEIN_FOLDING,
|
|
|
"Protein structure prediction",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"ChemBERTa",
|
|
|
"DeepChem/ChemBERTa-77M-MLM",
|
|
|
ModelCategory.MOLECULE_GENERATION,
|
|
|
"Chemical compound analysis",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"SciBERT",
|
|
|
"allenai/scibert_scivocab_uncased",
|
|
|
ModelCategory.SCIENTIFIC_WRITING,
|
|
|
"Scientific text understanding",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Research Assistant",
|
|
|
"microsoft/specter2",
|
|
|
ModelCategory.RESEARCH_ASSISTANCE,
|
|
|
"Research paper analysis and recommendations",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Data Analyst",
|
|
|
"microsoft/data-copilot",
|
|
|
ModelCategory.DATA_ANALYSIS,
|
|
|
"Automated data analysis and insights",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
BUSINESS_PRODUCTIVITY_MODELS = [
|
|
|
HFModel(
|
|
|
"Email Assistant",
|
|
|
"microsoft/email-generator",
|
|
|
ModelCategory.EMAIL_GENERATION,
|
|
|
"Professional email composition",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Presentation AI",
|
|
|
"gamma/presentation-generator",
|
|
|
ModelCategory.PRESENTATION_CREATION,
|
|
|
"Automated presentation creation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Report Writer",
|
|
|
"openai/report-generator",
|
|
|
ModelCategory.REPORT_GENERATION,
|
|
|
"Business report generation",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Meeting Summarizer",
|
|
|
"microsoft/meeting-summarizer",
|
|
|
ModelCategory.MEETING_SUMMARIZATION,
|
|
|
"Meeting notes and action items",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Project Planner",
|
|
|
"atlassian/project-ai",
|
|
|
ModelCategory.PROJECT_PLANNING,
|
|
|
"Project planning and management",
|
|
|
True,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
AI_TEACHER_MODELS = [
|
|
|
|
|
|
HFModel(
|
|
|
"AI Tutor Interactive",
|
|
|
"microsoft/DialoGPT-medium",
|
|
|
ModelCategory.AI_TUTORING,
|
|
|
"Interactive AI tutor for conversational learning with dialogue management",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Goal-Oriented Tutor",
|
|
|
"microsoft/GODEL-v1_1-large-seq2seq",
|
|
|
ModelCategory.AI_TUTORING,
|
|
|
"Goal-oriented conversational AI for personalized tutoring sessions",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Advanced Instruction Tutor",
|
|
|
"google/flan-t5-large",
|
|
|
ModelCategory.AI_TUTORING,
|
|
|
"Advanced instruction-following AI tutor for complex educational tasks",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Educational Content Creator Pro",
|
|
|
"facebook/bart-large",
|
|
|
ModelCategory.EDUCATIONAL_CONTENT,
|
|
|
"Professional educational content generation for all learning levels",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Multilingual Education AI",
|
|
|
"bigscience/bloom-560m",
|
|
|
ModelCategory.EDUCATIONAL_CONTENT,
|
|
|
"Global multilingual educational content for diverse learners",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Academic Writing Assistant",
|
|
|
"microsoft/prophetnet-large-uncased",
|
|
|
ModelCategory.EDUCATIONAL_CONTENT,
|
|
|
"Academic content creation with advanced text generation capabilities",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Master Lesson Planner",
|
|
|
"facebook/bart-large-cnn",
|
|
|
ModelCategory.LESSON_PLANNING,
|
|
|
"Comprehensive lesson planning with summarization and structure",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Curriculum Architect",
|
|
|
"microsoft/prophetnet-base-uncased",
|
|
|
ModelCategory.CURRICULUM_DESIGN,
|
|
|
"Professional curriculum planning and educational program design",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Activity Designer",
|
|
|
"google/t5-base",
|
|
|
ModelCategory.LESSON_PLANNING,
|
|
|
"Interactive learning activity and exercise generation",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
True,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Programming Mentor Pro",
|
|
|
"microsoft/codebert-base",
|
|
|
ModelCategory.CODING_INSTRUCTION,
|
|
|
"Expert programming education with code analysis and explanation",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Advanced Code Instructor",
|
|
|
"microsoft/graphcodebert-base",
|
|
|
ModelCategory.CODING_INSTRUCTION,
|
|
|
"Advanced programming instruction with graph understanding",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Algorithm Tutor Elite",
|
|
|
"microsoft/unixcoder-base",
|
|
|
ModelCategory.CODING_INSTRUCTION,
|
|
|
"Elite algorithm education and computational thinking development",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Science Research Educator",
|
|
|
"allenai/scibert_scivocab_uncased",
|
|
|
ModelCategory.SCIENCE_TUTORING,
|
|
|
"Scientific education with research-grade knowledge and vocabulary",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Advanced Science AI",
|
|
|
"facebook/galactica-125m",
|
|
|
ModelCategory.SCIENCE_TUTORING,
|
|
|
"Advanced scientific knowledge and research methodology education",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Mathematical Reasoning Master",
|
|
|
"google/flan-t5-xl",
|
|
|
ModelCategory.MATH_TUTORING,
|
|
|
"Advanced mathematical reasoning, proofs, and problem-solving",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Interactive Math Tutor",
|
|
|
"microsoft/DialoGPT-small",
|
|
|
ModelCategory.MATH_TUTORING,
|
|
|
"Interactive mathematics tutoring with step-by-step explanations",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
True,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Multilingual Language Master",
|
|
|
"facebook/mbart-large-50-many-to-many-mmt",
|
|
|
ModelCategory.LANGUAGE_TUTORING,
|
|
|
"Advanced multilingual education and cross-language learning",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Literature & Language AI",
|
|
|
"microsoft/prophetnet-large-uncased-cnndm",
|
|
|
ModelCategory.LANGUAGE_TUTORING,
|
|
|
"Literature analysis and advanced language instruction",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Grammar & Comprehension Expert",
|
|
|
"google/electra-base-discriminator",
|
|
|
ModelCategory.LANGUAGE_TUTORING,
|
|
|
"Expert grammar instruction and reading comprehension development",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Assessment Designer Pro",
|
|
|
"microsoft/DialoGPT-large",
|
|
|
ModelCategory.QUIZ_GENERATION,
|
|
|
"Professional assessment and quiz generation with interaction",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Learning Progress Analyzer",
|
|
|
"facebook/bart-large",
|
|
|
ModelCategory.LEARNING_ASSESSMENT,
|
|
|
"Comprehensive learning assessment and progress tracking",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Question Master AI",
|
|
|
"google/t5-base",
|
|
|
ModelCategory.QUIZ_GENERATION,
|
|
|
"Intelligent question generation for all educational levels",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Exam Preparation Specialist",
|
|
|
"microsoft/unilm-base-cased",
|
|
|
ModelCategory.EXAM_PREPARATION,
|
|
|
"Specialized exam preparation and test strategy development",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Personal Learning Architect",
|
|
|
"microsoft/deberta-v3-base",
|
|
|
ModelCategory.PERSONALIZED_LEARNING,
|
|
|
"Advanced personalized learning path creation and optimization",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Adaptive Learning Engine",
|
|
|
"facebook/opt-125m",
|
|
|
ModelCategory.ADAPTIVE_LEARNING,
|
|
|
"Intelligent adaptive learning with dynamic content adjustment",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Learning Analytics Expert",
|
|
|
"microsoft/layoutlm-base-uncased",
|
|
|
ModelCategory.LEARNING_ANALYTICS,
|
|
|
"Advanced learning analytics and educational data interpretation",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Concept Explanation Master",
|
|
|
"microsoft/deberta-v3-base",
|
|
|
ModelCategory.CONCEPT_EXPLANATION,
|
|
|
"Master-level concept explanation and knowledge breakdown",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Knowledge Synthesizer",
|
|
|
"google/pegasus-xsum",
|
|
|
ModelCategory.CONCEPT_EXPLANATION,
|
|
|
"Advanced knowledge synthesis and concept summarization",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Interactive Concept Guide",
|
|
|
"facebook/bart-base",
|
|
|
ModelCategory.CONCEPT_EXPLANATION,
|
|
|
"Interactive concept teaching with clarification and examples",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Programming Homework Expert",
|
|
|
"microsoft/codebert-base-mlm",
|
|
|
ModelCategory.HOMEWORK_ASSISTANCE,
|
|
|
"Expert programming homework assistance and debugging support",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Universal Homework Helper",
|
|
|
"google/flan-t5-small",
|
|
|
ModelCategory.HOMEWORK_ASSISTANCE,
|
|
|
"Comprehensive homework assistance across all academic subjects",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Global Study Assistant",
|
|
|
"facebook/mbart-large-cc25",
|
|
|
ModelCategory.HOMEWORK_ASSISTANCE,
|
|
|
"Multilingual homework support with cultural context understanding",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Study Guide Architect",
|
|
|
"microsoft/prophetnet-large-uncased",
|
|
|
ModelCategory.STUDY_GUIDE_CREATION,
|
|
|
"Professional study guide creation and learning material development",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Educational Resource Creator",
|
|
|
"facebook/bart-large-xsum",
|
|
|
ModelCategory.STUDY_GUIDE_CREATION,
|
|
|
"Comprehensive educational resource and reference material creation",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Educational Game Designer",
|
|
|
"microsoft/DialoGPT-base",
|
|
|
ModelCategory.EDUCATIONAL_GAMES,
|
|
|
"Interactive educational games and gamified learning experiences",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Learning Game Engine",
|
|
|
"google/bert-base-uncased",
|
|
|
ModelCategory.EDUCATIONAL_GAMES,
|
|
|
"Educational game mechanics and interactive learning systems",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"History Professor AI",
|
|
|
"microsoft/deberta-large",
|
|
|
ModelCategory.HISTORY_TUTORING,
|
|
|
"Professor-level historical analysis and social studies education",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Interactive History Guide",
|
|
|
"facebook/opt-350m",
|
|
|
ModelCategory.HISTORY_TUTORING,
|
|
|
"Interactive historical narratives and timeline exploration",
|
|
|
True,
|
|
|
False,
|
|
|
2048,
|
|
|
True,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Master Subject Teacher",
|
|
|
"google/flan-t5-base",
|
|
|
ModelCategory.SUBJECT_TEACHING,
|
|
|
"Expert multi-subject teaching with instruction-following excellence",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
True,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Universal Educator AI",
|
|
|
"microsoft/unilm-large-cased",
|
|
|
ModelCategory.SUBJECT_TEACHING,
|
|
|
"Universal education AI with cross-disciplinary knowledge",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
|
|
|
HFModel(
|
|
|
"Advanced Learning Analytics",
|
|
|
"microsoft/layoutlm-large-uncased",
|
|
|
ModelCategory.LEARNING_ANALYTICS,
|
|
|
"Enterprise-level learning analytics and educational insights",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Personalization Engine Pro",
|
|
|
"google/electra-large-discriminator",
|
|
|
ModelCategory.PERSONALIZED_LEARNING,
|
|
|
"Advanced AI personalization with learning style adaptation",
|
|
|
True,
|
|
|
False,
|
|
|
512,
|
|
|
False,
|
|
|
),
|
|
|
HFModel(
|
|
|
"Global Adaptive System",
|
|
|
"facebook/mbart-large-50",
|
|
|
ModelCategory.ADAPTIVE_LEARNING,
|
|
|
"Global adaptive learning system with multilingual capabilities",
|
|
|
True,
|
|
|
False,
|
|
|
1024,
|
|
|
False,
|
|
|
),
|
|
|
]
|
|
|
|
|
|
|
|
|
# Catalogue of Qwen-family checkpoints.
# Every row below is expanded positionally into HFModel(...), so the column
# order is whatever HFModel declares (it is defined elsewhere in this file).
# Rows read as (display name, hub repo id, category, description, bool, bool,
# int, bool).
# NOTE(review): confirm the meaning of the trailing four values against the
# HFModel definition before relying on them.
QWEN_MODELS = [
    HFModel(*row)
    for row in (
        # Qwen2.5 instruct checkpoints
        ("Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-72B-Instruct", ModelCategory.TEXT_GENERATION,
         "Large-scale instruction-following model for complex reasoning", True, False, 32768, True),
        ("Qwen2.5-32B-Instruct", "Qwen/Qwen2.5-32B-Instruct", ModelCategory.TEXT_GENERATION,
         "High-performance instruction model for advanced tasks", True, False, 32768, True),
        ("Qwen2.5-14B-Instruct", "Qwen/Qwen2.5-14B-Instruct", ModelCategory.TEXT_GENERATION,
         "Efficient large model with excellent reasoning capabilities", True, False, 32768, True),
        ("Qwen2.5-7B-Instruct", "Qwen/Qwen2.5-7B-Instruct", ModelCategory.TEXT_GENERATION,
         "Optimized 7B model for general-purpose applications", True, False, 32768, True),
        ("Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-3B-Instruct", ModelCategory.TEXT_GENERATION,
         "Lightweight model for resource-constrained environments", True, False, 32768, True),
        ("Qwen2.5-1.5B-Instruct", "Qwen/Qwen2.5-1.5B-Instruct", ModelCategory.TEXT_GENERATION,
         "Ultra-lightweight model for edge deployment", True, False, 32768, True),
        ("Qwen2.5-0.5B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct", ModelCategory.TEXT_GENERATION,
         "Minimal footprint model for basic applications", True, False, 32768, True),
        # Qwen2.5-Coder checkpoints
        ("Qwen2.5-Coder-32B-Instruct", "Qwen/Qwen2.5-Coder-32B-Instruct", ModelCategory.QWEN_CODE,
         "Advanced code generation and programming assistance", True, False, 131072, True),
        ("Qwen2.5-Coder-14B-Instruct", "Qwen/Qwen2.5-Coder-14B-Instruct", ModelCategory.QWEN_CODE,
         "Code generation with excellent debugging capabilities", True, False, 131072, True),
        ("Qwen2.5-Coder-7B-Instruct", "Qwen/Qwen2.5-Coder-7B-Instruct", ModelCategory.QWEN_CODE,
         "Efficient coding assistant for multiple languages", True, False, 131072, True),
        ("Qwen2.5-Coder-3B-Instruct", "Qwen/Qwen2.5-Coder-3B-Instruct", ModelCategory.QWEN_CODE,
         "Lightweight programming assistant", True, False, 131072, True),
        ("Qwen2.5-Coder-1.5B-Instruct", "Qwen/Qwen2.5-Coder-1.5B-Instruct", ModelCategory.QWEN_CODE,
         "Compact code generation model", True, False, 131072, True),
        # Qwen2.5-Math checkpoints
        ("Qwen2.5-Math-72B-Instruct", "Qwen/Qwen2.5-Math-72B-Instruct", ModelCategory.QWEN_MATH,
         "Advanced mathematical problem solving and reasoning", True, False, 32768, True),
        ("Qwen2.5-Math-7B-Instruct", "Qwen/Qwen2.5-Math-7B-Instruct", ModelCategory.QWEN_MATH,
         "Mathematical reasoning and calculation assistance", True, False, 32768, True),
        ("Qwen2.5-Math-1.5B-Instruct", "Qwen/Qwen2.5-Math-1.5B-Instruct", ModelCategory.QWEN_MATH,
         "Compact mathematical problem solver", True, False, 32768, True),
        # QwQ
        ("QwQ-32B-Preview", "Qwen/QwQ-32B-Preview", ModelCategory.QWEN_REASONING,
         "Advanced reasoning and logical thinking model", True, False, 32768, True),
        # Qwen2-VL checkpoints
        ("Qwen2-VL-72B-Instruct", "Qwen/Qwen2-VL-72B-Instruct", ModelCategory.QWEN_VISION,
         "Large-scale vision-language understanding and generation", True, False, 32768, True),
        ("Qwen2-VL-7B-Instruct", "Qwen/Qwen2-VL-7B-Instruct", ModelCategory.QWEN_VISION,
         "Efficient vision-language model for multimodal tasks", True, False, 32768, True),
        ("Qwen2-VL-2B-Instruct", "Qwen/Qwen2-VL-2B-Instruct", ModelCategory.QWEN_VISION,
         "Lightweight vision-language model", True, False, 32768, True),
        # Qwen2-Audio
        ("Qwen2-Audio-7B-Instruct", "Qwen/Qwen2-Audio-7B-Instruct", ModelCategory.QWEN_AUDIO,
         "Advanced audio understanding and generation", True, False, 32768, True),
        # Qwen1.5 chat checkpoints
        ("Qwen1.5-110B-Chat", "Qwen/Qwen1.5-110B-Chat", ModelCategory.CONVERSATIONAL,
         "Large conversational model with broad knowledge", True, False, 32768, True),
        ("Qwen1.5-72B-Chat", "Qwen/Qwen1.5-72B-Chat", ModelCategory.CONVERSATIONAL,
         "Conversational AI with excellent reasoning", True, False, 32768, True),
        ("Qwen1.5-32B-Chat", "Qwen/Qwen1.5-32B-Chat", ModelCategory.CONVERSATIONAL,
         "Efficient chat model for interactive applications", True, False, 32768, True),
        ("Qwen1.5-14B-Chat", "Qwen/Qwen1.5-14B-Chat", ModelCategory.CONVERSATIONAL,
         "Balanced performance chat model", True, False, 32768, True),
        ("Qwen1.5-7B-Chat", "Qwen/Qwen1.5-7B-Chat", ModelCategory.CONVERSATIONAL,
         "Popular chat model with good performance", True, False, 32768, True),
        ("Qwen1.5-4B-Chat", "Qwen/Qwen1.5-4B-Chat", ModelCategory.CONVERSATIONAL,
         "Lightweight conversational AI", True, False, 32768, True),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of DeepSeek checkpoints.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, hub repo id, category, description, bool, bool, int, bool).
# NOTE(review): confirm the meaning of the trailing four values against
# HFModel.
DEEPSEEK_MODELS = [
    HFModel(*row)
    for row in (
        # DeepSeek-V3
        ("DeepSeek-V3", "deepseek-ai/DeepSeek-V3", ModelCategory.DEEPSEEK_REASONING,
         "Latest generation reasoning and knowledge model", True, False, 65536, True),
        ("DeepSeek-V3-Base", "deepseek-ai/DeepSeek-V3-Base", ModelCategory.TEXT_GENERATION,
         "Foundation model for various downstream tasks", True, False, 65536, True),
        # DeepSeek-V2.5
        ("DeepSeek-V2.5", "deepseek-ai/DeepSeek-V2.5", ModelCategory.DEEPSEEK_REASONING,
         "Advanced reasoning and general intelligence model", True, False, 32768, True),
        # DeepSeek-Coder
        ("DeepSeek-Coder-V2-Instruct", "deepseek-ai/DeepSeek-Coder-V2-Instruct",
         ModelCategory.DEEPSEEK_CODING,
         "Advanced code generation and programming assistance", True, False, 163840, True),
        ("DeepSeek-Coder-V2-Base", "deepseek-ai/DeepSeek-Coder-V2-Base",
         ModelCategory.DEEPSEEK_CODING,
         "Foundation coding model for fine-tuning", True, False, 163840, True),
        ("DeepSeek-Coder-33B-Instruct", "deepseek-ai/deepseek-coder-33b-instruct",
         ModelCategory.DEEPSEEK_CODING,
         "Large-scale code generation and debugging", True, False, 16384, True),
        ("DeepSeek-Coder-6.7B-Instruct", "deepseek-ai/deepseek-coder-6.7b-instruct",
         ModelCategory.DEEPSEEK_CODING,
         "Efficient code assistance and generation", True, False, 16384, True),
        ("DeepSeek-Coder-1.3B-Instruct", "deepseek-ai/deepseek-coder-1.3b-instruct",
         ModelCategory.DEEPSEEK_CODING,
         "Lightweight coding assistant", True, False, 16384, True),
        # DeepSeek-Math
        ("DeepSeek-Math-7B-Instruct", "deepseek-ai/deepseek-math-7b-instruct",
         ModelCategory.DEEPSEEK_MATH,
         "Mathematical problem solving and reasoning", True, False, 4096, True),
        ("DeepSeek-Math-7B-Base", "deepseek-ai/deepseek-math-7b-base",
         ModelCategory.DEEPSEEK_MATH,
         "Foundation model for mathematical reasoning", True, False, 4096, True),
        # DeepSeek LLM chat
        ("DeepSeek-67B-Chat", "deepseek-ai/deepseek-llm-67b-chat", ModelCategory.CONVERSATIONAL,
         "Large conversational model with strong reasoning", True, False, 4096, True),
        ("DeepSeek-7B-Chat", "deepseek-ai/deepseek-llm-7b-chat", ModelCategory.CONVERSATIONAL,
         "Efficient chat model for general conversations", True, False, 4096, True),
        # DeepSeek-VL
        ("DeepSeek-VL-7B-Chat", "deepseek-ai/deepseek-vl-7b-chat", ModelCategory.VISION_LANGUAGE,
         "Vision-language understanding and conversation", True, False, 4096, True),
        ("DeepSeek-VL-1.3B-Chat", "deepseek-ai/deepseek-vl-1.3b-chat", ModelCategory.VISION_LANGUAGE,
         "Lightweight vision-language model", True, False, 4096, True),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of image editing / restoration entries.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, repo id, category, description, bool, bool, int, bool).
# NOTE(review): several repo ids here (e.g. "pytorch/vision", "Xintao/GFPGAN")
# look like source-code project names -- verify that they resolve to hosted
# models before depending on them.
IMAGE_EDITING_MODELS = [
    HFModel(*row)
    for row in (
        ("SDXL Inpainting", "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         ModelCategory.IMAGE_EDITING,
         "High-quality image inpainting and editing", True, False, 1024, False),
        ("ControlNet Inpainting", "lllyasviel/control_v11p_sd15_inpaint",
         ModelCategory.IMAGE_EDITING,
         "Controllable image inpainting with precise editing", True, False, 512, False),
        ("InstantID Face Editor", "InstantX/InstantID", ModelCategory.FACE_ENHANCEMENT,
         "Identity-preserving face editing and enhancement", True, False, 512, False),
        ("Real-ESRGAN Upscaler", "ai-forever/Real-ESRGAN", ModelCategory.IMAGE_UPSCALING,
         "Advanced image super-resolution and enhancement", True, False, 1024, False),
        ("GFPGAN Face Restoration", "Xintao/GFPGAN", ModelCategory.FACE_RESTORATION,
         "High-quality face restoration and enhancement", True, False, 512, False),
        ("CodeFormer Face Restoration", "sczhou/CodeFormer", ModelCategory.FACE_RESTORATION,
         "Robust face restoration for low-quality images", True, False, 512, False),
        ("Background Removal", "briaai/RMBG-1.4", ModelCategory.BACKGROUND_REMOVAL,
         "Precise background removal and segmentation", True, False, 1024, False),
        ("U2-Net Background Removal", "simonw/u2net-portrait-segmentation",
         ModelCategory.BACKGROUND_REMOVAL,
         "Portrait and object background removal", True, False, 320, False),
        ("Photo Colorization", "microsoft/beit-base-patch16-224-pt22k-ft22k",
         ModelCategory.COLOR_CORRECTION,
         "AI-powered photo colorization and enhancement", True, False, 224, False),
        ("Style Transfer Neural", "pytorch/vision", ModelCategory.ARTISTIC_FILTER,
         "Neural style transfer for artistic image effects", True, False, 512, False),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of face swap / face editing entries.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, repo id, category, description, bool, bool, int, bool).
# NOTE(review): "deepinsight/inswapper_128.onnx" carries a file extension and
# may not be a valid repo id -- verify before use.
FACE_SWAP_MODELS = [
    HFModel(*row)
    for row in (
        ("InsightFace SwapFace", "deepinsight/inswapper_128.onnx", ModelCategory.FACE_SWAP,
         "High-quality face swapping with identity preservation", True, False, 128, False),
        ("SimSwap Face Swap", "ppogg/simswap_official", ModelCategory.FACE_SWAP,
         "Realistic face swapping for videos and images", True, False, 224, False),
        ("FaceX-Zoo Face Swap", "FacePerceiver/FaceX-Zoo", ModelCategory.FACE_SWAP,
         "Multi-purpose face analysis and swapping toolkit", True, False, 112, False),
        ("Face Enhancement Pro", "TencentARC/GFPGAN", ModelCategory.FACE_ENHANCEMENT,
         "Professional face enhancement and restoration", True, False, 512, False),
        ("DualStyleGAN Face Edit", "williamyang1991/DualStyleGAN", ModelCategory.FACE_ENHANCEMENT,
         "Style-controllable face image editing", True, False, 1024, False),
        ("MegaPortraits Face Animate", "NVlabs/MegaPortraits", ModelCategory.FACIAL_ANIMATION,
         "One-shot facial animation and expression transfer", True, False, 256, False),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of speech synthesis, recognition and voice conversion entries.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, repo id, category, description, bool, bool, int, bool).
# NOTE(review): the integer column mixes magnitudes (16000-32000 on most rows
# but 30 on the Whisper/Arabic Whisper rows) -- presumably sample rates versus
# a duration in seconds; confirm the intended unit against HFModel.
ADVANCED_SPEECH_MODELS = [
    HFModel(*row)
    for row in (
        # Text-to-speech and voice cloning
        ("XTTS v2 Multilingual", "coqui/XTTS-v2", ModelCategory.MULTILINGUAL_TTS,
         "High-quality multilingual text-to-speech with voice cloning", True, False, 24000, True),
        ("Bark Text-to-Speech", "suno/bark", ModelCategory.ADVANCED_TTS,
         "Generative TTS with music, sound effects, and multiple speakers",
         True, False, 24000, False),
        ("SpeechT5 TTS", "microsoft/speecht5_tts", ModelCategory.ADVANCED_TTS,
         "High-quality neural text-to-speech synthesis", True, False, 16000, False),
        ("VALL-E X Multilingual", "Plachtaa/VALL-E-X", ModelCategory.MULTILINGUAL_TTS,
         "Zero-shot voice synthesis in multiple languages", True, False, 24000, False),
        ("Arabic TTS", "arabic-speech-corpus/tts-arabic", ModelCategory.MULTILINGUAL_TTS,
         "High-quality Arabic text-to-speech synthesis", True, False, 22050, False),
        ("Tortoise TTS", "jbetker/tortoise-tts", ModelCategory.VOICE_CLONING,
         "High-quality voice cloning and synthesis", True, False, 22050, False),
        # Speech recognition
        ("Whisper Large v3", "openai/whisper-large-v3", ModelCategory.MULTILINGUAL_STT,
         "State-of-the-art multilingual speech recognition", True, False, 30, False),
        ("Whisper Large v3 Turbo", "openai/whisper-large-v3-turbo", ModelCategory.MULTILINGUAL_STT,
         "Fast multilingual speech recognition with high accuracy", True, False, 30, True),
        ("Arabic Whisper", "arabic-speech-corpus/whisper-large-arabic",
         ModelCategory.MULTILINGUAL_STT,
         "Optimized Arabic speech recognition model", True, False, 30, False),
        ("MMS Speech Recognition", "facebook/mms-1b-all", ModelCategory.MULTILINGUAL_STT,
         "Massively multilingual speech recognition (1000+ languages)", True, False, 16000, False),
        ("Wav2Vec2 Arabic", "facebook/wav2vec2-large-xlsr-53-arabic",
         ModelCategory.MULTILINGUAL_STT,
         "Arabic speech recognition with Wav2Vec2 architecture", True, False, 16000, False),
        ("SpeechT5 ASR", "microsoft/speecht5_asr", ModelCategory.ADVANCED_STT,
         "Advanced automatic speech recognition", True, False, 16000, False),
        # Speech translation and voice conversion
        ("SeamlessM4T", "facebook/seamless-m4t-v2-large", ModelCategory.REAL_TIME_TRANSLATION,
         "Multilingual speech-to-speech translation", True, False, 16000, True),
        ("Voice Conversion VITS", "jaywalnut310/vits-ljs", ModelCategory.VOICE_CONVERSION,
         "High-quality voice conversion and synthesis", True, False, 22050, False),
        ("RVC Voice Clone", "lj1995/GPT-SoVITS", ModelCategory.VOICE_CLONING,
         "Real-time voice cloning and conversion", True, False, 32000, True),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of talking-avatar, lip-sync and presenter entries.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, repo id, category, description, bool, bool, int, bool).
# NOTE(review): confirm the meaning of the trailing four values against
# HFModel.
TALKING_AVATAR_MODELS = [
    HFModel(*row)
    for row in (
        ("SadTalker Talking Head", "vinthony/SadTalker", ModelCategory.TALKING_AVATAR,
         "Generate talking head videos from audio and single image", True, False, 256, False),
        ("Real-Time Face Animation", "PaddlePaddle/PaddleGAN-FOM", ModelCategory.FACIAL_ANIMATION,
         "Real-time facial animation and expression control", True, False, 256, True),
        ("LivePortrait Animation", "KwaiVGI/LivePortrait", ModelCategory.TALKING_AVATAR,
         "High-quality portrait animation with lip sync", True, False, 512, False),
        ("DualTalker Video", "OpenTalker/DualTalker", ModelCategory.TALKING_AVATAR,
         "Dual-modal talking face generation with enhanced quality", True, False, 256, False),
        ("Video Retalking", "vinthony/video-retalking", ModelCategory.LIP_SYNC,
         "Audio-driven lip sync for existing videos", True, False, 224, False),
        ("Wav2Lip Lip Sync", "Rudrabha/Wav2Lip", ModelCategory.LIP_SYNC,
         "Accurate lip sync generation from audio", True, False, 96, False),
        ("Digital Human Avatar", "modelscope/damo-text-to-video-synthesis",
         ModelCategory.VIRTUAL_PRESENTER,
         "Generate digital human presenter videos", True, False, 320, False),
        ("AI News Anchor", "microsoft/DiT-XL-2-256", ModelCategory.AI_ANCHOR,
         "Professional AI news anchor and presenter generation", True, False, 256, False),
        ("Avatar Gesture Control", "ZhengPeng7/BiSeNet", ModelCategory.GESTURE_GENERATION,
         "Generate natural gestures and body language for avatars", True, False, 512, False),
    )
]
|
|
|
|
|
|
|
|
|
# Catalogue of interactive / bilingual conversation entries.
# Each row is expanded positionally into HFModel(...); column order follows
# the HFModel definition found elsewhere in this file. Rows read as
# (display name, repo id, category, description, bool, bool, int, bool).
# NOTE(review): confirm the meaning of the trailing four values against
# HFModel.
INTERACTIVE_LANGUAGE_MODELS = [
    HFModel(*row)
    for row in (
        # Conversation assistants
        ("AceGPT Arabic-English", "FreedomIntelligence/AceGPT-13B",
         ModelCategory.BILINGUAL_CONVERSATION,
         "Bilingual Arabic-English conversation model", True, False, 4096, True),
        ("Jais Arabic Chat", "core42/jais-13b-chat", ModelCategory.BILINGUAL_CONVERSATION,
         "Advanced Arabic conversation model with English support", True, False, 2048, True),
        ("AraBART Conversational", "aubmindlab/arabart-base-conversational",
         ModelCategory.BILINGUAL_CONVERSATION,
         "Arabic conversational AI with cultural understanding", True, False, 1024, True),
        ("Multilingual Chat Assistant", "microsoft/DialoGPT-large", ModelCategory.INTERACTIVE_CHAT,
         "Interactive chat assistant supporting multiple languages", True, False, 1024, True),
        ("Cultural Context Chat", "bigscience/bloom-7b1", ModelCategory.CULTURAL_ADAPTATION,
         "Culturally aware conversation model for diverse contexts", True, False, 2048, True),
        ("Context-Aware Assistant", "microsoft/GODEL-v1_1-large-seq2seq",
         ModelCategory.CONTEXT_AWARE_CHAT,
         "Context-aware conversational AI with memory", True, False, 1024, True),
        ("Personality Chat Bot", "microsoft/PersonaGPT", ModelCategory.PERSONALITY_CHAT,
         "Personality-driven conversational AI with distinct characters",
         True, False, 1024, True),
        ("Role-Play Assistant", "PygmalionAI/pygmalion-6b", ModelCategory.ROLE_PLAY_CHAT,
         "Interactive role-playing conversation model", True, False, 2048, True),
        ("Domain Expert Chat", "microsoft/DialoGPT-medium", ModelCategory.DOMAIN_SPECIFIC_CHAT,
         "Specialized domain conversation assistant", True, False, 1024, True),
        # Arabic-focused entries
        ("Arabic GPT-J", "aubmindlab/aragpt2-base", ModelCategory.BILINGUAL_CONVERSATION,
         "Arabic language generation and conversation", True, False, 1024, True),
        ("Marbert Arabic Chat", "UBC-NLP/MARBERT", ModelCategory.BILINGUAL_CONVERSATION,
         "Dialectal Arabic conversation model", True, False, 512, False),
        ("ArabicBERT Chat", "aubmindlab/bert-base-arabertv2", ModelCategory.BILINGUAL_CONVERSATION,
         "Modern Standard Arabic conversational understanding", True, False, 512, False),
    )
]
|
|
|
|
|
|
|
|
|
class HuggingFaceInference:
|
|
|
"""Hugging Face Inference API integration"""
|
|
|
|
|
|
def __init__(
|
|
|
self,
|
|
|
api_token: str,
|
|
|
base_url: str = "https://api-inference.huggingface.co/models/",
|
|
|
):
|
|
|
self.api_token = api_token
|
|
|
self.base_url = base_url
|
|
|
self.session = None
|
|
|
|
|
|
async def __aenter__(self):
    """Open the aiohttp session used by this client and return ``self``.

    The session carries the Bearer authorization header built from
    ``self.api_token`` and a total timeout of 300 seconds.
    """
    auth_headers = {"Authorization": f"Bearer {self.api_token}"}
    request_timeout = aiohttp.ClientTimeout(total=300)
    self.session = aiohttp.ClientSession(
        headers=auth_headers,
        timeout=request_timeout,
    )
    return self
|
|
|
|
|
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
|
if self.session:
|
|
|
await self.session.close()
|
|
|
|
|
|
async def text_generation(
    self,
    model_id: str,
    prompt: str,
    max_tokens: int = 100,
    temperature: float = 0.7,
    stream: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """Send a text-generation request for ``prompt`` to ``model_id``.

    Args:
        model_id: Model identifier forwarded to the request helper.
        prompt: Text placed under the payload's ``"inputs"`` key.
        max_tokens: Sent as the ``max_new_tokens`` parameter.
        temperature: Sent as the ``temperature`` parameter.
        stream: When truthy the payload goes through ``_stream_request``,
            otherwise through ``_request``.
        **kwargs: Extra generation parameters; they are applied last and so
            override the defaults above, including ``do_sample``.

    Returns:
        Whatever the selected request helper returns.
    """
    generation_params = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "do_sample": True,
    }
    # Caller-supplied parameters win over the defaults.
    generation_params.update(kwargs)

    payload = {
        "inputs": prompt,
        "parameters": generation_params,
        "options": {"use_cache": False},
    }

    send = self._stream_request if stream else self._request
    return await send(model_id, payload)
|
|
|
|
|
|
async def text_to_image(
    self,
    model_id: str,
    prompt: str,
    negative_prompt: Optional[str] = None,
    **kwargs,
) -> bytes:
    """Request an image for ``prompt`` from ``model_id``.

    Args:
        model_id: Model identifier forwarded to ``_request``.
        prompt: Text placed under the payload's ``"inputs"`` key.
        negative_prompt: Included as a parameter only when truthy (``None``
            and the empty string are both omitted).
        **kwargs: Extra parameters merged after ``negative_prompt``.

    Returns:
        The result of ``_request`` called with ``expect_json=False``;
        presumably the raw image bytes -- confirm against ``_request``.
    """
    image_params = {}
    if negative_prompt:
        image_params["negative_prompt"] = negative_prompt
    image_params.update(kwargs)

    payload = {"inputs": prompt, "parameters": image_params}
    return await self._request(model_id, payload, expect_json=False)
|
|
|
|
|
|
async def automatic_speech_recognition(
    self, model_id: str, audio_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Submit audio bytes to ``model_id`` for transcription.

    The audio is base64-encoded into a string and sent under ``"inputs"``;
    ``kwargs`` is passed through unchanged as ``"parameters"``.

    Returns:
        Whatever ``_request`` returns for the payload.
    """
    # Bytes are turned into text so they can sit inside the payload dict.
    encoded_audio = base64.b64encode(audio_data).decode()
    return await self._request(
        model_id,
        {"inputs": encoded_audio, "parameters": kwargs},
    )
|
|
|
|
|
|
async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
    """Request synthesized speech for ``text`` from ``model_id``.

    ``kwargs`` is forwarded unchanged as the payload's ``"parameters"``.

    Returns:
        The result of ``_request`` called with ``expect_json=False``;
        presumably raw audio bytes -- confirm against ``_request``.
    """
    return await self._request(
        model_id,
        {"inputs": text, "parameters": kwargs},
        expect_json=False,
    )
|
|
|
|
|
|
async def image_classification(
    self, model_id: str, image_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Submit image bytes to ``model_id`` for classification.

    The image is base64-encoded into a string and sent under ``"inputs"``;
    ``kwargs`` is passed through unchanged as ``"parameters"``.

    Returns:
        Whatever ``_request`` returns for the payload.
    """
    # Bytes are turned into text so they can sit inside the payload dict.
    encoded_image = base64.b64encode(image_data).decode()
    return await self._request(
        model_id,
        {"inputs": encoded_image, "parameters": kwargs},
    )
|
|
|
|
|
|
async def feature_extraction(
    self, model_id: str, texts: Union[str, List[str]], **kwargs
) -> Dict[str, Any]:
    """Request embeddings for one string or a list of strings.

    ``texts`` is sent as-is under ``"inputs"`` and ``kwargs`` is forwarded
    unchanged as ``"parameters"``.

    Returns:
        Whatever ``_request`` returns for the payload.
    """
    return await self._request(
        model_id,
        {"inputs": texts, "parameters": kwargs},
    )
|
|
|
|
|
|
async def translation(
    self,
    model_id: str,
    text: str,
    src_lang: Optional[str] = None,
    tgt_lang: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Translate ``text`` and return the parsed reply.

    ``src_lang`` and ``tgt_lang`` are only included in the request
    parameters when truthy; extra keyword arguments follow them unchanged.
    """
    parameters: Dict[str, Any] = {}
    for key, value in (("src_lang", src_lang), ("tgt_lang", tgt_lang)):
        if value:
            parameters[key] = value
    parameters.update(kwargs)

    return await self._request(
        model_id, {"inputs": text, "parameters": parameters}
    )
|
|
|
|
|
|
async def summarization(
    self,
    model_id: str,
    text: str,
    max_length: int = 150,
    min_length: int = 30,
    **kwargs,
) -> Dict[str, Any]:
    """Summarize ``text`` and return the parsed reply.

    ``max_length`` and ``min_length`` are always sent as parameters; extra
    keyword arguments are appended after them.
    """
    parameters: Dict[str, Any] = dict(max_length=max_length, min_length=min_length)
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": text, "parameters": parameters}
    )
|
|
|
|
|
|
async def question_answering(
    self, model_id: str, question: str, context: str, **kwargs
) -> Dict[str, Any]:
    """Ask ``question`` against ``context`` and return the parsed reply.

    Extra keyword arguments are forwarded as the request ``parameters``.
    """
    qa_inputs = {"question": question, "context": context}
    return await self._request(
        model_id, {"inputs": qa_inputs, "parameters": kwargs}
    )
|
|
|
|
|
|
async def zero_shot_classification(
    self, model_id: str, text: str, candidate_labels: List[str], **kwargs
) -> Dict[str, Any]:
    """Classify ``text`` against ``candidate_labels`` and return the parsed reply.

    The labels are sent as the ``candidate_labels`` parameter; extra keyword
    arguments are appended after it.
    """
    parameters: Dict[str, Any] = {"candidate_labels": candidate_labels}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": text, "parameters": parameters}
    )
|
|
|
|
|
|
async def conversational(
    self,
    model_id: str,
    text: str,
    conversation_history: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Send a chat turn, optionally with prior history, and return the reply.

    When ``conversation_history`` is non-empty, the ``"user"`` values of its
    entries are sent as ``past_user_inputs`` and the ``"bot"`` values as
    ``generated_responses``; entries lacking a key are skipped for that list.
    Extra keyword arguments are forwarded as the request ``parameters``.
    """
    chat_inputs: Dict[str, Any] = {"text": text}
    if conversation_history:
        chat_inputs["past_user_inputs"] = [
            turn["user"] for turn in conversation_history if "user" in turn
        ]
        chat_inputs["generated_responses"] = [
            turn["bot"] for turn in conversation_history if "bot" in turn
        ]

    return await self._request(
        model_id, {"inputs": chat_inputs, "parameters": kwargs}
    )
|
|
|
|
|
|
async def _request(
|
|
|
self, model_id: str, payload: Dict[str, Any], expect_json: bool = True
|
|
|
) -> Union[Dict[str, Any], bytes]:
|
|
|
"""Make HTTP request to Hugging Face API"""
|
|
|
url = f"{self.base_url}{model_id}"
|
|
|
|
|
|
try:
|
|
|
async with self.session.post(url, json=payload) as response:
|
|
|
if response.status == 200:
|
|
|
if expect_json:
|
|
|
return await response.json()
|
|
|
else:
|
|
|
return await response.read()
|
|
|
elif response.status == 503:
|
|
|
|
|
|
error_info = await response.json()
|
|
|
estimated_time = error_info.get("estimated_time", 30)
|
|
|
logger.info(
|
|
|
f"Model {model_id} is loading, waiting {estimated_time}s"
|
|
|
)
|
|
|
await asyncio.sleep(min(estimated_time, 60))
|
|
|
return await self._request(model_id, payload, expect_json)
|
|
|
else:
|
|
|
error_text = await response.text()
|
|
|
raise Exception(
|
|
|
f"API request failed with status {response.status}: {error_text}"
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f"Error calling Hugging Face API for {model_id}: {e}")
|
|
|
raise
|
|
|
|
|
|
async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
|
|
|
"""Stream response from Hugging Face API"""
|
|
|
url = f"{self.base_url}{model_id}"
|
|
|
payload["stream"] = True
|
|
|
|
|
|
try:
|
|
|
async with self.session.post(url, json=payload) as response:
|
|
|
if response.status == 200:
|
|
|
async for chunk in response.content:
|
|
|
if chunk:
|
|
|
yield chunk.decode("utf-8")
|
|
|
else:
|
|
|
error_text = await response.text()
|
|
|
raise Exception(
|
|
|
f"Streaming request failed with status {response.status}: {error_text}"
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}")
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
|
|
async def text_to_video(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request a video for ``prompt`` and return the parsed reply.

    Defaults of ``duration=5``, ``fps=24``, ``width=512`` and ``height=512``
    are sent unless overridden; all keyword arguments are forwarded as
    request parameters.
    """
    # Start from the defaults; update() overrides any key the caller passed
    # and appends the remaining keyword arguments after them.
    parameters: Dict[str, Any] = {
        "duration": 5,
        "fps": 24,
        "width": 512,
        "height": 512,
    }
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": prompt, "parameters": parameters}
    )
|
|
|
|
|
|
async def video_to_text(
    self, model_id: str, video_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send a base64-encoded video to the model and return the parsed reply.

    The encoded video is sent as ``inputs.video``; extra keyword arguments
    are forwarded as the request ``parameters``.
    """
    encoded_video = base64.b64encode(video_data).decode("ascii")
    request_body = {"inputs": {"video": encoded_video}, "parameters": kwargs}
    return await self._request(model_id, request_body)
|
|
|
|
|
|
async def code_generation(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request code for a natural-language ``prompt`` and return the reply.

    Defaults of ``max_length=500``, ``temperature=0.2`` and
    ``language="python"`` are sent unless overridden; all keyword arguments
    are forwarded as request parameters.
    """
    parameters: Dict[str, Any] = {
        "max_length": 500,
        "temperature": 0.2,
        "language": "python",
    }
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": prompt, "parameters": parameters}
    )
|
|
|
|
|
|
async def code_completion(
    self, model_id: str, code: str, **kwargs
) -> Dict[str, Any]:
    """Request a completion for partial ``code`` and return the parsed reply.

    Defaults of ``max_length=100`` and ``temperature=0.1`` are sent unless
    overridden; all keyword arguments are forwarded as request parameters.
    """
    parameters: Dict[str, Any] = {"max_length": 100, "temperature": 0.1}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": code, "parameters": parameters}
    )
|
|
|
|
|
|
async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
    """Request a 3D model for ``prompt`` and return the parsed reply.

    Defaults of ``resolution=64`` and ``format="obj"`` are sent unless
    overridden; all keyword arguments are forwarded as request parameters.
    """
    parameters: Dict[str, Any] = {"resolution": 64, "format": "obj"}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": prompt, "parameters": parameters}
    )
|
|
|
|
|
|
async def image_to_3d(
    self, model_id: str, image_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send a base64-encoded image for 3D generation and return the reply.

    The encoded image is sent as ``inputs.image``; extra keyword arguments
    are forwarded as the request ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode("ascii")
    request_body = {"inputs": {"image": encoded_image}, "parameters": kwargs}
    return await self._request(model_id, request_body)
|
|
|
|
|
|
async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
    """Send a base64-encoded image for text recognition and return the reply.

    ``language`` defaults to ``"en"`` unless overridden; all keyword
    arguments are forwarded as request parameters.
    """
    encoded_image = base64.b64encode(image_data).decode("ascii")
    parameters: Dict[str, Any] = {"language": "en"}
    parameters.update(kwargs)
    request_body = {"inputs": {"image": encoded_image}, "parameters": parameters}
    return await self._request(model_id, request_body)
|
|
|
|
|
|
async def document_analysis(
    self, model_id: str, document_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send a base64-encoded document to the model and return the reply.

    The encoded document is sent as ``inputs.document``; extra keyword
    arguments are forwarded as the request ``parameters``.
    """
    encoded_document = base64.b64encode(document_data).decode("ascii")
    request_body = {"inputs": {"document": encoded_document}, "parameters": kwargs}
    return await self._request(model_id, request_body)
|
|
|
|
|
|
async def vision_language(
    self, model_id: str, image_data: bytes, text: str, **kwargs
) -> Dict[str, Any]:
    """Send an image together with ``text`` and return the parsed reply.

    The base64-encoded image and the text are sent as ``inputs.image`` and
    ``inputs.text``; extra keyword arguments are forwarded as the request
    ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode("ascii")
    combined_inputs = {"image": encoded_image, "text": text}
    return await self._request(
        model_id, {"inputs": combined_inputs, "parameters": kwargs}
    )
|
|
|
|
|
|
async def multimodal_reasoning(
    self, model_id: str, inputs: Dict[str, Any], **kwargs
) -> Dict[str, Any]:
    """Forward a caller-built ``inputs`` mapping and return the parsed reply.

    ``inputs`` is sent unchanged; extra keyword arguments are forwarded as
    the request ``parameters``.
    """
    request_body = dict(inputs=inputs, parameters=kwargs)
    return await self._request(model_id, request_body)
|
|
|
|
|
|
async def music_generation(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request music for ``prompt`` and return the parsed reply.

    Defaults of ``duration=30``, ``bpm=120`` and ``genre="electronic"`` are
    sent unless overridden; all keyword arguments are forwarded as request
    parameters.
    """
    parameters: Dict[str, Any] = {
        "duration": 30,
        "bpm": 120,
        "genre": "electronic",
    }
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": prompt, "parameters": parameters}
    )
|
|
|
|
|
|
async def voice_cloning(
    self, model_id: str, text: str, voice_sample: bytes, **kwargs
) -> bytes:
    """Synthesize ``text`` using a voice sample; return the non-JSON body.

    The text and the base64-encoded sample are sent as ``inputs.text`` and
    ``inputs.voice_sample``; extra keyword arguments are forwarded as the
    request ``parameters``.
    """
    encoded_sample = base64.b64encode(voice_sample).decode("ascii")
    cloning_inputs = {"text": text, "voice_sample": encoded_sample}
    request_body = {"inputs": cloning_inputs, "parameters": kwargs}
    return await self._request(model_id, request_body, expect_json=False)
|
|
|
|
|
|
async def super_resolution(
    self, model_id: str, image_data: bytes, **kwargs
) -> bytes:
    """Send an image for upscaling and return the non-JSON response body.

    ``scale_factor`` defaults to ``4`` unless overridden; all keyword
    arguments are forwarded as request parameters.
    """
    encoded_image = base64.b64encode(image_data).decode("ascii")
    parameters: Dict[str, Any] = {"scale_factor": 4}
    parameters.update(kwargs)
    request_body = {"inputs": {"image": encoded_image}, "parameters": parameters}
    return await self._request(model_id, request_body, expect_json=False)
|
|
|
|
|
|
async def background_removal(
    self, model_id: str, image_data: bytes, **kwargs
) -> bytes:
    """Send an image for background removal; return the non-JSON body.

    The base64-encoded image is sent as ``inputs.image``; extra keyword
    arguments are forwarded as the request ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode("ascii")
    request_body = {"inputs": {"image": encoded_image}, "parameters": kwargs}
    return await self._request(model_id, request_body, expect_json=False)
|
|
|
|
|
|
async def creative_writing(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request creative text for ``prompt`` and return the parsed reply.

    Defaults of ``max_length=1000``, ``creativity=0.8`` and
    ``genre="general"`` are sent unless overridden; all keyword arguments
    are forwarded as request parameters.
    """
    parameters: Dict[str, Any] = {
        "max_length": 1000,
        "creativity": 0.8,
        "genre": "general",
    }
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": prompt, "parameters": parameters}
    )
|
|
|
|
|
|
async def business_document(
    self, model_id: str, document_type: str, context: str, **kwargs
) -> Dict[str, Any]:
    """Request a business document and return the parsed reply.

    The model input is the instruction ``"Generate <document_type>: <context>"``.
    Defaults of ``format="professional"`` and ``length="medium"`` are sent
    unless overridden; all keyword arguments are forwarded as request
    parameters.
    """
    instruction = f"Generate {document_type}: {context}"
    parameters: Dict[str, Any] = {"format": "professional", "length": "medium"}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": instruction, "parameters": parameters}
    )
|
|
|
|
|
|
|
|
|
class HuggingFaceModelManager:
|
|
|
"""Manager for all Hugging Face model operations"""
|
|
|
|
|
|
def __init__(self, api_token: str):
    """Keep the API token and build the model catalogue.

    Args:
        api_token: Token stored on the instance as ``api_token``.
    """
    # Stored for later use by methods that open an inference client.
    self.api_token = api_token
    # A fresh HuggingFaceModels instance backs every catalogue lookup.
    self.models = HuggingFaceModels()
|
|
|
|
|
|
def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
    """Return the catalogue list registered for ``category``.

    Only the eight categories in the table below are looked up here; any
    other category yields an empty list.
    """
    # (category, attribute on self.models) pairs, checked in order.  The
    # attribute is read only for the matching entry.
    category_attributes = (
        (ModelCategory.TEXT_GENERATION, "TEXT_GENERATION_MODELS"),
        (ModelCategory.TEXT_TO_IMAGE, "TEXT_TO_IMAGE_MODELS"),
        (ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, "ASR_MODELS"),
        (ModelCategory.TEXT_TO_SPEECH, "TTS_MODELS"),
        (ModelCategory.IMAGE_CLASSIFICATION, "IMAGE_CLASSIFICATION_MODELS"),
        (ModelCategory.FEATURE_EXTRACTION, "FEATURE_EXTRACTION_MODELS"),
        (ModelCategory.TRANSLATION, "TRANSLATION_MODELS"),
        (ModelCategory.SUMMARIZATION, "SUMMARIZATION_MODELS"),
    )
    for known_category, attribute in category_attributes:
        if category == known_category:
            return getattr(self.models, attribute)
    return []
|
|
|
|
|
|
def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
    """Return a mapping from each registered category to its model list.

    Several categories share one catalogue list.  The groups below are
    expanded in order, so the resulting dict has one key per category in
    the same order as the groups are written.
    """
    catalogue = self.models
    mc = ModelCategory

    # (model list, categories served by that list), in key order.
    grouped = (
        # Core models
        (catalogue.TEXT_GENERATION_MODELS, (mc.TEXT_GENERATION,)),
        (catalogue.TEXT_TO_IMAGE_MODELS, (mc.TEXT_TO_IMAGE,)),
        (catalogue.ASR_MODELS, (mc.AUTOMATIC_SPEECH_RECOGNITION,)),
        (catalogue.TTS_MODELS, (mc.TEXT_TO_SPEECH,)),
        (catalogue.IMAGE_CLASSIFICATION_MODELS, (mc.IMAGE_CLASSIFICATION,)),
        (catalogue.FEATURE_EXTRACTION_MODELS, (mc.FEATURE_EXTRACTION,)),
        (catalogue.TRANSLATION_MODELS, (mc.TRANSLATION,)),
        (catalogue.SUMMARIZATION_MODELS, (mc.SUMMARIZATION,)),
        # Video
        (
            catalogue.VIDEO_GENERATION_MODELS,
            (
                mc.TEXT_TO_VIDEO,
                mc.VIDEO_GENERATION,
                mc.VIDEO_TO_TEXT,
                mc.VIDEO_CLASSIFICATION,
            ),
        ),
        # Code
        (
            catalogue.CODE_GENERATION_MODELS,
            (
                mc.CODE_GENERATION,
                mc.CODE_COMPLETION,
                mc.CODE_EXPLANATION,
                mc.APP_GENERATION,
            ),
        ),
        # 3D
        (
            catalogue.THREE_D_MODELS,
            (
                mc.TEXT_TO_3D,
                mc.IMAGE_TO_3D,
                mc.THREE_D_GENERATION,
                mc.MESH_GENERATION,
            ),
        ),
        # Document processing
        (
            catalogue.DOCUMENT_PROCESSING_MODELS,
            (
                mc.OCR,
                mc.DOCUMENT_ANALYSIS,
                mc.HANDWRITING_RECOGNITION,
                mc.TABLE_EXTRACTION,
                mc.FORM_PROCESSING,
            ),
        ),
        # Multimodal
        (
            catalogue.MULTIMODAL_MODELS,
            (
                mc.VISION_LANGUAGE,
                mc.MULTIMODAL_REASONING,
                mc.VISUAL_QUESTION_ANSWERING,
                mc.MULTIMODAL_CHAT,
                mc.CROSS_MODAL_GENERATION,
            ),
        ),
        # Specialized AI
        (
            catalogue.SPECIALIZED_AI_MODELS,
            (
                mc.MUSIC_GENERATION,
                mc.VOICE_CLONING,
                mc.SUPER_RESOLUTION,
                mc.FACE_RESTORATION,
                mc.IMAGE_INPAINTING,
                mc.BACKGROUND_REMOVAL,
            ),
        ),
        # Creative content
        (
            catalogue.CREATIVE_CONTENT_MODELS,
            (
                mc.CREATIVE_WRITING,
                mc.STORY_GENERATION,
                mc.POETRY_GENERATION,
                mc.BLOG_WRITING,
                mc.MARKETING_COPY,
            ),
        ),
        # Game development
        (
            catalogue.GAME_DEVELOPMENT_MODELS,
            (
                mc.GAME_ASSET_GENERATION,
                mc.CHARACTER_GENERATION,
                mc.LEVEL_GENERATION,
                mc.DIALOGUE_GENERATION,
            ),
        ),
        # Science and research
        (
            catalogue.SCIENCE_RESEARCH_MODELS,
            (
                mc.PROTEIN_FOLDING,
                mc.MOLECULE_GENERATION,
                mc.SCIENTIFIC_WRITING,
                mc.RESEARCH_ASSISTANCE,
                mc.DATA_ANALYSIS,
            ),
        ),
        # Business productivity
        (
            catalogue.BUSINESS_PRODUCTIVITY_MODELS,
            (
                mc.EMAIL_GENERATION,
                mc.PRESENTATION_CREATION,
                mc.REPORT_GENERATION,
                mc.MEETING_SUMMARIZATION,
                mc.PROJECT_PLANNING,
            ),
        ),
        # Teaching
        (
            catalogue.AI_TEACHER_MODELS,
            (
                mc.AI_TUTORING,
                mc.EDUCATIONAL_CONTENT,
                mc.LESSON_PLANNING,
                mc.CONCEPT_EXPLANATION,
                mc.HOMEWORK_ASSISTANCE,
                mc.QUIZ_GENERATION,
                mc.CURRICULUM_DESIGN,
                mc.LEARNING_ASSESSMENT,
                mc.ADAPTIVE_LEARNING,
                mc.SUBJECT_TEACHING,
                mc.MATH_TUTORING,
                mc.SCIENCE_TUTORING,
                mc.LANGUAGE_TUTORING,
                mc.HISTORY_TUTORING,
                mc.CODING_INSTRUCTION,
                mc.EXAM_PREPARATION,
                mc.STUDY_GUIDE_CREATION,
                mc.EDUCATIONAL_GAMES,
                mc.LEARNING_ANALYTICS,
                mc.PERSONALIZED_LEARNING,
            ),
        ),
        # Qwen
        (
            catalogue.QWEN_MODELS,
            (
                mc.QWEN_REASONING,
                mc.QWEN_MATH,
                mc.QWEN_CODE,
                mc.QWEN_VISION,
                mc.QWEN_AUDIO,
            ),
        ),
        # DeepSeek
        (
            catalogue.DEEPSEEK_MODELS,
            (
                mc.DEEPSEEK_CODING,
                mc.DEEPSEEK_REASONING,
                mc.DEEPSEEK_MATH,
                mc.DEEPSEEK_RESEARCH,
            ),
        ),
        # Image editing and face models.  These are interleaved, so the
        # image-editing list appears twice to keep the key order.
        (catalogue.IMAGE_EDITING_MODELS, (mc.IMAGE_EDITING,)),
        (
            catalogue.FACE_SWAP_MODELS,
            (mc.FACE_SWAP, mc.FACE_ENHANCEMENT, mc.FACE_GENERATION),
        ),
        (
            catalogue.IMAGE_EDITING_MODELS,
            (
                mc.PORTRAIT_EDITING,
                mc.PHOTO_RESTORATION,
                mc.IMAGE_UPSCALING,
                mc.COLOR_CORRECTION,
                mc.ARTISTIC_FILTER,
            ),
        ),
        # Advanced speech
        (
            catalogue.ADVANCED_SPEECH_MODELS,
            (
                mc.ADVANCED_TTS,
                mc.ADVANCED_STT,
                mc.VOICE_CONVERSION,
                mc.SPEECH_ENHANCEMENT,
                mc.AUDIO_GENERATION,
                mc.MULTILINGUAL_TTS,
                mc.MULTILINGUAL_STT,
                mc.REAL_TIME_TRANSLATION,
            ),
        ),
        # Talking avatars
        (
            catalogue.TALKING_AVATAR_MODELS,
            (
                mc.TALKING_AVATAR,
                mc.AVATAR_GENERATION,
                mc.LIP_SYNC,
                mc.FACIAL_ANIMATION,
                mc.GESTURE_GENERATION,
                mc.VIRTUAL_PRESENTER,
                mc.AI_ANCHOR,
            ),
        ),
        # Interactive language
        (
            catalogue.INTERACTIVE_LANGUAGE_MODELS,
            (
                mc.INTERACTIVE_CHAT,
                mc.BILINGUAL_CONVERSATION,
                mc.CULTURAL_ADAPTATION,
                mc.CONTEXT_AWARE_CHAT,
                mc.PERSONALITY_CHAT,
                mc.ROLE_PLAY_CHAT,
                mc.DOMAIN_SPECIFIC_CHAT,
            ),
        ),
    )

    return {
        category: model_list
        for model_list, categories in grouped
        for category in categories
    }
|
|
|
|
|
|
def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
    """Return the first catalogued model whose ``model_id`` matches, else None.

    The lists returned by ``get_all_models`` are searched in mapping order.
    """
    every_model = (
        model
        for models_list in self.get_all_models().values()
        for model in models_list
    )
    # next() stops at the first hit; None signals that nothing matched.
    return next(
        (model for model in every_model if model.model_id == model_id), None
    )
|
|
|
|
|
|
async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
    """Invoke ``model_id`` through the client method that serves ``category``.

    A ``HuggingFaceInference`` context is opened with the stored API token
    for the duration of the call.  ``kwargs`` are passed straight to the
    selected client method.  For the business-document categories,
    ``category.value`` is also passed as the document type.

    Raises:
        ValueError: If ``category`` is not covered by the routing table.
    """
    mc = ModelCategory

    # Ordered routing table: the first entry whose tuple contains the
    # category wins.  Each entry names the client method to call.
    routes = (
        # Core models
        ((mc.TEXT_GENERATION,), "text_generation"),
        ((mc.TEXT_TO_IMAGE,), "text_to_image"),
        ((mc.AUTOMATIC_SPEECH_RECOGNITION,), "automatic_speech_recognition"),
        ((mc.TEXT_TO_SPEECH,), "text_to_speech"),
        ((mc.IMAGE_CLASSIFICATION,), "image_classification"),
        ((mc.FEATURE_EXTRACTION,), "feature_extraction"),
        ((mc.TRANSLATION,), "translation"),
        ((mc.SUMMARIZATION,), "summarization"),
        ((mc.QUESTION_ANSWERING,), "question_answering"),
        ((mc.ZERO_SHOT_CLASSIFICATION,), "zero_shot_classification"),
        ((mc.CONVERSATIONAL,), "conversational"),
        # Video
        ((mc.TEXT_TO_VIDEO, mc.VIDEO_GENERATION), "text_to_video"),
        ((mc.VIDEO_TO_TEXT,), "video_to_text"),
        # Video classification is served by the image classification call.
        ((mc.VIDEO_CLASSIFICATION,), "image_classification"),
        # Code
        ((mc.CODE_GENERATION, mc.APP_GENERATION), "code_generation"),
        ((mc.CODE_COMPLETION, mc.CODE_EXPLANATION), "code_completion"),
        # 3D
        ((mc.TEXT_TO_3D, mc.THREE_D_GENERATION), "text_to_3d"),
        ((mc.IMAGE_TO_3D, mc.MESH_GENERATION), "image_to_3d"),
        # Documents
        ((mc.OCR,), "ocr"),
        (
            (
                mc.DOCUMENT_ANALYSIS,
                mc.FORM_PROCESSING,
                mc.TABLE_EXTRACTION,
                mc.LAYOUT_ANALYSIS,
            ),
            "document_analysis",
        ),
        ((mc.HANDWRITING_RECOGNITION,), "ocr"),
        # Multimodal
        (
            (
                mc.VISION_LANGUAGE,
                mc.VISUAL_QUESTION_ANSWERING,
                mc.IMAGE_TEXT_MATCHING,
            ),
            "vision_language",
        ),
        (
            (
                mc.MULTIMODAL_REASONING,
                mc.MULTIMODAL_CHAT,
                mc.CROSS_MODAL_GENERATION,
            ),
            "multimodal_reasoning",
        ),
        # Specialized AI
        ((mc.MUSIC_GENERATION,), "music_generation"),
        ((mc.VOICE_CLONING,), "voice_cloning"),
        (
            (
                mc.SUPER_RESOLUTION,
                mc.FACE_RESTORATION,
                mc.IMAGE_INPAINTING,
                mc.IMAGE_OUTPAINTING,
            ),
            "super_resolution",
        ),
        ((mc.BACKGROUND_REMOVAL,), "background_removal"),
        # Creative content
        (
            (
                mc.CREATIVE_WRITING,
                mc.STORY_GENERATION,
                mc.POETRY_GENERATION,
                mc.SCREENPLAY_WRITING,
            ),
            "creative_writing",
        ),
        ((mc.BLOG_WRITING, mc.MARKETING_COPY), "text_generation"),
        # Game development
        (
            (
                mc.CHARACTER_GENERATION,
                mc.LEVEL_GENERATION,
                mc.DIALOGUE_GENERATION,
                mc.GAME_ASSET_GENERATION,
            ),
            "creative_writing",
        ),
        # Science and research
        (
            (
                mc.PROTEIN_FOLDING,
                mc.MOLECULE_GENERATION,
                mc.SCIENTIFIC_WRITING,
                mc.RESEARCH_ASSISTANCE,
                mc.DATA_ANALYSIS,
            ),
            "text_generation",
        ),
        # Business productivity
        (
            (
                mc.EMAIL_GENERATION,
                mc.PRESENTATION_CREATION,
                mc.REPORT_GENERATION,
                mc.MEETING_SUMMARIZATION,
                mc.PROJECT_PLANNING,
            ),
            "business_document",
        ),
        # Teaching and Qwen text categories
        (
            (
                mc.AI_TUTORING,
                mc.EDUCATIONAL_CONTENT,
                mc.LESSON_PLANNING,
                mc.CONCEPT_EXPLANATION,
                mc.HOMEWORK_ASSISTANCE,
                mc.QUIZ_GENERATION,
                mc.CURRICULUM_DESIGN,
                mc.LEARNING_ASSESSMENT,
                mc.ADAPTIVE_LEARNING,
                mc.SUBJECT_TEACHING,
                mc.MATH_TUTORING,
                mc.SCIENCE_TUTORING,
                mc.LANGUAGE_TUTORING,
                mc.HISTORY_TUTORING,
                mc.CODING_INSTRUCTION,
                mc.EXAM_PREPARATION,
                mc.STUDY_GUIDE_CREATION,
                mc.EDUCATIONAL_GAMES,
                mc.LEARNING_ANALYTICS,
                mc.PERSONALIZED_LEARNING,
                mc.QWEN_REASONING,
                mc.QWEN_MATH,
                mc.QWEN_CODE,
            ),
            "text_generation",
        ),
        ((mc.QWEN_VISION,), "vision_language"),
        ((mc.QWEN_AUDIO,), "automatic_speech_recognition"),
        # DeepSeek
        (
            (
                mc.DEEPSEEK_CODING,
                mc.DEEPSEEK_REASONING,
                mc.DEEPSEEK_MATH,
                mc.DEEPSEEK_RESEARCH,
            ),
            "text_generation",
        ),
        # Image editing and faces
        (
            (
                mc.IMAGE_EDITING,
                mc.PORTRAIT_EDITING,
                mc.PHOTO_RESTORATION,
                mc.COLOR_CORRECTION,
                mc.ARTISTIC_FILTER,
            ),
            "text_to_image",
        ),
        ((mc.IMAGE_UPSCALING,), "super_resolution"),
        (
            (mc.FACE_SWAP, mc.FACE_ENHANCEMENT, mc.FACE_GENERATION),
            "text_to_image",
        ),
        # Advanced speech
        (
            (mc.ADVANCED_TTS, mc.MULTILINGUAL_TTS, mc.VOICE_CONVERSION),
            "text_to_speech",
        ),
        (
            (mc.ADVANCED_STT, mc.MULTILINGUAL_STT, mc.SPEECH_ENHANCEMENT),
            "automatic_speech_recognition",
        ),
        ((mc.AUDIO_GENERATION, mc.REAL_TIME_TRANSLATION), "text_to_speech"),
        # Talking avatars
        (
            (
                mc.TALKING_AVATAR,
                mc.AVATAR_GENERATION,
                mc.LIP_SYNC,
                mc.FACIAL_ANIMATION,
                mc.GESTURE_GENERATION,
                mc.VIRTUAL_PRESENTER,
                mc.AI_ANCHOR,
            ),
            "text_to_video",
        ),
        # Interactive language
        (
            (
                mc.INTERACTIVE_CHAT,
                mc.BILINGUAL_CONVERSATION,
                mc.CULTURAL_ADAPTATION,
                mc.CONTEXT_AWARE_CHAT,
                mc.PERSONALITY_CHAT,
                mc.ROLE_PLAY_CHAT,
                mc.DOMAIN_SPECIFIC_CHAT,
            ),
            "conversational",
        ),
    )

    async with HuggingFaceInference(self.api_token) as hf:
        for categories, method_name in routes:
            if category not in categories:
                continue
            handler = getattr(hf, method_name)
            if method_name == "business_document":
                # The category's string value doubles as the document type.
                return await handler(model_id, category.value, **kwargs)
            return await handler(model_id, **kwargs)

        raise ValueError(f"Unsupported model category: {category}")
|
|
|
|