# models_config.py
LLM_CONFIG = {
    "primary_provider": "huggingface",
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # High-quality instruct model
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "gpt2",  # Simple but guaranteed working model
            "is_chat_model": True
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for classification
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for safety
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True
        }
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check"
    },
    "zero_gpu_task_mapping": {
        "intent_classification": "classification",
        "embedding_generation": "embedding",
        "safety_check": "general",
        "general_reasoning": "reasoning",
        "response_synthesis": "general"
    }
}
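

# --- Usage sketch (not part of the original config) ---
# A minimal illustration, assuming a hypothetical helper named
# `resolve_model_for_task`, of how a router might consume LLM_CONFIG:
# look up the entry whose declared task matches the request, then degrade
# to the configured fallback model id when the primary is unavailable,
# mirroring the "primary" -> "fallback" step of routing_logic["fallback_chain"].
def resolve_model_for_task(task: str, primary_available: bool = True) -> dict:
    """Return the model settings for a task, honoring the fallback chain."""
    models = LLM_CONFIG["models"]
    # Pick the first model entry whose "task" field matches; default to the
    # primary reasoning model if nothing matches.
    entry = next(
        (cfg for cfg in models.values() if cfg["task"] == task),
        models["reasoning_primary"],
    )
    if primary_available:
        return entry
    # Swap in the fallback model id while keeping the rest of the settings.
    fallback_id = entry.get("fallback", models["reasoning_primary"]["fallback"])
    return {**entry, "model_id": fallback_id}


# Example call (hypothetical): route an intent-classification request.
# resolve_model_for_task("intent_classification")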