# models_config.py
LLM_CONFIG = {
    "primary_provider": "huggingface",
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # High-quality instruct model
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "gpt2",  # Simple but guaranteed working model
            "is_chat_model": True
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for classification
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for safety
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True
        }
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check"
    },
    "zero_gpu_task_mapping": {
        "intent_classification": "classification",
        "embedding_generation": "embedding",
        "safety_check": "general",
        "general_reasoning": "reasoning",
        "response_synthesis": "general"
    }
}
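

# --- Usage sketch (not part of the original config) ---
# A minimal illustration, assuming a hypothetical helper named
# `resolve_model_for_task`, of how a router might consume LLM_CONFIG:
# look up the entry whose declared task matches the request, then degrade
# to the configured fallback model id when the primary is unavailable,
# mirroring the "primary" -> "fallback" step of routing_logic["fallback_chain"].
def resolve_model_for_task(task: str, primary_available: bool = True) -> dict:
    """Return the model settings for a task, honoring the fallback chain."""
    models = LLM_CONFIG["models"]
    # Pick the first model entry whose "task" field matches; default to the
    # primary reasoning model if nothing matches.
    entry = next(
        (cfg for cfg in models.values() if cfg["task"] == task),
        models["reasoning_primary"],
    )
    if primary_available:
        return entry
    # Swap in the fallback model id while keeping the rest of the settings.
    fallback_id = entry.get("fallback", models["reasoning_primary"]["fallback"])
    return {**entry, "model_id": fallback_id}


# Example call (hypothetical): route an intent-classification request.
# resolve_model_for_task("intent_classification")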