from typing import Dict, List, Any

from lmdeploy import pipeline
from lmdeploy.vl import load_image
from lmdeploy.messages import TurbomindEngineConfig


class EndpointHandler():
    """Callable request handler that wraps an lmdeploy text-generation pipeline.

    The pipeline is built once in ``__init__`` and reused for every call.
    """

    def __init__(self, path):
        """Preload the model so that individual requests do not pay the load cost.

        Args:
            path: Location of the model, forwarded (as a string) to
                ``lmdeploy.pipeline``.
        """
        backend_config = TurbomindEngineConfig(
            model_name="deepseek-ai/deepseek-coder-33b-instruct",
            model_format='hf',
            tp=1,
        )
        self.pipe = pipeline(
            str(path),
            backend_config=backend_config,
            log_level='INFO',
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the pipeline on the prompt contained in *data*.

        Args:
            data: Request payload. The prompt is read from the ``query`` key;
                if that key is missing or empty, the ``inputs`` key is used
                instead.

        Returns:
            ``{'response': <generated text>}`` on success, or
            ``{'error': 'No query provided'}`` when no prompt was supplied.
        """
        # 'query' keeps priority for existing clients; 'inputs' is accepted as
        # a fallback because the original docstring advertised that key.
        query = data.get('query') or data.get('inputs')
        if not query:
            # Return a dict (not a list) so both paths match the annotation.
            return {'error': 'No query provided'}

        result = self.pipe([query])
        # The pipeline is called with a one-element batch, so unwrap the
        # single response before reading its text. The isinstance guard keeps
        # this working should the pipeline hand back a bare response object.
        if isinstance(result, (list, tuple)):
            result = result[0]
        return {'response': result.text}