Student0809's picture
Add files using upload-large-folder tool
cb2428f verified
# Copyright (c) Alibaba, Inc. and its affiliates.
from abc import ABC, abstractmethod
from typing import AsyncIterator, Iterator, List, Optional, Union
from swift.llm import InferRequest
from swift.plugin import Metric
from ..protocol import ChatCompletionResponse, ChatCompletionStreamResponse, RequestConfig
class BaseInferEngine(ABC):
    """Abstract interface that every inference engine must implement.

    Subclasses have to provide both a batch entry point (``infer``) and a
    single-request coroutine entry point (``infer_async``); the class cannot
    be instantiated until both are overridden.
    """

    @abstractmethod
    def infer(
        self,
        infer_requests: List[InferRequest],
        request_config: Optional[RequestConfig] = None,
        metrics: Optional[List[Metric]] = None,
        *,
        use_tqdm: Optional[bool] = None,
        **kwargs,
    ) -> List[Union[ChatCompletionResponse, Iterator[ChatCompletionStreamResponse]]]:
        """Run inference over a batch of requests.

        The requests are processed according to ``request_config``. A tqdm
        progress display may optionally be used, and engine-specific options
        can be forwarded through ``**kwargs``.

        Args:
            infer_requests (List[InferRequest]): The inference requests to
                process.
            request_config (Optional[RequestConfig]): Configuration applied to
                the requests, if any.
            metrics (Optional[List[Metric]]): Metrics used to report usage
                information for the run.
            use_tqdm (Optional[bool]): Keyword-only. Whether to use tqdm for
                progress visualization.
            **kwargs: Additional keyword arguments.

        Returns:
            List[Union[ChatCompletionResponse, Iterator[ChatCompletionStreamResponse]]]:
                The inference results; each item is either a complete response
                or an iterator of stream responses.
        """
        ...

    @abstractmethod
    async def infer_async(
        self,
        infer_request: InferRequest,
        request_config: Optional[RequestConfig] = None,
        **kwargs,
    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionStreamResponse]]:
        """Asynchronously run inference on a single request.

        The request is processed according to ``request_config``;
        engine-specific options can be forwarded through ``**kwargs``.

        Args:
            infer_request (InferRequest): The inference request to process.
            request_config (Optional[RequestConfig]): Configuration applied to
                the request, if any.
            **kwargs: Additional keyword arguments.

        Returns:
            Union[ChatCompletionResponse, AsyncIterator[ChatCompletionStreamResponse]]:
                The result of the asynchronous inference, either a complete
                response or an async iterator of stream responses.
        """
        ...