Student0809
/

interactSpeech

Model card Files Files and versions

interactSpeech/swift/llm/infer/infer_engine/base.py

Student0809's picture

Add files using upload-large-folder tool

cb2428f verified 4 months ago

history blame contribute delete

2.6 kB

	# Copyright (c) Alibaba, Inc. and its affiliates.
	from abc import ABC, abstractmethod
	from typing import AsyncIterator, Iterator, List, Optional, Union

	from swift.llm import InferRequest
	from swift.plugin import Metric
	from ..protocol import ChatCompletionResponse, ChatCompletionStreamResponse, RequestConfig


	class BaseInferEngine(ABC):
	    """Abstract interface for inference engines.

	    Concrete engines must implement both entry points declared here:

	    * ``infer``: a regular method that takes a list of requests.
	    * ``infer_async``: a coroutine that takes a single request.
	    """

	    @abstractmethod
	    def infer(
	        self,
	        infer_requests: List[InferRequest],
	        request_config: Optional[RequestConfig] = None,
	        metrics: Optional[List[Metric]] = None,
	        *,
	        use_tqdm: Optional[bool] = None,
	        **kwargs
	    ) -> List[Union[ChatCompletionResponse, Iterator[ChatCompletionStreamResponse]]]:
	        """Run inference over a batch of requests.

	        Every request in ``infer_requests`` is processed according to
	        ``request_config``. Progress may optionally be displayed with tqdm,
	        and extra keyword arguments are accepted.

	        Args:
	            infer_requests: The inference requests to process.
	            request_config: Configuration applied to the requests; may be
	                omitted (``None``).
	            metrics: Optional list of ``Metric`` objects describing the usage
	                information to return.
	            use_tqdm: Keyword-only. Whether to show a tqdm progress display.
	                The meaning of ``None`` is left to the implementation.
	            **kwargs: Additional keyword arguments.

	        Returns:
	            The inference results as a list. Each element is either a
	            ``ChatCompletionResponse`` or an iterator of
	            ``ChatCompletionStreamResponse`` objects.
	            NOTE(review): presumably the iterator form corresponds to
	            streaming output -- confirm against the concrete engines.
	        """

	    @abstractmethod
	    async def infer_async(
	        self,
	        infer_request: InferRequest,
	        request_config: Optional[RequestConfig] = None,
	        **kwargs
	    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionStreamResponse]]:
	        """Run inference asynchronously on one request.

	        The single ``infer_request`` is processed according to
	        ``request_config``; extra keyword arguments are accepted.

	        Args:
	            infer_request: The inference request to process.
	            request_config: Configuration applied to the request; may be
	                omitted (``None``).
	            **kwargs: Additional keyword arguments.

	        Returns:
	            The result of the asynchronous inference: either a
	            ``ChatCompletionResponse`` or an async iterator of
	            ``ChatCompletionStreamResponse`` objects.
	        """