# Copyright (c) 2024 Microsoft # Licensed under The MIT License [see LICENSE for details] # flake8: noqa from .minference_configuration import MInferenceConfig from .models_patch import MInference from .ops.block_sparse_flash_attention import block_sparse_attention from .ops.pit_sparse_flash_attention_v2 import vertical_slash_sparse_attention from .ops.streaming_kernel import streaming_forward from .patch import ( minference_patch, minference_patch_kv_cache_cpu, minference_patch_with_snapkv, patch_hf, ) from .version import VERSION as __version__ __all__ = [ "MInference", "MInferenceConfig", "minference_patch", "minference_patch_kv_cache_cpu", "minference_patch_with_snapkv", "patch_hf", "vertical_slash_sparse_attention", "block_sparse_attention", "streaming_forward", ]