tritesh
/

dflash-mlx-universal

Text Generation

dflash-mlx-universal

speculative-decoding

inference-acceleration

block-diffusion

Model card Files Files and versions

tritesh commited on 3 days ago

Commit

5fdf20a

·

verified ·

1 Parent(s): e8cb6a7

Upload dflash_mlx/init.py

Files changed (1) hide show

dflash_mlx/__init__.py +43 -3

dflash_mlx/__init__.py CHANGED Viewed

@@ -2,16 +2,56 @@
 DFlash-MLX-Universal: Block Diffusion Speculative Decoding for MLX
 A universal MLX implementation of DFlash that works with any MLX-converted model.
-Optimized for Apple Silicon (M2/M3/M4 Pro/Max/Ultra).
 """
 from .speculative_decode import DFlashSpeculativeDecoder
 from .universal import UniversalDFlashDecoder
-from .convert import convert_dflash_to_mlx
-__version__ = "0.1.1"
 __all__ = [
     "DFlashSpeculativeDecoder",
     "UniversalDFlashDecoder",
     "convert_dflash_to_mlx",
 ]

 DFlash-MLX-Universal: Block Diffusion Speculative Decoding for MLX
 A universal MLX implementation of DFlash that works with any MLX-converted model.
+Optimized for Apple Silicon (M2/M3/M4 Pro/Max/Ultra) and compatible with all
+major model families: Qwen3, Qwen3.5, LLaMA, Mistral, Gemma.
+Key features:
+- Architecture-agnostic adapters for any MLX model family
+- KV cache management with proper trim/rewind on rejection
+- Streaming generation with real-time text output
+- OpenAI-compatible server (via serve.py)
+- Training custom drafters on-the-fly (via trainer.py)
+- Conversion of PyTorch DFlash drafters to MLX (via convert.py)
+- Benchmarking and diagnostics tools
 """
+from .adapters import (
+    MLXTargetAdapter,
+    Qwen3Adapter,
+    Qwen35Adapter,
+    LlamaAdapter,
+    MistralAdapter,
+    GemmaAdapter,
+    LoadedTargetModel,
+    load_target_model,
+    adapter_for_model_type,
+    detect_model_architecture,
+)
+from .model import DFlashDraftModel, DFlashDenoiser
 from .speculative_decode import DFlashSpeculativeDecoder
+from .convert import convert_dflash_to_mlx, load_mlx_dflash
 from .universal import UniversalDFlashDecoder
+__version__ = "0.2.0"
 __all__ = [
+    # Adapters
+    "MLXTargetAdapter",
+    "Qwen3Adapter",
+    "Qwen35Adapter",
+    "LlamaAdapter",
+    "MistralAdapter",
+    "GemmaAdapter",
+    "LoadedTargetModel",
+    "load_target_model",
+    "adapter_for_model_type",
+    "detect_model_architecture",
+    # Core model
+    "DFlashDraftModel",
+    "DFlashDenoiser",
+    # Decoding
     "DFlashSpeculativeDecoder",
     "UniversalDFlashDecoder",
+    # Conversion
     "convert_dflash_to_mlx",
+    "load_mlx_dflash",
 ]