Instructions for using vidfom/Ltx-3 with libraries, inference providers, notebooks, and local apps. Follow the sections below to get started.
- Libraries
- llama-cpp-python
How to use vidfom/Ltx-3 with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="vidfom/Ltx-3",
    filename="ComfyUI/models/text_encoders/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf",
)

# No input example is defined for this model task; the messages below are illustrative.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}]
)
print(response["choices"][0]["message"]["content"])
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- llama.cpp
How to use vidfom/Ltx-3 with llama.cpp:
Install with Homebrew (macOS, Linux)
brew install llama.cpp

# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL

# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Install with WinGet (Windows)
winget install llama.cpp

# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL

# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use a pre-built binary
# Download a pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases

# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL

# Run inference directly in the terminal:
./llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli

# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL

# Run inference directly in the terminal:
./build/bin/llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use Docker
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
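However you start it, llama-server exposes an OpenAI-compatible HTTP API on port 8080 by default. A minimal sketch of calling it from Python, assuming the requests package is installed and the server from one of the commands above is running (adjust the URL if you passed --port):

import requests

# Chat completion against the local llama-server started above.
# The "model" field is informational for a single-model server.
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "vidfom/Ltx-3",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])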
- LM Studio
- Jan
- Ollama
How to use vidfom/Ltx-3 with Ollama:
ollama run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
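To call the same model from code, here is a minimal sketch using the official ollama Python package (pip install ollama), assuming a local Ollama server is running:

import ollama

# Same model tag as the CLI command above; the prompt is illustrative.
response = ollama.chat(
    model="hf.co/vidfom/Ltx-3:UD-Q4_K_XL",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response["message"]["content"])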
- Unsloth Studio
How to use vidfom/Ltx-3 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh

# Run Unsloth Studio:
unsloth studio -H 0.0.0.0 -p 8888

# Then open http://localhost:8888 in your browser
# and search for vidfom/Ltx-3 to start chatting.
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex

# Run Unsloth Studio:
unsloth studio -H 0.0.0.0 -p 8888

# Then open http://localhost:8888 in your browser
# and search for vidfom/Ltx-3 to start chatting.
Use Hugging Face Spaces for Unsloth
# No setup required.
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# and search for vidfom/Ltx-3 to start chatting.
- Docker Model Runner
How to use vidfom/Ltx-3 with Docker Model Runner:
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Lemonade
How to use vidfom/Ltx-3 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull vidfom/Ltx-3:UD-Q4_K_XL
Run and chat with the model
lemonade run user.Ltx-3-UD-Q4_K_XL
List all available models
lemonade list
import torch
import logging

try:
    import comfy_kitchen as ck
    from comfy_kitchen.tensor import (
        QuantizedTensor,
        QuantizedLayout,
        TensorCoreFP8Layout as _CKFp8Layout,
        TensorCoreNVFP4Layout as _CKNvfp4Layout,
        register_layout_op,
        register_layout_class,
        get_layout_class,
    )
    _CK_AVAILABLE = True

    if torch.version.cuda is None:
        ck.registry.disable("cuda")
    else:
        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
        if cuda_version < (13,):
            ck.registry.disable("cuda")
            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

    ck.registry.disable("triton")

    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:
    logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
    _CK_AVAILABLE = False

    # Stubs so the module (and its __all__ re-exports) stay importable
    # when comfy_kitchen is absent.
    class QuantizedTensor:
        pass

    class QuantizedLayout:
        pass

    class _CKFp8Layout:
        pass

    class _CKNvfp4Layout:
        pass

    def register_layout_class(name, cls):
        pass

    def register_layout_op(*args, **kwargs):
        pass

    def get_layout_class(name):
        return None

_CK_MXFP8_AVAILABLE = False
if _CK_AVAILABLE:
    try:
        from comfy_kitchen.tensor import TensorCoreMXFP8Layout as _CKMxfp8Layout
        _CK_MXFP8_AVAILABLE = True
    except ImportError:
        logging.warning("comfy_kitchen does not support MXFP8, please update comfy_kitchen.")

if not _CK_MXFP8_AVAILABLE:
    class _CKMxfp8Layout:
        pass
import comfy.float

# ==============================================================================
# FP8 Layouts with Comfy-Specific Extensions
# ==============================================================================
class _TensorCoreFP8LayoutBase(_CKFp8Layout):
    FP8_DTYPE = None  # Must be overridden in subclass

    @classmethod
    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
        if cls.FP8_DTYPE is None:
            raise NotImplementedError(f"{cls.__name__} must define FP8_DTYPE")
        orig_dtype = tensor.dtype
        orig_shape = tuple(tensor.shape)
        if isinstance(scale, str) and scale == "recalculate":
            scale = torch.amax(tensor.abs()).to(dtype=torch.float32) / torch.finfo(cls.FP8_DTYPE).max
            if tensor.dtype not in [torch.float32, torch.bfloat16]:  # Prevent scale from being too small
                tensor_info = torch.finfo(tensor.dtype)
                scale = (1.0 / torch.clamp((1.0 / scale), min=tensor_info.min, max=tensor_info.max))
        if scale is None:
            scale = torch.ones((), device=tensor.device, dtype=torch.float32)
        if not isinstance(scale, torch.Tensor):
            scale = torch.tensor(scale, device=tensor.device, dtype=torch.float32)
        if stochastic_rounding > 0:
            if inplace_ops:
                tensor *= (1.0 / scale).to(tensor.dtype)
            else:
                tensor = tensor * (1.0 / scale).to(tensor.dtype)
            qdata = comfy.float.stochastic_rounding(tensor, dtype=cls.FP8_DTYPE, seed=stochastic_rounding)
        else:
            qdata = ck.quantize_per_tensor_fp8(tensor, scale, cls.FP8_DTYPE)
        params = cls.Params(scale=scale.float(), orig_dtype=orig_dtype, orig_shape=orig_shape)
        return qdata, params
class TensorCoreMXFP8Layout(_CKMxfp8Layout):
    @classmethod
    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
        if tensor.dim() != 2:
            raise ValueError(f"MXFP8 requires 2D tensor, got {tensor.dim()}D")
        orig_dtype = tensor.dtype
        orig_shape = tuple(tensor.shape)
        padded_shape = cls.get_padded_shape(orig_shape)
        needs_padding = padded_shape != orig_shape
        if stochastic_rounding > 0:
            qdata, block_scale = comfy.float.stochastic_round_quantize_mxfp8_by_block(tensor, pad_32x=needs_padding, seed=stochastic_rounding)
        else:
            qdata, block_scale = ck.quantize_mxfp8(tensor, pad_32x=needs_padding)
        params = cls.Params(
            scale=block_scale,
            orig_dtype=orig_dtype,
            orig_shape=orig_shape,
        )
        return qdata, params
class TensorCoreNVFP4Layout(_CKNvfp4Layout):
    @classmethod
    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
        if tensor.dim() != 2:
            raise ValueError(f"NVFP4 requires 2D tensor, got {tensor.dim()}D")
        orig_dtype = tensor.dtype
        orig_shape = tuple(tensor.shape)
        if scale is None or (isinstance(scale, str) and scale == "recalculate"):
            scale = torch.amax(tensor.abs()) / (ck.float_utils.F8_E4M3_MAX * ck.float_utils.F4_E2M1_MAX)
        if not isinstance(scale, torch.Tensor):
            scale = torch.tensor(scale)
        scale = scale.to(device=tensor.device, dtype=torch.float32)
        padded_shape = cls.get_padded_shape(orig_shape)
        needs_padding = padded_shape != orig_shape
        if stochastic_rounding > 0:
            qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4_by_block(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
        else:
            qdata, block_scale = ck.quantize_nvfp4(tensor, scale, pad_16x=needs_padding)
        params = cls.Params(
            scale=scale,
            orig_dtype=orig_dtype,
            orig_shape=orig_shape,
            block_scale=block_scale,
        )
        return qdata, params
class TensorCoreFP8E4M3Layout(_TensorCoreFP8LayoutBase):
    FP8_DTYPE = torch.float8_e4m3fn

class TensorCoreFP8E5M2Layout(_TensorCoreFP8LayoutBase):
    FP8_DTYPE = torch.float8_e5m2

# Backward compatibility alias - default to E4M3
TensorCoreFP8Layout = TensorCoreFP8E4M3Layout

# ==============================================================================
# Registry
# ==============================================================================
register_layout_class("TensorCoreFP8Layout", TensorCoreFP8Layout)
register_layout_class("TensorCoreFP8E4M3Layout", TensorCoreFP8E4M3Layout)
register_layout_class("TensorCoreFP8E5M2Layout", TensorCoreFP8E5M2Layout)
register_layout_class("TensorCoreNVFP4Layout", TensorCoreNVFP4Layout)
if _CK_MXFP8_AVAILABLE:
    register_layout_class("TensorCoreMXFP8Layout", TensorCoreMXFP8Layout)
QUANT_ALGOS = {
    "float8_e4m3fn": {
        "storage_t": torch.float8_e4m3fn,
        "parameters": {"weight_scale", "input_scale"},
        "comfy_tensor_layout": "TensorCoreFP8E4M3Layout",
    },
    "float8_e5m2": {
        "storage_t": torch.float8_e5m2,
        "parameters": {"weight_scale", "input_scale"},
        "comfy_tensor_layout": "TensorCoreFP8E5M2Layout",
    },
    "nvfp4": {
        "storage_t": torch.uint8,
        "parameters": {"weight_scale", "weight_scale_2", "input_scale"},
        "comfy_tensor_layout": "TensorCoreNVFP4Layout",
        "group_size": 16,
    },
}

if _CK_MXFP8_AVAILABLE:
    QUANT_ALGOS["mxfp8"] = {
        "storage_t": torch.float8_e4m3fn,
        "parameters": {"weight_scale", "input_scale"},
        "comfy_tensor_layout": "TensorCoreMXFP8Layout",
        "group_size": 32,
    }
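# Illustrative note (not in the original registry code): each QUANT_ALGOS entry
# names the layout class registered above, so loaders can resolve it by name:
#
#     algo = QUANT_ALGOS["float8_e4m3fn"]
#     layout_cls = get_layout_class(algo["comfy_tensor_layout"])
#     assert layout_cls is TensorCoreFP8E4M3Layout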
# ==============================================================================
# Re-exports for backward compatibility
# ==============================================================================
__all__ = [
    "QuantizedTensor",
    "QuantizedLayout",
    "TensorCoreFP8Layout",
    "TensorCoreFP8E4M3Layout",
    "TensorCoreFP8E5M2Layout",
    "TensorCoreNVFP4Layout",
    "QUANT_ALGOS",
    "register_layout_op",
]
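# ==============================================================================
# Usage sketch (illustrative, not part of the module API)
# ==============================================================================
# Quantize a weight with the per-tensor FP8 E4M3 layout defined above.
# Assumes comfy_kitchen is installed with a working CUDA backend.
if __name__ == "__main__" and _CK_AVAILABLE:
    weight = torch.randn(256, 256, dtype=torch.bfloat16, device="cuda")
    qdata, params = TensorCoreFP8E4M3Layout.quantize(weight, scale="recalculate")
    print(qdata.dtype)          # torch.float8_e4m3fn
    print(params.scale.item())  # per-tensor scale used at dequantization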