#pipeline.transformer.set_attention_backend("_sage_qk_int8_pv_fp16_triton")  # Enable Sage Attention
#pipeline.transformer.set_attention_backend("flash")  # Enable Flash-Attention-2
#pipeline.transformer.set_attention_backend("_flash_3")  # Enable Flash-Attention-3
#pipeline.transformer.compile()  # Compile the transformer with torch.compile for faster inference
#pipeline.enable_model_cpu_offload()  # Offload idle components to CPU to reduce VRAM usage
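# Optional, a sketch added to this card (not part of the original example):
# try the backends above from fastest to most broadly available and keep the
# first one that works. The exact exceptions raised when a kernel is missing
# vary by diffusers version, so the broad catch here is an assumption.
#for backend in ("_flash_3", "flash", "_sage_qk_int8_pv_fp16_triton"):
#    try:
#        pipeline.transformer.set_attention_backend(backend)
#        break
#    except Exception:
#        continue  # Kernel unavailable; try the next backend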
image = pipeline(
    prompt=prompt,
    num_inference_steps=9,  # This actually results in 8 DiT forwards
    guidance_scale=0.0,  # Guidance should be 0.0 for the Turbo models
    height=height,
    width=width,
    generator=torch.Generator("cuda").manual_seed(seed),
).images[0]
image.save("zimage.png")
</details>
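If you experiment with the optional settings above (attention backend, compilation, CPU offload), one simple way to compare them is peak VRAM usage. Below is a minimal sketch using PyTorch's built-in CUDA memory statistics; it reuses `pipeline`, `prompt`, `height`, `width`, and `seed` from the example above and is an addition to this card, not part of the original example.

```python
import torch

# Reset the peak-memory counter so the measurement covers only this call.
torch.cuda.reset_peak_memory_stats()

image = pipeline(
    prompt=prompt,
    num_inference_steps=9,
    guidance_scale=0.0,
    height=height,
    width=width,
    generator=torch.Generator("cuda").manual_seed(seed),
).images[0]

# max_memory_allocated() reports the peak GPU memory held by tensors, in bytes.
peak_gib = torch.cuda.max_memory_allocated() / 1024**3
print(f"Peak VRAM during generation: {peak_gib:.2f} GiB")
```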
### Credits
- **Original Model**: [Z-Image Turbo by Tongyi-MAI](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo)
- **Quantization Tools & Guide**: [llama.cpp](https://github.com/ggml-org/llama.cpp) & [city96](https://github.com/city96/ComfyUI-GGUF/blob/main/tools/README.md)
### License
This repository follows the same license as the original [Z-Image Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) model.