Modular Diffusers Custom Blocks
Collection
Custom blocks for Modular Diffusers • 9 items • Updated
• 2
This is a custom block designed to extract depth maps from input images using the Depth Anything model. The block can be used as a processor to generate conditioning images for ControlNets.
import torch
from diffusers import ModularPipeline, ComponentsManager, ModularPipelineBlocks
from diffusers.utils import load_image
# Example: prepend the depth-processor custom block to the Qwen-Image
# "controlnet_text2image" workflow, then run it with a ControlNet.

# Use ComponentsManager to enable auto CPU offloading for memory efficiency
manager = ComponentsManager()
manager.enable_auto_cpu_offload(device="cuda:0")

# Initialize pipeline
pipe = ModularPipeline.from_pretrained("Qwen/Qwen-Image", components_manager=manager)

# Insert a depth processing block
blocks = pipe.blocks.get_workflow("controlnet_text2image")
depth_block = ModularPipelineBlocks.from_pretrained(
    "diffusers/depth-processor-custom-block",
    # The block's code lives in the Hub repository, so remote code must be
    # explicitly trusted for it to load.
    trust_remote_code=True,
)
# Index 0 places the depth block ahead of the workflow's existing sub-blocks.
blocks.sub_blocks.insert("depth", depth_block, 0)

# Reinitialize the pipeline for ControlNet
pipe = blocks.init_pipeline("Qwen/Qwen-Image", components_manager=manager)
pipe.load_components(torch_dtype=torch.bfloat16)

# Load the ControlNet model
# The spec is looked up on `pipe` (the pipeline rebuilt just above), then
# pointed at the ControlNet checkpoint before loading.
controlnet_spec = pipe.get_component_spec("controlnet")
controlnet_spec.pretrained_model_name_or_path = "InstantX/Qwen-Image-ControlNet-Union"
controlnet = controlnet_spec.load(torch_dtype=torch.bfloat16)
pipe.update_components(controlnet=controlnet)

# Infer
prompt = "cat wizard with red hat, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney"
image = load_image("https://github.com/Trgtuan10/Image_storage/blob/main/cute_cat.png?raw=true")
output = pipe(
    prompt=prompt,
    image=image,
).images[0]
# Bare expression so a notebook cell displays the generated image.
output