DZRobo committed on
Commit e69f3b7 · 1 Parent(s): 7af46cf

Add Z_image support and improve latent/channel handling


Adds functions to harmonize latent channel counts and condition token lengths to prevent mismatches, especially for models like FLUX/Z_image. Enhances error reporting with debug output and traceback printing. Updates mg_combinode to better validate VAE/CLIP presence for checkpoint and input selection. Fixes hybrid sigma schedule alignment in mg_zesmart_sampler_v1_1.
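As a quick aside on the scaling used by the new _match_latent_channels helper below: when a latent is replicated from, say, 4 to 16 channels, dividing by sqrt(rep) keeps the total squared magnitude across channels unchanged. A minimal standalone sketch with hypothetical shapes (not part of the commit):

import torch

# Hypothetical 4-channel latent adapted to a 16-channel model (rep = 4).
z = torch.randn(1, 4, 64, 64)
rep = 4
z_fixed = z.repeat(1, rep, 1, 1) / rep ** 0.5  # replicate channels, then rescale

# The 1/sqrt(rep) factor keeps the overall energy (sum of squares) unchanged.
print(z.pow(2).sum().item(), z_fixed.pow(2).sum().item())  # equal up to float error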

mod/easy/mg_cade25_easy.py CHANGED
@@ -7,6 +7,7 @@ import torch
 import os
 import numpy as np
 import torch.nn.functional as F
+import traceback
 
 import nodes
 import comfy.model_management as model_management
@@ -1115,6 +1116,133 @@ def safe_decode(vae, lat, tile=512, ovlp=128, to_fp32: bool = False):
     return out
 
 
+def _match_latent_channels(vae, latent: dict, model=None):
+    """Align latent channel count to model/VAE expectations (e.g., FLUX/Z_image 16ch) with variance preservation."""
+    if not isinstance(latent, dict) or ("samples" not in latent):
+        return latent
+    z = latent.get("samples", None)
+    if z is None:
+        return latent
+    try:
+        target_c = None
+        # Prefer model latent_format if available (more reliable than VAE decoder)
+        if model is not None:
+            try:
+                lf = model.get_model_object("latent_format")
+                target_c = int(getattr(lf, "latent_channels", None) or 0) or None
+            except Exception:
+                target_c = None
+        fs = getattr(vae, "first_stage_model", None)
+        dec = getattr(fs, "decoder", None)
+        if dec is not None and hasattr(dec, "conv_in"):
+            target_c = target_c or int(dec.conv_in.in_channels)
+        if target_c is None and hasattr(fs, "latent_channels"):
+            target_c = int(getattr(fs, "latent_channels"))
+        if target_c is None and hasattr(vae, "latent_channels"):
+            target_c = int(getattr(vae, "latent_channels"))
+        if target_c is None:
+            return latent
+        cur_c = int(z.shape[1])
+        if cur_c == target_c:
+            return latent
+        # Repeat channels when divisible (common case: 4 -> 16)
+        if target_c % cur_c == 0 and cur_c > 0:
+            rep = target_c // cur_c
+            reps = [1, rep] + [1] * (z.ndim - 2)
+            z_fixed = z.repeat(*reps)
+            # Preserve variance after channel replication
+            z_fixed = z_fixed / (rep ** 0.5)
+        else:
+            # Fallback: pad zeros or slice to match
+            if target_c > cur_c:
+                pad = target_c - cur_c
+                pad_tensor = torch.zeros_like(z[:, :1, ...]).repeat(1, pad, *([1] * (z.ndim - 2)))
+                z_fixed = torch.cat([z, pad_tensor], dim=1)
+            else:
+                z_fixed = z[:, :target_c, ...]
+        latent = {**latent, "samples": z_fixed}
+    except Exception:
+        pass
+    return latent
+
+
+def _harmonize_cond_tokens(cond_list):
+    """Pad/truncate cond tokens + masks to a common length to avoid mismatches (e.g., 499 vs 528 or 981 vs 1286)."""
+    if not isinstance(cond_list, list):
+        return cond_list
+    # pass 1: find max token length across cross_attn
+    max_len = 0
+    for c in cond_list:
+        if isinstance(c, dict):
+            ca = c.get("cross_attn", None)
+            if ca is not None:
+                try:
+                    max_len = max(max_len, int(ca.shape[1]))
+                except Exception:
+                    pass
+    if max_len <= 0:
+        return cond_list
+    fixed = []
+    for c in cond_list:
+        if not isinstance(c, dict):
+            fixed.append(c)
+            continue
+        d = c.copy()
+        ca = d.get("cross_attn", None)
+        am = d.get("attention_mask", None)
+        # Harmonize cross_attn length
+        if ca is not None:
+            try:
+                ca_len = int(ca.shape[1])
+                if ca_len < max_len:
+                    pad_shape = list(ca.shape)
+                    pad_shape[1] = max_len - ca_len
+                    ca_pad = torch.zeros(pad_shape, device=ca.device, dtype=ca.dtype)
+                    ca = torch.cat([ca, ca_pad], dim=1)
+                elif ca_len > max_len:
+                    ca = ca[:, :max_len, ...]
+                d["cross_attn"] = ca
+            except Exception:
+                pass
+        # Harmonize mask length to cross_attn length
+        if ca is not None:
+            ca_len = int(ca.shape[1])
+            if am is None:
+                am = torch.ones((ca.shape[0], ca_len), device=ca.device, dtype=ca.dtype)
+            try:
+                am_len = int(am.shape[-1] if am.dim() == 2 else am.shape[1])
+                if am_len < ca_len:
+                    pad = ca_len - am_len
+                    pad_shape = list(am.shape)
+                    pad_shape[-1] = pad
+                    pad_tensor = torch.zeros(pad_shape, device=am.device, dtype=am.dtype)
+                    am = torch.cat([am, pad_tensor], dim=-1)
+                elif am_len > ca_len:
+                    am = am[..., :ca_len]
+                d["attention_mask"] = am
+                try:
+                    d["num_tokens"] = int(torch.count_nonzero(am, dim=-1).max().item())
+                except Exception:
+                    d["num_tokens"] = ca_len
+            except Exception:
+                pass
+        fixed.append(d)
+    return fixed
+
+
+def _summarize_conds(label, conds):
+    out = []
+    if isinstance(conds, list):
+        for idx, c in enumerate(conds):
+            try:
+                ca = c.get("cross_attn", None) if isinstance(c, dict) else None
+                am = c.get("attention_mask", None) if isinstance(c, dict) else None
+                out.append(f"{label}[{idx}]: ca={None if ca is None else list(ca.shape)}, am={None if am is None else list(am.shape)}")
+            except Exception:
+                pass
+    return "; ".join(out)
+
+
 def safe_encode(vae, img, tile=512, ovlp=64):
     import math, torch.nn.functional as F
     h, w = img.shape[1:3]
@@ -2309,6 +2437,13 @@ class ComfyAdaptiveDetailEnhancer25:
         except Exception:
             pass
 
+        # Align latent channels to VAE/model (e.g., Z_image/FLUX use 16ch latents)
+        latent = _match_latent_channels(vae, latent, model)
+
+        # Harmonize cond token lengths to prevent rare MGHybrid size mismatches
+        positive = _harmonize_cond_tokens(positive)
+        negative = _harmonize_cond_tokens(negative)
+
         image = safe_decode(vae, latent, to_fp32=bool(vae_decode_fp32))
         # allow user cancel right after initial decode
         model_management.throw_exception_if_processing_interrupted()
@@ -2830,6 +2965,7 @@ class ComfyAdaptiveDetailEnhancer25:
             )
             # Prepare latent + noise like in MG_ZeSmartSampler
            lat_img = current_latent["samples"]
+           lat_img = _match_latent_channels(vae, {"samples": lat_img}, sampler_model)["samples"]
            lat_img = _sample.fix_empty_latent_channels(sampler_model, lat_img)
            batch_inds = current_latent.get("batch_index", None)
            noise = _sample.prepare_noise(lat_img, int(iter_seed), batch_inds)
@@ -2848,6 +2984,16 @@ class ComfyAdaptiveDetailEnhancer25:
                current_latent = {**current_latent}
                current_latent["samples"] = samples
            except Exception as e:
+               try:
+                   print(f"[CADE2.5][MGHybrid][debug] sigmas={list(sigmas.shape)} lat={list(current_latent['samples'].shape)}")
+                   print(_summarize_conds("pos", positive))
+                   print(_summarize_conds("neg", negative))
+               except Exception:
+                   pass
+               try:
+                   traceback.print_exc()
+               except Exception:
+                   pass
                # Before any fallback, propagate user cancel if set
                try:
                    model_management.throw_exception_if_processing_interrupted()
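For readers unfamiliar with the conditioning layout, the padding that _harmonize_cond_tokens applies can be pictured in isolation. A standalone sketch with made-up shapes (the real helper operates on ComfyUI conditioning dicts carrying cross_attn and attention_mask entries):

import torch

# Made-up token counts, e.g. 499 vs 528 embeddings from two conditioning entries.
pos = torch.randn(1, 499, 4096)
neg = torch.randn(1, 528, 4096)
max_len = max(pos.shape[1], neg.shape[1])

def pad_or_trim(t, length):
    # Zero-pad the token dimension up to `length`, or truncate if it is longer.
    if t.shape[1] < length:
        pad = torch.zeros(t.shape[0], length - t.shape[1], t.shape[2],
                          dtype=t.dtype, device=t.device)
        return torch.cat([t, pad], dim=1)
    return t[:, :length, :]

pos, neg = pad_or_trim(pos, max_len), pad_or_trim(neg, max_len)
assert pos.shape[1] == neg.shape[1] == 528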
mod/hard/mg_cade25.py CHANGED
@@ -11,6 +11,7 @@ import torch
 import os
 import numpy as np
 import torch.nn.functional as F
+import traceback
 
 import nodes
 import comfy.model_management as model_management
mod/hard/mg_zesmart_sampler_v1_1.py CHANGED
@@ -33,7 +33,15 @@ def _build_hybrid_sigmas(model, steps: int, base_sampler: str, mode: str,
     sig_k = _samplers.calculate_sigmas(ms, "karras", steps)
     sig_b = _samplers.calculate_sigmas(ms, "beta", steps)
 
+    def _align_len(a: torch.Tensor, b: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        """Align two sigma schedules to the same length (use tail of longer)."""
+        if a.shape[0] == b.shape[0]:
+            return a, b
+        m = min(a.shape[0], b.shape[0])
+        return a[-m:], b[-m:]
+
     mode = str(mode).lower()
+    sig_k, sig_b = _align_len(sig_k, sig_b)
     if mode == "karras":
         sig = sig_k
     elif mode == "beta":
@@ -54,6 +62,7 @@ def _build_hybrid_sigmas(model, steps: int, base_sampler: str, mode: str,
     new_steps = max(1, int(steps / max(1e-6, float(denoise))))
     sk = _samplers.calculate_sigmas(ms, "karras", new_steps)
     sb = _samplers.calculate_sigmas(ms, "beta", new_steps)
+    sk, sb = _align_len(sk, sb)
     if mode == "karras":
         sig_full = sk
     elif mode == "beta":
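A toy check of the tail alignment that _align_len performs; torch.linspace stands in for the real karras/beta schedules, which can come back one element apart:

import torch

sig_k = torch.linspace(14.6, 0.0, 21)  # stand-in for the karras schedule, 21 values
sig_b = torch.linspace(14.6, 0.0, 20)  # stand-in for the beta schedule, 20 values
m = min(sig_k.shape[0], sig_b.shape[0])
sig_k, sig_b = sig_k[-m:], sig_b[-m:]  # keep the tails so both still end at sigma 0
assert sig_k.shape == sig_b.shape == torch.Size([20])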
mod/mg_combinode.py CHANGED
@@ -275,13 +275,30 @@ class MagicNodesCombiNode:
         pos_text_expanded = _norm_prompt(_expand_dynamic(positive_prompt, int(dyn_seed), bool(dynamic_break_freeze)) if bool(dynamic_pos) else positive_prompt)
         neg_text_expanded = _norm_prompt(_expand_dynamic(negative_prompt, int(dyn_seed), bool(dynamic_break_freeze)) if bool(dynamic_neg) else negative_prompt)
 
+        def _valid_vae(v):
+            try:
+                return (v is not None) and (getattr(v, "first_stage_model", None) is not None)
+            except Exception:
+                return False
+
         if use_checkpoint and checkpoint:
             checkpoint_path = folder_paths.get_full_path_or_raise("checkpoints", checkpoint)
             _unload_old_checkpoint(checkpoint_path)
             base_model, base_clip, vae = _load_checkpoint(checkpoint_path)
             model = base_model.clone()
-            clip = base_clip.clone()
-            clip_clean = base_clip.clone()  # keep pristine CLIP for standard pipeline path
+            # Some flow/DiT style checkpoints (e.g., Z_image) ship without CLIP/VAE.
+            clip_source = base_clip or clip_in
+            if clip_source is None:
+                raise Exception("Checkpoint has no CLIP. Connect a CLIP input node or use a checkpoint that bundles CLIP.")
+            clip = clip_source.clone()
+            clip_clean = clip_source.clone()  # keep pristine CLIP for standard pipeline path
+            # Prefer external VAE when provided; some FLOW/DiT checkpoints return an invalid stub VAE.
+            for candidate in (vae_in, vae):
+                if _valid_vae(candidate):
+                    vae = candidate
+                    break
+            else:
+                raise Exception("Checkpoint has no valid VAE. Connect a VAE input node or use a checkpoint that bundles VAE.")
 
         elif model_in and clip_in:
             _unload_old_checkpoint(None)
@@ -289,6 +306,8 @@ class MagicNodesCombiNode:
             clip = clip_in.clone()
             clip_clean = clip_in.clone()
             vae = vae_in
+            if not _valid_vae(vae):
+                raise Exception("VAE input is missing or invalid. Please connect a proper VAE node.")
         else:
             raise Exception("No model selected!")
 
313