Commit eb112c6f authored by AUTOMATIC1111, committed by GitHub

Merge pull request #16035 from v0xie/cfgpp

Add new sampler DDIM CFG++
parents ace00a1f 663a4d80
@@ -58,6 +58,8 @@ class CFGDenoiser(torch.nn.Module):
         self.model_wrap = None
         self.p = None
+        self.last_noise_uncond = None
+
         # NOTE: masking before denoising can cause the original latents to be oversmoothed
         # as the original latents do not have noise
         self.mask_before_denoising = False
@@ -160,6 +162,8 @@ class CFGDenoiser(torch.nn.Module):
         # so is_edit_model is set to False to support AND composition.
         is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0
+        is_cfg_pp = 'CFG++' in self.sampler.config.name
+
         conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
         uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)
@@ -273,10 +277,16 @@ class CFGDenoiser(torch.nn.Module):
         denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model)
         cfg_denoised_callback(denoised_params)
+        if is_cfg_pp:
+            self.last_noise_uncond = x_out[-uncond.shape[0]:]
+            self.last_noise_uncond = torch.clone(self.last_noise_uncond)
+
         if is_edit_model:
             denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)
         elif skip_uncond:
             denoised = self.combine_denoised(x_out, conds_list, uncond, 1.0)
+        elif is_cfg_pp:
+            denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale / 12.5)  # map the UI CFG scale (1.0, 12.5) onto the CFG++ scale (0, 1)
         else:
             denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
...
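For context, the guidance blend itself is unchanged: `combine_denoised` still forms `uncond + weight * (cond - uncond)`; the CFG++ branch only rescales the weight by 1/12.5 and stashes the unconditional prediction for the sampler's direction term. A minimal standalone sketch of that blend (simplified to a single cond/uncond pair; this is not webui's actual batched `combine_denoised`):

```python
import torch

def cfgpp_blend(eps_cond: torch.Tensor, eps_uncond: torch.Tensor, cfg_scale: float) -> torch.Tensor:
    # Map the UI CFG scale from [0.0, 12.5] onto the CFG++ weight in [0, 1].
    lam = cfg_scale / 12.5
    # Same linear blend as classic CFG, just with the rescaled weight.
    return eps_uncond + lam * (eps_cond - eps_uncond)

# e.g. a UI CFG scale of 7.0 becomes a CFG++ weight of 0.56
eps_c, eps_u = torch.randn(1, 4, 8, 8), torch.randn(1, 4, 8, 8)
guided = cfgpp_blend(eps_c, eps_u, cfg_scale=7.0)
```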
@@ -10,6 +10,7 @@ import modules.shared as shared
 samplers_timesteps = [
     ('DDIM', sd_samplers_timesteps_impl.ddim, ['ddim'], {}),
+    ('DDIM CFG++', sd_samplers_timesteps_impl.ddim_cfgpp, ['ddim_cfgpp'], {}),
     ('PLMS', sd_samplers_timesteps_impl.plms, ['plms'], {}),
     ('UniPC', sd_samplers_timesteps_impl.unipc, ['unipc'], {}),
 ]
...
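Once registered here, the sampler is selectable by its label (or the `ddim_cfgpp` alias) anywhere webui accepts a sampler name. A hypothetical request against a stock local install started with `--api`, using the standard txt2img endpoint and payload fields:

```python
import requests

payload = {
    "prompt": "a photo of a cat",
    "steps": 20,
    "cfg_scale": 7.0,  # divided by 12.5 internally when a CFG++ sampler is active
    "sampler_name": "DDIM CFG++",
}
# Assumes webui is running locally on the default port with the API enabled.
resp = requests.post("http://127.0.0.1:7860/sdapi/v1/txt2img", json=payload)
resp.raise_for_status()
```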
@@ -40,6 +40,43 @@ def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0):
     return x

+@torch.no_grad()
+def ddim_cfgpp(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0):
+    """ Implements CFG++: Manifold-constrained Classifier Free Guidance For Diffusion Models (2024).
+    Uses the unconditional noise prediction instead of the conditional noise to guide the denoising direction.
+    The CFG scale is divided by 12.5 to map CFG from [0.0, 12.5] to [0, 1.0].
+    """
+    alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
+    alphas = alphas_cumprod[timesteps]
+    alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' else torch.float32)
+    sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
+    sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy()))
+
+    extra_args = {} if extra_args is None else extra_args
+    s_in = x.new_ones((x.shape[0]))
+    s_x = x.new_ones((x.shape[0], 1, 1, 1))
+    for i in tqdm.trange(len(timesteps) - 1, disable=disable):
+        index = len(timesteps) - 1 - i
+
+        e_t = model(x, timesteps[index].item() * s_in, **extra_args)
+        last_noise_uncond = model.last_noise_uncond
+
+        a_t = alphas[index].item() * s_x
+        a_prev = alphas_prev[index].item() * s_x
+        sigma_t = sigmas[index].item() * s_x
+        sqrt_one_minus_at = sqrt_one_minus_alphas[index].item() * s_x
+
+        pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
+        dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * last_noise_uncond
+        noise = sigma_t * k_diffusion.sampling.torch.randn_like(x)
+        x = a_prev.sqrt() * pred_x0 + dir_xt + noise
+
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': 0, 'sigma_hat': 0, 'denoised': pred_x0})
+
+    return x
+
 @torch.no_grad()
 def plms(model, x, timesteps, extra_args=None, callback=None, disable=None):
     alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
...
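To make the update rule explicit: at each step, x0 is predicted from the guided noise `e_t`, but the direction pointing back toward x_{t-1} uses the unconditional noise, and that substitution is the whole CFG++ change relative to plain DDIM. A self-contained single-step sketch under those assumptions (scalar alpha values, deterministic with the default `sigma_t=0.0`; names are illustrative, not webui's):

```python
import torch

@torch.no_grad()
def ddim_cfgpp_step(x, eps_guided, eps_uncond, a_t, a_prev, sigma_t=0.0):
    # Predict x0 from the *guided* noise, exactly as in plain DDIM.
    pred_x0 = (x - (1 - a_t) ** 0.5 * eps_guided) / a_t ** 0.5
    # CFG++: the direction to x_{t-1} reuses the *unconditional* noise
    # (plain DDIM would use eps_guided here too).
    dir_xt = (1.0 - a_prev - sigma_t ** 2) ** 0.5 * eps_uncond
    noise = sigma_t * torch.randn_like(x) if sigma_t > 0 else 0.0
    return a_prev ** 0.5 * pred_x0 + dir_xt + noise
```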