Update app.py
app.py CHANGED

@@ -185,8 +185,13 @@ class FlowMatchingPipeline:
                 noise_pred, t, latents, return_dict=False
             )[0]
 
-        # Decode latents
-        latents =
+        # Decode latents with model-specific scaling
+        latents = latents / self.vae_scale_factor
+
+        # Lune-specific scaling: multiply by 5.52 for Lune's latent space offset
+        # This must be applied ONLY for Lune model, not SD1.5 Base
+        if hasattr(self, 'is_lune_model') and self.is_lune_model:
+            latents = latents * 5.52
 
         with torch.no_grad():
             image = self.vae.decode(latents).sample

@@ -248,6 +253,8 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
 
     print(f"🚀 Initializing {model_choice} pipeline...")
 
+    is_lune = "Lune" in model_choice
+
     # Load base components
     print("Loading VAE...")
     vae = AutoencoderKL.from_pretrained(

@@ -267,7 +274,7 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
     )
 
     # Load UNet based on model choice
-    if
+    if is_lune:
         # Load latest checkpoint from repo
         repo_id = "AbstractPhil/sd15-flow-lune"
         # Find latest checkpoint - for now use a known one

@@ -293,7 +300,7 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
 
     print("✅ Pipeline initialized!")
 
-
+    pipeline = FlowMatchingPipeline(
         vae=vae,
         text_encoder=text_encoder,
         tokenizer=tokenizer,

@@ -301,6 +308,11 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
         scheduler=scheduler,
         device=device
     )
+
+    # Set flag for Lune-specific VAE scaling
+    pipeline.is_lune_model = is_lune
+
+    return pipeline
 
 
 # ============================================================================

@@ -417,16 +429,17 @@ def create_demo():
 
         with gr.Row():
             with gr.Column(scale=1):
-                # Prompt
+                # Prompt - default to first example
                 prompt = gr.TextArea(
                     label="Prompt",
-
+                    value="A serene mountain landscape at golden hour, crystal clear lake reflecting snow-capped peaks, photorealistic, 8k",
                     lines=3
                 )
 
                 negative_prompt = gr.TextArea(
                     label="Negative Prompt",
                     placeholder="blurry, low quality, distorted...",
+                    value="blurry, low quality",
                     lines=2
                 )
 

@@ -460,7 +473,7 @@ def create_demo():
                 prediction_type = gr.Radio(
                     label="Prediction Type",
                     choices=["epsilon", "v_prediction"],
-                    value="
+                    value="v_prediction",  # Default to v_prediction for Lune
                     info="Type of model prediction"
                 )
 

@@ -531,7 +544,9 @@ def create_demo():
         - **Flow matching** works best with 15-25 steps (vs 50+ for standard diffusion)
         - **Shift** controls the flow trajectory (2.0-2.5 recommended for Lune)
         - Lower shift = more direct path, higher shift = more exploration
-        -
+        - **Lune** uses v_prediction by default for optimal results
+        - **SD1.5 Base** uses epsilon (standard diffusion)
+        - Lune operates in a scaled latent space (5.52x) for geometric efficiency
 
         ### Model Info:
         - **Flow-Lune**: Trained with flow matching on 500k SD1.5 distillation pairs

@@ -553,7 +568,7 @@ def create_demo():
                 512,
                 2.5,
                 True,
-                "
+                "v_prediction",
                 42,
                 False
             ],

@@ -567,7 +582,7 @@ def create_demo():
                 512,
                 2.5,
                 True,
-                "
+                "v_prediction",
                 123,
                 False
             ],

@@ -581,7 +596,7 @@ def create_demo():
                 512,
                 2.0,
                 True,
-                "
+                "v_prediction",
                 456,
                 False
             ]

@@ -597,6 +612,29 @@ def create_demo():
         )
 
         # Event handlers
+
+        # Update settings when model changes
+        def on_model_change(model_name):
+            """Update default settings based on model selection."""
+            if model_name == "SD1.5 Base":
+                # SD1.5: disable flow matching, use epsilon
+                return {
+                    use_flow_matching: gr.update(value=False),
+                    prediction_type: gr.update(value="epsilon")
+                }
+            else:
+                # Lune: enable flow matching, use v_prediction
+                return {
+                    use_flow_matching: gr.update(value=True),
+                    prediction_type: gr.update(value="v_prediction")
+                }
+
+        model_choice.change(
+            fn=on_model_change,
+            inputs=[model_choice],
+            outputs=[use_flow_matching, prediction_type]
+        )
+
         generate_btn.click(
             fn=generate_image,
             inputs=[
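For reference, the decode change in the first hunk can be exercised on its own. Below is a minimal sketch, assuming a diffusers AutoencoderKL and a latents tensor of shape (batch, 4, height/8, width/8); vae.config.scaling_factor stands in for the pipeline's vae_scale_factor attribute, and the 5.52 multiplier and is_lune_model flag are taken from the diff above, not from the diffusers API.

import torch
from diffusers import AutoencoderKL

def decode_latents(vae: AutoencoderKL, latents: torch.Tensor, is_lune_model: bool) -> torch.Tensor:
    # Undo the standard latent scaling before decoding
    latents = latents / vae.config.scaling_factor
    # Lune-specific rescale from this commit: applied only when the Lune UNet is loaded
    if is_lune_model:
        latents = latents * 5.52
    with torch.no_grad():
        image = vae.decode(latents).sample  # pixel-space tensor, roughly in [-1, 1]
    return image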
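The model-change handler added in the last hunk uses Gradio's standard event wiring: a callback attached with .change that returns updates for the flow-matching checkbox and the prediction-type radio. A standalone sketch, with hypothetical default values and the updates returned positionally instead of as a dict keyed by components, might look like this.

import gradio as gr

with gr.Blocks() as demo:
    model_choice = gr.Radio(choices=["Flow-Lune", "SD1.5 Base"], value="Flow-Lune", label="Model")
    use_flow_matching = gr.Checkbox(value=True, label="Use Flow Matching")
    prediction_type = gr.Radio(choices=["epsilon", "v_prediction"], value="v_prediction", label="Prediction Type")

    def on_model_change(model_name):
        # SD1.5 Base falls back to standard epsilon diffusion; Lune keeps flow matching + v_prediction
        if model_name == "SD1.5 Base":
            return gr.update(value=False), gr.update(value="epsilon")
        return gr.update(value=True), gr.update(value="v_prediction")

    model_choice.change(fn=on_model_change, inputs=[model_choice], outputs=[use_flow_matching, prediction_type])

demo.launch()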