Update app.py
app.py CHANGED
@@ -444,13 +444,13 @@ def load_lyra_vae(repo_id: str = "AbstractPhil/vae-lyra", device: str = "cuda"):
     return None


-def initialize_pipeline(model_choice: str, device: str = "cuda"):
     """Initialize the complete pipeline."""

     print(f"🚀 Initializing {model_choice} pipeline...")

     is_lune = "Lune" in model_choice
-    is_lyra = "Lyra" in model_choice

     # Load base components
     print("Loading VAE...")
@@ -460,46 +460,40 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
         torch_dtype=torch.float32
     ).to(device)

-    print("Loading CLIP text encoder...")
     text_encoder = CLIPTextModel.from_pretrained(
-
         torch_dtype=torch.float32
     ).to(device)

     tokenizer = CLIPTokenizer.from_pretrained(
-
     )

-    #
-
-    t5_tokenizer =
-

-
-
-
-
-        "t5-base",
-        torch_dtype=torch.float32
-    ).to(device)
-    t5_encoder.eval()
-    print("✅ T5 loaded")
-
-    print("Loading Lyra VAE...")
-    lyra_model = load_lyra_vae(device=device)
-    if lyra_model is None:
-        raise ValueError("Failed to load Lyra VAE")

     # Load UNet based on model choice
     if is_lune:
         # Load latest checkpoint from repo
         repo_id = "AbstractPhil/sd15-flow-lune"
-        # Find latest checkpoint - for now use a known one
         filename = "sd15_flow_lune_e34_s34000.pt"
         unet = load_lune_checkpoint(repo_id, filename, device)

-    elif
-        # Use standard SD1.5 UNet
         print("Loading SD1.5 base UNet...")
         unet = UNet2DConditionModel.from_pretrained(
             "runwayml/stable-diffusion-v1-5",
@@ -543,15 +537,17 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
 # Initialize with None, will load on first inference
 CURRENT_PIPELINE = None
 CURRENT_MODEL = None


-def get_pipeline(model_choice: str):
-    """Get or create pipeline for selected model."""
-    global CURRENT_PIPELINE, CURRENT_MODEL

-    if CURRENT_PIPELINE is None or CURRENT_MODEL != model_choice:
-        CURRENT_PIPELINE = initialize_pipeline(model_choice, device="cuda")
         CURRENT_MODEL = model_choice

     return CURRENT_PIPELINE

@@ -560,7 +556,7 @@ def get_pipeline(model_choice: str):
 # INFERENCE
 # ============================================================================

-def estimate_duration(num_steps: int, width: int, height: int) -> int:
     """Estimate GPU duration based on generation parameters."""
     # Base time per step (seconds)
     base_time_per_step = 0.3
@@ -568,18 +564,24 @@
     # Resolution scaling
     resolution_factor = (width * height) / (512 * 512)

-    # Total estimate
     estimated = num_steps * base_time_per_step * resolution_factor

     # Add 15 seconds for model loading overhead
     return int(estimated + 15)


-@spaces.GPU(duration=lambda *args: estimate_duration(args[
 def generate_image(
     prompt: str,
     negative_prompt: str,
     model_choice: str,
     num_steps: int,
     cfg_scale: float,
     width: int,
@@ -587,11 +589,12 @@
     shift: float,
     use_flow_matching: bool,
     prediction_type: str,
     seed: int,
     randomize_seed: bool,
     progress=gr.Progress()
 ):
-    """Generate image with ZeroGPU support."""

     # Randomize seed if requested
     if randomize_seed:
@@ -603,32 +606,68 @@

     try:
         # Get pipeline
-        pipeline = get_pipeline(model_choice)
-
-        # Determine if we should use Lyra encoding
-        use_lyra = "Lyra" in model_choice
-
-        # Generate
-        progress(0.05, desc="Starting generation...")
-
-        image = pipeline(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=height,
-            width=width,
-            num_inference_steps=num_steps,
-            guidance_scale=cfg_scale,
-            shift=shift,
-            use_flow_matching=use_flow_matching,
-            prediction_type=prediction_type,
-            seed=seed,
-            use_lyra=use_lyra,
-            progress_callback=progress_callback
-        )

-

-

     except Exception as e:
         print(f"❌ Generation failed: {e}")
@@ -648,12 +687,13 @@ def create_demo():

         **Geometric crystalline diffusion with flow matching** by [AbstractPhil](https://huggingface.co/AbstractPhil)

-        Generate images using SD1.5-based models with geometric deep learning
-        - **Flow-Lune**: Flow matching with pentachoron geometric structures
-        - **
-        - **

-
         """)

         with gr.Row():
@@ -674,15 +714,34 @@

                 # Model selection
                 model_choice = gr.Dropdown(
-                    label="Model",
                     choices=[
                         "Flow-Lune (Latest)",
-                        "Lyra-VAE (Geometric Fusion)",
                         "SD1.5 Base"
                     ],
                     value="Flow-Lune (Latest)"
                 )

                 # Flow matching settings
                 with gr.Accordion("Flow Matching Settings", open=True):
                     use_flow_matching = gr.Checkbox(
@@ -759,10 +818,18 @@
                 generate_btn = gr.Button("🎨 Generate", variant="primary", size="lg")

             with gr.Column(scale=1):
-
-
-
-

                 output_seed = gr.Number(
                     label="Used Seed",
@@ -775,15 +842,22 @@
                 - **Shift** controls the flow trajectory (2.0-2.5 recommended for Lune)
                 - Lower shift = more direct path, higher shift = more exploration
                 - **Lune** uses v_prediction by default for optimal results
-                - **Lyra**
                 - **SD1.5 Base** uses epsilon (standard diffusion)
                 - Lune operates in a scaled latent space (5.52x) for geometric efficiency

                 ### Model Info:
                 - **Flow-Lune**: Trained with flow matching on 500k SD1.5 distillation pairs
-                - **Lyra
                 - **SD1.5 Base**: Standard Stable Diffusion 1.5 for comparison

                 [📚 Learn more about geometric deep learning](https://github.com/AbstractEyes/lattice_vocabulary)
                 """)

@@ -794,6 +868,7 @@
                     "A serene mountain landscape at golden hour, crystal clear lake reflecting snow-capped peaks, photorealistic, 8k",
                     "blurry, low quality",
                     "Flow-Lune (Latest)",
                     20,
                     7.5,
                     512,
@@ -801,44 +876,49 @@
                    2.5,
                    True,
                    "v_prediction",
                    42,
                    False
                ],
                [
                    "A futuristic cyberpunk city at night, neon lights, rain-slicked streets, highly detailed",
                    "low quality, blurry",
-                   "
-
                    7.5,
                    512,
                    512,
-
-
-                   "
                    123,
                    False
                ],
                [
                    "Portrait of a majestic lion, golden mane, dramatic lighting, wildlife photography",
                    "cartoon, painting",
-                   "
-
-
                    512,
                    512,
-
                    True,
-                   "v_prediction",
                    456,
                    False
                ]
            ],
            inputs=[
-               prompt, negative_prompt, model_choice, num_steps, cfg_scale,
-               width, height, shift, use_flow_matching, prediction_type,
                seed, randomize_seed
            ],
-           outputs=[
            fn=generate_image,
            cache_examples=False
        )
@@ -854,12 +934,6 @@
                    use_flow_matching: gr.update(value=False),
                    prediction_type: gr.update(value="epsilon")
                }
-           elif model_name == "Lyra-VAE (Geometric Fusion)":
-               # Lyra: disable flow matching (uses standard diffusion), use epsilon
-               return {
-                   use_flow_matching: gr.update(value=False),
-                   prediction_type: gr.update(value="epsilon")
-               }
            else:
                # Lune: enable flow matching, use v_prediction
                return {
@@ -867,20 +941,40 @@
                    prediction_type: gr.update(value="v_prediction")
                }

        model_choice.change(
            fn=on_model_change,
            inputs=[model_choice],
            outputs=[use_flow_matching, prediction_type]
        )

        generate_btn.click(
            fn=generate_image,
            inputs=[
-               prompt, negative_prompt, model_choice, num_steps, cfg_scale,
-               width, height, shift, use_flow_matching, prediction_type,
                seed, randomize_seed
            ],
-           outputs=[
        )

    return demo
@@ -444,13 +444,13 @@
     return None


+def initialize_pipeline(model_choice: str, clip_model: str = "openai/clip-vit-large-patch14", device: str = "cuda"):
     """Initialize the complete pipeline."""

     print(f"🚀 Initializing {model_choice} pipeline...")
+    print(f"  CLIP model: {clip_model}")

     is_lune = "Lune" in model_choice

     # Load base components
     print("Loading VAE...")
|
| 460 |
torch_dtype=torch.float32
|
| 461 |
).to(device)
|
| 462 |
|
| 463 |
+
print(f"Loading CLIP text encoder: {clip_model}...")
|
| 464 |
text_encoder = CLIPTextModel.from_pretrained(
|
| 465 |
+
clip_model,
|
| 466 |
torch_dtype=torch.float32
|
| 467 |
).to(device)
|
| 468 |
|
| 469 |
tokenizer = CLIPTokenizer.from_pretrained(
|
| 470 |
+
clip_model
|
| 471 |
)
|
| 472 |
|
| 473 |
+
# Always load T5 and Lyra for potential use
|
| 474 |
+
print("Loading T5-base encoder...")
|
| 475 |
+
t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
| 476 |
+
t5_encoder = T5EncoderModel.from_pretrained(
|
| 477 |
+
"t5-base",
|
| 478 |
+
torch_dtype=torch.float32
|
| 479 |
+
).to(device)
|
| 480 |
+
t5_encoder.eval()
|
| 481 |
+
print("ā T5 loaded")
|
| 482 |
|
| 483 |
+
print("Loading Lyra VAE...")
|
| 484 |
+
lyra_model = load_lyra_vae(device=device)
|
| 485 |
+
if lyra_model is None:
|
| 486 |
+
print("ā ļø Lyra VAE not available - fusion disabled")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
|
| 488 |
# Load UNet based on model choice
|
| 489 |
if is_lune:
|
| 490 |
# Load latest checkpoint from repo
|
| 491 |
repo_id = "AbstractPhil/sd15-flow-lune"
|
|
|
|
| 492 |
filename = "sd15_flow_lune_e34_s34000.pt"
|
| 493 |
unet = load_lune_checkpoint(repo_id, filename, device)
|
| 494 |
|
| 495 |
+
elif model_choice == "SD1.5 Base":
|
| 496 |
+
# Use standard SD1.5 UNet
|
| 497 |
print("Loading SD1.5 base UNet...")
|
| 498 |
unet = UNet2DConditionModel.from_pretrained(
|
| 499 |
"runwayml/stable-diffusion-v1-5",
|
|
|
|
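Note on the swappable text encoder: the SD1.5 UNet's cross-attention layers expect 768-dimensional CLIP-L hidden states, so a variant from the new CLIP dropdown only drops in cleanly when its text encoder matches that width. A minimal sanity-check sketch (the helper name and the 768 expectation are illustrative assumptions, not part of this commit):

```python
from transformers import CLIPTextModel

def clip_matches_sd15(clip_model: str, expected_dim: int = 768) -> bool:
    # Hypothetical helper: verify the chosen CLIP text encoder produces
    # hidden states of the width the SD1.5 UNet cross-attention expects.
    encoder = CLIPTextModel.from_pretrained(clip_model)
    return encoder.config.hidden_size == expected_dim
```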
@@ -543,15 +537,17 @@
 # Initialize with None, will load on first inference
 CURRENT_PIPELINE = None
 CURRENT_MODEL = None
+CURRENT_CLIP_MODEL = None


+def get_pipeline(model_choice: str, clip_model: str):
+    """Get or create pipeline for selected model and CLIP variant."""
+    global CURRENT_PIPELINE, CURRENT_MODEL, CURRENT_CLIP_MODEL

+    if CURRENT_PIPELINE is None or CURRENT_MODEL != model_choice or CURRENT_CLIP_MODEL != clip_model:
+        CURRENT_PIPELINE = initialize_pipeline(model_choice, clip_model, device="cuda")
         CURRENT_MODEL = model_choice
+        CURRENT_CLIP_MODEL = clip_model

     return CURRENT_PIPELINE

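The pipeline cache now keys on both dropdown values: repeated calls with the same pair reuse the loaded pipeline, while changing either the base model or the CLIP variant triggers a rebuild. An illustrative call sequence using the defaults shown above:

```python
pipe = get_pipeline("Flow-Lune (Latest)", "openai/clip-vit-large-patch14")      # first call: builds
pipe = get_pipeline("Flow-Lune (Latest)", "openai/clip-vit-large-patch14")      # same pair: reused
pipe = get_pipeline("Flow-Lune (Latest)", "openai/clip-vit-large-patch14-336")  # CLIP changed: rebuilds
```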
@@ -560,7 +556,7 @@
 # INFERENCE
 # ============================================================================

+def estimate_duration(num_steps: int, width: int, height: int, use_lyra: bool = False) -> int:
     """Estimate GPU duration based on generation parameters."""
     # Base time per step (seconds)
     base_time_per_step = 0.3

@@ -568,18 +564,24 @@
     # Resolution scaling
     resolution_factor = (width * height) / (512 * 512)

+    # Total estimate for one generation
     estimated = num_steps * base_time_per_step * resolution_factor

+    # If Lyra enabled, we generate twice
+    if use_lyra:
+        estimated *= 2
+        estimated += 2  # Extra overhead for dual generation
+
     # Add 15 seconds for model loading overhead
     return int(estimated + 15)


+@spaces.GPU(duration=lambda *args: estimate_duration(args[4], args[6], args[7], args[11]))
 def generate_image(
     prompt: str,
     negative_prompt: str,
     model_choice: str,
+    clip_model: str,
     num_steps: int,
     cfg_scale: float,
     width: int,

@@ -587,11 +589,12 @@
     shift: float,
     use_flow_matching: bool,
     prediction_type: str,
+    use_lyra: bool,
     seed: int,
     randomize_seed: bool,
     progress=gr.Progress()
 ):
+    """Generate image with ZeroGPU support. Returns (standard_img, lyra_img, seed) or (img, None, seed)."""

     # Randomize seed if requested
     if randomize_seed:

@@ -603,32 +606,68 @@

     try:
         # Get pipeline
+        pipeline = get_pipeline(model_choice, clip_model)

+        if not use_lyra or pipeline.lyra_model is None:
+            # Standard generation only
+            progress(0.05, desc="Generating (standard)...")
+
+            image = pipeline(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                height=height,
+                width=width,
+                num_inference_steps=num_steps,
+                guidance_scale=cfg_scale,
+                shift=shift,
+                use_flow_matching=use_flow_matching,
+                prediction_type=prediction_type,
+                seed=seed,
+                use_lyra=False,
+                progress_callback=progress_callback
+            )
+
+            progress(1.0, desc="Complete!")
+            return image, None, seed

+        else:
+            # Generate both standard and Lyra versions
+            progress(0.05, desc="Generating standard version...")
+
+            image_standard = pipeline(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                height=height,
+                width=width,
+                num_inference_steps=num_steps,
+                guidance_scale=cfg_scale,
+                shift=shift,
+                use_flow_matching=use_flow_matching,
+                prediction_type=prediction_type,
+                seed=seed,
+                use_lyra=False,
+                progress_callback=lambda s, t, d: progress(0.05 + (s/t) * 0.45, desc=d)
+            )
+
+            progress(0.5, desc="Generating Lyra fusion version...")
+
+            image_lyra = pipeline(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                height=height,
+                width=width,
+                num_inference_steps=num_steps,
+                guidance_scale=cfg_scale,
+                shift=shift,
+                use_flow_matching=use_flow_matching,
+                prediction_type=prediction_type,
+                seed=seed,
+                use_lyra=True,
+                progress_callback=lambda s, t, d: progress(0.5 + (s/t) * 0.45, desc=d)
+            )
+
+            progress(1.0, desc="Complete!")
+            return image_standard, image_lyra, seed

     except Exception as e:
         print(f"❌ Generation failed: {e}")
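The `duration` lambda indexes into `generate_image`'s positional arguments, so it is tightly coupled to the parameter order above: with `clip_model` and `use_lyra` added, `args[4]`, `args[6]`, `args[7]`, and `args[11]` line up with `num_steps`, `width`, `height`, and `use_lyra`. A named equivalent of the same mapping (the `gpu_duration` name is illustrative, not in the commit); if the signature order changes again, these indices must change with it:

```python
# Positional order of generate_image after this change:
#   0 prompt, 1 negative_prompt, 2 model_choice, 3 clip_model, 4 num_steps,
#   5 cfg_scale, 6 width, 7 height, 8 shift, 9 use_flow_matching,
#   10 prediction_type, 11 use_lyra, 12 seed, 13 randomize_seed
def gpu_duration(*args) -> int:
    num_steps, width, height, use_lyra = args[4], args[6], args[7], args[11]
    return estimate_duration(num_steps, width, height, use_lyra)

# @spaces.GPU(duration=gpu_duration) would be equivalent to the lambda used above.
```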
@@ -648,12 +687,13 @@

         **Geometric crystalline diffusion with flow matching** by [AbstractPhil](https://huggingface.co/AbstractPhil)

+        Generate images using SD1.5-based models with geometric deep learning:
+        - **Flow-Lune**: Flow matching with pentachoron geometric structures (15-25 steps)
+        - **SD1.5 Base**: Standard Stable Diffusion 1.5 baseline
+        - **Lyra VAE Toggle**: Add CLIP+T5 fusion for side-by-side comparison
+        - **CLIP Variants**: Different text encoders for varied semantic understanding

+        Enable Lyra to see both standard CLIP and geometric CLIP+T5 fusion results!
         """)

         with gr.Row():
@@ -674,15 +714,34 @@

                 # Model selection
                 model_choice = gr.Dropdown(
+                    label="Base Model",
                     choices=[
                         "Flow-Lune (Latest)",
                         "SD1.5 Base"
                     ],
                     value="Flow-Lune (Latest)"
                 )

+                # CLIP model selection
+                clip_model_choice = gr.Dropdown(
+                    label="CLIP Model",
+                    choices=[
+                        "openai/clip-vit-large-patch14",
+                        "openai/clip-vit-large-patch14-336",
+                        "laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
+                        "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+                    ],
+                    value="openai/clip-vit-large-patch14",
+                    info="Text encoder variant"
+                )
+
+                # Lyra toggle
+                use_lyra = gr.Checkbox(
+                    label="Enable Lyra VAE (CLIP+T5 Fusion)",
+                    value=False,
+                    info="Generate side-by-side comparison with geometric fusion"
+                )
+
                 # Flow matching settings
                 with gr.Accordion("Flow Matching Settings", open=True):
                     use_flow_matching = gr.Checkbox(
@@ -759,10 +818,18 @@
                 generate_btn = gr.Button("🎨 Generate", variant="primary", size="lg")

             with gr.Column(scale=1):
+                with gr.Row():
+                    output_image_standard = gr.Image(
+                        label="Standard Generation",
+                        type="pil",
+                        visible=True
+                    )
+
+                    output_image_lyra = gr.Image(
+                        label="Lyra Fusion 🔵",
+                        type="pil",
+                        visible=False
+                    )

                 output_seed = gr.Number(
                     label="Used Seed",
@@ -775,15 +842,22 @@
                 - **Shift** controls the flow trajectory (2.0-2.5 recommended for Lune)
                 - Lower shift = more direct path, higher shift = more exploration
                 - **Lune** uses v_prediction by default for optimal results
+                - **Lyra toggle** generates side-by-side comparison (CLIP vs CLIP+T5 fusion)
+                - **CLIP variants** may give different semantic interpretations
                 - **SD1.5 Base** uses epsilon (standard diffusion)
                 - Lune operates in a scaled latent space (5.52x) for geometric efficiency

                 ### Model Info:
                 - **Flow-Lune**: Trained with flow matching on 500k SD1.5 distillation pairs
+                - **Lyra VAE**: Multi-modal fusion (CLIP+T5) via Cantor geometric attention
                 - **SD1.5 Base**: Standard Stable Diffusion 1.5 for comparison

+                ### CLIP Models:
+                - **openai/clip-vit-large-patch14**: Standard CLIP-L (default)
+                - **openai/clip-vit-large-patch14-336**: Higher resolution CLIP-L
+                - **laion/CLIP-ViT-L-14**: LAION-trained CLIP-L variant
+                - **laion/CLIP-ViT-bigG-14**: Larger CLIP-G model
+
                 [📚 Learn more about geometric deep learning](https://github.com/AbstractEyes/lattice_vocabulary)
                 """)

@@ -794,6 +868,7 @@
                    "A serene mountain landscape at golden hour, crystal clear lake reflecting snow-capped peaks, photorealistic, 8k",
                    "blurry, low quality",
                    "Flow-Lune (Latest)",
+                   "openai/clip-vit-large-patch14",
                    20,
                    7.5,
                    512,

@@ -801,44 +876,49 @@
                    2.5,
                    True,
                    "v_prediction",
+                   False,
                    42,
                    False
                ],
                [
                    "A futuristic cyberpunk city at night, neon lights, rain-slicked streets, highly detailed",
                    "low quality, blurry",
+                   "Flow-Lune (Latest)",
+                   "openai/clip-vit-large-patch14",
+                   20,
                    7.5,
                    512,
                    512,
+                   2.5,
+                   True,
+                   "v_prediction",
+                   True,
                    123,
                    False
                ],
                [
                    "Portrait of a majestic lion, golden mane, dramatic lighting, wildlife photography",
                    "cartoon, painting",
+                   "SD1.5 Base",
+                   "openai/clip-vit-large-patch14",
+                   30,
+                   7.5,
                    512,
                    512,
+                   0.0,
+                   False,
+                   "epsilon",
                    True,
                    456,
                    False
                ]
            ],
            inputs=[
+               prompt, negative_prompt, model_choice, clip_model_choice, num_steps, cfg_scale,
+               width, height, shift, use_flow_matching, prediction_type, use_lyra,
                seed, randomize_seed
            ],
+           outputs=[output_image_standard, output_image_lyra, output_seed],
            fn=generate_image,
            cache_examples=False
        )
@@ -854,12 +934,6 @@
                    use_flow_matching: gr.update(value=False),
                    prediction_type: gr.update(value="epsilon")
                }
            else:
                # Lune: enable flow matching, use v_prediction
                return {

@@ -867,20 +941,40 @@
                    prediction_type: gr.update(value="v_prediction")
                }

+        # Update image visibility when Lyra toggle changes
+        def on_lyra_toggle(lyra_enabled):
+            """Show/hide Lyra comparison image."""
+            if lyra_enabled:
+                return {
+                    output_image_standard: gr.update(visible=True, label="Standard CLIP"),
+                    output_image_lyra: gr.update(visible=True, label="Lyra Fusion (CLIP+T5) 🔵")
+                }
+            else:
+                return {
+                    output_image_standard: gr.update(visible=True, label="Generated Image"),
+                    output_image_lyra: gr.update(visible=False)
+                }
+
        model_choice.change(
            fn=on_model_change,
            inputs=[model_choice],
            outputs=[use_flow_matching, prediction_type]
        )

+        use_lyra.change(
+            fn=on_lyra_toggle,
+            inputs=[use_lyra],
+            outputs=[output_image_standard, output_image_lyra]
+        )
+
        generate_btn.click(
            fn=generate_image,
            inputs=[
+               prompt, negative_prompt, model_choice, clip_model_choice, num_steps, cfg_scale,
+               width, height, shift, use_flow_matching, prediction_type, use_lyra,
                seed, randomize_seed
            ],
+           outputs=[output_image_standard, output_image_lyra, output_seed]
        )

    return demo
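`on_lyra_toggle` returns a dict keyed by output components, which Gradio matches against the `outputs` list passed to `use_lyra.change`. A self-contained sketch of the same show/hide pattern (component and function names are illustrative, not from app.py):

```python
import gradio as gr

with gr.Blocks() as demo:
    compare = gr.Checkbox(label="Compare", value=False)
    img_a = gr.Image(label="Generated Image")
    img_b = gr.Image(label="Comparison", visible=False)

    def toggle(enabled: bool):
        # Dict return: keys are output components, values are gr.update(...) patches
        return {
            img_a: gr.update(label="Standard" if enabled else "Generated Image"),
            img_b: gr.update(visible=enabled),
        }

    compare.change(fn=toggle, inputs=[compare], outputs=[img_a, img_b])
```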