import os
import sys
import logging
import spaces
import torch
import gradio as gr
import tempfile
import numpy as np
from datetime import datetime
from pathlib import Path
from PIL import Image
import json
import base64
import io
import time
import random
import gc
import math

from huggingface_hub import InferenceClient
from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

from config_image_edit import (
    TITLE,
    LOGO_HTML,
    CUSTOM_HEAD,
    HEADLINE_MD,
    SUBHEADLINE_MD,
    THEME,
    DEFAULT_PROMPT,
    PROMPT_PLACEHOLDER,
    DEFAULT_PROMPT_NEGATIVE,
    SYSTEM_PROMPT_EDIT,
    INFO_TAB_TEXT,
)

log = logging.getLogger(__name__)

# MARK: GLOBAL CONSTANTS

# Define paths using pathlib.Path for consistency
BASE_DIR = Path(__file__).resolve().parent
RES = BASE_DIR / "_res"
ASSETS = RES / "assets"
EXAMPLES = BASE_DIR / "examples"
IMAGE_CACHE = BASE_DIR / "image_cache"

# Ensure the image cache directory exists
IMAGE_CACHE.mkdir(exist_ok=True)

# Set static paths for Gradio
# gr.set_static_paths([str(IMAGE_CACHE), str(EXAMPLES), str(ASSETS), str(RES)])
gr.set_static_paths(paths=[IMAGE_CACHE, RES, EXAMPLES])

MAX_SEED = np.iinfo(np.int32).max
MIN_IMAGE_SIZE = 256
MAX_IMAGE_SIZE = 2048

# ------------------
# MARK: Pipeline load
# ------------------
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model pipeline
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=torch.bfloat16,
).to(device)
pipe.set_progress_bar_config(disable=None)

pipe.load_lora_weights("lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors", adapter_name="lightning-8steps")
pipe.load_lora_weights("dx8152/Qwen-Edit-2509-Multiple-angles", weight_name="镜头转换.safetensors", adapter_name="multiple-angles")
pipe.load_lora_weights("dx8152/Qwen-Image-Edit-2509-Light_restoration", weight_name="移除光影.safetensors", adapter_name="light-restoration")
pipe.load_lora_weights("dx8152/Qwen-Image-Edit-2509-Relight", weight_name="Qwen-Edit-Relight.safetensors", adapter_name="relight")
pipe.load_lora_weights("dx8152/Qwen-Edit-2509-Multi-Angle-Lighting", weight_name="多角度灯光-251116.safetensors", adapter_name="multi-angle-lighting")
pipe.load_lora_weights("tlennon-ie/qwen-edit-skin", weight_name="qwen-edit-skin_1.1_000002750.safetensors", adapter_name="edit-skin")
pipe.load_lora_weights("lovis93/next-scene-qwen-image-lora-2509", weight_name="next-scene_lora-v2-3000.safetensors", adapter_name="next-scene")
pipe.load_lora_weights("vafipas663/Qwen-Edit-2509-Upscale-LoRA", weight_name="qwen-edit-enhance_64-v3_000001000.safetensors", adapter_name="upscale-image")

pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# pipe.fuse_lora()


def polish_prompt_hf(original_prompt, img_list):
    # Ensure HF_TOKEN is set
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        print("Warning: HF_TOKEN not set. Falling back to original prompt.")
        return original_prompt
    prompt = f"{SYSTEM_PROMPT_EDIT}\n\nUser Input: {original_prompt}\n\nRewritten Prompt:"
    system_prompt = "you are a helpful assistant, you should provide useful answers to users."

    try:
        # Initialize the client
        client = InferenceClient(
            provider="nebius",
            api_key=api_key,
        )

        # Convert list of images to base64 data URLs
        image_urls = []
        if img_list is not None:
            # Ensure img_list is actually a list
            if not isinstance(img_list, list):
                img_list = [img_list]

            for img in img_list:
                image_url = None

                # If img is a PIL Image
                if hasattr(img, 'save'):  # Check if it's a PIL Image
                    buffered = io.BytesIO()
                    img.save(buffered, format="PNG")
                    img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
                    image_url = f"data:image/png;base64,{img_base64}"
                # If img is already a file path (string)
                elif isinstance(img, str):
                    with open(img, "rb") as image_file:
                        img_base64 = base64.b64encode(image_file.read()).decode('utf-8')
                    image_url = f"data:image/png;base64,{img_base64}"
                else:
                    print(f"Warning: Unexpected image type: {type(img)}, skipping...")
                    continue

                if image_url:
                    image_urls.append(image_url)

        # Build the content array with text first, then all images
        content = [{"type": "text", "text": prompt}]

        # Add all images to the content
        for image_url in image_urls:
            content.append({"type": "image_url", "image_url": {"url": image_url}})

        # Format the messages for the chat completions API
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": content},
        ]

        # Call the API
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=messages,
        )

        # Parse the response
        result = completion.choices[0].message.content

        # Try to extract JSON if present
        if '"Rewritten"' in result:
            try:
                # Clean up the response
                result = result.replace('```json', '').replace('```', '')
                result_json = json.loads(result)
                polished_prompt = result_json.get('Rewritten', result)
            except Exception:
                polished_prompt = result
        else:
            polished_prompt = result

        polished_prompt = polished_prompt.strip().replace("\n", " ")
        return polished_prompt

    except Exception as e:
        print(f"Error during API call to Hugging Face: {e}")
        # Fall back to the original prompt if enhancement fails
        return original_prompt


def use_history_as_input(evt: gr.SelectData, history):
    if history and evt.index < len(history):
        im = history[evt.index][0]
        image_path = IMAGE_CACHE / history[evt.index][1]
        im.save(image_path, format="PNG")
        return str(image_path)  # gr.update(value=history[evt.index][0])
    return  # gr.update()


@spaces.GPU(duration=60)
def infer(
    images,
    prompt,
    lora_adapter="Lightning-8steps",
    negative_prompt=" ",
    num_inference_steps=30,
    true_guidance_scale=2.0,
    seed=42,
    randomize_seed=False,
    rewrite_prompt=True,
    gallery=None,
    height=None,
    width=None,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    if true_guidance_scale <= 2:
        negative_prompt = " "

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Activate the LoRA adapter selected in the UI
    if lora_adapter == "Multiple-Angles":
        pipe.set_adapters(["multiple-angles"], adapter_weights=[1.0])
    elif lora_adapter == "Light-Restoration":
        pipe.set_adapters(["light-restoration"], adapter_weights=[1.0])
    elif lora_adapter == "Relight":
        pipe.set_adapters(["relight"], adapter_weights=[1.0])
    elif lora_adapter == "Multi-Angle-Lighting":
        pipe.set_adapters(["multi-angle-lighting"], adapter_weights=[1.0])
    elif lora_adapter == "Edit-Skin":
        pipe.set_adapters(["edit-skin"], adapter_weights=[1.0])
    elif lora_adapter == "Next-Scene":
        pipe.set_adapters(["next-scene"], adapter_weights=[1.0])
    elif lora_adapter == "Upscale-Image":
        pipe.set_adapters(["upscale-image"], adapter_weights=[1.0])
    elif lora_adapter == "Lightning-8steps":
        pipe.set_adapters(["lightning-8steps"], adapter_weights=[1.0])

    # Set up the generator for reproducibility
    generator = torch.Generator(device=device).manual_seed(seed)

    # Load input images into PIL Images
    pil_images = []
    if images is not None:
        for item in images:
            try:
                if isinstance(item[0], Image.Image):
                    pil_images.append(item[0].convert("RGB"))
                elif isinstance(item[0], str):
                    pil_images.append(Image.open(item[0]).convert("RGB"))
                elif hasattr(item, "name"):
                    pil_images.append(Image.open(item.name).convert("RGB"))
            except Exception:
                continue

    # Treat 256x256 (MIN_IMAGE_SIZE) as unset and let the pipeline pick the output size
    if height == 256 and width == 256:
        height, width = None, None

    print(f"Calling pipeline with prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")

    if rewrite_prompt and len(pil_images) > 0:
        prompt = polish_prompt_hf(prompt, pil_images)
        print(f"Rewritten Prompt: {prompt}")

    # Generate the image
    image = pipe(
        image=pil_images if len(pil_images) > 0 else None,
        prompt=prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images[0]

    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    image_path = IMAGE_CACHE / f"{timestamp}.png"
    try:
        image.save(image_path, format="PNG")
    except Exception as exc:
        raise gr.Error(f"Error while saving the image: {exc}")

    if gallery is None:
        gallery = []
    gallery.insert(0, (image, f"{timestamp}.png"))

    return str(image_path), seed, gallery


# ------------------
# MARK: GRADIO UI
# ------------------
with gr.Blocks(title=TITLE) as demo:
    with gr.Row(elem_classes="row-logo"):
        gr.HTML(
            f"""{LOGO_HTML}""",
            elem_classes="logo-html",
        )
    with gr.Tab(" Start"):
        with gr.Row(elem_classes="row-header"):
            gr.HTML(
                f"""
Regards Sebastian, feel free to give the Space a