Commit 6ab5690
Parent: 5a1d390

new resize with original images

Files changed:
- .DS_Store (+0 -0)
- handler.py (+85 -42)
- pipeline_stable_diffusion_xl_instantid_full.py (+30 -30)
.DS_Store CHANGED

Binary files a/.DS_Store and b/.DS_Store differ
handler.py CHANGED

@@ -45,19 +45,19 @@ class EndpointHandler():
         face_adapter = f"./checkpoints/ip-adapter.bin"
         controlnet_path = f"./checkpoints/ControlNetModel"

-        transform = Compose([
-            Resize(
-                width=512,
-                height=512,
-                resize_target=False,
-                keep_aspect_ratio=True,
-                ensure_multiple_of=14,
-                resize_method='lower_bound',
-                image_interpolation_method=cv2.INTER_CUBIC,
-            ),
-            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            PrepareForNet(),
-        ])
+        # transform = Compose([
+        #     Resize(
+        #         width=512,
+        #         height=512,
+        #         resize_target=False,
+        #         keep_aspect_ratio=True,
+        #         ensure_multiple_of=14,
+        #         resize_method='lower_bound',
+        #         image_interpolation_method=cv2.INTER_CUBIC,
+        #     ),
+        #     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        #     PrepareForNet(),
+        # ])

         self.controlnet_identitynet = ControlNetModel.from_pretrained(
             controlnet_path, torch_dtype=dtype

@@ -73,7 +73,6 @@ class EndpointHandler():
             feature_extractor=None,
         ).to(device)

-
         self.pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
             self.pipe.scheduler.config
         )

@@ -89,10 +88,9 @@ class EndpointHandler():

         self.pipe.scheduler = diffusers.LCMScheduler.from_config(self.pipe.scheduler.config)

-
         controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
         controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
-        controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
+        # controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"

         controlnet_pose = ControlNetModel.from_pretrained(
             controlnet_pose_model, torch_dtype=dtype

@@ -100,49 +98,49 @@ class EndpointHandler():
         controlnet_canny = ControlNetModel.from_pretrained(
             controlnet_canny_model, torch_dtype=dtype
         ).to(device)
-        controlnet_depth = ControlNetModel.from_pretrained(
-            controlnet_depth_model, torch_dtype=dtype
-        ).to(device)
+        # controlnet_depth = ControlNetModel.from_pretrained(
+        #     controlnet_depth_model, torch_dtype=dtype
+        # ).to(device)

         openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
-        depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
+        # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()

         def get_canny_image(image, t1=100, t2=200):
             image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
             edges = cv2.Canny(image, t1, t2)
             return Image.fromarray(edges, "L")

-        def get_depth_map(image):
+        # def get_depth_map(image):

-            image = np.array(image) / 255.0
+        #     image = np.array(image) / 255.0

-            h, w = image.shape[:2]
+        #     h, w = image.shape[:2]

-            image = transform({'image': image})['image']
-            image = torch.from_numpy(image).unsqueeze(0).to("cuda")
+        #     image = transform({'image': image})['image']
+        #     image = torch.from_numpy(image).unsqueeze(0).to("cuda")

-            with torch.no_grad():
-                depth = depth_anything(image)
+        #     with torch.no_grad():
+        #         depth = depth_anything(image)

-            depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
-            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+        #     depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+        #     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

-            depth = depth.cpu().numpy().astype(np.uint8)
+        #     depth = depth.cpu().numpy().astype(np.uint8)

-            depth_image = Image.fromarray(depth)
+        #     depth_image = Image.fromarray(depth)

-            return depth_image
+        #     return depth_image

         self.controlnet_map = {
             "pose": controlnet_pose,
             "canny": controlnet_canny,
-            "depth": controlnet_depth,
+            # "depth": controlnet_depth,
         }

         self.controlnet_map_fn = {
             "pose": openpose,
             "canny": get_canny_image,
-            "depth": get_depth_map,
+            # "depth": get_depth_map,
         }

         self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])

@@ -153,13 +151,14 @@ class EndpointHandler():
         identitynet_strength_ratio = 0.8
         pose_strength = 0.4
         canny_strength = 0.3
-        depth_strength = 0.5
-        self.my_controlnet_selection = ["pose", "canny", "depth"]
+        # depth_strength = 0.5
+        self.my_controlnet_selection = ["pose", "canny"]
+        # self.my_controlnet_selection = ["pose", "canny", "depth"]

         controlnet_scales = {
             "pose": pose_strength,
             "canny": canny_strength,
-            "depth": depth_strength,
+            # "depth": depth_strength,
         }

         self.pipe.controlnet = MultiControlNetModel(

@@ -176,19 +175,23 @@ class EndpointHandler():
         default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"

         # hyperparamters
-        prompt_input = data.pop("inputs", "a man")
+        # prompt_input = data.pop("inputs", "a man")
+        prompt_input = "a man"
         prompt=default_prompt.replace("{prompt}", prompt_input)
-        num_inference_steps = data.pop("num_inference_steps",
-        guidance_scale = data.pop("guidance_scale",
+        num_inference_steps = data.pop("num_inference_steps", 20)
+        guidance_scale = data.pop("guidance_scale", 5.0)
         negative_prompt = data.pop("negative_prompt", default_negative_prompt)

         # 1024px
         # face_image_path = data.pop("face_image_path", "https://i.ibb.co/SKg69dD/kaifu-resize.png")
         # pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/ZSrQ8ZJ/pose.jpg")
         # 512px
-        face_image_path = "https://i.ibb.co/5Rsrd2d/kaifu-resize-1.png"
-        pose_image_path = "https://i.ibb.co/9bP9tMb/pose-2-1.jpg"
+        # face_image_path = "https://i.ibb.co/5Rsrd2d/kaifu-resize-1.png"
+        # pose_image_path = "https://i.ibb.co/9bP9tMb/pose-2-1.jpg"

+        # original
+        face_image_path = "https://i.ibb.co/GQzm527/examples-musk-resize.jpg"
+        pose_image_path = "https://i.ibb.co/ZSrQ8ZJ/pose.jpg"

         adapter_strength_ratio = 0.8

@@ -198,6 +201,44 @@ class EndpointHandler():
         def convert_from_image_to_cv2(img: Image) -> np.ndarray:
             return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

+        def resize_img(
+            input_image,
+            max_side=1280,
+            min_side=1024,
+            size=None,
+            pad_to_max_side=False,
+            mode=Image.BILINEAR,
+            base_pixel_number=64,
+        ):
+            if size is not None:
+                w_resize_new, h_resize_new = size
+            else:
+                w, h = input_image.size
+                # Compute the resize in a single step
+                ratio_min = min_side / min(w, h)
+                w_min, h_min = round(ratio_min * w), round(ratio_min * h)
+                ratio_max = max_side / max(w_min, h_min)
+                # Apply the smaller of the two ratios so both constraints are met
+                final_ratio = min(ratio_min, ratio_max)
+                w_final, h_final = round(final_ratio * w), round(final_ratio * h)
+
+                # Adjust to the nearest multiple of the base pixel number
+                w_resize_new = (w_final // base_pixel_number) * base_pixel_number
+                h_resize_new = (h_final // base_pixel_number) * base_pixel_number
+
+            # Resize only once
+            input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+
+            if pad_to_max_side:
+                # Optimize creation of the background canvas
+                res = Image.new("RGB", (max_side, max_side), (255, 255, 255))
+                offset_x = (max_side - w_resize_new) // 2
+                offset_y = (max_side - h_resize_new) // 2
+                res.paste(input_image, (offset_x, offset_y))
+                return res
+
+            return input_image
+
         # check if the input is valid
         # if face_image_path is None:
         #     raise gr.Error(

@@ -210,6 +251,7 @@ class EndpointHandler():
         # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

         face_image = load_image(face_image_path)
+        face_image = resize_img(face_image, max_side=1024)
         face_image_cv2 = convert_from_image_to_cv2(face_image)
         height, width, _ = face_image_cv2.shape

@@ -233,6 +275,7 @@ class EndpointHandler():
         img_controlnet = face_image
         if pose_image_path is not None:
             pose_image = load_image(pose_image_path)
+            pose_image = resize_img(pose_image, max_side=1024)
             img_controlnet = pose_image
             pose_image_cv2 = convert_from_image_to_cv2(pose_image)

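For reference, the one-pass sizing arithmetic this commit introduces in resize_img can be previewed outside the handler. The sketch below is illustrative only: preview_size is a hypothetical standalone copy of the same computation, and the sample dimensions are made up; in the handler the real function is applied to the downloaded face and pose images via resize_img(image, max_side=1024).

from PIL import Image

def preview_size(w, h, max_side=1024, min_side=1024, base_pixel_number=64):
    # Mirror of the arithmetic added in handler.py: take the min_side ratio,
    # bound it by the max_side ratio derived from the min-scaled dimensions,
    # apply the smaller ratio, then floor both sides to multiples of
    # base_pixel_number.
    ratio_min = min_side / min(w, h)
    w_min, h_min = round(ratio_min * w), round(ratio_min * h)
    ratio_max = max_side / max(w_min, h_min)
    final_ratio = min(ratio_min, ratio_max)
    w_final, h_final = round(final_ratio * w), round(final_ratio * h)
    return ((w_final // base_pixel_number) * base_pixel_number,
            (h_final // base_pixel_number) * base_pixel_number)

# Made-up input sizes, just to show the snapping to multiples of 64.
for w, h in [(640, 480), (1024, 1024), (896, 1152)]:
    new_w, new_h = preview_size(w, h)
    resized = Image.new("RGB", (w, h)).resize((new_w, new_h), Image.BILINEAR)
    print((w, h), "->", resized.size)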
pipeline_stable_diffusion_xl_instantid_full.py CHANGED

@@ -61,10 +61,9 @@ EXAMPLE_DOC_STRING = """
        >>> from PIL import Image

        >>> from insightface.app import FaceAnalysis
-
        >>> from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps
        >>> # download 'antelopev2' under ./models
-
+        >>> app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        >>> app.prepare(ctx_id=0, det_size=(640, 640))

        >>> # download models under ./checkpoints

@@ -469,33 +468,34 @@ class LongPromptWeight(object):
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        return prompt_embeds

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
+
+    stickwidth = 4
+    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
+    kps = np.array(kps)
+
+    w, h = image_pil.size
+    out_img = np.zeros([h, w, 3])
+
+    for i in range(len(limbSeq)):
+        index = limbSeq[i]
+        color = color_list[index[0]]
+
+        x = kps[index][:, 0]
+        y = kps[index][:, 1]
+        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
+        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
+    out_img = (out_img * 0.6).astype(np.uint8)
+
+    for idx_kp, kp in enumerate(kps):
+        color = color_list[idx_kp]
+        x, y = kp
+        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
+
+    out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
+    return out_img_pil

 class StableDiffusionXLInstantIDPipeline(StableDiffusionXLControlNetPipeline):


@@ -526,7 +526,7 @@ class StableDiffusionXLInstantIDPipeline(StableDiffusionXLControlNetPipeline):
    def set_image_proj_model(self, model_ckpt, image_emb_dim=512, num_tokens=16):

        image_proj_model = Resampler(
-            dim=
+            dim=1280,
            depth=4,
            dim_head=64,
            heads=20,
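The draw_kps helper added above is a module-level function, so it can be exercised on its own. A minimal sketch, assuming the repo root is on the import path and the module's dependencies are installed; the canvas size and the five keypoint coordinates are invented for illustration.

from PIL import Image
from pipeline_stable_diffusion_xl_instantid_full import draw_kps

# Five InsightFace-style facial keypoints as (x, y) pairs; values are made up.
kps = [(180, 200), (330, 200), (256, 280), (200, 360), (312, 360)]

# draw_kps only reads the input image's size, so a blank canvas is enough here.
canvas = Image.new("RGB", (512, 512), (0, 0, 0))
kps_image = draw_kps(canvas, kps)

kps_image.save("kps_preview.png")
print(kps_image.size)  # same width and height as the input canvas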
|