yamildiego committed on
Commit 6ab5690 · 1 Parent(s): 5a1d390

new resize with original images

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
handler.py CHANGED
@@ -45,19 +45,19 @@ class EndpointHandler():
        face_adapter = f"./checkpoints/ip-adapter.bin"
        controlnet_path = f"./checkpoints/ControlNetModel"

-       transform = Compose([
-           Resize(
-               width=512,
-               height=512,
-               resize_target=False,
-               keep_aspect_ratio=True,
-               ensure_multiple_of=14,
-               resize_method='lower_bound',
-               image_interpolation_method=cv2.INTER_CUBIC,
-           ),
-           NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-           PrepareForNet(),
-       ])
+       # transform = Compose([
+       #     Resize(
+       #         width=512,
+       #         height=512,
+       #         resize_target=False,
+       #         keep_aspect_ratio=True,
+       #         ensure_multiple_of=14,
+       #         resize_method='lower_bound',
+       #         image_interpolation_method=cv2.INTER_CUBIC,
+       #     ),
+       #     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+       #     PrepareForNet(),
+       # ])

        self.controlnet_identitynet = ControlNetModel.from_pretrained(
            controlnet_path, torch_dtype=dtype
@@ -73,7 +73,6 @@ class EndpointHandler():
            feature_extractor=None,
        ).to(device)

-
        self.pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
            self.pipe.scheduler.config
        )
@@ -89,10 +88,9 @@ class EndpointHandler():

        self.pipe.scheduler = diffusers.LCMScheduler.from_config(self.pipe.scheduler.config)

-
        controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
        controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
-       controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
+       # controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"

        controlnet_pose = ControlNetModel.from_pretrained(
            controlnet_pose_model, torch_dtype=dtype
@@ -100,49 +98,49 @@ class EndpointHandler():
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_canny_model, torch_dtype=dtype
        ).to(device)
-       controlnet_depth = ControlNetModel.from_pretrained(
-           controlnet_depth_model, torch_dtype=dtype
-       ).to(device)
+       # controlnet_depth = ControlNetModel.from_pretrained(
+       #     controlnet_depth_model, torch_dtype=dtype
+       # ).to(device)

        openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
-       depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
+       # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()

        def get_canny_image(image, t1=100, t2=200):
            image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            edges = cv2.Canny(image, t1, t2)
            return Image.fromarray(edges, "L")

-       def get_depth_map(image):
+       # def get_depth_map(image):

-           image = np.array(image) / 255.0
+       #     image = np.array(image) / 255.0

-           h, w = image.shape[:2]
+       #     h, w = image.shape[:2]

-           image = transform({'image': image})['image']
-           image = torch.from_numpy(image).unsqueeze(0).to("cuda")
+       #     image = transform({'image': image})['image']
+       #     image = torch.from_numpy(image).unsqueeze(0).to("cuda")

-           with torch.no_grad():
-               depth = depth_anything(image)
+       #     with torch.no_grad():
+       #         depth = depth_anything(image)

-           depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
-           depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+       #     depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+       #     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

-           depth = depth.cpu().numpy().astype(np.uint8)
+       #     depth = depth.cpu().numpy().astype(np.uint8)

-           depth_image = Image.fromarray(depth)
+       #     depth_image = Image.fromarray(depth)

-           return depth_image
+       #     return depth_image

        self.controlnet_map = {
            "pose": controlnet_pose,
            "canny": controlnet_canny,
-           "depth": controlnet_depth,
+           # "depth": controlnet_depth,
        }

        self.controlnet_map_fn = {
            "pose": openpose,
            "canny": get_canny_image,
-           "depth": get_depth_map,
+           # "depth": get_depth_map,
        }

        self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
@@ -153,13 +151,14 @@ class EndpointHandler():
        identitynet_strength_ratio = 0.8
        pose_strength = 0.4
        canny_strength = 0.3
-       depth_strength = 0.5
-       self.my_controlnet_selection = ["pose", "canny", "depth"]
+       # depth_strength = 0.5
+       self.my_controlnet_selection = ["pose", "canny"]
+       # self.my_controlnet_selection = ["pose", "canny", "depth"]

        controlnet_scales = {
            "pose": pose_strength,
            "canny": canny_strength,
-           "depth": depth_strength,
+           # "depth": depth_strength,
        }

        self.pipe.controlnet = MultiControlNetModel(
@@ -176,19 +175,23 @@ class EndpointHandler():
        default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"

        # hyperparamters
-       prompt_input = data.pop("inputs", "a man")
+       # prompt_input = data.pop("inputs", "a man")
+       prompt_input = "a man"
        prompt=default_prompt.replace("{prompt}", prompt_input)
-       num_inference_steps = data.pop("num_inference_steps", 5)
-       guidance_scale = data.pop("guidance_scale", 1.5)
+       num_inference_steps = data.pop("num_inference_steps", 20)
+       guidance_scale = data.pop("guidance_scale", 5.0)
        negative_prompt = data.pop("negative_prompt", default_negative_prompt)

        # 1024px
        # face_image_path = data.pop("face_image_path", "https://i.ibb.co/SKg69dD/kaifu-resize.png")
        # pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/ZSrQ8ZJ/pose.jpg")
        # 512px
-       face_image_path = "https://i.ibb.co/5Rsrd2d/kaifu-resize-1.png"
-       pose_image_path = "https://i.ibb.co/9bP9tMb/pose-2-1.jpg"
+       # face_image_path = "https://i.ibb.co/5Rsrd2d/kaifu-resize-1.png"
+       # pose_image_path = "https://i.ibb.co/9bP9tMb/pose-2-1.jpg"

+       # original
+       face_image_path = "https://i.ibb.co/GQzm527/examples-musk-resize.jpg"
+       pose_image_path = "https://i.ibb.co/ZSrQ8ZJ/pose.jpg"

        adapter_strength_ratio = 0.8
@@ -198,6 +201,44 @@ class EndpointHandler():
        def convert_from_image_to_cv2(img: Image) -> np.ndarray:
            return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

+       def resize_img(
+           input_image,
+           max_side=1280,
+           min_side=1024,
+           size=None,
+           pad_to_max_side=False,
+           mode=Image.BILINEAR,
+           base_pixel_number=64,
+       ):
+           if size is not None:
+               w_resize_new, h_resize_new = size
+           else:
+               w, h = input_image.size
+               # Compute the resize in a single pass
+               ratio_min = min_side / min(w, h)
+               w_min, h_min = round(ratio_min * w), round(ratio_min * h)
+               ratio_max = max_side / max(w_min, h_min)
+               # Apply the smaller of the two ratios so both constraints hold
+               final_ratio = min(ratio_min, ratio_max)
+               w_final, h_final = round(final_ratio * w), round(final_ratio * h)
+
+               # Snap to the nearest multiple of the base pixel number
+               w_resize_new = (w_final // base_pixel_number) * base_pixel_number
+               h_resize_new = (h_final // base_pixel_number) * base_pixel_number
+
+           # Resize only once
+           input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+
+           if pad_to_max_side:
+               # Build the padded background
+               res = Image.new("RGB", (max_side, max_side), (255, 255, 255))
+               offset_x = (max_side - w_resize_new) // 2
+               offset_y = (max_side - h_resize_new) // 2
+               res.paste(input_image, (offset_x, offset_y))
+               return res
+
+           return input_image
+
        # check if the input is valid
        # if face_image_path is None:
        #     raise gr.Error(
@@ -210,6 +251,7 @@ class EndpointHandler():
        # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

        face_image = load_image(face_image_path)
+       face_image = resize_img(face_image, max_side=1024)
        face_image_cv2 = convert_from_image_to_cv2(face_image)
        height, width, _ = face_image_cv2.shape

@@ -233,6 +275,7 @@ class EndpointHandler():
        img_controlnet = face_image
        if pose_image_path is not None:
            pose_image = load_image(pose_image_path)
+           pose_image = resize_img(pose_image, max_side=1024)
            img_controlnet = pose_image
            pose_image_cv2 = convert_from_image_to_cv2(pose_image)
 
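For reference, a minimal sketch of what the new resize_img helper does to an arbitrary input, assuming the function from the diff above is available at module scope (the 800x1200 input below is illustrative, not taken from the commit): the image is scaled so its short side approaches min_side, capped so the long side stays within max_side, and both dimensions are rounded down to multiples of base_pixel_number (64), keeping them SDXL-friendly.

    from PIL import Image

    # Illustrative input; any RGB image behaves the same way.
    img = Image.new("RGB", (800, 1200))

    # Same call the handler makes for face_image and pose_image.
    resized = resize_img(img, max_side=1024)

    # Both dimensions come out as multiples of 64; for this input that works out to (512, 768).
    print(resized.size)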
pipeline_stable_diffusion_xl_instantid_full.py CHANGED
@@ -61,10 +61,9 @@ EXAMPLE_DOC_STRING = """
        >>> from PIL import Image

        >>> from insightface.app import FaceAnalysis
-
        >>> from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps
        >>> # download 'antelopev2' under ./models
-       >>> app = FaceAnalysis(name='antelopev2')
+       >>> app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        >>> app.prepare(ctx_id=0, det_size=(640, 640))

        >>> # download models under ./checkpoints
@@ -469,33 +468,34 @@ class LongPromptWeight(object):
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        return prompt_embeds

-def draw_kps_optimized(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
-    stickwidth = 4
-    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
-    kps = np.array(kps)
-
-    w, h = image_pil.size
-    out_img = np.zeros([h, w, 3], dtype=np.uint8)
-
-    for i, (start, end) in enumerate(limbSeq):
-        color = color_list[i % len(color_list)]
-
-        x = kps[[start, end], 0]
-        y = kps[[start, end], 1]
-
-        center = tuple(np.round(np.mean([x, y], axis=1)).astype(int))
-        length = int(np.hypot(x[0] - x[1], y[0] - y[1]) / 2)
-        angle = int(np.degrees(np.arctan2(y[0] - y[1], x[0] - x[1])))
-
-        polygon = cv2.ellipse2Poly(center, (length, stickwidth), angle, 0, 360, 1)
-        cv2.fillConvexPoly(out_img, polygon, color)
-
-    for idx_kp, (x, y) in enumerate(kps):
-        color = color_list[idx_kp % len(color_list)]
-        cv2.circle(out_img, (int(x), int(y)), 10, color, thickness=-1)
-
-    out_img_pil = PIL.Image.fromarray(out_img)
-    return out_img_pil
+def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
+
+    stickwidth = 4
+    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
+    kps = np.array(kps)
+
+    w, h = image_pil.size
+    out_img = np.zeros([h, w, 3])
+
+    for i in range(len(limbSeq)):
+        index = limbSeq[i]
+        color = color_list[index[0]]
+
+        x = kps[index][:, 0]
+        y = kps[index][:, 1]
+        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
+        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
+        out_img = (out_img * 0.6).astype(np.uint8)
+
+    for idx_kp, kp in enumerate(kps):
+        color = color_list[idx_kp]
+        x, y = kp
+        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
+
+    out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
+    return out_img_pil


class StableDiffusionXLInstantIDPipeline(StableDiffusionXLControlNetPipeline):

@@ -526,7 +526,7 @@ class StableDiffusionXLInstantIDPipeline(StableDiffusionXLControlNetPipeline):
    def set_image_proj_model(self, model_ckpt, image_emb_dim=512, num_tokens=16):

        image_proj_model = Resampler(
-           dim=512,
+           dim=1280,
            depth=4,
            dim_head=64,
            heads=20,
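For context, a small usage sketch of the restored draw_kps, assuming the module's cv2, math, numpy, and PIL imports are in place; the five keypoint coordinates below are made up, whereas in the pipeline they come from the insightface face-analysis results.

    import numpy as np
    from PIL import Image

    # Hypothetical 640x640 canvas and five face keypoints.
    canvas = Image.new("RGB", (640, 640))
    kps = np.array([[250, 280], [390, 280], [320, 360], [270, 430], [370, 430]])

    # Limbs are drawn as filled ellipses from each outer keypoint to keypoint 2,
    # the canvas is dimmed by 0.6 after each limb, then each keypoint is drawn
    # as a solid circle.
    kps_image = draw_kps(canvas, kps)
    kps_image.save("kps_condition.png")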