Spaces:
Sleeping
CI: deploy Docker/PDM Space
Browse files
- services/app_service/deim_model.py +61 -64
- services/app_service/models/model_deimhgnetV2m_cpu_v0.json +2 -2
- services/app_service/models/model_deimhgnetV2m_cpu_v2.json +2 -2
- services/app_service/models/model_deimhgnetV2m_cpu_v3.json +3 -0
- services/app_service/models/model_deimhgnetV2m_cpu_v3.pt +3 -0
- services/app_service/models/model_deimhgnetV2m_cuda_v2.json +2 -2
- services/app_service/models/model_deimhgnetV2m_cuda_v3.json +3 -0
- services/app_service/models/model_deimhgnetV2m_cuda_v3.pt +3 -0
services/app_service/deim_model.py
CHANGED
|
@@ -92,6 +92,8 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 92 |
int(self.cfg["target_size"][0]),
|
| 93 |
int(self.cfg["target_size"][1]),
|
| 94 |
)
|
|
|
|
|
|
|
| 95 |
print(f"Loading model from: {weights_path}")
|
| 96 |
print(f"Model device: {self.device}")
|
| 97 |
self.model = torch.jit.load(weights_path, map_location=self.device).eval()
|
|
@@ -106,15 +108,13 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 106 |
)
|
| 107 |
return transforms(image).unsqueeze(0).to(self.device)
|
| 108 |
|
| 109 |
-
def _postprocess_detections(
|
| 110 |
-
self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]
|
| 111 |
-
):
|
| 112 |
w, h = wh
|
| 113 |
b_np = bboxes[0].cpu().numpy()
|
| 114 |
s_np = scores.sigmoid()[0].cpu().numpy()
|
| 115 |
-
mask = (s_np >= min_confidence).squeeze()
|
| 116 |
if not mask.any():
|
| 117 |
-
return np.zeros((0,
|
| 118 |
valid = b_np[mask]
|
| 119 |
cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
|
| 120 |
x1 = cx - box_w / 2
|
|
@@ -122,11 +122,30 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 122 |
x2 = cx + box_w / 2
|
| 123 |
y2 = cy + box_h / 2
|
| 124 |
valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
|
| 125 |
-
return np.concatenate([
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
def _nms(self, dets):
|
| 128 |
if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
|
| 129 |
return dets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
x1 = dets[:, 0]
|
| 131 |
y1 = dets[:, 1]
|
| 132 |
x2 = dets[:, 2]
|
|
@@ -134,7 +153,6 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 134 |
scores = dets[:, 4]
|
| 135 |
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
| 136 |
order = scores.argsort()[::-1]
|
| 137 |
-
|
| 138 |
keep = []
|
| 139 |
while order.size > 0:
|
| 140 |
i = order[0]
|
|
@@ -149,16 +167,20 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 149 |
iou = inter / (areas[i] + areas[order[1:]] - inter)
|
| 150 |
inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
|
| 151 |
order = order[inds + 1]
|
| 152 |
-
return
|
| 153 |
|
| 154 |
def _draw_detections_on_np(
|
| 155 |
self, image_np: np.ndarray, dets: np.ndarray
|
| 156 |
) -> np.ndarray:
|
| 157 |
for bbox in dets:
|
| 158 |
-
x1, y1, x2, y2, confidence = bbox
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
| 160 |
cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
| 161 |
-
label = f"{confidence:.2f}"
|
| 162 |
label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
|
| 163 |
cv2.rectangle(
|
| 164 |
image_np,
|
|
@@ -247,66 +269,41 @@ class DeimHgnetV2MDrone(BaseModel):
|
|
| 247 |
"Could not initialize video writer with MJPG or XVID codec"
|
| 248 |
)
|
| 249 |
|
| 250 |
-
print(f"DEIM Model:
|
| 251 |
-
print(
|
| 252 |
-
f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)"
|
| 253 |
-
)
|
| 254 |
print(f"DEIM Model: Output will be saved to {out_path}")
|
| 255 |
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
break
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
# Draw detections on frame
|
| 277 |
-
annotated_frame = self._draw_detections_on_np(
|
| 278 |
-
frame_bgr.copy(), dets
|
| 279 |
-
)
|
| 280 |
-
writer.write(annotated_frame)
|
| 281 |
-
frame_count += 1
|
| 282 |
-
|
| 283 |
-
# Log progress every 50 frames
|
| 284 |
-
if frame_count % 50 == 0:
|
| 285 |
-
print(f"DEIM Model: Processed {frame_count} frames...")
|
| 286 |
-
|
| 287 |
-
except Exception as e:
|
| 288 |
-
print(f"DEIM Model: Error processing frame {frame_count}: {e}")
|
| 289 |
-
# Write original frame if processing fails
|
| 290 |
-
writer.write(frame_bgr)
|
| 291 |
-
frame_count += 1
|
| 292 |
-
continue
|
| 293 |
-
|
| 294 |
-
except Exception as e:
|
| 295 |
-
print(f"DEIM Model: Error during video processing: {e}")
|
| 296 |
-
raise
|
| 297 |
-
finally:
|
| 298 |
-
cap.release()
|
| 299 |
-
if writer is not None:
|
| 300 |
-
writer.release()
|
| 301 |
-
print(
|
| 302 |
-
f"DEIM Model: Video processing completed. Processed {frame_count} frames."
|
| 303 |
)
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
return str(out_path)
|
| 307 |
|
| 308 |
|
| 309 |
# if __name__ == "__main__":
|
| 310 |
-
# model = DeimHgnetV2MDrone(version="
|
| 311 |
-
# output_image = model.predict_video("./resources/videos/raw/
|
| 312 |
-
# output_image.show()
|
|
|
|
| 92 |
int(self.cfg["target_size"][0]),
|
| 93 |
int(self.cfg["target_size"][1]),
|
| 94 |
)
|
| 95 |
+
self._categories = self.cfg["categories"]
|
| 96 |
+
self._confs_by_categories = self.cfg["confs_by_categories"]
|
| 97 |
print(f"Loading model from: {weights_path}")
|
| 98 |
print(f"Model device: {self.device}")
|
| 99 |
self.model = torch.jit.load(weights_path, map_location=self.device).eval()
|
|
|
|
| 108 |
)
|
| 109 |
return transforms(image).unsqueeze(0).to(self.device)
|
| 110 |
|
| 111 |
+
def _postprocess_detections(self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]):
|
|
|
|
|
|
|
| 112 |
w, h = wh
|
| 113 |
b_np = bboxes[0].cpu().numpy()
|
| 114 |
s_np = scores.sigmoid()[0].cpu().numpy()
|
| 115 |
+
mask = (s_np.max(axis=1) >= min_confidence).squeeze()
|
| 116 |
if not mask.any():
|
| 117 |
+
return np.zeros((0, 6), dtype=np.float32)
|
| 118 |
valid = b_np[mask]
|
| 119 |
cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
|
| 120 |
x1 = cx - box_w / 2
|
|
|
|
| 122 |
x2 = cx + box_w / 2
|
| 123 |
y2 = cy + box_h / 2
|
| 124 |
valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
|
| 125 |
+
return np.concatenate([
|
| 126 |
+
valid_xyxy,
|
| 127 |
+
s_np[mask].max(axis=1, keepdims=True),
|
| 128 |
+
s_np[mask].argmax(axis=1, keepdims=True)
|
| 129 |
+
], axis=1)
|
| 130 |
|
| 131 |
def _nms(self, dets):
|
| 132 |
if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
|
| 133 |
return dets
|
| 134 |
+
class_ids = np.unique(dets[:, 5].astype(int))
|
| 135 |
+
keep_all = []
|
| 136 |
+
for class_id in class_ids:
|
| 137 |
+
class_mask = dets[:, 5] == class_id
|
| 138 |
+
class_dets = dets[class_mask]
|
| 139 |
+
if class_dets.shape[0] == 0:
|
| 140 |
+
continue
|
| 141 |
+
class_keep = self._nms_single_class(class_dets)
|
| 142 |
+
original_indices = np.where(class_mask)[0]
|
| 143 |
+
keep_all.extend(original_indices[class_keep])
|
| 144 |
+
return dets[keep_all] if keep_all else np.zeros((0, 6), dtype=np.float32)
|
| 145 |
+
|
| 146 |
+
def _nms_single_class(self, dets):
|
| 147 |
+
if dets.shape[0] == 0:
|
| 148 |
+
return []
|
| 149 |
x1 = dets[:, 0]
|
| 150 |
y1 = dets[:, 1]
|
| 151 |
x2 = dets[:, 2]
|
|
|
|
| 153 |
scores = dets[:, 4]
|
| 154 |
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
| 155 |
order = scores.argsort()[::-1]
|
|
|
|
| 156 |
keep = []
|
| 157 |
while order.size > 0:
|
| 158 |
i = order[0]
|
|
|
|
| 167 |
iou = inter / (areas[i] + areas[order[1:]] - inter)
|
| 168 |
inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
|
| 169 |
order = order[inds + 1]
|
| 170 |
+
return keep
|
| 171 |
|
| 172 |
def _draw_detections_on_np(
|
| 173 |
self, image_np: np.ndarray, dets: np.ndarray
|
| 174 |
) -> np.ndarray:
|
| 175 |
for bbox in dets:
|
| 176 |
+
x1, y1, x2, y2, confidence, category_id = bbox
|
| 177 |
+
category_name = self._categories[int(category_id)]
|
| 178 |
+
conf_by_this_cat = self._confs_by_categories.get(category_name, 0.0)
|
| 179 |
+
if confidence < conf_by_this_cat:
|
| 180 |
+
continue
|
| 181 |
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
| 182 |
cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
| 183 |
+
label = f"{category_name} {confidence:.2f}"
|
| 184 |
label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
|
| 185 |
cv2.rectangle(
|
| 186 |
image_np,
|
|
|
|
| 269 |
"Could not initialize video writer with MJPG or XVID codec"
|
| 270 |
)
|
| 271 |
|
| 272 |
+
print(f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)")
|
|
|
|
|
|
|
|
|
|
| 273 |
print(f"DEIM Model: Output will be saved to {out_path}")
|
| 274 |
|
| 275 |
+
frame_count = 0
|
| 276 |
+
while True:
|
| 277 |
+
ret, frame_bgr = cap.read()
|
| 278 |
+
if not ret:
|
| 279 |
+
break
|
|
|
|
| 280 |
|
| 281 |
+
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
|
| 282 |
+
pil_img = Image.fromarray(frame_rgb)
|
| 283 |
+
|
| 284 |
+
tensor = self._preprocess_image(pil_img.copy())
|
| 285 |
+
with torch.no_grad():
|
| 286 |
+
labels, bboxes = self.model(tensor)
|
| 287 |
+
dets = self._postprocess_detections(
|
| 288 |
+
labels, bboxes, min_confidence, (width, height)
|
| 289 |
+
)
|
| 290 |
+
dets = self._nms(dets)
|
| 291 |
+
|
| 292 |
+
annotated_frame = self._draw_detections_on_np(
|
| 293 |
+
frame_bgr.copy(), dets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
)
|
| 295 |
+
writer.write(annotated_frame)
|
| 296 |
+
frame_count += 1
|
| 297 |
+
|
| 298 |
+
print(f"processed {frame_count} frames...")
|
| 299 |
+
|
| 300 |
+
cap.release()
|
| 301 |
+
if writer is not None:
|
| 302 |
+
writer.release()
|
| 303 |
|
| 304 |
return str(out_path)
|
| 305 |
|
| 306 |
|
| 307 |
# if __name__ == "__main__":
|
| 308 |
+
# model = DeimHgnetV2MDrone(version="v3", device="cpu")
|
| 309 |
+
# output_image = model.predict_video("./resources/videos/raw/sample2.mp4", 0.3)
|
|
|
services/app_service/models/model_deimhgnetV2m_cpu_v0.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
|
| 3 |
+
size 112
|
services/app_service/models/model_deimhgnetV2m_cpu_v2.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
|
| 3 |
+
size 112
|
services/app_service/models/model_deimhgnetV2m_cpu_v3.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
|
| 3 |
+
size 140
|
services/app_service/models/model_deimhgnetV2m_cpu_v3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2428ca22799a109a1313eeeb37df44a6a818a3425aa5496cf0f37a11a7dcf535
|
| 3 |
+
size 78257123
|
services/app_service/models/model_deimhgnetV2m_cuda_v2.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
|
| 3 |
+
size 112
|
services/app_service/models/model_deimhgnetV2m_cuda_v3.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
|
| 3 |
+
size 140
|
services/app_service/models/model_deimhgnetV2m_cuda_v3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4550ec38ef2b13deae9aadd9388aabf243366749ed8e29f65f4e0c3e24264c8
|
| 3 |
+
size 78262918
|