lucid-hf committed on
Commit
23a9434
·
verified ·
1 Parent(s): 7735c3d

CI: deploy Docker/PDM Space

Browse files
services/app_service/deim_model.py CHANGED
@@ -92,6 +92,8 @@ class DeimHgnetV2MDrone(BaseModel):
92
  int(self.cfg["target_size"][0]),
93
  int(self.cfg["target_size"][1]),
94
  )
 
 
95
  print(f"Loading model from: {weights_path}")
96
  print(f"Model device: {self.device}")
97
  self.model = torch.jit.load(weights_path, map_location=self.device).eval()
@@ -106,15 +108,13 @@ class DeimHgnetV2MDrone(BaseModel):
106
  )
107
  return transforms(image).unsqueeze(0).to(self.device)
108
 
109
- def _postprocess_detections(
110
- self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]
111
- ):
112
  w, h = wh
113
  b_np = bboxes[0].cpu().numpy()
114
  s_np = scores.sigmoid()[0].cpu().numpy()
115
- mask = (s_np >= min_confidence).squeeze()
116
  if not mask.any():
117
- return np.zeros((0, 5), dtype=np.float32)
118
  valid = b_np[mask]
119
  cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
120
  x1 = cx - box_w / 2
@@ -122,11 +122,30 @@ class DeimHgnetV2MDrone(BaseModel):
122
  x2 = cx + box_w / 2
123
  y2 = cy + box_h / 2
124
  valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
125
- return np.concatenate([valid_xyxy, s_np[mask]], axis=1)
 
 
 
 
126
 
127
  def _nms(self, dets):
128
  if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
129
  return dets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  x1 = dets[:, 0]
131
  y1 = dets[:, 1]
132
  x2 = dets[:, 2]
@@ -134,7 +153,6 @@ class DeimHgnetV2MDrone(BaseModel):
134
  scores = dets[:, 4]
135
  areas = (x2 - x1 + 1) * (y2 - y1 + 1)
136
  order = scores.argsort()[::-1]
137
-
138
  keep = []
139
  while order.size > 0:
140
  i = order[0]
@@ -149,16 +167,20 @@ class DeimHgnetV2MDrone(BaseModel):
149
  iou = inter / (areas[i] + areas[order[1:]] - inter)
150
  inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
151
  order = order[inds + 1]
152
- return dets[keep]
153
 
154
  def _draw_detections_on_np(
155
  self, image_np: np.ndarray, dets: np.ndarray
156
  ) -> np.ndarray:
157
  for bbox in dets:
158
- x1, y1, x2, y2, confidence = bbox
 
 
 
 
159
  x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
160
  cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
161
- label = f"{confidence:.2f}"
162
  label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
163
  cv2.rectangle(
164
  image_np,
@@ -247,66 +269,41 @@ class DeimHgnetV2MDrone(BaseModel):
247
  "Could not initialize video writer with MJPG or XVID codec"
248
  )
249
 
250
- print(f"DEIM Model: Successfully initialized video writer with codec: {fourcc}")
251
- print(
252
- f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)"
253
- )
254
  print(f"DEIM Model: Output will be saved to {out_path}")
255
 
256
- try:
257
- frame_count = 0
258
- while True:
259
- ret, frame_bgr = cap.read()
260
- if not ret:
261
- break
262
 
263
- try:
264
- # Process frame for detection
265
- frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
266
- pil_img = Image.fromarray(frame_rgb)
267
-
268
- tensor = self._preprocess_image(pil_img.copy())
269
- with torch.no_grad():
270
- labels, bboxes = self.model(tensor)
271
- dets = self._postprocess_detections(
272
- labels, bboxes, min_confidence, (width, height)
273
- )
274
- dets = self._nms(dets)
275
-
276
- # Draw detections on frame
277
- annotated_frame = self._draw_detections_on_np(
278
- frame_bgr.copy(), dets
279
- )
280
- writer.write(annotated_frame)
281
- frame_count += 1
282
-
283
- # Log progress every 50 frames
284
- if frame_count % 50 == 0:
285
- print(f"DEIM Model: Processed {frame_count} frames...")
286
-
287
- except Exception as e:
288
- print(f"DEIM Model: Error processing frame {frame_count}: {e}")
289
- # Write original frame if processing fails
290
- writer.write(frame_bgr)
291
- frame_count += 1
292
- continue
293
-
294
- except Exception as e:
295
- print(f"DEIM Model: Error during video processing: {e}")
296
- raise
297
- finally:
298
- cap.release()
299
- if writer is not None:
300
- writer.release()
301
- print(
302
- f"DEIM Model: Video processing completed. Processed {frame_count} frames."
303
  )
304
- print(f"DEIM Model: Results saved to {out_path}")
 
 
 
 
 
 
 
305
 
306
  return str(out_path)
307
 
308
 
309
  # if __name__ == "__main__":
310
- # model = DeimHgnetV2MDrone(version="v2")
311
- # output_image = model.predict_video("./resources/videos/raw/sample1.mp4", 0.3)
312
- # output_image.show()
 
92
  int(self.cfg["target_size"][0]),
93
  int(self.cfg["target_size"][1]),
94
  )
95
+ self._categories = self.cfg["categories"]
96
+ self._confs_by_categories = self.cfg["confs_by_categories"]
97
  print(f"Loading model from: {weights_path}")
98
  print(f"Model device: {self.device}")
99
  self.model = torch.jit.load(weights_path, map_location=self.device).eval()
 
108
  )
109
  return transforms(image).unsqueeze(0).to(self.device)
110
 
111
+ def _postprocess_detections(self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]):
 
 
112
  w, h = wh
113
  b_np = bboxes[0].cpu().numpy()
114
  s_np = scores.sigmoid()[0].cpu().numpy()
115
+ mask = (s_np.max(axis=1) >= min_confidence).squeeze()
116
  if not mask.any():
117
+ return np.zeros((0, 6), dtype=np.float32)
118
  valid = b_np[mask]
119
  cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
120
  x1 = cx - box_w / 2
 
122
  x2 = cx + box_w / 2
123
  y2 = cy + box_h / 2
124
  valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
125
+ return np.concatenate([
126
+ valid_xyxy,
127
+ s_np[mask].max(axis=1, keepdims=True),
128
+ s_np[mask].argmax(axis=1, keepdims=True)
129
+ ], axis=1)
130
 
131
  def _nms(self, dets):
132
  if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
133
  return dets
134
+ class_ids = np.unique(dets[:, 5].astype(int))
135
+ keep_all = []
136
+ for class_id in class_ids:
137
+ class_mask = dets[:, 5] == class_id
138
+ class_dets = dets[class_mask]
139
+ if class_dets.shape[0] == 0:
140
+ continue
141
+ class_keep = self._nms_single_class(class_dets)
142
+ original_indices = np.where(class_mask)[0]
143
+ keep_all.extend(original_indices[class_keep])
144
+ return dets[keep_all] if keep_all else np.zeros((0, 6), dtype=np.float32)
145
+
146
+ def _nms_single_class(self, dets):
147
+ if dets.shape[0] == 0:
148
+ return []
149
  x1 = dets[:, 0]
150
  y1 = dets[:, 1]
151
  x2 = dets[:, 2]
 
153
  scores = dets[:, 4]
154
  areas = (x2 - x1 + 1) * (y2 - y1 + 1)
155
  order = scores.argsort()[::-1]
 
156
  keep = []
157
  while order.size > 0:
158
  i = order[0]
 
167
  iou = inter / (areas[i] + areas[order[1:]] - inter)
168
  inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
169
  order = order[inds + 1]
170
+ return keep
171
 
172
  def _draw_detections_on_np(
173
  self, image_np: np.ndarray, dets: np.ndarray
174
  ) -> np.ndarray:
175
  for bbox in dets:
176
+ x1, y1, x2, y2, confidence, category_id = bbox
177
+ category_name = self._categories[int(category_id)]
178
+ conf_by_this_cat = self._confs_by_categories.get(category_name, 0.0)
179
+ if confidence < conf_by_this_cat:
180
+ continue
181
  x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
182
  cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
183
+ label = f"{category_name} {confidence:.2f}"
184
  label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
185
  cv2.rectangle(
186
  image_np,
 
269
  "Could not initialize video writer with MJPG or XVID codec"
270
  )
271
 
272
+ print(f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)")
 
 
 
273
  print(f"DEIM Model: Output will be saved to {out_path}")
274
 
275
+ frame_count = 0
276
+ while True:
277
+ ret, frame_bgr = cap.read()
278
+ if not ret:
279
+ break
 
280
 
281
+ frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
282
+ pil_img = Image.fromarray(frame_rgb)
283
+
284
+ tensor = self._preprocess_image(pil_img.copy())
285
+ with torch.no_grad():
286
+ labels, bboxes = self.model(tensor)
287
+ dets = self._postprocess_detections(
288
+ labels, bboxes, min_confidence, (width, height)
289
+ )
290
+ dets = self._nms(dets)
291
+
292
+ annotated_frame = self._draw_detections_on_np(
293
+ frame_bgr.copy(), dets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  )
295
+ writer.write(annotated_frame)
296
+ frame_count += 1
297
+
298
+ print(f"processed {frame_count} frames...")
299
+
300
+ cap.release()
301
+ if writer is not None:
302
+ writer.release()
303
 
304
  return str(out_path)
305
 
306
 
307
  # if __name__ == "__main__":
308
+ # model = DeimHgnetV2MDrone(version="v3", device="cpu")
309
+ # output_image = model.predict_video("./resources/videos/raw/sample2.mp4", 0.3)
 
services/app_service/models/model_deimhgnetV2m_cpu_v0.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
3
- size 55
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
3
+ size 112
services/app_service/models/model_deimhgnetV2m_cpu_v2.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
3
- size 55
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
3
+ size 112
services/app_service/models/model_deimhgnetV2m_cpu_v3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
3
+ size 140
services/app_service/models/model_deimhgnetV2m_cpu_v3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2428ca22799a109a1313eeeb37df44a6a818a3425aa5496cf0f37a11a7dcf535
3
+ size 78257123
services/app_service/models/model_deimhgnetV2m_cuda_v2.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
3
- size 55
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
3
+ size 112
services/app_service/models/model_deimhgnetV2m_cuda_v3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
3
+ size 140
services/app_service/models/model_deimhgnetV2m_cuda_v3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4550ec38ef2b13deae9aadd9388aabf243366749ed8e29f65f4e0c3e24264c8
3
+ size 78262918