Spaces:

lucid-hf
/

lucid-natsar-dev

Sleeping

App Files Community

lucid-hf committed on Nov 4

Commit

23a9434

verified ·

1 Parent(s): 7735c3d

CI: deploy Docker/PDM Space

Browse files

Files changed (8) hide show

services/app_service/deim_model.py +61 -64
services/app_service/models/model_deimhgnetV2m_cpu_v0.json +2 -2
services/app_service/models/model_deimhgnetV2m_cpu_v2.json +2 -2
services/app_service/models/model_deimhgnetV2m_cpu_v3.json +3 -0
services/app_service/models/model_deimhgnetV2m_cpu_v3.pt +3 -0
services/app_service/models/model_deimhgnetV2m_cuda_v2.json +2 -2
services/app_service/models/model_deimhgnetV2m_cuda_v3.json +3 -0
services/app_service/models/model_deimhgnetV2m_cuda_v3.pt +3 -0

services/app_service/deim_model.py CHANGED Viewed

@@ -92,6 +92,8 @@ class DeimHgnetV2MDrone(BaseModel):
             int(self.cfg["target_size"][0]),
             int(self.cfg["target_size"][1]),
         )
         print(f"Loading model from: {weights_path}")
         print(f"Model device: {self.device}")
         self.model = torch.jit.load(weights_path, map_location=self.device).eval()
@@ -106,15 +108,13 @@ class DeimHgnetV2MDrone(BaseModel):
         )
         return transforms(image).unsqueeze(0).to(self.device)
-    def _postprocess_detections(
-        self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]
-    ):
         w, h = wh
         b_np = bboxes[0].cpu().numpy()
         s_np = scores.sigmoid()[0].cpu().numpy()
-        mask = (s_np >= min_confidence).squeeze()
         if not mask.any():
-            return np.zeros((0, 5), dtype=np.float32)
         valid = b_np[mask]
         cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
         x1 = cx - box_w / 2
@@ -122,11 +122,30 @@ class DeimHgnetV2MDrone(BaseModel):
         x2 = cx + box_w / 2
         y2 = cy + box_h / 2
         valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
-        return np.concatenate([valid_xyxy, s_np[mask]], axis=1)
     def _nms(self, dets):
         if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
             return dets
         x1 = dets[:, 0]
         y1 = dets[:, 1]
         x2 = dets[:, 2]
@@ -134,7 +153,6 @@ class DeimHgnetV2MDrone(BaseModel):
         scores = dets[:, 4]
         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
         order = scores.argsort()[::-1]
         keep = []
         while order.size > 0:
             i = order[0]
@@ -149,16 +167,20 @@ class DeimHgnetV2MDrone(BaseModel):
             iou = inter / (areas[i] + areas[order[1:]] - inter)
             inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
             order = order[inds + 1]
-        return dets[keep]
     def _draw_detections_on_np(
         self, image_np: np.ndarray, dets: np.ndarray
     ) -> np.ndarray:
         for bbox in dets:
-            x1, y1, x2, y2, confidence = bbox
             x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
             cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
-            label = f"{confidence:.2f}"
             label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
             cv2.rectangle(
                 image_np,
@@ -247,66 +269,41 @@ class DeimHgnetV2MDrone(BaseModel):
                 "Could not initialize video writer with MJPG or XVID codec"
             )
-        print(f"DEIM Model: Successfully initialized video writer with codec: {fourcc}")
-        print(
-            f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)"
-        )
         print(f"DEIM Model: Output will be saved to {out_path}")
-        try:
-            frame_count = 0
-            while True:
-                ret, frame_bgr = cap.read()
-                if not ret:
-                    break
-                try:
-                    # Process frame for detection
-                    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
-                    pil_img = Image.fromarray(frame_rgb)
-                    tensor = self._preprocess_image(pil_img.copy())
-                    with torch.no_grad():
-                        labels, bboxes = self.model(tensor)
-                    dets = self._postprocess_detections(
-                        labels, bboxes, min_confidence, (width, height)
-                    )
-                    dets = self._nms(dets)
-                    # Draw detections on frame
-                    annotated_frame = self._draw_detections_on_np(
-                        frame_bgr.copy(), dets
-                    )
-                    writer.write(annotated_frame)
-                    frame_count += 1
-                    # Log progress every 50 frames
-                    if frame_count % 50 == 0:
-                        print(f"DEIM Model: Processed {frame_count} frames...")
-                except Exception as e:
-                    print(f"DEIM Model: Error processing frame {frame_count}: {e}")
-                    # Write original frame if processing fails
-                    writer.write(frame_bgr)
-                    frame_count += 1
-                    continue
-        except Exception as e:
-            print(f"DEIM Model: Error during video processing: {e}")
-            raise
-        finally:
-            cap.release()
-            if writer is not None:
-                writer.release()
-            print(
-                f"DEIM Model: Video processing completed. Processed {frame_count} frames."
             )
-            print(f"DEIM Model: Results saved to {out_path}")
         return str(out_path)
 # if __name__ == "__main__":
-#     model = DeimHgnetV2MDrone(version="v2")
-#     output_image = model.predict_video("./resources/videos/raw/sample1.mp4", 0.3)
-#     output_image.show()

             int(self.cfg["target_size"][0]),
             int(self.cfg["target_size"][1]),
         )
+        self._categories = self.cfg["categories"]
+        self._confs_by_categories = self.cfg["confs_by_categories"]
         print(f"Loading model from: {weights_path}")
         print(f"Model device: {self.device}")
         self.model = torch.jit.load(weights_path, map_location=self.device).eval()
         )
         return transforms(image).unsqueeze(0).to(self.device)
+    def _postprocess_detections(self, scores, bboxes, min_confidence: float, wh: Tuple[int, int]):
         w, h = wh
         b_np = bboxes[0].cpu().numpy()
         s_np = scores.sigmoid()[0].cpu().numpy()
+        mask = (s_np.max(axis=1) >= min_confidence).squeeze()
         if not mask.any():
+            return np.zeros((0, 6), dtype=np.float32)
         valid = b_np[mask]
         cx, cy, box_w, box_h = valid[:, 0], valid[:, 1], valid[:, 2], valid[:, 3]
         x1 = cx - box_w / 2
         x2 = cx + box_w / 2
         y2 = cy + box_h / 2
         valid_xyxy = np.stack([x1, y1, x2, y2], axis=1) * [w, h, w, h]
+        return np.concatenate([
+            valid_xyxy,
+            s_np[mask].max(axis=1, keepdims=True),
+            s_np[mask].argmax(axis=1, keepdims=True)
+        ], axis=1)
     def _nms(self, dets):
         if dets.shape[0] == 0 or self.cfg["nms_iou_thr"] <= 0:
             return dets
+        class_ids = np.unique(dets[:, 5].astype(int))
+        keep_all = []
+        for class_id in class_ids:
+            class_mask = dets[:, 5] == class_id
+            class_dets = dets[class_mask]
+            if class_dets.shape[0] == 0:
+                continue
+            class_keep = self._nms_single_class(class_dets)
+            original_indices = np.where(class_mask)[0]
+            keep_all.extend(original_indices[class_keep])
+        return dets[keep_all] if keep_all else np.zeros((0, 6), dtype=np.float32)
+    def _nms_single_class(self, dets):
+        if dets.shape[0] == 0:
+            return []
         x1 = dets[:, 0]
         y1 = dets[:, 1]
         x2 = dets[:, 2]
         scores = dets[:, 4]
         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
         order = scores.argsort()[::-1]
         keep = []
         while order.size > 0:
             i = order[0]
             iou = inter / (areas[i] + areas[order[1:]] - inter)
             inds = np.where(iou <= self.cfg["nms_iou_thr"])[0]
             order = order[inds + 1]
+        return keep
     def _draw_detections_on_np(
         self, image_np: np.ndarray, dets: np.ndarray
     ) -> np.ndarray:
         for bbox in dets:
+            x1, y1, x2, y2, confidence, category_id = bbox
+            category_name = self._categories[int(category_id)]
+            conf_by_this_cat = self._confs_by_categories.get(category_name, 0.0)
+            if confidence < conf_by_this_cat:
+                continue
             x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
             cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            label = f"{category_name} {confidence:.2f}"
             label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
             cv2.rectangle(
                 image_np,
                 "Could not initialize video writer with MJPG or XVID codec"
             )
+        print(f"DEIM Model: Processing video {input_p.name} ({width}x{height}, {fps:.1f} FPS)")
         print(f"DEIM Model: Output will be saved to {out_path}")
+        frame_count = 0
+        while True:
+            ret, frame_bgr = cap.read()
+            if not ret:
+                break
+            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
+            pil_img = Image.fromarray(frame_rgb)
+            tensor = self._preprocess_image(pil_img.copy())
+            with torch.no_grad():
+                labels, bboxes = self.model(tensor)
+            dets = self._postprocess_detections(
+                labels, bboxes, min_confidence, (width, height)
+            )
+            dets = self._nms(dets)
+            annotated_frame = self._draw_detections_on_np(
+                frame_bgr.copy(), dets
             )
+            writer.write(annotated_frame)
+            frame_count += 1
+            print(f"processed {frame_count} frames...")
+        cap.release()
+        if writer is not None:
+            writer.release()
         return str(out_path)
 # if __name__ == "__main__":
+#     model = DeimHgnetV2MDrone(version="v3", device="cpu")
+#     output_image = model.predict_video("./resources/videos/raw/sample2.mp4", 0.3)

services/app_service/models/model_deimhgnetV2m_cpu_v0.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
-size 55

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
+size 112

services/app_service/models/model_deimhgnetV2m_cpu_v2.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
-size 55

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
+size 112

services/app_service/models/model_deimhgnetV2m_cpu_v3.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
+size 140

services/app_service/models/model_deimhgnetV2m_cpu_v3.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2428ca22799a109a1313eeeb37df44a6a818a3425aa5496cf0f37a11a7dcf535
+size 78257123

services/app_service/models/model_deimhgnetV2m_cuda_v2.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7923e3e65801dad3cc4442f7395b3c551b9ef756ac73cb382ed071ba8bb9205
-size 55

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fe7455827286dc1b471e6a2049391a80b9e0ee2aa4523667e526789d18a6016
+size 112

services/app_service/models/model_deimhgnetV2m_cuda_v3.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01cbfda05bb2cb62606e488bf7ab48de415cf4cd28d101378c776b1c7f585fb8
+size 140

services/app_service/models/model_deimhgnetV2m_cuda_v3.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4550ec38ef2b13deae9aadd9388aabf243366749ed8e29f65f4e0c3e24264c8
+size 78262918