#!/usr/bin/env python3
"""
DEIM Debug Script for Interactive Bbox Detection and Visualization
Copyright (c) 2024 The DEIM Authors. All Rights Reserved.
This script provides interactive debugging capabilities for DEIM models:
- Load model from config and checkpoint
- Process images and videos
- Interactive OpenCV visualization with imshow
- Adjustable confidence thresholds
- Keyboard controls for video playback
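
Example invocation (script name and paths are illustrative):
    python deim_debug.py -c configs/deim.yml -ckpt model.pth -i samples/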
"""
import argparse
import os
import sys
import time
from pathlib import Path
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as T
from PIL import Image
# Add the project root to Python path
sys.path.insert(0, str(Path(__file__).parent))
from engine.core import YAMLConfig
# Default class names - will be overridden by dataset configuration
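# NOTE: keys follow the original COCO category IDs, which are non-contiguous
# (e.g., IDs 12, 26, 29 and 30 are unused in the official annotations).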
DEFAULT_CLASSES = {
    1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorbike', 5: 'aeroplane',
    6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light',
    11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench',
    16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep',
    21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe',
    27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie',
    33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard',
    37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
    41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
    46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
    51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
    56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
    61: 'cake', 62: 'chair', 63: 'sofa', 64: 'potted plant', 65: 'bed',
    67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
    75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave',
    79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book',
    85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear',
    89: 'hair drier', 90: 'toothbrush'
}
def load_class_names_from_config(cfg):
"""Load class names from dataset configuration"""
try:
# Import here to avoid circular imports
from engine.data.dataset.coco_dataset import mscoco_category2name
# Check if we can access dataset configuration
if hasattr(cfg, 'val_dataloader') and cfg.val_dataloader is not None:
dataset_cfg = cfg.val_dataloader.dataset
# Try to instantiate dataset to get class names
try:
# Get the number of classes from config
num_classes = getattr(cfg, 'num_classes', 80)
# Check if using COCO remapping
remap_mscoco = getattr(cfg, 'remap_mscoco_category', False)
if remap_mscoco:
print(f"Using COCO class names (remapped)")
return mscoco_category2name
# Try to create dataset instance to get category names
if hasattr(dataset_cfg, 'ann_file') and dataset_cfg.ann_file:
# For COCO-style datasets, try to load annotations
try:
from pycocotools.coco import COCO
if os.path.exists(dataset_cfg.ann_file):
coco = COCO(dataset_cfg.ann_file)
categories = coco.dataset.get('categories', [])
if categories:
class_names = {}
                                for cat in categories:
                                    # Use the category ID as the key for proper mapping
                                    class_names[cat['id']] = cat['name']
print(f"Loaded {len(class_names)} class names from annotation file")
return class_names
except Exception as e:
print(f"Could not load classes from annotation file: {e}")
# Generate generic class names based on number of classes
print(f"Generating generic class names for {num_classes} classes")
if num_classes == 80:
return mscoco_category2name
elif num_classes == 1:
return {1: 'object'}
elif num_classes == 2:
return {1: 'person', 2: 'object'} # Common for crowd detection
elif num_classes == 20:
# VOC classes
voc_classes = {
1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', 5: 'bottle',
6: 'bus', 7: 'car', 8: 'cat', 9: 'chair', 10: 'cow',
11: 'diningtable', 12: 'dog', 13: 'horse', 14: 'motorbike', 15: 'person',
16: 'pottedplant', 17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'
}
return voc_classes
else:
# Generic class names
return {i + 1: f'class_{i + 1}' for i in range(num_classes)}
except Exception as e:
print(f"Could not instantiate dataset: {e}")
except Exception as e:
print(f"Could not load class names from config: {e}")
# Fallback to default COCO classes
print("Using default COCO class names")
return DEFAULT_CLASSES
# Color palette for bounding boxes (BGR format for OpenCV)
COLORS = [
(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
(0, 255, 255), (128, 0, 0), (0, 128, 0), (0, 0, 128), (128, 128, 0),
(128, 0, 128), (0, 128, 128), (255, 128, 0), (255, 0, 128), (128, 255, 0),
(0, 255, 128), (128, 0, 255), (0, 128, 255), (192, 192, 192), (64, 64, 64)
]
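# 20 distinct BGR colors; draw_detections() maps class IDs onto this palette
# with a modulo, so any number of classes gets a stable color.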
class DEIMModel(nn.Module):
"""Wrapper for DEIM model with postprocessing"""
def __init__(self, config_path, checkpoint_path, device='cuda', input_size=640):
super().__init__()
self.device = device
config_overrides = {'HGNetv2': {'pretrained': False}}
self.cfg = YAMLConfig(config_path, resume=checkpoint_path, **config_overrides)
print(f"Loading checkpoint from: {checkpoint_path}")
state_dict = torch.load(checkpoint_path, map_location='cpu')['model']
self.cfg.model.load_state_dict(state_dict)
self.model = self.cfg.model.eval().to(device)
self.postprocessor = self.cfg.postprocessor.eval().to(device)
self.class_names = load_class_names_from_config(self.cfg)
self.num_classes = getattr(self.cfg, 'num_classes', len(self.class_names))
print(f"Model loaded successfully on {device}")
print(f"Model type: {type(self.model).__name__}")
print(f"Number of classes: {self.num_classes}")
print(f"Sample classes: {dict(list(self.class_names.items())[:5])}...")
def forward(self, images, orig_sizes):
"""Forward pass through model and postprocessor"""
with torch.no_grad():
outputs = self.model(images)
results = self.postprocessor(outputs, orig_sizes)
return results
def get_class_name(self, class_id):
"""Get class name for given class ID"""
return self.class_names.get(class_id, f'class_{class_id}')
class DebugVisualizer:
"""Interactive visualizer with OpenCV"""
def __init__(self, model, confidence_threshold=0.5, window_name="DEIM Debug"):
self.model = model
self.confidence_threshold = confidence_threshold
self.window_name = window_name
self.paused = False
self.show_info = True
# Create window
cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(self.window_name, 1200, 800)
print("\n=== Debug Controls ===")
print("SPACE: Pause/Resume video")
print("'q' or ESC: Quit")
print("'i': Toggle info display")
print("'+'/'-': Increase/Decrease confidence threshold")
print("'s': Save current frame")
print("'n': Next file (in folder mode)")
print("=====================\n")
def draw_detections(self, image, results, frame_info=None):
"""Draw bounding boxes and labels on image"""
vis_image = image.copy()
if len(results) == 0:
return vis_image
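        # Expected postprocessor output (per image): a dict with 'labels',
        # 'boxes' (xyxy in original-image pixel coordinates) and 'scores'.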
# Extract results
result = results[0] if isinstance(results, list) else results
labels = result['labels'].cpu().numpy()
boxes = result['boxes'].cpu().numpy()
scores = result['scores'].cpu().numpy()
# Filter by confidence threshold
valid_indices = scores >= self.confidence_threshold
labels = labels[valid_indices]
boxes = boxes[valid_indices]
scores = scores[valid_indices]
# Draw bounding boxes
for i, (box, label, score) in enumerate(zip(boxes, labels, scores)):
x1, y1, x2, y2 = box.astype(int)
# Get class name and color
class_name = self.model.get_class_name(label)
color = COLORS[label % len(COLORS)]
# Draw bounding box
cv2.rectangle(vis_image, (x1, y1), (x2, y2), color, 2)
# Prepare label text
label_text = f'{class_name}: {score:.2f}'
# Get text size
(text_w, text_h), baseline = cv2.getTextSize(
label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
# Draw label background
cv2.rectangle(vis_image, (x1, y1 - text_h - baseline),
(x1 + text_w, y1), color, -1)
# Draw label text
cv2.putText(vis_image, label_text, (x1, y1 - baseline),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
# Draw info overlay
if self.show_info:
self._draw_info_overlay(vis_image, labels, scores, frame_info)
return vis_image
def _draw_info_overlay(self, image, labels, scores, frame_info=None):
"""Draw information overlay on image"""
        overlay_y = 30
# Detection count and confidence info
info_lines = [
f"Detections: {len(labels)} (conf >= {self.confidence_threshold:.2f})",
f"Avg Confidence: {scores.mean():.3f}" if len(scores) > 0 else "Avg Confidence: N/A"
]
# Add frame info for videos
        if frame_info:
            # Guard the fps field: formatting a missing value with :.1f would raise
            fps_val = frame_info.get('fps')
            info_lines.extend([
                f"Frame: {frame_info.get('frame_num', 'N/A')}",
                f"FPS: {fps_val:.1f}" if isinstance(fps_val, (int, float)) else "FPS: N/A",
                f"Status: {'PAUSED' if self.paused else 'PLAYING'}"
            ])
# Add file progress if available
if 'file_progress' in frame_info:
info_lines.append(f"File: {frame_info['file_progress']}")
# Draw background
overlay_height = len(info_lines) * 25 + 20
cv2.rectangle(image, (10, 10), (350, 10 + overlay_height),
(0, 0, 0), -1)
cv2.rectangle(image, (10, 10), (350, 10 + overlay_height),
(255, 255, 255), 1)
# Draw text
for i, line in enumerate(info_lines):
cv2.putText(image, line, (20, overlay_y + i * 25),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
def show_image(self, image, results, title=None):
"""Display single image with detections"""
vis_image = self.draw_detections(image, results)
if title:
cv2.setWindowTitle(self.window_name, f"{self.window_name} - {title}")
cv2.imshow(self.window_name, vis_image)
# Wait for key press
while True:
key = cv2.waitKey(0) & 0xFF
if key == ord('q') or key == 27: # 'q' or ESC
return False
elif key == ord('n'): # Next file
return True
elif key == ord('s'): # Save image
save_path = f"debug_output_{int(time.time())}.jpg"
cv2.imwrite(save_path, vis_image)
print(f"Image saved as {save_path}")
elif key == ord('i'): # Toggle info
self.show_info = not self.show_info
vis_image = self.draw_detections(image, results)
cv2.imshow(self.window_name, vis_image)
elif key == ord('+') or key == ord('='): # Increase threshold
self.confidence_threshold = min(1.0, self.confidence_threshold + 0.05)
print(f"Confidence threshold: {self.confidence_threshold:.2f}")
vis_image = self.draw_detections(image, results)
cv2.imshow(self.window_name, vis_image)
elif key == ord('-') or key == ord('_'): # Decrease threshold
self.confidence_threshold = max(0.0, self.confidence_threshold - 0.05)
print(f"Confidence threshold: {self.confidence_threshold:.2f}")
vis_image = self.draw_detections(image, results)
cv2.imshow(self.window_name, vis_image)
else:
break
return True
def show_video_frame(self, image, results, frame_info):
"""Display video frame with detections"""
vis_image = self.draw_detections(image, results, frame_info)
cv2.setWindowTitle(self.window_name,
f"{self.window_name} - Frame {frame_info.get('frame_num', 'N/A')}")
cv2.imshow(self.window_name, vis_image)
        # Handle keyboard input; poll quickly when paused, otherwise pace playback to the measured FPS
        wait_time = 1 if self.paused else max(1, int(1000 / (frame_info.get('fps') or 30)))
        key = cv2.waitKey(wait_time) & 0xFF
if key == ord('q') or key == 27: # Quit
return False
elif key == ord('n'): # Next file (skip rest of video)
return 'next'
elif key == ord(' '): # Pause/Resume
self.paused = not self.paused
print("PAUSED" if self.paused else "RESUMED")
elif key == ord('s'): # Save frame
save_path = f"debug_frame_{frame_info.get('frame_num', int(time.time()))}.jpg"
cv2.imwrite(save_path, vis_image)
print(f"Frame saved as {save_path}")
elif key == ord('i'): # Toggle info
self.show_info = not self.show_info
elif key == ord('+') or key == ord('='): # Increase threshold
self.confidence_threshold = min(1.0, self.confidence_threshold + 0.05)
print(f"Confidence threshold: {self.confidence_threshold:.2f}")
elif key == ord('-') or key == ord('_'): # Decrease threshold
self.confidence_threshold = max(0.0, self.confidence_threshold - 0.05)
print(f"Confidence threshold: {self.confidence_threshold:.2f}")
return True
def close(self):
"""Close visualization windows"""
cv2.destroyAllWindows()
def find_media_files(folder_path):
"""Recursively find all image and video files in folder"""
image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.webp', '.gif'}
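    # NOTE: .gif is accepted here, but cv2.imread cannot decode GIFs, so such
    # files will fail with a clear error message in process_image().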
video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.m4v', '.webm'}
media_files = []
folder_path = Path(folder_path)
if folder_path.is_file():
# Single file provided
if folder_path.suffix.lower() in image_extensions | video_extensions:
media_files.append(folder_path)
else:
# Recursively find all media files
for file_path in folder_path.rglob('*'):
if file_path.is_file() and file_path.suffix.lower() in image_extensions | video_extensions:
media_files.append(file_path)
# Sort files for consistent ordering
media_files.sort()
# Separate images and videos
images = [f for f in media_files if f.suffix.lower() in image_extensions]
videos = [f for f in media_files if f.suffix.lower() in video_extensions]
print(f"Found {len(images)} images and {len(videos)} videos")
return images, videos
def process_image(model, image_path, visualizer, input_size=640, file_index=None, total_files=None):
"""Process single image"""
progress_str = f"[{file_index + 1}/{total_files}] " if file_index is not None else ""
print(f"{progress_str}Processing image: {image_path}")
# Load and preprocess image
image = cv2.imread(str(image_path))
if image is None:
print(f"Error: Could not load image {image_path}")
return False
h, w = image.shape[:2]
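    # Pass the original (w, h) so the postprocessor can scale boxes back to
    # the unresized image for drawing.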
orig_size = torch.tensor([[w, h]], dtype=torch.float32).to(model.device)
# Convert to PIL for transforms
pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Apply transforms
transforms = T.Compose([
T.Resize((input_size, input_size)),
T.ToTensor(),
])
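    # Only Resize + ToTensor here (pixels scaled to [0, 1]); any further
    # normalization is assumed to be handled inside the model itself.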
tensor_image = transforms(pil_image).unsqueeze(0).to(model.device)
# Run inference
start_time = time.time()
results = model(tensor_image, orig_size)
inference_time = time.time() - start_time
print(f"Inference time: {inference_time:.3f}s")
# Show results
title = f"{progress_str}{Path(image_path).name} ({inference_time:.3f}s)"
return visualizer.show_image(image, results, title)
def process_video(model, video_path, visualizer, input_size=640, file_index=None, total_files=None):
"""Process video file"""
progress_str = f"[{file_index + 1}/{total_files}] " if file_index is not None else ""
print(f"{progress_str}Processing video: {video_path}")
cap = cv2.VideoCapture(str(video_path))
if not cap.isOpened():
print(f"Error: Could not open video {video_path}")
return False
# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"Video FPS: {fps:.2f}")
print(f"Total frames: {total_frames}")
# Apply transforms
transforms = T.Compose([
T.Resize((input_size, input_size)),
T.ToTensor(),
])
frame_num = 0
start_time = time.time()
try:
        while cap.isOpened():
            # Advance to a new frame only while playing; when paused, keep
            # re-displaying the last frame so keyboard controls stay responsive.
            if not visualizer.paused or frame_num == 0:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_num += 1
                h, w = frame.shape[:2]
                orig_size = torch.tensor([[w, h]], dtype=torch.float32).to(model.device)
                # Convert to PIL for transforms
                pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                tensor_frame = transforms(pil_frame).unsqueeze(0).to(model.device)
                # Run inference
                frame_start = time.time()
                results = model(tensor_frame, orig_size)
                inference_time = time.time() - frame_start
                # Calculate average FPS
                elapsed_time = time.time() - start_time
                avg_fps = frame_num / elapsed_time if elapsed_time > 0 else 0
                # Prepare frame info
                frame_info = {
                    'frame_num': frame_num,
                    'fps': avg_fps,
                    'inference_time': inference_time,
                    'total_frames': total_frames,
                    'file_progress': f"{progress_str}{Path(video_path).name}"
                }
                # Print progress periodically
                if frame_num % 30 == 0:
                    print(f"Processed {frame_num}/{total_frames} frames, "
                          f"Avg FPS: {avg_fps:.1f}, "
                          f"Inference: {inference_time:.3f}s")
            # Show frame and handle keyboard input
            result = visualizer.show_video_frame(frame, results, frame_info)
            if result is False:
                break
            elif result == 'next':
                print("Skipping to next file...")
                break
finally:
cap.release()
print(f"\nVideo processing completed!")
print(f"Total frames processed: {frame_num}")
print(f"Average FPS: {frame_num / (time.time() - start_time):.2f}")
return True
def process_folder(model, folder_path, visualizer, input_size=640, process_videos=True, process_images=True):
    """Process all images and videos in a folder recursively"""
print(f"Scanning folder: {folder_path}")
# Find all media files
images, videos = find_media_files(folder_path)
if not images and not videos:
print("No image or video files found!")
return False
    all_files = []
    # Add images first (unless skipped in videos-only mode)
    if images and process_images:
        print(f"\nFound {len(images)} images:")
        for img in images[:10]:  # Show first 10
            print(f"  {img}")
        if len(images) > 10:
            print(f"  ... and {len(images) - 10} more")
        all_files.extend([(img, 'image') for img in images])
    elif images and not process_images:
        print(f"\nSkipping {len(images)} images (videos-only mode)")
# Add videos if requested
if videos and process_videos:
print(f"\nFound {len(videos)} videos:")
for vid in videos[:10]: # Show first 10
print(f" {vid}")
if len(videos) > 10:
print(f" ... and {len(videos) - 10} more")
all_files.extend([(vid, 'video') for vid in videos])
elif videos and not process_videos:
print(f"\nSkipping {len(videos)} videos (use --process-videos to include them)")
if not all_files:
print("No files to process!")
return False
print(f"\nProcessing {len(all_files)} files total...")
print("Use SPACE to pause/resume, 'q' to quit, 'n' for next file")
# Process all files
for i, (file_path, file_type) in enumerate(all_files):
print(f"\n{'=' * 60}")
try:
if file_type == 'image':
success = process_image(model, file_path, visualizer, input_size, i, len(all_files))
else: # video
success = process_video(model, file_path, visualizer, input_size, i, len(all_files))
            if not success:
                print("Stopping processing at user request or error")
                break
        except KeyboardInterrupt:
            print("\nProcessing interrupted by user")
break
except Exception as e:
print(f"Error processing {file_path}: {e}")
import traceback
traceback.print_exc()
# Ask user if they want to continue
response = input("Continue with next file? (y/n): ")
if response.lower() != 'y':
break
print(f"\nFinished processing folder: {folder_path}")
return True
def main():
parser = argparse.ArgumentParser(description="DEIM Debug Script")
parser.add_argument('-c', '--config', type=str, required=True,
help='Path to config file')
parser.add_argument('-ckpt', '--checkpoint', type=str, required=True,
help='Path to model checkpoint')
parser.add_argument('-i', '--input', type=str, required=True,
help='Path to input image, video, or folder')
parser.add_argument('-d', '--device', type=str, default='cuda',
help='Device to use (cuda/cpu)')
parser.add_argument('--input-size', type=int, default=1600,
help='Input image size')
parser.add_argument('--conf-threshold', type=float, default=0.3,
help='Confidence threshold for detections')
parser.add_argument('--process-videos', action='store_true',
help='Process video files when scanning folders')
parser.add_argument('--images-only', action='store_true',
help='Process only images (skip videos)')
parser.add_argument('--videos-only', action='store_true',
help='Process only videos (skip images)')
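    # NOTE: --images-only and --videos-only are not enforced as mutually
    # exclusive by argparse; if both are passed, --videos-only takes
    # precedence in the folder-processing branch below.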
args = parser.parse_args()
# Check if files exist
if not os.path.exists(args.config):
print(f"Error: Config file not found: {args.config}")
return
if not os.path.exists(args.checkpoint):
print(f"Error: Checkpoint file not found: {args.checkpoint}")
return
    if not os.path.exists(args.input):
        print(f"Error: Input path not found: {args.input}")
        return
# Check device availability
if args.device == 'cuda' and not torch.cuda.is_available():
print("Warning: CUDA not available, using CPU")
args.device = 'cpu'
print("=== DEIM Debug Script ===")
print(f"Config: {args.config}")
print(f"Checkpoint: {args.checkpoint}")
print(f"Input: {args.input}")
print(f"Device: {args.device}")
print(f"Input size: {args.input_size}")
print(f"Confidence threshold: {args.conf_threshold}")
print("========================\n")
try:
# Initialize model
print("Loading model...")
model = DEIMModel(args.config, args.checkpoint, args.device, args.input_size)
# Initialize visualizer
visualizer = DebugVisualizer(model, args.conf_threshold)
# Determine input type and process
input_path = Path(args.input)
if input_path.is_file():
# Single file
if input_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.webp']:
if not args.videos_only:
success = process_image(model, args.input, visualizer, args.input_size)
else:
print("Skipping image file (videos-only mode)")
success = True
elif input_path.suffix.lower() in ['.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.m4v', '.webm']:
if not args.images_only:
success = process_video(model, args.input, visualizer, args.input_size)
else:
print("Skipping video file (images-only mode)")
success = True
else:
print(f"Error: Unsupported file format: {input_path.suffix}")
success = False
        elif input_path.is_dir():
            # Folder - process recursively
            if args.videos_only:
                # Only process videos, skip images
                success = process_folder(model, args.input, visualizer, args.input_size,
                                         process_videos=True, process_images=False)
            elif args.images_only:
                # Only process images, skip videos
                success = process_folder(model, args.input, visualizer, args.input_size,
                                         process_videos=False)
            else:
                # Videos are included only when --process-videos is given
                success = process_folder(model, args.input, visualizer, args.input_size,
                                         process_videos=args.process_videos)
else:
print(f"Error: Input path does not exist: {args.input}")
success = False
if success:
print("Processing completed successfully!")
except Exception as e:
print(f"Error during processing: {e}")
import traceback
traceback.print_exc()
finally:
# Cleanup
if 'visualizer' in locals():
visualizer.close()
print("Debug session ended.")
if __name__ == '__main__':
main()