import json
import logging
import re
from typing import List, Optional, Tuple

from PIL import Image

from model import Model
from types_io import ImageData

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

SCHEMA_JSON = json.dumps(ImageData.model_json_schema(), ensure_ascii=False)

LAND_USE_PROMPT = f"""
You are a structured image analysis agent.
Think **only** inside markers: ◁think▷ ... ◁/think▷
After ◁/think▷, output **ONLY** a JSON object that validates against this JSON Schema:
{SCHEMA_JSON}
Guidelines:
- Return 3–5 categories.
- Confidence is in [0, 1], where 1.0 means highest confidence (be consistent).
Categories:
- Residenciales: Buildings intended for housing: houses, PH buildings, condominiums.
- Comerciales1: Buildings for the storage, distribution, or exchange of products, goods, or services with a commercial interest.
- Comerciales2: Buildings where activities aimed at providing services are carried out.
- Comerciales3: Buildings used for artisanal activities where raw materials are transformed on a local scale.
- Comerciales4: Hotels, motels, and restaurants.
- Comerciales5: Operational offices and warehouses.
- Centros_Comerciales: Commercial premises located on properties with one or more buildings.
- Bodegas: Warehouse-type buildings dedicated to commercial, industrial, or storage activities.
- Parqueaderos: Buildings designed for vehicle parking.
- Dotacionales1: Buildings where activities aimed at the welfare or service of a community are carried out.
- Dotacionales2: Buildings designed to carry out educational or training activities.
- Dotacionales3: Buildings with the necessary infrastructure to provide surgical and/or hospitalization services.
- Dotacionales4: Buildings for religious worship owned by communities or religious congregations.
- Dotacionales5: Theaters, cinemas, swimming pools, museums, sports, events, or shows.
- Especiales: Military administrative areas, cemeteries, airport runways.
- Moles: Buildings that are large in height (>4 floors) or area (>10,000 m²), usually under construction.
- Rurales: Sheds, kiosks, shelters, barns, stables, silos, etc.
- Mixto1: (Residencial + Comercial1) Housing and commercial premises.
- Mixto2: (Residencial + Comercial2) Housing and offices.
- Mixto3: (Comercial1 + Comercial2) Commercial premises and offices.
Return ONLY the JSON object (no prose, no backticks) after ◁/think▷.
"""

class Classifier:
    def __init__(self, max_new_tokens: int = 1024):
        self.max_new_tokens = max_new_tokens
        logger.info("Initializing Classifier")
        logger.info("Loading model...")
        self.model = Model.load_model()
        logger.info("Loading processor...")
        self.processor = Model.load_processor()
        logger.info("Classifier initialization complete")

    def get_response(self, images: List[Image.Image], saved_image_paths: Optional[List[str]] = None) -> dict:
        """Run the full pipeline: preprocess, prompt, generate, parse, and validate."""
        logger.info(f"Processing classification request for {len(images)} images")
        logger.info("Loading and preprocessing images...")
        images = self.get_input_tensor(images)
        logger.debug("Successfully preprocessed images")
        logger.info("Preparing input messages...")
        messages = self.prepare_messages(saved_image_paths)
        response = self.generate_model_response(images, messages)
        # Separate the model's ◁think▷ reasoning from the JSON payload.
        think, json_text = self._split_think_and_json(response)
        data = json.loads(json_text)
        if isinstance(data, dict) and "think" not in data and think:
            data["think"] = think
        # Validate against the Pydantic schema; raises on failure.
        validated = ImageData.model_validate(data)
        return {"output": validated.model_dump()}

    def get_input_tensor(self, images: List[Image.Image]) -> List[Image.Image]:
        """
        Preprocess a list of PIL images.

        Args:
            images (List[Image.Image]): List of PIL images to be processed.

        Returns:
            List[Image.Image]: List of preprocessed images ready for classification.
        """
        if not images:
            raise ValueError("No images provided for classification.")
        logger.info(f"Preprocessing {len(images)} images...")
        processed_images = []
        for idx, img in enumerate(images):
            logger.debug(f"Processing image at index: {idx}")
            try:
                img = self.resize_image(img)
                processed_images.append(img)
                logger.debug(f"Successfully processed image at index: {idx}")
            except Exception as e:
                logger.error(f"Error processing image at index {idx}: {str(e)}")
                raise
        return processed_images

    def generate_model_response(self, images: List[Image.Image], messages: List[dict]) -> str:
        """
        Generate a response from the model.

        Args:
            images (List[Image.Image]): List of preprocessed images.
            messages (List[dict]): Messages for the processor.

        Returns:
            str: Decoded response from the model.
        """
        logger.info("Applying chat template...")
        try:
            # Render the chat template to a plain string; the processor call
            # below handles tokenization.
            text = self.processor.apply_chat_template(
                messages, add_generation_prompt=True, tokenize=False
            )
            logger.info(f"Text length: {len(text)} characters")
            inputs = self.processor(
                images=images, text=text, return_tensors="pt", padding=True, truncation=True
            ).to(self.model.device)
        except Exception as e:
            logger.error(f"Error applying chat template: {str(e)}")
            raise

        logger.info("Generating response...")
        # Sampling must be enabled for `temperature` to take effect.
        generated_ids = self.model.generate(
            **inputs, max_new_tokens=self.max_new_tokens, do_sample=True, temperature=0.1
        )
        # Strip the prompt tokens so only newly generated tokens are decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        response = self.processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        logger.debug("Successfully generated response")
        return response

    @staticmethod
    def resize_image(image: Image.Image, max_size: int = 224) -> Image.Image:
        """
        Resize an image while maintaining aspect ratio.

        Args:
            image: PIL Image object to resize.
            max_size: Maximum dimension (width or height) of the output image.

        Returns:
            PIL Image: Resized image with maintained aspect ratio.
        """
        # Get current dimensions
        width, height = image.size
        # Calculate scaling factor to fit within max_size
        scale = min(max_size / width, max_size / height)
        # Only downscale; images already within max_size are returned unchanged
        if scale < 1:
            new_width = int(width * scale)
            new_height = int(height * scale)
            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
        return image

    @staticmethod
    def prepare_messages(saved_image_paths: List[str]) -> List[dict]:
        """
        Prepare messages for the processor.

        Args:
            saved_image_paths (List[str]): List of paths to saved images.

        Returns:
            List[dict]: Messages for the processor.
        """
        if not saved_image_paths:
            raise ValueError("No image paths provided for message preparation.")
        return [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image_path} for image_path in saved_image_paths
                ] + [{"type": "text", "text": LAND_USE_PROMPT}],
            },
        ]

    @staticmethod
    def _split_think_and_json(text: str) -> Tuple[str, str]:
        """Split a raw model response into its ◁think▷ block and the JSON text."""
        start, end = "◁think▷", "◁/think▷"
        think = ""
        after = text
        if start in text and end in text:
            s = text.find(start) + len(start)
            e = text.find(end, s)
            think = text[s:e].strip()
            after = text[e + len(end):].strip()
        # Extract the outermost {...} span; fall back to the full tail.
        m = re.search(r"\{.*\}", after, flags=re.S)
        json_text = m.group(0) if m else after
        return think, json_text
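
# Minimal usage sketch (illustrative only): "example.jpg" is a placeholder
# path, and running this requires whatever weights Model.load_model() expects
# to be available locally.
if __name__ == "__main__":
    path = "example.jpg"  # hypothetical input image
    classifier = Classifier()
    result = classifier.get_response(images=[Image.open(path)], saved_image_paths=[path])
    print(json.dumps(result, ensure_ascii=False, indent=2))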