#!/usr/bin/env python3
"""
Detect people in an image and infer the color of their clothes.

- Uses OpenCV HOG person detector
- Estimates clothing color from a torso ROI using HSV+k-means
- Prints a table with person index and clothing color
- For grayscale clothing, reports "light" or "dark"

Dependencies pinned in the accompanying requirements.txt:
opencv-python==4.10.0.84, numpy==2.1.2
"""
from __future__ import annotations

import argparse
import os
import sys
from typing import List, Sequence, Tuple

import cv2
import numpy as np

# Normalized saturation (0..1) below which a color is treated as grayscale.
_GRAY_SATURATION_MAX = 0.18
# Normalized value (0..1) at or above which a grayscale color is "light".
_LIGHT_VALUE_MIN = 0.60

# Exclusive upper hue bounds (OpenCV scale) and the color name for each band.
# Hues below 10 and from 170 upwards are handled separately as "red".
_HUE_BANDS: Tuple[Tuple[float, str], ...] = (
    (20, "orange"),
    (35, "yellow"),
    (85, "green"),
    (100, "teal"),
    (130, "blue"),
    (150, "purple"),
)


def resize_for_detection(image: np.ndarray, max_width: int = 900) -> Tuple[np.ndarray, float]:
    """Resize wide images for faster detection.

    Returns a ``(image, scale)`` pair. Images no wider than ``max_width`` are
    returned unchanged with a scale of ``1.0``; otherwise the image is shrunk
    so that its width is ``max_width`` and ``scale`` is ``max_width / width``.
    """
    height, width = image.shape[:2]
    if width <= max_width:
        return image, 1.0

    scale = max_width / float(width)
    # Clamp to 1 pixel so an extremely wide, flat image cannot round down to
    # a zero-sized target, which cv2.resize rejects.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    resized = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
    return resized, scale


def non_max_suppression(boxes: np.ndarray, overlap_thresh: float = 0.65) -> np.ndarray:
    """Apply non-maximum suppression to reduce overlapping boxes.

    boxes: Nx4 array of [x1, y1, x2, y2]
    returns: Mx4 array of picked boxes (int)

    Boxes are visited from the largest ``y2`` downwards. Each visited box is
    kept, and every remaining box whose intersection with it covers more than
    ``overlap_thresh`` of that remaining box's own area is discarded.
    """
    if boxes is None or len(boxes) == 0:
        return np.empty((0, 4), dtype=int)

    boxes = boxes.astype("float")
    pick: List[int] = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # The +1 treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = idxs[-1]
        rest = idxs[:-1]
        pick.append(int(last))

        # Intersection of the picked box with every remaining box.
        xx1 = np.maximum(x1[last], x1[rest])
        yy1 = np.maximum(y1[last], y1[rest])
        xx2 = np.minimum(x2[last], x2[rest])
        yy2 = np.minimum(y2[last], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        overlap = (w * h) / areas[rest]

        # Drop the picked box itself plus everything it suppresses.
        suppressed = np.where(overlap > overlap_thresh)[0]
        idxs = np.delete(idxs, np.concatenate(([len(idxs) - 1], suppressed)))

    return boxes[pick].astype("int")


def detect_people(image_bgr: np.ndarray) -> List[Tuple[int, int, int, int]]:
    """Detect people and return rectangles as (x, y, w, h) on the provided image.

    Runs OpenCV's default HOG people detector and merges overlapping
    detections with ``non_max_suppression``.
    """
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    rects, _weights = hog.detectMultiScale(
        image_bgr,
        winStride=(8, 8),
        padding=(8, 8),
        scale=1.05,
    )

    # len() rather than truthiness: rects may be a NumPy array.
    if len(rects) == 0:
        return []

    # Convert (x, y, w, h) to corner form (x1, y1, x2, y2) for suppression.
    xyxy = np.array(rects).astype(int)
    xyxy[:, 2:] += xyxy[:, :2]

    kept = non_max_suppression(xyxy, overlap_thresh=0.65)
    return [(int(x1), int(y1), int(x2 - x1), int(y2 - y1)) for x1, y1, x2, y2 in kept]


def get_torso_roi(image_bgr: np.ndarray, rect: Tuple[int, int, int, int]) -> np.ndarray:
    """Crop a torso region from the detected person bounding box.

    Uses middle-lower portion to avoid face/skin and background: rows from
    35% to 85% of the box height and columns from 20% to 80% of its width,
    clipped to the image. If that window is empty, the whole box (clipped to
    the image) is used instead. The returned array is a copy and may be empty
    when the box lies entirely outside the image.
    """
    x, y, w, h = rect
    img_h, img_w = image_bgr.shape[:2]

    y1 = max(0, int(y + 0.35 * h))
    y2 = min(img_h, int(y + 0.85 * h))
    x1 = max(0, int(x + 0.20 * w))
    x2 = min(img_w, int(x + 0.80 * w))

    if y2 <= y1 or x2 <= x1:
        # Fall back to the whole box. Every bound is clamped to be
        # non-negative because a negative slice index would silently wrap
        # around and return pixels from the opposite side of the image.
        x1 = max(0, int(x))
        y1 = max(0, int(y))
        x2 = max(x1, min(img_w, int(x + w)))
        y2 = max(y1, min(img_h, int(y + h)))

    return image_bgr[y1:y2, x1:x2].copy()


def is_grayscale(hsv: np.ndarray) -> bool:
    """Return True if the region is predominantly grayscale (low saturation).

    Both the median and the 90th percentile of the normalized saturation
    channel must be low, so a mostly gray region with a sizeable colorful
    part is not classified as grayscale.
    """
    saturation = hsv[:, :, 1].astype(np.float32) / 255.0
    median = float(np.median(saturation))
    p90 = float(np.percentile(saturation, 90))
    return median < _GRAY_SATURATION_MAX and p90 < 0.35


def light_or_dark(hsv: np.ndarray) -> str:
    """Return "light" or "dark" from the mean normalized value channel."""
    value = hsv[:, :, 2].astype(np.float32) / 255.0
    return "light" if float(np.mean(value)) >= _LIGHT_VALUE_MIN else "dark"


def cluster_dominant_hsv(hsv: np.ndarray, max_samples: int = 5000, k: int = 3) -> Tuple[float, float, float]:
    """Cluster HSV pixels and return the most representative colorful cluster center (h, s, v).

    At most ``max_samples`` randomly chosen pixels are clustered with
    k-means. The largest cluster whose saturation center is at least 50 wins;
    if no cluster is that saturated, the largest cluster overall is returned.
    Returns ``(0.0, 0.0, 0.0)`` for an empty region.
    """
    pixels = hsv.reshape(-1, 3).astype(np.float32)
    n = pixels.shape[0]
    if n == 0:
        return 0.0, 0.0, 0.0

    if n > max_samples:
        chosen_rows = np.random.choice(n, max_samples, replace=False)
        sample = pixels[chosen_rows]
    else:
        sample = pixels

    # cv2.kmeans rejects a cluster count larger than the number of samples,
    # which happens for ROIs of only one or two pixels.
    k = max(1, min(k, sample.shape[0]))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    attempts = 3
    _, labels, centers = cv2.kmeans(sample, k, None, criteria, attempts, cv2.KMEANS_PP_CENTERS)

    counts = np.bincount(labels.flatten(), minlength=k)
    by_size = np.argsort(-counts)

    # Prefer high-saturation clusters; otherwise fall back to largest cluster.
    colorful = [int(i) for i in by_size if centers[i, 1] >= 50]  # ~0.2 saturation threshold
    best = colorful[0] if colorful else int(by_size[0])

    return float(centers[best, 0]), float(centers[best, 1]), float(centers[best, 2])


def map_hsv_to_color_name(h: float, s: float, v: float) -> str:
    """Map HSV center to a human-friendly color name.

    OpenCV hue is [0, 180). Saturation and value are [0, 255].
    Low-saturation colors are reported as "light" or "dark".
    """
    s_norm = s / 255.0
    v_norm = v / 255.0

    if s_norm < _GRAY_SATURATION_MAX:
        return "light" if v_norm >= _LIGHT_VALUE_MIN else "dark"

    # Brown (dark orange) must be tested before the plain hue bands.
    if 10 <= h < 25 and v_norm < 0.55 and s_norm > 0.30:
        return "brown"

    # Red wraps around both ends of the hue circle.
    if h < 10 or h >= 170:
        return "red"

    for upper, name in _HUE_BANDS:
        if h < upper:
            return name

    # Remaining hues lie in [150, 170).
    return "pink" if v_norm > 0.6 else "magenta"


def infer_clothing_color(roi_bgr: np.ndarray) -> str:
    """Return a color name for a BGR torso crop, or "unknown" if it is empty.

    Only the central 60% of the crop in each dimension is analyzed. Regions
    that are predominantly grayscale are reported as "light" or "dark";
    otherwise the dominant clustered color is named.
    """
    if roi_bgr is None or roi_bgr.size == 0:
        return "unknown"

    height, width = roi_bgr.shape[:2]
    inner = roi_bgr[int(height * 0.20):int(height * 0.80), int(width * 0.20):int(width * 0.80)]
    if inner.size == 0:
        # Crop too small to trim; use all of it.
        inner = roi_bgr

    hsv = cv2.cvtColor(inner, cv2.COLOR_BGR2HSV)
    if is_grayscale(hsv):
        return light_or_dark(hsv)

    return map_hsv_to_color_name(*cluster_dominant_hsv(hsv))


def format_table(rows: Sequence[Sequence[object]], headers: Sequence[str]) -> str:
    """Render ``rows`` under ``headers`` as a left-aligned plain-text table.

    Each column is as wide as its longest cell or header. Columns are joined
    with " | " and the header is underlined with a "-+-" separated rule.
    """
    col_widths = [len(header) for header in headers]
    for row in rows:
        for i, cell in enumerate(row):
            col_widths[i] = max(col_widths[i], len(str(cell)))

    def render(cells: Sequence[object]) -> str:
        return " | ".join(str(cell).ljust(col_widths[i]) for i, cell in enumerate(cells))

    lines = [render(headers), "-+-".join("-" * width for width in col_widths)]
    lines.extend(render(row) for row in rows)
    return "\n".join(lines)


def main() -> int:
    """Command-line entry point.

    Returns 0 on success and 1 if the input image cannot be read or the
    annotated image cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="Detect people in an image and report clothing color per person."
    )
    parser.add_argument("image_path", help="Path to input image file")
    parser.add_argument(
        "--draw",
        action="store_true",
        help="Save an annotated image with detections and labels (_annotated.jpg)",
    )
    args = parser.parse_args()

    image = cv2.imread(args.image_path)
    if image is None:
        print(f"Error: could not read image at '{args.image_path}'", file=sys.stderr)
        return 1

    # Detection and color inference both run on the (possibly) resized image.
    resized, _scale = resize_for_detection(image)
    rects = detect_people(resized)

    # Infer each color exactly once. Clustering uses random sampling, so a
    # second pass for drawing could label a person differently from the table.
    colors = [infer_clothing_color(get_torso_roi(resized, rect)) for rect in rects]

    print(f"People detected: {len(rects)}")
    if colors:
        rows = [[idx, color] for idx, color in enumerate(colors, start=1)]
        print(format_table(rows, headers=["Person", "Clothes"]))
    else:
        print("No people found.")

    if args.draw and len(rects) > 0:
        vis = resized.copy()
        for idx, (rect, color) in enumerate(zip(rects, colors), start=1):
            x, y, w, h = rect
            cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{idx}: {color}"
            cv2.putText(vis, label, (x, max(0, y - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

        base, _ = os.path.splitext(args.image_path)
        out_path = f"{base}_annotated.jpg"
        # imwrite reports failure through its return value.
        if not cv2.imwrite(out_path, vis):
            print(f"Error: could not write annotated image to '{out_path}'", file=sys.stderr)
            return 1
        print(f"Annotated image saved to {out_path}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())