Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 291 additions & 0 deletions people_clothing_color.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
#!/usr/bin/env python3
"""
Detect people in an image and infer the color of their clothes.

- Uses OpenCV HOG person detector
- Estimates clothing color from a torso ROI using HSV+k-means
- Prints a table with person index and clothing color
- For grayscale clothing, reports "light" or "dark"
"""
from __future__ import annotations

import argparse
import os
from typing import List, Sequence, Tuple

import cv2
import numpy as np


def resize_for_detection(image: np.ndarray, max_width: int = 900) -> Tuple[np.ndarray, float]:
    """Downscale images wider than *max_width* to speed up detection.

    Returns the (possibly resized) image together with the applied scale
    factor; images already narrow enough come back unchanged with 1.0.
    """
    height, width = image.shape[:2]
    if width <= max_width:
        return image, 1.0
    factor = max_width / float(width)
    new_size = (int(width * factor), int(height * factor))
    shrunk = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
    return shrunk, factor


def non_max_suppression(boxes: np.ndarray, overlap_thresh: float = 0.65) -> np.ndarray:
    """Greedily suppress overlapping boxes.

    boxes: Nx4 array of [x1, y1, x2, y2]
    returns: Mx4 int array of surviving boxes

    Each round keeps the box with the largest y2 and drops every other
    remaining box whose intersection with it exceeds *overlap_thresh*
    of that box's own area.
    """
    if boxes is None or len(boxes) == 0:
        return np.empty((0, 4), dtype=int)

    coords = boxes.astype("float")
    left = coords[:, 0]
    top = coords[:, 1]
    right = coords[:, 2]
    bottom = coords[:, 3]

    box_areas = (right - left + 1) * (bottom - top + 1)
    order = np.argsort(bottom)
    kept: List[int] = []

    while order.size > 0:
        current = int(order[-1])
        kept.append(current)
        rest = order[:-1]

        # Intersection of the current box with every remaining candidate.
        inter_w = np.maximum(0.0, np.minimum(right[current], right[rest]) - np.maximum(left[current], left[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(bottom[current], bottom[rest]) - np.maximum(top[current], top[rest]) + 1)
        coverage = (inter_w * inter_h) / box_areas[rest]

        # Keep only candidates not sufficiently covered by the current box.
        order = rest[coverage <= overlap_thresh]

    return coords[kept].astype("int")


def detect_people(image_bgr: np.ndarray) -> List[Tuple[int, int, int, int]]:
    """Detect people with OpenCV's default HOG+SVM pedestrian detector.

    Returns rectangles as (x, y, w, h) in the coordinates of *image_bgr*,
    after merging overlapping detections with non-maximum suppression.
    """
    detector = cv2.HOGDescriptor()
    detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    found, _weights = detector.detectMultiScale(
        image_bgr,
        winStride=(8, 8),
        padding=(8, 8),
        scale=1.05,
    )
    if len(found) == 0:
        return []

    # Convert (x, y, w, h) detections to corner form for NMS.
    found = np.asarray(found)
    corners = np.zeros((found.shape[0], 4), dtype=int)
    corners[:, 0] = found[:, 0]
    corners[:, 1] = found[:, 1]
    corners[:, 2] = found[:, 0] + found[:, 2]
    corners[:, 3] = found[:, 1] + found[:, 3]

    merged = non_max_suppression(corners, overlap_thresh=0.65)
    return [(int(a), int(b), int(c - a), int(d - b)) for a, b, c, d in merged]


def get_torso_roi(image_bgr: np.ndarray, rect: Tuple[int, int, int, int]) -> np.ndarray:
    """Crop an approximate torso patch from a detected person box.

    Takes the middle-lower portion of the detection rectangle to limit
    the influence of face/skin and background pixels; falls back to the
    full (image-clamped) box when the shrunken window would be empty.
    """
    x, y, w, h = rect
    img_h, img_w = image_bgr.shape[:2]

    top = max(0, int(y + 0.35 * h))
    bottom = min(img_h, int(y + 0.85 * h))
    left = max(0, int(x + 0.20 * w))
    right = min(img_w, int(x + 0.80 * w))

    if bottom <= top or right <= left:
        top, bottom = y, min(img_h, y + h)
        left, right = x, min(img_w, x + w)

    return image_bgr[top:bottom, left:right].copy()


def is_grayscale(hsv: np.ndarray) -> bool:
    """Return True when the region is essentially colorless.

    Both the median and 90th-percentile saturation must be low, so a
    small colorful patch on otherwise gray clothing still counts as color.
    """
    sat = hsv[..., 1].astype(np.float32) / 255.0
    median_sat = float(np.median(sat))
    p90_sat = float(np.percentile(sat, 90))
    return median_sat < 0.18 and p90_sat < 0.35


def light_or_dark(hsv: np.ndarray) -> str:
    """Classify a (grayscale) patch as "light" or "dark" by mean brightness."""
    values = hsv[..., 2].astype(np.float32) / 255.0
    return "light" if float(values.mean()) >= 0.60 else "dark"


def cluster_dominant_hsv(hsv: np.ndarray, max_samples: int = 5000, k: int = 3) -> Tuple[float, float, float]:
    """Cluster the HSV pixels and return a representative (h, s, v) center.

    Pixels are randomly subsampled to *max_samples* for speed (note: the
    sampling is unseeded, so results can vary between runs). Among the
    k-means clusters, the largest one with saturation >= 50 (~0.2) wins;
    if none is that saturated, the largest cluster overall is used.
    """
    flat = hsv.reshape(-1, 3).astype(np.float32)
    total = flat.shape[0]
    if total == 0:
        return 0.0, 0.0, 0.0

    sample = flat
    if total > max_samples:
        rows = np.random.choice(total, max_samples, replace=False)
        sample = flat[rows]

    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    _, assignments, centers = cv2.kmeans(sample, k, None, stop, 3, cv2.KMEANS_PP_CENTERS)

    sizes = np.bincount(assignments.flatten(), minlength=k)
    by_size_desc = np.argsort(-sizes)

    # Largest sufficiently-saturated cluster, else the largest outright.
    pick = None
    for candidate in by_size_desc:
        if centers[candidate, 1] >= 50:
            pick = int(candidate)
            break
    if pick is None:
        pick = int(by_size_desc[0])

    center = centers[pick]
    return float(center[0]), float(center[1]), float(center[2])


def map_hsv_to_color_name(h: float, s: float, v: float) -> str:
    """Translate an HSV cluster center into a coarse color name.

    Hue follows OpenCV's [0, 180) convention; saturation and value are
    in [0, 255]. Nearly colorless input is reported as "light"/"dark".
    """
    sat = s / 255.0
    val = v / 255.0

    # Too desaturated to name a hue: report brightness instead.
    if sat < 0.18:
        return "light" if val >= 0.60 else "dark"

    # Dark, moderately saturated orange reads as brown.
    if 10 <= h < 25 and val < 0.55 and sat > 0.30:
        return "brown"

    # Hue wraps around at the red end of the OpenCV scale.
    if h < 10 or h >= 170:
        return "red"
    if h < 20:
        return "orange"
    if h < 35:
        return "yellow"
    if h < 85:
        return "green"
    if h < 100:
        return "teal"
    if h < 130:
        return "blue"
    if h < 150:
        return "purple"
    # Remaining hues [150, 170): pink when bright, magenta otherwise.
    return "pink" if val > 0.6 else "magenta"


def infer_clothing_color(roi_bgr: np.ndarray) -> str:
    """Name the dominant clothing color in a torso crop; "unknown" if empty."""
    if roi_bgr is None or roi_bgr.size == 0:
        return "unknown"

    rows, cols = roi_bgr.shape[:2]
    # Trim a 20% border on each side to focus on the garment center.
    core = roi_bgr[int(rows * 0.20):int(rows * 0.80), int(cols * 0.20):int(cols * 0.80)]
    if core.size == 0:
        core = roi_bgr

    hsv = cv2.cvtColor(core, cv2.COLOR_BGR2HSV)

    # Grayscale clothing gets a brightness label rather than a hue name.
    if is_grayscale(hsv):
        return light_or_dark(hsv)

    dominant_h, dominant_s, dominant_v = cluster_dominant_hsv(hsv)
    return map_hsv_to_color_name(dominant_h, dominant_s, dominant_v)


def format_table(rows: Sequence[Sequence[object]], headers: Sequence[str]) -> str:
    """Render rows as a plain-text table: header, dashed separator, data rows.

    Columns are left-justified and sized to the widest cell (or header).
    """
    widths = [len(header) for header in headers]
    for row in rows:
        for col, cell in enumerate(row):
            widths[col] = max(widths[col], len(str(cell)))

    def render(cells: Sequence[object]) -> str:
        return " | ".join(str(cell).ljust(widths[i]) for i, cell in enumerate(cells))

    separator = "-+-".join("-" * width for width in widths)
    rendered = [render(headers), separator]
    rendered.extend(render(row) for row in rows)
    return "\n".join(rendered)


def main() -> int:
    """CLI entry point: detect people in an image and print clothing colors.

    Returns a process exit code (0 on success, 1 when the image cannot
    be read). With --draw, also writes an annotated copy of the image.

    Fixes: the color for each person is computed exactly once and reused
    for both the printed table and the --draw annotations. Previously the
    draw path re-ran infer_clothing_color, whose k-means pipeline uses
    unseeded random subsampling, so the annotated labels could disagree
    with the table; this also did the clustering work twice.
    """
    parser = argparse.ArgumentParser(
        description="Detect people in an image and report clothing color per person."
    )
    parser.add_argument("image_path", help="Path to input image file")
    parser.add_argument(
        "--draw",
        action="store_true",
        help="Save an annotated image with detections and labels (_annotated.jpg)",
    )
    args = parser.parse_args()

    image = cv2.imread(args.image_path)
    if image is None:
        print(f"Error: could not read image at '{args.image_path}'")
        return 1

    # Detection and color analysis both run on the resized image, so the
    # scale factor is not needed downstream.
    resized, _scale = resize_for_detection(image)
    rects = detect_people(resized)

    # One (index, color) pair per detection, computed once and reused below.
    results: List[Tuple[int, str]] = [
        (idx, infer_clothing_color(get_torso_roi(resized, rect)))
        for idx, rect in enumerate(rects, start=1)
    ]

    print(f"People detected: {len(rects)}")
    if results:
        table = format_table([[idx, color] for idx, color in results], headers=["Person", "Clothes"])
        print(table)
    else:
        print("No people found.")

    if args.draw and len(rects) > 0:
        vis = resized.copy()
        for (idx, color), (x, y, w, h) in zip(results, rects):
            cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{idx}: {color}"
            cv2.putText(vis, label, (x, max(0, y - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

        base, _ = os.path.splitext(args.image_path)
        out_path = f"{base}_annotated.jpg"
        cv2.imwrite(out_path, vis)
        print(f"Annotated image saved to {out_path}")

    return 0


# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
opencv-python==4.10.0.84
numpy==2.1.2