Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 291 additions & 0 deletions people_clothing_color.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
#!/usr/bin/env python3
"""
Detect people in an image and infer the color of their clothes.

- Uses OpenCV HOG person detector
- Estimates clothing color from a torso ROI using HSV+k-means
- Prints a table with person index and clothing color
- For grayscale clothing, reports "light" or "dark"
"""
from __future__ import annotations

import argparse
import os
from typing import List, Sequence, Tuple

import cv2
import numpy as np


def resize_for_detection(image: np.ndarray, max_width: int = 900) -> Tuple[np.ndarray, float]:
    """Downscale images wider than *max_width* to speed up detection.

    Returns the (possibly resized) image together with the applied scale
    factor; images already narrow enough come back unchanged with 1.0.
    """
    height, width = image.shape[:2]
    if width <= max_width:
        return image, 1.0
    factor = max_width / float(width)
    new_size = (int(width * factor), int(height * factor))
    shrunk = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
    return shrunk, factor


def non_max_suppression(boxes: np.ndarray, overlap_thresh: float = 0.65) -> np.ndarray:
    """Greedily suppress overlapping boxes.

    boxes: Nx4 array of [x1, y1, x2, y2]
    returns: Mx4 int array of surviving boxes

    Each round keeps the box with the largest y2 and drops every other
    remaining box whose intersection with it exceeds *overlap_thresh*
    of that box's own area.
    """
    if boxes is None or len(boxes) == 0:
        return np.empty((0, 4), dtype=int)

    coords = boxes.astype("float")
    left = coords[:, 0]
    top = coords[:, 1]
    right = coords[:, 2]
    bottom = coords[:, 3]

    box_areas = (right - left + 1) * (bottom - top + 1)
    order = np.argsort(bottom)
    kept: List[int] = []

    while order.size > 0:
        current = int(order[-1])
        kept.append(current)
        rest = order[:-1]

        # Intersection of the current box with every remaining candidate.
        inter_w = np.maximum(0.0, np.minimum(right[current], right[rest]) - np.maximum(left[current], left[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(bottom[current], bottom[rest]) - np.maximum(top[current], top[rest]) + 1)
        coverage = (inter_w * inter_h) / box_areas[rest]

        # Keep only candidates not sufficiently covered by the current box.
        order = rest[coverage <= overlap_thresh]

    return coords[kept].astype("int")


def detect_people(image_bgr: np.ndarray) -> List[Tuple[int, int, int, int]]:
    """Detect people with OpenCV's default HOG+SVM pedestrian detector.

    Returns rectangles as (x, y, w, h) in the coordinates of *image_bgr*,
    after merging overlapping detections with non-maximum suppression.
    """
    detector = cv2.HOGDescriptor()
    detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    found, _weights = detector.detectMultiScale(
        image_bgr,
        winStride=(8, 8),
        padding=(8, 8),
        scale=1.05,
    )
    if len(found) == 0:
        return []

    # Convert (x, y, w, h) detections to corner form for NMS.
    found = np.asarray(found)
    corners = np.zeros((found.shape[0], 4), dtype=int)
    corners[:, 0] = found[:, 0]
    corners[:, 1] = found[:, 1]
    corners[:, 2] = found[:, 0] + found[:, 2]
    corners[:, 3] = found[:, 1] + found[:, 3]

    merged = non_max_suppression(corners, overlap_thresh=0.65)
    return [(int(a), int(b), int(c - a), int(d - b)) for a, b, c, d in merged]


def get_torso_roi(image_bgr: np.ndarray, rect: Tuple[int, int, int, int]) -> np.ndarray:
    """Crop an approximate torso patch from a detected person box.

    Takes the middle-lower portion of the detection rectangle to limit
    the influence of face/skin and background pixels; falls back to the
    full (image-clamped) box when the shrunken window would be empty.
    """
    x, y, w, h = rect
    img_h, img_w = image_bgr.shape[:2]

    top = max(0, int(y + 0.35 * h))
    bottom = min(img_h, int(y + 0.85 * h))
    left = max(0, int(x + 0.20 * w))
    right = min(img_w, int(x + 0.80 * w))

    if bottom <= top or right <= left:
        top, bottom = y, min(img_h, y + h)
        left, right = x, min(img_w, x + w)

    return image_bgr[top:bottom, left:right].copy()


def is_grayscale(hsv: np.ndarray) -> bool:
    """Return True when the region is essentially colorless.

    Both the median and 90th-percentile saturation must be low, so a
    small colorful patch on otherwise gray clothing still counts as color.
    """
    sat = hsv[..., 1].astype(np.float32) / 255.0
    median_sat = float(np.median(sat))
    p90_sat = float(np.percentile(sat, 90))
    return median_sat < 0.18 and p90_sat < 0.35


def light_or_dark(hsv: np.ndarray) -> str:
    """Classify a (grayscale) patch as "light" or "dark" by mean brightness."""
    values = hsv[..., 2].astype(np.float32) / 255.0
    return "light" if float(values.mean()) >= 0.60 else "dark"


def cluster_dominant_hsv(hsv: np.ndarray, max_samples: int = 5000, k: int = 3) -> Tuple[float, float, float]:
    """Cluster the HSV pixels and return a representative (h, s, v) center.

    Pixels are randomly subsampled to *max_samples* for speed (note: the
    sampling is unseeded, so results can vary between runs). Among the
    k-means clusters, the largest one with saturation >= 50 (~0.2) wins;
    if none is that saturated, the largest cluster overall is used.
    """
    flat = hsv.reshape(-1, 3).astype(np.float32)
    total = flat.shape[0]
    if total == 0:
        return 0.0, 0.0, 0.0

    sample = flat
    if total > max_samples:
        rows = np.random.choice(total, max_samples, replace=False)
        sample = flat[rows]

    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    _, assignments, centers = cv2.kmeans(sample, k, None, stop, 3, cv2.KMEANS_PP_CENTERS)

    sizes = np.bincount(assignments.flatten(), minlength=k)
    by_size_desc = np.argsort(-sizes)

    # Largest sufficiently-saturated cluster, else the largest outright.
    pick = None
    for candidate in by_size_desc:
        if centers[candidate, 1] >= 50:
            pick = int(candidate)
            break
    if pick is None:
        pick = int(by_size_desc[0])

    center = centers[pick]
    return float(center[0]), float(center[1]), float(center[2])


def map_hsv_to_color_name(h: float, s: float, v: float) -> str:
    """Translate an HSV cluster center into a coarse color name.

    Hue follows OpenCV's [0, 180) convention; saturation and value are
    in [0, 255]. Nearly colorless input is reported as "light"/"dark".
    """
    sat = s / 255.0
    val = v / 255.0

    # Too desaturated to name a hue: report brightness instead.
    if sat < 0.18:
        return "light" if val >= 0.60 else "dark"

    # Dark, moderately saturated orange reads as brown.
    if 10 <= h < 25 and val < 0.55 and sat > 0.30:
        return "brown"

    # Hue wraps around at the red end of the OpenCV scale.
    if h < 10 or h >= 170:
        return "red"
    if h < 20:
        return "orange"
    if h < 35:
        return "yellow"
    if h < 85:
        return "green"
    if h < 100:
        return "teal"
    if h < 130:
        return "blue"
    if h < 150:
        return "purple"
    # Remaining hues [150, 170): pink when bright, magenta otherwise.
    return "pink" if val > 0.6 else "magenta"


def infer_clothing_color(roi_bgr: np.ndarray) -> str:
    """Name the dominant clothing color in a torso crop; "unknown" if empty."""
    if roi_bgr is None or roi_bgr.size == 0:
        return "unknown"

    rows, cols = roi_bgr.shape[:2]
    # Trim a 20% border on each side to focus on the garment center.
    core = roi_bgr[int(rows * 0.20):int(rows * 0.80), int(cols * 0.20):int(cols * 0.80)]
    if core.size == 0:
        core = roi_bgr

    hsv = cv2.cvtColor(core, cv2.COLOR_BGR2HSV)

    # Grayscale clothing gets a brightness label rather than a hue name.
    if is_grayscale(hsv):
        return light_or_dark(hsv)

    dominant_h, dominant_s, dominant_v = cluster_dominant_hsv(hsv)
    return map_hsv_to_color_name(dominant_h, dominant_s, dominant_v)


def format_table(rows: Sequence[Sequence[object]], headers: Sequence[str]) -> str:
    """Render rows as a plain-text table: header, dashed separator, data rows.

    Columns are left-justified and sized to the widest cell (or header).
    """
    widths = [len(header) for header in headers]
    for row in rows:
        for col, cell in enumerate(row):
            widths[col] = max(widths[col], len(str(cell)))

    def render(cells: Sequence[object]) -> str:
        return " | ".join(str(cell).ljust(widths[i]) for i, cell in enumerate(cells))

    separator = "-+-".join("-" * width for width in widths)
    rendered = [render(headers), separator]
    rendered.extend(render(row) for row in rows)
    return "\n".join(rendered)


def main() -> int:
    """CLI entry point: detect people in an image and print clothing colors.

    Returns a process exit code (0 on success, 1 when the image cannot
    be read). With --draw, also writes an annotated copy of the image.

    Fixes: the color for each person is computed exactly once and reused
    for both the printed table and the --draw annotations. Previously the
    draw path re-ran infer_clothing_color, whose k-means pipeline uses
    unseeded random subsampling, so the annotated labels could disagree
    with the table; this also did the clustering work twice.
    """
    parser = argparse.ArgumentParser(
        description="Detect people in an image and report clothing color per person."
    )
    parser.add_argument("image_path", help="Path to input image file")
    parser.add_argument(
        "--draw",
        action="store_true",
        help="Save an annotated image with detections and labels (_annotated.jpg)",
    )
    args = parser.parse_args()

    image = cv2.imread(args.image_path)
    if image is None:
        print(f"Error: could not read image at '{args.image_path}'")
        return 1

    # Detection and color analysis both run on the resized image, so the
    # scale factor is not needed downstream.
    resized, _scale = resize_for_detection(image)
    rects = detect_people(resized)

    # One (index, color) pair per detection, computed once and reused below.
    results: List[Tuple[int, str]] = [
        (idx, infer_clothing_color(get_torso_roi(resized, rect)))
        for idx, rect in enumerate(rects, start=1)
    ]

    print(f"People detected: {len(rects)}")
    if results:
        table = format_table([[idx, color] for idx, color in results], headers=["Person", "Clothes"])
        print(table)
    else:
        print("No people found.")

    if args.draw and len(rects) > 0:
        vis = resized.copy()
        for (idx, color), (x, y, w, h) in zip(results, rects):
            cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{idx}: {color}"
            cv2.putText(vis, label, (x, max(0, y - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

        base, _ = os.path.splitext(args.image_path)
        out_path = f"{base}_annotated.jpg"
        cv2.imwrite(out_path, vis)
        print(f"Annotated image saved to {out_path}")

    return 0


# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
opencv-python==4.10.0.84
numpy==2.1.2