From 4cc388efb0ca75b5d46aa81db3b80a17257f3906 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Wed, 15 Oct 2025 17:09:12 +0000
Subject: [PATCH] feat: Add people and clothing detection

Co-authored-by: hmoharrer
---
 analyze_image.py                     |  51 ++++++
 image_analyzer/__init__.py           |   7 +
 image_analyzer/people_and_clothes.py | 253 +++++++++++++++++++++++++++
 requirements.txt                     |   4 +
 streamlit_app.py                     |  55 ++++++
 5 files changed, 370 insertions(+)
 create mode 100644 analyze_image.py
 create mode 100644 image_analyzer/__init__.py
 create mode 100644 image_analyzer/people_and_clothes.py
 create mode 100644 requirements.txt
 create mode 100644 streamlit_app.py

diff --git a/analyze_image.py b/analyze_image.py
new file mode 100644
index 00000000..babbb3b8
--- /dev/null
+++ b/analyze_image.py
@@ -0,0 +1,51 @@
+import argparse
+from typing import List, Dict, Any
+
+import cv2
+
+from image_analyzer import analyze_image, draw_annotations
+
+
+def _format_table(rows: List[Dict[str, Any]]) -> str:
+    """Render the per-person results as a two-column ASCII table."""
+    headers = ["Person", "Clothing"]
+    str_rows = [[str(r["id"]), str(r["clothing"]).lower()] for r in rows]
+    col_widths = [max(len(headers[i]), max((len(row[i]) for row in str_rows), default=0)) for i in range(2)]
+    line_sep = "+".join(["".ljust(col_widths[0] + 2, "-"), "".ljust(col_widths[1] + 2, "-")])
+
+    def fmt_row(cols):
+        return "| " + cols[0].ljust(col_widths[0]) + " | " + cols[1].ljust(col_widths[1]) + " |"
+
+    out = [fmt_row(headers), "+" + line_sep + "+"]
+    out.extend(fmt_row(row) for row in str_rows)
+    return "\n".join(out)
+
+
+def main():
+    """Command-line entry point: analyze one image and optionally save an annotated copy."""
+    parser = argparse.ArgumentParser(description="Detect people and clothing colors in an image.")
+    parser.add_argument("image", help="Path to input image")
+    parser.add_argument("--save", dest="save_path", default=None, help="Optional path to save annotated image")
+    args = parser.parse_args()
+
+    try:
+        results = analyze_image(args.image)
+    except FileNotFoundError as exc:
+        # Report an unreadable input as a usage error instead of a traceback
+        parser.error(str(exc))
+
+    print(f"People detected: {len(results)}")
+    if results:
+        print(_format_table(results))
+
+    if args.save_path:
+        image_bgr = cv2.imread(args.image)
+        annotated = draw_annotations(image_bgr, results)
+        # cv2.imwrite signals failure through its return value
+        if not cv2.imwrite(args.save_path, annotated):
+            raise SystemExit(f"Could not write annotated image to: {args.save_path}")
+        print(f"Annotated image saved to: {args.save_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/image_analyzer/__init__.py b/image_analyzer/__init__.py
new file mode 100644
index 00000000..d1211533
--- /dev/null
+++ b/image_analyzer/__init__.py
@@ -0,0 +1,7 @@
+from .people_and_clothes import analyze_image, analyze_image_array, draw_annotations
+
+__all__ = [
+    "analyze_image",
+    "analyze_image_array",
+    "draw_annotations",
+]
diff --git a/image_analyzer/people_and_clothes.py b/image_analyzer/people_and_clothes.py
new file mode 100644
index 00000000..145ff2ed
--- /dev/null
+++ b/image_analyzer/people_and_clothes.py
@@ -0,0 +1,253 @@
+from __future__ import annotations
+
+import cv2
+import numpy as np
+from typing import List, Tuple, Dict, Any
+
+
+BoundingBox = Tuple[int, int, int, int]
+
+
+def _init_hog_detector() -> cv2.HOGDescriptor:
+    """Create a HOG descriptor configured with OpenCV's default people detector."""
+    hog = cv2.HOGDescriptor()
+    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
+    return hog
+
+
+_HOG = _init_hog_detector()
+
+
+def detect_people_bboxes(image_bgr: np.ndarray) -> List[BoundingBox]:
+    """
+    Detect person bounding boxes using OpenCV's built-in HOG detector.
+    Returns list of (x, y, w, h) in pixel coordinates.
+    An empty image yields an empty list.
+    """
+    if image_bgr is None or image_bgr.size == 0:
+        # Nothing to scan; also avoids dividing by a zero-sized dimension below
+        return []
+
+    # Faster on resized image, then scale bboxes back
+    original_h, original_w = image_bgr.shape[:2]
+    scale = 800.0 / max(original_h, original_w)
+    if scale < 1.0:
+        # Keep both sides >= 1 px so very elongated images still resize
+        new_size = (max(1, int(original_w * scale)), max(1, int(original_h * scale)))
+        resized = cv2.resize(image_bgr, new_size)
+    else:
+        resized = image_bgr.copy()
+        scale = 1.0
+
+    # The per-detection SVM weights are not used
+    rects, _weights = _HOG.detectMultiScale(
+        resized,
+        winStride=(8, 8),
+        padding=(8, 8),
+        scale=1.05,
+        hitThreshold=0.0,
+    )
+
+    bboxes: List[BoundingBox] = []
+    inv = 1.0 / scale
+    for (x, y, w, h) in rects:
+        # scale back to original image size
+        x0 = int(round(x * inv))
+        y0 = int(round(y * inv))
+        w0 = int(round(w * inv))
+        h0 = int(round(h * inv))
+        # clamp
+        x0 = max(0, min(x0, original_w - 1))
+        y0 = max(0, min(y0, original_h - 1))
+        w0 = max(2, min(w0, original_w - x0))
+        h0 = max(2, min(h0, original_h - y0))
+        bboxes.append((x0, y0, w0, h0))
+
+    # Non-maximum suppression to merge overlapping detections
+    if bboxes:
+        bboxes = _nms_bboxes(bboxes, overlap_thresh=0.45)
+
+    return bboxes
+
+
+def _nms_bboxes(bboxes: List[BoundingBox], overlap_thresh: float = 0.45) -> List[BoundingBox]:
+    """
+    Greedy non-maximum suppression over (x, y, w, h) boxes.
+    Boxes are visited by descending bottom edge; each kept box suppresses the
+    remaining boxes whose intersection with it covers more than overlap_thresh
+    of their own area.
+    """
+    if not bboxes:
+        return []
+    boxes = np.array([[x, y, x + w, y + h] for (x, y, w, h) in bboxes], dtype=np.float32)
+    pick: List[int] = []
+
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    idxs = np.argsort(y2)
+
+    while len(idxs) > 0:
+        last = idxs[-1]
+        pick.append(int(last))
+        suppress = [len(idxs) - 1]
+        for pos in range(len(idxs) - 1):
+            i = idxs[pos]
+            xx1 = max(x1[last], x1[i])
+            yy1 = max(y1[last], y1[i])
+            xx2 = min(x2[last], x2[i])
+            yy2 = min(y2[last], y2[i])
+            w = max(0.0, xx2 - xx1 + 1)
+            h = max(0.0, yy2 - yy1 + 1)
+            overlap = (w * h) / areas[i]
+            if overlap > overlap_thresh:
+                suppress.append(pos)
+        idxs = np.delete(idxs, suppress)
+
+    picked_boxes = []
+    for i in pick:
+        x1i, y1i, x2i, y2i = boxes[i]
+        picked_boxes.append((int(x1i), int(y1i), int(x2i - x1i), int(y2i - y1i)))
+    return picked_boxes
+
+
+def _extract_clothing_region(image_bgr: np.ndarray, bbox: BoundingBox) -> np.ndarray:
+    """Crop the torso area of a person box; falls back to the whole box if that crop is empty."""
+    x, y, w, h = bbox
+    x2, y2 = x + w, y + h
+    roi = image_bgr[y:y2, x:x2]
+    if roi.size == 0:
+        return roi
+    # Focus on central torso: ignore head (top ~35%), use central 60% width, lower 60% height
+    rh, rw = roi.shape[:2]
+    y_start = int(0.35 * rh)
+    y_end = int(min(rh, y_start + int(0.60 * rh)))
+    x_start = int(0.20 * rw)
+    x_end = int(min(rw, x_start + int(0.60 * rw)))
+    torso = roi[y_start:y_end, x_start:x_end]
+    return torso if torso.size > 0 else roi
+
+
+def _classify_color_name(roi_bgr: np.ndarray) -> str:
+    """
+    Name the dominant clothing color of a BGR region.
+    Returns a color name, 'light'/'dark' for mostly unsaturated regions,
+    or 'unknown' for an empty region.
+    """
+    if roi_bgr.size == 0:
+        return "unknown"
+
+    # Smooth and convert to HSV
+    blur = cv2.GaussianBlur(roi_bgr, (5, 5), 0)
+    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
+    h = hsv[:, :, 0].astype(np.float32)  # 0..179
+    s = hsv[:, :, 1].astype(np.float32) / 255.0  # 0..1
+    v = hsv[:, :, 2].astype(np.float32) / 255.0  # 0..1
+
+    # Compute achromatic ratio
+    low_sat_mask = s < 0.18
+    achromatic_ratio = float(np.count_nonzero(low_sat_mask)) / float(s.size)
+    median_s = float(np.median(s))
+    median_v = float(np.median(v))
+
+    # If mostly achromatic, report light/dark per requirement
+    if achromatic_ratio > 0.65 or median_s < 0.15:
+        return "light" if median_v >= 0.55 else "dark"
+
+    # Consider only moderately saturated pixels for hue histogram
+    sat_mask = s >= 0.18
+    if np.count_nonzero(sat_mask) < 50:
+        # Not enough chromatic pixels, fallback to light/dark
+        return "light" if median_v >= 0.55 else "dark"
+
+    hue = h[sat_mask]
+    val = v[sat_mask]
+    sat = s[sat_mask]
+
+    # Weight histogram by saturation and value to emphasize vivid regions
+    weights = (sat * 0.6 + val * 0.4).clip(0.01, 1.0)
+
+    # Build hue histogram in OpenCV hue space 0..179
+    bins = 36  # 5 OpenCV hue units (about 10 degrees) per bin
+    hist, edges = np.histogram(hue, bins=bins, range=(0, 180), weights=weights)
+    if hist.sum() <= 0:
+        return "light" if median_v >= 0.55 else "dark"
+
+    dominant_bin = int(np.argmax(hist))
+    # Representative hue at bin center
+    h_center = (edges[dominant_bin] + edges[dominant_bin + 1]) * 0.5
+
+    # Map hue/value to named color (OpenCV hue units, 0..179)
+    # Special-case brown: dark orange region with lower brightness
+    if 10 <= h_center < 25 and np.median(val) < 0.55:
+        return "brown"
+
+    if h_center < 10 or h_center >= 170:
+        return "red"
+    if 10 <= h_center < 20:
+        return "orange"
+    if 20 <= h_center < 35:
+        return "yellow"
+    if 35 <= h_center < 85:
+        return "green"
+    if 85 <= h_center < 100:
+        return "cyan"
+    if 100 <= h_center < 130:
+        return "blue"
+    if 130 <= h_center < 150:
+        return "purple"
+    if 150 <= h_center < 170:
+        return "pink"
+
+    return "unknown"
+
+
+def analyze_image_array(image_bgr: np.ndarray) -> List[Dict[str, Any]]:
+    """
+    Analyze an image (BGR) and return a list of per-person results:
+    [{ 'id': int, 'bbox': (x,y,w,h), 'clothing': str }]
+    Clothing is a simple label: a color name, or 'light'/'dark' for grayscale clothes.
+    """
+    bboxes = detect_people_bboxes(image_bgr)
+    results: List[Dict[str, Any]] = []
+    for idx, bbox in enumerate(bboxes, start=1):
+        roi = _extract_clothing_region(image_bgr, bbox)
+        label = _classify_color_name(roi)
+        results.append({
+            "id": idx,
+            "bbox": bbox,
+            "clothing": label,
+        })
+    return results
+
+
+def analyze_image(image_path: str) -> List[Dict[str, Any]]:
+    """Load an image from disk and analyze it; raises FileNotFoundError if it cannot be read."""
+    image_bgr = cv2.imread(image_path)
+    if image_bgr is None:
+        raise FileNotFoundError(f"Could not read image: {image_path}")
+    return analyze_image_array(image_bgr)
+
+
+def draw_annotations(image_bgr: np.ndarray, results: List[Dict[str, Any]]) -> np.ndarray:
+    """
+    Return a copy of the image with a box and a "Person N: label" tag per result.
+    The tag sits above the box, or just inside its top edge when there is no room above.
+    """
+    annotated = image_bgr.copy()
+    for res in results:
+        x, y, w, h = res["bbox"]
+        label = f"Person {res['id']}: {res['clothing']}"
+        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
+        # Put label background
+        (tw, th), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+        label_h = th + baseline + 4
+        # A box touching the top edge would push the label off-image; draw it inside instead
+        label_top = y - label_h if y >= label_h else y
+        label_bottom = label_top + label_h
+        cv2.rectangle(annotated, (x, label_top), (x + tw + 4, label_bottom), (0, 255, 0), -1)
+        cv2.putText(annotated, label, (x + 2, label_bottom - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+    return annotated
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..f2d92552
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+opencv-python==4.10.0.84
+numpy==1.26.4
+Pillow==10.4.0
+streamlit==1.37.1
diff --git a/streamlit_app.py b/streamlit_app.py
new file mode 100644
index 00000000..96e84be7
--- /dev/null
+++ b/streamlit_app.py
@@ -0,0 +1,55 @@
+import io
+from typing import List, Dict, Any
+
+import cv2
+import numpy as np
+import streamlit as st
+
+from image_analyzer import analyze_image_array, draw_annotations
+
+
+st.set_page_config(page_title="People & Clothing Color Analyzer", layout="wide")
+
+st.title("People & Clothing Color Analyzer")
+st.write("Upload an image. The app will count people and estimate the clothing color for each person. For grayscale clothes, it reports 'light' or 'dark'.")
+
+uploaded = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "bmp", "webp"])
+
+if uploaded is not None:
+    file_bytes = np.frombuffer(uploaded.read(), dtype=np.uint8)
+    image_bgr = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
+    if image_bgr is None:
+        st.error("Could not read the uploaded image.")
+    else:
+        with st.spinner("Analyzing..."):
+            results: List[Dict[str, Any]] = analyze_image_array(image_bgr)
+            annotated = draw_annotations(image_bgr, results)
+
+        st.subheader(f"People detected: {len(results)}")
+        if results:
+            # Display table
+            table_rows = [{"Person": r["id"], "Clothing": str(r["clothing"]).lower()} for r in results]
+            st.dataframe(table_rows, hide_index=True, use_container_width=True)
+        else:
+            st.info("No people detected.")
+
+        # Show images side by side
+        c1, c2 = st.columns(2)
+        with c1:
+            st.caption("Original")
+            st.image(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), channels="RGB")
+        with c2:
+            st.caption("Annotated")
+            st.image(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB), channels="RGB")
+
+        # Download annotated image
+        success, buf = cv2.imencode(".png", annotated)
+        if success:
+            st.download_button(
+                label="Download annotated image",
+                data=buf.tobytes(),
+                file_name="annotated.png",
+                mime="image/png",
+            )
+        else:
+            st.warning("Could not prepare annotated image for download.")