From 4cc388efb0ca75b5d46aa81db3b80a17257f3906 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Wed, 15 Oct 2025 17:09:12 +0000
Subject: [PATCH] feat: Add people and clothing detection

Co-authored-by: hmoharrer <hmoharrer@gmail.com>
---
 analyze_image.py                     |  46 ++++++
 image_analyzer/__init__.py           |   7 +
 image_analyzer/people_and_clothes.py | 223 +++++++++++++++++++++++++++
 requirements.txt                     |   4 +
 streamlit_app.py                     |  55 +++++++
 5 files changed, 335 insertions(+)
 create mode 100644 analyze_image.py
 create mode 100644 image_analyzer/__init__.py
 create mode 100644 image_analyzer/people_and_clothes.py
 create mode 100644 requirements.txt
 create mode 100644 streamlit_app.py

diff --git a/analyze_image.py b/analyze_image.py
new file mode 100644
index 00000000..babbb3b8
--- /dev/null
+++ b/analyze_image.py
@@ -0,0 +1,46 @@
+import argparse
+from typing import List, Dict, Any
+
+import cv2
+
+from image_analyzer import analyze_image, draw_annotations
+
+
+def _format_table(rows: List[Dict[str, Any]]) -> str:
+    """Render rows as a two-column text table: header, dashed rule, one line per person."""
+    titles = ["Person", "Clothing"]
+    # Stringify up front; clothing labels are shown lower-cased.
+    cells = [[str(entry["id"]), str(entry["clothing"]).lower()] for entry in rows]
+    # A column is as wide as its widest cell, header included.
+    widths = [max([len(title)] + [len(line[col]) for line in cells]) for col, title in enumerate(titles)]
+
+    def render(line):
+        left, right = (text.ljust(width) for text, width in zip(line, widths))
+        return f"| {left} | {right} |"
+
+    # Each dashed segment covers a column plus its one-space padding on both sides.
+    rule = "+" + "+".join("-" * (width + 2) for width in widths) + "+"
+    return "\n".join([render(titles), rule] + [render(line) for line in cells])
+
+
+def main():
+    """Command-line entry point: report detected people and optionally save an annotated copy."""
+    parser = argparse.ArgumentParser(description="Detect people and clothing colors in an image.")
+    parser.add_argument("image", help="Path to input image")
+    parser.add_argument("--save", dest="save_path", default=None, help="Optional path to save annotated image")
+    args = parser.parse_args()
+    results = analyze_image(args.image)
+    print(f"People detected: {len(results)}")
+    if results:
+        print(_format_table(results))
+
+    if args.save_path:
+        annotated = draw_annotations(cv2.imread(args.image), results)
+        # cv2.imwrite signals a failed write through its boolean result; do not claim success blindly.
+        if not cv2.imwrite(args.save_path, annotated):
+            raise SystemExit(f"Could not write annotated image to: {args.save_path}")
+        print(f"Annotated image saved to: {args.save_path}")
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
diff --git a/image_analyzer/__init__.py b/image_analyzer/__init__.py
new file mode 100644
index 00000000..d1211533
--- /dev/null
+++ b/image_analyzer/__init__.py
@@ -0,0 +1,7 @@
+from .people_and_clothes import analyze_image, analyze_image_array, draw_annotations
+
+__all__ = [  # names re-exported from .people_and_clothes as the package's public API
+    "analyze_image",
+    "analyze_image_array",
+    "draw_annotations",
+]
diff --git a/image_analyzer/people_and_clothes.py b/image_analyzer/people_and_clothes.py
new file mode 100644
index 00000000..145ff2ed
--- /dev/null
+++ b/image_analyzer/people_and_clothes.py
@@ -0,0 +1,223 @@
+from __future__ import annotations
+
+import cv2
+import numpy as np
+from typing import List, Tuple, Dict, Any
+
+
+BoundingBox = Tuple[int, int, int, int]  # (x, y, w, h): top-left corner, width, height, in pixels
+
+
+def _init_hog_detector() -> cv2.HOGDescriptor:
+    detector = cv2.HOGDescriptor()  # default-constructed descriptor
+    detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())  # OpenCV's stock people SVM
+    return detector
+
+
+_HOG = _init_hog_detector()  # built once at import time and shared by detect_people_bboxes
+
+
+def detect_people_bboxes(image_bgr: np.ndarray) -> List[BoundingBox]:
+    """
+    Detect person bounding boxes using OpenCV's built-in HOG detector.
+
+    Images whose longer side exceeds 800 px are downscaled before detection;
+    the boxes are then mapped back to the input resolution, clamped to the
+    image (width/height at least 2) and merged with non-maximum suppression.
+
+    Returns list of (x, y, w, h) in pixel coordinates; an image without any
+    pixels yields an empty list.
+    """
+    original_h, original_w = image_bgr.shape[:2]
+    if original_h == 0 or original_w == 0:
+        return []  # nothing to scan, and the scale below would divide by zero
+
+    # Only ever shrink: smaller images go to the detector at full size.
+    scale = min(1.0, 800.0 / max(original_h, original_w))
+    if scale < 1.0:
+        resized = cv2.resize(image_bgr, (int(original_w * scale), int(original_h * scale)))
+    else:
+        resized = image_bgr  # handed to the detector as-is, so no copy is made
+
+    rects, _weights = _HOG.detectMultiScale(
+        resized,
+        winStride=(8, 8),
+        padding=(8, 8),
+        scale=1.05,
+        hitThreshold=0.0,
+    )
+
+    inv = 1.0 / scale
+    bboxes: List[BoundingBox] = []
+    for (x, y, w, h) in rects:
+        # Scale back to the original image size, then clamp to its bounds.
+        x0 = max(0, min(int(round(x * inv)), original_w - 1))
+        y0 = max(0, min(int(round(y * inv)), original_h - 1))
+        w0 = max(2, min(int(round(w * inv)), original_w - x0))
+        h0 = max(2, min(int(round(h * inv)), original_h - y0))
+        bboxes.append((x0, y0, w0, h0))
+
+    # Non-maximum suppression to merge overlapping detections.
+    return _nms_bboxes(bboxes, overlap_thresh=0.45) if bboxes else bboxes
+
+
+def _nms_bboxes(bboxes: List[BoundingBox], overlap_thresh: float = 0.45) -> List[BoundingBox]:
+    """
+    Greedy non-maximum suppression over (x, y, w, h) boxes.
+
+    Candidates are visited from the largest bottom edge upwards. Each kept box
+    discards every remaining candidate whose intersection with it, measured as
+    a fraction of that candidate's own area, exceeds overlap_thresh.
+    """
+    if not bboxes:
+        return []
+    corners = np.array([[x, y, x + w, y + h] for (x, y, w, h) in bboxes], dtype=np.float32)
+    left, top, right, bottom = (corners[:, col] for col in range(4))
+    areas = (right - left + 1) * (bottom - top + 1)
+
+    def covered_fraction(keeper, cand):
+        # Intersection of the two boxes relative to the area of `cand`.
+        span_w = max(0.0, min(right[keeper], right[cand]) - max(left[keeper], left[cand]) + 1)
+        span_h = max(0.0, min(bottom[keeper], bottom[cand]) - max(top[keeper], top[cand]) + 1)
+        return (span_w * span_h) / areas[cand]
+
+    kept: List[int] = []
+    remaining = np.argsort(bottom)
+    while len(remaining) > 0:
+        keeper = remaining[-1]
+        kept.append(int(keeper))
+        # Remove the keeper itself plus every candidate it covers too heavily.
+        dropped = [len(remaining) - 1]
+        dropped += [pos for pos, cand in enumerate(remaining[:-1])
+                    if covered_fraction(keeper, cand) > overlap_thresh]
+        remaining = np.delete(remaining, dropped)
+
+    picked_boxes = []
+    for index in kept:
+        x_min, y_min, x_max, y_max = corners[index]
+        picked_boxes.append((int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)))
+    return picked_boxes
+
+
+def _extract_clothing_region(image_bgr: np.ndarray, bbox: BoundingBox) -> np.ndarray:
+    """Crop the torso part of a person box; fall back to the whole box if that crop is empty."""
+    left, top, width, height = bbox
+    person = image_bgr[top:top + height, left:left + width]
+    if person.size == 0:
+        return person
+    # Skip the top 35% of the box (head), then keep 60% of its height and the
+    # middle 60% of its width (20% margin on the left, the remainder on the right).
+    box_h, box_w = person.shape[:2]
+    row_from, col_from = int(0.35 * box_h), int(0.20 * box_w)
+    row_to = int(min(box_h, row_from + int(0.60 * box_h)))
+    col_to = int(min(box_w, col_from + int(0.60 * box_w)))
+    torso = person[row_from:row_to, col_from:col_to]
+    return torso if torso.size > 0 else person
+
+
+def _classify_color_name(roi_bgr: np.ndarray) -> str:
+    """
+    Name the dominant color of a BGR clothing patch.
+
+    Steps:
+      1. Blur the patch and convert it to HSV.
+      2. If most pixels are unsaturated, answer "light" or "dark" from the
+         median brightness.
+      3. Otherwise build a weighted hue histogram over the saturated pixels
+         and map the center of its strongest bin to a color name.
+
+    Returns "unknown" for an empty patch.
+    """
+    if roi_bgr.size == 0:
+        return "unknown"
+
+    # Smooth, then split into HSV channels.
+    hsv = cv2.cvtColor(cv2.GaussianBlur(roi_bgr, (5, 5), 0), cv2.COLOR_BGR2HSV)
+    hue_all = hsv[:, :, 0].astype(np.float32)  # 0..179
+    sat_all = hsv[:, :, 1].astype(np.float32) / 255.0  # 0..1
+    val_all = hsv[:, :, 2].astype(np.float32) / 255.0  # 0..1
+
+    # Answer given whenever the patch is treated as grayscale.
+    brightness_label = "light" if float(np.median(val_all)) >= 0.55 else "dark"
+
+    # Mostly achromatic: over 65% weakly saturated pixels, or a low median saturation.
+    weak = sat_all < 0.18
+    weak_share = float(np.count_nonzero(weak)) / float(sat_all.size)
+    if weak_share > 0.65 or float(np.median(sat_all)) < 0.15:
+        return brightness_label
+
+    # Hue is only evaluated on saturated pixels; with fewer than 50, fall back to light/dark.
+    vivid = sat_all >= 0.18
+    if np.count_nonzero(vivid) < 50:
+        return brightness_label
+
+    vivid_hue, vivid_sat, vivid_val = hue_all[vivid], sat_all[vivid], val_all[vivid]
+    # Weight each pixel by saturation and value to emphasize vivid regions.
+    votes = (vivid_sat * 0.6 + vivid_val * 0.4).clip(0.01, 1.0)
+    # 36 bins over OpenCV's 0..179 hue range, i.e. 5 hue units per bin.
+    hist, edges = np.histogram(vivid_hue, bins=36, range=(0, 180), weights=votes)
+    if hist.sum() <= 0:
+        return brightness_label
+
+    # The center of the heaviest bin represents the patch.
+    peak = int(np.argmax(hist))
+    h_center = (edges[peak] + edges[peak + 1]) * 0.5
+
+    # Brown is treated as dim orange, so it is tested before the plain ranges.
+    if 10 <= h_center < 25 and np.median(vivid_val) < 0.55:
+        return "brown"
+
+    # (exclusive upper hue bound, name), checked in ascending order; 170 and above is red again.
+    hue_ranges = (
+        (10, "red"),
+        (20, "orange"),
+        (35, "yellow"),
+        (85, "green"),
+        (100, "cyan"),
+        (130, "blue"),
+        (150, "purple"),
+        (170, "pink"),
+    )
+    for upper, name in hue_ranges:
+        if h_center < upper:
+            return name
+    return "red" if h_center >= 170 else "unknown"
+
+
+def analyze_image_array(image_bgr: np.ndarray) -> List[Dict[str, Any]]:
+    """
+    Detect people in a BGR image and label the clothing of each one.
+
+    Returns one dict per detection, numbered from 1 in detection order:
+    { 'id': int, 'bbox': (x,y,w,h), 'clothing': str }
+    'clothing' is a color name, 'light'/'dark' for grayscale clothes, or 'unknown'.
+    """
+    people = detect_people_bboxes(image_bgr)
+    return [
+        {
+            "id": person_id,
+            "bbox": box,
+            "clothing": _classify_color_name(_extract_clothing_region(image_bgr, box)),
+        }
+        for person_id, box in enumerate(people, start=1)
+    ]
+
+
+def analyze_image(image_path: str) -> List[Dict[str, Any]]:
+    loaded = cv2.imread(image_path)  # None when OpenCV cannot read or decode the file
+    if loaded is not None:
+        return analyze_image_array(loaded)
+    raise FileNotFoundError(f"Could not read image: {image_path}")
+
+
+def draw_annotations(image_bgr: np.ndarray, results: List[Dict[str, Any]]) -> np.ndarray:
+    """Return a copy of image_bgr with a green box and a "Person N: clothing" tag per result."""
+    annotated = image_bgr.copy()
+    for res in results:
+        (x, y, w, h), label = res["bbox"], f"Person {res['id']}: {res['clothing']}"
+        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
+        (tw, th), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+        tag_bottom = max(y, th + baseline + 4)  # tag sits above the box, but never above the image top
+        cv2.rectangle(annotated, (x, tag_bottom - th - baseline - 4), (x + tw + 4, tag_bottom), (0, 255, 0), -1)
+        cv2.putText(annotated, label, (x + 2, tag_bottom - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+    return annotated
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..f2d92552
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+opencv-python==4.10.0.84
+numpy==1.26.4
+Pillow==10.4.0
+streamlit==1.37.1
diff --git a/streamlit_app.py b/streamlit_app.py
new file mode 100644
index 00000000..96e84be7
--- /dev/null
+++ b/streamlit_app.py
@@ -0,0 +1,55 @@
+import io
+from typing import List, Dict, Any
+
+import cv2
+import numpy as np
+import streamlit as st
+
+from image_analyzer import analyze_image_array, draw_annotations
+
+
+st.set_page_config(page_title="People & Clothing Color Analyzer", layout="wide")
+
+st.title("People & Clothing Color Analyzer")
+st.write("Upload an image. The app will count people and estimate the clothing color for each person. For grayscale clothes, it reports 'light' or 'dark'.")
+
+uploaded = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "bmp", "webp"])  # checked against None below before use
+
+if uploaded is not None:
+    file_bytes = np.frombuffer(uploaded.read(), dtype=np.uint8)  # wrap the uploaded bytes as a flat uint8 array
+    image_bgr = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)  # decode in memory; None means decoding failed
+    if image_bgr is None:
+        st.error("Could not read the uploaded image.")
+    else:
+        with st.spinner("Analyzing..."):  # detection and drawing both run inside the spinner
+            results: List[Dict[str, Any]] = analyze_image_array(image_bgr)
+            annotated = draw_annotations(image_bgr, results)
+
+        st.subheader(f"People detected: {len(results)}")
+        if results:
+            # Display table: one row per person with the lower-cased clothing label
+            table_rows = [{"Person": r["id"], "Clothing": str(r["clothing"]).lower()} for r in results]
+            st.dataframe(table_rows, hide_index=True, use_container_width=True)
+        else:
+            st.info("No people detected.")
+
+        # Show images side by side; the arrays are BGR, so convert to RGB for display
+        c1, c2 = st.columns(2)
+        with c1:
+            st.caption("Original")
+            st.image(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), channels="RGB")
+        with c2:
+            st.caption("Annotated")
+            st.image(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB), channels="RGB")
+
+        # Download annotated image: encode to PNG in memory and offer the bytes
+        success, buf = cv2.imencode(".png", annotated)
+        if success:
+            st.download_button(
+                label="Download annotated image",
+                data=buf.tobytes(),
+                file_name="annotated.png",
+                mime="image/png",
+            )
+        else:
+            st.warning("Could not prepare annotated image for download.")