|
| 1 | +import cv2 |
| 2 | +import mediapipe as mp |
| 3 | +import numpy as np |
| 4 | +import json |
| 5 | +import socket |
| 6 | + |
| 7 | +# Set up the UDP socket |
| 8 | +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) |
| 9 | +server_address = ('localhost', 12345) # Port for Unity to listen to |
| 10 | + |
| 11 | +# Function to read and return 3D landmark positions |
| 12 | +def read_landmark_positions_3d(results): |
| 13 | + if results.pose_world_landmarks is None: |
| 14 | + return None |
| 15 | + else: |
| 16 | + # Extract 3D landmark positions |
| 17 | + landmarks = [results.pose_world_landmarks.landmark[lm] for lm in mp.solutions.pose.PoseLandmark] |
| 18 | + return np.array([(lm.x, lm.y, lm.z) for lm in landmarks]) |
| 19 | + |
| 20 | +# Function to draw landmarks on the image |
| 21 | +def draw_landmarks_on_image(frame, results): |
| 22 | + if results.pose_landmarks is not None: |
| 23 | + mp_drawing = mp.solutions.drawing_utils |
| 24 | + mp_pose = mp.solutions.pose |
| 25 | + mp_drawing.draw_landmarks( |
| 26 | + frame, |
| 27 | + results.pose_landmarks, |
| 28 | + mp_pose.POSE_CONNECTIONS, |
| 29 | + mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2), |
| 30 | + mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2), |
| 31 | + ) |
| 32 | + |
| 33 | +# Real-time 3D pose estimation function |
| 34 | +def real_time_pose_estimation(): |
| 35 | + # Initialize webcam or video |
| 36 | + cap = cv2.VideoCapture(0) # Use 0 for webcam |
| 37 | + |
| 38 | + # Initialize Mediapipe Pose model |
| 39 | + mp_pose = mp.solutions.pose |
| 40 | + pose_detector = mp_pose.Pose(static_image_mode=False, model_complexity=2) |
| 41 | + |
| 42 | + while cap.isOpened(): |
| 43 | + ret, frame = cap.read() |
| 44 | + if not ret: |
| 45 | + break |
| 46 | + |
| 47 | + # Convert the frame to RGB (required by Mediapipe) |
| 48 | + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) |
| 49 | + |
| 50 | + # Process the frame to obtain pose landmarks |
| 51 | + results = pose_detector.process(frame_rgb) |
| 52 | + |
| 53 | + # Draw landmarks on the frame (optional for visualization) |
| 54 | + draw_landmarks_on_image(frame, results) |
| 55 | + |
| 56 | + # Extract 3D landmarks |
| 57 | + landmark_positions_3d = read_landmark_positions_3d(results) |
| 58 | + if landmark_positions_3d is not None: |
| 59 | + # Send landmark positions to Unity via UDP |
| 60 | + data = json.dumps(landmark_positions_3d.tolist()) # Convert to JSON format |
| 61 | + sock.sendto(data.encode('utf-8'), server_address) # Send data to Unity |
| 62 | + print(f'3D Landmarks: {landmark_positions_3d}') # Optional: Print landmarks to console |
| 63 | + |
| 64 | + # Display the frame with landmarks drawn |
| 65 | + cv2.imshow('Real-Time 3D Pose Estimation', frame) |
| 66 | + |
| 67 | + # Exit loop when 'q' key is pressed |
| 68 | + if cv2.waitKey(1) & 0xFF == ord('q'): |
| 69 | + break |
| 70 | + |
| 71 | + cap.release() |
| 72 | + cv2.destroyAllWindows() |
| 73 | + |
| 74 | +if __name__ == "__main__": |
| 75 | + real_time_pose_estimation() |
0 commit comments