当前位置: 首页 > article >正文

亚博microros小车-原生ubuntu支持系列:5-姿态检测

MediaPipe 介绍参见:亚博microros小车-原生ubuntu支持系列:4-手部检测-CSDN博客 

本篇继续迁移姿态检测。

一 背景知识

以下来自亚博官网

MediaPipe Pose是一个用于高保真身体姿势跟踪的ML解决方案,利用BlazePose研究,从RGB视频帧推断出33个3D坐标和全身背景分割遮罩,该研究也为ML Kit姿势检测API提供了动力。

MediaPipe姿势中的地标模型预测了33个姿势坐标的位置(参见下图)。

image-20240125170006220

 跟手部检测类似

import cv2
import mediapipe as mp

# Standalone demo: run MediaPipe Pose on the default webcam and show the
# detected skeleton in an OpenCV window until ESC is pressed.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# static_image_mode=False selects the video-stream mode of the solution.
pose = mp_pose.Pose(static_image_mode=False,
                    model_complexity=1,
                    smooth_landmarks=True,
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5)

cap = cv2.VideoCapture(0)  # open the default camera
try:
    while True:
        ret, frame = cap.read()  # grab one frame
        if not ret:
            # No frame was delivered (camera missing, unplugged or stream
            # ended); cvtColor below would crash on a None frame.
            print('failed to read a frame from the camera')
            break
        # MediaPipe expects RGB input, OpenCV delivers BGR.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip horizontally so the preview behaves like a mirror; drop this
        # line if the camera image is not mirrored.
        frame = cv2.flip(frame, 1)
        results = pose.process(frame)

        # Back to BGR for drawing and display with OpenCV.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if results.pose_landmarks:
            print(f'pose_landmarks:{results.pose_landmarks}')
            # Draw the landmarks and the connections between them.
            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        else:
            print('there are no person!')
        # Always refresh the window and poll the keyboard, even when nobody
        # is detected, so the preview does not freeze and ESC keeps working.
        cv2.imshow('MediaPipe pose', frame)
        if cv2.waitKey(1) & 0xFF == 27:  # 27 == ESC
            break
finally:
    # Release the camera, the MediaPipe graph and the preview window.
    cap.release()
    pose.close()
    cv2.destroyAllWindows()

运行效果:

二 位姿检测

src/yahboom_esp32_mediapipe/yahboom_esp32_mediapipe/目录下新建文件02_PoseDetector.py

#!/usr/bin/env python3
# encoding: utf-8

#import ros lib
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import Point
import mediapipe as mp
#import define msg
from yahboomcar_msgs.msg import PointArray
from cv_bridge import CvBridge
from sensor_msgs.msg import Image, CompressedImage
#import commom lib
import cv2 as cv
import numpy as np
import time

from rclpy.time import Time
import datetime

# Progress marker: reached only after every import above has completed.
print("import done")

class PoseDetector(Node):
    """ROS 2 node that runs MediaPipe Pose on images and publishes the landmarks.

    Detected landmarks are published as a ``PointArray`` on
    ``/mediapipe/points``.
    """

    def __init__(self, name, mode=False, smooth=True, detectionCon=0.5, trackCon=0.5):
        """Create the node, the pose estimator and the landmark publisher.

        Args:
            name: ROS node name.
            mode: Forwarded to MediaPipe as ``static_image_mode``.
            smooth: Forwarded to MediaPipe as ``smooth_landmarks``.
            detectionCon: Forwarded as ``min_detection_confidence``.
            trackCon: Forwarded as ``min_tracking_confidence``.
        """
        super().__init__(name)
        self.mpPose = mp.solutions.pose
        self.mpDraw = mp.solutions.drawing_utils
        # Initialise the pose estimator.
        self.pose = self.mpPose.Pose(
            static_image_mode=mode,
            smooth_landmarks=smooth,
            min_detection_confidence=detectionCon,
            min_tracking_confidence=trackCon)
        self.pub_point = self.create_publisher(PointArray, '/mediapipe/points', 1000)
        # Drawing styles: one spec for the landmark points, one for the
        # connections between them (colours are given in BGR order).
        self.lmDrawSpec = mp.solutions.drawing_utils.DrawingSpec(color=(0, 0, 255), thickness=-1, circle_radius=6)
        self.drawSpec = mp.solutions.drawing_utils.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)

    def pubPosePoint(self, frame, draw=True):
        """Detect the pose in ``frame`` and publish the landmark coordinates.

        A ``PointArray`` is published on every call; it is empty when no pose
        was detected.

        Args:
            frame: BGR image (it is converted to RGB before inference).
            draw: When true, the skeleton is also drawn onto ``frame`` itself.

        Returns:
            ``(frame, img)`` where ``img`` is a copy of the input on which the
            skeleton is always drawn when a pose is found.
        """
        pointArray = PointArray()
        img = np.copy(frame)
        # MediaPipe expects RGB input.
        img_RGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        self.results = self.pose.process(img_RGB)
        landmarks = self.results.pose_landmarks
        if landmarks:
            if draw:
                self.mpDraw.draw_landmarks(frame, landmarks, self.mpPose.POSE_CONNECTIONS, self.lmDrawSpec, self.drawSpec)
            self.mpDraw.draw_landmarks(img, landmarks, self.mpPose.POSE_CONNECTIONS, self.lmDrawSpec, self.drawSpec)
            # Copy each landmark's x/y/z, as reported by MediaPipe, into the message.
            for lm in landmarks.landmark:
                point = Point()
                point.x, point.y, point.z = lm.x, lm.y, lm.z
                pointArray.points.append(point)
        self.pub_point.publish(pointArray)
        return frame, img

    def frame_combine(self, frame, src):
        """Return ``frame`` and ``src`` placed side by side in one image.

        The output follows the channel layout of ``frame``: a 3-dimensional
        ``frame`` gives a 3-channel canvas, otherwise a single-channel one,
        and ``src`` is converted to match when its layout differs. Images of
        different heights are top-aligned and the remainder is left black.

        Args:
            frame: Left image.
            src: Right image.

        Returns:
            A new ``uint8`` array of height ``max(frame_h, src_h)`` and width
            ``frame_w + src_w``.
        """
        frameH, frameW = frame.shape[:2]
        srcH, srcW = src.shape[:2]
        outH = max(frameH, srcH)
        if len(frame.shape) == 3:
            if len(src.shape) == 2:
                # Grayscale right image: expand so it fits the colour canvas.
                src = cv.cvtColor(src, cv.COLOR_GRAY2BGR)
            dst = np.zeros((outH, frameW + srcW, 3), np.uint8)
        else:
            if len(src.shape) == 3:
                # Only convert when src really is a colour image.
                src = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
            dst = np.zeros((outH, frameW + srcW), np.uint8)
        # Slice by each image's own height; assigning with [:] would raise as
        # soon as the two heights differ.
        dst[:frameH, :frameW] = frame
        dst[:srcH, frameW:] = src
        return dst

class MY_Picture(Node):
    """ROS 2 node that receives compressed camera frames and shows the detected pose.

    Subscribes to ``/espRos/esp32camera`` (``CompressedImage``), runs every
    frame through a ``PoseDetector`` and displays the raw frame next to the
    annotated one in an OpenCV window.
    """

    def __init__(self, name):
        """Create the subscription and the pose detector.

        Args:
            name: ROS node name.
        """
        super().__init__(name)
        self.bridge = CvBridge()
        # Images sent by the ESP32 camera.
        self.sub_img = self.create_subscription(
            CompressedImage, '/espRos/esp32camera', self.handleTopic, 1)

        self.last_stamp = None
        self.new_seconds = 0   # scaled stamp of the previous frame (0 = none yet)
        self.fps_seconds = 1   # scaled stamp difference between the last two frames

        self.pose_detector = PoseDetector('pose_detector')

    def handleTopic(self, msg):
        """Subscription callback: decode, detect, annotate and display one frame.

        Args:
            msg: Incoming ``CompressedImage`` message.
        """
        self.last_stamp = msg.header.stamp
        if self.last_stamp:
            total_secs = Time(nanoseconds=self.last_stamp.nanosec, seconds=self.last_stamp.sec).nanoseconds
            delta = datetime.timedelta(seconds=total_secs * 1e-9)
            # NOTE(review): the stamp is scaled by 100 before differencing, so
            # fps_seconds is not in plain seconds - confirm the intended unit.
            seconds = delta.total_seconds()*100

            if self.new_seconds != 0:
                self.fps_seconds = seconds - self.new_seconds

            self.new_seconds = seconds  # remember this stamp for the next frame

        start = time.time()

        frame = self.bridge.compressed_imgmsg_to_cv2(msg)
        if frame is None:
            # The JPEG payload could not be decoded; skip this frame instead
            # of crashing in cv.resize.
            self.get_logger().warn('could not decode compressed image, frame skipped')
            return
        frame = cv.resize(frame, (640, 480))
        # The original code called cv.waitKey(10) here as well; it only added
        # latency inside the timed section, the call at the end is sufficient.
        frame, img = self.pose_detector.pubPosePoint(frame, draw=False)

        end = time.time()
        elapsed = (end - start) + self.fps_seconds
        # Guard against a zero or negative denominator (e.g. identical stamps).
        fps = 1 / elapsed if elapsed > 0 else 0
        text = "FPS : " + str(int(fps))
        cv.putText(frame, text, (20, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 1)

        # Left: raw frame with the FPS overlay, right: frame with the skeleton.
        combined = self.pose_detector.frame_combine(frame, img)
        cv.imshow('dist', combined)

        # Let OpenCV process GUI events so the window is actually refreshed.
        cv.waitKey(10)

def main():
    """Entry point: spin the image node until interrupted, then clean up."""
    print("start it")
    rclpy.init()
    esp_img = MY_Picture("My_Picture")
    try:
        rclpy.spin(esp_img)
    except KeyboardInterrupt:
        pass
    finally:
        # MY_Picture owns a second node (the pose detector); destroy both.
        esp_img.pose_detector.destroy_node()
        esp_img.destroy_node()
        cv.destroyAllWindows()
        # The context may already have been shut down by the signal handler;
        # shutting it down a second time raises.
        if rclpy.ok():
            rclpy.shutdown()

主要逻辑跟之前的手部探测类似,MY_Picture(Node):从摄像头获取图像,调用 pubPosePoint(frame,draw=False)探测位姿。

测试:

启动图像代理
docker run -it --rm -v /dev:/dev -v /dev/shm:/dev/shm --privileged --net=host microros/micro-ros-agent:humble udp4 --port 9999 -v4

重新构建后运行:

bohu@bohu-TM1701:~/yahboomcar/yahboomcar_ws$ ros2 run yahboom_esp32_mediapipe PoseDetector 
import done
start it
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1737459931.889105   73213 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1737459931.892356   73266 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.3), renderer: Mesa Intel(R) UHD Graphics 620 (KBL GT2)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1737459931.986178   73249 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737459932.041183   73256 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737459932.068208   73256 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
Warning: Ignoring XDG_SESSION_TYPE=wayland on Gnome. Use QT_QPA_PLATFORM=wayland to run on Wayland anyway.
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment

受限于小车摄像头,太近了拍不了。用手机放个体操视频来测试下

 


http://www.kler.cn/a/513774.html

相关文章:

  • 图解Git——分布式Git《Pro Git》
  • ABP - 缓存模块(1)
  • 65,【5】buuctf web [SUCTF 2019]Upload Labs 2
  • 【useContext Hook】解决组件树层级较深时props逐级传递问题
  • ASP.NET Core Web API 创建指南
  • 4.C++中的循环语句
  • Linux——信号量和(环形队列消费者模型)
  • [春秋杯冬季赛2025] pwn复现
  • 基于微信小程序的健身房预约管理系统
  • MySQL面试题2025 每日20道【其四】
  • 【0x04】HCI_Connection_Request事件详解
  • C++ 类- 构造和析构
  • 专业辅助软件,独家开发版本,请珍惜使用
  • 【专题】为2025制定可付诸实践的IT战略规划报告汇总PDF洞察(附原数据表)
  • C++ ——— 模拟实现 vector 类
  • RTMP|RTSP播放器只解码视频关键帧功能探讨
  • Esxi下虚拟机磁盘类型厚置备改精简置备
  • leetcode_字符串 14.最长公共前缀函数
  • GitHub的主要用途及核心功能
  • 99.12 金融难点通俗解释:毛利率
  • Cyber Security 101-Security Solutions-Vulnerability Scanner Overview(漏洞扫描程序概述)
  • Excel 技巧17 - 如何计算倒计时,并添加该倒计时的数据条(★)
  • RavenMarket:用AI和区块链重塑预测市场
  • Cursor的详细使用指南
  • 打家劫舍 打家劫舍II 打家劫舍III
  • 三分钟简单了解一些HTML的标签和语法_01