当前位置：首页 > article >正文

使用OpenCV和卡尔曼滤波器进行实时活体检测

article 2025/2/28 15:46:08

引言

在现代计算机视觉应用中，实时检测和跟踪物体是一项重要的任务。本文将详细介绍如何使用OpenCV库和卡尔曼滤波器来实现一个实时的活体检测系统。该系统能够通过摄像头捕捉视频流，并使用YOLOv3模型来检测目标对象（例如人），同时利用卡尔曼滤波器来预测目标的运动轨迹。本文将逐步介绍代码的实现过程，并解释每个部分的功能。

1. 环境准备

在开始编写代码之前，确保已经安装了以下依赖库：

OpenCV
NumPy
FilterPy

可以使用pip命令来安装这些库：

pip install opencv-python numpy filterpy

2. 代码结构

2.1 导入必要的库

import cv2
import numpy as np
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise

2.2 初始化卡尔曼滤波器

卡尔曼滤波器是一种用于估计线性动态系统的状态的算法。在这里，我们使用它来预测目标的位置和速度。

def init_kalman_filter():
    kf = KalmanFilter(dim_x=4, dim_z=2)
    kf.x = np.zeros((4, 1))  # 初始状态 [x, y, vx, vy]
    kf.F = np.array([[1, 0, 1, 0],
                     [0, 1, 0, 1],
                     [0, 0, 1, 0],
                     [0, 0, 0, 1]], dtype=float)  # 状态转移矩阵
    kf.H = np.array([[1, 0, 0, 0],
                     [0, 1, 0, 0]])  # 观测矩阵
    kf.P *= 1000  # 初始协方差
    kf.R = np.eye(2) * 5  # 测量噪声
    kf.Q = Q_discrete_white_noise(dim=4, dt=1, var=0.1)  # 过程噪声
    return kf

2.3 更新卡尔曼滤波器

每次获取新的观测值时，我们需要更新卡尔曼滤波器的状态。

def update_kalman_filter(kf, measurement):
    kf.predict()
    kf.update(measurement)
    return kf.x[0, 0], kf.x[1, 0], kf.x[2, 0], kf.x[3, 0]

2.4 实时检测函数

detect_live 函数是整个系统的核心，它负责从摄像头读取视频流，检测目标，并使用卡尔曼滤波器进行预测。

def detect_live(
        camera_index=0,
        motion_threshold=10,  # 移动的阈值
        min_confidence=0.5,  # 最小置信度
        debug=False,  # 是否显示调试窗口
        consecutive_motion_frames=5,  # 连续检测到移动的帧数
        target_class="person"  # 目标类别
):
    # 加载 YOLOv3 配置和权重文件
    net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

    # 加载类别名称
    with open("coco.names", "r") as f:
        classes = [line.strip() for line in f.readlines()]

    # 获取输出层名称
    layer_names = net.getLayerNames()
    try:
        output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    except IndexError:
        output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    # 打开摄像头
    cap = cv2.VideoCapture(camera_index)

    if not cap.isOpened():
        print("无法打开摄像头。")
        return

    prev_frame = None
    prev_centers = []  # 用于存储前几帧的目标中心点
    consecutive_motion_count = 0  # 连续检测到移动的帧数计数器
    is_target_detected = False  # 标志变量，用于记录当前帧中是否检测到目标类别
    kf = init_kalman_filter()  # 初始化卡尔曼滤波器

    try:
        while True:
            # 读取摄像头帧
            ret, frame = cap.read()

            if not ret:
                print("无法读取摄像头帧。")
                break

            height, width, _ = frame.shape

            # 对图像进行预处理
            blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            net.setInput(blob)
            outs = net.forward(output_layers)

            class_ids = []
            confidences = []
            boxes = []

            for out in outs:
                for detection in out:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > min_confidence:
                        center_x = int(detection[0] * width)
                        center_y = int(detection[1] * height)
                        w = int(detection[2] * width)
                        h = int(detection[3] * height)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)
                        if classes[class_id] == target_class:
                            is_target_detected = True  # 检测到目标类别

            indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)

            # 将 indexes 转换为 NumPy 数组
            indexes = np.array(indexes)

            current_centers = []
            for i in indexes.flatten():
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                confidence = confidences[i]

                # 计算中心点
                center = ((x + x + w) // 2, (y + y + h) // 2)
                current_centers.append(center)

                if len(prev_centers) > 0 and label == target_class:
                    # 如果有前一帧的数据，计算速度和方向
                    prev_center = prev_centers[-1]
                    distance = np.sqrt((center[0] - prev_center[0]) ** 2 + (center[1] - prev_center[1]) ** 2)
                    speed = distance  # 单位是像素/帧
                    direction = (center[0] - prev_center[0], center[1] - prev_center[1])

                    # 更新卡尔曼滤波器
                    x, y, vx, vy = update_kalman_filter(kf, np.array([center[0], center[1]]))

                    # 简单的行为预测
                    if speed > motion_threshold:
                        consecutive_motion_count += 1
                    else:
                        consecutive_motion_count = 0

                    # 如果连续检测到足够的移动，认为是活体
                    if consecutive_motion_count >= consecutive_motion_frames:
                        yield {"is_live": True, "speed": speed, "direction": direction, "predicted_position": (x, y),
                               "predicted_velocity": (vx, vy)}
                        consecutive_motion_count = 0  # 重置计数器
                else:
                    consecutive_motion_count = 0

                # 在调试模式下绘制框
                if debug:
                    color = (0, 255, 0) if label == target_class else (0, 0, 255)  # 绿色表示目标类别，红色表示其他类别
                    # 确保坐标是整数类型
                    x, y, w, h = int(x), int(y), int(w), int(h)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    cv2.putText(frame, f"{label}: {confidence:.2f}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color,
                                2)
                    if label == target_class:
                        cv2.circle(frame, center, 5, (0, 0, 255), -1)
                        cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 0), -1)  # 预测位置

            # 更新前一帧的中心点列表
            prev_centers = current_centers

            # 如果没有检测到目标类别，输出不是活体
            if not is_target_detected:
                yield {"is_live": False}

            # 重置标志变量
            is_target_detected = False

            # 显示当前帧（仅在调试模式下）
            if debug:
                cv2.imshow('Live Detection', frame)

            # 按 'q' 键退出循环
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        # 释放摄像头并关闭所有窗口
        cap.release()
        cv2.destroyAllWindows()

2.5 主程序

主程序调用 detect_live 函数，并打印出检测结果。

if __name__ == "__main__":
    for result in detect_live(debug=True):
        if result["is_live"]:
            print(
                f"Is live: True, Speed: {result['speed']:.2f} pixels/frame, Direction: {result['direction']}, Predicted Position: {result['predicted_position']}, Predicted Velocity: {result['predicted_velocity']}")
        else:
            print("Is live: False")