当前位置：首页 > article >正文

【python】OpenCV—Tracking（10.5）—dlib

article 2025/2/19 7:01:44

在这里插入图片描述

文章目录

1、功能描述
2、代码实现
3、效果展示
4、完整代码
5、涉及到的库函数
- dlib.correlation_tracker()
6、参考

1、功能描述

先用 OpenCV DNN 加载 MobileNet-SSD 检测指定类别的目标，再基于 dlib 库的相关滤波跟踪器对检测到的目标进行单目标跟踪

2、代码实现

caffe 模型

https://github.com/MediosZ/MobileNet-SSD/tree/master/mobilenet

或者

链接: https://pan.baidu.com/s/1fiBz6tEQmcXdw_dtaUuAVw?pwd=pw5n
提取码: pw5n

在这里插入图片描述

输入 1x3x300x300

输出的类别数为 21

在这里插入图片描述

导入必要的包

from imutils.video import FPS
import numpy as np
import argparse
import imutils
import dlib
import cv2

注意 dlib 的安装

conda 或者 pip 安装，如果 build 失败的话，可以试试下载 whl 安装

https://github.com/Silufer/dlib-python/tree/main

python -V 查看 python 版本，然后找到对应版本的 whl ，pip install xxx.whl

构造参数解析并解析参数

# Command-line interface. Each entry below is
# (short flag, long flag, keyword options forwarded to add_argument).
ap = argparse.ArgumentParser()
for short_flag, long_flag, options in (
    ("-p", "--prototxt",
     {"required": True, "help": "path to Caffe 'deploy' prototxt file"}),
    ("-m", "--model",
     {"required": True, "help": "path to Caffe pre-trained model"}),
    ("-v", "--video",
     {"required": True, "help": "path to input video file"}),
    ("-l", "--label",
     {"required": True,
      "help": "class label we are interested in detecting + tracking"}),
    ("-o", "--output",
     {"type": str, "help": "path to optional output video file"}),
    ("-c", "--confidence",
     {"type": float, "default": 0.2,
      "help": "minimum probability to filter weak detections"}),
):
    ap.add_argument(short_flag, long_flag, **options)
# Expose the parsed options as a plain dict keyed by the long option name.
args = vars(ap.parse_args())

参数依次为：Caffe 模型的 prototxt 文件、caffemodel 权重文件、输入视频、要检测并跟踪的类别标签、可选的输出视频路径，以及过滤弱检测框的最小置信度（默认 0.2）

MobileNet SSD 支持的类别类型如下

# Class names of the MobileNet SSD detector. The class id reported by the
# network is used as an index into this list, so the order must not change.
CLASSES = (
    "background aeroplane bicycle bird boat "
    "bottle bus car cat chair cow diningtable "
    "dog horse motorbike person pottedplant sheep "
    "sofa train tvmonitor"
).split()

加载模型，读取视频，初始化跟踪器

# Load the serialized Caffe detector from the two files named on the
# command line.
print("[INFO] loading model...")
prototxt_path, weights_path = args["prototxt"], args["model"]
net = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path)

# Open the input video. The dlib tracker and the output writer start out
# unset, and label holds the class name drawn next to the box.
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(args["video"])
tracker = writer = None
label = ""
# Start the frames-per-second estimator.
fps = FPS().start()

循环读取视频帧

# Loop over the frames of the video file.
while True:
    # Grab the next frame; stop once the stream yields nothing more.
    (grabbed, frame) = vs.read()
    if not grabbed or frame is None:
        break
    # Shrink the frame to speed up processing, then convert BGR -> RGB
    # (dlib expects RGB ordering).
    frame = imutils.resize(frame, width=600)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Create the video writer on the first frame if an output path was given.
    if args["output"] is not None and writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        # Write at the source frame rate so the result plays back at the
        # original speed; fall back to 30 when no usable rate is reported
        # (the comparison is also False for NaN).
        src_fps = vs.get(cv2.CAP_PROP_FPS)
        if not src_fps > 0:
            src_fps = 30
        writer = cv2.VideoWriter(args["output"], fourcc, src_fps,
                                 (frame.shape[1], frame.shape[0]), True)

resize 图片至宽为 600，转化为 RGB 输入模式，设置输出视频相关配置

    # No tracker yet: run the object detector to find something to track.
    if tracker is None:
        # Turn the frame into a blob and run it through the network.
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5)
        net.setInput(blob)
        detections = net.forward()

        # Pick the most confident detection of the requested class.
        # Walking the detection rows (third axis) handles an empty result,
        # which len(detections) does not detect because it measures the
        # first axis, and which np.argmax would reject.
        best = None
        for det in detections[0, 0]:
            class_id = int(det[1])
            if not 0 <= class_id < len(CLASSES):
                continue
            if CLASSES[class_id] != args["label"]:
                continue
            if best is None or det[2] > best[2]:
                best = det

        # Filter out weak detections by requiring a minimum confidence.
        if best is not None and best[2] > args["confidence"]:
            label = args["label"]
            # Scale the relative box to pixel coordinates. Built-in ints are
            # used because dlib.rectangle may reject NumPy integer types.
            box = best[3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = (int(v) for v in box)
            # Build a dlib rectangle from the box and start the dlib
            # correlation tracker on the RGB frame.
            tracker = dlib.correlation_tracker()
            rect = dlib.rectangle(startX, startY, endX, endY)
            tracker.start_track(rgb, rect)
            # Draw the bounding box and the class label.
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          (0, 255, 0), 2)
            cv2.putText(frame, label, (startX, startY - 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

当跟踪器尚未初始化时（通常是第一帧；如果这一帧没有检测到符合条件的目标，后续帧会继续检测），调用目标检测模型，获取检测结果 detections

如果检测到了目标，预测的分数大于配置的阈值，且预测的类别和配置的类别一致

初始化跟踪器 tracker，可视化检测结果

否则，我们已经执行了检测，所以让我们跟踪对象

    else:
        # 更新跟踪器并抓取被跟踪对象的位置
        tracker.update(rgb)
        pos = tracker.get_position()
        # 解包位置对象
        startX = int(pos.left())
        startY = int(pos.top())
        endX = int(pos.right())
        endY = int(pos.bottom())
        # 从相关对象跟踪器中绘制边界框
        cv2.rectangle(frame, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)
        cv2.putText(frame, label, (startX, startY - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

后续帧采用跟踪算法，update 更新目标坐标后，通过 get_position 获取新的坐标，并可视化

    # 检查我们是否应该将帧写入磁盘
    if writer is not None:
        writer.write(frame)
    # 显示输出帧
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # 如果按下了“q”键，则退出循环
    if key == ord("q"):
        break
    # 更新FPS计数器
    fps.update()

保存和可视化结果，按 q 键退出视频流

# Stop the FPS estimator and print the timing figures to the terminal.
fps.stop()
elapsed_seconds = fps.elapsed()
print("[INFO] elapsed time: {:.2f}".format(elapsed_seconds))
frames_per_second = fps.fps()
print("[INFO] approx. FPS: {:.2f}".format(frames_per_second))
# Finalize the output video if one was being written.
if writer is not None:
    writer.release()
# Close all OpenCV windows and release the input video.
cv2.destroyAllWindows()
vs.release()

完成信息统计，释放资源

3、效果展示

train_result

cat_result

4、完整代码

# 导入必要的包
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import dlib
import cv2

# Build the argument parser and parse the command line. Each entry below is
# (short flag, long flag, keyword options forwarded to add_argument).
ap = argparse.ArgumentParser()
for short_flag, long_flag, options in (
    ("-p", "--prototxt",
     {"required": True, "help": "path to Caffe 'deploy' prototxt file"}),
    ("-m", "--model",
     {"required": True, "help": "path to Caffe pre-trained model"}),
    ("-v", "--video",
     {"required": True, "help": "path to input video file"}),
    ("-l", "--label",
     {"required": True,
      "help": "class label we are interested in detecting + tracking"}),
    ("-o", "--output",
     {"type": str, "help": "path to optional output video file"}),
    ("-c", "--confidence",
     {"type": float, "default": 0.2,
      "help": "minimum probability to filter weak detections"}),
):
    ap.add_argument(short_flag, long_flag, **options)
# Expose the parsed options as a plain dict keyed by the long option name.
args = vars(ap.parse_args())

# Class labels the MobileNet SSD was trained on. The class id reported by the
# network is used as an index into this list, so the order must not change.
CLASSES = (
    "background aeroplane bicycle bird boat "
    "bottle bus car cat chair cow diningtable "
    "dog horse motorbike person pottedplant sheep "
    "sofa train tvmonitor"
).split()
# Load the serialized Caffe detector from disk.
print("[INFO] loading model...")
prototxt_path, weights_path = args["prototxt"], args["model"]
net = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path)

# Open the input video. The dlib tracker and the output writer start out
# unset, and label holds the class name drawn next to the box.
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(args["video"])
tracker = writer = None
label = ""
# Start the frames-per-second estimator.
fps = FPS().start()

# Loop over the frames of the video file: detect the requested class until a
# tracker has been started, then follow that object with dlib.
while True:
    # Grab the next frame; stop once the stream yields nothing more.
    (grabbed, frame) = vs.read()
    if not grabbed or frame is None:
        break
    # Shrink the frame to speed up processing, then convert BGR -> RGB
    # (dlib expects RGB ordering).
    frame = imutils.resize(frame, width=600)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Create the video writer on the first frame if an output path was given.
    if args["output"] is not None and writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        # Write at the source frame rate so the result plays back at the
        # original speed; fall back to 30 when no usable rate is reported
        # (the comparison is also False for NaN).
        src_fps = vs.get(cv2.CAP_PROP_FPS)
        if not src_fps > 0:
            src_fps = 30
        writer = cv2.VideoWriter(args["output"], fourcc, src_fps,
                                 (frame.shape[1], frame.shape[0]), True)

    # No tracker yet: run the object detector to find something to track.
    if tracker is None:
        # Turn the frame into a blob and run it through the network.
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5)
        net.setInput(blob)
        detections = net.forward()

        # Pick the most confident detection of the requested class.
        # Walking the detection rows (third axis) handles an empty result,
        # which len(detections) does not detect because it measures the
        # first axis, and which np.argmax would reject.
        best = None
        for det in detections[0, 0]:
            class_id = int(det[1])
            if not 0 <= class_id < len(CLASSES):
                continue
            if CLASSES[class_id] != args["label"]:
                continue
            if best is None or det[2] > best[2]:
                best = det

        # Filter out weak detections by requiring a minimum confidence.
        if best is not None and best[2] > args["confidence"]:
            label = args["label"]
            # Scale the relative box to pixel coordinates. Built-in ints are
            # used because dlib.rectangle may reject NumPy integer types.
            box = best[3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = (int(v) for v in box)
            # Build a dlib rectangle from the box and start the dlib
            # correlation tracker on the RGB frame.
            tracker = dlib.correlation_tracker()
            rect = dlib.rectangle(startX, startY, endX, endY)
            tracker.start_track(rgb, rect)
            # Draw the bounding box and the class label.
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          (0, 255, 0), 2)
            cv2.putText(frame, label, (startX, startY - 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

    # Otherwise a tracker is already running, so follow the object.
    else:
        # Feed the new RGB frame to the tracker, then read back its box.
        tracker.update(rgb)
        pos = tracker.get_position()
        # Convert the reported edges to integer pixel coordinates.
        startX = int(pos.left())
        startY = int(pos.top())
        endX = int(pos.right())
        endY = int(pos.bottom())
        # Draw the tracked box and the class label.
        cv2.rectangle(frame, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)
        cv2.putText(frame, label, (startX, startY - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    # Append the annotated frame to the output video when a writer exists.
    if writer is not None:
        writer.write(frame)
    # Show the output frame.
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # Leave the loop when "q" is pressed.
    if key == ord("q"):
        break
    # Count this frame towards the FPS estimate.
    fps.update()

# Stop the FPS estimator and print the timing figures to the terminal.
fps.stop()
elapsed_seconds = fps.elapsed()
print("[INFO] elapsed time: {:.2f}".format(elapsed_seconds))
frames_per_second = fps.fps()
print("[INFO] approx. FPS: {:.2f}".format(frames_per_second))
# Finalize the output video if one was being written.
if writer is not None:
    writer.release()
# Close all OpenCV windows and release the input video.
cv2.destroyAllWindows()
vs.release()

测试脚本1

python .\track.py -p .\mobilenet_ssd\MobileNetSSD_deploy.prototxt -m .\mobilenet_ssd\MobileNetSSD_deploy.caffemodel -v .\cat.mp4 -l cat -o cat_result.mp4

测试脚本2

python .\track.py -p .\mobilenet_ssd\MobileNetSSD_deploy.prototxt -m .\mobilenet_ssd\MobileNetSSD_deploy.caffemodel -v .\train.mp4 -l aeroplane -o train_result.mp4

5、涉及到的库函数

dlib.correlation_tracker()

dlib.correlation_tracker 是 Dlib 库中的一个功能，用于实现目标跟踪（Object Tracking）。

dlib.correlation_tracker 基于判别式相关滤波器（Discriminative Correlation Filter, DCF）的方法，这种方法通过训练一个滤波器来区分目标对象和背景，从而实现高效的跟踪。

使用 dlib.correlation_tracker 跟踪目标通常涉及以下几个步骤：

初始化跟踪器：首先，你需要创建一个 correlation_tracker 对象。这通常是在你已知目标对象在第一帧中的位置时进行的。
设置目标区域：你需要指定一个矩形区域（通常通过左上角和右下角的坐标或者通过中心点和尺寸）来标识目标对象在第一帧中的位置。
更新跟踪器：对于后续的视频帧，你需要将新的帧传递给跟踪器，并让它更新目标的位置。这个过程会不断重复，直到视频结束或者跟踪失败。
获取跟踪结果：每次更新后，你可以从跟踪器中获取当前帧中目标对象的位置。

以下是一个简单的示例，展示了如何使用 dlib.correlation_tracker 进行目标跟踪：

import dlib
import cv2
 
# Open the video.
cap = cv2.VideoCapture('video.mp4')

# Read the first frame; without it there is nothing to start the tracker on.
ret, frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit("could not read the first frame of 'video.mp4'")

# Target region (left, top, right, bottom). This is a placeholder and must
# be replaced with the real object position, chosen by hand or by a detector.
rect = dlib.rectangle(50, 50, 200, 200)

# Create the tracker and initialise it on the first frame. OpenCV delivers
# BGR frames, so convert to the RGB ordering dlib expects.
tracker = dlib.correlation_tracker()
tracker.start_track(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), rect)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Update the tracker with the new frame.
    tracker.update(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Fetch the tracked position. Its edges are floating-point values, while
    # cv2.rectangle requires integer points, so cast before drawing.
    pos = tracker.get_position()
    top_left = (int(pos.left()), int(pos.top()))
    bottom_right = (int(pos.right()), int(pos.bottom()))

    # Draw the tracking result on the frame.
    cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)

    # Show the result.
    cv2.imshow('Tracking', frame)

    # Press 'q' to quit.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources.
cap.release()
cv2.destroyAllWindows()

注意事项

目标初始化：目标在第一帧中的位置对于跟踪器的性能至关重要。如果初始化不准确，跟踪可能会失败。
视频质量：视频的质量（如分辨率、帧率、光照条件等）也会影响跟踪器的性能。
遮挡和快速移动：当目标被遮挡或者快速移动时，跟踪器可能会遇到困难。虽然 dlib.correlation_tracker 已经在很多场景下表现良好，但在这些情况下可能需要更复杂的策略。

通过 dlib.correlation_tracker，你可以实现高效且相对准确的目标跟踪，适用于各种计算机视觉应用，如视频监控、人机交互等。