当前位置：首页 > article >正文

P_all: 投影矩阵（Projection Matrix）

article 2025/2/3 6:18:27

P_all 是所有摄像头的投影矩阵（Projection Matrix）的集合。每个摄像头的投影矩阵 Pi 是一个 3×4 的矩阵，用于将世界坐标系中的 3D 点 X=[X,Y,Z,1]T 投影到该摄像头的 2D 图像平面上的点 u=[u,v,1]T。投影关系可以表示为：

λu=PiX（λ 为非零尺度因子：齐次坐标下该等式只在相差一个尺度的意义下成立，将 PiX 除以其第三个分量即可得到 [u,v,1]T）

其中：

u=[u,v,1]T 是 2D 图像平面上的齐次坐标。
X=[X,Y,Z,1]T 是世界坐标系中的齐次 3D 坐标。
Pi是第 i 个摄像头的投影矩阵。

投影矩阵 Pi 的组成

投影矩阵 Pi 由摄像头的内参矩阵 Ki和外参矩阵 [Ri∣ti] 组成：

Pi=Ki[Ri∣ti]

其中：

Ki 是第 i 个摄像头的 3×3 内参矩阵（包含焦距和主点坐标）。
Ri 是 3×3 的旋转矩阵，ti 是 3×1 的平移向量，二者共同描述从世界坐标系到相机坐标系的变换。

Pall 的含义

P_all 是所有摄像头的投影矩阵的集合。假设有 23 个摄像头，那么 P_all 是一个长度为 23 的列表，每个元素是一个 3×4 的投影矩阵 Pi，即：

Pall=[P1,P2,…,P23]

其中，每个 Pi 对应一个摄像头的投影矩阵。

Pall的作用

在多视角三角化中，P_all 用于将多个摄像头的 2D 观测数据（即关键点的 2D 坐标）与 3D 世界坐标联系起来。具体来说：

线性三角化：
通过构造超定方程组 AX=0，利用所有摄像头的投影矩阵 Pi 和对应的 2D 点 (ui,vi)，求解 3D 点 X。
非线性优化：
在 Bundle Adjustment 中，P_all 用于计算重投影误差，即 3D 点 X 投影到每个摄像头的 2D 平面后，与观测到的 2D 点之间的误差。

示例代码：构造 Pall

假设你已经有了每个摄像头的内参矩阵 Ki 和外参矩阵 [Ri∣ti]，可以通过以下代码构造 P_all：

以 EasyMocap 中的数据集为例，从数据集的内参文件和外参文件构建 P_all 矩阵：multiview dataset （链接可以下载具体的数据集）：

import cv2
import numpy as np

def read_opencv_matrix(file_storage, key):
    """Return the matrix stored under ``key`` in an OpenCV file storage.

    :param file_storage: object exposing ``getNode(key)`` (e.g. ``cv2.FileStorage``)
    :param key: name of the node to read
    :return: whatever the node's ``mat()`` call yields
    :raises ValueError: if the node reports itself as empty
    """
    entry = file_storage.getNode(key)
    if not entry.empty():
        return entry.mat()
    raise ValueError(f"Node '{key}' is empty or does not exist.")


def read_opencv_string_list(file_storage, key):
    """Return the list of strings stored under ``key`` in an OpenCV file storage.

    :param file_storage: object exposing ``getNode(key)`` (e.g. ``cv2.FileStorage``)
    :param key: name of the sequence node to read
    :return: list with the ``string()`` value of every element, in node order
    :raises ValueError: if the node reports itself as empty
    """
    entry = file_storage.getNode(key)
    if not entry.empty():
        # Walk the sequence by index and collect each element's string value.
        return [entry.at(idx).string() for idx in range(entry.size())]
    raise ValueError(f"Node '{key}' is empty or does not exist.")


def compute_projection_matrix(K, R, t):
    """Compute the projection matrix ``P = K [R | t]``.

    :param K: intrinsic matrix
    :param R: rotation matrix
    :param t: translation; reshaped into a single column before stacking
    :return: product of ``K`` with the horizontally stacked ``[R | t]``
    """
    # Append the translation as one extra column to the right of R.
    rt = np.concatenate((R, t.reshape(-1, 1)), axis=1)
    return np.dot(K, rt)

def main():
    """Build the projection matrix of every camera and save them to ``Pall.npy``.

    Reads ``extri.yml`` and ``intri.yml`` from the current working directory.
    The camera names come from the ``names`` node of ``extri.yml`` and are
    processed in ascending numeric order. For each name the nodes
    ``K_<name>``, ``Rot_<name>`` and ``T_<name>`` are combined into
    ``P = K [R | t]``. A camera whose nodes are missing is reported and
    skipped.

    :raises FileNotFoundError: if either calibration file cannot be opened
    :raises ValueError: if the ``names`` node is missing or a name is not numeric
    """
    extri_path = 'extri.yml'
    intri_path = 'intri.yml'
    extri_fs = cv2.FileStorage(extri_path, cv2.FILE_STORAGE_READ)
    intri_fs = cv2.FileStorage(intri_path, cv2.FILE_STORAGE_READ)

    try:
        # Fail early with a clear message instead of reporting every node as missing.
        for path, storage in ((extri_path, extri_fs), (intri_path, intri_fs)):
            if not storage.isOpened():
                raise FileNotFoundError(f"Cannot open calibration file '{path}'.")

        # Sort numerically but keep the original strings: the node keys are
        # built from the names as written in the file, so converting to int
        # would break zero-padded names such as "01".
        names = sorted(read_opencv_string_list(extri_fs, "names"), key=int)
        print("names: ", names)

        P_all = []
        for name in names:
            try:
                # The distortion coefficients are not part of P = K [R | t],
                # so they are not read here.
                K = read_opencv_matrix(intri_fs, f'K_{name}')
                Rot = read_opencv_matrix(extri_fs, f'Rot_{name}')
                T = read_opencv_matrix(extri_fs, f'T_{name}')
                P_all.append(compute_projection_matrix(K, Rot, T))
            except ValueError as e:
                # NOTE: a skipped camera shifts the index of every later
                # matrix in P_all relative to the camera numbering.
                print(f"Error processing camera {name}: {e}")
                continue
    finally:
        # Always hand the file handles back to OpenCV.
        extri_fs.release()
        intri_fs.release()

    P_all = np.array(P_all)
    print("P_all:", len(P_all))

    np.save('Pall.npy', P_all)

    # The message names the file that is actually written above.
    print("投影矩阵 P_all 已保存到 Pall.npy 文件中。")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

利用数据集中的3D keypoints 关键点可以通过Pall矩阵计算得到23个视角下对应的2D关键点坐标：

可以参考如下代码：

import numpy as np
import json
import os

def project_3d_to_2d(P, points_3d):
    """
    Project 3D points onto a 2D image plane.

    :param P: projection matrix (3x4)
    :param points_3d: 3D point coordinates (Nx3)
    :return: 2D point coordinates (Nx2)
    """
    # Append a column of ones to obtain homogeneous coordinates (Nx4).
    n_points = points_3d.shape[0]
    ones_column = np.ones((n_points, 1))
    homogeneous = np.hstack((points_3d, ones_column))

    # Apply the projection; each row of the result is one projected point.
    projected = np.dot(P, homogeneous.T).T

    # Divide the first two components by the third to leave homogeneous form.
    xy = projected[:, :2]
    w = projected[:, 2:3]
    return xy / w

def process_keypoints3d_file(file_path, P_all, output_root="keypoints2d"):
    """
    Project the 3D keypoints of one JSON file into every camera and save them.

    The JSON file is read as a sequence of records; only the first record's
    ``keypoints3d`` entry is used, and only its first three columns (x, y, z).
    For the matrix at index ``i`` of ``P_all`` the result is written to
    ``<output_root>/<i + 1>/<input file stem>.json`` under the key
    ``keypoints2d``. A file that contains no records is reported and skipped.

    :param file_path: path of the input JSON file
    :param P_all: sequence of 3x4 projection matrices, one per camera
    :param output_root: directory that receives the per-camera sub-directories
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # An empty file would otherwise fail with IndexError on data[0].
    if not data:
        print(f"No 3D keypoints in {file_path}, skipping.")
        return

    # Keep only x, y, z; any further column is dropped.
    keypoints_3d = np.array(data[0]['keypoints3d'])[:, :3]

    # e.g. 000000.json -> 000000
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    for i, P in enumerate(P_all):
        keypoints_2d = project_3d_to_2d(P, keypoints_3d)

        # Directories are numbered from 1, following the position in P_all.
        # NOTE(review): assumes P_all order matches the camera numbering - confirm.
        camera_dir = f"{output_root}/{i + 1}"
        os.makedirs(camera_dir, exist_ok=True)

        output_path = os.path.join(camera_dir, f"{file_name}.json")
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump({"keypoints2d": keypoints_2d.tolist()}, f, indent=4)

        print(f"Camera {i + 1} 2D keypoints saved to {output_path}")

def main():
    """Project every 3D keypoint file in the input directory into all cameras.

    Loads the projection matrices from ``Pall.npy``, then hands each ``.json``
    file found in ``test-dwpose-f-track/keypoints3d`` to
    ``process_keypoints3d_file``.
    """
    # Projection matrices, one per camera.
    P_all = np.load('Pall.npy')

    keypoints3d_dir = 'test-dwpose-f-track/keypoints3d'

    for entry in os.listdir(keypoints3d_dir):
        # Anything that is not a JSON file is ignored.
        if not entry.endswith('.json'):
            continue
        file_path = os.path.join(keypoints3d_dir, entry)
        print(f"Processing file: {file_path}")
        process_keypoints3d_file(file_path, P_all)

    print("所有文件的 2D 关键点已保存到对应的摄像头目录中。")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

python3 apps/demo/mvmp.py feng/ --out feng/test-dwpose-f --annot filtered/annots-dwpose --cfg config/exp/mvmp1f.yml --undis
python3 apps/demo/auto_track.py feng/test-dwpose-f feng/test-dwpose-f-track --track3

查看全文

http://www.kler.cn/a/529386.html