[WebRTC] A Brief Analysis of the Video Capture Module Flow
Table of Contents
- 1. Getting video frames from the camera (CaptureInputPin::Receive())
- 2. Processing the captured frame (CaptureSinkFilter::ProcessCapturedFrame())
- 3. Parsing the Windows-layer frame info and dispatching it to a frame handler (VideoCaptureImpl::IncomingFrame())
- 4. Frame handlers (VideoCaptureImpl::DeliverCapturedFrame && DeliverRawFrame)
- Example: the callback used for encoding
This article walks through the main flow of capturing frames from a camera on the Windows platform, where WebRTC uses the DirectShow framework.
1. Getting video frames from the camera (CaptureInputPin::Receive())
Frames are received from the camera in CaptureInputPin::Receive(), defined in modules/video_capture/windows/sink_filter_ds.cc, which pulls video frames from the underlying camera. Each captured frame is handed to the layer above via ProcessCapturedFrame().
COM_DECLSPEC_NOTHROW STDMETHODIMP
CaptureInputPin::Receive(IMediaSample* media_sample) {
  // Check that this code is running on the capture thread.
  RTC_DCHECK_RUN_ON(&capture_checker_);
  CaptureSinkFilter* const filter = static_cast<CaptureSinkFilter*>(Filter());

  // Check whether a flush is in progress.
  if (flushing_.load(std::memory_order_relaxed))
    return S_FALSE;

  // Check whether a runtime error has occurred.
  if (runtime_error_.load(std::memory_order_relaxed))
    return VFW_E_RUNTIME_ERROR;

  // No capture thread ID yet; record it.
  if (!capture_thread_id_) {
    // Make sure we set the thread name only once.
    // Store the current thread ID.
    capture_thread_id_ = GetCurrentThreadId();
    // Set the current thread's name.
    rtc::SetCurrentThreadName("webrtc_video_capture");
  }

  AM_SAMPLE2_PROPERTIES sample_props = {};
  // Fetch the sample properties.
  GetSampleProperties(media_sample, &sample_props);

  // Has the format changed in this sample?
  if (sample_props.dwSampleFlags & AM_SAMPLE_TYPECHANGED) {
    // Check the derived class accepts the new format.
    // This shouldn't fail as the source must call QueryAccept first.
    // Note: This will modify resulting_capability_.
    // That should be OK as long as resulting_capability_ is only modified
    // on this thread while it is running (filter is not stopped), and only
    // modified on the main thread when the filter is stopped (i.e. this thread
    // is not running).
    // Check whether translating the media type into a VideoCaptureCapability fails.
    if (!TranslateMediaTypeToVideoCaptureCapability(sample_props.pMediaType,
                                                    &resulting_capability_)) {
      // Raise a runtime error if we fail the media type.
      runtime_error_ = true;
      EndOfStream();
      Filter()->NotifyEvent(EC_ERRORABORT, VFW_E_TYPE_NOT_ACCEPTED, 0);
      return VFW_E_INVALIDMEDIATYPE;
    }
  }

  // Process the captured frame.
  filter->ProcessCapturedFrame(sample_props.pbBuffer, sample_props.lActual,
                               resulting_capability_);
  return S_OK;
}
The implementation of GetSampleProperties() is:
void GetSampleProperties(IMediaSample* sample, AM_SAMPLE2_PROPERTIES* props) {
  rtc::scoped_refptr<IMediaSample2> sample2;
  // If the sample exposes the IMediaSample2 interface, read the properties directly.
  if (SUCCEEDED(GetComInterface(sample, &sample2))) {
    sample2->GetProperties(sizeof(*props), reinterpret_cast<BYTE*>(props));
    return;
  }

  // Get the properties the hard way.
  // Size of the props structure.
  props->cbData = sizeof(*props);
  // Type-specific flags or options associated with the media type.
  props->dwTypeSpecificFlags = 0;
  // Stream ID. AM_STREAM_MEDIA is a predefined value marking this as a media stream.
  props->dwStreamId = AM_STREAM_MEDIA;
  // Flags for this media sample; 0 means no sample flags are set yet.
  // These flags can indicate things such as keyframes and sync points.
  props->dwSampleFlags = 0;

  // Check whether there is a discontinuity in the stream (e.g. a scene cut).
  if (sample->IsDiscontinuity() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_DATADISCONTINUITY;

  // Check whether this sample is a preroll sample.
  // Preroll samples are used before actual playback to synchronize audio/video;
  // S_OK means this is a preroll sample.
  if (sample->IsPreroll() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_PREROLL;

  // Check whether this sample is a sync point, i.e. a point that playback can
  // use to synchronize audio/video. S_OK means it is a sync point.
  if (sample->IsSyncPoint() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_SPLICEPOINT;

  // Try to fetch the start and stop times of the sample.
  if (SUCCEEDED(sample->GetTime(&props->tStart, &props->tStop)))
    props->dwSampleFlags |= AM_SAMPLE_TIMEVALID | AM_SAMPLE_STOPVALID;

  // Fetch the media type.
  if (sample->GetMediaType(&props->pMediaType) == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_TYPECHANGED;

  // Pointer to the sample's data buffer.
  sample->GetPointer(&props->pbBuffer);
  // Actual length of the sample data.
  props->lActual = sample->GetActualDataLength();
  // Size of the data buffer.
  props->cbBuffer = sample->GetSize();
}
The implementation of TranslateMediaTypeToVideoCaptureCapability() is:
// Returns true if the media type is supported, false otherwise.
// For supported types, the `capability` will be populated accordingly.
bool TranslateMediaTypeToVideoCaptureCapability(
    const AM_MEDIA_TYPE* media_type,
    VideoCaptureCapability* capability) {
  // capability must not be null.
  RTC_DCHECK(capability);
  if (!media_type || media_type->majortype != MEDIATYPE_Video ||
      !media_type->pbFormat) {
    return false;
  }

  const BITMAPINFOHEADER* bih = nullptr;
  /*
  1. The VideoInfo format type corresponds to the VIDEOINFOHEADER structure.
     It is meant for non-interlaced video streams and has no support for advanced
     features such as interlacing or picture aspect ratio information.
  2. The VideoInfo2 format type corresponds to the VIDEOINFOHEADER2 structure,
     an extension of VIDEOINFOHEADER. It supports interlaced streams and picture
     aspect ratio, allowing finer control over playback and processing.
  */
  if (media_type->formattype == FORMAT_VideoInfo) {
    bih = &reinterpret_cast<VIDEOINFOHEADER*>(media_type->pbFormat)->bmiHeader;
  } else if (media_type->formattype == FORMAT_VideoInfo2) {
    bih = &reinterpret_cast<VIDEOINFOHEADER2*>(media_type->pbFormat)->bmiHeader;
  } else {
    return false;
  }

  RTC_LOG(LS_INFO) << "TranslateMediaTypeToVideoCaptureCapability width:"
                   << bih->biWidth << " height:" << bih->biHeight
                   << " Compression:0x" << rtc::ToHex(bih->biCompression);

  const GUID& sub_type = media_type->subtype;
  // Map the concrete sub_type to a VideoType.
  if (sub_type == MEDIASUBTYPE_MJPG &&
      bih->biCompression == MAKEFOURCC('M', 'J', 'P', 'G')) {
    capability->videoType = VideoType::kMJPEG;
  } else if (sub_type == MEDIASUBTYPE_I420 &&
             bih->biCompression == MAKEFOURCC('I', '4', '2', '0')) {
    capability->videoType = VideoType::kI420;
  } else if (sub_type == MEDIASUBTYPE_YUY2 &&
             bih->biCompression == MAKEFOURCC('Y', 'U', 'Y', '2')) {
    capability->videoType = VideoType::kYUY2;
  } else if (sub_type == MEDIASUBTYPE_UYVY &&
             bih->biCompression == MAKEFOURCC('U', 'Y', 'V', 'Y')) {
    capability->videoType = VideoType::kUYVY;
  } else if (sub_type == MEDIASUBTYPE_HDYC) {
    capability->videoType = VideoType::kUYVY;
  } else if (sub_type == MEDIASUBTYPE_RGB24 && bih->biCompression == BI_RGB) {
    capability->videoType = VideoType::kRGB24;
  } else {
    return false;
  }

  // Store the incoming width and height.
  capability->width = bih->biWidth;
  // Store the incoming height;
  // for RGB24 we assume the frame to be upside down.
  if (sub_type == MEDIASUBTYPE_RGB24 && bih->biHeight > 0) {
    capability->height = -(bih->biHeight);
  } else {
    capability->height = abs(bih->biHeight);
  }
  return true;
}
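A side note on the biCompression comparisons above: MAKEFOURCC packs four ASCII characters into a 32-bit value, least significant byte first. The following is a minimal sketch of the equivalent expansion (FourCC is our own helper name here, not a WebRTC or Windows function):
constexpr uint32_t FourCC(char a, char b, char c, char d) {
  // Pack four characters into a DWORD, least significant byte first,
  // which is what the Windows MAKEFOURCC macro expands to.
  return static_cast<uint32_t>(static_cast<uint8_t>(a)) |
         (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 8) |
         (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 16) |
         (static_cast<uint32_t>(static_cast<uint8_t>(d)) << 24);
}
// 'M' = 0x4D, 'J' = 0x4A, 'P' = 0x50, 'G' = 0x47.
static_assert(FourCC('M', 'J', 'P', 'G') == 0x47504A4D, "MJPG");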
2. Processing the captured frame (CaptureSinkFilter::ProcessCapturedFrame())
The previous function talks directly to the Windows platform; this one is a thin relay between the low-level Windows code and the upper layers, and still belongs to the Windows layer. As the code shows, it simply calls IncomingFrame() to pass the captured frame upward. ProcessCapturedFrame() is defined in modules/video_capture/windows/sink_filter_ds.cc.
void CaptureSinkFilter::ProcessCapturedFrame(
    unsigned char* buffer,
    size_t length,
    const VideoCaptureCapability& frame_info) {
  // Called on the capture thread.
  capture_observer_->IncomingFrame(buffer, length, frame_info);
}
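The capture_observer_->IncomingFrame() call lands in VideoCaptureImpl::IncomingFrame() from section 3. The hand-off goes through the VideoCaptureExternal interface (declared in modules/video_capture/video_capture_defines.h); its rough shape is sketched below, so consult that header for the authoritative version:
// Rough shape of the interface the sink filter calls into.
class VideoCaptureExternal {
 public:
  virtual int32_t IncomingFrame(uint8_t* videoFrame,
                                size_t videoFrameLength,
                                const VideoCaptureCapability& frameInfo,
                                int64_t captureTime = 0) = 0;

 protected:
  ~VideoCaptureExternal() {}
};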
3. Parsing the Windows-layer frame info and dispatching it to a frame handler (VideoCaptureImpl::IncomingFrame())
After the frame information has been obtained in the Windows layer, VideoCaptureImpl processes it: it parses some metadata and routes the frame to the appropriate handler. Concretely, the two paths are RawFrame and Frame: a RawFrame is the original, unconverted frame, while a Frame has been converted (normally to I420).
int32_t VideoCaptureImpl::IncomingFrame(uint8_t* videoFrame,
                                        size_t videoFrameLength,
                                        const VideoCaptureCapability& frameInfo,
                                        int64_t captureTime /*=0*/) {
  // Check that this code runs serialized on the expected sequence.
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  MutexLock lock(&api_lock_);

  const int32_t width = frameInfo.width;
  const int32_t height = frameInfo.height;

  TRACE_EVENT1("webrtc", "VC::IncomingFrame", "capture_time", captureTime);

  // If a raw-data callback is registered, deliver the frame unconverted via
  // DeliverRawFrame().
  if (_rawDataCallBack) {
    DeliverRawFrame(videoFrame, videoFrameLength, frameInfo, captureTime);
    return 0;
  }

  // Not encoded, convert to I420.
  if (frameInfo.videoType != VideoType::kMJPEG) {
    // Allow buffers larger than expected. On linux gstreamer allocates buffers
    // page-aligned and v4l2loopback passes us the buffer size verbatim which
    // for most cases is larger than expected.
    // See https://github.com/umlaeute/v4l2loopback/issues/190.
    // Check that the incoming data length is large enough.
    if (auto size = CalcBufferSize(frameInfo.videoType, width, abs(height));
        videoFrameLength < size) {
      RTC_LOG(LS_ERROR) << "Wrong incoming frame length. Expected " << size
                        << ", Got " << videoFrameLength << ".";
      return -1;
    }
  }

  int stride_y = width;
  int stride_uv = (width + 1) / 2;
  int target_width = width;
  int target_height = abs(height);

  // Check the rotation to apply.
  if (apply_rotation_) {
    // Rotating resolution when for 90/270 degree rotations.
    if (_rotateFrame == kVideoRotation_90 ||
        _rotateFrame == kVideoRotation_270) {
      target_width = abs(height);
      target_height = width;
    }
  }

  // Setting absolute height (in case it was negative).
  // In Windows, the image starts bottom left, instead of top left.
  // Setting a negative source height, inverts the image (within LibYuv).
  rtc::scoped_refptr<I420Buffer> buffer = I420Buffer::Create(
      target_width, target_height, stride_y, stride_uv, stride_uv);

  libyuv::RotationMode rotation_mode = libyuv::kRotate0;
  // Map the rotation to libyuv's rotation mode.
  if (apply_rotation_) {
    switch (_rotateFrame) {
      case kVideoRotation_0:
        rotation_mode = libyuv::kRotate0;
        break;
      case kVideoRotation_90:
        rotation_mode = libyuv::kRotate90;
        break;
      case kVideoRotation_180:
        rotation_mode = libyuv::kRotate180;
        break;
      case kVideoRotation_270:
        rotation_mode = libyuv::kRotate270;
        break;
    }
  }

  // Convert the image to I420 using libyuv's conversion routine.
  const int conversionResult = libyuv::ConvertToI420(
      videoFrame, videoFrameLength, buffer.get()->MutableDataY(),
      buffer.get()->StrideY(), buffer.get()->MutableDataU(),
      buffer.get()->StrideU(), buffer.get()->MutableDataV(),
      buffer.get()->StrideV(), 0, 0,  // No Cropping
      width, height, target_width, target_height, rotation_mode,
      ConvertVideoType(frameInfo.videoType));
  if (conversionResult != 0) {
    RTC_LOG(LS_ERROR) << "Failed to convert capture frame from type "
                      << static_cast<int>(frameInfo.videoType) << "to I420.";
    return -1;
  }

  // Build the VideoFrame.
  VideoFrame captureFrame =
      VideoFrame::Builder()
          .set_video_frame_buffer(buffer)
          .set_rtp_timestamp(0)
          .set_timestamp_ms(rtc::TimeMillis())
          .set_rotation(!apply_rotation_ ? _rotateFrame : kVideoRotation_0)
          .build();
  captureFrame.set_ntp_time_ms(captureTime);

  // Hand the converted I420 frame to the layer above.
  DeliverCapturedFrame(captureFrame);
  return 0;
}
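For reference, both the length check and the buffer allocation above follow from the I420 layout: one full-resolution Y plane plus two quarter-resolution chroma planes, i.e. roughly 1.5 bytes per pixel. A minimal sketch of the arithmetic (I420BufferSize is our own helper, not WebRTC's CalcBufferSize):
#include <cstddef>

// I420 holds a width x height Y plane plus two ((width+1)/2) x ((height+1)/2)
// chroma planes.
constexpr size_t I420BufferSize(int width, int height) {
  const size_t chroma_w = (width + 1) / 2;
  const size_t chroma_h = (height + 1) / 2;
  return static_cast<size_t>(width) * height + 2 * chroma_w * chroma_h;
}
// Example: a 640x480 frame needs 307200 + 2 * 320 * 240 = 460800 bytes.
static_assert(I420BufferSize(640, 480) == 460800, "640x480 I420");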
4. Frame handlers (VideoCaptureImpl::DeliverCapturedFrame && DeliverRawFrame)
Depending on the frame, there are two cases, RawFrame and Frame: a RawFrame is the original video frame (not in I420 format), and a Frame is the converted video frame (I420 format). Raw frames go through DeliverRawFrame(), while converted frames go through DeliverCapturedFrame().
void VideoCaptureImpl::DeliverRawFrame(uint8_t* videoFrame,
                                       size_t videoFrameLength,
                                       const VideoCaptureCapability& frameInfo,
                                       int64_t captureTime) {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  // Update the frame count.
  UpdateFrameCount();
  // Deliver the raw frame.
  _rawDataCallBack->OnRawFrame(videoFrame, videoFrameLength, frameInfo,
                               _rotateFrame, captureTime);
}

int32_t VideoCaptureImpl::DeliverCapturedFrame(VideoFrame& captureFrame) {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  // Update the frame count.
  UpdateFrameCount();  // frame count used for local frame rate callback.
  // Deliver the converted frame.
  if (_dataCallBack) {
    _dataCallBack->OnFrame(captureFrame);
  }
  return 0;
}
Both functions call UpdateFrameCount(), which maintains a history of incoming frame timestamps:
void VideoCaptureImpl::UpdateFrameCount() {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  if (_incomingFrameTimesNanos[0] / rtc::kNumNanosecsPerMicrosec == 0) {
    // first no shift
    // First frame; no shift needed.
  } else {
    // Shift the history by one entry.
    for (int i = (kFrameRateCountHistorySize - 2); i >= 0; --i) {
      _incomingFrameTimesNanos[i + 1] = _incomingFrameTimesNanos[i];
    }
  }
  _incomingFrameTimesNanos[0] = rtc::TimeNanos();
}
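The history is ordered newest-first, so a frame rate can be estimated by counting entries that fall within the last second. The following is a minimal sketch of that idea (EstimateFrameRate is our own name; in WebRTC the actual computation lives in VideoCaptureImpl::CalculateFrameRate()):
// Count how many stored timestamps are at most one second old; since the
// array is ordered newest-first, stop at the first empty or stale entry.
int EstimateFrameRate(const int64_t* times_nanos,
                      int history_size,
                      int64_t now_nanos) {
  int frames_in_last_second = 0;
  for (int i = 0; i < history_size; ++i) {
    if (times_nanos[i] == 0 ||
        now_nanos - times_nanos[i] > rtc::kNumNanosecsPerSec) {
      break;
    }
    ++frames_in_last_second;
  }
  return frames_in_last_second;
}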
With the flow above, a video frame has been captured successfully; it can then be rendered, encoded, and so on. OnFrame() and OnRawFrame() are both callbacks that can be chosen as the situation requires. In VideoCaptureImpl, callbacks are registered via RegisterCaptureDataCallback(), an overloaded function that can register either _rawDataCallBack or _dataCallBack.
void VideoCaptureImpl::RegisterCaptureDataCallback(
    rtc::VideoSinkInterface<VideoFrame>* dataCallBack) {
  MutexLock lock(&api_lock_);
  RTC_DCHECK(!_rawDataCallBack);
  // Register the callback for converted frames.
  _dataCallBack = dataCallBack;
}

void VideoCaptureImpl::RegisterCaptureDataCallback(
    RawVideoSinkInterface* dataCallBack) {
  MutexLock lock(&api_lock_);
  RTC_DCHECK(!_dataCallBack);
  // Register the callback for raw frames.
  _rawDataCallBack = dataCallBack;
}
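As a usage sketch, a sink for converted frames only needs to implement rtc::VideoSinkInterface<VideoFrame>::OnFrame(). The class below (MyFrameSink) is a hypothetical example of ours, not part of WebRTC:
#include "api/video/video_frame.h"
#include "api/video/video_sink_interface.h"
#include "rtc_base/logging.h"

// A hypothetical sink. OnFrame() is invoked on the capture path, so keep it
// lightweight and hand the frame off to a render/encode queue if needed.
class MyFrameSink : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
 public:
  void OnFrame(const webrtc::VideoFrame& frame) override {
    RTC_LOG(LS_INFO) << "Captured frame " << frame.width() << "x"
                     << frame.height();
  }
};

// Registration (capture_module is assumed to be a
// rtc::scoped_refptr<webrtc::VideoCaptureModule>):
//   MyFrameSink sink;
//   capture_module->RegisterCaptureDataCallback(&sink);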
Example: the callback used for encoding
If the captured frame is going to be encoded, the OnFrame() below is invoked. It maintains timestamp bookkeeping and checks for congestion-window pushback and encoder overload; if everything looks fine, it calls MaybeEncodeVideoFrame() to run the frame through the encoding pipeline.
void VideoStreamEncoder::OnFrame(Timestamp post_time,
                                 bool queue_overload,
                                 const VideoFrame& video_frame) {
  RTC_DCHECK_RUN_ON(encoder_queue_.get());
  VideoFrame incoming_frame = video_frame;

  // In some cases, e.g., when the frame from decoder is fed to encoder,
  // the timestamp may be set to the future. As the encoding pipeline assumes
  // capture time to be less than present time, we should reset the capture
  // timestamps here. Otherwise there may be issues with RTP send stream.
  if (incoming_frame.timestamp_us() > post_time.us())
    incoming_frame.set_timestamp_us(post_time.us());

  // Capture time may come from clock with an offset and drift from clock_.
  // NTP: Network Time Protocol
  // RTP: Real-time Transport Protocol
  int64_t capture_ntp_time_ms;
  if (video_frame.ntp_time_ms() > 0) {
    capture_ntp_time_ms = video_frame.ntp_time_ms();
  } else if (video_frame.render_time_ms() != 0) {
    capture_ntp_time_ms = video_frame.render_time_ms() + delta_ntp_internal_ms_;
  } else {
    capture_ntp_time_ms = post_time.ms() + delta_ntp_internal_ms_;
  }
  incoming_frame.set_ntp_time_ms(capture_ntp_time_ms);

  // Convert NTP time, in ms, to RTP timestamp.
  const int kMsToRtpTimestamp = 90;
  incoming_frame.set_rtp_timestamp(
      kMsToRtpTimestamp * static_cast<uint32_t>(incoming_frame.ntp_time_ms()));

  // Identifier should remain the same for newly produced incoming frame and the
  // received |video_frame|.
  incoming_frame.set_presentation_timestamp(
      video_frame.presentation_timestamp());

  // If this frame's NTP timestamp is not newer than the previous frame's, drop it.
  if (incoming_frame.ntp_time_ms() <= last_captured_timestamp_) {
    // We don't allow the same capture time for two frames, drop this one.
    RTC_LOG(LS_WARNING) << "Same/old NTP timestamp ("
                        << incoming_frame.ntp_time_ms()
                        << " <= " << last_captured_timestamp_
                        << ") for incoming frame. Dropping.";
    // Drop this frame so that timestamps stay strictly increasing over time.
    ProcessDroppedFrame(incoming_frame,
                        VideoStreamEncoderObserver::DropReason::kBadTimestamp);
    return;
  }

  bool log_stats = false;
  if (post_time.ms() - last_frame_log_ms_ > kFrameLogIntervalMs) {
    last_frame_log_ms_ = post_time.ms();
    log_stats = true;
  }

  last_captured_timestamp_ = incoming_frame.ntp_time_ms();

  // Notify the stats observer that a new frame has arrived.
  encoder_stats_observer_->OnIncomingFrame(incoming_frame.width(),
                                           incoming_frame.height());
  // Optional frame instrumentation.
  if (frame_instrumentation_generator_) {
    frame_instrumentation_generator_->OnCapturedFrame(incoming_frame);
  }

  // Count captured frames.
  ++captured_frame_count_;
  // Should this frame be dropped due to congestion window pushback?
  bool cwnd_frame_drop =
      cwnd_frame_drop_interval_ &&
      (cwnd_frame_counter_++ % cwnd_frame_drop_interval_.value() == 0);
  // If the queue is not overloaded and the congestion window does not force a
  // drop, the frame may be encoded.
  if (!queue_overload && !cwnd_frame_drop) {
    MaybeEncodeVideoFrame(incoming_frame, post_time.us());
  } else {
    if (cwnd_frame_drop) {
      // Frame drop by congestion window pushback. Do not encode this
      // frame.
      ++dropped_frame_cwnd_pushback_count_;
    } else {
      // There is a newer frame in flight. Do not encode this frame.
      RTC_LOG(LS_VERBOSE)
          << "Incoming frame dropped due to that the encoder is blocked.";
      ++dropped_frame_encoder_block_count_;
    }
    // Drop this frame.
    ProcessDroppedFrame(
        incoming_frame,
        cwnd_frame_drop
            ? VideoStreamEncoderObserver::DropReason::kCongestionWindow
            : VideoStreamEncoderObserver::DropReason::kEncoderQueue);
  }

  // Periodic stats logging.
  if (log_stats) {
    RTC_LOG(LS_INFO) << "Number of frames: captured " << captured_frame_count_
                     << ", dropped (due to congestion window pushback) "
                     << dropped_frame_cwnd_pushback_count_
                     << ", dropped (due to encoder blocked) "
                     << dropped_frame_encoder_block_count_ << ", interval_ms "
                     << kFrameLogIntervalMs;
    captured_frame_count_ = 0;
    dropped_frame_cwnd_pushback_count_ = 0;
    dropped_frame_encoder_block_count_ = 0;
  }
}
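A quick worked example of the kMsToRtpTimestamp conversion above: video RTP timestamps tick at 90 kHz, i.e. 90 ticks per millisecond. The numbers below are illustrative, not taken from the source:
// 90 kHz video clock: 90 RTP ticks per millisecond of NTP time.
constexpr int kMsToRtpTimestamp = 90;
// An NTP capture time of 1000 ms maps to RTP timestamp 90000.
static_assert(kMsToRtpTimestamp * 1000 == 90000, "1 s -> 90000 ticks");
// Two frames 33 ms apart (~30 fps) are 33 * 90 = 2970 RTP ticks apart.
static_assert(kMsToRtpTimestamp * 33 == 2970, "one frame at ~30 fps");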