当前位置：首页 > article >正文

二.使用ffmpeg对原始音频数据重采样并进行AAC编码

article 2025/3/17 6:54:59

重采样：改变音频三元组【采样率、采样格式、通道数】中的任意一个或多个值。

一.为什么要进行重采样？

1.原始音频数据和编码器的数据格式不一致

2.播放器要求的和获取的数据不一致

3.方便运算

二.本次编码流程

1.了解本机麦克风的参数：我的设备设置为 44100 Hz / 16 位 / 2 声道；每次读取到的数据包大小（pkt.size）因设备而异，本机 Windows 下为 88200 字节

2.ffmpeg获取麦克风数据

3.ffmpeg对数据进行重采样（本次三元组无需变换）

4.使用AAC编码器对重采样后的数据进行AAC编码，然后存入.aac文件

5.使用ffplay播放测试

三.整体代码

#include "customcodex.hpp"

/**
 * Append frame_size samples (per channel) from input_data to the audio FIFO.
 *
 * The FIFO is first resized to its current fill level plus frame_size, then
 * the samples are written.
 *
 * @param fifo        FIFO that receives the samples.
 * @param input_data  Plane pointers holding the samples to append.
 * @param frame_size  Number of samples per channel to append.
 * @return 0 on success; the av_audio_fifo_realloc() error code if resizing
 *         fails; AVERROR_EXIT if fewer than frame_size samples were written.
 */
int add_samples_to_fifo(AVAudioFifo *fifo,
                        uint8_t **input_data,
                        const int frame_size)
{
    printf("add_samples_to_fifo size:%d \n", frame_size);

    // Grow the FIFO so it can hold what is already queued plus the new samples.
    const int wanted_capacity = av_audio_fifo_size(fifo) + frame_size;
    const int realloc_status = av_audio_fifo_realloc(fifo, wanted_capacity);
    if (realloc_status < 0)
    {
        printf("Error, Failed to reallocate fifo!\n");
        return realloc_status;
    }

    void **planes = reinterpret_cast<void **>(input_data);
    const int written = av_audio_fifo_write(fifo, planes, frame_size);
    if (written >= frame_size)
    {
        return 0;
    }

    // A short write (or a negative error code) is reported as a generic failure.
    printf("Error, Failed to write data to fifo!\n");
    return AVERROR_EXIT;
}
/**
 * Move the next block of samples from the FIFO into frame.
 *
 * Despite its name this function does not encode anything; it only fills the
 * frame. At most c_ctx->frame_size samples (per channel) are taken, fewer if
 * the FIFO holds less. frame->nb_samples is updated to the number of samples
 * actually stored so that a short final block is not encoded together with
 * stale data left over from the previous frame.
 *
 * @param fifo     FIFO to read from.
 * @param fmt_ctx  Unused; kept so existing callers keep compiling.
 * @param c_ctx    Encoder context; only frame_size is read.
 * @param frame    Destination frame. Its buffers must have been allocated
 *                 with room for c_ctx->frame_size samples.
 * @return 0 on success, a negative AVERROR code if the frame cannot be made
 *         writable, AVERROR_EXIT if the FIFO read comes up short.
 */
int read_fifo_and_encode(AVAudioFifo *fifo,
                         AVFormatContext *fmt_ctx,
                         AVCodecContext *c_ctx,
                         AVFrame *frame)
{
    (void)fmt_ctx;

    const int available = av_audio_fifo_size(fifo);
    const int frame_size = FFMIN(available, c_ctx->frame_size);
    printf("read fifo - size : %d ,c_ctx->frame_size : %d\n", available, c_ctx->frame_size);

    // The encoder may still hold a reference to the buffer it was handed on
    // the previous call; make sure we own the buffer before overwriting it.
    int ret = av_frame_make_writable(frame);
    if (ret < 0)
    {
        printf("Error, Failed to make frame writable!\n");
        return ret;
    }

    ret = av_audio_fifo_read(fifo, reinterpret_cast<void **>(frame->data), frame_size);
    if (ret < frame_size)
    {
        printf("Error, Failed to read data from fifo!\n");
        return AVERROR_EXIT;
    }

    // Tell the encoder how many samples of the buffer are valid.
    frame->nb_samples = frame_size;

    return 0;
}
/**
 * Create and open a libfdk_aac encoder for 44.1 kHz, stereo, signed 16-bit
 * packed audio.
 *
 * @param codec_ctx  Receives the opened encoder context on success. It is set
 *                   to NULL on failure, so the caller never sees a
 *                   half-initialised context.
 * @return 0 on success, -1 if the encoder is unavailable, allocation fails,
 *         or the encoder cannot be opened.
 */
int open_coder(AVCodecContext **codec_ctx)
{
    if (!codec_ctx)
    {
        fprintf(stderr, "open_coder: codec_ctx is NULL\n");
        return -1;
    }
    *codec_ctx = NULL;

    // Look up the encoder by name.
    const AVCodec *codex = avcodec_find_encoder_by_name("libfdk_aac");
    if (!codex)
    {
        fprintf(stderr, "Codec not found\n");
        return -1;
    }

    // Encoder context.
    AVCodecContext *ctx = avcodec_alloc_context3(codex);
    if (!ctx)
    {
        fprintf(stderr, "failed avcodec_alloc_context3 \n");
        return -1;
    }

    ctx->sample_fmt = AV_SAMPLE_FMT_S16;       // sample format
    ctx->channel_layout = AV_CH_LAYOUT_STEREO; // channel layout
    ctx->channels = 2;                         // channel count
    ctx->sample_rate = 44100;                  // sample rate
    // NOTE(review): 0 presumably lets the encoder pick its default bitrate --
    // confirm. Author's reference values: AAC 128k, AAC HE 64k, AAC HE v2 32k.
    ctx->bit_rate = 0;
    // ctx->profile is left at its default; the original listing mentions
    // FF_PROFILE_AAC_HE_V2 as an alternative that was not enabled.

    if (avcodec_open2(ctx, codex, NULL) < 0)
    {
        fprintf(stderr, "failed avcodec_open2 \n");
        avcodec_free_context(&ctx); // do not leak the unopened context
        return -1;
    }

    *codec_ctx = ctx;
    return 0;
}
/**
 * Send one frame to the encoder and write every packet it produces to outfile.
 *
 * @param codec_ctx  Opened encoder context.
 * @param avframe    Frame to encode, or NULL to flush the encoder.
 * @param outpkt     Scratch packet that receives the encoder output.
 * @param outfile    Destination stream for the encoded bytes.
 * @return 0 once the encoder has no more output for now (EAGAIN) or has been
 *         fully flushed (EOF); -1 if sending, receiving or writing fails.
 */
int encode(AVCodecContext *codec_ctx, AVFrame *avframe, AVPacket *outpkt, FILE *outfile)
{
    // avframe is NULL for the flush call, so it must not be dereferenced
    // unconditionally; 0 is logged in that case.
    printf("encode - codec_ctx->frame_size: %d ,avframe-size:%d\n",
           codec_ctx->frame_size, avframe ? avframe->nb_samples : 0);

    int ret = avcodec_send_frame(codec_ctx, avframe);
    printf("avcodec_send_frame:%d\n", ret);
    if (ret < 0)
    {
        fprintf(stderr, "Error sending frame to encoder\n");
        return -1;
    }

    for (;;)
    {
        // Fetch the next encoded packet, if any.
        ret = avcodec_receive_packet(codec_ctx, outpkt);
        printf("avcodec_receive_packet:%d\n", ret);

        // EAGAIN: the encoder needs more input before it can emit a packet.
        // EOF: the encoder has been flushed completely.
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            return 0;
        }
        if (ret < 0)
        {
            // Report the failure to the caller instead of terminating the
            // process, so the caller can still release its resources.
            fprintf(stderr, "Error receiving packet from encoder: %d\n", ret);
            return -1;
        }

        printf("fwrite - outpkt.size: %d \n", outpkt->size);
        const size_t expected = static_cast<size_t>(outpkt->size);
        const size_t written = fwrite(outpkt->data, 1, expected, outfile);
        av_packet_unref(outpkt);
        if (written != expected)
        {
            fprintf(stderr, "Error writing encoded packet to file\n");
            return -1;
        }
        fflush(outfile);
    }
}

int read_audio()
{
    int ret = 0;
    char errors[1024];
    AVFormatContext *fmt_ctx = NULL;
    AVDictionary *options = NULL;
    AVAudioFifo *fifo = nullptr;
    FILE *outfile = fopen("./out.pcm", "wb+");
    FILE *outfile_aac = fopen("./out.aac", "wb+");
    if (outfile == nullptr)
    {
        printf("filed open out file\n");
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    int frame_count = 0;
    const char *devicename = "audio=麦克风 (Realtek(R) Audio)";

    // 找到采集工具
    const AVInputFormat *iformat = av_find_input_format("dshow");
    if (iformat == NULL)
    {
        printf("AVInputFormat find failed \n");
        return -1;
    }
    // 打开音频设备
    ret = avformat_open_input(&fmt_ctx, devicename, iformat, &options);
    if (ret < 0)
    {
        av_strerror(ret, errors, 1024);
        av_log(NULL, AV_LOG_ERROR, "error:%s", errors);
        return -1;
    }

    // 重采样缓冲区
    uint8_t **src_data = NULL;
    int src_linesize = 0;

    uint8_t **dst_data = NULL;
    int dst_linesize = 0;

    // 初始化重采样上下文
    SwrContext *swr_ctx = initSwr();
    if (!swr_ctx)
    {
        printf("failed init swr\n");
        return -1;
    }

    // 编码器上下文
    AVCodecContext *codec_ctx;
    ret = open_coder(&codec_ctx);
    if (ret != 0)
    {
        return -1;
    }
    // 音频输入数据
    AVFrame *avframe = initInAvframe();
    //
    AVPacket *outpkt = av_packet_alloc();
    av_init_packet(outpkt);

    // Create the FIFO buffer for the audio samples to be encoded.
    fifo = av_audio_fifo_alloc(codec_ctx->sample_fmt, codec_ctx->channels, 1);
    if (!fifo)
    {
        printf("Error, Failed to alloc fifo!\n");
        return -1;
    }

    // 88200/2=44100/2=22050
    // 每次读取数据大小是88200，16位2个字节，双声道
    // 创建输入缓冲区
    initBuffer(&src_data, src_linesize, &dst_data, dst_linesize);

    av_log(NULL, AV_LOG_DEBUG, "src-size:%d , dst-size:%d\n", src_linesize, dst_linesize);
    int count = 0;
    while (1)
    {
        int frame_size = codec_ctx->frame_size;

        static bool finished = false;
        while (av_audio_fifo_size(fifo) < frame_size)
        {
            printf("av_audio_fifo_size(fifo) :%d , frame_size :%d\n", av_audio_fifo_size(fifo), frame_size);
            ret = av_read_frame(fmt_ctx, &pkt);
            printf("av_read_frame-ret : %d\n", ret);
            if (ret == 0)
            {
                printf("pkt-size:%d\n", pkt.size);
                memcpy((void *)src_data[0], (void *)pkt.data, pkt.size);
                ret = swr_convert(swr_ctx,                    // 重采样的上下文
                                  dst_data,                   // 输出结果缓冲区
                                  22050,                      // 每个通道的采样数
                                  (const uint8_t **)src_data, // 输入缓冲区
                                  22050);                     // 输入单个通道的采样数
                printf("swr_convert-ret:%d\n", ret);
                ret = add_samples_to_fifo(fifo, dst_data, 22050);
                printf("add_samples_to_fifo-ret:%d\n", ret);
            }
            if (count >= 20)
            {
                finished = true;
                break;
            }
            count++;
            printf("##################### count:%d\n", count);
        }

        while (av_audio_fifo_size(fifo) > frame_size || (finished && av_audio_fifo_size(fifo) > 0))
        {
            ret = read_fifo_and_encode(fifo, fmt_ctx, codec_ctx, avframe);
            encode(codec_ctx, avframe, outpkt, outfile_aac);
        }
        if (finished)
        {
            // 强制将编码器缓冲区中的音频进行编码输出
            encode(codec_ctx, nullptr, outpkt, outfile_aac);
            break;
        }
    }
    freePtr(src_data, dst_data, swr_ctx, fmt_ctx, outfile, outfile_aac);

    return 0;
}
/**
 * Release the resample buffers, the resampler, the input device and the two
 * output files.
 *
 * Every argument may be NULL. All pointers are received by value, so the
 * caller's own copies are left dangling and must not be used afterwards.
 *
 * @param src_data     Resampler input buffer (plane pointer array), or NULL.
 * @param dst_data     Resampler output buffer (plane pointer array), or NULL.
 * @param swr_ctx      Resampler context, or NULL.
 * @param fmt_ctx      Opened input context, or NULL.
 * @param outfile      First output stream, or NULL.
 * @param outfile_aac  Second output stream, or NULL.
 */
void freePtr(uint8_t **src_data, uint8_t **dst_data, SwrContext *swr_ctx, AVFormatContext *fmt_ctx,
             FILE *outfile, FILE *outfile_aac)
{
    // Each buffer is two allocations: the sample storage behind plane 0 and
    // the array of plane pointers itself. Both have to be freed. Passing the
    // array pointer straight to av_freep() would only re-free plane 0 (and
    // dereference NULL when the array is NULL), so its address is passed.
    if (src_data)
    {
        av_freep(&src_data[0]);
    }
    av_freep(&src_data);

    if (dst_data)
    {
        av_freep(&dst_data[0]);
    }
    av_freep(&dst_data);

    // Release the resampler context.
    swr_free(&swr_ctx);
    avformat_close_input(&fmt_ctx);

    // fclose(NULL) is undefined behaviour, so guard both streams.
    if (outfile)
    {
        fclose(outfile);
    }
    if (outfile_aac)
    {
        fclose(outfile_aac);
    }
    av_log(NULL, AV_LOG_DEBUG, "end");
}
/**
 * Create and initialise a resampler whose input and output are both
 * 2 channels, 44100 Hz, signed 16-bit packed samples.
 *
 * @return The initialised context, or NULL if allocation or initialisation
 *         fails. The caller releases it with swr_free().
 */
SwrContext *initSwr()
{
    SwrContext *swr_ctx = swr_alloc();
    if (!swr_ctx)
    {
        printf("failed swr_alloc\n");
        return NULL;
    }

    // Input side.
    av_opt_set_int(swr_ctx, "in_channel_count", 2, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate", 44100, 0); // input sample rate
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);

    // Output side.
    av_opt_set_int(swr_ctx, "out_channel_count", 2, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate", 44100, 0); // output sample rate
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);

    // Return NULL on failure so the caller's NULL check actually catches it.
    const int ret = swr_init(swr_ctx);
    if (ret < 0)
    {
        printf("failed swr_init: %d\n", ret);
        swr_free(&swr_ctx);
        return NULL;
    }
    return swr_ctx;
}
/**
 * Allocate the frame that carries samples to the encoder: 1024 samples per
 * channel, stereo, signed 16-bit packed.
 *
 * @return The frame with its data buffer allocated, or nullptr on failure.
 *         The caller releases it with av_frame_free().
 */
AVFrame *initInAvframe()
{
    AVFrame *avframe = av_frame_alloc();
    if (!avframe)
    {
        printf("failed alloc frame\n");
        return nullptr;
    }

    // NOTE(review): 1024 is presumably chosen to match the AAC encoder's
    // frame_size -- confirm against the opened codec context.
    avframe->nb_samples = 1024; // samples per channel in one frame
    avframe->format = AV_SAMPLE_FMT_S16;
    avframe->channel_layout = AV_CH_LAYOUT_STEREO;

    // Buffer payload: 1024 samples * 2 channels * 2 bytes = 4096 bytes.
    // The return code is the reliable failure signal; `buf` is an array
    // member, so testing `!avframe->buf` can never be true.
    if (av_frame_get_buffer(avframe, 0) < 0)
    {
        printf("failed get frame buffer\n");
        av_frame_free(&avframe);
        return nullptr;
    }
    return avframe;
}
void initBuffer(uint8_t ***src_data, int &src_linesize, uint8_t ***dst_data, int &dst_linesize)
{
    av_samples_alloc_array_and_samples(src_data,          // 输出缓冲区地址
                                       &src_linesize,     // 缓冲区的大小
                                       2,                 // 通道个数
                                       22050,             // 单通道采样个数
                                       AV_SAMPLE_FMT_S16, // 采样格式
                                       0);

    // 创建输出缓冲区
    av_samples_alloc_array_and_samples(dst_data,          // 输出缓冲区地址
                                       &dst_linesize,     // 缓冲区的大小
                                       2,                 // 通道个数
                                       22050,             // 单通道采样个数
                                       AV_SAMPLE_FMT_S16, // 采样格式
                                       0);
}

四.遇到的问题

1.采样格式和采样个数不明确

解决办法：查看系统声音设置中，相应设备的输出格式，一般包含位深和采样率

采样个数可以在打开设备并读取音频数据后，通过打印 pkt.size 推算出来：pkt.size 是字节数，单通道采样个数 = pkt.size ÷（通道数 × 每个采样的字节数），例如 88200 ÷（2 × 2）= 22050。

查看全文

http://www.kler.cn/a/588017.html