libswresample：为什么 swr_init（）会改变 |in_ch_layout|顺序，使其不再与我解码的 AVFrames 匹配，导致重新采样失败？

libswresample: Why does swr_init() change |in_ch_layout| order so it no longer matches my decoded AVFrames, causing resampling to fail?

提问人：CheekyChips 提问时间：11/17/2023 最后编辑：CheekyChips 更新时间：11/20/2023 访问量：15

问：

我正在尝试编写一些代码，将音频文件重新采样为 16kHz 和 1 声道，然后将其编码为 PCM，但我在声道布局方面遇到了问题。

简而言之：

我的 AVCodecContext 以及我通过 avcodec_receive_frame() 从流中获取的帧，其通道布局顺序均为 AV_CHANNEL_ORDER_UNSPEC。但是当我调用 swr_init() 时，in_ch_layout 的顺序被更改为 AV_CHANNEL_ORDER_NATIVE。然后，当我使用这些 AVFrame 调用 swr_convert_frame() 时，由于通道布局顺序不匹配，重采样失败，因为它认为输入已更改。

更多详情：

我从音频流的编解码器创建一个 AVCodecContext，它的通道布局为 2 个通道、顺序为 AV_CHANNEL_ORDER_UNSPEC；我通过 avcodec_receive_frame() 从流中解码出的任何帧也具有 AV_CHANNEL_ORDER_UNSPEC 的通道布局顺序。

我将 SwrContext 的 |in_ch_layout| 设置为与编解码器上下文相同的通道布局：

    AVChannelLayout in_ch_layout = in_codec_context->ch_layout,
    ...
    int ret = swr_alloc_set_opts2(&swr_ctx, ...
                      &in_ch_layout,
                      ...);

但是 SwrContext->init() 会将其内部的 in_ch_layout 从 AV_CHANNEL_ORDER_UNSPEC 更改为 AV_CHANNEL_ORDER_NATIVE，这意味着下次我调用 swr_convert_frame() 时它会失败，因为输入帧的通道布局与 SwrContext 的不一致。当 swr_init() 被调用时（在我的例子中是由 swr_convert_frame() 间接调用的，但如果我直接调用它，结果也一样），SwrContext->used_ch_layout 和 SwrContext->in_ch_layout 都会被更新为具有 AV_CHANNEL_ORDER_NATIVE 的通道布局顺序：

    // swresample.c
    av_cold int swr_init(struct SwrContext *s){
        ...
        if (!av_channel_layout_check(&s->used_ch_layout))       <-- This hits if I don't set anything for used_ch_layout
            av_channel_layout_default(&s->used_ch_layout, s->in.ch_count);      <-- default is AV_CHANNEL_ORDER_NATIVE
        ...
        if (s->used_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) <-- This hits if I do set used_ch_layout
            av_channel_layout_default(&s->used_ch_layout, s->used_ch_layout.nb_channels);   <-- default is AV_CHANNEL_ORDER_NATIVE

然后，当我下次调用 swr_convert_frame() 时，由于帧的布局与音频流的编解码器相同（AV_CHANNEL_ORDER_UNSPEC），而这与 SwrContext->in_ch_layout（AV_CHANNEL_ORDER_NATIVE）不同，因此它会以 ret |= AVERROR_INPUT_CHANGED 提前退出。

// swresample_frame.c
    int swr_convert_frame(SwrContext *s,
                      AVFrame *out, const AVFrame *in)
    {
        ...
        if ((ret = config_changed(s, out, in)))
            return ret;
        ...

    static int config_changed(SwrContext *s,
                            const AVFrame *out, const AVFrame *in)
    {
        ...
        if ((err = av_channel_layout_copy(&ch_layout, &in->ch_layout)) < 0)
            ...
        if (av_channel_layout_compare(&s->in_ch_layout, &ch_layout) || ...) {   <-- This hits the next time I call swr_convert_frame()
            ret |= AVERROR_INPUT_CHANGED;
        }

    // channel_layout.c
    int av_channel_layout_compare(const AVChannelLayout *chl, const AVChannelLayout *chl1)
    {
        ...
        // if only one is unspecified -> not equal
        if ((chl->order  == AV_CHANNEL_ORDER_UNSPEC) !=
            (chl1->order == AV_CHANNEL_ORDER_UNSPEC))
            return 1;

如果我在重新采样之前把每个输入 AVFrame 的通道布局顺序硬编码为 AV_CHANNEL_ORDER_NATIVE，那么重新采样和后续编码就会起作用，但这感觉是一个非常糟糕的主意，当然，一旦我对通道布局不同的音频文件进行重新采样，它就不会起作用。

    avcodec_receive_frame(in_codec_context, input_frame);

    AVChannelLayout input_frame_ch_layout;
    av_channel_layout_default(&input_frame_ch_layout, 2 /* = nb_channels*/);
    input_frame->ch_layout = input_frame_ch_layout;
    // Bad idea - but "fixes" my issue!

我的问题

我需要对重采样器OR/AND解码的音频帧执行哪些操作，以确保它们具有相同的通道布局顺序并且重采样有效？

如何使从 avcodec_receive_frame() 获得的 AVFrame 的通道顺序与 SwrContext 的输入通道顺序匹配，以便重采样有效？我的理解是，解码后的帧应该已经是“正确的”，我不需要更改它们的任何值，只需要更改我创建的输出（重新采样）帧的值。

在重新采样之前，我需要在 AVFrame 上设置一些东西吗？

为什么 SwrContext 选择将通道顺序更改为 AV_CHANNEL_ORDER_NATIVE？

注意：一种解决方法可能是配合原始数据缓冲区使用 swr_convert()，而不是 swr_convert_frame()，因为它看起来绕过了此检查（因为不涉及帧）。我还没有尝试过这个，但这应该是不必要的，我希望在处理输入和输出帧时使用 swr_convert_frame()。

不幸的是，我找不到使用 swr_convert_frame() 的示例代码（甚至 ffmpeg 自身的代码似乎也没有调用它）。

我的完整 c++ 源代码（为了可读性，省略了错误处理）：

// Path of the input audio file that main() opens for decoding.
std::string fileToUse{"/home/projects/audioFileProject/Audio files/14 Black Cadillacs.wma"};
// Path of the output file that main() creates and writes to.
const std::string outputFilename{"out.wav"};
// Encoder name handed to avcodec_find_encoder_by_name().
// NOTE(review): the identifier says "S16BE" but the value is "pcm_f32le".
const std::string PCMS16BE_encoder_name{"pcm_f32le"};

int main()
{
    // Open audio file
    AVFormatContext* in_format_context = avformat_alloc_context();
    avformat_open_input(&in_format_context, fileToUse.c_str(), NULL, NULL);
    avformat_find_stream_info(in_format_context, NULL);
    
    // Get audio stream from file and corresponding decoder
    AVStream* in_stream = in_format_context->streams[0];
    AVCodecParameters* codec_params = in_stream->codecpar;
    const AVCodec* in_codec = avcodec_find_decoder(codec_params->codec_id);
    AVCodecContext *in_codec_context = avcodec_alloc_context3(in_codec);
    avcodec_parameters_to_context(in_codec_context, codec_params);
    avcodec_open2(in_codec_context, in_codec, NULL);

    // Prepare output stream and output encoder (PCM)
    AVFormatContext* out_format_context = nullptr;
    avformat_alloc_output_context2(&out_format_context, NULL, NULL, outputFilename.c_str());
    AVStream* out_stream = avformat_new_stream(out_format_context, NULL);
    const AVCodec* output_codec = avcodec_find_encoder_by_name(PCMS16BE_encoder_name.c_str());
    AVCodecContext* output_codec_context = avcodec_alloc_context3(output_codec);

    // -------------------------------
    
    AVChannelLayout output_ch_layout;
    av_channel_layout_default(&output_ch_layout, 1);    // AV_CHANNEL_LAYOUT_MONO
    output_codec_context->ch_layout = output_ch_layout;
    
    auto out_sample_rate = 16000;
    output_codec_context->sample_rate = out_sample_rate;
    output_codec_context->sample_fmt = output_codec->sample_fmts[0];
    //output_codec_context->bit_rate = output_codec_context->bit_rate;  // TODO Do we need to set the bit rate?
    output_codec_context->time_base = (AVRational){1, out_sample_rate};
    out_stream->time_base = output_codec_context->time_base;

    auto in_sample_rate = in_codec_context->sample_rate;
    AVChannelLayout in_ch_layout = in_codec_context->ch_layout,
                    out_ch_layout = output_ch_layout;   // AV_CHANNEL_LAYOUT_MONO;
    enum AVSampleFormat in_sample_fmt = in_codec_context->sample_fmt,
                        out_sample_fmt = in_codec_context->sample_fmt;

    SwrContext *swr_ctx = nullptr;
    int ret = swr_alloc_set_opts2(&swr_ctx,
                      &out_ch_layout,
                      out_sample_fmt,
                      out_sample_rate,
                      &in_ch_layout,
                      in_sample_fmt,
                      in_sample_rate,
                      0,                    // log_offset
                      NULL);                // log_ctx

    // Probably not necessary - documentation says "This option is
only used for special remapping."
    av_opt_set_chlayout(swr_ctx,    "used_chlayout",     &in_ch_layout, 0);

    // Open output file for writing
    avcodec_open2(output_codec_context, output_codec, NULL);
    avcodec_parameters_from_context(out_stream->codecpar, output_codec_context);
    
    if (out_format_context->oformat->flags & AVFMT_GLOBALHEADER)
        out_format_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    avio_open(&out_format_context->pb, outputFilename.c_str(), AVIO_FLAG_WRITE);
    AVDictionary* muxer_opts = nullptr;
    avformat_write_header(out_format_context, &muxer_opts);

    AVFrame* input_frame = av_frame_alloc();
    AVPacket* in_packet = av_packet_alloc();

    // Loop through decoded input frames. Resample and get resulting samples in a new output frame.
    // I think PCM supports variable number of samples in frames so probably can immediately write out
    while (av_read_frame(in_format_context, in_packet) >= 0) {
        avcodec_send_packet(in_codec_context, in_packet);
        avcodec_receive_frame(in_codec_context, input_frame);

        // I don't want to do this, but it 'fixes' the error where channel layout of input frames
        // doesn't match what the resampler expects - hardcoded the number 2 to fit my sample audio file.
        AVChannelLayout input_frame_ch_layout;
        av_channel_layout_default(&input_frame_ch_layout, 2 /* = nb_channels*/);
        input_frame->ch_layout = input_frame_ch_layout;

        AVFrame* output_frame = av_frame_alloc();
        output_frame->sample_rate = out_sample_rate;
        output_frame->format = out_sample_fmt;
        output_frame->ch_layout = out_ch_layout;
        output_frame->nb_samples = output_codec_context->frame_size;
        
        // TODO Probably need to do maths to calculate new pts properly
        output_frame->pts = input_frame->pts;

        if (swr_convert_frame(swr_ctx, output_frame, input_frame))
            {   logging("Swr Convert failed");  return -1;   }          
            /// ^ Fails here, the second time (since the first time init() is called internally)

        AVPacket *output_packet = av_packet_alloc();
        int response = avcodec_send_frame(output_codec_context, output_frame);

        while (response >= 0) {
            response = avcodec_receive_packet(output_codec_context, output_packet);

            if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
                break;
            }

            output_packet->stream_index = 0;
            av_packet_rescale_ts(output_packet, in_stream->time_base, out_stream->time_base);
            av_interleaved_write_frame(out_format_context, output_packet);
        }
        av_packet_unref(output_packet);
        av_packet_free(&output_packet);
        av_frame_unref(input_frame);    // Free references held by the frame before reading new data into it.
        av_frame_unref(output_frame);
    }
    // TODO write last output packet flushing the buffer

    avformat_close_input(&in_format_context);
    return 0;
}

ffmpeg libav libswresample

答： 暂无答案

上一个：不小心格式化了视频

下一个：在Qt 5.12.2中使用FFmpeg/LibAV流式传输和播放即时转换的AAC音频

libswresample： 为什么 swr_init（） 会改变 |in_ch_layout|顺序，使其不再与我解码的 AVFrames 匹配，导致重新采样失败？

libswresample: Why does swr_init() change |in_ch_layout| order so it no longer matches my decoded AVFrames, causing resampling to fail?

评论

libswresample：为什么 swr_init（）会改变 |in_ch_layout|顺序，使其不再与我解码的 AVFrames 匹配，导致重新采样失败？