使用 libav 和 C++ 解码 H264 后,用 sws_scale 将 YUV 转换为 RGB 时出现“bad src image ptrs”错误

Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++

提问人:Sebastian DELLING 提问时间:10/30/2023 更新时间:10/31/2023 访问量:50

问:

在从 H264 文件解码帧后,尝试将帧转换为带有 sws_scale 的 RGB 时,我收到“bad src image ptrs”错误,并且无法确定出了什么问题。

我检查了导致错误的原因,并在 swscale.c 中找到了 check_image_pointers 函数,该函数验证给定数据中是否存在像素格式(通过 av_pix_fmt_desc_get 获取)所需的平面和行大小(linesize),而我的数据似乎并不满足这一要求。

编写的 pgm 文件对我来说看起来不错,重播文件也可以。

我打印了我的帧(frame)的相应数据。问题似乎是平面 1 和 2 的行大小(linesize)为 0。这 3 个平面似乎都有数据。平面 0 的行大小是图像宽度的三倍,这也让我感到困惑。

这是我的输出:

Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0

这是我的应用程序的完整代码,问题发生在 decode 方法中:

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>

// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>

extern "C"
{

#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}

#define INBUF_SIZE 4096
class H264Decoder
{
public:
    H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
    {

        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
        {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0)
        {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream
        for (unsigned i = 0; i < formatContext->nb_streams; i++)
        {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
            {
                videoStreamIndex = i;
                std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
                break;
            }
        }

        if (videoStreamIndex == -1)
        {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec)
        {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser)
        {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext)
        {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0)
        {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        frame = av_frame_alloc();
        frame->format = AV_PIX_FMT_YUV420P;
        if (!frame)
        {
            throw std::runtime_error("Could not allocate frame");
        }

        inputPacket = av_packet_alloc();
        if (!inputPacket)
        {
            throw std::runtime_error("Could not allocate packet");
        }

        inputFilename_ = inputFilename;
        outputFilenamePrefix_ = outputFilenamePrefix;
    }

    void decode()
    {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0)
        {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        while (ret >= 0)
        {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            /* the picture is allocated by the decoder. no need to
               free it */
            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;

            SwsContext *sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width,
                                     codecContext->height,
                                     codecContext->pix_fmt,
                                     codecContext->width,
                                     codecContext->height,
                                     AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC,
                                     NULL,
                                     NULL,
                                     NULL);

            AVFrame *frame2 = av_frame_alloc();
            int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
            uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
            av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
            std::cout << "Required:" << std::endl;
            for (int i = 0; i < 4; i++)
            {
                int plane = desc->comp[i].plane;
                std::cout << "plane " << i << " : " << plane << std::endl;
            }
            std::cout << "Present:" << std::endl;
            for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
            {
                std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
            }

            sws_scale(sws_ctx, frame->data,
                      frame->linesize, 0, codecContext->height,
                      frame2->data, frame2->linesize);

            // cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
            // cv::imshow("Image", img);

            pgm_save(frame->data[0], frame->linesize[0],
                     frame->width, frame->height, buf);
        }
    }

    ~H264Decoder()
    {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_packet_free(&inputPacket);
    }

    void readAndDecode()
    {
        FILE *f;
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t *data;
        size_t data_size;
        int ret;
        int eof;
        f = fopen(inputFilename_.c_str(), "rb");
        auto start = std::chrono::high_resolution_clock::now();
        do
        {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            while (data_size > 0 || eof)
            {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
                                       data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0)
                {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                data += ret;
                data_size -= ret;

                if (inputPacket->size)
                {
                    decode();
                }
                else if (eof)
                {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
    }

private:
    AVFormatContext *formatContext = nullptr;
    AVCodecContext *codecContext = nullptr;
    AVCodecParserContext *parser;
    AVFrame *frame = nullptr;
    AVFrame *frameRgb = nullptr;
    AVPacket *inputPacket = nullptr;
    int videoStreamIndex = -1;
    std::string inputFilename_;
    std::string outputFilenamePrefix_;

    static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
    {
        FILE *f = fopen(filename, "wb");
        if (!f)
        {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

/**
 * Entry point: decodes the file named by the first command line argument.
 *
 * @return 0 on success, 1 when the argument is missing or decoding failed
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cout << "Please provide input file name as parameter" << std::endl;
        // Without this return argv[1] (a null pointer) would be turned into a
        // std::string below.
        return 1;
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\output-frame";

    try
    {

        H264Decoder decoder(inputFilename, outputFilenamePrefix);
        decoder.readAndDecode();
    }
    catch (const std::exception &e)
    {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
C++ ffmpeg rgb yuv libavcodec

评论

0赞 PaulMcKenzie 10/30/2023
f = fopen(inputFilename_.c_str(), "rb"); -- 你没有检查 fopen 是否失败。

答:

0赞 navaneeth mohan 10/30/2023 #1

由于您的错误提示是 bad src image,这意味着您分配 YUV 帧的方式有问题,或者您没有正确地向 sws_scale 传递参数。既然你说写出的 pgm 文件正常,我更倾向于后一个原因。

在调用 sws_scale 时尝试以下操作之一:

  1. 使用 (const uint8_t * const *)frame 而不是 frame->data
  2. 使用 frame->data[0] 而不是 frame->data。

问题:我看到你的类里有一个 AVFrame *frameRgb。这应该是输出帧吗?那 AVFrame *frame2 又是什么?问题:您使用的是哪个版本的 FFMPEG?问题:您能否对输出的 pgm 文件使用 FFPROBE,以确认它确实是 YUV420P?

这是一个 C++ 包装器,在重新缩放视频帧时对我有用。与您的情况不同,输入帧不是成员变量。但是,输出帧是一个成员变量,它在构造函数中分配。关于 pts 和 pkt_dts 的部分可能与您的情况无关。

VideoRescaler.hpp

/// Rescales / converts video frames with libswscale into an output frame that
/// the object keeps as a member.
struct VideoRescaler
{
    /// Stores the input (i*) and output (o*) geometry and pixel formats and
    /// sets up outFrame_ and swsCtx_ for them.
    VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt);

    /// Scales inFrame into outFrame_ and copies pts / pkt_dts across.
    void Rescale(AVFrame *inFrame);

    struct SwsContext *swsCtx_; ///< scaler context used by Rescale()
    AVFrame *outFrame_;         ///< destination of every Rescale() call

    // Input size
    int iWidth_;
    int iHeight_;
    // Output size
    int oWidth_;
    int oHeight_;

    enum AVPixelFormat iPixFmt_; ///< input pixel format
    enum AVPixelFormat oPixFmt_; ///< output pixel format
};

VideoRescaler.cpp

/**
 * Allocates the output frame (with its pixel buffer) and the scaler context
 * for the given input / output geometry and pixel formats.
 *
 * Terminates the process with exit(1) when an allocation fails, which matches
 * how Rescale() reports failures.
 */
VideoRescaler::VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt) : 
swsCtx_(nullptr),outFrame_(nullptr),
iWidth_(iWidth),iHeight_(iHeight),oWidth_(oWidth),oHeight_(oHeight),iPixFmt_(iPixFmt),oPixFmt_(oPixFmt)
{
    outFrame_ = av_frame_alloc();
    if(!outFrame_)
    {
        printf("Failed to allocate output frame\n");
        exit(1);
    }
    outFrame_->width = oWidth_;
    outFrame_->height = oHeight_;
    outFrame_->format = oPixFmt_;

    // Width, height and format must be set before the buffer is requested.
    const int ret = av_frame_get_buffer(outFrame_,0);
    if(ret < 0)
    {
        printf("Failed to allocate output frame buffer %d\n",ret);
        exit(1);
    }

    swsCtx_ = sws_getContext(
        iWidth_, iHeight_, iPixFmt_,
        oWidth_, oHeight_, oPixFmt_,
        SWS_BILINEAR, NULL, NULL, NULL
    );
    if(!swsCtx_)
    {
        printf("Failed to create scaler context\n");
        exit(1);
    }

    printf("INIT RESCALER %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
}


/**
 * Scales / converts inFrame into outFrame_ and copies the timestamps.
 *
 * @param inFrame decoded source frame; its size and format are expected to
 *                match the values given to the constructor
 *
 * Terminates the process with exit(1) when sws_scale() does not produce the
 * full output height.
 */
void VideoRescaler::Rescale(AVFrame *inFrame)
{
    int ret = -1;
    printf("RESCALING %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
    ret = sws_scale(
        swsCtx_,
        // Pass the plane pointer array itself. Casting the AVFrame pointer
        // only works by accident of `data` being the struct's first member.
        inFrame->data, 
        inFrame->linesize, 
        0,
        inFrame->height,
        outFrame_->data,
        outFrame_->linesize
    );

    if(ret != outFrame_->height)
    {
        // av_err2str() expands to a C compound literal, which g++ rejects in
        // C++ code; format into a local buffer instead.
        char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
        av_make_error_string(errBuf, sizeof(errBuf), ret);
        printf("Failed to rescale frame %d:%s\n",ret,errBuf);
        exit(1);
    }
    outFrame_->pts = inFrame->pts;
    outFrame_->pkt_dts = inFrame->pkt_dts;
}
0赞 Sebastian DELLING 10/31/2023 #2

问题出在我初始化 frame2 的方式上。我传入的是 frame->linesize 而不是 frame2->linesize,结果覆盖了 yuv 帧的 linesize,而不是填充 rgb 帧的 linesize:av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

我删除了 frame2 的全部初始化代码,改用 av_image_alloc 为 rgb 帧分配缓冲区。

这是我目前的工作代码,以防有人想将其用作参考。转换从 RGB 更改为 BGR,以使用 OpenCV 显示它。

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>

#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}

#define INBUF_SIZE 4096
class H264Decoder {
public:
    H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
                uint16_t outputHeight, bool show, bool save)
            : doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
              outputHeight(outputHeight), outputWidth(outputWidth) {
        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0) {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream
        for (unsigned i = 0; i < formatContext->nb_streams; i++) {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
                videoStreamIndex = i;
                break;
            }
        }

        if (videoStreamIndex == -1) {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec) {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser) {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext) {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0) {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        frame = av_frame_alloc();
        if (!frame) {
            throw std::runtime_error("Could not allocate frame");
        }

        frameRgb = av_frame_alloc();
        if (!frameRgb) {
            throw std::runtime_error("Could not allocate frame");
        }
        av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);

        inputPacket = av_packet_alloc();
        if (!inputPacket) {
            throw std::runtime_error("Could not allocate packet");
        }

    }

    void decode() {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0) {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        while (ret >= 0) {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0) {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            SwsContext* sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
                                     outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
            if (doSave) {
                pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
            }

            sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
                      frameRgb->linesize);

            if (doShow) {
                cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
                cv::imshow("Image", img);
                cv::waitKey(1);
            }
        }
    }

    ~H264Decoder() {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_frame_free(&frameRgb);
        av_packet_free(&inputPacket);
        av_freep(&frameRgb->data[0]);
    }

    void readAndDecode() {
        FILE* f;
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t* data;
        size_t data_size;
        int ret;
        int eof;
        f = fopen(inputFilename_.c_str(), "rb");
        if (!f) {
            std::cout << "Error opening file" << std::endl;;
            exit(1);
        }
        memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        auto start = std::chrono::high_resolution_clock::now();
        do {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            while (data_size > 0 || eof) {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
                                       AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0) {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                data += ret;
                data_size -= ret;

                if (inputPacket->size) {
                    decode();
                } else if (eof) {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
                  << " ms/frame " << std::endl;
    }

private:
    bool doShow{false};
    bool doSave{true};
    const AVCodec* codec;
    AVFormatContext* formatContext = nullptr;
    AVCodecContext* codecContext = nullptr;
    AVCodecParserContext* parser;
    AVFrame* frame = nullptr;
    AVFrame* frameRgb = nullptr;
    AVPacket* inputPacket = nullptr;
    int videoStreamIndex = -1;
    std::string inputFilename_;
    std::string outputFilenamePrefix_;
    uint16_t outputHeight = 1280;
    uint16_t outputWidth = 1632;

    static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
        FILE* f = fopen(filename, "wb");
        if (!f) {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cout << "Please provide input file name as parameter" << std::endl;
        exit(1);
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\pics\\output-frame";

    try {
        H264Decoder decoder(inputFilename, outputFilenamePrefix, 1632, 1280, true, false);
        decoder.readAndDecode();
    } catch (const std::exception& e) {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}