提问人:Sebastian DELLING 提问时间:10/30/2023 更新时间:10/31/2023 访问量:50
使用 libav 和 c++ 进行 H264 解码后,将 YUV 转换为 RGB 的 src 图像 ptrs 错误
Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++
问:
在从 H264 文件解码帧后,尝试将帧转换为带有 sws_scale 的 RGB 时,我收到“bad src image ptrs”错误,并且无法确定出了什么问题。
我检查了导致错误的原因,并在 swscale.c 中找到了该函数,该函数验证给定数据中是否存在像素格式 () 所需的平面和线条大小,而我的数据似乎并非如此。check_image_pointers
av_pix_fmt_desc_get
编写的 pgm 文件对我来说看起来不错,重播文件也可以。
我打印了我的框架的相应数据。问题似乎是平面 1 和 2 的线大小为 0。他们 3 个似乎都有数据。平面 0 线大小是图像宽度的三倍,这也让我感到困惑。
这是我的输出:
Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0
这是我的应用程序的整个代码,问题发生在方法解码中:
#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>
// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>
extern "C"
{
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}
#define INBUF_SIZE 4096
class H264Decoder
{
public:
H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
{
// Open input file
if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
{
throw std::runtime_error("Could not open input file");
}
if (avformat_find_stream_info(formatContext, nullptr) < 0)
{
throw std::runtime_error("Could not find stream information");
}
// Find H.264 video stream
for (unsigned i = 0; i < formatContext->nb_streams; i++)
{
if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
{
videoStreamIndex = i;
std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
break;
}
}
if (videoStreamIndex == -1)
{
throw std::runtime_error("H.264 video stream not found");
}
// Initialize codec and codec context
const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
if (!codec)
{
throw std::runtime_error("Codec not found");
}
parser = av_parser_init(codec->id);
if (!parser)
{
throw std::runtime_error("parser not found");
}
codecContext = avcodec_alloc_context3(codec);
if (!codecContext)
{
throw std::runtime_error("Could not allocate codec context");
}
if (avcodec_open2(codecContext, codec, nullptr) < 0)
{
throw std::runtime_error("Could not open codec");
}
// Initialize frame
frame = av_frame_alloc();
frame->format = AV_PIX_FMT_YUV420P;
if (!frame)
{
throw std::runtime_error("Could not allocate frame");
}
inputPacket = av_packet_alloc();
if (!inputPacket)
{
throw std::runtime_error("Could not allocate packet");
}
inputFilename_ = inputFilename;
outputFilenamePrefix_ = outputFilenamePrefix;
}
void decode()
{
char buf[1024];
int ret;
ret = avcodec_send_packet(codecContext, inputPacket);
if (ret < 0)
{
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0)
{
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0)
{
fprintf(stderr, "Error during decoding\n");
exit(1);
}
/* the picture is allocated by the decoder. no need to
free it */
snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);
std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;
SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(codecContext->width,
codecContext->height,
codecContext->pix_fmt,
codecContext->width,
codecContext->height,
AV_PIX_FMT_RGB24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
AVFrame *frame2 = av_frame_alloc();
int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
std::cout << "Required:" << std::endl;
for (int i = 0; i < 4; i++)
{
int plane = desc->comp[i].plane;
std::cout << "plane " << i << " : " << plane << std::endl;
}
std::cout << "Present:" << std::endl;
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
{
std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
}
sws_scale(sws_ctx, frame->data,
frame->linesize, 0, codecContext->height,
frame2->data, frame2->linesize);
// cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
// cv::imshow("Image", img);
pgm_save(frame->data[0], frame->linesize[0],
frame->width, frame->height, buf);
}
}
~H264Decoder()
{
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
av_frame_free(&frame);
av_packet_free(&inputPacket);
}
void readAndDecode()
{
FILE *f;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t *data;
size_t data_size;
int ret;
int eof;
f = fopen(inputFilename_.c_str(), "rb");
auto start = std::chrono::high_resolution_clock::now();
do
{
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (ferror(f))
break;
eof = !data_size;
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0 || eof)
{
ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0)
{
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (inputPacket->size)
{
decode();
}
else if (eof)
{
break;
}
}
} while (!eof);
auto diff = std::chrono::high_resolution_clock::now() - start;
std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
}
private:
AVFormatContext *formatContext = nullptr;
AVCodecContext *codecContext = nullptr;
AVCodecParserContext *parser;
AVFrame *frame = nullptr;
AVFrame *frameRgb = nullptr;
AVPacket *inputPacket = nullptr;
int videoStreamIndex = -1;
std::string inputFilename_;
std::string outputFilenamePrefix_;
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
{
FILE *f = fopen(filename, "wb");
if (!f)
{
std::cout << "Error opening file for saving PGM" << std::endl;
exit(1);
}
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (int i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
};
int main(int argc, char *argv[])
{
if (argc < 2)
{
std::cout << "Please provide input file name as parameter" << std::endl;
}
std::string inputFilename = argv[1];
std::string outputFilenamePrefix = "C:\\tmp\\output-frame";
try
{
H264Decoder decoder(inputFilename, outputFilenamePrefix);
decoder.readAndDecode();
}
catch (const std::exception &e)
{
std::cout << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}
答:
由于您的错误表明这意味着您分配 YUV 帧的方式有问题,或者您没有正确传递参数。既然你说你的书面文件工作正常,我更倾向于后一个原因。bad src image
sws_scale
pgm
在通话中尝试以下操作之一:sws_scale
(const uint8_t * const *)frame
而不是frame->data
frame->data[0]
而不是。frame->data
问题:我看到你的班级有一个.这应该是输出帧吗?那是什么?问题:您使用的是哪个版本的 FFMPEG?
问题:您能否在输出文件上使用以确认它确实是一个?AVFrame *frameRgb
AVFrame *frame2
FFPROBE
pgm
YUV420P
这是一个 C++ 包装器,在重新缩放视频帧时对我有用。与您的情况不同,输入帧不是成员变量。但是,输出帧是一个成员变量,它在构造函数中分配。关于和可能与您的情况无关的部分。pts
pkt_dts
VideoRescaler.hpp
struct VideoRescaler
{
struct SwsContext *swsCtx_;
AVFrame *outFrame_;
void Rescale(AVFrame *inFrame);
VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt);
int iWidth_,iHeight_,oWidth_,oHeight_;
enum AVPixelFormat iPixFmt_,oPixFmt_;
};
视频重缩放器.cpp
VideoRescaler::VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt) :
iWidth_(iWidth),iHeight_(iHeight),oWidth_(oWidth),oHeight_(oHeight),iPixFmt_(iPixFmt),oPixFmt_(oPixFmt)
{
outFrame_ = av_frame_alloc();
outFrame_->width = oWidth_;
outFrame_->height = oHeight_;
outFrame_->format = oPixFmt_;
av_frame_get_buffer(outFrame_,0);
swsCtx_ = sws_getContext(
iWidth_, iHeight_, iPixFmt_,
oWidth_, oHeight_, oPixFmt_,
SWS_BILINEAR, NULL, NULL, NULL
);
printf("INIT RESCALER %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
}
void VideoRescaler::Rescale(AVFrame *inFrame)
{
int ret = -1;
printf("RESCALING %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
ret = sws_scale(
swsCtx_,
(const uint8_t * const *)inFrame,
inFrame->linesize,
0,
inFrame->height,
outFrame_->data,
outFrame_->linesize
);
if(ret != outFrame_->height)
{
printf("Failed to rescale frame %d:%s\n",ret,av_err2str(ret));
exit(1);
}
outFrame_->pts = inFrame->pts;
outFrame_->pkt_dts = inFrame->pkt_dts;
}
问题是我试图初始化frame2。我覆盖了 yuv 帧的线条大小而不是 rgb 帧:av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
我删除了 frame2 的完整初始化,并用于为 rgb 帧分配缓冲区。av_image_alloc
这是我目前的工作代码,以防有人想将其用作参考。转换从 RGB 更改为 BGR,以使用 OpenCV 显示它。
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 4096
class H264Decoder {
public:
H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
uint16_t outputHeight, bool show, bool save)
: doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
outputHeight(outputHeight), outputWidth(outputWidth) {
// Open input file
if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
throw std::runtime_error("Could not open input file");
}
if (avformat_find_stream_info(formatContext, nullptr) < 0) {
throw std::runtime_error("Could not find stream information");
}
// Find H.264 video stream
for (unsigned i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) {
throw std::runtime_error("H.264 video stream not found");
}
// Initialize codec and codec context
codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
if (!codec) {
throw std::runtime_error("Codec not found");
}
parser = av_parser_init(codec->id);
if (!parser) {
throw std::runtime_error("parser not found");
}
codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
throw std::runtime_error("Could not allocate codec context");
}
if (avcodec_open2(codecContext, codec, nullptr) < 0) {
throw std::runtime_error("Could not open codec");
}
// Initialize frame
frame = av_frame_alloc();
if (!frame) {
throw std::runtime_error("Could not allocate frame");
}
frameRgb = av_frame_alloc();
if (!frameRgb) {
throw std::runtime_error("Could not allocate frame");
}
av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);
inputPacket = av_packet_alloc();
if (!inputPacket) {
throw std::runtime_error("Could not allocate packet");
}
}
void decode() {
char buf[1024];
int ret;
ret = avcodec_send_packet(codecContext, inputPacket);
if (ret < 0) {
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);
SwsContext* sws_ctx = NULL;
sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
if (doSave) {
pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
}
sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
frameRgb->linesize);
if (doShow) {
cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
cv::imshow("Image", img);
cv::waitKey(1);
}
}
}
~H264Decoder() {
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
av_frame_free(&frame);
av_frame_free(&frameRgb);
av_packet_free(&inputPacket);
av_freep(&frameRgb->data[0]);
}
void readAndDecode() {
FILE* f;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t* data;
size_t data_size;
int ret;
int eof;
f = fopen(inputFilename_.c_str(), "rb");
if (!f) {
std::cout << "Error opening file" << std::endl;;
exit(1);
}
memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
auto start = std::chrono::high_resolution_clock::now();
do {
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (ferror(f))
break;
eof = !data_size;
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0 || eof) {
ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (inputPacket->size) {
decode();
} else if (eof) {
break;
}
}
} while (!eof);
auto diff = std::chrono::high_resolution_clock::now() - start;
std::cout << "Decoded " << codecContext->frame_num << " frames in "
<< std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
<< std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
<< " ms/frame " << std::endl;
}
private:
bool doShow{false};
bool doSave{true};
const AVCodec* codec;
AVFormatContext* formatContext = nullptr;
AVCodecContext* codecContext = nullptr;
AVCodecParserContext* parser;
AVFrame* frame = nullptr;
AVFrame* frameRgb = nullptr;
AVPacket* inputPacket = nullptr;
int videoStreamIndex = -1;
std::string inputFilename_;
std::string outputFilenamePrefix_;
uint16_t outputHeight = 1280;
uint16_t outputWidth = 1632;
static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
FILE* f = fopen(filename, "wb");
if (!f) {
std::cout << "Error opening file for saving PGM" << std::endl;
exit(1);
}
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (int i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
};
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "Please provide input file name as parameter" << std::endl;
exit(1);
}
std::string inputFilename = argv[1];
std::string outputFilenamePrefix = "C:\\tmp\\pics\\output-frame";
try {
H264Decoder decoder(inputFilename, outputFilenamePrefix, 1632, 1280, true, false);
decoder.readAndDecode();
} catch (const std::exception& e) {
std::cout << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}
评论
f = fopen(inputFilename_.c_str(), "rb");
-- 你没有检查是否失败。fopen