YUV to JPG encoding using ffmpeg - c++

I am trying to encode a YUV file and save it as a JPG file, but I don't understand the following:
1. Why is the packet size siz*3?
av_new_packet(&pkt, siz*3);
2. In fread, why are we reading siz*3/2 bytes?
if(fread(buffer, 1, siz*3/2, ptrInputFile)<=0)
3. How is the frame data filled here?
frame->data[0] = buffer;
frame->data[1] = buffer + siz;
frame->data[2] = buffer + siz*5/4;
code:
AVFormatContext *avFrameContext;
AVOutputFormat *avOutputFormat;
AVStream *avStream;
AVCodecContext *avCodecContext;
AVCodec *avCodec;
AVFrame *frame;
AVPacket pkt;
const char *output = "temp.jpg";
FILE *ptrInputFile;
const char *input = "cuc_view_480x272.yuv";
ptrInputFile = fopen(input ,"rb");
if(!ptrInputFile)
return -1;
avFrameContext = avformat_alloc_context();
avOutputFormat = av_guess_format("mjpeg", NULL, NULL);
if(!avOutputFormat)
return -1;
avFrameContext->oformat = avOutputFormat;
if(avio_open(&avFrameContext->pb ,output ,AVIO_FLAG_READ_WRITE)<0)
return -1;
avStream = avformat_new_stream(avFrameContext,NULL);
if(!avStream)
return -1;
avCodecContext = avStream->codec;
avCodecContext->codec_id = avOutputFormat->video_codec;
avCodecContext->codec_type = AVMEDIA_TYPE_VIDEO;
avCodecContext->pix_fmt = PIX_FMT_YUVJ420P;
avCodecContext->width = 480;
avCodecContext->height = 272;
avCodecContext->time_base.num = 1;
avCodecContext->time_base.den = 25;
avCodec = avcodec_find_encoder(avCodecContext->codec_id);
if(!avCodec)
return -1;
if(avcodec_open2(avCodecContext ,avCodec,NULL)<0)
return -1;
frame = av_frame_alloc();
int size = avpicture_get_size(PIX_FMT_YUVJ420P ,avCodecContext->width, avCodecContext->height);
uint8_t *buffer = (uint8_t*)av_malloc(size*sizeof(uint8_t));
avpicture_fill((AVPicture*)frame, buffer, avCodecContext->pix_fmt ,avCodecContext->width, avCodecContext->height);
//write header
avformat_write_header(avFrameContext, NULL);
int siz = avCodecContext->width*avCodecContext->height;
av_new_packet(&pkt,siz*3);
if(fread(buffer , 1, siz*3/2, ptrInputFile)<=0)
return -1;
frame->data[0] = buffer;
frame->data[1] = buffer + siz;
frame->data[2] = buffer + siz*5/4;

If you look at the yuv420p format (see the Wikipedia article), the data is laid out in the file as follows.
With siz = width*height pixels in the image, each frame contains:
siz bytes of Y values
siz/4 bytes of U values
siz/4 bytes of V values
So for question 2: there are siz*3/2 bytes of data to read per frame.
For question 3: Y starts at buffer + 0, U starts at buffer + siz, and V starts at buffer + siz*5/4.
As for question 1: I am not sure whether the data is converted to RGB later. If it is, each pixel would need 3 bytes, which would explain the siz*3 packet size; the rest of the code would be needed to confirm that.
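To make the layout concrete, here is a minimal sketch (plain C++, no FFmpeg calls) that reads one YUV420p frame and computes the same three plane pointers as the code in the question; the file name and dimensions are just the example values from above:
#include <cstdio>
#include <cstdint>
#include <vector>
int main() {
    const int width = 480, height = 272;        // frame dimensions (example values)
    const size_t siz = size_t(width) * height;  // number of luma samples
    const size_t frameBytes = siz * 3 / 2;      // Y + U + V for 4:2:0
    std::vector<uint8_t> buffer(frameBytes);
    FILE *f = std::fopen("cuc_view_480x272.yuv", "rb");
    if (!f || std::fread(buffer.data(), 1, frameBytes, f) != frameBytes)
        return -1;
    std::fclose(f);
    // Plane pointers inside the packed buffer:
    uint8_t *y = buffer.data();                 // siz bytes of Y
    uint8_t *u = buffer.data() + siz;           // siz/4 bytes of U
    uint8_t *v = buffer.data() + siz * 5 / 4;   // siz/4 bytes of V
    (void)y; (void)u; (void)v;
    return 0;
}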

I can't say much about the code you provided above, but if you are trying to encode a YUV video and save it as JPEG images, you can use the following ffmpeg command directly:
ffmpeg -f rawvideo -vcodec rawvideo -s <resolution> -r 25 -pix_fmt yuv420p -i video.yuv -preset ultrafast -qp 0 %d.jpg
Replace <resolution> with the resolution of your video, e.g. 1920x1080.

Related

How can I convert a raw file to an audio file (.wav) with FFmpeg in C++

I want to convert MP4 to WAV with a different sample rate in my C++ application.
First I extracted the audio from the MP4 file with ffmpeg in C++ and converted it to a raw file, but I do not know how to convert that raw file to a WAV file with a different sample rate.
How can I solve this?
#include "ffmpeg.h"
int decode_packet(int *got_frame, int cached)
{
int ret = 0;
int decoded = pkt.size;
*got_frame = 0;
if (pkt.stream_index == video_stream_idx) {
/* decode video frame */
ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
if (ret < 0) {
// fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret));
return ret;
}
if (*got_frame) {
if (frame->width != width || frame->height != height ||
frame->format != pix_fmt) {
/* To handle this change, one could call av_image_alloc again and
* decode the following frames into another rawvideo file. */
// fprintf(stderr, "Error: Width, height and pixel format have to be "
// "constant in a rawvideo file, but the width, height or "
// "pixel format of the input video changed:\n"
// "old: width = %d, height = %d, format = %s\n"
// "new: width = %d, height = %d, format = %s\n",
// width, height, av_get_pix_fmt_name(pix_fmt),
// frame->width, frame->height,
// av_get_pix_fmt_name(frame->format));
return -1;
}
printf("video_frame%s n:%d coded_n:%d\n",
cached ? "(cached)" : "",
video_frame_count++, frame->coded_picture_number);
/* copy decoded frame to destination buffer:
* this is required since rawvideo expects non aligned data */
av_image_copy(video_dst_data, video_dst_linesize,
(const uint8_t **)(frame->data), frame->linesize,
pix_fmt, width, height);
/* write to rawvideo file */
fwrite(video_dst_data[0], 1, video_dst_bufsize, video_dst_file);
}
} else if (pkt.stream_index == audio_stream_idx) {
/* decode audio frame */
ret = avcodec_decode_audio4(audio_dec_ctx, frame, got_frame, &pkt);
if (ret < 0) {
// fprintf(stderr, "Error decoding audio frame (%s)\n", av_err2str(ret));
return ret;
}
/* Some audio decoders decode only part of the packet, and have to be
* called again with the remainder of the packet data.
* Sample: fate-suite/lossless-audio/luckynight-partial.shn
* Also, some decoders might over-read the packet. */
decoded = FFMIN(ret, pkt.size);
if (*got_frame) {
size_t unpadded_linesize = frame->nb_samples * av_get_bytes_per_sample((AVSampleFormat)frame->format);
// printf("audio_frame%s n:%d nb_samples:%d pts:%s\n",
// cached ? "(cached)" : "",
// audio_frame_count++, frame->nb_samples,
// av_ts2timestr(frame->pts, &audio_dec_ctx->time_base));
/* Write the raw audio data samples of the first plane. This works
* fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
* most audio decoders output planar audio, which uses a separate
* plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
* In other words, this code will write only the first audio channel
* in these cases.
* You should use libswresample or libavfilter to convert the frame
* to packed data. */
// fwrite(frame->extended_data[0], 1, unpadded_linesize, audio_dst_file);
//encode function
encode(cOut, frame, &pktout, audio_dst_file);
// av_init_packet(&pktout);
// pktout.data = NULL; // packet data will be allocated by the encoder
// pktout.size = 0;
// /* encode the samples */
// ret = avcodec_encode_audio2(cOut, &pktout, frame, &got_outputOut);
// if (ret < 0) {
// fprintf(stderr, "Error encoding audio frame\n");
// exit(1);
// }
// if (got_outputOut) {
// fwrite(pktout.data, 1, pktout.size, audio_dst_file);
// av_free_packet(&pktout);
// }
}
}
/* If we use frame reference counting, we own the data and need
* to de-reference it when we don't use it anymore */
if (*got_frame && refcount)
av_frame_unref(frame);
return decoded;
}
First, use libswresample to resample the audio data to the target sample rate.
Then you can write the resampled raw data into a WAV container.
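As a rough illustration (my own sketch, not the poster's code), a libswresample setup for converting the decoded frames to packed 16-bit stereo at 44.1 kHz could look like this; the input parameters come from the audio_dec_ctx and frame variables used in the code above, and error handling is omitted:
extern "C" {
#include <libswresample/swresample.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
}
// One-time setup: resample whatever the decoder outputs to packed S16 stereo at 44100 Hz.
SwrContext *swr = swr_alloc_set_opts(nullptr,
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, 44100,             // output layout/format/rate
        audio_dec_ctx->channel_layout, audio_dec_ctx->sample_fmt,  // input, taken from the decoder
        audio_dec_ctx->sample_rate,
        0, nullptr);
swr_init(swr);
// Per decoded frame: convert and append the PCM to the raw output file.
uint8_t *out = nullptr;
int max_out = av_rescale_rnd(swr_get_delay(swr, audio_dec_ctx->sample_rate) + frame->nb_samples,
                             44100, audio_dec_ctx->sample_rate, AV_ROUND_UP);
av_samples_alloc(&out, nullptr, 2 /*channels*/, max_out, AV_SAMPLE_FMT_S16, 0);
int got = swr_convert(swr, &out, max_out,
                      (const uint8_t **)frame->extended_data, frame->nb_samples);
fwrite(out, 1, got * 2 /*channels*/ * 2 /*bytes per sample*/, audio_dst_file);
av_freep(&out);
Once the data is packed S16 PCM at the target rate, producing a .wav file is just a matter of prepending a standard 44-byte RIFF header, or of feeding the samples to libavformat's wav muxer.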

OpenCV: imdecode() fails for images with non-standard sizes

I ran into an issue with cv::imdecode(): when I try to load an image with a non-standard size, I always get a Mat with a NULL data buffer and rows = 0.
For example, it happens with a .jpg of size 236x402.
My C++ code:
FILE* pf = NULL;
pf = _wfopen(strFileName, L"r");
if (!pf)
{
return false;
}
fread(pFileBuf, 1, nFileSize, pf);
fclose(pf);
cv::Mat matRaw = cv::Mat( 1, nFileSize, CV_8UC1, pFileBuf);
cv::Mat matImage = cv::imdecode(matRaw, CV_LOAD_IMAGE_COLOR);
delete [] pFileBuf;
SIZE size;
size.cx = matImage.cols;
size.cy = matImage.rows;
After running this, matImage.rows = 0 and matImage.data = NULL.
But for a 1280x720 JPEG image it works fine.
Ideas?
The bug is here:
pf = _wfopen(strFileName, L"r");
It must be:
pf = _wfopen(strFileName, L"rb");
The file has to be opened in binary mode; in text mode on Windows, fread translates line endings and can stop early, which corrupts the JPEG data. Thanks for the help.
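For reference, a minimal way to read the whole file in binary mode and decode it might look like this (a sketch; the flag name depends on your OpenCV version, older releases use CV_LOAD_IMAGE_COLOR):
#include <opencv2/opencv.hpp>
#include <cstdio>
#include <vector>
bool loadImage(const wchar_t *strFileName, cv::Mat &matImage)
{
    FILE *pf = _wfopen(strFileName, L"rb");   // "rb": binary mode is essential on Windows
    if (!pf)
        return false;
    fseek(pf, 0, SEEK_END);
    long nFileSize = ftell(pf);
    fseek(pf, 0, SEEK_SET);
    std::vector<uchar> fileBuf(nFileSize);
    size_t nRead = fread(fileBuf.data(), 1, nFileSize, pf);
    fclose(pf);
    if (nRead != static_cast<size_t>(nFileSize))
        return false;
    matImage = cv::imdecode(fileBuf, cv::IMREAD_COLOR);
    return !matImage.empty();
}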

Create video using ffmpeg

I have 100 PNG images and I want to create a video from them. I am using the ffmpeg library for this. From the command line I can create the video easily, but how do I do it in code?
Any help will be appreciated.
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_AV_CONFIG_H
#undef HAVE_AV_CONFIG_H
#endif
extern "C"
{
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavcodec/avcodec.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
}
#define INBUF_SIZE 4096
#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096
static void video_encode_example(const char *filename, int codec_id)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int i, out_size, size, x, y, outbuf_size;
FILE *f;
AVFrame *picture;
uint8_t *outbuf;
int nrOfFramesPerSecond =25;
int nrOfSeconds =1;
printf("Video encoding\n");
// find the mpeg1 video encoder
codec = avcodec_find_encoder((CodecID) codec_id);
if (!codec) {
fprintf(stderr, "codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
picture= avcodec_alloc_frame();
// put sample parameters
c->bit_rate = 400000;
// resolution must be a multiple of two
c->width = 352;
c->height = 288;
// frames per second
c->time_base= (AVRational){1,25};
c->gop_size = 10; //emit one intra frame every ten frames
c->max_b_frames=1;
c->pix_fmt = PIX_FMT_YUV420P;
if(codec_id == CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
// open it
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "could not open %s\n", filename);
exit(1);
}
// alloc image and output buffer
outbuf_size = 100000;
outbuf = (uint8_t*) malloc(outbuf_size);
// the image can be allocated by any means and av_image_alloc() is
// * just the most convenient way if av_malloc() is to be used
av_image_alloc(picture->data, picture->linesize,
c->width, c->height, c->pix_fmt, 1);
// encode 1 second of video
int nrOfFramesTotal = nrOfFramesPerSecond * nrOfSeconds;
// encode 1 second of video
for(i=0;i < nrOfFramesTotal; i++) {
fflush(stdout);
// prepare a dummy image
for(y=0;y<c->height;y++) {
for(x=0;x<c->width;x++) {
picture->data[0][y * picture->linesize[0] + x] = x + y + i * 3;
}
}
// Cb and Cr
for(y=0;y<c->height/2;y++) {
for(x=0;x<c->width/2;x++) {
picture->data[1][y * picture->linesize[1] + x] = 128 + y + i * 2;
picture->data[2][y * picture->linesize[2] + x] = 64 + x + i * 5;
}
}
// encode the image
out_size = avcodec_encode_video(c, outbuf, outbuf_size, picture);
printf("encoding frame %3d (size=%5d)\n", i, out_size);
fwrite(outbuf, 1, out_size, f);
}
// get the delayed frames
for(; out_size; i++) {
fflush(stdout);
out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
printf("write frame %3d (size=%5d)\n", i, out_size);
fwrite(outbuf, 1, out_size, f);
}
// add sequence end code to have a real mpeg file
outbuf[0] = 0x00;
outbuf[1] = 0x00;
outbuf[2] = 0x01;
outbuf[3] = 0xb7;
fwrite(outbuf, 1, 4, f);
fclose(f);
free(outbuf);
avcodec_close(c);
// av_free(c);
// av_free(picture->data[0]);
// av_free(picture);
printf("\n");
}
int main(int argc, char **argv)
{
const char *filename;
avcodec_register_all();
if (argc <= 1) {
video_encode_example("/home/radix/Desktop/OpenCV/FFMPEG_Output/op89.png", AV_CODEC_ID_H264);
} else {
filename = argv[1];
}
return 0;
}
Whenever I search I find code similar to this, but I don't understand how to use it to create a video from images.
The reason this comes up again and again is because you're using encoding_example.c as your reference. Please don't do that. The most fundamental mistake in this example is that it doesn't teach you the difference between codecs and containers. In fact, it ignored containers altogether.
What is a codec?
A codec is a method of compressing a media type. H264, for example, will compress raw video. Imagine a 1080p video frame, which is typically in YUV format with 4:2:0 chroma subsampling. Raw, this is 1080*1920*3/2 bytes per frame, i.e. ~3MB/f. For 60fps, this is 180MB/sec, or 1.44 gigabit/sec (gbps). That's a lot of data, so we compress it. At that resolution, you can get pretty good quality at a few megabit/sec (mbps) for modern codecs, like H264, HEVC or VP9. For audio, codecs like AAC or Opus are popular.
What is a container?
A container takes video or audio (or subtitle) packets (compressed or uncompressed) and interleaves them for combined storage in a single output file. So rather than getting one file for video and one for audio, you get one file that interleaves packets for both. This allows effective seeking and indexing, it typically also allows adding metadata storage ("author", "title") and so on. Examples of popular containers are MOV, MP4 (which is really just mov), AVI, Ogg, Matroska or WebM (which is really just matroska).
(You can store video-only data in a file if you want. For H264, this is called "annexb" raw H264. This is actually what you were doing above. So why didn't it work? Well, you're ignoring "header" packets like the SPS and PPS. These are in avctx->extradata and need to be written before the first video packet. Using a container would take care of that for you, but you didn't, so it didn't work.)
How do you use a container in FFmpeg? See e.g. this post, particularly the sections calling functions like avformat_write_*() (basically anything that sounds like output). I'm happy to answer more specific questions, but I think the above post should clear up most of the confusion for you.
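For orientation, here is my own rough sketch of the muxing flow with libavformat (not taken from the linked post); the output name out.mp4 and the assumption that packets are already encoded are placeholders, and error handling is omitted:
extern "C" {
#include <libavformat/avformat.h>
}
// Sketch: wrap already-encoded H.264 packets in an MP4 container.
void mux_sketch()
{
    AVFormatContext *oc = nullptr;
    avformat_alloc_output_context2(&oc, nullptr, nullptr, "out.mp4"); // container guessed from extension
    AVStream *st = avformat_new_stream(oc, nullptr);
    // ...copy codec parameters (width, height, extradata with SPS/PPS, time base)
    //    from your encoder context into the stream here...
    avio_open(&oc->pb, "out.mp4", AVIO_FLAG_WRITE);
    avformat_write_header(oc, nullptr);          // writes the container header
    // For every encoded AVPacket pkt produced by the encoder:
    //     pkt.stream_index = st->index;
    //     av_packet_rescale_ts(&pkt, encoder_time_base, st->time_base);
    //     av_interleaved_write_frame(oc, &pkt);
    av_write_trailer(oc);                        // finalizes indexes/metadata
    avio_closep(&oc->pb);
    avformat_free_context(oc);
}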

ffmpeg sws_scale YUV420p to RGBA not giving correct scaling result (c++)

I am trying to convert a decoded YUV420p frame (1018x700) to RGBA via sws_scale. I save the data to a raw video file and then play the raw file with ffplay to check the result.
Here is my code:
sws_ctx = sws_getContext(video_dec_ctx->width, video_dec_ctx->height,AV_PIX_FMT_YUV420P, video_dec_ctx->width, video_dec_ctx->height, AV_PIX_FMT_BGR32, SWS_LANCZOS | SWS_ACCURATE_RND, 0, 0, 0);
ret = avcodec_decode_video2(video_dec_ctx, yuvframe, got_frame, &pkt);
if (ret < 0) {
std::cout<<"Error in decoding"<<std::endl;
return ret;
}else{
//the source and destination heights and widths are the same
int sourceX = video_dec_ctx->width;
int sourceY = video_dec_ctx->height;
int destX = video_dec_ctx->width;
int destY = video_dec_ctx->height;
//declare destination frame
AVFrame avFrameRGB;
avFrameRGB.linesize[0] = destX * 4;
avFrameRGB.data[0] = (uint8_t*)malloc(avFrameRGB.linesize[0] * destY);
//scale the frame to avFrameRGB
sws_scale(sws_ctx, yuvframe->data, yuvframe->linesize, 0, yuvframe->height, avFrameRGB.data, avFrameRGB.linesize);
//write to file
fwrite(avFrameRGB.data[0], 1, video_dst_bufsize, video_dst_file);
}
Here is the result without conversion (i.e. in YUV420p format), and here is the result after conversion played with ffplay (i.e. in RGBA format); the converted output does not look correct.
I run ffplay with the following command ('video' is the raw video file):
ffplay -f rawvideo -pix_fmt bgr32 -video_size 1018x700 video
What should I fix so the conversion to RGB32 comes out correctly?
I found the solution. The problem was that I was not using the correct buffer size when writing to the file:
fwrite(avFrameRGB.data[0], 1, video_dst_bufsize, video_dst_file);
The value of video_dst_bufsize came from the return value of
video_dst_bufsize = av_image_alloc(yuvframe.data, yuvframe.linesize, destX, destY, AV_PIX_FMT_YUV420P, 1);
which is the size of the YUV420p frame, not of the larger RGBA frame. The solution is to allocate the RGB destination with av_image_alloc as well and use its return value in the fwrite call:
video_dst_bufsize_RGB = av_image_alloc(avFrameRGB.data, avFrameRGB.linesize, destX, destY, AV_PIX_FMT_BGR32, 1);
fwrite(avFrameRGB.data[0], 1, video_dst_bufsize_RGB, video_dst_file);
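For completeness, the destination setup could look like this (a sketch built from the variables in the question; heap-allocating the AVFrame with av_frame_alloc is also safer than a stack AVFrame):
AVFrame *frameRGB = av_frame_alloc();
int video_dst_bufsize_RGB = av_image_alloc(frameRGB->data, frameRGB->linesize,
                                           destX, destY, AV_PIX_FMT_BGR32, 1);
sws_scale(sws_ctx, yuvframe->data, yuvframe->linesize, 0, yuvframe->height,
          frameRGB->data, frameRGB->linesize);
// Write the full RGBA frame: destX * destY * 4 bytes.
fwrite(frameRGB->data[0], 1, video_dst_bufsize_RGB, video_dst_file);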

Decoding mJPEG with libavcodec

I am creating a video conferencing application. I have discovered that webcams (at least the 3 I have) provide higher resolutions and frame rates for the MJPEG output format. So far I was using YUY2, converted to I420 for compression with x264. To transcode MJPEG to I420, I need to decode it first. I am trying to decode images from the webcam with libavcodec. This is my code.
Initialization:
// mJPEG to I420 conversion
AVCodecContext * _transcoder = nullptr;
AVFrame * _outputFrame;
AVPacket _inputPacket;
avcodec_register_all();
_outputFrame = av_frame_alloc();
av_frame_unref(_outputFrame);
av_init_packet(&_inputPacket);
AVCodec * codecDecode = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
_transcoder = avcodec_alloc_context3(codecDecode);
avcodec_get_context_defaults3(_transcoder, codecDecode);
_transcoder->flags2 |= CODEC_FLAG2_FAST;
_transcoder->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV420P;
_transcoder->width = width;
_transcoder->height = height;
avcodec_open2(_transcoder, codecDecode, nullptr);
Decoding:
_inputPacket.size = size;
_inputPacket.data = data;
int got_picture;
int decompressed_size = avcodec_decode_video2(_transcoder, _outputFrame, &got_picture, &_inputPacket);
But so far all I get is a green screen. Where am I going wrong?
UPD:
I have enabled libavcodec logging, but there are no warnings or errors.
Also, I have discovered that _outputFrame has AV_PIX_FMT_YUVJ422P as its format, and a colorspace value that does not match any of the values in libavcodec's enums (the actual value is 156448160).
After suggestions from the comments, I came up with a working solution.
Initialization:
av_init_packet(&_inputPacket);
AVCodec * codecDecode = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
_transcoder = avcodec_alloc_context3(codecDecode);
avcodec_get_context_defaults3(_transcoder, codecDecode);
avcodec_open2(_transcoder, codecDecode, nullptr);
// swscale context init
_mJPEGconvertCtx = sws_getContext(width, height, AV_PIX_FMT_YUVJ422P,
width, height, AV_PIX_FMT_YUV420P, SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
// x264 pic init
x264_picture_t _pic_in;
x264_picture_alloc(&_pic_in, X264_CSP_I420, width, height);
_pic_in.img.i_csp = X264_CSP_I420 ;
_pic_in.img.i_plane = 3;
_pic_in.img.i_stride[0] = width;
_pic_in.img.i_stride[1] = width / 2;
_pic_in.img.i_stride[2] = width / 2;
_pic_in.i_type = X264_TYPE_AUTO ;
_pic_in.i_qpplus1 = 0;
Transcoding:
_inputPacket.size = size;
_inputPacket.data = data;
int got_picture;
// decode
avcodec_decode_video2(_transcoder, _outputFrame, &got_picture, &_inputPacket);
// transform
sws_scale(_mJPEGconvertCtx, _outputFrame->data, _outputFrame->linesize, 0, height,
_pic_in.img.plane, _pic_in.img.i_stride);
Afterwards, _pic_in is passed directly to x264. The image is fine, but the transcoding times are terrible at higher resolutions.
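One possible refinement (my own suggestion, not part of the answer above): instead of hard-coding AV_PIX_FMT_YUVJ422P, the scaler can be created lazily from whatever format the decoder actually reports after the first successful decode, which also covers webcams that emit YUVJ420P or YUVJ444P:
// After the first avcodec_decode_video2() call that sets got_picture:
if (got_picture && !_mJPEGconvertCtx) {
    _mJPEGconvertCtx = sws_getContext(
        _outputFrame->width, _outputFrame->height,
        (AVPixelFormat)_outputFrame->format,      // format the MJPEG decoder actually produced
        _outputFrame->width, _outputFrame->height,
        AV_PIX_FMT_YUV420P,                       // target I420 for x264
        SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
}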