I have a system that allows me to capture a window and save it as an mp4, using ffmpeg. I use gdigrab to capture the frames, but it is fairly slow (about 60 ms per av_read_frame call).
I know I can capture a game using the DirectX API, but I don't know how to convert the resulting BMP to an AVFrame.
The following code is the DirectX code I use to capture the frame
extern void* pBits;
extern IDirect3DDevice9* g_pd3dDevice;
IDirect3DSurface9* pSurface;
g_pd3dDevice->CreateOffscreenPlainSurface(ScreenWidth, ScreenHeight,
D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH,
&pSurface, NULL);
g_pd3dDevice->GetFrontBufferData(0, pSurface);
D3DLOCKED_RECT lockedRect;
pSurface->LockRect(&lockedRect, NULL,
    D3DLOCK_NO_DIRTY_UPDATE |
    D3DLOCK_NOSYSLOCK | D3DLOCK_READONLY);
for( int i=0 ; i < ScreenHeight ; i++)
{
memcpy( (BYTE*) pBits + i * ScreenWidth * BITSPERPIXEL / 8 ,
(BYTE*) lockedRect.pBits + i* lockedRect.Pitch ,
ScreenWidth * BITSPERPIXEL / 8);
}
pSurface->UnlockRect();
pSurface->Release();
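For completeness, pBits is declared extern above but its allocation isn't shown; with D3DFMT_A8R8G8B8 the frame is 32 bits per pixel, so a minimal allocation (an assumption on my part) would be:
// Not shown in the snippet: pBits must point at a buffer big enough for one BGRA frame.
pBits = malloc(ScreenWidth * ScreenHeight * BITSPERPIXEL / 8);  // BITSPERPIXEL == 32 for A8R8G8B8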
And here is my read loop:
while (1) {
    if (av_read_frame(pFormatCtx, &packet) < 0 || exit)
        break;
    if (packet.stream_index == videoindex) {
        // Decode video frame
        av_packet_rescale_ts(&packet, { 1, std::stoi(pParser->GetVal("video-fps")) }, pCodecCtx->time_base);
        avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
        if (frameFinished) {
            pFrame->pts = i;
            i++;
            sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data, pFrame->linesize, 0, pCodecCtx->height, pFrameRGB->data, pFrameRGB->linesize);
            pFrameRGB->pts = pFrame->pts;
            enc.encodeFrame(pFrameRGB);
        }
        // Free the packet that was allocated by av_read_frame
        av_free_packet(&packet);
    }
}
How can I create an AVFrame from the bitmap data I already have, without using av_read_frame?
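For what it's worth, here is a minimal sketch of the direction I would expect this to take: wrap the captured BGRA buffer in an AVFrame, convert it with swscale to the encoder's pixel format, and feed it straight to the encoder, so av_read_frame and gdigrab drop out entirely. Names like enc.encodeFrame, pCodecCtx, ScreenWidth and i are taken from the snippets above; everything else is an assumption, not a finished implementation.
// Sketch: turn the captured BGRA pixels (pBits) into an AVFrame and hand it to the encoder.
AVFrame *frame = av_frame_alloc();
frame->format = pCodecCtx->pix_fmt;          // e.g. AV_PIX_FMT_YUV420P
frame->width  = pCodecCtx->width;
frame->height = pCodecCtx->height;
av_frame_get_buffer(frame, 0);               // allocate frame->data / frame->linesize

// Source: a single packed BGRA plane with tightly packed rows (as copied in the loop above).
// D3DFMT_A8R8G8B8 in memory is B,G,R,A on little-endian, i.e. AV_PIX_FMT_BGRA.
const uint8_t *src_data[4]   = { (const uint8_t *)pBits, NULL, NULL, NULL };
int            src_stride[4] = { ScreenWidth * 4, 0, 0, 0 };

SwsContext *bgra_to_enc = sws_getContext(ScreenWidth, ScreenHeight, AV_PIX_FMT_BGRA,
                                         pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
                                         SWS_BILINEAR, NULL, NULL, NULL);
sws_scale(bgra_to_enc, src_data, src_stride, 0, ScreenHeight,
          frame->data, frame->linesize);

frame->pts = i++;                             // same manual pts counting as in the loop above
enc.encodeFrame(frame);

sws_freeContext(bgra_to_enc);
av_frame_free(&frame);
In a real capture loop you would create the SwsContext and the AVFrame once, outside the loop, and reuse them for every captured frame.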
Related
I have been following a tutorial on how to use ffmpeg and SDL to make a simple video player with no audio (yet). While working through the tutorial I realized it was out of date and that many of the functions it used, for both ffmpeg and SDL, were deprecated. So I searched for an up-to-date solution and found a Stack Overflow answer that filled in what the tutorial was missing.
However, it uses YUV420, which is lower quality. I want to implement YUV444, and after studying chroma subsampling for a bit and looking at the different YUV formats I am confused as to how to implement it. From what I understand, YUV420 has a quarter of the chroma resolution of YUV444. YUV444 means every pixel has its own chroma sample and is therefore more detailed, while YUV420 means pixels are grouped together and share a chroma sample, and is therefore less detailed.
And from what I understand, the different YUV formats (420, 422, 444) also differ in the way the Y, U and V samples are laid out. All of this is a bit overwhelming because I haven't done much with codecs, conversions, etc. Any help would be much appreciated, and if additional info is needed please let me know before downvoting.
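To put numbers on the difference, here is the plane-size arithmetic for the two planar layouts (an illustration, not code from the tutorial or the answer):
// For a width x height frame (both even):
//   YUV444P: Y, U and V planes are all w*h bytes        -> 3 bytes per pixel
//   YUV420P: Y is w*h, U and V are (w/2)*(h/2) each     -> 1.5 bytes per pixel
int w = 1920, h = 1080;
int ySize  = w * h;               // luma, identical in both formats
int uv444  = w * h;               // full-resolution chroma plane
int uv420  = (w / 2) * (h / 2);   // one chroma sample shared by each 2x2 block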
Here is the code from the answer I mentioned concerning the conversion to YUV420:
texture = SDL_CreateTexture(
renderer,
SDL_PIXELFORMAT_YV12,
SDL_TEXTUREACCESS_STREAMING,
pCodecCtx->width,
pCodecCtx->height
);
if (!texture) {
fprintf(stderr, "SDL: could not create texture - exiting\n");
exit(1);
}
// initialize SWS context for software scaling
sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
AV_PIX_FMT_YUV420P,
SWS_BILINEAR,
NULL,
NULL,
NULL);
// set up YV12 pixel array (12 bits per pixel)
yPlaneSz = pCodecCtx->width * pCodecCtx->height;
uvPlaneSz = pCodecCtx->width * pCodecCtx->height / 4;
yPlane = (Uint8*)malloc(yPlaneSz);
uPlane = (Uint8*)malloc(uvPlaneSz);
vPlane = (Uint8*)malloc(uvPlaneSz);
if (!yPlane || !uPlane || !vPlane) {
fprintf(stderr, "Could not allocate pixel buffers - exiting\n");
exit(1);
}
uvPitch = pCodecCtx->width / 2;
while (av_read_frame(pFormatCtx, &packet) >= 0) {
// Is this a packet from the video stream?
if (packet.stream_index == videoStream) {
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Did we get a video frame?
if (frameFinished) {
AVPicture pict;
pict.data[0] = yPlane;
pict.data[1] = uPlane;
pict.data[2] = vPlane;
pict.linesize[0] = pCodecCtx->width;
pict.linesize[1] = uvPitch;
pict.linesize[2] = uvPitch;
// Convert the image into YUV format that SDL uses
sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height, pict.data,
pict.linesize);
SDL_UpdateYUVTexture(
texture,
NULL,
yPlane,
pCodecCtx->width,
uPlane,
uvPitch,
vPlane,
uvPitch
);
SDL_RenderClear(renderer);
SDL_RenderCopy(renderer, texture, NULL, NULL);
SDL_RenderPresent(renderer);
}
}
// Free the packet that was allocated by av_read_frame
av_free_packet(&packet);
SDL_PollEvent(&event);
switch (event.type) {
case SDL_QUIT:
SDL_DestroyTexture(texture);
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(screen);
SDL_Quit();
exit(0);
break;
default:
break;
}
}
// Free the YUV frame
av_frame_free(&pFrame);
free(yPlane);
free(uPlane);
free(vPlane);
// Close the codec
avcodec_close(pCodecCtx);
avcodec_close(pCodecCtxOrig);
// Close the video file
avformat_close_input(&pFormatCtx);
EDIT:
After more research I learned that YUV420 is stored with all the Y bytes first, followed by the U and V bytes one after another, as illustrated by an image I found:
(image not reproduced here; source: wikimedia.org)
However, I also learned that YUV444 is stored in the order U, Y, V, repeating, as a second picture showed.
I tried changing some things around in code:
// I changed SDL_PIXELFORMAT_YV12 to SDL_PIXELFORMAT_UYVY
// to reflect the order of YUV444
texture = SDL_CreateTexture(
renderer,
SDL_PIXELFORMAT_UYVY,
SDL_TEXTUREACCESS_STREAMING,
pCodecCtx->width,
pCodecCtx->height
);
if (!texture) {
fprintf(stderr, "SDL: could not create texture - exiting\n");
exit(1);
}
// Changed AV_PIX_FMT_YUV420P to AV_PIX_FMT_YUV444P
// for rather obvious reasons
sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
AV_PIX_FMT_YUV444P,
SWS_BILINEAR,
NULL,
NULL,
NULL);
// There are as many Y, U and V bytes as there are pixels, so I just
// made yPlaneSz and uvPlaneSz equal to the number of pixels
yPlaneSz = pCodecCtx->width * pCodecCtx->height;
uvPlaneSz = pCodecCtx->width * pCodecCtx->height;
yPlane = (Uint8*)malloc(yPlaneSz);
uPlane = (Uint8*)malloc(uvPlaneSz);
vPlane = (Uint8*)malloc(uvPlaneSz);
if (!yPlane || !uPlane || !vPlane) {
fprintf(stderr, "Could not allocate pixel buffers - exiting\n");
exit(1);
}
uvPitch = pCodecCtx->width * 2;
while (av_read_frame(pFormatCtx, &packet) >= 0) {
// Is this a packet from the video stream?
if (packet.stream_index == videoStream) {
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Rearranged the order of the planes to reflect UYV order
// then set linesize to the number of Y, U and V bytes
// per row
if (frameFinished) {
AVPicture pict;
pict.data[0] = uPlane;
pict.data[1] = yPlane;
pict.data[2] = vPlane;
pict.linesize[0] = pCodecCtx->width;
pict.linesize[1] = pCodecCtx->width;
pict.linesize[2] = pCodecCtx->width;
// Convert the image into YUV format that SDL uses
sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height, pict.data,
pict.linesize);
SDL_UpdateYUVTexture(
texture,
NULL,
yPlane,
1,
uPlane,
uvPitch,
vPlane,
uvPitch
);
//.................................................
But now I get an access violation at the call to SDL_UpdateYUVTexture... I'm honestly not sure what's wrong. I think it may have to do with setting AVPicture pict's data and linesize members improperly, but I'm not positive.
After many hours of scouring the web for possible answers, I stumbled upon a post in which someone was asking about YUV444 support in packed or planar mode. The only current packed YUV444 format I've found is AYUV.
The answer they got was a list of all the currently supported formats, which did not include AYUV. Therefore SDL does not support YUV444.
The only solution is to use a different library that supports AYUV / YUV444.
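If the goal is simply full-chroma display rather than a YUV444 texture specifically, one workaround (not part of the linked answer) is to have swscale output RGB and stream an RGB texture instead, which sidesteps SDL's YUV format list entirely. A rough sketch, where rgbBuffer is a plain width*height*3 byte buffer you allocate yourself:
// Display full-chroma frames by converting to RGB24 instead of using a YUV texture.
texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGB24,
                            SDL_TEXTUREACCESS_STREAMING,
                            pCodecCtx->width, pCodecCtx->height);
sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
                         pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_RGB24,
                         SWS_BILINEAR, NULL, NULL, NULL);
// ... after decoding a frame:
uint8_t *dst_data[4]   = { rgbBuffer, NULL, NULL, NULL };
int      dst_stride[4] = { pCodecCtx->width * 3, 0, 0, 0 };
sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data, pFrame->linesize,
          0, pCodecCtx->height, dst_data, dst_stride);
SDL_UpdateTexture(texture, NULL, rgbBuffer, pCodecCtx->width * 3);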
I am trying to create a program that will capture a full-screen DirectX application, look for a specific set of pixels on the screen, and if it finds them, draw an image on the screen.
I have been able to set up the application to capture the screen with the DirectX libraries, using the code from the answer to this question: Capture screen using DirectX.
In that example the code saves the capture to the hard drive using the WIC libraries. I would rather manipulate the pixels instead of saving them.
After I have captured the screen and have an LPBYTE of the entire screen's pixels, I am unsure how to crop it to the region I want and how to manipulate the pixel array. Is it just a multi-dimensional byte array?
The way I think I should do it is:
1. Capture the screen to an IWIC bitmap (done).
2. Convert the IWIC bitmap to an ID2D1 bitmap using ID2D1RenderTarget::CreateBitmapFromWicBitmap.
3. Create a new ID2D1 bitmap to store the partial image.
4. Copy the region of interest to the new bitmap using ID2D1Bitmap::CopyFromBitmap.
5. Render it back onto the screen using ID2D1.
Any help on any of this would be so much appreciated.
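On the "is it a multi-dimensional array" point: the capture comes back as one flat byte buffer plus a stride (the number of bytes per row), so a pixel at (x, y) in 32-bpp BGRA is addressed like this (illustration only; the answer below wraps the same arithmetic in macros):
// buffer: LPBYTE returned by the capture; stride: bytes per row (>= width * 4)
BYTE *px    = buffer + y * stride + x * 4;  // start of pixel (x, y)
BYTE blue   = px[0];
BYTE green  = px[1];
BYTE red    = px[2];
BYTE alpha  = px[3];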
Here is a modified version of the original code that captures only a portion of the screen into a buffer and also gives back the stride. It then walks every pixel and dumps its color, as a sample usage of the returned buffer.
In this sample, the buffer is allocated by the function, so you must free it once you've used it:
// sample usage
int main()
{
LONG left = 10;
LONG top = 10;
LONG width = 100;
LONG height = 100;
LPBYTE buffer;
UINT stride;
RECT rc = { left, top, left + width, top + height };
Direct3D9TakeScreenshot(D3DADAPTER_DEFAULT, &buffer, &stride, &rc);
// In 32bppPBGRA format, each pixel is represented by 4 bytes
// with one byte each for blue, green, red, and the alpha channel, in that order.
// But don't forget this is all modulo endianness ...
// So, on Intel architecture, if we read a pixel from memory
// as a DWORD, it's reversed (ARGB). The macros below handle that.
// browse every pixel by line
for (int h = 0; h < height; h++)
{
LPDWORD pixels = (LPDWORD)(buffer + h * stride);
for (int w = 0; w < width; w++)
{
DWORD pixel = pixels[w];
wprintf(L"#%02X#%02X#%02X#%02X\n", GetBGRAPixelAlpha(pixel), GetBGRAPixelRed(pixel), GetBGRAPixelGreen(pixel), GetBGRAPixelBlue(pixel));
}
}
// get pixel at 50, 50 in the buffer, as #ARGB
DWORD pixel = GetBGRAPixel(buffer, stride, 50, 50);
wprintf(L"#%02X#%02X#%02X#%02X\n", GetBGRAPixelAlpha(pixel), GetBGRAPixelRed(pixel), GetBGRAPixelGreen(pixel), GetBGRAPixelBlue(pixel));
SavePixelsToFile32bppPBGRA(width, height, stride, buffer, L"test.png", GUID_ContainerFormatPng);
LocalFree(buffer);
return 0;
}
#define GetBGRAPixelBlue(p) (LOBYTE(p))
#define GetBGRAPixelGreen(p) (HIBYTE(p))
#define GetBGRAPixelRed(p) (LOBYTE(HIWORD(p)))
#define GetBGRAPixelAlpha(p) (HIBYTE(HIWORD(p)))
#define GetBGRAPixel(b,s,x,y) (((LPDWORD)(((LPBYTE)b) + y * s))[x])
HRESULT Direct3D9TakeScreenshot(UINT adapter, LPBYTE *pBuffer, UINT *pStride, const RECT *pInputRc = nullptr)
{
if (!pBuffer || !pStride) return E_INVALIDARG;
HRESULT hr = S_OK;
IDirect3D9 *d3d = nullptr;
IDirect3DDevice9 *device = nullptr;
IDirect3DSurface9 *surface = nullptr;
D3DPRESENT_PARAMETERS parameters = { 0 };
D3DDISPLAYMODE mode;
D3DLOCKED_RECT rc;
*pBuffer = NULL;
*pStride = 0;
// init D3D and get screen size
d3d = Direct3DCreate9(D3D_SDK_VERSION);
HRCHECK(d3d->GetAdapterDisplayMode(adapter, &mode));
LONG width = pInputRc ? (pInputRc->right - pInputRc->left) : mode.Width;
LONG height = pInputRc ? (pInputRc->bottom - pInputRc->top) : mode.Height;
parameters.Windowed = TRUE;
parameters.BackBufferCount = 1;
parameters.BackBufferHeight = height;
parameters.BackBufferWidth = width;
parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
parameters.hDeviceWindow = NULL;
// create device & capture surface (note it needs desktop size, not our capture size)
HRCHECK(d3d->CreateDevice(adapter, D3DDEVTYPE_HAL, NULL, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &parameters, &device));
HRCHECK(device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, nullptr));
// get pitch/stride to compute the required buffer size
HRCHECK(surface->LockRect(&rc, pInputRc, 0));
*pStride = rc.Pitch;
HRCHECK(surface->UnlockRect());
// allocate buffer
*pBuffer = (LPBYTE)LocalAlloc(0, *pStride * height);
if (!*pBuffer)
{
hr = E_OUTOFMEMORY;
goto cleanup;
}
// get the data
HRCHECK(device->GetFrontBufferData(0, surface));
// copy it into our buffer
HRCHECK(surface->LockRect(&rc, pInputRc, 0));
CopyMemory(*pBuffer, rc.pBits, rc.Pitch * height);
HRCHECK(surface->UnlockRect());
cleanup:
if (FAILED(hr))
{
if (*pBuffer)
{
LocalFree(*pBuffer);
*pBuffer = NULL;
}
*pStride = 0;
}
RELEASE(surface);
RELEASE(device);
RELEASE(d3d);
return hr;
}
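The listing above relies on HRCHECK and RELEASE helpers that are not reproduced here; minimally they need to do something like the following (a sketch, the original answer's macros also log the failing expression):
// Minimal stand-ins for the helpers assumed by Direct3D9TakeScreenshot.
#define HRCHECK(expr) { hr = (expr); if (FAILED(hr)) { wprintf(L"HRESULT 0x%08X\n", (unsigned int)hr); goto cleanup; } }
#define RELEASE(p)    { if ((p) != nullptr) { (p)->Release(); (p) = nullptr; } }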
I'm trying to decode a video file using ffmpeg, grab the AVFrame object, convert it to an OpenCV Mat object, do some processing, then convert it back to an AVFrame object and encode it back to a video file.
Well, the program runs, but it produces a bad result.
I keep getting errors like "top block unavailable for requested intra mode at 7 19", "error while decoding MB 7 19, bytestream 358", "concealing 294 DC, 294 AC, 294 MV errors in P frame", etc.
And the resulting video has glitches all over it.
I'm guessing it's because of my AVFrame-to-Mat and Mat-to-AVFrame methods; here they are:
//unspecified function
temp_rgb_frame = avcodec_alloc_frame();
int numBytes = avpicture_get_size(PIX_FMT_RGB24, width, height);
uint8_t * frame2_buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture*)temp_rgb_frame, frame2_buffer, PIX_FMT_RGB24, width, height);
void CoreProcessor::Mat2AVFrame(cv::Mat **input, AVFrame *output)
{
//create a AVPicture frame from the opencv Mat input image
avpicture_fill((AVPicture *)temp_rgb_frame,
(uint8_t *)(*input)->data,
AV_PIX_FMT_RGB24,
(*input)->cols,
(*input)->rows);
//convert the frame to the color space and pixel format specified in the sws context
sws_scale(
rgb_to_yuv_context,
temp_rgb_frame->data,
temp_rgb_frame->linesize,
0, height,
((AVPicture *)output)->data,
((AVPicture *)output)->linesize);
(*input)->release();
}
void CoreProcessor::AVFrame2Mat(AVFrame *pFrame, cv::Mat **mat)
{
sws_scale(
yuv_to_rgb_context,
((AVPicture*)pFrame)->data,
((AVPicture*)pFrame)->linesize,
0, height,
((AVPicture *)temp_rgb_frame)->data,
((AVPicture *)temp_rgb_frame)->linesize);
*mat = new cv::Mat(pFrame->height, pFrame->width, CV_8UC3, temp_rgb_frame->data[0]);
}
void CoreProcessor::process_frame(AVFrame *pFrame)
{
cv::Mat *mat = NULL;
AVFrame2Mat(pFrame, &mat);
Mat2AVFrame(&mat, pFrame);
}
Am I doing something wrong with the memory? Because if I remove the processing part and just decode and then re-encode the frame, the result is correct.
Well, it turns out I made a mistake in the initialization of temp_rgb_frame; it should be like this:
temp_rgb_frame = avcodec_alloc_frame();
int numBytes = avpicture_get_size(PIX_FMT_RGB24, width, height);
uint8_t * frame2_buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture*)temp_rgb_frame, frame2_buffer, PIX_FMT_RGB24, width, height);
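For reference, avcodec_alloc_frame and the avpicture_* helpers are deprecated; with the current FFmpeg API the same RGB staging frame can be set up roughly like this (a sketch using the width and height from the code above):
temp_rgb_frame = av_frame_alloc();
temp_rgb_frame->format = AV_PIX_FMT_RGB24;
temp_rgb_frame->width  = width;
temp_rgb_frame->height = height;
av_frame_get_buffer(temp_rgb_frame, 0);  // allocates data[] and linesize[] for us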
I have an image in OpenCV which I can view and save correctly. I want to take this image and pass it to FFmpeg so I can encode it, but when I save the jpg output from ffmpeg I get an empty image, which probably means I am not copying the data over to the AVFrame correctly.
What am I doing wrong?
Any help is greatly appreciated.
This is how I set up the final_frame and the yuv422 frame:
final_frame = avcodec_alloc_frame();
int num_bytes = avpicture_get_size(PIX_FMT_BGR24, 1600, 720);
final_frame1_buffer = (uint8_t *)av_malloc(num_bytes*sizeof(uint8_t));
avpicture_fill((AVPicture*)final_frame, final_frame1_buffer, PIX_FMT_BGR24, 1600, 720);
yuv422_final_frame = avcodec_alloc_frame();
int yuv422_num_bytes = avpicture_get_size( PIX_FMT_YUV422P, 1600, 720 );
final_frame2_buffer = (uint8_t *)av_malloc(yuv422_num_bytes*sizeof(uint8_t));
avpicture_fill((AVPicture*)yuv422_final_frame, final_frame2_buffer, PIX_FMT_YUVJ422P, 1600, 720);
I have attached the code below
cv::imshow("output image", im3); <---------- Image shows correctly
cv::Mat rgb_frame;
cv::cvtColor( im3 , rgb_frame, CV_BGR2RGB ) ;
if (final_sws_ctx == NULL)
{
final_sws_ctx = sws_getContext(1600, 720,
AV_PIX_FMT_BGR24, 1600, 720,
AV_PIX_FMT_YUVJ422P, SWS_FAST_BILINEAR, 0, 0, 0);
}
imwrite( "rgbjpeg.jpg", rgb_frame ); <----- Image Saves correctly here too
avpicture_fill((AVPicture*)final_frame, rgb_frame.data, PIX_FMT_RGB24, 1600, 720);
sws_scale(final_sws_ctx, final_frame->data,
final_frame->linesize,
0, 720,
yuv422_final_frame->data,
yuv422_final_frame->linesize);
AVPacket encode_packet;
int got_output = 0;
av_init_packet(&encode_packet);
encode_packet.data = NULL;
encode_packet.size = 0;
int ret = avcodec_encode_video2(final_codec_context,
&encode_packet,
yuv422_final_frame,
&got_output);
if (got_output ) {
CString temp;
temp.Format( "test%u.jpg", counter);
FILE* outputFile = fopen(temp, "wb");
printf("Write frame (size=%5d)\n", encode_packet.size);
fwrite(encode_packet.data, 1, encode_packet.size, outputFile);
av_free_packet(&encode_packet);
fclose(outputFile);
counter++;
}
Try to explicitly set the stride value for the final_frame. A cv::Mat does not guarantee tightly packed rows (rgb_frame.step is not necessarily width * 3), while avpicture_fill assumes packed rows, so the linesize has to come from the Mat itself:
final_frame = avcodec_alloc_frame();
avcodec_get_frame_defaults(final_frame);
final_frame->format = PIX_FMT_RGB24;
final_frame->width = 1600;
final_frame->height = 720;
final_frame->data[0] = rgb_frame.data;
final_frame->linesize[0] = rgb_frame.step; // <<< stride
sws_scale(final_sws_ctx, final_frame->data, // etc...
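Equivalently (a sketch, not from the original answer), you can skip the AVFrame wrapper for the source entirely and hand the Mat's buffer and row step straight to sws_scale, which makes the stride explicit:
// rgb_frame is the 1600x720 BGR cv::Mat from the question; step is its real bytes-per-row.
const uint8_t *src_data[4]     = { rgb_frame.data, NULL, NULL, NULL };
int            src_linesize[4] = { (int)rgb_frame.step, 0, 0, 0 };
sws_scale(final_sws_ctx, src_data, src_linesize, 0, 720,
          yuv422_final_frame->data, yuv422_final_frame->linesize);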
I have been working with RGB->YUV420 conversion for some time using the FFmpeg library. I have already tried the sws_scale functionality, but it's not working well. Now I have decided to convert each pixel individually, using the colorspace conversion formulae. The following code gets me a few frames and lets me access the individual R, G, B values of each pixel:
// Read frames and save first five frames to disk
i=0;
while((av_read_frame(pFormatCtx, &packet)>=0) && (i<5))
{
// Is this a packet from the video stream?
if(packet.stream_index==videoStreamIdx)
{
/// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Did we get a video frame?
if(frameFinished)
{
i++;
sws_scale(img_convert_ctx, (const uint8_t * const *)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height,
pFrameRGB->data, pFrameRGB->linesize);
int x, y, R, G, B;
uint8_t *p = pFrameRGB->data[0];
for(y = 0; y < h; y++)
{
for(x = 0; x < w; x++)
{
R = *p++;
G = *p++;
B = *p++;
printf(" %d-%d-%d ",R,G,B);
}
}
SaveFrame(pFrameRGB, pCodecCtx->width, pCodecCtx->height, i);
}
}
// Free the packet that was allocated by av_read_frame
av_free_packet(&packet);
}
I read online that to convert RGB->YUV420, or vice versa, one should first convert to the YUV444 format, so the chain is RGB->YUV444->YUV420. How do I implement this in C++?
Also, here is the SaveFrame() function used above. I guess this will also have to change a little, since YUV420 stores data differently. How do I take care of that?
void SaveFrame(AVFrame *pFrame, int width, int height, int iFrame)
{
FILE *pFile;
char szFilename[32];
int y;
// Open file
sprintf(szFilename, "frame%d.ppm", iFrame);
pFile=fopen(szFilename, "wb");
if(pFile==NULL)
return;
// Write header
fprintf(pFile, "P6\n%d %d\n255\n", width, height);
// Write pixel data
for(y=0; y<height; y++)
fwrite(pFrame->data[0]+y*pFrame->linesize[0], 1, width*3, pFile);
// Close file
fclose(pFile);
}
Can somebody please suggest? Many thanks!!!
void SaveFrameYUV420P(AVFrame *pFrame, int width, int height, int iFrame)
{
FILE *pFile;
char szFilename[32];
int y;
// Open file
sprintf(szFilename, "frame%d.yuv", iFrame);
pFile=fopen(szFilename, "wb");
if(pFile==NULL)
return;
// Write pixel data
fwrite(pFrame->data[0], 1, width*height, pFile);
fwrite(pFrame->data[1], 1, width*height/4, pFile);
fwrite(pFrame->data[2], 1, width*height/4, pFile);
// Close file
fclose(pFile);
}
On Windows, you can use irfanview to see frames saved this way. You open the frame as RAW, 24bpp format, provide width and height, and check the box "yuv420".
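For the conversion question itself, here is a minimal per-pixel sketch of RGB -> YUV444 -> YUV420 (an illustration only, using the BT.601 full-range formulas since the question does not say which matrix is wanted; in practice sws_scale to AV_PIX_FMT_YUV420P does the same job):
// Illustration: packed RGB24 -> planar YUV420P.
// yp is w*h bytes; up and vp are (w/2)*(h/2) bytes each; w and h must be even.
static inline uint8_t clamp8(int v) { return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v; }

void rgb24_to_yuv420p(const uint8_t *rgb, int w, int h,
                      uint8_t *yp, uint8_t *up, uint8_t *vp)
{
    for (int j = 0; j < h; j++) {
        for (int i = 0; i < w; i++) {
            int r = rgb[(j * w + i) * 3 + 0];
            int g = rgb[(j * w + i) * 3 + 1];
            int b = rgb[(j * w + i) * 3 + 2];
            // RGB -> YUV444: one Y, U and V value per pixel
            int yv = (int)( 0.299 * r + 0.587 * g + 0.114 * b);
            int uv = (int)(-0.169 * r - 0.331 * g + 0.500 * b) + 128;
            int vv = (int)( 0.500 * r - 0.419 * g - 0.081 * b) + 128;
            yp[j * w + i] = clamp8(yv);
            // YUV444 -> YUV420: keep one chroma sample per 2x2 block
            // (averaging the block's four samples would be slightly better)
            if ((i % 2 == 0) && (j % 2 == 0)) {
                up[(j / 2) * (w / 2) + (i / 2)] = clamp8(uv);
                vp[(j / 2) * (w / 2) + (i / 2)] = clamp8(vv);
            }
        }
    }
}
The three planes written this way match what SaveFrameYUV420P() above dumps to disk.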