I have a C++ DLL for use from C#. I have a function which takes strings passed to it, and those parameters are declared on the C++ side as const char *, like so:
int __stdcall extract_all_frames(const char* szDestination, float scaleFactor)
The main body of this function is copied directly from a working FFmpeg example function so I'm almost certain the problem isn't there. I feel like the problem is in this modification I made to it:
//Open file
char szFilename[32];
sprintf_s(szFilename, sizeof(szFilename), "frame%d.ppm\0", iFrame);
// JOIN szFILENAME AND szDESTINATION
std::string buffer(szDestination, sizeof(szDestination));
buffer.append("\\");
buffer.append(szDestination);
This is supposed to produce the concatenated directory and filename. I then pass buffer.c_str() into fopen_s(), which takes const char *, not std::string. Whenever I call this function from C#, I get the following exception:
A first chance exception of type 'System.AccessViolationException' occurred in XRF FFmpeg Invoke Test.exe
Additional information: Attempted to read or write protected memory. This is often an indication that other memory is corrupt.
This is the complete code:
#include "stdafx.h"
#pragma comment (lib, "avcodec.lib")
#pragma comment (lib, "avformat.lib")
#pragma comment (lib, "avutil.lib")
#pragma comment (lib, "swscale.lib")
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavutil\avutil.h>
#include <libswscale\swscale.h>
}
#include <string>
#include "Xrf.FFmpeg.hpp"
void save_frame(AVFrame* pFrame, int iFrame, const char* szDestination)
{
//Open file
char szFilename[32];
sprintf_s(szFilename, sizeof(szFilename), "frame%d.ppm\0", iFrame);
// JOIN szFILENAME AND szDESTINATION
std::string buffer(szDestination, sizeof(szDestination));
buffer.append("\\");
buffer.append(szDestination);
FILE* pFile;
errno_t openError = fopen_s(&pFile, buffer.c_str(), "wb");
if (pFile == NULL)
{
return;
}
//Write header
int width = pFrame->width;
int height = pFrame->height;
fprintf(pFile, "P6\n%d %d\n255\n", width, height);
//Write pixel data
for (int y = 0; y < height; y++)
{
fwrite(pFrame->data[0] + y * pFrame->linesize[0], 1, width * 3, pFile);
}
// Close file
fclose(pFile);
}
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
{
// Check if scaleFactor is valid
if ((scaleFactor != 0.f) &&
(scaleFactor > 3.f))
{
fprintf(stderr, "Xrf: Scale factor '%f' out of bounds!\nMust be greater than 0, and less then or equal to 3.0.\n", scaleFactor);
return -1;
}
// Register all formats and codecs
av_register_all();
AVFormatContext* pFormatCtx;
if (avformat_open_input(&pFormatCtx, szPath, nullptr, nullptr) != 0)
{
fprintf(stderr, "libavformat: Couldn't open file '%s'!\n", szPath);
return -1;
}
// Retrieve stream information
if (avformat_find_stream_info(pFormatCtx, nullptr) < 0)
{
fprintf(stderr, "libavformat: Unable to find stream information!\n");
return -1;
}
// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, szPath, 0);
// Find the first video stream
size_t i;
int videoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStream = i;
break;
}
}
if (videoStream == -1)
{
fprintf(stderr, "libavformat: No video stream found!\n");
return -1;
}
// Get a pointer to the codec context for the video stream
AVCodecContext* pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Scale the frame
int scaleHeight = static_cast<int>(floor(pCodecCtx->height * scaleFactor));
int scaleWidth = static_cast<int>(floor(pCodecCtx->width * scaleFactor));
//Check if frame sizes are valid (not 0, because that's dumb)
if (scaleHeight == 0 || scaleWidth == 0)
{
fprintf(stderr, "Xrf: Scale factor caused a zero value in either width or height!\n");
return -1;
}
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
fprintf(stderr, "libavcodec: Unsupported codec!\n");
return -1; // Codec not found
}
// Open codec
AVDictionary* optionsDict = nullptr;
if (avcodec_open2(pCodecCtx, pCodec, &optionsDict) < 0)
{
fprintf(stderr, "libavcodec: Couldn't open codec '%s'!\n", pCodec->long_name);
return -1;
}
// Allocate video frame
AVFrame* pFrame = av_frame_alloc();
// Allocate an AVFrame structure
AVFrame* pFrameRGB = av_frame_alloc();
if (pFrameRGB == NULL)
{
fprintf(stderr, "libavformat: Unable to allocate a YUV->RGB resampling AVFrame!\n");
return -1;
}
// Determine required buffer size and allocate buffer
int numBytes = avpicture_get_size(PIX_FMT_RGB24, scaleWidth, scaleHeight);
uint8_t* buffer = static_cast <uint8_t *> (av_malloc(numBytes * sizeof(uint8_t)));
struct SwsContext* sws_ctx = sws_getContext(pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt,
scaleWidth,
scaleHeight,
PIX_FMT_RGB24,
SWS_BILINEAR,
nullptr, nullptr, nullptr);
// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
// of AVPicture
avpicture_fill(reinterpret_cast <AVPicture *> (pFrameRGB),
buffer,
PIX_FMT_RGB24,
scaleWidth,
scaleHeight);
// Read frames and save first five frames to disk
AVPacket packet;
int frameFinished;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
// Is this a packet from the video stream?
if (packet.stream_index == videoStream)
{
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Did we get a video frame?
if (frameFinished)
{
// Convert the image from its native format to RGB
sws_scale(sws_ctx,
static_cast <uint8_t const * const *> (pFrame->data),
pFrame->linesize,
0,
pCodecCtx->height,
pFrameRGB->data,
pFrameRGB->linesize);
// Save the frame to disk
if (++i <= 5)
{
save_frame(pFrameRGB, i, szDestination);
}
}
}
// Free the packet that was allocated by av_read_frame
av_free_packet(&packet);
}
av_free(buffer); // Free the RGB image
av_free(pFrameRGB);
av_free(pFrame); // Free the YUV frame
avcodec_close(pCodecCtx); // Close the codec
avformat_close_input(&pFormatCtx); // Close the video file
return 0;
}
I don't know if the error is in my modification (most likely, I'm extremely new to C++), or the other code, as the exception only throws on the invocation line in C#, not the actual C++ line causing the problem.
This is wrong:
std::string buffer(szDestination, sizeof(szDestination));
szDestination is a pointer, thus sizeof(szDestination) will return the pointer size, in bytes, not the number of characters.
If szDestination is a null terminated string, use strlen or similar function to determine the number of characters. If it isn't null terminated, then you need to pass the number of bytes to copy as a parameter.
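To illustrate the difference, a minimal sketch (the path literal here is only an example):
const char* szDestination = "C:\\frames";
size_t ptrSize = sizeof(szDestination); // size of the pointer itself: 4 or 8 bytes, regardless of the string
size_t chars = strlen(szDestination);   // number of characters before the terminating '\0'
std::string s(szDestination);           // for a null-terminated string, this constructor is all you need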
The better thing to do is when your DLL function is called:
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
take those pointers and immediately assign them to std::string. Then drop all usage of char* or const char* from there. There is no need for your helper functions to deal with "dumb" character pointers.
Example:
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
{
std::string sPath = szPath;
std::string sDestination = szDestination;
// From here, use sPath and sDestination
//...
}
// redefinition of save_frame
//...
void save_frame(AVFrame* pFrame, int iFrame, const std::string& szDestination)
{
//Open file
std::string buffer = "frame" + to_string(iFrame) + ".ppm\0";
buffer.append("\\");
buffer.append(szDestination);
//...
}
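Continuing that sketch, the joined path can then be handed to fopen_s() exactly as in the original code, since buffer now holds the destination directory plus the frame filename:
FILE* pFile = nullptr;
errno_t openError = fopen_s(&pFile, buffer.c_str(), "wb");
if (openError != 0 || pFile == nullptr)
{
    return; // could not open the output file
}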
So I am trying to use libpng to write PNGs from an array of unsigned char with a bit depth of 1, meaning every bit is either black or white, as a grayscale image. I have managed to do this successfully for an 8-bit depth grayscale PNG, but not for 1-bit depth. Here is the code I have:
extern int write_png_bwsq(const char* filename,
int dimen,
const unsigned char *buffer,
char* title)
{
int yrow;
int dim_bytes;
int code = 1;
FILE *fp = NULL;
png_structp png_ptr = NULL;
png_infop info_ptr = NULL;
png_bytep row = NULL;
dim_bytes = (dimen * dimen) / 8;
// Open file for writing (binary mode)
fp = fopen(filename, "wb");
if (fp == NULL) {
fprintf(stderr, "PNG ERROR: Could not open file %s for writing\n", filename);
code = 0;
goto finalise;
}
// Initialize write structure
png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (png_ptr == NULL) {
fprintf(stderr, "PNG ERROR: Could not allocate PNG write struct\n");
code = 0;
goto finalise;
}
// Initialize info structure
info_ptr = png_create_info_struct(png_ptr);
if (info_ptr == NULL) {
fprintf(stderr, "PNG ERROR: Could not allocate PNG info struct\n");
code = 0;
goto finalise;
}
// Setup Exception handling
if (setjmp(png_jmpbuf(png_ptr))) {
fprintf(stderr, "PNG Error: Creating the PNG output failed.\n");
code = 0;
goto finalise;
}
png_init_io(png_ptr, fp);
png_set_IHDR(png_ptr, info_ptr, dimen, dimen,
1, PNG_COLOR_TYPE_GRAY, PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
// Sets the title
if (title != NULL) {
png_text title_text;
title_text.compression = PNG_TEXT_COMPRESSION_NONE;
title_text.key = "Title";
title_text.text = title;
png_set_text(png_ptr, info_ptr, &title_text, 1);
}
png_write_info(png_ptr, info_ptr);
row = (png_bytep) buffer;
// Write image data
for (yrow=0 ; yrow<dim_bytes ; yrow++) {
png_write_row(png_ptr, row);
++row;
}
// End write operation
png_write_end(png_ptr, NULL);
finalise:
if (fp != NULL) fclose(fp);
if (info_ptr != NULL) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
if (png_ptr != NULL) png_destroy_write_struct(&png_ptr, (png_infopp)NULL);
return code;
}
Then, I have a separate file where I prepare my image:
#include "write_pngs.h"
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#if CHAR_BIT != 8
# error "CHAR_BIT is not usable on this platform"
#endif
#define PIC_SIZE_DIM 1080
#define PIC_SIZE_BW ((PIC_SIZE_DIM * PIC_SIZE_DIM) / CHAR_BIT)
static unsigned char PIC_BITS[PIC_SIZE_BW] = {0};
static void __write_bits_pic(void)
{
size_t i = 1000;
for(;i < 140000;++i) {
PIC_BITS[i] = ((i + 76) >> 5) & 0xff;
}
}
int main(void) {
__write_bits_pic();
printf("Writing picture of %d bytes and %d bits\n", PIC_SIZE_BW, PIC_SIZE_BW * CHAR_BIT);
return !write_png_bwsq("bwpixs.png",
PIC_SIZE_DIM,
PIC_BITS,
"bwpixs");
}
This results in an incorrect image: not only is it quite large for a PNG (about 5 MB for only 1080 x 1080), but only the bottom-right corner of the image is changed from black.
What am I doing wrong here? Does libpng require any special steps for writing grayscale PNGs with a bit depth of 1 that I am not doing?
You are calling png_write_row() way too many times. Your
for (yrow=0 ; yrow<dim_bytes ; yrow++) {
png_write_row(png_ptr, row);
++row;
}
should be something like:
for (yrow=0 ; yrow<dim_bytes ; yrow += dimen / 8) {
png_write_row(png_ptr, row);
row += dimen / 8;
}
in order to write a row of 1080 pixels (135 bytes) per call. You were calling png_write_row() for every byte, as if the image were eight pixels wide, which it isn't.
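An equivalent way to write the corrected loop, iterating over rows instead of byte offsets (assuming dimen is a multiple of 8):
int row_bytes = dimen / 8;             /* 1080 pixels at 1 bit per pixel = 135 bytes per row */
for (yrow = 0; yrow < dimen; yrow++) { /* one png_write_row() call per image row */
    png_write_row(png_ptr, row);
    row += row_bytes;
}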
I'm having trouble porting the following C# code to C++:
protected override void OnPaint(CefBrowser browser, CefPaintElementType type, CefRectangle[] dirtyRects
, System.IntPtr buffer, int width, int height)
{
if (isPainting == true)
return;
isPainting = true;
// Save the provided buffer (a bitmap image) as a PNG.
using (System.Drawing.Bitmap bitmap = new System.Drawing.Bitmap(width, height, width * 4, System.Drawing.Imaging.PixelFormat.Format32bppRgb, buffer))
{
bitmap.Save(#"LastOnPaint.png", System.Drawing.Imaging.ImageFormat.Png);
} // End Using bitmap
}
What it does:
Create an image from a WebSite/SVG as rendered by the latest version of Chromium embedded and save it as a file.
So this is the corresponding render-handler in C++:
void RenderHandler::OnPaint(
CefRefPtr<CefBrowser> browser,
CefRenderHandler::PaintElementType type,
const CefRenderHandler::RectList& dirtyRects,
const void* buffer, int width, int height
) {
// size_t len = sizeof(buffer) / sizeof(void*);
// printf("buffer length: %zu\n", len); // 1...
// Array size is probably: width*height * 4;
}
So I was looking into what C# does in the bitmap-constructor, which is the following:
public Bitmap(int width, int height, int stride, PixelFormat format, IntPtr scan0)
{
IntPtr bitmap = IntPtr.Zero;
int status = Gdip.GdipCreateBitmapFromScan0(width, height, stride, unchecked((int)format), new HandleRef(null, scan0), out bitmap);
Gdip.CheckStatus(status);
SetNativeImage(bitmap);
}
internal void SetNativeImage(IntPtr handle) {
if (handle == IntPtr.Zero)
throw new ArgumentException(SR.GetString(SR.NativeHandle0), "handle");
nativeImage = handle;
}
Which traces to
internal const string Gdiplus = "gdiplus.dll";
[DllImport(ExternDll.Gdiplus, SetLastError=true, ExactSpelling=true, CharSet=System.Runtime.InteropServices.CharSet.Unicode)] // 3 = Unicode
[ResourceExposure(ResourceScope.Machine)]
internal static extern int GdipCreateBitmapFromScan0(int width, int height, int stride, int format, HandleRef scan0, out IntPtr bitmap);
So I thought I could just call GdipCreateBitmapFromScan0 from the GDI+ flat API (gdiplusflat.h) and be almost finished:
GpStatus WINGDIPAPI GdipCreateBitmapFromScan0(INT width
, INT height, INT stride, PixelFormat format
, BYTE* scan0, GpBitmap** bitmap)
So I gathered the necessary header files for GDI+, which was a horrible experience:
#ifndef __BITMAPHELPER_H__
#define __BITMAPHELPER_H__
// #define WIN32_LEAN_AND_MEAN
#pragma warning(disable:4458)
#include <Windows.h>
#include <ObjIdl.h>
#include <minmax.h>
#include <gdiplus.h>
#include <wingdi.h>
#include <gdiplusbitmap.h>
#include <gdiplusflat.h>
using namespace Gdiplus;
#pragma comment (lib,"gdiplus.lib")
#pragma warning(default:4458)
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <cstdbool>
#include <algorithm>
#include <memory>
And I thought this would about do it:
#include "BitmapHelper.h"
static void Test()
{
GpBitmap *bitmap = NULL;
GdipCreateBitmapFromScan0(100, 100, 0, PixelFormat32bppARGB, NULL, &bitmap); // create a bitmap object with specified width/height/color
// GpGraphics *graph;
// Image * syntaxTest = NULL;
//syntaxTest->FromFile(TEXT("d:\\abc.jpg"), true); // create an image object
// Bitmap::FromBITMAPINFO
// GpImage *image = NULL;
// Gdiplus::Image()
Bitmap *bmp = NULL;
// GdipLoadImageFromFile(TEXT("d:\\abc.jpg"), &image); // create an image object
// GdipGetImageGraphicsContext(bitmap, &graph); // create a graphic object via bitmap object
// GdipDrawImageI(graph, image, 100, 100); // draw image to this graphic object, it can be done
}
However, it turns out the compiler doesn't know GdipCreateBitmapFromScan0, although it's definitely declared in <gdiplusflat.h>...
How do I create a bitmap/image from Scan0?
Note:
While I am at it, I don't want to resort to C++.NET, and ideally not to the WinAPI either, because I'd like it to work on Linux too. And not to a monstrous dependency like SDL either.
So far, it looks like my possible alternatives are using this code:
https://codereview.stackexchange.com/questions/196084/read-and-write-bmp-file-in-c
which means I have to create the bitmap header myself.
Or I could use some code from ImageIO.
I can't quite believe that creating a simple bitmap on even a single operating system is that hard...
Is there really no better (and portable) way to create a simple bitmap from a trivial array of pixel colors?
And why does the compiler not find GdipCreateBitmapFromScan0?
If I had used LoadLibrary and GetProcAddress to invoke it instead of f*ing Windows header files, I'd be about finished by now...
And why does #include <gdiplus.h> not include its own dependencies?
Looking at the internals of .NET has led you toward using a function that's not part of the documented, public interface of GDI+. It looks to me like that's the real cause of most of your problems.
What I think you probably want to do is start by creating a Gdiplus::Bitmap object from your pixels. It has a constructor that looks like it'll directly accept your data.
Once you've created the Bitmap object, you call its Save member function. Bitmap is publicly derived from Image, so you're basically dealing with the normal Image::Save to generate a PNG.
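A minimal sketch of that approach, assuming GDI+ has already been started via GdiplusStartup and reusing the GetEncoderClsid() helper that appears later in this thread (buffer, width and height are the OnPaint arguments; the CEF buffer is assumed to be 32-bit BGRA, which is what PixelFormat32bppARGB expects in memory):
Gdiplus::Bitmap bmp(width, height, width * 4, PixelFormat32bppARGB, (BYTE*)buffer); // wraps the raw pixel data, no copy
CLSID pngClsid;
if (GetEncoderClsid(L"image/png", &pngClsid) >= 0)
{
    bmp.Save(L"LastOnPaint.png", &pngClsid, nullptr); // Image::Save writes the PNG
}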
If you want to eliminate the dependency on Windows code, you might consider using (for one obvious possibility) libpng instead. This gives you quite a lot more control over the process, at the expense of being quite a bit more work to use (depending on what you want to do, probably on the order of a half dozen to a dozen lines of code rather than one or two).
So, after having done this in both GDI+ and raw C, I can safely say that it's actually faster, not to mention considerably less problematic and less Google-intensive, to just do the image handling without GDI/GDI+. Whoever implemented GDI+ has major brain damage.
Since I haven't yet handled transparency properly, and haven't yet incorporated lodepng, I've kept GDI+ as an optional extra for the time being.
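For the planned non-GDI path, a rough sketch of what the lodepng route could look like (assumption: the pixel data is already in RGBA byte order; lodepng_encode32_file is lodepng's C entry point for 32-bit RGBA input):
#include "lodepng.h"
// pixels points to width*height*4 bytes of RGBA data
unsigned err = lodepng_encode32_file("LastOnPaint.png", pixels, width, height);
if (err)
    printf("lodepng error %u: %s\n", err, lodepng_error_text(err));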
// A program to read, write, and crop BMP image files.
#include "Bmp.h"
// Make a copy of a string on the heap.
// - Postcondition: the caller is responsible to free
// the memory for the string.
char *_string_duplicate(const char *string)
{
char *copy = (char*)malloc(sizeof(*copy) * (strlen(string) + 1));
if (copy == NULL)
{
// return "Not enough memory for error message";
const char* error_message = "Not enough memory for error message";
size_t len = strlen(error_message);
char* error = (char*)malloc(len * sizeof(char) + 1);
strcpy(error, error_message);
return error;
}
strcpy(copy, string);
return copy;
}
// Check condition and set error message.
bool _check(bool condition, char **error, const char *error_message)
{
bool is_valid = true;
if (!condition)
{
is_valid = false;
if (*error == NULL) // to avoid memory leaks
{
*error = _string_duplicate(error_message);
}
}
return is_valid;
}
// Write an image to an already open file.
// - Postcondition: it is the caller's responsibility to free the memory
// for the error message.
// - Return: true if and only if the operation succeeded.
bool write_bmp(FILE *fp, BMPImage *image, char **error)
{
// Write header
rewind(fp);
size_t num_read = fwrite(&image->header, sizeof(image->header), 1, fp);
if (!_check(num_read == 1, error, "Cannot write image"))
{
return false;
}
// Write image data
num_read = fwrite(image->data, image->header.image_size_bytes, 1, fp);
if (!_check(num_read == 1, error, "Cannot write image"))
{
return false;
}
return true;
}
// Free all memory referred to by the given BMPImage.
void free_bmp(BMPImage *image)
{
free(image->data);
free(image);
}
// Open file. In case of error, print message and exit.
FILE *_open_file(const char *filename, const char *mode)
{
FILE *fp = fopen(filename, mode);
if (fp == NULL)
{
fprintf(stderr, "Could not open file %s\n", filename);
exit(EXIT_FAILURE);
}
return fp;
}
// Close file and release memory.
void _clean_up(FILE *fp, BMPImage *image, char **error)
{
if (fp != NULL)
{
fclose(fp);
}
free_bmp(image);
free(*error);
}
// Print error message and clean up resources.
void _handle_error(char **error, FILE *fp, BMPImage *image)
{
fprintf(stderr, "ERROR: %s\n", *error);
_clean_up(fp, image, error);
exit(EXIT_FAILURE);
}
void write_image(const char *filename, BMPImage *image, char **error)
{
FILE *output_ptr = _open_file(filename, "wb");
if (!write_bmp(output_ptr, image, error))
{
_handle_error(error, output_ptr, image);
}
fflush(output_ptr);
_clean_up(output_ptr, image, error); // _clean_up closes the file, so don't fclose it twice
}
// Return the size of an image row in bytes.
// - Precondition: the header must have the width of the image in pixels.
uint32_t computeImageSize(BMPHeader *bmp_header)
{
uint32_t bytes_per_pixel = bmp_header->bits_per_pixel / BITS_PER_BYTE;
uint32_t bytes_per_row_without_padding = bmp_header->width_px * bytes_per_pixel;
uint32_t padding = (4 - (bmp_header->width_px * bytes_per_pixel) % 4) % 4;
uint32_t row_size_bytes = bytes_per_row_without_padding + padding;
return row_size_bytes * bmp_header->height_px;
}
#ifdef USE_GDI
#pragma warning(disable:4189)
int GetEncoderClsid(const WCHAR* format, CLSID* pClsid)
{
UINT num = 0; // number of image encoders
UINT size = 0; // size of the image encoder array in bytes
Gdiplus::ImageCodecInfo* pImageCodecInfo = NULL;
Gdiplus::GetImageEncodersSize(&num, &size);
if (size == 0)
return -1; // Failure
pImageCodecInfo = (Gdiplus::ImageCodecInfo*)(malloc(size));
if (pImageCodecInfo == NULL)
return -1; // Failure
Gdiplus::GetImageEncoders(num, size, pImageCodecInfo);
for (UINT j = 0; j < num; ++j)
{
if (wcscmp(pImageCodecInfo[j].MimeType, format) == 0)
{
*pClsid = pImageCodecInfo[j].Clsid;
free(pImageCodecInfo);
return j; // Success
} // if (wcscmp(pImageCodecInfo[j].MimeType, format) == 0)
} // Next j
free(pImageCodecInfo);
return -1; // Failure
}
// https://github.com/lvandeve/lodepng
static bool notInitialized = true;
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer)
{
// HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
if (notInitialized)
{
// https://learn.microsoft.com/en-us/windows/desktop/api/gdiplusinit/nf-gdiplusinit-gdiplusstartup
Gdiplus::GdiplusStartupInput gdiplusStartupInput;
ULONG_PTR gdiplusToken;
Gdiplus::Status isOk = Gdiplus::GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, NULL);
if (isOk != Gdiplus::Status::Ok)
{
printf("Failed on GdiplusStartup\n");
}
notInitialized = false;
// defer
// GdiplusShutdown(gdiplusToken);
} // End if (notInitialized)
// https://learn.microsoft.com/en-us/windows/desktop/gdiplus/-gdiplus-constant-image-pixel-format-constants
Gdiplus::Bitmap* myBitmap = new Gdiplus::Bitmap(width, height, width*4, PixelFormat32bppARGB, (BYTE*)buffer);
// myBitmap->RotateFlip(Gdiplus::Rotate180FlipY);
CLSID pngClsid;
// int result = GetEncoderClsid(L"image/tiff", &tiffClsid);
int result = GetEncoderClsid(L"image/png", &pngClsid);
printf("End GetEncoderClsid:\n");
if (result == -1)
printf("Error: GetEncoderClsid\n");
// throw std::runtime_error("Bitmap::Save");
// if (Ok != myBitmap->Save(L"D\foobartest.png", &pngClsid)) printf("Error: Bitmap::Save");
// WTF ? I guess a standard C/C++-stream would have been too simple ?
IStream* oStream = nullptr;
if (CreateStreamOnHGlobal(NULL, TRUE, (LPSTREAM*)&oStream) != S_OK)
printf("Error on creating an empty IStream\n");
Gdiplus::EncoderParameters encoderParameters;
encoderParameters.Count = 1;
encoderParameters.Parameter[0].Guid = Gdiplus::EncoderQuality;
encoderParameters.Parameter[0].Type = Gdiplus::EncoderParameterValueTypeLong;
encoderParameters.Parameter[0].NumberOfValues = 1;
ULONG quality = 100;
encoderParameters.Parameter[0].Value = &quality;
// https://learn.microsoft.com/en-us/windows/desktop/api/gdiplusheaders/nf-gdiplusheaders-image-save(inistream_inconstclsid_inconstencoderparameters)
if (Gdiplus::Status::Ok != myBitmap->Save(oStream, &pngClsid, &encoderParameters))
printf("Error: Bitmap::Save\n");
// throw std::runtime_error("Bitmap::Save");
ULARGE_INTEGER ulnSize;
LARGE_INTEGER lnOffset;
lnOffset.QuadPart = 0;
oStream->Seek(lnOffset, STREAM_SEEK_END, &ulnSize);
oStream->Seek(lnOffset, STREAM_SEEK_SET, NULL);
uint8_t *pBuff = new uint8_t[(unsigned int)ulnSize.QuadPart];
ULONG ulBytesRead;
oStream->Read(pBuff, (ULONG)ulnSize.QuadPart, &ulBytesRead);
FILE *output_ptr = _open_file(filename, "wb");
fwrite((void*)pBuff, sizeof(uint8_t), (unsigned int)ulnSize.QuadPart, output_ptr);
fflush(output_ptr);
fclose(output_ptr);
oStream->Release();
delete[] pBuff;
delete myBitmap;
// https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp
// std::string rotated_string = base64_encode((const unsigned char*)pBuff, ulnSize.QuadPart);
}
#pragma warning(default:4189)
#else
// TODO: PNG-Encoder
// https://github.com/lvandeve/lodepng
// https://lodev.org/lodepng/
BMPImage * CreateBitmapFromScan0(int32_t w, int32_t h, uint8_t* scan0)
{
BMPImage *new_image = (BMPImage *)malloc(sizeof(*new_image));
memset(&new_image->header, 0, sizeof(new_image->header)); // zero the header in place instead of copying from an uninitialized malloc'd one
new_image->header.type = MAGIC_VALUE;
new_image->header.bits_per_pixel = BITS_PER_PIXEL;
new_image->header.width_px = w;
new_image->header.height_px = h;
new_image->header.image_size_bytes = computeImageSize(&new_image->header);
new_image->header.size = BMP_HEADER_SIZE + new_image->header.image_size_bytes;
new_image->header.dib_header_size = DIB_HEADER_SIZE;
new_image->header.offset = (uint32_t) sizeof(BMPHeader);
new_image->header.num_planes = 1;
new_image->header.compression = 0;
new_image->header.reserved1 = 0;
new_image->header.reserved2 = 0;
new_image->header.num_colors = 0;
new_image->header.important_colors = 0;
new_image->header.x_resolution_ppm = 3780; // image->header.x_resolution_ppm;
new_image->header.y_resolution_ppm = 3780; // image->header.y_resolution_ppm;
new_image->data = (uint8_t*)malloc(sizeof(*new_image->data) * new_image->header.image_size_bytes);
memcpy(new_image->data, scan0, new_image->header.image_size_bytes);
return new_image;
}
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer)
{
BMPImage * image = CreateBitmapFromScan0((int32_t)width, (int32_t)height, (uint8_t*)buffer);
char *error = NULL;
write_image(filename, image, &error);
}
#endif
Header:
#ifndef BITMAPLION_BITMAPINFORMATION_H
#define BITMAPLION_BITMAPINFORMATION_H
#ifdef __cplusplus
// #include <iostream>
// #include <fstream>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h> // for malloc
#include <stdint.h>
#include <stdbool.h>
#include <string.h> // for strlen, strcopy
#endif
#ifdef __linux__
//linux specific code goes here
#elif _WIN32
// windows specific code goes here
#pragma warning(disable:4458)
#include <Windows.h>
#include <ObjIdl.h>
#include <minmax.h>
#include <gdiplus.h>
// #include <gdiplusheaders.h>
// #include <wingdi.h>
// #include <gdiplusbitmap.h>
// #include <gdiplusflat.h>
// #include <Gdipluspixelformats.h>
#pragma comment (lib,"gdiplus.lib")
// using namespace Gdiplus;
#pragma warning(default:4458)
#else
#endif
#define BMP_HEADER_SIZE 54
#define DIB_HEADER_SIZE 40
// Correct values for the header
#define MAGIC_VALUE 0x4D42
#define NUM_PLANE 1
#define COMPRESSION 0
#define NUM_COLORS 0
#define IMPORTANT_COLORS 0
#define BITS_PER_BYTE 8
// #define BITS_PER_PIXEL 24
#define BITS_PER_PIXEL 32
#ifdef _MSC_VER
#pragma pack(push) // save the original data alignment
#pragma pack(1) // Set data alignment to 1 byte boundary
#endif
typedef struct
#ifndef _MSC_VER
__attribute__((packed))
#endif
{
uint16_t type; // Magic identifier: 0x4d42
uint32_t size; // File size in bytes
uint16_t reserved1; // Not used
uint16_t reserved2; // Not used
uint32_t offset; // Offset to image data in bytes from beginning of file
uint32_t dib_header_size; // DIB Header size in bytes
int32_t width_px; // Width of the image
int32_t height_px; // Height of image
uint16_t num_planes; // Number of color planes
uint16_t bits_per_pixel; // Bits per pixel
uint32_t compression; // Compression type
uint32_t image_size_bytes; // Image size in bytes
int32_t x_resolution_ppm; // Pixels per meter
int32_t y_resolution_ppm; // Pixels per meter
uint32_t num_colors; // Number of colors
uint32_t important_colors; // Important colors
} BMPHeader;
#ifdef _MSC_VER
#pragma pack(pop) // restore the previous pack setting
#endif
typedef struct {
BMPHeader header;
// unsigned char* data;
// It is more informative and will force a necessary compiler error
// on a rare machine with 16-bit char.
uint8_t* data;
} BMPImage;
// #define USE_GDI true
#ifndef USE_GDI
BMPImage * CreateBitmapFromScan0(int32_t w, int32_t h, uint8_t* scan0);
#endif
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer);
#endif //BITMAPLION_BITMAPINFORMATION_H
One of the things I'm trying to achieve is parallel encoding via FFmpeg's C API. This looks to work out of the box quite nicely; however, I've changed the goal posts slightly:
In an existing application, I already have a thread pool at hand. Instead of using another thread pool via FFmpeg, I would like to reuse the existing thread pool in my application. Having studied the latest FFmpeg trunk docs, this very much looks possible.
Using some FFmpeg sample code, I've created a sample application to demonstrate what I'm trying to achieve (see below). The sample app generates a video-only mpeg2 ts using the mp2v codec.
The problem I'm experiencing is that the custom 'thread_execute' or 'thread_execute2' functions are never invoked, despite the fact that the codec appears to indicate that threading is supported. Please be aware that I have not plumbed in the thread pool just yet; my first goal is simply to get it to call the custom function pointers.
I've tried to get assistance on the FFmpeg mailing lists but to no avail.
#include <iostream>
#include <thread>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cstring>
#include <future>
extern "C"
{
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
//#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
#define STREAM_DURATION 1000.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2str(char *buf, int64_t ts)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_string(buf,ts);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2timestr(char *buf, int64_t ts, AVRational *tb)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_time_string(buf,ts,tb);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_err2str(char *errbuf, size_t errbuf_size, int errnum)
{
std::memset(errbuf,0,errbuf_size);
return av_make_error_string(errbuf,errbuf_size,errnum);
}
int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)
{
// Do it all serially for now
std::cout << "thread_execute" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg);
}
return 0;
}
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)
{
// Do it all serially for now
std::cout << "thread_execute2" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg, k, count);
}
return 0;
}
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
char s[AV_TS_MAX_STRING_SIZE];
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
cb_av_ts2str(s,pkt->pts), cb_av_ts2timestr(s,pkt->pts, time_base),
cb_av_ts2str(s,pkt->dts), cb_av_ts2timestr(s,pkt->dts, time_base),
cb_av_ts2str(s,pkt->duration), cb_av_ts2timestr(s,pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS ||
c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
{
c->thread_type = FF_THREAD_FRAME;
}
if (c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
c->thread_type = FF_THREAD_SLICE;
}
c->execute = &thread_execute;
c->execute2 = &thread_execute2;
c->thread_count = 4;
// NOTE: Testing opaque.
c->opaque = (void*)0xff;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
//AVDictionary *opt = NULL;
//av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
//av_dict_free(&opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open video codec: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
/*if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);*/
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
if (frame)
{
ret = avcodec_send_frame(ost->enc, frame);
if (ret < 0)
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error encoding video frame: %s\n", cb_av_err2str(s, AV_ERROR_MAX_STRING_SIZE, ret));
exit(1);
}
}
av_init_packet(&pkt);
ret = avcodec_receive_packet(ost->enc,&pkt);
if (ret < 0)
{
if (ret == AVERROR(EAGAIN)) { ret = 0; }
else
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error receiving packet: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
}
else
{
got_packet = 1;
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
}
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error while writing video frame: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec /**audio_codec,*/ *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
const char *pfilename = filename;
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "mpegts", pfilename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", pfilename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
/*if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}*/
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
//if (have_audio)
// open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, pfilename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, pfilename, AVIO_FLAG_WRITE);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open '%s': %s\n", pfilename,
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error occurred when opening output file: %s\n",
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
//encode_audio = !write_audio_frame(oc, &audio_st);
}
//std::this_thread::sleep_for(std::chrono::milliseconds(35));
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
//
Environment:
Ubuntu Zesty (17.04)
FFmpeg version 3.2.4 (via package manager)
gcc 6.3 (C++)
You have to do the following:
1. Call avcodec_alloc_context3(...). This call will set the default execute and execute2 functions in the new context.
2. Set c->thread_count = number_of_threads_in_your_thread_pool().
3. Call avcodec_open2(...).
4. Set c->execute and c->execute2 to point to your functions.
5. Call ff_thread_free(c). This function isn't exposed in the libavcodec headers, but you can add the following declaration:
extern "C" void ff_thread_free(AVCodecContext *s);
The drawback is that libavcodec will still create its internal thread pool during the avcodec_open2(...) call, and that pool is then deleted by the ff_thread_free() call.
The internal thread pool is very efficient, but it's not ideal if you plan to do parallel encoding of multiple video feeds: in that case libavcodec will create a separate thread pool for each video feed being encoded.
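Putting those steps together, a hedged sketch (ff_thread_free is an internal symbol, so this relies on it being reachable in your libavcodec build; codec is the AVCodec* found earlier, and the usual codec parameter setup is abbreviated):
extern "C" void ff_thread_free(AVCodecContext *s);

AVCodecContext *c = avcodec_alloc_context3(codec); // installs the default execute/execute2
c->thread_count = 4;                               // size of your own thread pool
// ... set width, height, time_base, pix_fmt etc. as in add_stream() ...
if (avcodec_open2(c, codec, nullptr) < 0)          // creates libavcodec's internal pool
    exit(1);
c->execute  = &thread_execute;                     // now point at the custom callbacks
c->execute2 = &thread_execute2;
ff_thread_free(c);                                 // tear down the internal pool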
I have a question regarding buffer reads with FFmpeg.
The idea is as follows: an outside module (which I cannot change) provides me a video stream in chunks of data, giving me the input data and its size in bytes (the "framfunction" function's input parameters). I have to copy the input data to a buffer, read it with FFmpeg (a Zeranoe build) and extract video frames. Each time I receive new data, my function "framfunction" will be called. All unprocessed data from the first run is moved to the beginning of the buffer, followed by the new data on the second run, and so on. It is essentially based on the FFmpeg source examples and the Dranger tutorials. My current attempt looks like this; just look at the comments (I've left only the ones regarding the current buffer handling) to get a picture of what I want to do. I know it is messy, and it works - sort of; it skips some frames. Any suggestions about the FFmpeg code and the buffer design are welcome:
#include <iostream>
#include <string>
extern "C"
{
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavformat/avio.h>
#include <libavutil/file.h>
}
struct buffer_data {
uint8_t *ptr;
size_t size;
};
static int read_packet(void *opaque, uint8_t *buf, int buf_size)
{
struct buffer_data *bd = (struct buffer_data *)opaque;
buf_size = FFMIN(buf_size, bd->size);
memcpy(buf, bd->ptr, buf_size);
bd->ptr += buf_size;
bd->size -= buf_size;
return buf_size;
}
class videoclass
{
private:
uint8_t* inputdatabuffer;
size_t offset;
public:
videoclass();
~videoclass();
int framfunction(uint8_t* inputbytes, int inputbytessize);
};
videoclass::videoclass()
: inputdatabuffer(nullptr)
, offset(0)
{
inputdatabuffer = new uint8_t[8388608]; //buffer where the input data will be stored
}
videoclass::~videoclass()
{
delete[] inputdatabuffer;
}
int videoclass::framfunction(uint8_t* inputbytes, int inputbytessize)
{
int i, videoStream, numBytes, frameFinished;
AVFormatContext *pFormatCtx = NULL;
AVCodecContext *pCodecCtx = NULL;
AVIOContext *avio_ctx = NULL;
AVCodec *pCodec = NULL;
AVFrame *pFrame = NULL;
AVFrame *pFrameRGB = NULL;
AVPacket packet;
uint8_t *buffer = NULL;
uint8_t *avio_ctx_buffer = NULL;
size_t avio_ctx_buffer_size = 4096;
size_t bytes_processed = 0;
struct buffer_data bd = { 0 };
//if (av_file_map("sample.ts", &inputbytes, &inputbytessize, 0, NULL) < 0)//
// return -1;
memcpy(inputdatabuffer + offset, inputbytes, inputbytessize);//copy new data to buffer inputdatabuffer with offset calculated at the end of previous function run. In other words - cope new data after unprocessed data from a previous call
offset += inputbytessize; //total number of bytes in buffer. Size of an unprocessed data from the last run + size of new data (inputbytessize)
bd.ptr = inputdatabuffer;
bd.size = offset;
if (!(pFormatCtx = avformat_alloc_context()))
return -1;
avio_ctx_buffer = (uint8_t *)av_malloc(avio_ctx_buffer_size);
avio_ctx = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size,0, &bd, &read_packet, NULL, NULL);
pFormatCtx->pb = avio_ctx;
av_register_all();
avcodec_register_all();
pFrame = av_frame_alloc();
pFrameRGB = av_frame_alloc();
if (avformat_open_input(&pFormatCtx, NULL, NULL, NULL) != 0)
return -2;
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
return -3;
videoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
break;
}
if (videoStream == -1)
return -4;
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL){
std::cout << "Unsupported codec" << std::endl;
return -5;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
return -6;
numBytes = avpicture_get_size(PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height);
buffer = (uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)pFrameRGB, buffer, PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height);
while (av_read_frame(pFormatCtx, &packet) >= 0){
if (packet.stream_index == videoStream){
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
if (frameFinished){
std::cout << "Yaay, frame found" << std::endl;
}
}
av_free_packet(&packet);
bytes_processed = (size_t)pFormatCtx->pb->pos; // is this the amount of data processed so far (x bytes out of inputbytessize)?
}
offset -= bytes_processed; //size of unprocessed data
av_free(buffer);
av_free(pFrameRGB);
av_free(pFrame);
avcodec_close(pCodecCtx);
av_freep(&avio_ctx->buffer);
av_freep(&avio_ctx);
avformat_close_input(&pFormatCtx);
memmove(inputdatabuffer, inputdatabuffer + bytes_processed, offset);//move unprocessed data to begining of the main buffer
return 0;
}
A call of my function would look something like this:
WHILE(VIDEO_INPUT)
{
READ VIDEO DATA FROM INPUT BUFFER
STORE DATA FROM INPUT BUFFER AND SIZE OF THAT DATA TO VARIABLES NEW_DATA AND NEW_DATA_SIZE
CALL FUNCTION FRAMFUNCTION AND PASS NEW_DATA AND NEW_DATA_SIZE
DO OTHER THINGS
}
What I would like to know is the exact size of the unprocessed data. The comments in the code show my attempt, but I think it is not good enough, so I need some help with that issue.
EDIT: The key question is how to get the correct "bytes_processed" size. I've also made a PDF explaining how my buffer should work (pdf file).
Thanks
This is my first post, so I am thrilled to get some new insights and enlarge my knowledge. Currently I am working on a C project where a binary raw file with 3D data is loaded, processed in CUDA and saved to a new binary raw file.
This is based on the simpleTexture3D project from CUDA Samples:
This is my cpp
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, cuda
#include <vector_types.h>
#include <driver_functions.h>
#include <cuda_runtime.h>
// CUDA utilities and system includes
#include <helper_cuda.h>
#include <helper_functions.h>
#include <vector_types.h>
typedef unsigned int uint;
typedef unsigned char uchar;
const char *sSDKsample = "simpleTexture3D";
const char *volumeFilename = "Bucky.raw";
const cudaExtent volumeSize = make_cudaExtent(32, 32, 32);
const uint width = 64, height = 64, depth=64;
//const char *volumeFilename = "TestOCT.raw";
//const cudaExtent volumeSize = make_cudaExtent(1024, 512, 512);
//
//const uint width = 1024, height = 512, depth=512;
const dim3 blockSize(8, 8, 8);
const dim3 gridSize(width / blockSize.x, height / blockSize.y, depth / blockSize.z);
uint *d_output = NULL;
int *pArgc = NULL;
char **pArgv = NULL;
extern "C" void cleanup();
extern "C" void initCuda(const uchar *h_volume, cudaExtent volumeSize);
extern "C" void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH, uint imageD);
void loadVolumeData(char *exec_path, char *PathToFile);
// render image using CUDA
void render()
{
// call CUDA kernel
render_kernel(gridSize, blockSize, d_output, width, height, depth);
getLastCudaError("render_kernel failed");
}
void cleanup()
{
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
checkCudaErrors(cudaDeviceReset());
}
// Load raw data from disk
uchar *loadRawFile(const char *filename, size_t size)
{
FILE *fp = fopen(filename, "rb");
if (!fp)
{
fprintf(stderr, "Error opening file '%s'\n", filename);
return 0;
}
uchar *data = (uchar *) malloc(size);
size_t read = fread(data, 1, size, fp);
fclose(fp);
printf("Read '%s', %lu bytes\n", filename, read);
return data;
}
// write raw data to disk
int writeRawFile(const char *filename, uchar *data, size_t size)
{
int returnState=0;
// cut file extension from filename
char *a=strdup(filename); // strdup copies the const char* into a new char*; you must free it yourself
int len = strlen(a);
a[len-4] = '\0'; //deletes '.raw'
//printf("%s\n",a);
char b[50];
sprintf(b, "_%dx%dx%d_out.raw", width, height, depth);
//char b[]="_out.raw"; //Add suffix out to filename
char buffer[256]; // <- danger, only storage for 256 characters.
strncpy(buffer, a, sizeof(buffer) - 1);
buffer[sizeof(buffer) - 1] = '\0';                       // strncpy does not guarantee termination
strncat(buffer, b, sizeof(buffer) - strlen(buffer) - 1); // third argument is the remaining space, not the total size
free(a);
FILE *fp = fopen(buffer, "wb"); //Open or create file for writing as binary, all existing data is cleared
if (!fp)
{
fprintf(stderr, "Error opening or creating file '%s'\n", buffer);
return 0;
}
size_t write = fwrite(data, 1, size, fp);
fclose(fp);
if (write==size)
{
printf("Wrote %lu bytes to '%s'\n", write, buffer);
return 0;
}
else
{
printf("Error writing data to file '%s'\n", buffer);
return 1;
}
}
// General initialization call for CUDA Device
int chooseCudaDevice(int argc, char **argv)
{
int result = 0;
result = findCudaDevice(argc, (const char **)argv);
return result;
}
void runAutoTest(char *exec_path, char *PathToFile)
{
// set path
char *path;
if (PathToFile == NULL)
{
path = sdkFindFilePath(volumeFilename, exec_path);
}
else
{
path = PathToFile;
}
if (path == NULL)
{
fprintf(stderr, "Error unable to find 3D Volume file: '%s'\n", volumeFilename);
exit(EXIT_FAILURE);
}
// Allocate output memory
checkCudaErrors(cudaMalloc((void **)&d_output, width*height*depth*sizeof(uchar)));
// zero out the output array with cudaMemset
cudaMemset(d_output, 0, width*height*depth*sizeof(uchar));
// render the volumeData
render_kernel(gridSize, blockSize, d_output, width, height, depth);
checkCudaErrors(cudaDeviceSynchronize());
getLastCudaError("render_kernel failed");
uchar *h_output = (uchar*)malloc(width*height*depth);
checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*depth*sizeof(uchar), cudaMemcpyDeviceToHost));
int wState=writeRawFile(path,h_output,width*height*depth);
checkCudaErrors(cudaFree(d_output));
free(h_output);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
//exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
}
void loadVolumeData(char *exec_path, char *PathToFile)
{
char *path;
// load volume data
if (PathToFile == NULL)
{
path = sdkFindFilePath(volumeFilename, exec_path);
}
else
{
path = PathToFile;
}
if (path == NULL)
{
fprintf(stderr, "Error unable to find 3D Volume file: '%s'\n", volumeFilename);
exit(EXIT_FAILURE);
}
size_t size = volumeSize.width*volumeSize.height*volumeSize.depth;
uchar *h_volume = loadRawFile(path, size);
//int wState=writeRawFile(path,h_volume,size);
initCuda(h_volume, volumeSize);
free(h_volume);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
pArgc = &argc;
pArgv = argv;
char *image_file = NULL;
printf("%s Starting...\n\n", sSDKsample);
if (checkCmdLineFlag(argc, (const char **)argv, "file")) //Note cmd line argument is -file "PathToFile/File.raw"
{ // for example -file "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v7.0\2_Graphics\simpleTexture3D_FanBeamCorr\data\TestOCT_Kopie.raw"
getCmdLineArgumentString(argc, (const char **)argv, "file", &image_file);
}
if (image_file)
{
chooseCudaDevice(argc, argv);
loadVolumeData(argv[0],image_file);
runAutoTest(argv[0],image_file);
}
else
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
chooseCudaDevice(argc, argv);
loadVolumeData(argv[0],NULL);
runAutoTest(argv[0],NULL);
}
printf("I am finished...\n"
"Can I get some ice cream please\n");
exit(EXIT_SUCCESS);
}
And this is my .cu
#ifndef _SIMPLETEXTURE3D_KERNEL_CU_
#define _SIMPLETEXTURE3D_KERNEL_CU_
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <helper_cuda.h>
#include <helper_math.h>
typedef unsigned int uint;
typedef unsigned char uchar;
texture<uchar, 3, cudaReadModeNormalizedFloat> tex; // 3D texture
cudaArray *d_volumeArray = 0;
__global__ void
d_render(uint *d_output, uint imageW, uint imageH, uint imageD)
{
uint x = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
uint y = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;
uint z = __umul24(blockIdx.z, blockDim.z) + threadIdx.z;
// float u = x / (float) imageW;
// float v = y / (float) imageH;
//float w = z / (float) imageD;
// // read from 3D texture
// float voxel = tex3D(tex, u, v, w);
uint ps=__umul24(imageW,imageH);
if ((x < imageW) && (y < imageH) && (z < imageD))
{
// write output color
uint i = __umul24(z,ps) +__umul24(y, imageW) + x;
d_output[1] = (uchar) 255;//+0*voxel*255;
}
}
extern "C"
void initCuda(const uchar *h_volume, cudaExtent volumeSize)
{
// create 3D array
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
checkCudaErrors(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));
// copy data to 3D array
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void *)h_volume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
copyParams.dstArray = d_volumeArray;
copyParams.extent = volumeSize;
copyParams.kind = cudaMemcpyHostToDevice;
checkCudaErrors(cudaMemcpy3D(&copyParams));
// set texture parameters
tex.normalized = true; // access with normalized texture coordinates
tex.filterMode = cudaFilterModeLinear; // linear interpolation
tex.addressMode[0] = cudaAddressModeBorder; // border addressing for out-of-range coordinates
tex.addressMode[1] = cudaAddressModeBorder;
tex.addressMode[2] = cudaAddressModeBorder;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
}
extern "C"
void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH, uint imageD)
{
d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, imageD);
}
#endif // #ifndef _SIMPLETEXTURE3D_KERNEL_CU_
As you can see, currently I set all values to zero except index = 1, which is set to 255. Yet when I open the image stack in Fiji, I see that the fourth pixel on the first slice is white. If I use index = i instead, I get white vertical lines across the image stack, periodically every four columns. Generally speaking, it seems that only every fourth element is being indexed in the cudaArray. So I am wondering if there is some kind of error here resulting from sizeof(uchar)=1 and sizeof(uint)=4. There would obviously be the factor 4 :)
I am eager to hear from you experts.
Cheers Mika
I figured it out by myself. The kernel works with a uint* d_output, while the copy to the host is written into a uchar* h_output:
uchar *h_output = (uchar*)malloc(width*height*depth);
checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*depth*sizeof(uchar), cudaMemcpyDeviceToHost));
This mismatch (sizeof(uint) is 4, sizeof(uchar) is 1) led to the strange behavior.
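A hedged sketch of one way to make the two sides consistent, keeping the output 8-bit on both device and host (names follow the sample code; alternatively one could keep uint* and copy width*height*depth*sizeof(uint) bytes into a uint host buffer):
// kernel now writes one uchar per voxel (render_kernel's signature changes accordingly)
__global__ void d_render(uchar *d_output, uint imageW, uint imageH, uint imageD)
{
    uint x = blockIdx.x * blockDim.x + threadIdx.x;
    uint y = blockIdx.y * blockDim.y + threadIdx.y;
    uint z = blockIdx.z * blockDim.z + threadIdx.z;
    if ((x < imageW) && (y < imageH) && (z < imageD))
    {
        uint i = (z * imageH + y) * imageW + x;
        d_output[i] = 255; // one byte per voxel, so no factor-of-4 stride on the host side
    }
}
// host side: element count and element size now match on both ends
uchar *d_output = NULL;
checkCudaErrors(cudaMalloc((void **)&d_output, width * height * depth * sizeof(uchar)));
cudaMemset(d_output, 0, width * height * depth * sizeof(uchar));
render_kernel(gridSize, blockSize, d_output, width, height, depth);
checkCudaErrors(cudaMemcpy(h_output, d_output, width * height * depth * sizeof(uchar), cudaMemcpyDeviceToHost));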