c++ opencl return CL_OUT_OF_RESOURCES

c++ opencl return CL_OUT_OF_RESOURCES - c++

I'm learning OpenCL and trying to apply a black and white on a picture but enqueueNDRangeKernel return CL_OUT_OF_RESOURCES and I don't understand why. OpenCL is running on a GTX 980M, and OpenCL 1.2 .
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <string>
#include <CL/cl.hpp>
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_write.h"
int main() {
unsigned int vectorSize = 1000;
const std::string progCode = "__kernel \n"
"void img_kernel( __read_only image2d_t inputImage, __write_only image2d_t outputImage) \n"
"{ \n"
"const sampler_t sampler=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; \n"
" int width = get_image_width(inputImage); \n"
" int height = get_image_height(inputImage); \n"
" int2 pixelcoord = (int2) (get_global_id(0), get_global_id(1)); \n"
" float4 pixel = read_imagef(inputImage, sampler, pixelcoord); \n"
" float color = (pixel.x + pixel.y + pixel.z)/3; \n"
" float4 outColor = (float4)(pixel.x,pixel.y,pixel.z, 1.0); \n"
" write_imagef(outputImage, pixelcoord, outColor); } \n";
int imageX, imageY, imageN;
unsigned char *dataImage = stbi_load("test.jpg", &imageX, &imageY, &imageN, 3);
if (dataImage == nullptr)
{
std::cout << "Unable to load picture" << std::endl;
getchar();
return 1;
}
cl_int error;
std::vector<cl::Platform> platformsList;
error = cl::Platform::get(&platformsList);
if (error != CL_SUCCESS)
{
std::cout << "Unable to find any OpenCL platforms" << std::endl;
getchar();
return 1;
}
std::vector<cl::Device> devicesList;
error = platformsList[0].getDevices(CL_DEVICE_TYPE_DEFAULT, &devicesList);
if (error != CL_SUCCESS)
{
std::cout << "Unable to find any OpenCL device" << std::endl;
getchar();
return 1;
}
cl::Device currentDevice = devicesList[0];
std::string nameDevice, driverDevice;
error = currentDevice.getInfo(CL_DEVICE_NAME, &nameDevice);
error = currentDevice.getInfo(CL_DRIVER_VERSION, &driverDevice);
std::cout << "Device : " << nameDevice << " " << driverDevice << std::endl;;
cl::Context context(currentDevice);
cl::Program::Sources source;
source.push_back({ progCode.c_str(), progCode.size() });
cl::Program program(context, source);
if (program.build({ currentDevice }) != CL_SUCCESS)
{
std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(currentDevice) << "\n";
getchar();
exit(1);
}
cl::ImageFormat globalImgFormat;
globalImgFormat.image_channel_data_type = CL_UNSIGNED_INT8;
globalImgFormat.image_channel_order = CL_RGB;
cl::size_t<3> origin;
origin[0] = 0; origin[1] = 0, origin[2] = 0;
cl::size_t<3> region;
region[0] = imageX; region[1] = imageY; region[2] = 1;
cl::Image2D inputImage(context, CL_MEM_READ_ONLY, globalImgFormat, imageX, imageY, 0, dataImage, &error);
if (error != CL_SUCCESS)
{
std::cout << "Unable to create cl Image for input." << std::endl;
getchar();
return 1;
}
cl::Image2D outputImage(context, CL_MEM_WRITE_ONLY, globalImgFormat, imageX, imageY, 0, nullptr, &error);
if (error != CL_SUCCESS)
{
std::cout << "Unable to create cl Image for output." << std::endl;
getchar();
return 1;
}
cl::CommandQueue queue(context, currentDevice);
cl::Kernel image_kernel(program, "img_kernel", &error);
if (error != CL_SUCCESS)
{
std::cout << "Unable to create kernel." << std::endl;
getchar();
return 1;
}
error = image_kernel.setArg(0, inputImage);
if (error != CL_SUCCESS)
{
std::cout << "Unable to set param." << std::endl;
getchar();
return 1;
}
error = image_kernel.setArg(1, outputImage);
if (error != CL_SUCCESS)
{
std::cout << "Unable to set param." << std::endl;
getchar();
return 1;
}
cl::NDRange globalSize(imageX, imageY);
error = queue.enqueueNDRangeKernel(image_kernel, cl::NullRange, globalSize, cl::NullRange);
if (error != CL_SUCCESS)
{
std::cout << "Unable to compute Image data." << std::endl;
getchar();
return 1;
}
queue.finish();
unsigned char *resultPros = new unsigned char[imageX * imageY * imageN];
error = queue.enqueueReadImage(outputImage, CL_TRUE, origin, region, 0, 0, resultPros);
stbi_write_bmp("testresul.jpg", imageX, imageY, imageN, resultPros);
stbi_image_free(dataImage);
stbi_image_free(resultPros);
getchar();
return 0;
}

On NVIDIA hardware if you write outside of a buffer or image (which is an undefined operation) then CL_OUT_OF_RESOURCES is a common error to get. It's better than the early hardware or drivers that simply crashed! Double-check your writes.

It seems that the combination of image_channel_data_type and image_channel_order is not correct. Could be your problem related with this?
Please, take a look here:
https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/cl_image_format.html
Regards

Related

audio do not stop recording after pause ffmpeg c++

I am developing an application that record the screen and the audio from microphone. I implemented the pause function stopping video and audio thread on a condition variable, resuming them with a notify on the same condition variable. This is done in captureAudio(), in the main while. In this way works on macOS and linux, where I use avfoudation and alsa respectively, but on windows, with dshow, keep recording audio during the pause, when the thread is waiting on the condition variable. Does anybody know how can I fix this behaviour?
#include "ScreenRecorder.h"
using namespace std;
ScreenRecorder::ScreenRecorder() : pauseCapture(false), stopCapture(false), started(false), activeMenu(true) {
avcodec_register_all();
avdevice_register_all();
width = 1920;
height = 1200;
}
ScreenRecorder::~ScreenRecorder() {
if (started) {
value = av_write_trailer(outAVFormatContext);
if (value < 0) {
cerr << "Error in writing av trailer" << endl;
exit(-1);
}
avformat_close_input(&inAudioFormatContext);
if(inAudioFormatContext == nullptr){
cout << "inAudioFormatContext close successfully" << endl;
}
else{
cerr << "Error: unable to close the inAudioFormatContext" << endl;
exit(-1);
//throw "Error: unable to close the file";
}
avformat_free_context(inAudioFormatContext);
if(inAudioFormatContext == nullptr){
cout << "AudioFormat freed successfully" << endl;
}
else{
cerr << "Error: unable to free AudioFormatContext" << endl;
exit(-1);
}
avformat_close_input(&pAVFormatContext);
if (pAVFormatContext == nullptr) {
cout << "File close successfully" << endl;
}
else {
cerr << "Error: unable to close the file" << endl;
exit(-1);
//throw "Error: unable to close the file";
}
avformat_free_context(pAVFormatContext);
if (pAVFormatContext == nullptr) {
cout << "VideoFormat freed successfully" << endl;
}
else {
cerr << "Error: unable to free VideoFormatContext" << endl;
exit(-1);
}
}
}
/*==================================== VIDEO ==============================*/
int ScreenRecorder::openVideoDevice() throw() {
value = 0;
options = nullptr;
pAVFormatContext = nullptr;
pAVFormatContext = avformat_alloc_context();
string dimension = to_string(width) + "x" + to_string(height);
av_dict_set(&options, "video_size", dimension.c_str(), 0); //option to set the dimension of the screen section to record
#ifdef _WIN32
pAVInputFormat = av_find_input_format("gdigrab");
if (avformat_open_input(&pAVFormatContext, "desktop", pAVInputFormat, &options) != 0) {
cerr << "Couldn't open input stream" << endl;
exit(-1);
}
#elif defined linux
int offset_x = 0, offset_y = 0;
string url = ":0.0+" + to_string(offset_x) + "," + to_string(offset_y); //custom string to set the start point of the screen section
pAVInputFormat = av_find_input_format("x11grab");
value = avformat_open_input(&pAVFormatContext, url.c_str(), pAVInputFormat, &options);
if (value != 0) {
cerr << "Error in opening input device (video)" << endl;
exit(-1);
}
#else
value = av_dict_set(&options, "pixel_format", "0rgb", 0);
if (value < 0) {
cerr << "Error in setting pixel format" << endl;
exit(-1);
}
value = av_dict_set(&options, "video_device_index", "1", 0);
if (value < 0) {
cerr << "Error in setting video device index" << endl;
exit(-1);
}
pAVInputFormat = av_find_input_format("avfoundation");
if (avformat_open_input(&pAVFormatContext, "Capture screen 0:none", pAVInputFormat, &options) != 0) { //TODO trovare un modo per selezionare sempre lo schermo (forse "Capture screen 0")
cerr << "Error in opening input device" << endl;
exit(-1);
}
#endif
//set frame per second
value = av_dict_set(&options, "framerate", "30", 0);
if (value < 0) {
cerr << "Error in setting dictionary value (setting framerate)" << endl;
exit(-1);
}
value = av_dict_set(&options, "preset", "medium", 0);
if (value < 0) {
cerr << "Error in setting dictionary value (setting preset value)" << endl;
exit(-1);
}
/*
value = av_dict_set(&options, "vsync", "1", 0);
if(value < 0){
cerr << "Error in setting dictionary value (setting vsync value)" << endl;
exit(-1);
}
*/
value = av_dict_set(&options, "probesize", "60M", 0);
if (value < 0) {
cerr << "Error in setting probesize value" << endl;
exit(-1);
}
//get video stream infos from context
value = avformat_find_stream_info(pAVFormatContext, nullptr);
if (value < 0) {
cerr << "Error in retrieving the stream info" << endl;
exit(-1);
}
VideoStreamIndx = -1;
for (int i = 0; i < pAVFormatContext->nb_streams; i++) {
if (pAVFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
VideoStreamIndx = i;
break;
}
}
if (VideoStreamIndx == -1) {
cerr << "Error: unable to find video stream index" << endl;
exit(-2);
}
pAVCodecContext = pAVFormatContext->streams[VideoStreamIndx]->codec;
pAVCodec = avcodec_find_decoder(pAVCodecContext->codec_id/*params->codec_id*/);
if (pAVCodec == nullptr) {
cerr << "Error: unable to find decoder video" << endl;
exit(-1);
}
cout << "Insert height and width [h w]: "; //custom screen dimension to record
cin >> h >> w;*/
return 0;
}
/*========================================== AUDIO ============================*/
int ScreenRecorder::openAudioDevice() {
audioOptions = nullptr;
inAudioFormatContext = nullptr;
inAudioFormatContext = avformat_alloc_context();
value = av_dict_set(&audioOptions, "sample_rate", "44100", 0);
if (value < 0) {
cerr << "Error: cannot set audio sample rate" << endl;
exit(-1);
}
value = av_dict_set(&audioOptions, "async", "1", 0);
if (value < 0) {
cerr << "Error: cannot set audio sample rate" << endl;
exit(-1);
}
#if defined linux
audioInputFormat = av_find_input_format("alsa");
value = avformat_open_input(&inAudioFormatContext, "hw:0", audioInputFormat, &audioOptions);
if (value != 0) {
cerr << "Error in opening input device (audio)" << endl;
exit(-1);
}
#endif
#if defined _WIN32
audioInputFormat = av_find_input_format("dshow");
value = avformat_open_input(&inAudioFormatContext, "audio=Microfono (Realtek(R) Audio)", audioInputFormat, &audioOptions);
if (value != 0) {
cerr << "Error in opening input device (audio)" << endl;
exit(-1);
}
#endif
value = avformat_find_stream_info(inAudioFormatContext, nullptr);
if (value != 0) {
cerr << "Error: cannot find the audio stream information" << endl;
exit(-1);
}
audioStreamIndx = -1;
for (int i = 0; i < inAudioFormatContext->nb_streams; i++) {
if (inAudioFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
audioStreamIndx = i;
break;
}
}
if (audioStreamIndx == -1) {
cerr << "Error: unable to find audio stream index" << endl;
exit(-2);
}
}
int ScreenRecorder::initOutputFile() {
value = 0;
outAVFormatContext = nullptr;
outputAVFormat = av_guess_format(nullptr, "output.mp4", nullptr);
if (outputAVFormat == nullptr) {
cerr << "Error in guessing the video format, try with correct format" << endl;
exit(-5);
}
avformat_alloc_output_context2(&outAVFormatContext, outputAVFormat, outputAVFormat->name, "..\\media\\output.mp4");
if (outAVFormatContext == nullptr) {
cerr << "Error in allocating outAVFormatContext" << endl;
exit(-4);
}
/*===========================================================================*/
this->generateVideoStream();
this->generateAudioStream();
//create an empty video file
if (!(outAVFormatContext->flags & AVFMT_NOFILE)) {
if (avio_open2(&outAVFormatContext->pb, "..\\media\\output.mp4", AVIO_FLAG_WRITE, nullptr, nullptr) < 0) {
cerr << "Error in creating the video file" << endl;
exit(-10);
}
}
if (outAVFormatContext->nb_streams == 0) {
cerr << "Output file does not contain any stream" << endl;
exit(-11);
}
value = avformat_write_header(outAVFormatContext, &options);
if (value < 0) {
cerr << "Error in writing the header context" << endl;
exit(-12);
}
return 0;
}
/*=================================== VIDEO ==================================*/
void ScreenRecorder::generateVideoStream() {
//Generate video stream
videoSt = avformat_new_stream(outAVFormatContext, nullptr);
if (videoSt == nullptr) {
cerr << "Error in creating AVFormatStream" << endl;
exit(-6);
}
outVideoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4); //AV_CODEC_ID_MPEG4
if (outVideoCodec == nullptr) {
cerr << "Error in finding the AVCodec, try again with the correct codec" << endl;
exit(-8);
}
avcodec_alloc_context3(outAVCodec)
outVideoCodecContext = avcodec_alloc_context3(outVideoCodec);
if (outVideoCodecContext == nullptr) {
cerr << "Error in allocating the codec context" << endl;
exit(-7);
}
//set properties of the video file (stream)
outVideoCodecContext = videoSt->codec;
outVideoCodecContext->codec_id = AV_CODEC_ID_MPEG4;
outVideoCodecContext->codec_type = AVMEDIA_TYPE_VIDEO;
outVideoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
outVideoCodecContext->bit_rate = 10000000;
outVideoCodecContext->width = width;
outVideoCodecContext->height = height;
outVideoCodecContext->gop_size = 10;
outVideoCodecContext->global_quality = 500;
outVideoCodecContext->max_b_frames = 2;
outVideoCodecContext->time_base.num = 1;
outVideoCodecContext->time_base.den = 30;
outVideoCodecContext->bit_rate_tolerance = 400000;
if (outVideoCodecContext->codec_id == AV_CODEC_ID_H264) {
av_opt_set(outVideoCodecContext->priv_data, "preset", "slow", 0);
}
if (outAVFormatContext->oformat->flags & AVFMT_GLOBALHEADER) {
outVideoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
value = avcodec_open2(outVideoCodecContext, outVideoCodec, nullptr);
if (value < 0) {
cerr << "Error in opening the AVCodec" << endl;
exit(-9);
}
outVideoStreamIndex = -1;
for (int i = 0; i < outAVFormatContext->nb_streams; i++) {
if (outAVFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_UNKNOWN) {
outVideoStreamIndex = i;
}
}
if (outVideoStreamIndex < 0) {
cerr << "Error: cannot find a free stream index for video output" << endl;
exit(-1);
}
avcodec_parameters_from_context(outAVFormatContext->streams[outVideoStreamIndex]->codecpar, outVideoCodecContext);
}
/*=============================== AUDIO ==================================*/
void ScreenRecorder::generateAudioStream() {
AVCodecParameters* params = inAudioFormatContext->streams[audioStreamIndx]->codecpar;
inAudioCodec = avcodec_find_decoder(params->codec_id);
if (inAudioCodec == nullptr) {
cerr << "Error: cannot find the audio decoder" << endl;
exit(-1);
}
inAudioCodecContext = avcodec_alloc_context3(inAudioCodec);
if (avcodec_parameters_to_context(inAudioCodecContext, params) < 0) {
cout << "Cannot create codec context for audio input" << endl;
}
value = avcodec_open2(inAudioCodecContext, inAudioCodec, nullptr);
if (value < 0) {
cerr << "Error: cannot open the input audio codec" << endl;
exit(-1);
}
//Generate audio stream
outAudioCodecContext = nullptr;
outAudioCodec = nullptr;
int i;
AVStream* audio_st = avformat_new_stream(outAVFormatContext, nullptr);
if (audio_st == nullptr) {
cerr << "Error: cannot create audio stream" << endl;
exit(1);
}
outAudioCodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
if (outAudioCodec == nullptr) {
cerr << "Error: cannot find requested encoder" << endl;
exit(1);
}
outAudioCodecContext = avcodec_alloc_context3(outAudioCodec);
if (outAudioCodecContext == nullptr) {
cerr << "Error: cannot create related VideoCodecContext" << endl;
exit(1);
}
if ((outAudioCodec)->supported_samplerates) {
outAudioCodecContext->sample_rate = (outAudioCodec)->supported_samplerates[0];
for (i = 0; (outAudioCodec)->supported_samplerates[i]; i++) {
if ((outAudioCodec)->supported_samplerates[i] == inAudioCodecContext->sample_rate)
outAudioCodecContext->sample_rate = inAudioCodecContext->sample_rate;
}
}
outAudioCodecContext->codec_id = AV_CODEC_ID_AAC;
outAudioCodecContext->sample_fmt = (outAudioCodec)->sample_fmts ? (outAudioCodec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
outAudioCodecContext->channels = inAudioCodecContext->channels;
outAudioCodecContext->channel_layout = av_get_default_channel_layout(outAudioCodecContext->channels);
outAudioCodecContext->bit_rate = 96000;
outAudioCodecContext->time_base = { 1, inAudioCodecContext->sample_rate };
outAudioCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
if ((outAVFormatContext)->oformat->flags & AVFMT_GLOBALHEADER) {
outAudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
if (avcodec_open2(outAudioCodecContext, outAudioCodec, nullptr) < 0) {
cerr << "error in opening the avcodec" << endl;
exit(1);
}
//find a free stream index
outAudioStreamIndex = -1;
for (i = 0; i < outAVFormatContext->nb_streams; i++) {
if (outAVFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_UNKNOWN) {
outAudioStreamIndex = i;
}
}
if (outAudioStreamIndex < 0) {
cerr << "Error: cannot find a free stream for audio on the output" << endl;
exit(1);
}
avcodec_parameters_from_context(outAVFormatContext->streams[outAudioStreamIndex]->codecpar, outAudioCodecContext);
}
int ScreenRecorder::init_fifo()
{
/* Create the FIFO buffer based on the specified output sample format. */
if (!(fifo = av_audio_fifo_alloc(outAudioCodecContext->sample_fmt,
outAudioCodecContext->channels, 1))) {
fprintf(stderr, "Could not allocate FIFO\n");
return AVERROR(ENOMEM);
}
return 0;
}
int ScreenRecorder::add_samples_to_fifo(uint8_t** converted_input_samples, const int frame_size) {
int error;
/* Make the FIFO as large as it needs to be to hold both,
* the old and the new samples. */
if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) {
fprintf(stderr, "Could not reallocate FIFO\n");
return error;
}
/* Store the new samples in the FIFO buffer. */
if (av_audio_fifo_write(fifo, (void**)converted_input_samples, frame_size) < frame_size) {
fprintf(stderr, "Could not write data to FIFO\n");
return AVERROR_EXIT;
}
return 0;
}
int ScreenRecorder::initConvertedSamples(uint8_t*** converted_input_samples,
AVCodecContext* output_codec_context,
int frame_size) {
int error;
/* Allocate as many pointers as there are audio channels.
* Each pointer will later point to the audio samples of the corresponding
* channels (although it may be NULL for interleaved formats).
*/
if (!(*converted_input_samples = (uint8_t**)calloc(output_codec_context->channels,
sizeof(**converted_input_samples)))) {
fprintf(stderr, "Could not allocate converted input sample pointers\n");
return AVERROR(ENOMEM);
}
/* Allocate memory for the samples of all channels in one consecutive
* block for convenience. */
if (av_samples_alloc(*converted_input_samples, nullptr,
output_codec_context->channels,
frame_size,
output_codec_context->sample_fmt, 0) < 0) {
exit(1);
}
return 0;
}
static int64_t pts = 0;
void ScreenRecorder::captureAudio() {
int ret;
AVPacket* inPacket, * outPacket;
AVFrame* rawFrame, * scaledFrame;
uint8_t** resampledData;
init_fifo();
//allocate space for a packet
inPacket = (AVPacket*)av_malloc(sizeof(AVPacket));
if (!inPacket) {
cerr << "Cannot allocate an AVPacket for encoded video" << endl;
exit(1);
}
av_init_packet(inPacket);
//allocate space for a packet
rawFrame = av_frame_alloc();
if (!rawFrame) {
cerr << "Cannot allocate an AVPacket for encoded video" << endl;
exit(1);
}
scaledFrame = av_frame_alloc();
if (!scaledFrame) {
cerr << "Cannot allocate an AVPacket for encoded video" << endl;
exit(1);
}
outPacket = (AVPacket*)av_malloc(sizeof(AVPacket));
if (!outPacket) {
cerr << "Cannot allocate an AVPacket for encoded video" << endl;
exit(1);
}
//init the resampler
SwrContext* resampleContext = nullptr;
resampleContext = swr_alloc_set_opts(resampleContext,
av_get_default_channel_layout(outAudioCodecContext->channels),
outAudioCodecContext->sample_fmt,
outAudioCodecContext->sample_rate,
av_get_default_channel_layout(inAudioCodecContext->channels),
inAudioCodecContext->sample_fmt,
inAudioCodecContext->sample_rate,
0,
nullptr);
if (!resampleContext) {
cerr << "Cannot allocate the resample context" << endl;
exit(1);
}
if ((swr_init(resampleContext)) < 0) {
fprintf(stderr, "Could not open resample context\n");
swr_free(&resampleContext);
exit(1);
}
while (true) {
if (pauseCapture) {
cout << "Pause audio" << endl;
}
cv.wait(ul, [this]() { return !pauseCapture; });
if (stopCapture) {
break;
}
ul.unlock();
if (av_read_frame(inAudioFormatContext, inPacket) >= 0 && inPacket->stream_index == audioStreamIndx) {
//decode audio routing
av_packet_rescale_ts(outPacket, inAudioFormatContext->streams[audioStreamIndx]->time_base, inAudioCodecContext->time_base);
if ((ret = avcodec_send_packet(inAudioCodecContext, inPacket)) < 0) {
cout << "Cannot decode current audio packet " << ret << endl;
continue;
}
while (ret >= 0) {
ret = avcodec_receive_frame(inAudioCodecContext, rawFrame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
cerr << "Error during decoding" << endl;
exit(1);
}
if (outAVFormatContext->streams[outAudioStreamIndex]->start_time <= 0) {
outAVFormatContext->streams[outAudioStreamIndex]->start_time = rawFrame->pts;
}
initConvertedSamples(&resampledData, outAudioCodecContext, rawFrame->nb_samples);
swr_convert(resampleContext,
resampledData, rawFrame->nb_samples,
(const uint8_t**)rawFrame->extended_data, rawFrame->nb_samp
add_samples_to_fifo(resampledData, rawFrame->nb_samples);
//raw frame ready
av_init_packet(outPacket);
outPacket->data = nullptr;
outPacket->size = 0;
const int frame_size = FFMAX(av_audio_fifo_size(fifo), outAudioCodecContext->frame_size);
scaledFrame = av_frame_alloc();
if (!scaledFrame) {
cerr << "Cannot allocate an AVPacket for encoded video" << endl;
exit(1);
}
scaledFrame->nb_samples = outAudioCodecContext->frame_size;
scaledFrame->channel_layout = outAudioCodecContext->channel_layout;
scaledFrame->format = outAudioCodecContext->sample_fmt;
scaledFrame->sample_rate = outAudioCodecContext->sample_rate;
av_frame_get_buffer(scaledFrame, 0);
while (av_audio_fifo_size(fifo) >= outAudioCodecContext->frame_size) {
ret = av_audio_fifo_read(fifo, (void**)(scaledFrame->data), outAudioCodecContext->frame_size);
scaledFrame->pts = pts;
pts += scaledFrame->nb_samples;
if (avcodec_send_frame(outAudioCodecContext, scaledFrame) < 0) {
cout << "Cannot encode current audio packet " << endl;
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(outAudioCodecContext, outPacket);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
cerr << "Error during encoding" << endl;
exit(1);
}
av_packet_rescale_ts(outPacket, outAudioCodecContext->time_base, outAVFormatContext->streams[outAudioStreamIndex]->time_base);
outPacket->stream_index = outAudioStreamIndex;
write_lock.lock();
if (av_write_frame(outAVFormatContext, outPacket) != 0)
{
cerr << "Error in writing audio frame" << endl;
}
write_lock.unlock();
av_packet_unref(outPacket);
}
ret = 0;
}
av_frame_free(&scaledFrame);
av_packet_unref(outPacket);
}
}
}
}
int ScreenRecorder::captureVideoFrames() {
int64_t pts = 0;
int flag;
int frameFinished = 0;
bool endPause = false;
int numPause = 0;
ofstream outFile{ "..\\media\\log.txt", ios::out };
int frameIndex = 0;
value = 0;
pAVPacket = (AVPacket*)av_malloc(sizeof(AVPacket));
if (pAVPacket == nullptr) {
cerr << "Error in allocating AVPacket" << endl;
exit(-1);
}
pAVFrame = av_frame_alloc();
if (pAVFrame == nullptr) {
cerr << "Error: unable to alloc the AVFrame resources" << endl;
exit(-1);
}
outFrame = av_frame_alloc();
if (outFrame == nullptr) {
cerr << "Error: unable to alloc the AVFrame resources for out frame" << endl;
exit(-1);
}
int videoOutBuffSize;
int nBytes = av_image_get_buffer_size(outVideoCodecContext->pix_fmt, outVideoCodecContext->width, outVideoCodecContext->height, 32);
uint8_t* videoOutBuff = (uint8_t*)av_malloc(nBytes);
if (videoOutBuff == nullptr) {
cerr << "Error: unable to allocate memory" << endl;
exit(-1);
}
value = av_image_fill_arrays(outFrame->data, outFrame->linesize, videoOutBuff, AV_PIX_FMT_YUV420P, outVideoCodecContext->width, outVideoCodecContext->height, 1);
if (value < 0) {
cerr << "Error in filling image array" << endl;
}
SwsContext* swsCtx_;
if (avcodec_open2(pAVCodecContext, pAVCodec, nullptr) < 0) {
cerr << "Could not open codec" << endl;
exit(-1);
}
swsCtx_ = sws_getContext(pAVCodecContext->width, pAVCodecContext->height, pAVCodecContext->pix_fmt, outVideoCodecContext->width, outVideoCodecContext->height, outVideoCodecContext->pix_fmt, SWS_BICUBIC,
nullptr, nullptr, nullptr);
AVPacket outPacket;
int gotPicture;
time_t startTime;
time(&startTime);
while (true) {
if (pauseCapture) {
cout << "Pause" << endl;
outFile << "/////////////////// Pause ///////////////////" << endl;
cout << "outVideoCodecContext->time_base: " << outVideoCodecContext->time_base.num << ", " << outVideoCodecContext->time_base.den << endl;
}
cv.wait(ul, [this]() { return !pauseCapture; }); //pause capture (not busy waiting)
if (endPause) {
endPause = false;
}
if (stopCapture) //check if the capture has to stop
break;
ul.unlock();
if (av_read_frame(pAVFormatContext, pAVPacket) >= 0 && pAVPacket->stream_index == VideoStreamIndx) {
av_packet_rescale_ts(pAVPacket, pAVFormatContext->streams[VideoStreamIndx]->time_base, pAVCodecContext->time_base);
value = avcodec_decode_video2(pAVCodecContext, pAVFrame, &frameFinished, pAVPacket);
if (value < 0) {
cout << "Unable to decode video" << endl;
}
if (frameFinished) { //frame successfully decoded
//sws_scale(swsCtx_, pAVFrame->data, pAVFrame->linesize, 0, pAVCodecContext->height, outFrame->data, outFrame->linesize);
av_init_packet(&outPacket);
outPacket.data = nullptr;
outPacket.size = 0;
if (outAVFormatContext->streams[outVideoStreamIndex]->start_time <= 0) {
outAVFormatContext->streams[outVideoStreamIndex]->start_time = pAVFrame->pts;
}
//disable warning on the console
outFrame->width = outVideoCodecContext->width;
outFrame->height = outVideoCodecContext->height;
outFrame->format = outVideoCodecContext->pix_fmt;
sws_scale(swsCtx_, pAVFrame->data, pAVFrame->linesize, 0, pAVCodecContext->height, outFrame->data, outFrame->linesize);
avcodec_encode_video2(outVideoCodecContext, &outPacket, outFrame, &gotPicture);
if (gotPicture) {
if (outPacket.pts != AV_NOPTS_VALUE) {
outPacket.pts = av_rescale_q(outPacket.pts, videoSt->codec->time_base, videoSt->time_base);
}
if (outPacket.dts != AV_NOPTS_VALUE) {
outPacket.dts = av_rescale_q(outPacket.dts, videoSt->codec->time_base, videoSt->time_base);
}
//cout << "Write frame " << j++ << " (size = " << outPacket.size / 1000 << ")" << endl;
//cout << "(size = " << outPacket.size << ")" << endl;
//av_packet_rescale_ts(&outPacket, outVideoCodecContext->time_base, outAVFormatContext->streams[outVideoStreamIndex]->time_base);
//outPacket.stream_index = outVideoStreamIndex;
outFile << "outPacket->duration: " << outPacket.duration << ", " << "pAVPacket->duration: " << pAVPacket->duration << endl;
outFile << "outPacket->pts: " << outPacket.pts << ", " << "pAVPacket->pts: " << pAVPacket->pts << endl;
outFile << "outPacket.dts: " << outPacket.dts << ", " << "pAVPacket->dts: " << pAVPacket->dts << endl;
time_t timer;
double seconds;
mu.lock();
if (!activeMenu) {
time(&timer);
seconds = difftime(timer, startTime);
int h = (int)(seconds / 3600);
int m = (int)(seconds / 60) % 60;
int s = (int)(seconds) % 60;
std::cout << std::flush << "\r" << std::setw(2) << std::setfill('0') << h << ':'
<< std::setw(2) << std::setfill('0') << m << ':'
<< std::setw(2) << std::setfill('0') << s << std::flush;
}
mu.unlock();
write_lock.lock();
if (av_write_frame(outAVFormatContext, &outPacket) != 0) {
cerr << "Error in writing video frame" << endl;
}
write_lock.unlock();
av_packet_unref(&outPacket);
}
av_packet_unref(&outPacket);
av_free_packet(pAVPacket); //avoid memory saturation
}
}
}
outFile.close();
av_free(videoOutBuff);
return 0;
}

I resolved this problem performing an avformat_close_input(&inAudioFormatContext) before enter in pause, and an avformat_open_input(&inAudioFormatContext, "audio=Microfono (Realtek(R) Audio)", audioInputFormat, &audioOptions) after resume the recording. In this way the final file seems well syncronized with video.

Convert NV12 to BGR by NVIDIA Performance Primitives

I'm trying to convert NV12 image to BGR by npp, but in the final array i have zeroes.
int lumaStepBytes, chromaStepBytes;
int rgbStepBytes;
auto dpNV12LumaFrame = nppiMalloc_8u_C1(dec.GetWidth(), dec.GetHeight(), &lumaStepBytes);
auto dpNV12ChromaFrame = nppiMalloc_8u_C1(dec.GetWidth(), dec.GetChromaHeight(), &chromaStepBytes);
auto dpBGRFrame = nppiMalloc_8u_C3(dec.GetWidth(), dec.GetHeight(), &rgbStepBytes);
cudaMemcpy2D(dpNV12LumaFrame, lumaStepBytes, pFrame, dec.GetWidth(),
dec.GetWidth(), dec.GetHeight(), cudaMemcpyKind::cudaMemcpyHostToDevice);
cudaMemcpy2D(dpNV12ChromaFrame, chromaStepBytes, pFrame + dec.GetLumaPlaneSize(), dec.GetWidth(),
dec.GetWidth(), dec.GetChromaHeight(), cudaMemcpyKind::cudaMemcpyHostToDevice);
Npp8u *planesAddres[2];
planesAddres[0] = dpNV12LumaFrame;
planesAddres[1] = dpNV12ChromaFrame;
nppiNV12ToBGR_8u_P2C3R(planesAddres, lumaStepBytes,
dpBGRFrame, rgbStepBytes,
{dec.GetWidth(), dec.GetHeight()});
res.m_data.resize(dec.GetWidth() * dec.GetHeight() * 3);
cudaMemcpy2D(res.m_data.data(), dec.GetWidth(), dpBGRFrame, rgbStepBytes,
dec.GetWidth(), dec.GetHeight(), cudaMemcpyKind::cudaMemcpyDeviceToHost);
nppiFree(dpBGRFrame);
nppiFree(dpNV12ChromaFrame);
nppiFree(dpNV12LumaFrame);
dec is a video decoder which gives pFrame in NV12 format and provide additional information about that, like offsets, dimensions, NV12 planes, etc.
The same result I have if I use cu... and cuda... functions for allocating without alignment.
Do anybody have any ideas about the problem?

For questions like this the SO expectation is that you provide a complete example, see item 1 here. So I haven't tried to determine exactly what is wrong with your code.
However I can show you a complete code that converts NV12 to RGB (and other things as well) which is working correctly for me:
// sample compile command line: nvcc -o rs rs.cu -lnppicc -lnppig -DUSE_DEBUG -DUNIT_TEST
#include <nppi.h>
#include <iostream>
template <typename T>
__global__ void pack_uv(T * __restrict__ u, T * __restrict__ v, T * __restrict__ uv, const int w, const int h, const int pitch_uv, const int pitch_u, const int pitch_v){
int idx = threadIdx.x+blockDim.x*blockIdx.x;
int idy = threadIdx.y+blockDim.y*blockIdx.y;
if ((idx < w) && (idy < h)){
T *o = (T *)(((char *)uv) + idy*pitch_uv);
T *iu = (T *)(((char *)u) + idy*pitch_u);
T *iv = (T *)(((char *)v) + idy*pitch_v);
int idx2 = idx >> 1;
o[idx] = (idx&1)?iv[idx2]:iu[idx2];}
}
int rs(const int ish, const int isw, const int ipitch, const int osh, const int osw, const int opitch, const unsigned char *iy, const unsigned char *iuv, unsigned char *oy, unsigned char *ouv, unsigned char *tempbuff, int method = 0, int eInterpolation = NPPI_INTER_LANCZOS){
#ifdef USE_DEBUG
if ((iy != NULL) && (tempbuff == NULL)) std::cout << "error: tempbuff is NULL" << std::endl;
if ((iy != NULL) && (iuv == NULL)) std::cout << "error: iuv is NULL" << std::endl;
if ((iy != NULL) && (oy == NULL)) std::cout << "error: oy is NULL" << std::endl;
if ((iy != NULL) && (ouv == NULL)) std::cout << "error: ouv is NULL" << std::endl;
if (isw < 2) std::cout << "error on input width: " << isw << std::endl;
if (ish < 2) std::cout << "error on input height: " << ish << std::endl;
if (ipitch < isw) std::cout << "error on input pitch: " << ipitch << std::endl;
if (osw < 1) std::cout << "error on output width: " << osw << std::endl;
if (osh < 1) std::cout << "error on output height: " << osh << std::endl;
if (opitch < osw) std::cout << "error on output pitch: " << opitch << std::endl;
#endif
cudaError_t err;
NppStatus stat;
// convert NV12 input to RGB
if (iy == NULL){ // temp buffer sizing
// for method 1
NppiSize oSrcROI;
oSrcROI.width = isw;
oSrcROI.height = ish;
NppiSize oDstROI;
oDstROI.width = osw;
oDstROI.height = osh;
int bufferSize;
stat = nppiResizeAdvancedGetBufferHostSize_8u_C1R(oSrcROI, oDstROI, &bufferSize, NPPI_INTER_LANCZOS3_ADVANCED);
return ((ish*isw + osh*osw)*3*sizeof(unsigned char))+bufferSize; // temp buffer sizing
}
if (method == 0){
const Npp8u *pSrc[2] = {iy, iuv};
NppiSize oSizeROI;
oSizeROI.width = isw;
oSizeROI.height = ish;
#ifdef USE_709
stat = nppiNV12ToRGB_709HDTV_8u_P2C3R(pSrc, ipitch, tempbuff, isw*3*sizeof(Npp8u), oSizeROI);
#else
stat = nppiNV12ToRGB_8u_P2C3R(pSrc, ipitch, tempbuff, isw*3*sizeof(Npp8u), oSizeROI);
#endif
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "NV12 to RGB CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "NV12 to RGB NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -1;
// perform resize
NppiSize oSrcSize;
oSrcSize.width = isw;
oSrcSize.height = ish;
NppiRect oSrcROI;
oSrcROI.x = 0;
oSrcROI.y = 0;
oSrcROI.width = isw;
oSrcROI.height = ish;
NppiRect oDstROI;
oDstROI.x = 0;
oDstROI.y = 0;
oDstROI.width = osw;
oDstROI.height = osh;
double nXFactor = osw/(double)isw;
double nYFactor = osh/(double)ish;
double nXShift = 0;
double nYShift = 0;
stat = nppiResizeSqrPixel_8u_C3R(tempbuff, oSrcSize, isw*3*sizeof(Npp8u), oSrcROI, tempbuff+ish*isw*3, osw*3*sizeof(Npp8u), oDstROI, nXFactor, nYFactor, nXShift, nYShift, eInterpolation);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "RGB LANCZOS RESIZE CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "RGB LANCZOS RESIZE NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -2;
// convert resized RGB to YUV420
Npp8u *pDst[3] = { oy, ouv, ouv + osh*opitch/4 };
int rDstStep[3] = { opitch, opitch/2, opitch/2 };
oSizeROI.width = osw;
oSizeROI.height = osh;
stat = nppiRGBToYUV420_8u_C3P3R(tempbuff+ish*isw*3, osw*3*sizeof(Npp8u), pDst, rDstStep, oSizeROI);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "RGB TO YUV420 CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "RGB TO YUV420 NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -3;
// pack uv
dim3 block(32, 8);
dim3 grid((osw+block.x-1)/block.x, (osh+block.y-1)/block.y);
pack_uv<<< grid, block >>>(ouv, ouv + osh*opitch/4, tempbuff, osw, osh/2, osw, osw/2, osw/2);
err = cudaGetLastError();
#ifdef USE_DEBUG
if (err != cudaSuccess) std::cout << "PACK UV LAUNCH CUDA error: " << cudaGetErrorString(err) << std::endl;
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "PACK UV EXEC CUDA error: " << cudaGetErrorString(err) << std::endl;
#endif
if (err != cudaSuccess) return -4;
// move packed uv to output
err = cudaMemcpy2D(ouv, opitch, tempbuff, osw*sizeof(Npp8u), osw*sizeof(Npp8u), osh/2, cudaMemcpyDeviceToDevice);
#ifdef USE_DEBUG
if (err != cudaSuccess) std::cout << "PACK UV COPY CUDA error: " << cudaGetErrorString(err) << std::endl;
#endif
if (err != cudaSuccess) return -5;
}
else{ // method 1
// NV12 to YUV420 planar
const Npp8u *const pSrc[2] = {iy, iuv};
Npp8u *pDst[3] = {tempbuff, tempbuff+isw*ish, tempbuff+isw*ish+(isw*ish)/4};
int aDstStep[3] = {isw, isw/2, isw/2};
NppiSize oSizeROI;
oSizeROI.width = isw;
oSizeROI.height = ish;
stat = nppiNV12ToYUV420_8u_P2P3R(pSrc, ipitch, pDst, aDstStep, oSizeROI);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "NV12 TO YUV420 CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "NV12 TO YUV420 NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -6;
// resize each plane individually
NppiSize oSrcSize = oSizeROI;
NppiRect oSrcROI;
oSrcROI.x = 0;
oSrcROI.y = 0;
oSrcROI.width = isw;
oSrcROI.height = ish;
NppiRect oDstROI;
oDstROI.x = 0;
oDstROI.y = 0;
oDstROI.width = osw;
oDstROI.height = osh;
double nXFactor = osw/(double)isw;
double nYFactor = osh/(double)ish;
// resize Y
stat = nppiResizeSqrPixel_8u_C1R_Advanced(tempbuff, oSrcSize, isw, oSrcROI, oy, opitch, oDstROI, nXFactor, nYFactor, tempbuff+(ish*isw*3),NPPI_INTER_LANCZOS3_ADVANCED);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "Y RESIZE CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "Y RESIZE NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -7;
// resize U
oSrcSize.width /= 2;
oSrcSize.height /= 2;
oSrcROI.width /= 2;
oSrcROI.height /= 2;
oDstROI.width /= 2;
oDstROI.height /= 2;
stat = nppiResizeSqrPixel_8u_C1R_Advanced(tempbuff+ish*isw, oSrcSize, isw/2, oSrcROI, tempbuff+(ish*isw*3), osw/2, oDstROI, nXFactor, nYFactor, tempbuff+(ish*isw*3) + (osh*osw*3),NPPI_INTER_LANCZOS3_ADVANCED);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "U RESIZE CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "U RESIZE NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -8;
// resize V
stat = nppiResizeSqrPixel_8u_C1R_Advanced(tempbuff+ish*isw+(ish*isw/4), oSrcSize, isw/2, oSrcROI, tempbuff+(ish*isw*3)+(osh*osw/4), osw/2, oDstROI, nXFactor, nYFactor, tempbuff+(ish*isw*3) + (osh*osw*3),NPPI_INTER_LANCZOS3_ADVANCED);
#ifdef USE_DEBUG
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "V RESIZE CUDA error: " << cudaGetErrorString(err) << std::endl;
if (stat != NPP_SUCCESS) std::cout << "V RESIZE NPP error: " << (int)stat << std::endl;
#endif
if (stat != NPP_SUCCESS) return -9;
// pack_uv
dim3 block(32, 8);
dim3 grid((osw+block.x-1)/block.x, (osh+block.y-1)/block.y);
pack_uv<<< grid, block >>>(tempbuff+(ish*isw*3), tempbuff+(ish*isw*3)+(osh*osw/4), ouv, osw, osh/2, opitch, osw/2, osw/2);
err = cudaGetLastError();
#ifdef USE_DEBUG
if (err != cudaSuccess) std::cout << "PACK UV LAUNCH CUDA error: " << cudaGetErrorString(err) << std::endl;
err = cudaDeviceSynchronize();
if (err != cudaSuccess) std::cout << "PACK UV EXEC CUDA error: " << cudaGetErrorString(err) << std::endl;
#endif
if (err != cudaSuccess) return -10;
}
return 0;
}
#ifdef UNIT_TEST
// timing
#include <time.h>
#include <sys/time.h>
#define USECPSEC 1000000ULL
unsigned long long dtime_usec(unsigned long long start){
timeval tv;
gettimeofday(&tv, 0);
return ((tv.tv_sec*USECPSEC)+tv.tv_usec)-start;
}
// bitmap file handling
struct Info{
int width;
int height;
int offset;
unsigned char * info;
unsigned char * data;
int size;
};
#include <fstream>
Info readBMP(const char* filename)
{
int i;
std::ifstream is(filename, std::ifstream::binary);
is.seekg(0, is.end);
i = is.tellg();
is.seekg(0);
unsigned char *info = new unsigned char[i];
is.read((char *)info,i);
int width = *(int*)&info[18];
int height = *(int*)&info[22];
int offset = *(int*)&info[10];
Info dat;
dat.width = width;
dat.height = height;
dat.offset = offset;
dat.size = i;
dat.info = new unsigned char[offset - 1];
dat.data = new unsigned char[i - offset + 1];
if ((i-offset+1) < (3*height*width)) std::cout << "size: " << i-offset+1 << " expected: " << height*width*3 << std::endl;
std::copy(info,
info + offset,
dat.info);
std::copy(info + offset,
info + i,
dat.data);
delete[] info;
return dat;
}
void writeBMP(const char *filename, Info dat){
std::ofstream fout;
fout.open(filename, std::ios::binary | std::ios::out);
fout.write( reinterpret_cast<char *>(dat.info), dat.offset);
fout.write( reinterpret_cast<char *>(dat.data), dat.size - dat.offset );
fout.close();
}
int main(int argc, char *argv[]){
int eInterpolation = NPPI_INTER_LANCZOS;
if (argc > 1) eInterpolation = atoi(argv[1]);
else{
std::cout << "Must specify a valid interpolation mode:" << std::endl;
std::cout << NPPI_INTER_NN << " :NPPI_INTER_NN" << std::endl;
std::cout << NPPI_INTER_LINEAR << " :NPPI_INTER_LINEAR" << std::endl;
std::cout << NPPI_INTER_CUBIC << " :NPPI_INTER_CUBIC" << std::endl;
std::cout << NPPI_INTER_LANCZOS << " :NPPI_INTER_LANCZOS" << std::endl;
return 0;}
int method = 0;
if (argc > 2) method = atoi(argv[2]);
// input to NV12
Info rfile = readBMP("input.bmp");
const int H = rfile.height;
const int W = rfile.width;
std::cout << "Height = " << rfile.height << std::endl;
std::cout << "Width = " << rfile.width << std::endl;
Npp8u *rgbdata, *ty, *tu, *tv, *tuv;
cudaMalloc(&rgbdata, H*W*3);
cudaMalloc(&ty, H*W);
cudaMalloc(&tu, H*W/4);
cudaMalloc(&tv, H*W/4);
cudaMalloc(&tuv, H*W/2);
cudaMemcpy(rgbdata, rfile.data, H*W*3, cudaMemcpyHostToDevice);
Npp8u *pDst[3] = { ty, tu, tv};
int rDstStep[3] = { W, W/2, W/2 };
NppiSize oSizeROI;
oSizeROI.width = W;
oSizeROI.height = H;
NppStatus stat = nppiRGBToYUV420_8u_C3P3R(rgbdata, W*3*sizeof(Npp8u), pDst, rDstStep, oSizeROI);
if (stat != NPP_SUCCESS) { std::cout << "Input NPP error" << std::endl; return 0;}
dim3 block(32, 8);
dim3 grid((W+block.x-1)/block.x, (H+block.y-1)/block.y);
pack_uv<<< grid, block >>>(tu, tv, tuv, W, H/2, W, W/2, W/2);
// 1:1 test
int buff_size = rs(H, W, W, H, W, W, NULL, NULL, NULL, NULL, NULL);
unsigned char *tbuff;
cudaError_t err = cudaMalloc(&tbuff, buff_size);
if (err != cudaSuccess) {std::cout << "on temp buff allocation of size: " << buff_size << " error: " << (int)err << std::endl; return 0;}
unsigned char *oy, *ouv;
err = cudaMalloc(&oy, H*W*sizeof(unsigned char));
if (err != cudaSuccess) {std::cout << "on oy allocation of size: " << H*W*sizeof(unsigned char) << " error: " << (int)err << std::endl; return 0;}
err = cudaMalloc(&ouv, H*W*sizeof(unsigned char)/2);
if (err != cudaSuccess) {std::cout << "on ouv allocation of size: " << H*W*sizeof(unsigned char) << " error: " << (int)err << std::endl; return 0;}
int error = rs(H, W, W, H, W, W, ty, tuv, oy, ouv, tbuff, method, eInterpolation);
if (error != 0) std::cout << "Function Failure: " << error << std::endl;
// output to RGB
const Npp8u *pSrc[2] = {ty, tuv};
oSizeROI.width = W;
oSizeROI.height = H;
#ifdef USE_709
stat = nppiNV12ToRGB_709HDTV_8u_P2C3R(pSrc, W, rgbdata, W*3*sizeof(Npp8u), oSizeROI);
#else
stat = nppiNV12ToRGB_8u_P2C3R(pSrc, W, rgbdata, W*3*sizeof(Npp8u), oSizeROI);
#endif
if (stat != NPP_SUCCESS) { std::cout << "Output NPP error" << std::endl; return 0;}
cudaMemcpy(rfile.data, rgbdata, H*W*3, cudaMemcpyDeviceToHost);
writeBMP("output.bmp", rfile);
// 2x upscale test
cudaFree(tbuff);
buff_size = rs(H, W, W, 2*H, 2*W, 2*W, NULL, NULL, NULL, NULL, NULL);
err = cudaMalloc(&tbuff, buff_size);
if (err != cudaSuccess) {std::cout << "on temp buff allocation of size: " << buff_size << " error: " << (int)err << std::endl; return 0;}
cudaFree(oy);
cudaFree(ouv);
err = cudaMalloc(&oy, 4*H*W*sizeof(unsigned char));
if (err != cudaSuccess) {std::cout << "on oy allocation of size: " << H*W*sizeof(unsigned char) << " error: " << (int)err << std::endl; return 0;}
err = cudaMalloc(&ouv, 2*H*W*sizeof(unsigned char));
if (err != cudaSuccess) {std::cout << "on ouv allocation of size: " << H*W*sizeof(unsigned char) << " error: " << (int)err << std::endl; return 0;}
unsigned long long dt = dtime_usec(0);
error = rs(H, W, W, 2*H, 2*W, 2*W, ty, tuv, oy, ouv, tbuff, method, eInterpolation);
cudaDeviceSynchronize();
dt = dtime_usec(dt);
if (error != 0) std::cout << "Function Failure: " << error << std::endl;
std::cout << "2x resize time: " << dt/(float)USECPSEC << "s" << std::endl;
// output to RGB
const Npp8u *pSrc2[2] = {oy, ouv};
oSizeROI.width = 2*W;
oSizeROI.height = 2*H;
cudaFree(rgbdata);
cudaMalloc(&rgbdata, H*W*12);
#ifdef USE_709
stat = nppiNV12ToRGB_709HDTV_8u_P2C3R(pSrc2, 2*W, rgbdata, W*6*sizeof(Npp8u), oSizeROI);
#else
stat = nppiNV12ToRGB_8u_P2C3R(pSrc2, 2*W, rgbdata, W*6*sizeof(Npp8u), oSizeROI);
#endif
if (stat != NPP_SUCCESS) { std::cout << "Output NPP error" << std::endl; return 0;}
delete[] rfile.data;
rfile.data = new unsigned char[H*W*12];
cudaMemcpy(rfile.data, rgbdata, H*W*12, cudaMemcpyDeviceToHost);
int osize = rfile.size - rfile.offset;
int nsizeinc = H*W*12 - osize;
rfile.size += nsizeinc;
*((int*)(rfile.info+18)) = 2*W;
*((int*)(rfile.info+22)) = 2*H;
writeBMP("output2.bmp", rfile);
return 0;
}
#endif
The above code does the following steps:
read in a .bmp file from disk into RGB storage
convert to YUV420
convert to NV12
resize the NV12 image (there are multiple steps here)
convert the resized NV12 image to RGB
write RGB image as a .bmp file

the problem went away when I changed the video card. GeForce 740 to 1080

NppiSize oSizeROI;
oSizeROI.width = frame_dec->width;
oSizeROI.height = frame_dec->height;
DBUG_PRINT("width: %d, height: %d, stepBytes: %d\n", oSizeROI.width, oSizeROI.height, stepBytes);
// NppStatus stat = nppiNV12ToBGR_8u_P2C3R(frame_dec->data, frame_dec->width, bgrData, frame_dec->width*3*sizeof(Npp8u), oSizeROI);
NppStatus stat;
#ifdef USE_709
stat = nppiNV12ToBGR_8u_P2C3R(frame_dec->data, frame_dec->linesize[0], bgrData, frame_dec->width*3, oSizeROI);
#else
stat = nppiNV12ToBGR_709HDTV_8u_P2C3R(frame_dec->data, frame_dec->linesize[0], bgrData, frame_dec->width * 3, oSizeROI);
#endif
unsigned char *data = (unsigned char *)malloc(frame_dec->width * frame_dec->height * 3);
cudaMemcpy(data, bgrData, frame_dec->height * frame_dec->width * 3, cudaMemcpyDeviceToHost);
cv::Mat mat_test(frame_dec->height, frame_dec->width, CV_8UC3, data);
imwrite("test.jpg", mat_test);
free(data);
nppiFree(bgrData);
exit(0);
frame_dec decoded by ffmpeg cuda decoder

libusb failing at getting string descriptor on some devices

I am trying to get information about all attached devices.
This is my code:
libusb_device_descriptor desc;
libusb_config_descriptor *conDesc;
char szBuffer[256] = { 0 };
unsigned char strDesc[256];
libusb_device_handle *devHandle = NULL;
int retVal = 0;
__int64 i64Temp;
DWORD dwProdId;
DWORD dwProdId1;
i64Temp = 13888;
dwProdId = (DWORD)i64Temp;
retVal = libusb_open(dev, &devHandle);
int r = libusb_get_device_descriptor(dev, &desc);
if (r < 0)
{
cout << "failed to get device descriptor" << endl;
return;
}
r = libusb_get_config_descriptor(dev, 0, &conDesc);
printf("Interface Class = %d\n", conDesc->interface->altsetting->bInterfaceClass);
cout << "Number of possible configurations: " << (int)desc.bNumConfigurations << "" << endl;
cout << "Device Class: " << desc.bDeviceClass << endl;
cout << "Device Class: " << desc.bDeviceSubClass << endl;
printf("Class = %d\n", desc.bDeviceClass);
cout << "VendorID: " << desc.idVendor << endl;
cout << "ProductID: " << desc.idProduct << endl;
if (retVal == LIBUSB_SUCCESS)
{
retVal = libusb_get_string_descriptor_ascii(devHandle, desc.iManufacturer, strDesc, 256);
printf("Manufacturer: %s\n", strDesc);
retVal = libusb_get_string_descriptor_ascii(devHandle, desc.iSerialNumber, strDesc, 256);
printf("SerialNumber: %s\n", strDesc);
retVal = libusb_get_string_descriptor_ascii(devHandle, desc.iProduct, strDesc, 256);
printf("Product: %s\n", strDesc);
printf("\n\n");
}
else if (retVal != LIBUSB_SUCCESS)
{
printf("retVal failed");
printf("\n\n");
}
My code knows that there are 8 devices connected but it's getting manufacturer or serial number only on two of them. I would like to get this information for all of the attached devices.

Binaural beats in C++

I am trying to build a brain device using EEG input and outputting light pulses and binaural beats in "close to real-time" on a Raspberry PI. The light-output is no problem using WiringPi, but any audio output seems to be a major hurdle. The math of calculating a sinewave for a buffer is straightforward, but playing two frequencies on two channels via any standard libraries seems to be a very complicated process, and I can't come up with any relevant examples. I have successfully opened and closed an ALSA device thanks to this tutorial which complicates my fairly simple code tremendously but appears necessary for ALSA. I would be incredibly grateful if someone could show me the easiest method for playing two different calculated tones on left and right channels. The code below is the simplest ALSA playback example I could find.
#include <alsa/asoundlib.h>
#include <iostream>
using namespace std;
// Globals are generally a bad idea in code. We're using one here to keep it simple.
snd_pcm_t * _soundDevice;
bool Init(const char *name)
{
int i;
int err;
snd_pcm_hw_params_t *hw_params;
if( name == NULL )
{
// Try to open the default device
err = snd_pcm_open( &_soundDevice, "plughw:0,0", SND_PCM_STREAM_PLAYBACK, 0 );
}
else
{
// Open the device we were told to open.
err = snd_pcm_open (&_soundDevice, name, SND_PCM_STREAM_PLAYBACK, 0);
}
// Check for error on open.
if( err < 0 )
{
cout << "Init: cannot open audio device " << name << " (" << snd_strerror (err) << ")" << endl;
return false;
}
else
{
cout << "Audio device opened successfully." << endl;
}
// Allocate the hardware parameter structure.
if ((err = snd_pcm_hw_params_malloc (&hw_params)) < 0)
{
cout << "Init: cannot allocate hardware parameter structure (" << snd_strerror (err) << ")" << endl;
return false;
}
if ((err = snd_pcm_hw_params_any (_soundDevice, hw_params)) < 0)
{
cout << "Init: cannot initialize hardware parameter structure (" << snd_strerror (err) << ")" << endl;
return false;
}
// Enable resampling.
unsigned int resample = 1;
err = snd_pcm_hw_params_set_rate_resample(_soundDevice, hw_params, resample);
if (err < 0)
{
cout << "Init: Resampling setup failed for playback: " << snd_strerror(err) << endl;
return err;
}
// Set access to RW interleaved.
if ((err = snd_pcm_hw_params_set_access (_soundDevice, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0)
{
cout << "Init: cannot set access type (" << snd_strerror (err) << ")" << endl;
return false;
}
if ((err = snd_pcm_hw_params_set_format (_soundDevice, hw_params, SND_PCM_FORMAT_S16_LE)) < 0)
{
cout << "Init: cannot set sample format (" << snd_strerror (err) << ")" << endl;
return false;
}
// Set channels to stereo (2).
if ((err = snd_pcm_hw_params_set_channels (_soundDevice, hw_params, 2)) < 0)
{
cout << "Init: cannot set channel count (" << snd_strerror (err) << ")" << endl;
return false;
}
// Set sample rate.
unsigned int actualRate = 44100;
if ((err = snd_pcm_hw_params_set_rate_near (_soundDevice, hw_params, &actualRate, 0)) < 0)
{
cout << "Init: cannot set sample rate to 44100. (" << snd_strerror (err) << ")" << endl;
return false;
}
if( actualRate < 44100 )
{
cout << "Init: sample rate does not match requested rate. (" << "44100 requested, " << actualRate << " acquired)" << endl;
}
// Apply the hardware parameters that we've set.
if ((err = snd_pcm_hw_params (_soundDevice, hw_params)) < 0)
{
cout << "Init: cannot set parameters (" << snd_strerror (err) << ")" << endl;
return false;
}
else
{
cout << "Audio device parameters have been set successfully." << endl;
}
// Get the buffer size.
snd_pcm_uframes_t bufferSize;
snd_pcm_hw_params_get_buffer_size( hw_params, &bufferSize );
// If we were going to do more with our sound device we would want to store
// the buffer size so we know how much data we will need to fill it with.
cout << "Init: Buffer size = " << bufferSize << " frames." << endl;
// Display the bit size of samples.
cout << "Init: Significant bits for linear samples = " << snd_pcm_hw_params_get_sbits(hw_params) << endl;
// Free the hardware parameters now that we're done with them.
snd_pcm_hw_params_free (hw_params);
// Prepare interface for use.
if ((err = snd_pcm_prepare (_soundDevice)) < 0)
{
cout << "Init: cannot prepare audio interface for use (" << snd_strerror (err) << ")" << endl;
return false;
}
else
{
cout << "Audio device has been prepared for use." << endl;
}
return true;
}
bool UnInit()
{
snd_pcm_close (_soundDevice);
cout << "Audio device has been uninitialized." << endl;
return true;
}
int main( char *argc, int argv )
{
Init(NULL);
UnInit();
return 0;
}

Use some up-to-date example, like this:
#include <stdio.h>
#include <stdlib.h>
#include <alsa/asoundlib.h>
static const char *device = "default";
unsigned short buffer[2 * 24000];
int main(void)
{
int err;
snd_pcm_t *handle;
if ((err = snd_pcm_open(&handle, device, SND_PCM_STREAM_PLAYBACK, 0)) < 0) {
printf("open error: %s\n", snd_strerror(err));
exit(EXIT_FAILURE);
}
if ((err = snd_pcm_set_params(handle,
SND_PCM_FORMAT_S16,
SND_PCM_ACCESS_RW_INTERLEAVED,
2, /* channels */
48000, /* rate */
1,
500000)) < 0) { /* buffer: 0.5 sec */
printf("open error: %s\n", snd_strerror(err));
exit(EXIT_FAILURE);
}
for (;;) {
for (int i = 0; i < 24000; i++) {
buffer[2 * i + 0] = 32767 * sin(...); /* left channel */
buffer[2 * i + 1] = 32767 * sin(...); /* right channel */
}
snd_pcm_sframes_t frames = snd_pcm_writei(handle, buffer, 24000);
if (frames < 0)
frames = snd_pcm_recover(handle, frames, 0);
if (frames < 0) {
printf("snd_pcm_writei failed: %s\n", snd_strerror(err));
break;
}
}
snd_pcm_close(handle);
return 0;
}

Draw Mandelbrot using OpenCl

I want to write a program to draw a Mandelbrot set to an image.
I'm using OpenCl and cl.hpp - wrapper to c++, but I don't know why this isn't working.
I try draw image with different width and height, but mostly I get white image with random colored pixel (when in kernel I try write to output hardcoded values) or black image.
I also wondering about passing image2d_t to the kernel and "write" pixels directly to the image but when I execute my kernel I get -52 error that I set invalid arguments..
Declaration of my kernel(using image2d_t) looks like this:
__kernel syf(__write_only image2d_t img)
and I set arguments like this:
cl_mem image= clCreateImage2D(context, CL_MEM_WRITE_ONLY, &outputFormat, ImageWidth, ImageHeight, 0, 0, &err);
clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&image)
Could you look at the code and help me?
#define __CL_ENABLE_EXCEPTIONS
//#define __NO_STD_VECTOR
#include <ctime>
#include <fstream>
#include <iostream>
#include <exception>
#include "D:/Users/cuda/cuda/bmp.cpp"
#if defined (__APPLE__) || defined (MACOSX)
#include <OpenCl/cl.cpp>
#else
#include <CL/OpenCL.h>
#include <CL/cl.hpp>
#endif
const int N = 1024 * 1024 * 3;
const char * kernelSource =
"__kernel void prepareVector(__global char *output)"
"{"
" size_t xDimension = get_global_id(0); "
" size_t yDimension = get_global_id(1); "
" "
" size_t currentIndex = 3 * 1024.0f*yDimension + 3 * xDimension; "
" float xOriginal = 3.25f*((float) xDimension / 1024.0f) - 2.0f; "
" float yOriginal = 2.5f*((float) yDimension / 1024.0f) - 1.25f;"
" "
" int iteration = 0; "
" int maxIteration = 256; "
" float temp; "
" float x = 0.0f; "
" float y = 0.0f; "
" while (x*x + y*y <= 4.0f && iteration < maxIteration)"
" { "
" temp = x*x - y*y + xOriginal; "
" y = 2.0*x*y + yOriginal; "
" x = temp; "
" ++iteration; "
" } "
" "
" if (iteration == maxIteration) "
" { "
" iteration = 0; "
" } "
" output[currentIndex] = iteration; "
" output[currentIndex] = iteration;"
" output[currentIndex] = iteration;"
"}";
std::vector<cl::Platform> platforms;
std::vector<cl::Device> devices;
std::vector<cl::Kernel> allKernels;
cl::Program program;
cl_int cli_err_msg;
char *host_out_c;
int main(int argc, char* argv [])
{
try
{
size_t dimensionSize = 2;
cl::NDRange *globalSize = new cl::NDRange( 6, 8 );
cl::NDRange *localSize = new cl::NDRange( 3, 4 );
cl::NDRange *offsetSize = new cl::NDRange( 0, 0 );
host_out_c = new char[N];
cl::Platform::get(&platforms);
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
cl_context_properties properties [] =
{
CL_CONTEXT_PLATFORM, (cl_context_properties) (platforms[0])(), 0
};
//cl_context context = clCreateContext(0, devices.size(), devices, NULL, NULL, NULL);
cl::Context context(CL_DEVICE_TYPE_ALL, properties);
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
cl::Program::Sources source(1, std::make_pair(kernelSource, strlen(kernelSource)));
program = cl::Program(context, source);
program.build(devices);
cl::Kernel kernel(program, "prepareVector", &cli_err_msg);
cl::Buffer device_out_c(context, CL_MEM_WRITE_ONLY, N*sizeof(char));
cl::Event event;
cl::CommandQueue queue(context, devices[0], 0, &cli_err_msg);
kernel.setArg(0, device_out_c);
queue.enqueueNDRangeKernel(kernel, *offsetSize, *globalSize, *localSize, NULL, &event);
queue.enqueueReadBuffer(device_out_c, true, 0, N*sizeof(char), (void*) host_out_c);
//printArray("kernel output:\n", host_out_c);
write_bmp("lel.bmp", 1024, 1024, host_out_c);
}
catch (cl::Error e)
{
std::cout << "Status: "<< program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]) << std::endl;
std::cout << "Options: "<< program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]) << std::endl;
std::cout << "Log: "<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
std::cout << e.what() << ": Error code: " << e.err() << std::endl;
}
return 0;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

c++ opencl return CL_OUT_OF_RESOURCES - c++

On NVIDIA hardware if you write outside of a buffer or image (which is an undefined operation) then CL_OUT_OF_RESOURCES is a common error to get. It's better than the early hardware or drivers that simply crashed! Double-check your writes.

It seems that the combination of image_channel_data_type and image_channel_order is not correct. Could be your problem related with this? Please, take a look here: https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/cl_image_format.html Regards

Related

audio do not stop recording after pause ffmpeg c++

Convert NV12 to BGR by NVIDIA Performance Primitives

libusb failing at getting string descriptor on some devices

Binaural beats in C++

Draw Mandelbrot using OpenCl

Categories

Resources