I have 1d array (size = 4 * width * height + 1) of pixels of RGBA png image. I want to rotate image by X degrees clockwise. I already know how to do it for 90 degrees, but I guess I have some problem with trigonometry.
Here's the code:
std::pair<int, int> move(int x, int y, double rad) {
return {x * cos(rad) - y * sin(rad), x * cos(rad) + y * sin(rad)};
}
void turn(int deg) {
if (deg < 0) {
deg = 360 + deg;
}
double rad = deg * (M_PI / (double)180);
unsigned int oldWidth = width;
width = lround(sqrt(height * height + width * width));
height = lround(sqrt(height * height + oldWidth * oldWidth));
std::vector<unsigned char> output(rawPixels.size());
for (int X = 0; X < width; ++X) {
for (int Y = 0; Y < height; ++Y) {
for (int chan = 0; chan < CHANNELS_COUNT; ++chan) {
std::pair<int, int> xy = move(X, Y, rad);
output[(X * height + Y) * CHANNELS_COUNT + chan] = rawPixels[
((height - 1 - xy.second) * width + xy.first) * CHANNELS_COUNT + chan];
}
}
}
rawPixels = output;
}
It's ok to use addition arrays, but I don't want to use OpenCV or any other libraries.
I'm trying to use GPU Delegate in Tensorflow Lite on iOS. My model has inputs and outputs as OpenCV BGR image ([258, 540, 3]). How can I set inputs and outputs in C++ tensorflow lite interpreter? I tried to use this code
int input = interpreter->inputs()[0];
float* out = interpreter->typed_tensor<float>(input);
NSData* slicedData = [self inputDataFromCvMat:slicedImage];
uint8_t* in = (uint8_t*) slicedData.bytes;
ProcessInputWithFloatModel(in, out, WIDTH, HEIGHT, CHANNELS);
void ProcessInputWithFloatModel(uint8_t* input, float* buffer, int image_width, int image_height, int image_channels) {
for (int y = 0; y < wanted_input_height; ++y) {
float* out_row = buffer + (y * wanted_input_width * wanted_input_channels);
for (int x = 0; x < wanted_input_width; ++x) {
const int in_x = (y * image_width) / wanted_input_width;
const int in_y = (x * image_height) / wanted_input_height;
uint8_t* input_pixel =
input + (in_y * image_width * image_channels) + (in_x * image_channels);
float* out_pixel = out_row + (x * wanted_input_channels);
for (int c = 0; c < wanted_input_channels; ++c) {
out_pixel[c] = (input_pixel[c] - input_mean) / input_std;
}
}
}
}
- (NSData *)inputDataFromCvMat:(Mat)image {
NSMutableData *inputData = [[NSMutableData alloc] initWithCapacity:0];
for (int row = 0; row < HEIGHT + 10; row++) {
for (int col = 0; col < WIDTH + 10; col++) {
Vec3b intensity = image.at<Vec3b>(row, col);
int blue = intensity.val[0];
int green = intensity.val[1];
int red = intensity.val[2];
// we need to put pixel values in BGR (model was trained with opencv)
[inputData appendBytes:&blue length:sizeof(blue)];
[inputData appendBytes:&green length:sizeof(green)];
[inputData appendBytes:&red length:sizeof(red)];
}
}
return inputData;
}
but I don't know what is wrong
After some research, I managed to get it working
const int wanted_input_width = 258;
const int wanted_input_height = 540;
const int wanted_input_channels = 3;
Mat image = ...
// write to input
int input = interpreter->inputs()[0];
float* out = interpreter->typed_tensor<float>(input);
uint8_t* in = image.ptr<uint8_t>(0);
ProcessInputWithFloatModel(in, out);
// run interpreter
if (interpreter->Invoke() != kTfLiteOk) {
LOG(FATAL) << "Failed to invoke!";
}
// get output
int output_idx = interpreter->outputs()[0];
float* output = interpreter->typed_output_tensor<float>(output_idx);
Mat outputMat = ProcessOutputWithFloatModel(output);
/// Preprocess the input image and feed the TFLite interpreter buffer for a float model.
void ProcessInputWithFloatModel(uint8_t* input, float* buffer) {
for (int y = 0; y < wanted_input_height; ++y) {
float* out_row = buffer + (y * wanted_input_width * wanted_input_channels);
for (int x = 0; x < wanted_input_width; ++x) {
uint8_t* input_pixel = input + (y * wanted_input_width * wanted_input_channels) + (x * wanted_input_channels);
float* out_pixel = out_row + (x * wanted_input_channels);
for (int c = 0; c < wanted_input_channels; ++c) {
out_pixel[c] = input_pixel[c] / 255.0f;
}
}
}
}
Mat ProcessOutputWithFloatModel(float* input) {
cv::Mat image = cv::Mat::zeros(wanted_input_height, wanted_input_width, CV_8UC3);
for (int y = 0; y < wanted_input_height; ++y) {
for (int x = 0; x < wanted_input_width; ++x) {
float* input_pixel = input + (y * wanted_input_width * wanted_input_channels) + (x * wanted_input_channels);
cv::Vec3b & color = image.at<cv::Vec3b>(cv::Point(x, y));
color[0] = (uchar) floor(input_pixel[0] * 255.0f);
color[1] = (uchar) floor(input_pixel[1] * 255.0f);
color[2] = (uchar) floor(input_pixel[2] * 255.0f);
}
}
return image;
}
Is there a way to scale down with highest quality a font which is png image in opengl at startup? I tried gluScaleImage but there are many artefacts. Is there anything that uses lanczos or something like that? I don't want to write a shader or anything that does the scaling runtime.
This is based on an algorithm, I copied decades ago from the German c't Magazin, and still use it from time to time for similar issues like described by OP.
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
If I got it right, this implements a bilinear interpolation.
I don't dare to call it a Minimal Complete Verifiable Example although this is what I intended to do.
The complete sample application:
A simplified class Image
image.h:
#ifndef IMAGE_H
#define IMAGE_H
#include <vector>
// convenience type for bytes
typedef unsigned char uint8;
// image helper class
class Image {
private: // variables:
int _w, _h; // image size
size_t _bPR; // bytes per row
std::vector<uint8> _data; // image data
public: // methods:
// constructor.
Image(): _w(0), _h(0), _bPR(0) { }
// destructor.
~Image() = default;
// copy constructor.
Image(const Image&) = delete; // = default; would work as well.
// copy assignment.
Image& operator=(const Image&) = delete; // = default; would work as well.
// returns width of image.
int w() const { return _w; }
// returns height of image.
int h() const { return _h; }
// returns bytes per row.
size_t bPR() const { return _bPR; }
// returns pointer to image data.
const uint8* data(
int y = 0) // row number
const {
return &_data[y * _bPR];
}
// returns data size (in bytes).
size_t size() const { return _data.size(); }
// clears image.
void clear();
// resizes image.
uint8* resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR); // bytes per row
// returns pixel.
int getPixel(
int x, // column
int y) // row
const;
// sets pixel.
void setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b);
// sets pixel.
void setPixel(
int x, // column
int y, // row
int value) // RGB value
{
setPixel(x, y, value & 0xff, value >> 8 & 0xff, value >> 16 & 0xff);
}
};
// helper functions:
inline uint8 getR(int value) { return value & 0xff; }
inline uint8 getG(int value) { return value >> 8 & 0xff; }
inline uint8 getB(int value) { return value >> 16 & 0xff; }
#endif // IMAGE_H
image.cc:
#include <cassert>
#include "image.h"
// clears image.
void Image::clear()
{
_data.clear(); _w = _h = _bPR = 0;
}
// allocates image data.
uint8* Image::resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR) // bits per row
{
assert(w >= 0 && 3 * w <= bPR);
assert(h >= 0);
_w = w; _h = h; _bPR = bPR;
const size_t size = h * bPR;
_data.resize(size);
return _data.data();
}
// returns pixel.
int Image::getPixel(
int x, // column
int y) // row
const {
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
return _data[offs + 0]
| _data[offs + 1] << 8
| _data[offs + 2] << 16;
}
// sets pixel.
void Image::setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b) // R, G, B values
{
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
_data[offs + 0] = r;
_data[offs + 1] = g;
_data[offs + 2] = b;
}
Image Scaling
imageScale.h:
#ifndef IMAGE_SCALE_H
#define IMAGE_SCALE_H
#include "image.h"
/* scales an image to a certain width and height.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
bool scaleTo( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
int w, int h, // destination width and height
int align = 4); // row alignment
/* scales an image about a certain horizontal/vertical scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scaleXY( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double sX, // horizontal scaling factor (must be > 0 but not too large)
double sY, // vertical scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return sX > 0.0 && sY > 0.0
? scaleTo(imgSrc, imgDst,
(int)(sX * imgSrc.w()), (int)(sY * imgSrc.h()), align)
: false;
}
/* scales an image about a certain scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scale( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double s, // scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return scaleXY(imgSrc, imgDst, s, s, align);
}
#endif // IMAGE_SCALE_H
imageScale.cc:
#include <cassert>
#include <algorithm>
#include "imageScale.h"
namespace {
template <typename VALUE>
VALUE clip(VALUE value, VALUE min, VALUE max)
{
return value < min ? min : value > max ? max : value;
}
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
bool scaleUp(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w && w >= wSrc && h && h >= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc - 1);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd;
if (yStartInt < yEndInt) {
const double bFract = clip((double)((yEnd - yEndInt) / sy), 0.0, 1.0);
const double tFract = 1.0 - bFract;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[4];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += tFract * rFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * lFract *dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * rFract *dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * dataSrc[jData++];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
} else {
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[3];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += rFract * dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) pixel[k] = dataSrc[jData++];
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
}
// done
return true;
}
} // namespace
bool scaleTo(const Image &imgSrc, Image &imgDst, int w, int h, int align)
{
Image imgTmp;
return w <= 0 || h <= 0 ? false
: w >= imgSrc.w() && h >= imgSrc.h()
? scaleUp(imgSrc, imgDst, w, h, align)
: w <= imgSrc.w() && h <= imgSrc.h()
? scaleDown(imgSrc, imgDst, w, h, align)
: w >= imgSrc.w()
? scaleUp(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleDown(imgTmp, imgDst, w, h, align)
: scaleDown(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleUp(imgTmp, imgDst, w, h, align);
}
PPM file IO
imagePPM.h:
#ifndef IMAGE_PPM_H
#define IMAGE_PPM_H
#include <iostream>
#include "image.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align = 4); // row alignment
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img); // image to write from
#endif // IMAGE_PPM_H
imagePPM.cc:
#include <sstream>
#include <string>
#include "imagePPM.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align) // row alignment
{
// parse header
std::string buffer;
if (!getline(in, buffer)) return false;
if (buffer != "P6") {
std::cerr << "Wrong header! 'P6' expected.\n";
return false;
}
int w = 0, h = 0, t = 0;
for (int i = 0; i < 3;) {
if (!getline(in, buffer)) return false;
if (buffer.empty()) continue; // skip empty lines
if (buffer[0] == '#') continue; // skip comments
std::istringstream str(buffer);
switch (i) {
case 0:
if (!(str >> w)) continue;
++i;
case 1:
if (!(str >> h)) continue;
++i;
case 2:
if (!(str >> t)) continue;
++i;
}
}
if (t != 255) {
std::cerr << "Unsupported format! t = 255 expected.\n";
return false;
}
// allocate image buffer
uint8 *data = img.resize(w, h, (w * 3 + align - 1) / align * align);
// read data
for (int i = 0; i < h; ++i) {
if (!in.read((char*)data, 3 * img.w())) return false;
data += img.bPR();
}
// done
return true;
}
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img) // image to write from
{
// write header
if (!(out << "P6\n" << img.w() << ' ' << img.h() << " 255\n")) return false;
// write image data
for (size_t y = 0; y < img.h(); ++y) {
const uint8 *const data = img.data(y);
if (!out.write((const char*)data, 3 * img.w())) return false;
}
// done
return true;
}
The main application
scaleRGBImg.cc:
#include <iostream>
#include <fstream>
#include <string>
#include "image.h"
#include "imagePPM.h"
#include "imageScale.h"
int main(int argc, char **argv)
{
// read command line arguments
if (argc <= 3) {
std::cerr << "Missing arguments!\n";
std::cout
<< "Usage:\n"
<< " scaleRGBImg IN_FILE SCALE OUT_FILE\n";
return 1;
}
const std::string inFile = argv[1];
char *end;
const double s = std::strtod(argv[2], &end);
if (end == argv[2] || *end != '\0') {
std::cerr << "Invalid scale factor '" << argv[2] << "'!\n";
return 1;
}
if (s <= 0.0) {
std::cerr << "Invalid scale factor " << s << "!\n";
return 1;
}
const std::string outFile = argv[3];
// read image
Image imgSrc;
{ std::ifstream fIn(inFile.c_str(), std::ios::binary);
if (!readPPM(fIn, imgSrc)) {
std::cerr << "Reading '" << inFile << "' failed!\n";
return 1;
}
}
// scale image
Image imgDst;
if (!scale(imgSrc, imgDst, s)) {
std::cerr << "Scaling failed!\n";
return 1;
}
// write image
{ std::ofstream fOut(outFile.c_str(), std::ios::binary);
if (!writePPM(fOut, imgDst) || (fOut.close(), !fOut.good())) {
std::cerr << "Writing '" << outFile << "' failed!\n";
return 1;
}
}
// done
return 0;
}
Test
Compiled in cygwin64:
$ g++ -std=c++11 -o scaleRGBImg scaleRGBImg.cc image.cc imagePPM.cc imageScale.cc
$
A sample image test.ppm for a test – converted to PPM in GIMP:
Test with the sample image:
$ for I in 0.8 0.6 0.4 0.2 ; do echo ./scaleRGBImg test.ppm $I test.$I.ppm ; done
./scaleRGBImg test.ppm 0.8 test.0.8.ppm
./scaleRGBImg test.ppm 0.6 test.0.6.ppm
./scaleRGBImg test.ppm 0.4 test.0.4.ppm
./scaleRGBImg test.ppm 0.2 test.0.2.ppm
$ for I in 0.8 0.6 0.4 0.2 ; do ./scaleRGBImg test.ppm $I test.$I.ppm ; done
$
This is what came out:
test.0.8.ppm:
test.0.6.ppm:
test.0.4.ppm:
test.0.2.ppm:
How can I resize a YUV image in CUDA? I tried converting libyuv’s scaling code to CUDA, but the performance is very bad.
void ScalePlaneSimple(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const Npp8u* src_ptr, Npp8u* dst_ptr) {
int i;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
if (src_width * 2 == dst_width && x < 0x8000) {
for (i = 0; i < dst_height; ++i) {
ScaleColsUp2_C(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
}
else
{
for (i = 0; i < dst_height; ++i) {
ScaleCols_C<<<1,1>>>(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
}
}
__global__ void ScaleCols_C(Npp8u* dst_ptr, const Npp8u* src_ptr,
int dst_width, int x, int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst_ptr[0] = src_ptr[x >> 16];
x += dx;
dst_ptr[1] = src_ptr[x >> 16];
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
dst_ptr[0] = src_ptr[x >> 16];
}
}
Maybe I should use parallel computing? Any advice is welcome.
If you want to use cuda, take a look at the NVidia performance primitives. There are image resizing functions. (It's a copy of the interface for the Intel performance primitives, if you don't want to use the gpu)
I made a program in C++ which calculates the mandelbrot-set. Now I want to visualize it (save it in a picture). But when I try to save a 64k picture some problems come up. So what is the best way to save a picture of the pixels or at least to visual it?
Edit:
When I want to create a for Example 64K (61440 * 34560) image there will be the error "Access violation while writing at the position 0x0..." (originally on German and translated) and the program stops. This error appears with very high resolution. On lower resolutions the program works as it is supposed to.
#include <SFML\Graphics.hpp>
#include <stdlib.h>
#include <complex>
#include <cmath>
#include <thread>
//4K : 3840 * 2160
//8K : 7680 * 4320
//16K: 15360 * 8640
//32K: 30720 * 17280
//64K: 61440 * 34560
//128K:122880 * 69120
const unsigned long width = 61440; //should be dividable by ratioX & numberOfThreads!
const unsigned long height = 34560; //should be dividable by ratioY & numberOfThreads!
const unsigned int maxIterations = 500;
const unsigned int numberOfThreads = 6;
const int maxWidth = width / 3;
const int maxHeight = height / 2;
const int minWidth = -maxWidth * 2;
const int minHeight = -maxHeight;
const double ratioX = 3.0 / width;
const double ratioY = 2.0 / height;
sf::Image img = sf::Image();
int getsGreaterThan2(std::complex<double> z, int noIterations) {
double result;
std::complex<double> zTmp = z;
std::complex<double> c = z;
for (int i = 1; i != noIterations; i++) {
zTmp = std::pow(z, 2) + c;
if (zTmp == z) {
return 0;
}
z = std::pow(z, 2) + c;
result = std::sqrt(std::pow(z.real(), 2) + std::pow(z.imag(), 2));
if (result > 2) {
return i;
}
}
return 0;
}
void fillPixelArrayThreadFunc(int noThreads, int threadNr) { //threadNr ... starts from 0
double imgNumber;
double realNumber;
double tmp;
long startWidth = ((double)width) / noThreads * threadNr + minWidth;
long endWidth = startWidth + width / noThreads;
for (long x = startWidth; x < endWidth; x++) {
imgNumber = x * ratioX;
for (long y = minHeight; y < maxHeight; y++) {
realNumber = y * ratioY;
long xArray = x - minWidth;
long yArray = y - minHeight;
tmp = getsGreaterThan2(std::complex<double>(imgNumber, realNumber), maxIterations);
if (tmp == 0) {
img.setPixel(xArray, yArray, sf::Color(0, 0, 0, 255));
}
else {
img.setPixel(xArray, yArray, sf::Color(tmp / maxIterations * 128, tmp / maxIterations * 128, tmp / maxIterations * 255, 255));
}
}
}
}
int main() {
img.create(width, height, sf::Color::Black);
std::thread *threads = new std::thread[numberOfThreads];
for (int i = 0; i < numberOfThreads; i++) {
threads[i] = std::thread(std::bind(fillPixelArrayThreadFunc, numberOfThreads, i));
}
for (int i = 0; i < numberOfThreads; i++) {
threads[i].join();
}
img.saveToFile("filename.png");
return 1;
}
Your program fails during the call img.create(width, height, sf::Color::Black);.
When you step into the sf::Image::create function you end up here where the newPixels vector is created, this simply fails when width * height is too big as in your case:
////////////////////////////////////////////////////////////
void Image::create(unsigned int width, unsigned int height, const Color& color)
{
if (width && height)
{
// Create a new pixel buffer first for exception safety's sake
std::vector<Uint8> newPixels(width * height * 4);
^61440* ^34560 = 8'493'465'600 bytes !!
Conclusion: SFML cannot handle huge images.