I have a distorted image in YUY2 format. YUY2 belongs to the YUV 4:2:2 family (not 4:2:0).
I also have mapx and mapy (height 720, width 1280), which I obtained from:
cv::fisheye::initUndistortRectifyMap(K, D, cv::Mat::eye(3, 3, CV_64F), new_K, Size, CV_32FC1, mapx, mapy);
How can I obtain an undistorted YUY2 image?
My final goal is to have undistorted YUY2 (not BGR).
I thought of performing the following steps:
cv::cvtColor(YUY, BGR, cv::COLOR_YUV2BGR_YUY2);
// then perform remapping
// and convert back to YUY2
But there is no BGR-to-YUY2 conversion (no cv::COLOR_BGR2YUV_YUY2) in OpenCV.
Is there any smarter way?

You may convert from YUV 4:2:2 to YUV 4:4:4, undistort the 4:4:4 image, and convert back to 4:2:2.
Illustration of the conversion stages:
YUV422 -> YUV444 -> remap(YUV444) -> YUV422
I could not find an OpenCV function for converting from YUV 4:2:2 to YUV 4:4:4.
Implementing the conversion with simple for loops is quite straightforward:
//Convert packed YUYV 4:2:2 to interleaved YUV 4:4:4 (y,u,v,y,u,v,y,u,v...)
//The conversion is performed by duplicating each U and V element twice (equivalent to resize with nearest neighbor interpolation).
//The input type is CV_8UC1 (considered to be Grayscale image): every row holds the bytes y0,u0,y1,v0,...
//so the input has twice as many columns as the image has pixels per row.
//The output type is CV_8UC3 (considered to be colored image with 3 channels) and has yuyv.cols/2 columns.
//Raises a CV_Assert failure if the input is not CV_8UC1 or if its row length is not a multiple of 4 bytes.
static cv::Mat convertYuyv422toYuv444(const cv::Mat yuyv)
{
    //One YUYV macropixel is 4 bytes (2 pixels). The loop below reads and writes whole macropixels,
    //so any other row length would make it run past the end of the source and destination rows.
    CV_Assert(yuyv.type() == CV_8UC1);
    CV_Assert(yuyv.cols % 4 == 0);

    const int rows = yuyv.rows;
    const int src_cols = yuyv.cols;     //Source row length in bytes (2 bytes per pixel).
    const int dst_cols = src_cols / 2;  //Destination row length in pixels.

    cv::Mat yuv(rows, dst_cols, CV_8UC3);

    for (int y = 0; y < rows; y++)
    {
        //Mat::ptr takes the row stride into account, so padded rows are handled correctly.
        const unsigned char *I0 = yuyv.ptr<unsigned char>(y); //Points the beginning of the source row.
        unsigned char *J0 = yuv.ptr<unsigned char>(y);        //Points the beginning of the destination row.

        int srcx = 0;
        int dstx = 0;

        //yuyv -> yuvyuv
        //Convert 2 pixels per iteration
        for (int x = 0; x < dst_cols; x += 2)
        {
            const unsigned char y0 = I0[srcx];
            const unsigned char u0 = I0[srcx + 1];
            const unsigned char y1 = I0[srcx + 2];
            const unsigned char v0 = I0[srcx + 3];

            J0[dstx]     = y0;
            J0[dstx + 1] = u0;
            J0[dstx + 2] = v0;
            J0[dstx + 3] = y1;
            J0[dstx + 4] = u0; //Duplicate U
            J0[dstx + 5] = v0; //Duplicate V

            srcx += 4; //Source has 2 elements per pixel
            dstx += 6; //Destination has 3 elements per pixel
        }
    }

    return yuv;
}
The conversion simply duplicates every U and V element, so that each one appears twice.
It's not the best way, but it's assumed to be good enough.
Duplicating U and V is equivalent to resize with Nearest Neighbor interpolation.
For converting YUV 4:4:4 back to YUV 4:2:2, you may use the code sample from (my) following post:
Convert YUV4:4:4 to YUV4:2:2 images.
There are existing optimized libraries that support all sorts of color format conversions.
libswscale for example, but I think it's an overkill for your needs...
For testing, I used the input from your previous post (with my answer):
How to undistort I420 image data? Efficiently
Since I don't have a YUYV image, I used FFmpeg (command line) for creating one:
ffmpeg -i input_image.jpg -codec rawvideo -pix_fmt yuyv422 input_image_yuyv.yuv
I used MATLAB code for converting the raw input_image_yuyv.yuv to PNG.
The MATLAB implementation converts 4:2:2 to 4:4:4 in two ways and verifies that duplicating U and V is equivalent to resizing with Nearest Neighbor interpolation.
The MATLAB code is also used for validating the correctness of the C++ implementation.
% Reference script: reads the raw YUYV file, builds a YUV444 matrix by duplicating U and V,
% writes the reference outputs, and compares them with the output of the C++ implementation.
I = imread('input_image.jpg');
[rows, cols, ch] = size(I); % For the 1280x720 (width x height) test image: rows = 720, cols = 1280
% Read the YUYV to 2560x720 matrix from a binary file
f = fopen('input_image_yuyv.yuv', 'r');
YUYV = fread(f, [cols*2, rows], '*uint8')';
fclose(f); % Release the file handle before the variable f is reused below.
% Write YUYV to PNG image - to be used as C++ input.
imwrite(YUYV, 'YUYV.png');
Y = YUYV(:, 1:2:end); % 1280x720
U = YUYV(:, 2:4:end); % 640x720
V = YUYV(:, 4:4:end); % 640x720
% figure;imshow(Y);title('in Y');impixelinfo
% figure;imshow(U);title('in U');impixelinfo
% figure;imshow(V);title('in V');impixelinfo
% Convert U and V to 4:4:4 format using imresize with Nearest Neighbor interpolation method (used as reference).
refU2 = imresize(U, [rows, cols], 'nearest');
refV2 = imresize(V, [rows, cols], 'nearest');
% figure;imshow(refU2);title('reference inU full');impixelinfo
% figure;imshow(refV2);title('reference inV full');impixelinfo
% Resize x2 in the horizontal axis by simple duplication:
U2 = zeros(rows, cols, 'uint8');
U2(:, 1:2:end) = U;
U2(:, 2:2:end) = U;
V2 = zeros(rows, cols, 'uint8');
V2(:, 1:2:end) = V;
V2(:, 2:2:end) = V;
% Verify that the simple duplication is equivalent to resize with Nearest Neighbor interpolation:
% display(isequal(U2, refU2) && isequal(V2, refV2)) % Equal!!!
% Build YUV444 3840x720 matrix:
YUV444 = zeros(rows, cols*3, 'uint8');
YUV444(:, 1:3:end) = Y;
YUV444(:, 2:3:end) = U2;
YUV444(:, 3:3:end) = V2;
% Write the YUV444 image to binary file (used as reference for C++ implementation)
f = fopen('image_yuv444.yuv', 'w');
fwrite(f, YUV444', 'uint8');
fclose(f); % Close the file so the written data is flushed to disk.
imwrite(YUV444, 'matlabYUV444.png');
% Test output (after executing C++ code).
c_YUV444 = imread('yuv444.png');
display(isequal(YUV444, c_YUV444));
The following code is the main() of the C++ implementation:
int main()
cv::Mat yuyv = cv::imread("YUYV.png", cv::IMREAD_GRAYSCALE); //Read YUYV.png (created using MATLAB) as Grayscale
cv::Mat yuv = convertYuyv422toYuv444(yuyv); //Convet yuyv to yuv (y,u,v,y,u,v...)
//cv::imshow("yuyv", yuyv);
cv::imwrite("yuv444.png", yuv); //Store YUV image for testing.
//remap the YUV 4:4:4
int W = 1280, H = 720; //Assume resolution of Y plane is 1280x720
cv::Mat mapx;
cv::Mat mapy;
cv::Mat dst_yuv;
cv::Matx33d K = cv::Matx33d(541.2152931632737, 0.0, 661.7479652584254,
0.0, 541.0606969363056, 317.4524205037745,
0.0, 0.0, 1.0);
cv::Vec4d D = cv::Vec4d(-0.042166406281296365, -0.001223961942208027, -0.0017036710622692108, 0.00023929900459453295);
cv::Size newSize = cv::Size(3400, 1940);
cv::Matx33d new_K;
cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, cv::Size(W, H), cv::Mat::eye(3, 3, CV_64F), new_K, 1, newSize); // W,H are the distorted image size
cv::fisheye::initUndistortRectifyMap(K, D, cv::Mat::eye(3, 3, CV_64F), new_K, newSize, CV_16SC2, mapx, mapy);
cv::remap(yuv, dst_yuv, mapx, mapy, cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(0, 128, 128));
//Convert for BGR - just for dispaly
cv::Mat dst_bgr;
cv::cvtColor(dst_yuv, dst_bgr, cv::COLOR_YUV2BGR);
cv::imshow("yuv", yuv);
cv::imshow("dst_yuv", dst_yuv);
cv::imshow("dst_bgr", dst_bgr);
cv::imwrite("dst_bgr.png", dst_bgr); //Store BGR image for testing.
return 0;
Use remap with cv::BORDER_CONSTANT and cv::Scalar(0, 128, 128):
cv::remap(yuv, dst_yuv, mapx, mapy, cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(0, 128, 128));
C++ Result (after converting to BGR):

I tried modifying mapx and mapy to make them suitable for YUV422. The result is very good in terms of computation time, since only one remapping is required in real time, but the quality is not the best.
Then I tried YUV422 -> YUV444 -> remap(YUV444) -> YUV422 through libswscale, but again YUV conversion was taking time.
Finally, I developed CUDA kernels for the YUV conversions. They are attached below.
// nvcc -c -o colorConversion.o `pkg-config --libs --cflags opencv4`
// /usr/bin/g++ -g -O3 /home/jai/vscode/opencvCUDA/cuda3.cpp -o /home/jai/vscode/opencvCUDA/cuda3 colorConversion.o `pkg-config --libs --cflags opencv4` `pkg-config --libs --cflags gstreamer-1.0` `pkg-config --libs --cflags cuda-11.3` `pkg-config --libs --cflags cudart-11.3`
#include "colorConversion.h"
// Expands a 2-channel YUV 4:2:2 image into a 3-channel YUV image, one output pixel per thread.
// Each input pixel is read as (x = Y, y = chroma): the even column of a pair supplies U and the
// odd column supplies V. Both output pixels of a pair receive the same U and V.
// Expected launch: blockIdx.y selects the row, blockDim.x * blockIdx.x + threadIdx.x the column.
// Threads that fall outside either image, or on an incomplete trailing pair, do nothing.
__global__ void kernel_YUY422toYUY(cv::cuda::PtrStepSz<uchar2> YUV422, cv::cuda::PtrStepSz<uchar3> YUV)
{
    const int i = blockIdx.y;                            // row
    const int j = blockDim.x * blockIdx.x + threadIdx.x; // col

    // Derive the pair from the column itself (not from threadIdx.x) so the result does not
    // depend on the block size being even.
    const int jU = j & ~1; // even column of the pair: carries U
    const int jV = jU + 1; // odd column of the pair: carries V

    if (i >= YUV.rows || j >= YUV.cols || i >= YUV422.rows || jV >= YUV422.cols) {
        return;
    }

    YUV(i, j) = make_uchar3(YUV422(i, j).x,   // Y of this pixel
                            YUV422(i, jU).y,  // U shared by the pair
                            YUV422(i, jV).y); // V shared by the pair
}

// Launches kernel_YUY422toYUY over every pixel of YUVgpu.
// YUV422gpu is the 2-channel source; YUVgpu is the 3-channel destination and must already be allocated.
// Does nothing when the destination is empty.
void YUY422toYUY(const cv::cuda::GpuMat &YUV422gpu, cv::cuda::GpuMat &YUVgpu)
{
    if (YUVgpu.rows <= 0 || YUVgpu.cols <= 0) {
        return;
    }

    // A fixed block size with a rounded-up grid avoids exceeding the per-block thread limit on
    // wide images; the kernel's bounds check discards the surplus threads of the last block.
    const unsigned int threadsPerBlock = 256;
    const dim3 block(threadsPerBlock);
    const dim3 grid((YUVgpu.cols + threadsPerBlock - 1) / threadsPerBlock, YUVgpu.rows);

    kernel_YUY422toYUY<<<grid, block>>>(YUV422gpu, YUVgpu);
}
// Packs a 3-channel YUV image into a 2-channel YUV 4:2:2 image.
// Each pair of columns (j, j+1) produces two output pixels: (Y_j, U) and (Y_j+1, V), where U and V
// are the integer averages (rounded down) of the two source pixels' U and V values.
// Expected launch: one block per row (blockIdx.x). The threads of a block stride over the pairs of
// that row, so any block size works, including the one-thread-per-pair launch.
// Rows outside either image and an incomplete trailing pair are skipped.
__global__ void kernel_YUYtoYUY422(cv::cuda::PtrStepSz<uchar3> YUV, cv::cuda::PtrStepSz<uchar2> YUV422)
{
    const int i = blockIdx.x; // row
    if (i >= YUV.rows || i >= YUV422.rows) {
        return;
    }

    const int cols = min(YUV.cols, YUV422.cols);

    for (int j = threadIdx.x * 2; j + 1 < cols; j += blockDim.x * 2) { // j: even column of the pair
        const uchar3 p0 = YUV(i, j);
        const uchar3 p1 = YUV(i, j + 1);

        YUV422(i, j)     = make_uchar2(p0.x, (p0.y + p1.y) / 2); // Y0, averaged U
        YUV422(i, j + 1) = make_uchar2(p1.x, (p0.z + p1.z) / 2); // Y1, averaged V
    }
}

// Launches kernel_YUYtoYUY422 with one block per row of YUV422gpu.
// YUVgpu is the 3-channel source; YUV422gpu is the 2-channel destination and must already be allocated.
// Does nothing when the destination has no rows or fewer than two columns.
void YUYtoYUY422(const cv::cuda::GpuMat &YUVgpu, cv::cuda::GpuMat &YUV422gpu)
{
    if (YUV422gpu.rows <= 0 || YUV422gpu.cols < 2) {
        return;
    }

    // Fixed block size: the kernel strides over the row, so wide images do not need
    // (and cannot have) one thread per pair within a single block.
    const unsigned int threadsPerBlock = 256;

    kernel_YUYtoYUY422<<<dim3(YUV422gpu.rows), dim3(threadsPerBlock)>>>(YUVgpu, YUV422gpu);
}
I then do the remapping, again using CUDA, with the following lines of code:
YUV422GPU.upload(YUV422); // YUV422 #channel = 2
YUV1.create(H, W, CV_8UC3);
YUV2.create(H, W, CV_8UC3);
YUY422toYUY(YUV422GPU, YUV1); // Expand YUV422 into the 3-channel YUV1; without this, remap reads an uninitialised buffer.
cv::cuda::remap(YUV1, YUV2, mapxGPU, mapyGPU, interpolationMethod); // YUV remap
YUYtoYUY422(YUV2, YUV422GPU); // dst is the final YUV422. 2 channel image


