Greetings!
I wanted to ask whether anyone has tried doing the YUV-to-RGB color space
conversion with NEON intrinsics instead of the
"cvtColor(myuv, mrgb, CV_YUV420sp2RGB,3);" function.
I found a similar function at:
http://build.shr-project.org/sources/svn/code.ros.org/svn/opencv/branches/2.2/opencv/android/android-jni/jni/yuv2rgb_neon.c
It goes like this:
void color_convert_common(unsigned char *pY, unsigned char *pUV, int
width, int height, unsigned char *buffer, int grey)
{
int i, j;
int nR, nG, nB;
int nY, nU, nV;
unsigned char *out = buffer;
int offset = 0;
uint8x8_t Y_SUBvec = vld1_u8(Y_SUBS);
uint8x8_t UV_SUBvec = vld1_u8(UV_SUBS); // v,u,v,u v,u,v,u
uint32x4_t UV_MULSvec = vld1q_u32(UV_MULS);
uint8x8_t ZEROSvec =vld1_u8(ZEROS);
uint32_t UVvec_int[8];
if (grey)
{
memcpy(out, pY, width * height * sizeof(unsigned char));
}
else
// YUV 4:2:0
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j += 8)
{
// nY = *(pY + i * width + j);
// nV = *(pUV + (i / 2) * width + bytes_per_pixel *
(j / 2));
// nU = *(pUV + (i / 2) * width + bytes_per_pixel *
(j / 2) + 1);
uint8x8_t nYvec = vld1_u8(LOAD_Y(i,j));
uint8x8_t nUVvec = vld1_u8(LOAD_V(i,j)); // v,u,v,u v,u,v,u
nYvec = vmul_u8(nYvec, vcle_u8(nYvec,ZEROSvec));
// Yuv Convert
// nY -= 16;
// nU -= 128;
// nV -= 128;
// nYvec = vsub_u8(nYvec, Y_SUBvec);
// nUVvec = vsub_u8(nYvec, UV_SUBvec);
uint16x8_t nYvec16 = vmovl_u8(vsub_u8(nYvec, Y_SUBvec));
uint16x8_t nUVvec16 = vmovl_u8(vsub_u8(nYvec, UV_SUBvec));
uint16x4_t Y_low4 = vget_low_u16(nYvec16);
uint16x4_t Y_high4 = vget_high_u16(nYvec16);
uint16x4_t UV_low4 = vget_low_u16(nUVvec16);
uint16x4_t UV_high4 = vget_high_u16(nUVvec16);
uint32x4_t UV_low4_int = vmovl_u16(UV_low4);
uint32x4_t UV_high4_int = vmovl_u16(UV_high4);
uint32x4_t Y_low4_int = vmull_n_u16(Y_low4, 1192);
uint32x4_t Y_high4_int = vmull_n_u16(Y_high4, 1192);
uint32x4x2_t UV_uzp = vuzpq_u32(UV_low4_int, UV_high4_int);
uint32x2_t Vl = vget_low_u32(UV_uzp.val[0]);//
vld1_u32(UVvec_int);
uint32x2_t Vh = vget_high_u32(UV_uzp.val[0]);//
vld1_u32(UVvec_int + 2);
uint32x2x2_t Vll_ = vzip_u32(Vl, Vl);
uint32x4_t* Vll = (uint32x4_t*)(&Vll_);
uint32x2x2_t Vhh_ = vzip_u32(Vh, Vh);
uint32x4_t* Vhh = (uint32x4_t*)(&Vhh_);
uint32x2_t Ul = vget_low_u32(UV_uzp.val[1]);
uint32x2_t Uh = vget_high_u32(UV_uzp.val[1]);
uint32x2x2_t Ull_ = vzip_u32(Ul, Ul);
uint32x4_t* Ull = (uint32x4_t*)(&Ull_);
uint32x2x2_t Uhh_ = vzip_u32(Uh, Uh);
uint32x4_t* Uhh = (uint32x4_t*)(&Uhh_);
uint32x4_t B_int_low = vmlaq_n_u32(Y_low4_int, *Ull, 2066); //
multiply by scalar accum
uint32x4_t B_int_high = vmlaq_n_u32(Y_high4_int, *Uhh,
2066); //multiply by scalar accum
uint32x4_t G_int_low = vsubq_u32(Y_low4_int,
vmlaq_n_u32(vmulq_n_u32(*Vll, 833), *Ull, 400));
uint32x4_t G_int_high = vsubq_u32(Y_high4_int,
vmlaq_n_u32(vmulq_n_u32(*Vhh, 833), *Uhh, 400));
uint32x4_t R_int_low = vmlaq_n_u32(Y_low4_int, *Vll, 1634); //
multiply by scalar accum
uint32x4_t R_int_high = vmlaq_n_u32(Y_high4_int, *Vhh,
1634); //multiply by scalar accum
B_int_low = vshrq_n_u32 (B_int_low, 10);
B_int_high = vshrq_n_u32 (B_int_high, 10);
G_int_low = vshrq_n_u32 (G_int_low, 10);
G_int_high = vshrq_n_u32 (G_int_high, 10);
R_int_low = vshrq_n_u32 (R_int_low, 10);
R_int_high = vshrq_n_u32 (R_int_high, 10);
uint8x8x3_t RGB;
RGB.val[0] = vmovn_u16(vcombine_u16(vqmovn_u32
(R_int_low),vqmovn_u32 (R_int_high)));
RGB.val[1] = vmovn_u16(vcombine_u16(vqmovn_u32
(G_int_low),vqmovn_u32 (G_int_high)));
RGB.val[2] = vmovn_u16(vcombine_u16(vqmovn_u32
(B_int_low),vqmovn_u32 (B_int_high)));
vst3_u8 (out+i*width*3 + j*3, RGB);
}
}
}
The function seemed correct to me, and I ran it in my sample program (which
was running fine with the original implementation). However, the colors
are wrong, and I am not sure why that is happening.
Does anyone have an idea what the error could be? I have read through the
code and cannot find it. Any help would be greatly appreciated.
Also, is there an option in OpenCV to enable NEON? (The link mentions
OpenCV 2.2 / Android, which is why I am asking. :) )
Thanks a lot !
Cheers,
Rahul