HELP I need to get the real distance of a tracked object (opencv libfreenect)

1,495 views
Skip to first unread message

antomega

unread,
Mar 7, 2013, 6:35:08 AM3/7/13
to openk...@googlegroups.com
Hi everyone.
I have a problem with my code.
I've done a color based tracking using OpenCV and a kinect camera (using the libfreenect library).
The color tracking itself works fine, but I don't understand how to get the real distance of the mass centre of the blob I've detected as the object.
I get the RGB matrix and the 16-bit and 8-bit depth matrices, plus the screen position of the mass centre of the blob. I need to know how to extract the depth info at the screen position I've found from the depth matrix and, after that, how to transform the screen X - screen Y - depth Z into the coordinates of the object relative to the Kinect (I assume the Kinect is at coordinates X=0, Y=0, Z=0).
To get the depth value from the depth matrix i don't know if i need to use depthZ=depthMat.at<unsigned char>(iMeanx,iMeany);
or frameDepth->imageData[iMeanx,iMeany]
This is my code

#include <opencv/cvaux.h>
#include <opencv/highgui.h>
#include <opencv/cxcore.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <time.h>
#include <ctype.h>
#include <cvblobs/BlobResult.h>
#include "libfreenect.hpp"
#include "libfreenect.h"
#include <libfreenect-registration.h>

using namespace cv;
using namespace std;

// Minimal RAII wrapper around a POSIX mutex, used to guard the RGB and depth
// frame buffers that are written by the libfreenect callbacks and read by the
// main loop.
class Mutex {
public:
    Mutex() {
        pthread_mutex_init( &m_mutex, NULL );
    }
    // Release the underlying OS mutex object.  The original version never
    // destroyed it, leaking the pthread mutex on every Mutex's lifetime.
    ~Mutex() {
        pthread_mutex_destroy( &m_mutex );
    }
    void lock() {
        pthread_mutex_lock( &m_mutex );
    }
    void unlock() {
        pthread_mutex_unlock( &m_mutex );
    }
private:
    pthread_mutex_t m_mutex;
};

// C++ wrapper around a libfreenect device.  The video/depth callbacks publish
// the newest frame by re-pointing cv::Mat headers at libfreenect's buffers;
// getVideo()/getDepth() hand a copy (or converted copy) to the caller under a
// per-stream mutex.
class MyFreenectDevice : public Freenect::FreenectDevice {
  public:
    // NOTE(review): m_buffer_depth/m_buffer_rgb are std::vector<uint8_t>
    // constructed with enum values (FREENECT_DEPTH_MM, FREENECT_VIDEO_RGB)
    // used as *sizes*.  Neither buffer is read anywhere in this file;
    // presumably leftovers from the libfreenect cppview example — verify.
    MyFreenectDevice(freenect_context *_ctx, int _index)
        : Freenect::FreenectDevice(_ctx, _index), m_buffer_depth(FREENECT_DEPTH_MM),m_buffer_rgb(FREENECT_VIDEO_RGB), m_gamma(2048), m_new_rgb_frame(false), m_new_depth_frame(false),
          depthMat(Size(640,480),CV_16UC1), rgbMat(Size(640,480),CV_8UC3,Scalar(0)), ownMat(Size(640,480),CV_8UC3,Scalar(0))
    {
        // Precompute a gamma curve over the 11-bit raw depth range.
        // m_gamma is filled here but never read in this file (typically used
        // for false-colour depth rendering in the original example).
        for( unsigned int i = 0 ; i < 2048 ; i++) {
            float v = i/2048.0;
            v = std::pow(v, 3)* 6;
            m_gamma[i] = v*6*256;
        }
    }

    // Called by libfreenect when a new RGB frame arrives.  Re-points
    // rgbMat.data at libfreenect's internal buffer (no copy) — rgbMat is only
    // valid until libfreenect reuses that buffer; getVideo() copies out of it.
    void VideoCallback(void* _rgb, uint32_t timestamp) {
        //std::cout << "RGB callback" << std::endl;
        m_rgb_mutex.lock();
        uint8_t* rgb = static_cast<uint8_t*>(_rgb);
        rgbMat.data = rgb;
        m_new_rgb_frame = true;
        m_rgb_mutex.unlock();
    };

    // Called by libfreenect when a new depth frame arrives.  Same aliasing
    // scheme as VideoCallback: depthMat.data is pointed at the 16-bit buffer.
    void DepthCallback(void* _depth, uint32_t timestamp) {
        //std::cout << "Depth callback" << std::endl;
        m_depth_mutex.lock();
        uint16_t* depth = static_cast<uint16_t*>(_depth);
        depthMat.data = (uchar*) depth;
        m_new_depth_frame = true;
        m_depth_mutex.unlock();
    }

    // Copies the latest RGB frame into `output` (converted RGB -> BGR for
    // OpenCV display).  Returns false if no new frame arrived since the last
    // successful call.
    bool getVideo(Mat& output) {
        m_rgb_mutex.lock();
        if(m_new_rgb_frame) {
            cv::cvtColor(rgbMat, output, CV_RGB2BGR);
            m_new_rgb_frame = false;
            m_rgb_mutex.unlock();
            return true;
        } else {
            m_rgb_mutex.unlock();
            return false;
        }
    }

    // Copies the latest 16-bit depth frame into `output`.  Returns false if
    // no new frame arrived since the last successful call.
    bool getDepth(Mat& output) {
            m_depth_mutex.lock();
            if(m_new_depth_frame) {
                depthMat.copyTo(output);
                m_new_depth_frame = false;
                m_depth_mutex.unlock();
                return true;
            } else {
                m_depth_mutex.unlock();
                return false;
            }
        }

  private:
    std::vector<uint8_t> m_buffer_depth;   // unused in this file (see ctor note)
    std::vector<uint8_t> m_buffer_rgb;     // unused in this file (see ctor note)
    std::vector<uint16_t> m_gamma;         // precomputed gamma LUT, not read here
    Mat depthMat;                          // 640x480 CV_16UC1, aliased to callback buffer
    Mat rgbMat;                            // 640x480 CV_8UC3, aliased to callback buffer
    Mat ownMat;                            // unused in this class
    Mutex m_rgb_mutex;
    Mutex m_depth_mutex;
    bool m_new_rgb_frame;
    bool m_new_depth_frame;
};

// Binarize an HSV image: pixels whose H/S/V fall inside the yellow range
// become white, everything else black.  The caller owns the returned
// single-channel 8-bit mask and must cvReleaseImage() it.
IplImage* GetThresholdedImage(IplImage* imgHSV){
        // Alternative colour ranges kept for reference:
        //   red:  cvScalar(170,160,60)    .. cvScalar(180,2556,256)
        //   blue: cvScalar(112,100,100)   .. cvScalar(124,255,255)
        const CvScalar yellowLo = cvScalar(25, 148, 65);
        const CvScalar yellowHi = cvScalar(35, 256, 190);
        IplImage* mask = cvCreateImage(cvGetSize(imgHSV), IPL_DEPTH_8U, 1);
        cvInRangeS(imgHSV, yellowLo, yellowHi, mask);
        return mask;
}

int main(int argc, char **argv) {

    IplImage* frame = 0;
    IplImage* frameDepth = 0;

    CBlobResult blobs;

    int iMaxx, iMinx, iMaxy, iMiny, iMeanx, iMeany, depthZ;
    double * world_x;
    double * world_y;

    /*IplImage *ResizedImage ;
    ResizedImage = cvCreateImage( cvSize(320, 240), 8, 3 );*/

    Mat depthMat(Size(640,480),CV_16UC1);//0---2047
    Mat depthf(Size(640,480),CV_8UC1);//0---255
    Mat rgbMat(Size(640,480),CV_8UC3,Scalar(0));
    Mat ownMat(Size(640,480),CV_8UC3,Scalar(0));
   
    Freenect::Freenect freenect;
    MyFreenectDevice& device = freenect.createDevice<MyFreenectDevice>(0);
   
    //MyFreenectDevice * device;
    //device = &freenect.createDevice<MyFreenectDevice>(0);
   
    namedWindow("rgb",CV_WINDOW_AUTOSIZE);
    namedWindow("depth",CV_WINDOW_AUTOSIZE);

    device.startVideo();
    device.startDepth();


    while (1) {
        device.getVideo(rgbMat);
        device.getDepth(depthMat);

        frame = cvCreateImage(cvSize(640,480), 8, 3);
        frame->imageData = (char *) rgbMat.data;
        frameDepth = cvCreateImage(cvSize(640,480), 8, 1);
        depthMat.convertTo(depthf, CV_8UC1, 255.0/2048.0);
        frameDepth->imageData = (char *) depthf.data;


        if( !frame )
        {
                fprintf( stderr, "ERROR: frame is null...\n" );
                getchar();
                break;
        }

       // cvResize(frame, ResizedImage, CV_INTER_LINEAR);
       // ResizedImage = cvCloneImage(frame);
        cvSmooth(frame, frame, CV_GAUSSIAN,3,3);
        IplImage* imgHSV = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 3);
        cvCvtColor(frame, imgHSV, CV_BGR2HSV);
        IplImage* imgThresh = GetThresholdedImage(imgHSV);
        cvDilate(imgThresh,imgThresh,NULL,2);
        cvSmooth( imgThresh, imgThresh, CV_GAUSSIAN, 9, 9 );

        CBlob biggestBlob;
        blobs = CBlobResult(imgThresh, NULL, 0 );
        blobs.GetNthBlob( CBlobGetArea(), 0, biggestBlob );
        iMaxx=biggestBlob.MaxX();
        iMinx=biggestBlob.MinX();
        iMaxy=biggestBlob.MaxY();
        iMiny=biggestBlob.MinY();
        iMeanx=(iMinx+iMaxx)/2;
        iMeany=(iMiny+iMaxy)/2;

        //per escludere i blobs minori di parametro 80
        //blobs.Filter( blobs, B_EXCLUDE, CBlobGetArea(), B_LESS, 80 );

        IplImage* filtered = cvCreateImage( cvGetSize( frame ), IPL_DEPTH_8U, 3 );
        cvMerge( imgThresh, imgThresh, imgThresh, NULL, filtered );
        cvLine( frame, cvPoint(iMeanx, iMeany), cvPoint(iMeanx, iMeany), CV_RGB(50,50 , 50), 4, 8, 0 );
        cvRectangle( frame, cvPoint(iMinx , iMiny ), cvPoint ( iMaxx, iMaxy ), CV_RGB(150, 150, 150), 2, 8, 0);


        //std::cout << "\n\ndepth: " << depthf. << std::endl;
       
        depthZ=depthMat.at<unsigned char>(iMeany,iMeanx);


       // printf("\n iMeany: %d, iMeanx: %d\n, depthZ: %d\n",iMeany,iMeanx,depthZ );
        biggestBlob.FillBlob( filtered, CV_RGB(255,0,0));

        cvShowImage( "rgb", frame );
       // cvShowImage( "depth", filtered);
       // sleep(1);
        if( (cvWaitKey(10) & 255) == 27 ) break;
    }

     // cvReleaseCapture( &capture );
     cvDestroyWindow( "mywindow" );
     return 0;
}

antomega

unread,
Mar 7, 2013, 11:29:35 AM3/7/13
to openk...@googlegroups.com
UP UP
any help??

jeff kramer

unread,
Mar 7, 2013, 11:39:28 AM3/7/13
to openk...@googlegroups.com
America is just coming online. =)

I think the command you're looking for is:
freenect_camera_to_world(device->getDevice(), u, v, iRealDepth, &x, &y);
Try it?

-Jeff

--
You received this message because you are subscribed to the Google Groups "OpenKinect" group.
To unsubscribe from this group and stop receiving emails from it, send an email to openkinect+...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.
 
 

antomega

unread,
Mar 8, 2013, 10:15:11 AM3/8/13
to openk...@googlegroups.com

eheheh you're right :P
About freenect_camera_to_world I've tried, but it doesn't work cause it says that there is no method called "getDevice"
My first problem is about the depth matrix, i don't know how to extract the real depth value of the mass centre of the tracked object. I need it so i can get the world coordinate from the screen ones

jeff kramer

unread,
Mar 8, 2013, 10:22:18 AM3/8/13
to openk...@googlegroups.com
Ack, sorry.  I made a dangerous addition to libfreenect.hpp to enable that particular trick:

freenect_device* getDevice() {
return m_dev;
}

You can see my edited version here: https://github.com/Qworg/libfreenect/blob/unstable/wrappers/cpp/libfreenect.hpp

antomega

unread,
Mar 8, 2013, 10:44:44 AM3/8/13
to openk...@googlegroups.com
so i need to add that code to the library (libfreenect.hpp)??
What about the access to the depth matrix?? To use your method i need the iRealDepth, right?

jeff kramer

unread,
Mar 8, 2013, 10:54:40 AM3/8/13
to openk...@googlegroups.com
Apologies!

The code I'm pulling from is here: https://github.com/Qworg/OKPCL/blob/master/OKPCL.cpp
device->getDepth(mdepth); //Gets the depth from the Kinect.
for (size_t v=0 ; v<480 ; v++)
        {
            for ( size_t u=0 ; u<640 ; u++, i++)
            {
                iRealDepth = mdepth[i]; //Iterate over each depth pixel and set the value.


antomega

unread,
Mar 8, 2013, 11:35:07 AM3/8/13
to openk...@googlegroups.com
ok. Well, I track the object using an OpenCV algorithm on the RGB matrix that I get from the RGB camera of my Kinect, so I have the (x,y) screen position of the mass centre of the tracked object. At the same time I get the depth matrix from the Kinect. With your code I can get the point cloud of the full image (is that right?). So if the centre of my object is at, e.g., X=220, Y=300, does that mean I can extract the depth of that point (i.e. the distance of the object relative to the Kinect) by fixing the v and u of the for loop to v=300 and u=220?

Thank u

antomega

unread,
Mar 11, 2013, 6:09:54 AM3/11/13
to openk...@googlegroups.com
Hi Jeff,
can you explain me how your code works?? it saves the pcd file but i can't view pcl in the window when i start the program
Thanks

antomega

unread,
Mar 11, 2013, 12:38:59 PM3/11/13
to openk...@googlegroups.com
I ve fixed your code and i can get a pcd file with these values (as example)
11.170239 -293.21878 804 1.8403113e-40
12.56652 -293.21878 804 1.8403113e-40
13.9628 -293.21878 804 1.8403113e-40
15.359079 -293.21878 804 1.8403113e-40
16.75536 -293.21878 804 1.8403113e-40
18.15164 -293.21878 804 1.8403113e-40
19.54792 -293.21878 804 1.8403113e-40
20.944201 -293.21878 804 1.8403113e-40
22.396053 -293.94818 806 9.219563e-41
23.736759 -293.21878 804 9.2554362e-41
25.133039 -293.21878 804 9.219563e-41
26.52932 -293.21878 804 9.219563e-41
27.9256 -293.21878 804 9.219563e-41
29.32188 -293.21878 804 9.2197031e-41
30.718159 -293.21878 804 1.8403533e-40
32.114441 -293.21878 804 2.7586802e-40
33.510719 -293.21878 804 1.8438986e-40
34.906998 -293.21878 804 9.219563e-41

I dont understand the format of the X and Y coordinates in that file.
Anyway, i don't need the full image point cloud, but i just need the realdepth of the mass center of the tracked object .
I've tried using
depthZ = depthf.at<unsigned char>(iMeany, iMeanx);
freenect_camera_to_world(device.getDevice(), iMeanx, iMeany, depthZ, &x, &y);

where iMeany and iMeanx are the coordinates of the tracked object center and


Mat depthMat(Size(640,480),CV_16UC1);//0---2047
    Mat depthf(Size(640,480),CV_8UC1);//0---255
frameDepth = cvCreateImage(cvSize(640,480), 8, 1);
        depthMat.convertTo(depthf, CV_8UC1, 255.0/2048.0);
If i use this method, freenect_camera_to_world returns just Y (with a max of 255) and no X and Z.

Can someone help me???

antomega

unread,
Mar 13, 2013, 12:16:08 PM3/13/13
to openk...@googlegroups.com
UP UP

anyone help??


Il giorno giovedì 7 marzo 2013 12:35:08 UTC+1, antomega ha scritto:

Lorne Covington

unread,
Mar 13, 2013, 7:55:54 PM3/13/13
to openk...@googlegroups.com

Well, once you have the screen point of interest, simply take the depth value at that X and Y pixel, and do the projective to world conversion.  I don't remember the specific call, but there's no need to convert the whole depthmap to world XYZ, just do the points of interest.

The formula I got on this forum some time back was:

FovH = 1.0144686707507438 (rad)    // horizontal field of view
FovV = 0.78980943449644714 (rad)   // vertical field of view

From there you can find the real world coordinates from the depth
cloud like so:

// X_proj = X pixel number, Y_proj = Y pixel number
// Xres = X resolution (640 for Kinect and Xtion, optional 320 for Xtion
// Yres = Y resolution (480 for Kinect and Xtion, optional 240 for Xtion

X_rw = (X_proj/Xres-.5) * Z * XtoZ
Y_rw = (0.5-Y_proj/Yres) * Z * YtoZ,
Z_rw = Z

where XtoZ = tan(FovH/2)*2, and YtoZ = tan(FovV/2)*2, and Z is the value
given by the depth map at each pixel.

Note the Fov values I have seen different values for, and are different for the Xtion than the Kinect.  But this is the basic idea.

I actually write a GPU shader to do this for the whole bitmap, which is awfully fast and returns the XYZ values as RGB.

Good luck!

- Lorne

http://noirflux.com

--

antomega

unread,
Mar 14, 2013, 12:08:55 PM3/14/13
to openk...@googlegroups.com
"simply take the depth value at that X and Y pixel," Lorne, this is the problem.
I don't know how to get this value, i've tried different way, but they don't work.
I have these matrix

    Mat depthMat(Size(640,480),CV_16UC1);//0---2047
    Mat depthf(Size(640,480),CV_8UC1);//0---255
    Mat rgbMat(Size(640,480),CV_8UC3,Scalar(0));
    Mat ownMat(Size(640,480),CV_8UC3,Scalar(0));
When i call
        device.getVideo(rgbMat);
        device.getDepth(depthMat);
do they give me the rgb of each pixel and the depth (the distance from the kinect) of each pixel in the real world???
I work with these matrix using OpenCV to track a blue object
I capture all the frames (rgb and depth)

frame->imageData = (char *) rgbMat.data;
frameDepth = cvCreateImage(cvSize(640,480), 8, 1);
depthMat.convertTo(depthf, CV_8UC1, 255.0/2048.0);
frameDepth->imageData = (char *) depthf.data;
Everything works fine about the tracking, but i still don't know how can i get the distance (in MM) of the mass center of the object i'm tracking.
I ve tried with
-depthZ=depthMat.at<unsigned char>(iMeany,iMeanx);
-depthZ=frame->imageData(iMeany,iMeanx);
-depthZ=frameDepth->imageData(iMeany,iMeanx);
but all the values don't have sense.
If i do a cout of the depthMat (std::cout << "\n\n DepthMat:  " << depthMat << std::endl) where i suppose to save the depth infos, i get a long list of values between 0 and 2047, if i do this with the Depthf i get values between 0 and 255.
I need this to have the world coordinates of the object, cause i should pick up it with a robot arm.
THANKS

Lorne Covington

unread,
Mar 14, 2013, 1:01:53 PM3/14/13
to openk...@googlegroups.com

The raw depth data from the Kinect comes back as I understand it as 11 bits of non-linear depth data that needs to be processed into a 13 bit MM number.  I just waltzed into this thread, so check how you are getting the depth data.

Also, shouldn't you be accessing the data here:

    depthZ=depthMat.at<unsigned char>(iMeany,iMeanx)?

as X, Y, not Y, X?

Good luck!

- Lorne

antomega

unread,
Mar 14, 2013, 1:29:06 PM3/14/13
to openk...@googlegroups.com
I've tried both ways (x,y) and (y,x), but same results.
I don't know if i need to fix something at the Freenect declaration
I've tried to set FREENECT_DEPTH_MM
ex.here  MyFreenectDevice(freenect_context *_ctx, int _index)
        : Freenect::FreenectDevice(_ctx, _index), m_buffer_depth(FREENECT_DEPTH_MM),m_buffer_rgb(FREENECT_VIDEO_RGB)


I've tried to check out all the posts here, but nothing can help me.

antomega

unread,
Mar 15, 2013, 9:40:19 AM3/15/13
to openk...@googlegroups.com

Now I don't know if I have to calibrate kinect to have depth pixel value correct. That is, if I take a pixel (u, v) from the image RBG, get the correct value of depth taking the pixels (u, v) from the image depth?

depthMat.at<uchar>(u,v)??
or

depthf.at<uchar>(u,v)??

I define
Mat depthMat(Size(640,480),CV_16UC1);
Mat depthf(Size(640,480),CV_8UC1);
depthMat.convertTo(depthf, CV_8UC1, 255.0/2048.0);

antomega

unread,
Mar 26, 2013, 9:04:54 AM3/26/13
to openk...@googlegroups.com
UP

can anyone help me???

Thanks

Armin Kazim

unread,
Mar 18, 2015, 6:18:06 AM3/18/15
to openk...@googlegroups.com
Hi everyone this is my code in opencv with libfreenect to get depth information;
Freenect::Freenect freenect;
MyFreenectDevice& device = freenect.createDevice<MyFreenectDevice>(0);
Freenect::Freenect freenect2;
MyFreenectDevice& device2 = freenect2.createDevice<MyFreenectDevice>(1);
device.setDepthFormat(FREENECT_DEPTH_REGISTERED);
device2.setDepthFormat(FREENECT_DEPTH_REGISTERED);
device.startVideo();
device.startDepth();
device2.startVideo();
device2.startDepth();
Mat picright,depthright(Size(640,480),CV_16UC1);
Mat picleft,depthleft(Size(640,480),CV_16UC1);
unsigned int frame_count = 0;
while(char(waitKey(1)) != 'q') {
double t0 = getTickCount();
device.getVideo(picright);
device.getDepth(depthright);
device2.getVideo(picleft);
device2.getDepth(depthleft);
//cout << left.size() <<right.size()<< endl;
if(!(depthleft.empty()) && !(depthright.empty())){
int z = depthright.at<uint16_t>(200, 200);
uint16_t b = z;
cout << b <<endl;
z = depthright.at<uint16_t>(200, 200);
b =z;
cout << b <<endl;
z = depthleft.at<uint16_t>(200,200);
b =z;
cout << b <<endl;
z = depthleft.at<uint16_t>(210, 210);
b = z;
cout << b <<endl;
for(int h=0;h<2;h++){
int ans = 0;
int falseva = 0;
int sum = 0;
for(int x=300;x<340;x++){
for(int y=250;y<270;y++){
if(h==1){ z = depthleft.at<uint16_t>(x, y);}
else{ z = depthright.at<uint16_t>(x, y);}
if(z > 200){
sum = sum + (int) z;
//cout << (int) z <<endl;
ans++;
}
if(z <200){
falseva++;
}
}
}
//while(char(waitKey(1)) != ' ') {}
if(ans != 0){
if(h == 0){
cout << "average: "<<sum/ans <<" bei " << ans<<"und: "<<falseva<<"around left 320 260"<< endl;
}
else{
cout << "average: "<<sum/ans <<" bei " << ans<<"und: "<<falseva<<"around right 320 260"<<endl;
}
}
}
}
Note that I use two cameras and therefore read the information twice; this is not necessary. I hope this helps you. You should also know that distances smaller than 400 mm can't be measured.
Reply all
Reply to author
Forward
0 new messages