I started on my laptop (a MacBook Pro with a Core i7, running Mac OS X), but
the numbers given are from a dual-CPU Linux machine with six cores per CPU
(Intel(R) Xeon(R) CPU X5650 @ 2.67GHz - 24 virtual threads).
I just used pthreads to partition a large image, each thread getting a
block of rows (easiest partition).
Here's the code (it assumes 4 bytes per pixel for the input image and 2 bytes
per pixel for the output image, but that's easy to change):
#include <pthread.h>
// Raw image data is addressed byte by byte through this alias.
typedef unsigned char byte;
// Forward declaration of Compress(); the definition appears further down.
// It converts the image at `in` into `out` using `numthreads` worker threads,
// each working on its own block of rows.
int Compress(const byte *in, byte *out, int width, int height, int
numthreads, PixFcFlag flags,
PixFcPixelFormat input_format, PixFcPixelFormat output_format);
// One unit of work handed to a worker thread: the conversion context to use
// plus where that thread reads its input rows and writes its output rows.
typedef struct _work_t {
    struct PixFcSSE *pixfc;  // conversion context created for this block of rows
    byte *in;                // first input byte of this block
    byte *out;               // first output byte of this block
} work_t;
void *slave(void *arg)
{
work_t *param = (work_t*) arg;
param->pixfc->convert(param->pixfc, param->in, param->out);
return NULL;
}
int Compress(const byte *in, byte *out, int width, int height, int
numthreads, PixFcFlag flags,
PixFcPixelFormat input_format, PixFcPixelFormat output_format)
{
pthread_t *pid;
work_t *job;
job = new work_t[numthreads];
pid = new pthread_t[numthreads];
for (int k=0;k<numthreads;k++)
{
struct PixFcSSE *pix;
create_pixfc(&pix, input_format, output_format, width, height/
numthreads, flags);
job[k].pixfc = pix;
job[k].in = (byte*)in + (k*width*4*height/numthreads);
job[k].out = (byte*)out + (k*width*2*height/numthreads);
pthread_create(&pid[k], NULL, slave, &job[k]);
}
// Join all the threads
for (int k=0;k<numthreads;k++)
{
pthread_join(pid[k], NULL);
free(job[k].pixfc);
}
return 1;
}
I saw you started work on planar formats; that would indeed be useful.
Luc