I sort of missed the big C++ discussion, but I'd like to give some examples of
how writing code can become much simpler if you are based on C++. This is from
my mahotas package, which has a thin C++ wrapper around numpy's C API
https://github.com/luispedro/mahotas/blob/master/mahotas/_morph.cpp
and it implements multi-type greyscale erosion.
// numpy::aligned_array wraps PyArrayObject*
template<typename T>
void erode(numpy::aligned_array<T> res,
numpy::aligned_array<T> array,
numpy::aligned_array<T> Bc) {
// Release the GIL using RAII
gil_release nogil;
const int N = res.size();
typename numpy::aligned_array<T>::iterator iter = array.begin();
// this is adapted from scipy.ndimage.
// it implements the convolution-like filtering.
filter_iterator<T> filter(res.raw_array(),
Bc.raw_array(),
EXTEND_NEAREST,
is_bool(T()));
const int N2 = filter.size();
T* rpos = res.data();
for (int i = 0; i != N; ++i, ++rpos, filter.iterate_both(iter)) {
T value = std::numeric_limits<T>::max();
for (int j = 0; j != N2; ++j) {
T arr_val = T();
filter.retrieve(iter, j, arr_val);
value = std::min<T>(value, erode_sub(arr_val, filter[j]));
}
*rpos = value;
}
}
If you compare this with the equivalent scipy.ndimage function, which is very
good C code (but mostly write-only—in fact, ndimage has not been maintainable
because it is so hard [at least for me, I've tried]):
int NI_BinaryErosion(PyArrayObject* input, PyArrayObject* strct,
PyArrayObject* mask, PyArrayObject* output, int bdr_value,
npy_intp *origins, int invert, int center_is_true,
int* changed, NI_CoordinateList **coordinate_list)
{
npy_intp struct_size = 0, *offsets = NULL, size, *oo, jj;
npy_intp ssize, block_size = 0, *current = NULL, border_flag_value;
int kk, true, false, msk_value;
NI_Iterator ii, io, mi;
NI_FilterIterator fi;
Bool *ps, out = 0;
char *pi, *po, *pm = NULL;
NI_CoordinateBlock *block = NULL;
ps = (Bool*)PyArray_DATA(strct);
ssize = 1;
for(kk = 0; kk < strct->nd; kk++)
ssize *= strct->dimensions[kk];
for(jj = 0; jj < ssize; jj++)
if (ps[jj]) ++struct_size;
if (mask) {
if (!NI_InitPointIterator(mask, &mi))
return 0;
pm = (void *)PyArray_DATA(mask);
}
/* calculate the filter offsets: */
if (!NI_InitFilterOffsets(input, ps, strct->dimensions, origins,
NI_EXTEND_CONSTANT, &offsets,
&border_flag_value, NULL))
goto exit;
/* initialize input element iterator: */
if (!NI_InitPointIterator(input, &ii))
goto exit;
/* initialize output element iterator: */
if (!NI_InitPointIterator(output, &io))
goto exit;
/* initialize filter iterator: */
if (!NI_InitFilterIterator(input->nd, strct->dimensions, struct_size,
input->dimensions,
origins, &fi))
goto exit;
/* get data pointers an size: */
pi = (void *)PyArray_DATA(input);
po = (void *)PyArray_DATA(output);
size = 1;
for(kk = 0; kk < input->nd; kk++)
size *= input->dimensions[kk];
if (invert) {
bdr_value = bdr_value ? 0 : 1;
true = 0;
false = 1;
} else {
bdr_value = bdr_value ? 1 : 0;
true = 1;
false = 0;
}
if (coordinate_list) {
block_size = LIST_SIZE / input->nd / sizeof(int);
if (block_size < 1)
block_size = 1;
if (block_size > size)
block_size = size;
*coordinate_list = NI_InitCoordinateList(block_size, input->nd);
if (!*coordinate_list)
goto exit;
}
/* iterator over the elements: */
oo = offsets;
*changed = 0;
msk_value = 1;
for(jj = 0; jj < size; jj++) {
int pchange = 0;
if (mask) {
switch(mask->descr->type_num) {
CASE_GET_MASK(msk_value, pm, Bool);
CASE_GET_MASK(msk_value, pm, UInt8);
CASE_GET_MASK(msk_value, pm, UInt16);
CASE_GET_MASK(msk_value, pm, UInt32);
#if HAS_UINT64
CASE_GET_MASK(msk_value, pm, UInt64);
#endif
CASE_GET_MASK(msk_value, pm, Int8);
CASE_GET_MASK(msk_value, pm, Int16);
CASE_GET_MASK(msk_value, pm, Int32);
CASE_GET_MASK(msk_value, pm, Int64);
CASE_GET_MASK(msk_value, pm, Float32);
CASE_GET_MASK(msk_value, pm, Float64);
default:
PyErr_SetString(PyExc_RuntimeError, "data type not
supported");
return 0;
}
}
switch (input->descr->type_num) {
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Bool, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, UInt8, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, UInt16, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, UInt32, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
#if HAS_UINT64
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, UInt64, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
#endif
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Int8, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Int16, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Int32, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Int64, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Float32, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
CASE_NI_ERODE_POINT(pi, out, oo, struct_size, Float64, msk_value,
bdr_value, border_flag_value,
center_is_true,
true, false, pchange);
default:
PyErr_SetString(PyExc_RuntimeError, "data type not supported");
goto exit;
}
switch (output->descr->type_num) {
CASE_OUTPUT(po, out, Bool);
CASE_OUTPUT(po, out, UInt8);
CASE_OUTPUT(po, out, UInt16);
CASE_OUTPUT(po, out, UInt32);
#if HAS_UINT64
CASE_OUTPUT(po, out, UInt64);
#endif
CASE_OUTPUT(po, out, Int8);
CASE_OUTPUT(po, out, Int16);
CASE_OUTPUT(po, out, Int32);
CASE_OUTPUT(po, out, Int64);
CASE_OUTPUT(po, out, Float32);
CASE_OUTPUT(po, out, Float64);
default:
PyErr_SetString(PyExc_RuntimeError, "data type not supported");
goto exit;
}
if (pchange) {
*changed = 1;
if (coordinate_list) {
if (block == NULL || block->size == block_size) {
block = NI_CoordinateListAddBlock(*coordinate_list);
current = block->coordinates;
}
for(kk = 0; kk < input->nd; kk++)
*current++ = ii.coordinates[kk];
block->size++;
}
}
if (mask) {
NI_FILTER_NEXT3(fi, ii, io, mi, oo, pi, po, pm);
} else {
NI_FILTER_NEXT2(fi, ii, io, oo, pi, po);
}
}
exit:
if (offsets)
free(offsets);
if (PyErr_Occurred()) {
if (coordinate_list) {
NI_FreeCoordinateList(*coordinate_list);
*coordinate_list = NULL;
}
return 0;
} else {
return 1;
}
return PyErr_Occurred() ? 0 : 1;
}
HTH
--
Luis Pedro Coelho | Institute for Molecular Medicine | http://luispedro.org
The fact that this is good C is matter of opinon :)
I don't think the code is comparable either - some of the stuff done
in the C code is done in the C++ code your are calling. The C code
could be significantly improved. Even more important here: almost none
of this code should be written anymore anyway, C++ or not. This is
really the kind of code that should be done in cython, as it is mostly
about wrapping C code into the python C API.
cheers,
David
_______________________________________________
NumPy-Discussion mailing list
NumPy-Di...@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion
+1
Gael
Actually, that's not 100% accurate. The C code calls the same functions. Most
of the extra cruft is that it needs to do all of this error checking and type-
dispatch, while in C++ you can have RAII and templates.
> Even more important here: almost none
> of this code should be written anymore anyway, C++ or not. This is
> really the kind of code that should be done in cython, as it is mostly
> about wrapping C code into the python C API.
At least last time I read up on it, cython was not able to do multi-type code,
i.e., have code that works on arrays of multiple types. Does it support it
now?
Best,
--
Luis Pedro Coelho | University of Lisbon | http://luispedro.org
-Jeff
But most of the code in the C one is not related to either. All the
size computation, etc… still needs to be done somewhere.
While I agree that RAII is by itself a very useful feature, it is not
that used in your example. IMO, all you are doing is comparing decent
C++ to awful C. That's not very interesting to me.
> I sort of missed the big C++ discussion, but I'd like to give some examples of
> how writing code can become much simpler if you are based on C++. This is from
> my mahotas package, which has a thin C++ wrapper around numpy's C API
Here you are using NumPy arrays, not implementing them. That makes a big
difference. Your code would be even simpler if you had used Cython or
Fortran (f2py) instead of C++.
The only thing you have proved is that using NumPy arrays in C can be
messy, which I suspect everybody knows already.
C is a good systems programming language, but it sucks for any kind of
numerical computing. C++ sucks a little bit less. Using either for
numerical programming usually a mistake. But NumPy core is not a case of
numerical programming, it is a case of systems programming, for which C
is actually very nice.
And no, unmaintainable C is NOT an example of "very good C", rather the
opposite. This is a big mistake:
"If you compare this with the equivalent scipy.ndimage function,
which is very good C code (but mostly write-only—in fact, ndimage has
not been maintainable..."
Another thing to observe is that the ndimage C code you cited has a
major DRY violation: We have the best text (pre)processor there is to
our disposal: Python. So why are there macros and switch statements for
so many dtypes in there? As with C++ templates, generics in C is easy if
we just let Python generate the type-specialized code we need. (Actually
that is what NumPy's .src files do.) Generics is therefore not an
argument for using C++. We can augment standard C with syntax suger for
generics, or even NumPy arrays and Python types, using a Python script
as meta-compiler. We don't need C++ for that.
Sturla
Using either for
numerical programming usually a mistake.
> This is your opinion, but there are a lot of numerical code now in C++
> and they are far more maintainable than in Fortran. And they are faster
> for exactly this reason.
That is mostly because C++ makes tasks that are non-numerical easier.
But that is why we have Python.
On 06.03.2012 21:45, Matthieu Brucher wrote:That is mostly because C++ makes tasks that are non-numerical easier.
> This is your opinion, but there are a lot of numerical code now in C++
> and they are far more maintainable than in Fortran. And they are faster
> for exactly this reason.
The Bottleneck project used some sort of template system to generate
multiple type cyton code -- not cython itself, but none the less
useful.
-Chris
--
Christopher Barker, Ph.D.
Oceanographer
Emergency Response Division
NOAA/NOS/OR&R (206) 526-6959 voice
7600 Sand Point Way NE (206) 526-6329 fax
Seattle, WA 98115 (206) 526-6317 main reception
Sturla
Sendt fra min iPad
Upcoming Cython releases will have a generics system called "fused types".
Sturla
Sendt fra min iPad
Den 6. mars 2012 kl. 23:26 skrev Chris Barker <chris....@noaa.gov>:
> On Sun, Mar 4, 2012 at 2:18 PM, Luis Pedro Coelho <l...@cmu.edu> wrote:
>> At least last time I read up on it, cython was not able to do multi-type code,
>> i.e., have code that works on arrays of multiple types. Does it support it
>> now?
>
> The Bottleneck project used some sort of template system to generate
> multiple type cyton code -- not cython itself, but none the less
> useful.
>
Moreover:
**begin repeat
*
* #from = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
* LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT, DATETIME, TIMEDELTA#
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong, half, float, double, longdouble,
* cfloat, cdouble, clongdouble, object, datetime, timedelta#
* #sort = 1*18, 0*3#
* #num = 1*15, 2*3, 1*3#
* #fromtyp = Bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong, npy_half, float, double, longdouble,
* float, double, longdouble, PyObject *, datetime, timedelta#
* #NAME = Bool, Byte, UByte, Short, UShort, Int, UInt, Long, ULong,
* LongLong, ULongLong, Half, Float, Double, LongDouble,
* CFloat, CDouble, CLongDouble, Object, Datetime, Timedelta#
* #kind = GENBOOL, SIGNED, UNSIGNED, SIGNED, UNSIGNED, SIGNED,
UNSIGNED, SIGNED, UNSIGNED,
* SIGNED, UNSIGNED, FLOATING, FLOATING, FLOATING, FLOATING,
* COMPLEX, COMPLEX, COMPLEX, OBJECT, DATETIME, TIMEDELTA#
* #endian = |*3, =*15, |, =*2#
* #isobject= 0*18,NPY_OBJECT_DTYPE_FLAGS,0*2#
*/
is not exactly sweet-tasting sugar. It would make more sense for all the
various values to be bundled together by type, so that a type is
comprehensible as whole, quickly. Instead all the values are in
separated property lists that cut across all the types. Worse, very
similar or identical information to what is in this list is repeated,
without any chance of consistency or error checking, all over the place.
Bryan
I don't think anyone likes it . All your points are certainly valid.
There are solutions to this that does not require C++ (Chuck worked on
that I believe, although I guess he knows would rather see C++ used).
David
In Norwegian there's a saying (as you will know): "Badly reasoned
argument, so talk louder".
Anyway, I agree with you a long way, but saying there's no place at all
for C++ anywhere seems a bit over the top for me. E.g., I believe C++
Elemental made the right choice in C++ (distributed linear algebra,
where they use templates just enough to make things readable, but not
over-the-top like STL or Boost):
http://code.google.com/p/elemental/
Dag
Den 7. mars 2012 kl. 00:43 skrev Charles R Harris <charles...@gmail.com>:
>
>
> I don't see generics as the main selling point of C++ for Numpy. What I expect to be really useful is exception handling, smart pointers, and RIAA. And maybe some carefule uses of classes and inheritance. Having a standard inline keyword will be nice too. But I'm not a modern C++ guru, so I may have missed a lot of things.
>
"RIAA is evil, RAII is not" ;-)
Actually, Cython has all of those features too :-)
I am not suggesting NumPy should use Cython in the core. However if it did, the main machinery would already be in the compiler (typed memory views) :-)
Sturla
Den 7. mars 2012 kl. 00:43 skrev Charles R Harris <charles...@gmail.com>:
"RIAA is evil, RAII is not" ;-)
>
>
> I don't see generics as the main selling point of C++ for Numpy. What I expect to be really useful is exception handling, smart pointers, and RIAA. And maybe some carefule uses of classes and inheritance. Having a standard inline keyword will be nice too. But I'm not a modern C++ guru, so I may have missed a lot of things.
>
Actually, Cython has all of those features too :-)
I am not suggesting NumPy should use Cython in the core. However if it did, the main machinery would already be in the compiler (typed memory views) :-)