Checking if a uintptr_t's value points to a legal memory address

237 views
Skip to first unread message

Anish Narayanan

unread,
Jun 29, 2017, 4:14:45 PM6/29/17
to cython-users
Hi everyone, 

I have been working on rewriting a 3d game engine from python to cython and am trying to rewrite my matrix and vector classes for performance reasons. In order to avoid the overhead of passing lists of data between python and cython every time I perform a calculation, I only pass in and out a uintptr_t that represents the pointer to a float * on the cython side. This is what my 2d vector class currently looks like with this approach:

#vec2.pyx
from libc.stdlib cimport malloc, free, rand, srand, RAND_MAX
from libc.stdint cimport uintptr_t
from libc.time cimport time
cimport libc.math as math

# Number of float components per vector; every loop in this module runs over range(size).
cdef int size = 2
# Seed the C library RNG from the current time, then call rand() once and discard the value.
srand(time(NULL)); rand()

cpdef add(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write the component-wise sum of ``a`` and ``b`` into ``out``.

    Each argument is an integer address that is reinterpreted as a
    ``float *`` and indexed from 0 to ``size - 1``; nothing is validated.
    """
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = lhs[k] + rhs[k]

cpdef ceil(uintptr_t out, uintptr_t a):
    """Store ``ceil`` of each component of ``a`` in the vector at ``out``.

    Both arguments are integer addresses reinterpreted as ``float *``.
    """
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = math.ceil(src[k])

cpdef clone(uintptr_t a):
    """Allocate a new buffer and copy the ``size`` floats at address ``a`` into it.

    Returns the address of the new buffer as an integer. The buffer comes
    from ``malloc``, so it must eventually be released with ``free``.

    Raises:
        MemoryError: if ``malloc`` cannot provide the buffer.
    """
    cdef float *a_data = <float *>a
    cdef float *out = <float *>malloc(size * sizeof(float))
    cdef int i = 0
    if out == NULL:
        # malloc reports failure by returning NULL; the copy loop below
        # would otherwise write through that NULL pointer.
        raise MemoryError()
    for i in range(size):
        out[i] = a_data[i]
    return <uintptr_t>out

cpdef copy(uintptr_t out, uintptr_t a):
    """Copy the ``size`` floats at address ``a`` into the buffer at ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = src[k]

cpdef create():
    """Allocate a zero-filled vector and return its address as an integer.

    The buffer comes from ``malloc``, so it must eventually be released
    with ``free``.

    Raises:
        MemoryError: if ``malloc`` cannot provide the buffer.
    """
    cdef float *a = <float *>malloc(size * sizeof(float))
    cdef int i = 0
    if a == NULL:
        # malloc reports failure by returning NULL; writing the initial
        # values through it would crash the interpreter.
        raise MemoryError()
    # Zero exactly ``size`` elements so the initialiser always matches the
    # allocation above instead of assuming two components.
    for i in range(size):
        a[i] = 0
    return <uintptr_t>a

#cpdef cross(uintptr_t out, uintptr_t a, uintptr_t b):
#    pass

cpdef delete(uintptr_t a):
    """Hand the buffer at address ``a`` to C ``free``; the address is not validated."""
    cdef void *buf = <void *>a
    free(buf)
    
cpdef dist(uintptr_t a, uintptr_t b):
    """Return the square root of ``sqr_dist(a, b)``, narrowed to C ``float``."""
    return <float>math.sqrt(sqr_dist(a, b))

cpdef div(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write the component-wise quotient ``a / b`` into the vector at ``out``.

    All arguments are integer addresses reinterpreted as ``float *``.
    """
    cdef float *numer = <float *>a
    cdef float *denom = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = numer[k] / denom[k]

cpdef dot(uintptr_t a, uintptr_t b):
    """Return the sum of the component-wise products of ``a`` and ``b``.

    The sum is accumulated in a C ``float``.
    """
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float acc = 0.0
    cdef int k = 0
    for k in range(size):
        acc += lhs[k] * rhs[k]
    return acc

cpdef equals(uintptr_t a, uintptr_t b, float epsilon=0.000001):
    """Return True when every pair of components is equal within a tolerance.

    For each index the absolute difference must not exceed ``epsilon``
    scaled by the largest of 1.0 and the two absolute component values.
    """
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef double gap
    cdef double bound
    cdef int k = 0
    for k in range(size):
        gap = math.fabs(lhs[k] - rhs[k])
        bound = epsilon * max(1.0, math.fabs(lhs[k]), math.fabs(rhs[k]))
        if gap > bound:
            return False
    return True
    
cpdef exact_equals(uintptr_t a, uintptr_t b):
    """Return True only if every component of ``a`` compares ``==`` to ``b``'s."""
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef int k = 0
    for k in range(size):
        if not (lhs[k] == rhs[k]):
            return False
    return True
    
cpdef floor(uintptr_t out, uintptr_t a):
    """Store ``floor`` of each component of ``a`` in the vector at ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = math.floor(src[k])

cpdef from_values(float x, float y):
    """Allocate a vector initialised to ``(x, y)`` and return its address.

    The buffer comes from ``malloc``, so it must eventually be released
    with ``free``.

    Raises:
        MemoryError: if ``malloc`` cannot provide the buffer.
    """
    cdef float *a = <float *>malloc(size * sizeof(float))
    if a == NULL:
        # malloc reports failure by returning NULL; storing x and y through
        # it would crash the interpreter.
        raise MemoryError()
    a[0] = x
    a[1] = y
    return <uintptr_t>a

cpdef get_values(uintptr_t a):
    """Return the ``size`` components stored at address ``a`` as a Python list."""
    cdef float *src = <float *>a
    cdef list values = []
    cdef int k = 0
    for k in range(size):
        values.append(src[k])
    return values

cpdef inverse(uintptr_t out, uintptr_t a):
    """Write the reciprocal ``1.0 / a[i]`` of each component into ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = 1.0 / src[k]

cpdef length(uintptr_t a):
    """Return the square root of ``sqr_length(a)``, narrowed to C ``float``."""
    return <float>math.sqrt(sqr_length(a))
    
cpdef lerp(uintptr_t out, uintptr_t a, uintptr_t b, float t):
    """Interpolate from ``a`` towards ``b`` by factor ``t``, writing into ``out``.

    Each component is computed as ``a[i] + t * (b[i] - a[i])``.
    """
    cdef float *start = <float *>a
    cdef float *stop = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = start[k] + t * (stop[k] - start[k])
    
cpdef max_comps(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write ``max(a[i], b[i])`` for each component into the vector at ``out``."""
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = max(lhs[k], rhs[k])
    
cpdef min_comps(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write ``min(a[i], b[i])`` for each component into the vector at ``out``."""
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = min(lhs[k], rhs[k])

cpdef mul(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write the component-wise product of ``a`` and ``b`` into ``out``."""
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = lhs[k] * rhs[k]

cpdef negate(uintptr_t out, uintptr_t a):
    """Write each component of ``a`` multiplied by -1 into the vector at ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = src[k] * -1
    
cpdef norm(uintptr_t out, uintptr_t a):
    """Scale ``a`` by the reciprocal of ``length(a)`` and write the result to ``out``."""
    cdef float magnitude = length(a)
    scale(out, a, 1.0 / magnitude)
    
cpdef random(uintptr_t out):
    """Fill the two components at ``out`` with ``rand() / RAND_MAX`` values.

    ``rand()`` is called twice: first for component 0, then for component 1.
    """
    cdef float *dst = <float *>out
    cdef float first = rand() / float(RAND_MAX)
    cdef float second = rand() / float(RAND_MAX)
    dst[0] = first
    dst[1] = second

cpdef round(uintptr_t out, uintptr_t a):
    """Store C ``round`` of each component of ``a`` in the vector at ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = math.round(src[k])
    
cpdef scale(uintptr_t out, uintptr_t a, float b):
    """Multiply each component of ``a`` by the scalar ``b``, writing into ``out``."""
    cdef float *src = <float *>a
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = src[k] * b
    
cpdef set_values(uintptr_t out, float x, float y):
    """Overwrite the two components at address ``out`` with ``x`` and ``y``."""
    cdef float *dst = <float *>out
    dst[0] = x
    dst[1] = y

cpdef sqr_dist(uintptr_t a, uintptr_t b):
    """Return the sum of squared component differences between ``b`` and ``a``.

    The sum is accumulated in a C ``float``.
    """
    cdef float *origin = <float *>a
    cdef float *target = <float *>b
    cdef float acc = 0.0
    cdef int k = 0
    for k in range(size):
        acc += (target[k] - origin[k])*(target[k] - origin[k])
    return acc

cpdef sqr_length(uintptr_t a):
    """Return the sum of the squared components of the vector at ``a``.

    The sum is accumulated in a C ``float``.
    """
    cdef float *src = <float *>a
    cdef float acc = 0.0
    cdef int k = 0
    for k in range(size):
        acc += src[k] * src[k]
    return acc

cpdef sub(uintptr_t out, uintptr_t a, uintptr_t b):
    """Write the component-wise difference ``a - b`` into the vector at ``out``."""
    cdef float *lhs = <float *>a
    cdef float *rhs = <float *>b
    cdef float *dst = <float *>out
    cdef int k = 0
    for k in range(size):
        dst[k] = lhs[k] - rhs[k]

#cpdef transform_mat2, transform_mat3, transform_mat4

To use this code in python, the end user would write the following to, say, add two vec2 values together:

import math3d.vec2 as vec2

# from_values and create each return an integer that holds a buffer address.
a = vec2.from_values(1.0, 2.0)
b = vec2.from_values(3.0, 4.0)
c = vec2.create()
# add writes its result into the first argument rather than returning it.
vec2.add(c, a, b)
# Python 2 print statement; get_values copies the components into a list.
print vec2.get_values(c)

However, if I pass in an invalid uintptr_t to the cython side that does not point to a legal memory address with float data, the python interpreter becomes unresponsive and crashes. This could happen if someone innocuously uses the + operator to add two vec2 instead. Is it possible in cython to check if a uintptr_t's value is a valid memory address? Any help would be greatly appreciated.

Jeroen Demeyer

unread,
Jun 30, 2017, 4:36:13 AM6/30/17
to cython...@googlegroups.com
On 2017-06-29 20:31, Anish Narayanan wrote:
> Is it
> possible in cython to check if a uintptr_t's value is a valid memory
> address?

No.

Stefan Behnel

unread,
Jun 30, 2017, 6:20:01 AM6/30/17
to cython...@googlegroups.com
Hi,

you should wrap the pointer in a cdef class that you can pass around. Use the freelist decorator for faster instantiation.

I'd also turn most of your functions into methods of that class then, to make them easier to use from Python. They look a lot like they should be special methods that allow simple operator usage from Python code.

Stefan

Anish Narayanan

unread,
Jul 1, 2017, 2:09:23 AM7/1/17
to cython-users
@Jeroen Thank you for the succinct answer! Glad to know that so I do not keep trying it fruitlessly.

@Stefan Thank you for the alternative solution of just wrapping my uintptr_t in a cdef class! I was worried about overhead with creating an object, but when I tried it out, performance was not too different. Using some basic timing code like this:

import time
import math3d.vec2 as vec2

# Wall-clock timing of one million create/add/read iterations.
start = time.time()
for i in range(1000000):
    # NOTE: nothing in this loop calls vec2.delete on a, b or c.
    a = vec2.from_values(1.0, 2.0)
    b = vec2.from_values(3.0, 4.0)
    c = vec2.create()
    vec2.add(c, a, b)
    vec2.get_values(c)
end = time.time()
# Python 2 print statement: elapsed seconds.
print end - start

... the new version wrapping the pointer in a cdef class takes about 0.05 seconds longer (0.84 vs 0.89 seconds). Either way, of course, this means that I cannot create a million Vec2 objects every frame if I want 60 fps, but both are at least 3x faster than the original python class version I had.

Using @cython.freelist(some number) is an interesting performance tip too! Is there any rationale that I can use for choosing a specific number for that decorator? I saw in the documentation here that they used 8 in the Penguin example. 

Also, my original python version was an actual class that heavily used various convenience functions (you can see it as part of this stackoverflow question I asked earlier this week under "Update 1"). Now, it might be that I was creating new instances of Vec2 in that code when I overrode __add__ in my old python code, but is there a performance hit for using these same arithmetic special methods in cython? Besides that, the only other issue of rewriting the code to be fully class-based is that some of the operation semantics were a little weird. For example, dotting two vec2s, without using @classmethod decorators (which may also be another performance hit), would become a.dot(b) rather than vec2.dot(a, b). With the current layout, it is similar to other libraries like javascript's glmatrix or c++'s glm.

Anish Narayanan

unread,
Jul 1, 2017, 2:09:42 AM7/1/17
to cython-users
Should have tried out writing a complete Vec2 cython class version before posting that last reply; the cdef class is actually FASTER! Here is what this implementation looks like:

from libc.stdlib cimport malloc, free, rand, srand, RAND_MAX
from libc.stdint cimport uintptr_t
from libc.time cimport time
cimport libc.math as math
cimport cython

# Number of float components per vector; the loops below run over range(size).
cdef int size = 2
# Seed the C library RNG from the current time, then call rand() once and discard the value.
srand(time(NULL)); rand()

@cython.freelist(100)
cdef class Vec2:
    """Two-component float vector backed by a ``malloc``'ed C buffer.

    The buffer is allocated in ``__cinit__`` and released in
    ``__dealloc__``, so its lifetime follows the Python object.
    """
    # Address of the malloc'ed float buffer, stored as an integer.
    cdef uintptr_t id_

    def __cinit__(Vec2 self, float x=0.0, float y=0.0):
        """Allocate the buffer and initialise it to ``(x, y)``.

        Raises:
            MemoryError: if ``malloc`` cannot provide the buffer.
        """
        cdef float *a = <float *>malloc(size * sizeof(float))
        if a == NULL:
            # malloc reports failure by returning NULL; storing x and y
            # through it would crash the interpreter.
            raise MemoryError()
        a[0] = x
        a[1] = y
        self.id_ = <uintptr_t>a

    def __dealloc__(Vec2 self):
        """Release the buffer owned by this instance."""
        # If __cinit__ raised before storing the pointer, id_ is still 0 and
        # free(NULL) does nothing.
        free(<void *>(self.id_))

    def __add__(Vec2 a not None, Vec2 b not None):
        """Return a new ``Vec2`` holding the component-wise sum of ``a`` and ``b``.

        ``not None`` is required because a typed argument otherwise accepts
        ``None``, whose ``id_`` would be read from memory that is not a Vec2.
        """
        cdef float *a_data = <float *>(a.id_)
        cdef float *b_data = <float *>(b.id_)
        cdef Vec2 out = Vec2()
        cdef float *out_data = <float *>(out.id_)
        cdef int i = 0
        for i in range(size):
            out_data[i] = a_data[i] + b_data[i]
        return out

    @staticmethod
    def dot(Vec2 a not None, Vec2 b not None):
        """Return the dot product of ``a`` and ``b``, accumulated in a C ``float``."""
        cdef float out = 0.0
        cdef float *a_data = <float *>(a.id_)
        cdef float *b_data = <float *>(b.id_)
        cdef int i = 0
        for i in range(size):
            out += a_data[i] * b_data[i]
        return out

    cpdef dot2(Vec2 a, Vec2 b):
        """Return the dot product of this vector (``a``) with ``b``.

        Raises:
            TypeError: if ``b`` is ``None``.
        """
        if b is None:
            # A typed argument accepts None; reading b.id_ from it would
            # access memory that does not belong to a Vec2.
            raise TypeError("Argument 'b' must be a Vec2, not None")
        cdef float out = 0.0
        cdef float *a_data = <float *>(a.id_)
        cdef float *b_data = <float *>(b.id_)
        cdef int i = 0
        for i in range(size):
            out += a_data[i] * b_data[i]
        return out

    cpdef get_values(Vec2 self):
        """Return the components as a Python list of length ``size``."""
        cdef float *a_data = <float *>(self.id_)
        cdef list out = [0]*size
        cdef int i = 0
        for i in range(size):
            out[i] = a_data[i]
        return out

Not sure what is going on here, but a similar addition timing loop to the one I had in the previous post clocked 1 million iterations in only 0.73 seconds (about the same with or without freelist), and I get the convenience of operators, so this is nice! The only remaining issue is the semantic one of Vec2.dot(a, b) vs a.dot(b), but I think I can learn to get over that since dot2 is faster than my @staticmethod dot version :)

Thank you so much,
Anish

Reply all
Reply to author
Forward
0 new messages