Viewing std::vector as NumPy array in pure Python mode

59 views
Skip to first unread message

BattleSushi

unread,
Aug 26, 2024, 3:29:50 PM8/26/24
to cython-users
I want to get a NumPy view (without a copy) of a C++ std::vector. I am using the Pure Python mode to benefit from the rich tooling that Python has to offer.

In principle, exactly this question was already asked on StackOverflow; see https://stackoverflow.com/questions/78604365/coerce-stdvector-to-numpy-in-cythons-pure-python-mode

Maybe there are more experts on the topic here. Does anybody have a suggestion?

Johannes Fischer

unread,
Sep 8, 2024, 2:34:00 AM9/8/24
to cython-users
I got it to work like this (wrapping the vector's memory in a buffer and then reading from it with np.frombuffer):
I used a descriptor to make sure that the memory address is always up to date.

class NpDescriptor:
    """Descriptor that exposes an owner's vector storage as a NumPy view.

    The owning instance must provide ``_get_vector_address()`` (returning the
    integer address of the first element) and support ``len()``.

    ``dtype`` must support ``dtype * n`` and ``.from_address(...)`` and be
    accepted by NumPy as a dtype -- presumably a ctypes scalar type such as
    ``ctypes.c_float``; confirm against the caller.

    The returned array is created with ``np.frombuffer`` over the memory at
    that address, so it shares the storage instead of copying it.  The view
    is rebuilt whenever the reported address or length changes.

    NOTE: the cached view is stored on the descriptor itself, i.e. it is
    shared by every instance of the owning class.  Results stay correct
    because the address and size are re-checked on every access, but
    alternating between instances rebuilds the view each time.
    """

    def __init__(self, dtype):
        self.dtype = dtype
        # Attribute name on the owning class; filled in by __set_name__.
        self.name = None
        self.current_address = 0
        self.current_size = 0
        # Start with a real (empty) array rather than an int placeholder so
        # that an owner reporting address 0 / length 0 still gets an ndarray.
        self.current_buffer = np.array([], self.dtype)

    def __set_name__(self, owner, name):
        # Called automatically when the descriptor is assigned in a class
        # body; __set__ needs the name as the key into instance.__dict__.
        self.name = name

    def __get__(self, instance, owner):
        cdef:
            size_t current_address_new
            size_t current_size_new
            object haystack_buffer
        if instance is None:
            # Accessed on the class itself: return the descriptor.
            return self
        current_address_new = instance._get_vector_address()
        current_size_new = len(instance)
        if current_address_new != self.current_address or current_size_new != self.current_size:
            if current_size_new == 0:
                # Nothing to view; do not touch a possibly null address.
                self.current_buffer = np.array([], self.dtype)
            else:
                haystack_buffer = (self.dtype * (current_size_new)).from_address(current_address_new)
                self.current_buffer = np.frombuffer(haystack_buffer, dtype=self.dtype)
            self.current_address = current_address_new
            self.current_size = current_size_new
        return self.current_buffer

    def __set__(self, instance, value):
        # The assigned value is deliberately ignored: an empty array is
        # stored under the attribute name instead.  Because this is a data
        # descriptor, reads still go through __get__.
        if self.name is None:
            raise AttributeError(
                "NpDescriptor has no attribute name; assign it in a class body "
                "so that __set_name__ is called"
            )
        instance.__dict__[self.name] = np.array([], self.dtype)


The official documentation describes another way, which I haven't tried yet:

# distutils: language = c++
from cpython cimport Py_buffer
from libcpp.vector cimport vector


cdef class Matrix:
    """Growable 2-D float matrix exported through the buffer protocol.

    The data lives in a C++ ``vector[float]`` in row-major order with
    ``ncols`` items per row.  Consumers of the buffer protocol (for example
    ``np.asarray(matrix)`` or ``memoryview(matrix)``) get a writable view
    onto that storage without a copy.

    While at least one such view is alive the matrix refuses to grow,
    because resizing the vector may reallocate it and leave the exported
    pointer dangling.
    """
    cdef Py_ssize_t ncols
    cdef Py_ssize_t[2] shape
    cdef Py_ssize_t[2] strides
    cdef vector[float] v
    # Number of buffers handed out by __getbuffer__ and not yet released.
    cdef int view_count

    def __cinit__(self, Py_ssize_t ncols):
        self.ncols = ncols
        self.view_count = 0

    def add_row(self):
        """Adds a row, initially zero-filled.

        Raises ValueError while any buffer view of the matrix is still
        alive, since growing the vector could invalidate that view.
        """
        if self.view_count > 0:
            raise ValueError("can't add row while being viewed")
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        cdef Py_ssize_t itemsize = sizeof(self.v[0])

        self.shape[0] = self.v.size() // self.ncols
        self.shape[1] = self.ncols

        # Stride 1 is the distance, in bytes, between two items in a row;
        # vector elements are contiguous, so this is exactly the item size.
        # (Computing it from &v[1] - &v[0] would index past the end of a
        # vector holding fewer than two elements.)
        # Stride 0 is the distance between the first elements of adjacent rows.
        self.strides[1] = itemsize
        self.strides[0] = self.ncols * itemsize

        # data() is well-defined even for an empty vector, unlike &v[0].
        buffer.buf = <char *>self.v.data()
        buffer.format = 'f'                     # float
        buffer.internal = NULL                  # see References
        buffer.itemsize = itemsize
        buffer.len = self.v.size() * itemsize   # product(shape) * itemsize
        buffer.ndim = 2
        buffer.obj = self
        buffer.readonly = 0
        buffer.shape = self.shape
        buffer.strides = self.strides
        buffer.suboffsets = NULL                # for pointer arrays only

        # Record the export last, once the buffer is fully filled in.
        self.view_count += 1

    def __releasebuffer__(self, Py_buffer *buffer):
        # One view has gone away; add_row() is allowed again at zero.
        self.view_count -= 1
Reply all
Reply to author
Forward
0 new messages