1

I have a c++ class that

  • stores two pointers to the data of two numpy arrays as members (m_ptrA, m_ptrB)
  • exposes a function to initialize the pointers (initPtrs)
  • exposes a function that operates on the pointers (doSomethingWithPtrs)
class Foo
{
public:
  void initPtrs( py::array_t<int32_t  , py::array::c_style>  pyAryA,
                 py::array_t<float32_t, py::array::c_style>  pyAryB
                )
  {
    m_ptrA = (int32_t*)   pyAryA.request().ptr;
    m_ptrB = (float32_t*) pyAryB.request().ptr;
  }
  
  void doSomethingWithPtrs()
  {
     std::cout << m_ptrB[0] << std::endl; //Crashes sometimes here: Pointer m_ptrB is not valid.
  }
  
 private:
   int32_t*   m_ptrA;
   float32_t* m_ptrB;
};

I tried to bind the class and its functions to python using pybind11:

// Python module `bar`: exposes Foo with a default constructor and two methods.
PYBIND11_MODULE(bar,m)
{
  py::class_<Foo> fooClass( m, "Foo" );

  fooClass.def( py::init<>() );

  // keep_alive<1,N>: tie the lifetime of argument N (the passed array) to
  // argument 1 (the Foo instance).
  fooClass.def( "init_ptrs", &Foo::initPtrs, py::keep_alive<1,2>(), py::keep_alive<1,3>() );

  fooClass.def( "do_something_with_ptrs", &Foo::doSomethingWithPtrs );
}

However, when calling do_something_with_ptrs after init_ptrs, the following program crashes sometimes, since the pointer m_ptrB is invalid:

def test( aryA, torchTensorB ):
  """Create a Foo, hand it both arrays, then read through the stored pointers.

  The second argument of init_ptrs is the unnamed result of the
  multiplication; no Python variable refers to it after the call.
  """
  foo_obj = bar.Foo()
  foo_obj.init_ptrs( aryA, torchTensorB.numpy() * 3840 )
  foo_obj.do_something_with_ptrs()

aryA is a NumPy array of type int32. torchTensorB is a PyTorch tensor of type torch.float32.

I'm pretty new to python/pybind. Any help would be appreciated. In particular, I'm not sure if I understood the py::keep_alive statements correctly.

2
  • To me, the Foo construction seems off. Are you still interested in a solution? Commented Feb 8, 2022 at 23:07
  • As workaround, I'm just deep copying the arrays now instead of storing the pointers. If you have a solution, I would be happy to hear it. Commented Feb 14, 2022 at 16:21

1 Answer 1

1

IMHO the issue lies somewhere else. I suspect that you are using an uninitialized pointer or going out of bounds.

Without a minimal reproducible example, I tried to come up with something on my own. I am leaving the sources below for reference. During my testing, everything worked as expected on Windows (with MSVC and Clang). Modifications to the array elements in Python showed up in the C++ code. Reassigning the variables in Python and calling the garbage collector manually did not invalidate the C++ class. When removing keep_alive (and multiplying the arrays by 2 in the init_ptrs call), I also managed to see the same memory being reassigned to the newly initialized array.

Sources

test.py

import torch
import numpy as np
import gc

import foo


# Demo driver: passes two arrays to foo.Foo, then reads them back through the
# object after explicit garbage collections and after rebinding the Python
# names. Statement order matters here; do not reorder.
if __name__ == "__main__":
    # Disable automatic collection so that collections only happen at the
    # explicit gc.collect() calls below.
    gc.disable()

    my_array = np.array([0, 1, 2, 3], dtype=np.int32)
    my_tensor = torch.tensor([4, 5, 6, 7], dtype=torch.float32)

    my_foo = foo.Foo()
    # The `* 2` products are new, unnamed arrays; no Python variable in this
    # script refers to them once the call returns.
    my_foo.init_ptrs(my_array * 2, my_tensor.numpy() * 2)

    # Print the collector counters before and after a forced collection.
    print(gc.get_count())
    gc.collect()
    print(gc.get_count())

    my_foo.do_something_with_ptrs()

    # NOTE(review): init_ptrs received the `* 2` copies, so these writes
    # presumably are not visible through my_foo -- confirm.
    my_array[0] = 71
    my_tensor[0] = 72
    my_foo.do_something_with_ptrs()

    # Rebind both names to fresh objects, dropping this script's references
    # to the original array and tensor.
    my_array = np.array([10, 11, 12, 13], dtype=np.int32)
    my_tensor = torch.tensor([14, 15, 16, 17], dtype=torch.float)

    print(gc.get_count())
    gc.collect()
    print(gc.get_count())

    my_foo.do_something_with_ptrs()

    # Drop the Foo instance and collect once more.
    del(my_foo)
    print(gc.get_count())
    gc.collect()
    print(gc.get_count())

foo.cpp

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#include <iostream>

namespace py = pybind11;

class Foo
{
public:
    void initPtrs(py::array_t<int32_t, py::array::c_style> pyAryA,
                  py::array_t<float_t, py::array::c_style> pyAryB)
    {
        auto buffer_info = pyAryA.request();
        m_ptrA = static_cast<int32_t *>(buffer_info.ptr);
        m_sizeA = [&]
        {
            size_t sum = 0;
            for (auto elem : buffer_info.shape)
                sum += elem;
            return sum;
        }();

        std::cout << "Initialized int32 with size: " << m_sizeA << '\n';

        buffer_info = pyAryB.request();
        m_ptrB = static_cast<float_t *>(buffer_info.ptr);
        m_sizeB = [&]
        {
            size_t sum = 0;
            for (auto elem : buffer_info.shape)
                sum += elem;
            return sum;
        }();

        std::cout << "Initialized float with size: " << m_sizeB << '\n';
    }

    void doSomethingWithPtrs()
    {
        std::cout << "int32 idx 0: ";
        for (size_t i = 0; i < m_sizeA /*+ 1*/; ++i)
        {
            std::cout << ' ' << m_ptrA[i];
        }
        std::cout << '\n';

        std::cout << "float idx 0: ";
        for (size_t i = 0; i < m_sizeB /*+ 1*/; ++i)
        {
            std::cout << ' ' << m_ptrB[i];
        }
        std::cout << '\n';
    }

private:
    int32_t *m_ptrA = nullptr;
    size_t m_sizeA = 0;
    float_t *m_ptrB = nullptr;
    size_t m_sizeB = 0;
};

// Python module `foo`: exposes Foo with a default constructor and two methods.
PYBIND11_MODULE(foo, m)
{
    py::class_<Foo> fooClass(m, "Foo");

    fooClass.def(py::init<>());

    // keep_alive<1, N>: tie the lifetime of argument N (the passed array) to
    // argument 1 (the Foo instance).
    fooClass.def("init_ptrs", &Foo::initPtrs, py::keep_alive<1, 2>(), py::keep_alive<1, 3>());

    fooClass.def("do_something_with_ptrs", &Foo::doSomethingWithPtrs);
}

setup.py (based on CMake example)

import os
import re
import sys
import platform
import subprocess

from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext
from distutils.version import LooseVersion


class CMakeExtension(Extension):
    """Extension with an empty source list that remembers its source directory."""

    def __init__(self, name, sourcedir=''):
        # No sources are handed to setuptools; only the absolute path of the
        # source directory is stored on the instance.
        super().__init__(name, sources=[])
        self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
    """build_ext command that configures and builds each extension with CMake."""

    def run(self):
        """Check that a recent enough CMake is available, then build all extensions.

        Raises:
            RuntimeError: if ``cmake`` cannot be executed or is older than 3.14.
        """
        try:
            out = subprocess.check_output(['cmake', '--version'])
        except OSError:
            raise RuntimeError("CMake must be installed to build the following extensions: " +
                               ", ".join(e.name for e in self.extensions))

        cmake_version = LooseVersion(
            re.search(r'version\s*([\d.]+)', out.decode()).group(1))
        # The accompanying CMakeLists.txt calls FetchContent_MakeAvailable,
        # which was added in CMake 3.14, so 3.11 is not sufficient.
        if cmake_version < '3.14.0':
            raise RuntimeError("CMake >= 3.14.0 required")

        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        """Run the CMake configure and build steps for one extension.

        Args:
            ext: extension object providing ``name`` and ``sourcedir``.

        Raises:
            subprocess.CalledProcessError: if either CMake invocation fails.
        """
        extdir = os.path.abspath(os.path.dirname(
            self.get_ext_fullpath(ext.name)))
        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
                      '-DPYTHON_EXECUTABLE=' + sys.executable,
                      '-DPIP_INSTALL=ON',
                      ]

        cfg = 'Debug' if self.debug else 'Release'
        build_args = ['--config', cfg]

        if platform.system() == "Windows":
            # On Windows the output directory is also set per configuration.
            cmake_args += [
                '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
            if sys.maxsize > 2**32:
                # 64-bit interpreter: request an x64 build.
                cmake_args += ['-A', 'x64']
            build_args += ['--', '/m']
        else:
            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
            build_args += ['--', '-j10']

        # Pass the package version to the compiler as the VERSION_INFO define.
        env = os.environ.copy()
        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
                                                              self.distribution.get_version())
        # exist_ok avoids the race between checking for and creating the directory.
        os.makedirs(self.build_temp, exist_ok=True)
        subprocess.check_call(['cmake', ext.sourcedir] +
                              cmake_args, cwd=self.build_temp, env=env)
        subprocess.check_call(['cmake', '--build', '.'] +
                              build_args, cwd=self.build_temp)


setup(
    name='Foo',
    # The version must be a string: a float literal silently drops trailing
    # zeros (e.g. 2022.10 would become 2022.1).
    version='2022.08',

    # The list of python packages
    py_modules=['foo'],
    package_dir={'': os.path.join(os.getcwd())},
    # A list of instances of setuptools.Extension providing the list of Python extensions to be built.
    ext_modules=[CMakeExtension('foo')],
    # A dictionary providing a mapping of command names to Command subclasses.
    cmdclass=dict(build_ext=CMakeBuild),
    zip_safe=False
)

CMakeLists.txt

# FetchContent_MakeAvailable() was introduced in CMake 3.14, so that is the
# lowest version this project can be configured with.
cmake_minimum_required(VERSION 3.14)
project(Foo)


include(FetchContent)

# Fetch pybind11
FetchContent_Declare(
  pybind11
    GIT_REPOSITORY https://github.com/pybind/pybind11
    GIT_TAG        v2.9.1
)

FetchContent_MakeAvailable(pybind11)

pybind11_add_module(foo "")

# The source file is an implementation detail of the module, so it is added
# PRIVATE instead of being propagated to consumers.
target_sources(foo
  PRIVATE
    ${CMAKE_CURRENT_LIST_DIR}/foo.cpp
)
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.