diff --git a/CMakeLists.txt b/CMakeLists.txt index 47db07db..b05f2f95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,6 +142,7 @@ set(PYBIND11_HEADERS include/pybind11/detail/cross_extension_shared_state.h include/pybind11/detail/descr.h include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h + include/pybind11/detail/function_record_pyobject.h include/pybind11/detail/init.h include/pybind11/detail/internals.h include/pybind11/detail/native_enum_data.h diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h index 82848b4f..e7724050 100644 --- a/include/pybind11/attr.h +++ b/include/pybind11/attr.h @@ -203,6 +203,7 @@ struct argument_record { /// Internal data structure which holds metadata about a bound function (signature, overloads, /// etc.) +#define PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID "v1" // PLEASE UPDATE if the struct is changed. struct function_record { function_record() : is_constructor(false), is_new_style_constructor(false), is_stateless(false), diff --git a/include/pybind11/detail/function_record_pyobject.h b/include/pybind11/detail/function_record_pyobject.h new file mode 100644 index 00000000..9f59772f --- /dev/null +++ b/include/pybind11/detail/function_record_pyobject.h @@ -0,0 +1,205 @@ +// Copyright (c) 2024 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +// For background see the description of PR google/pywrapcc#30099. + +#pragma once + +#include "../attr.h" +#include "../pytypes.h" +#include "common.h" + +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +struct function_record_PyObject { + PyObject_HEAD + function_record *cpp_func_rec; +}; + +PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods) + +PyObject *tp_new_impl(PyTypeObject *type, PyObject *args, PyObject *kwds); +PyObject *tp_alloc_impl(PyTypeObject *type, Py_ssize_t nitems); +int tp_init_impl(PyObject *self, PyObject *args, PyObject *kwds); +void tp_dealloc_impl(PyObject *self); +void tp_free_impl(void *self); + +static PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *); + +PYBIND11_WARNING_PUSH +#if defined(__GNUC__) && __GNUC__ >= 8 +PYBIND11_WARNING_DISABLE_GCC("-Wcast-function-type") +#endif +static PyMethodDef tp_methods_impl[] + = {{"__reduce_ex__", (PyCFunction) reduce_ex_impl, METH_VARARGS | METH_KEYWORDS, nullptr}, + {nullptr, nullptr, 0, nullptr}}; +PYBIND11_WARNING_POP + +// Note that this name is versioned. +constexpr char tp_name_impl[] + = "pybind11_detail_function_record_" PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID + "_" PYBIND11_PLATFORM_ABI_ID_V4; + +PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods) + +// Designated initializers are a C++20 feature: +// https://en.cppreference.com/w/cpp/language/aggregate_initialization#Designated_initializers +// MSVC rejects them unless /std:c++20 is used (error code C7555). +PYBIND11_WARNING_PUSH +PYBIND11_WARNING_DISABLE_CLANG("-Wmissing-field-initializers") +#if defined(__GNUC__) && __GNUC__ >= 8 +PYBIND11_WARNING_DISABLE_GCC("-Wmissing-field-initializers") +#endif +static PyTypeObject function_record_PyTypeObject = { + PyVarObject_HEAD_INIT(nullptr, 0) + /* const char *tp_name */ function_record_PyTypeObject_methods::tp_name_impl, + /* Py_ssize_t tp_basicsize */ sizeof(function_record_PyObject), + /* Py_ssize_t tp_itemsize */ 0, + /* destructor tp_dealloc */ function_record_PyTypeObject_methods::tp_dealloc_impl, + /* Py_ssize_t tp_vectorcall_offset */ 0, + /* getattrfunc tp_getattr */ nullptr, + /* setattrfunc tp_setattr */ nullptr, + /* PyAsyncMethods *tp_as_async */ nullptr, + /* reprfunc tp_repr */ nullptr, + /* PyNumberMethods *tp_as_number */ nullptr, + /* PySequenceMethods *tp_as_sequence */ nullptr, + /* PyMappingMethods *tp_as_mapping */ nullptr, + /* hashfunc tp_hash */ nullptr, + /* ternaryfunc tp_call */ nullptr, + /* reprfunc tp_str */ nullptr, + /* getattrofunc tp_getattro */ nullptr, + /* setattrofunc tp_setattro */ nullptr, + /* PyBufferProcs *tp_as_buffer */ nullptr, + /* unsigned long tp_flags */ Py_TPFLAGS_DEFAULT, + /* const char *tp_doc */ nullptr, + /* traverseproc tp_traverse */ nullptr, + /* inquiry tp_clear */ nullptr, + /* richcmpfunc tp_richcompare */ nullptr, + /* Py_ssize_t tp_weaklistoffset */ 0, + /* getiterfunc tp_iter */ nullptr, + /* iternextfunc tp_iternext */ nullptr, + /* struct PyMethodDef *tp_methods */ function_record_PyTypeObject_methods::tp_methods_impl, + /* struct PyMemberDef *tp_members */ nullptr, + /* struct PyGetSetDef *tp_getset */ nullptr, + /* struct _typeobject *tp_base */ nullptr, + /* PyObject *tp_dict */ nullptr, + /* descrgetfunc tp_descr_get */ nullptr, + /* descrsetfunc tp_descr_set */ nullptr, + /* Py_ssize_t tp_dictoffset */ 0, + /* initproc tp_init */ function_record_PyTypeObject_methods::tp_init_impl, + /* allocfunc tp_alloc */ function_record_PyTypeObject_methods::tp_alloc_impl, + /* newfunc tp_new */ function_record_PyTypeObject_methods::tp_new_impl, + /* freefunc tp_free */ function_record_PyTypeObject_methods::tp_free_impl, + /* inquiry tp_is_gc */ nullptr, + /* PyObject *tp_bases */ nullptr, + /* PyObject *tp_mro */ nullptr, + /* PyObject *tp_cache */ nullptr, + /* PyObject *tp_subclasses */ nullptr, + /* PyObject *tp_weaklist */ nullptr, + /* destructor tp_del */ nullptr, + /* unsigned int tp_version_tag */ 0, + /* destructor tp_finalize */ nullptr, +#if PY_VERSION_HEX >= 0x03080000 + /* vectorcallfunc tp_vectorcall */ nullptr, +#endif +}; +PYBIND11_WARNING_POP + +static bool function_record_PyTypeObject_PyType_Ready_first_call = true; + +inline void function_record_PyTypeObject_PyType_Ready() { + if (function_record_PyTypeObject_PyType_Ready_first_call) { + if (PyType_Ready(&function_record_PyTypeObject) < 0) { + throw error_already_set(); + } + function_record_PyTypeObject_PyType_Ready_first_call = false; + } +} + +inline bool is_function_record_PyObject(PyObject *obj) { + if (PyType_Check(obj) != 0) { + return false; + } + PyTypeObject *obj_type = Py_TYPE(obj); + // Fast path (pointer comparison). + if (obj_type == &function_record_PyTypeObject) { + return true; + } + // This works across extension modules. Note that tp_name is versioned. + if (strcmp(obj_type->tp_name, function_record_PyTypeObject.tp_name) == 0) { + return true; + } + return false; +} + +inline function_record *function_record_ptr_from_PyObject(PyObject *obj) { + if (is_function_record_PyObject(obj)) { + return ((detail::function_record_PyObject *) obj)->cpp_func_rec; + } + return nullptr; +} + +inline object function_record_PyObject_New() { + auto *py_func_rec = PyObject_New(function_record_PyObject, &function_record_PyTypeObject); + if (py_func_rec == nullptr) { + throw error_already_set(); + } + py_func_rec->cpp_func_rec = nullptr; // For clarity/purity. Redundant in practice. + return reinterpret_steal((PyObject *) py_func_rec); +} + +PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods) + +// Guard against accidents & oversights, in particular when porting to future Python versions. +inline PyObject *tp_new_impl(PyTypeObject *, PyObject *, PyObject *) { + pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_new_impl"); + // return nullptr; // Unreachable. +} + +inline PyObject *tp_alloc_impl(PyTypeObject *, Py_ssize_t) { + pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_alloc_impl"); + // return nullptr; // Unreachable. +} + +inline int tp_init_impl(PyObject *, PyObject *, PyObject *) { + pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_init_impl"); + // return -1; // Unreachable. +} + +// The implementation needs the definition of `class cpp_function`. +void tp_dealloc_impl(PyObject *self); + +inline void tp_free_impl(void *) { + pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_free_impl"); +} + +inline PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *) { + // Deliberately ignoring the arguments for simplicity (expected is `protocol: int`). + const function_record *rec = function_record_ptr_from_PyObject(self); + if (rec == nullptr) { + pybind11_fail( + "FATAL: function_record_PyTypeObject reduce_ex_impl(): cannot obtain cpp_func_rec."); + } + if (rec->name != nullptr && rec->name[0] != '\0' && rec->scope + && PyModule_Check(rec->scope.ptr()) != 0) { + object scope_module = get_scope_module(rec->scope); + if (scope_module) { + return make_tuple(reinterpret_borrow(PyEval_GetBuiltins())["eval"], + make_tuple(str("__import__('importlib').import_module('") + + scope_module + str("')"))) + .release() + .ptr(); + } + } + set_error(PyExc_RuntimeError, repr(self) + str(" is not pickleable.")); + return nullptr; +} + +PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods) + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 23515987..c049728f 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -145,6 +145,7 @@ struct internals { # if PYBIND11_INTERNALS_VERSION > 4 // Note that we have to use a std::string to allocate memory to ensure a unique address // We want unique addresses since we use pointer equality to compare function records + // OBSOLETE: google/pywrapcc#30099 std::string function_record_capsule_name = internals_function_record_capsule_name; # endif diff --git a/include/pybind11/functional.h b/include/pybind11/functional.h index bf2721dc..253b4df7 100644 --- a/include/pybind11/functional.h +++ b/include/pybind11/functional.h @@ -98,15 +98,8 @@ struct type_caster> { auto *cfunc_self = PyCFunction_GET_SELF(cfunc.ptr()); if (cfunc_self == nullptr) { PyErr_Clear(); - } else if (isinstance(cfunc_self)) { - auto c = reinterpret_borrow(cfunc_self); - - function_record *rec = nullptr; - // Check that we can safely reinterpret the capsule into a function_record - if (detail::is_function_record_capsule(c)) { - rec = c.get_pointer(); - } - + } else { + function_record *rec = function_record_ptr_from_PyObject(cfunc_self); while (rec != nullptr) { if (rec->is_stateless && same_type(typeid(function_type), diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 34617d4d..ae7361e9 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -12,6 +12,7 @@ // IWYU pragma: begin_exports #include "detail/class.h" +#include "detail/function_record_pyobject.h" #include "detail/init.h" #include "detail/native_enum_data.h" #include "detail/smart_holder_sfinae_hooks_only.h" @@ -22,6 +23,7 @@ #include "options.h" #include "typing.h" +#include #include #include #include @@ -530,20 +532,11 @@ class cpp_function : public function { if (rec->sibling) { if (PyCFunction_Check(rec->sibling.ptr())) { auto *self = PyCFunction_GET_SELF(rec->sibling.ptr()); - if (!isinstance(self)) { + chain = detail::function_record_ptr_from_PyObject(self); + if (chain && !chain->scope.is(rec->scope)) { + /* Never append a method to an overload chain of a parent class; + instead, hide the parent's overloads in this case */ chain = nullptr; - } else { - auto rec_capsule = reinterpret_borrow(self); - if (detail::is_function_record_capsule(rec_capsule)) { - chain = rec_capsule.get_pointer(); - /* Never append a method to an overload chain of a parent class; - instead, hide the parent's overloads in this case */ - if (!chain->scope.is(rec->scope)) { - chain = nullptr; - } - } else { - chain = nullptr; - } } } // Don't trigger for things like the default __init__, which are wrapper_descriptors @@ -563,21 +556,14 @@ class cpp_function : public function { = reinterpret_cast(reinterpret_cast(dispatcher)); rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; - capsule rec_capsule(unique_rec.release(), - detail::get_function_record_capsule_name(), - [](void *ptr) { destruct((detail::function_record *) ptr); }); + detail::function_record_PyTypeObject_PyType_Ready(); // Call-once initialization. + object py_func_rec = detail::function_record_PyObject_New(); + ((detail::function_record_PyObject *) py_func_rec.ptr())->cpp_func_rec + = unique_rec.release(); guarded_strdup.release(); - object scope_module; - if (rec->scope) { - if (hasattr(rec->scope, "__module__")) { - scope_module = rec->scope.attr("__module__"); - } else if (hasattr(rec->scope, "__name__")) { - scope_module = rec->scope.attr("__name__"); - } - } - - m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr()); + object scope_module = detail::get_scope_module(rec->scope); + m_ptr = PyCFunction_NewEx(rec->def, py_func_rec.ptr(), scope_module.ptr()); if (!m_ptr) { pybind11_fail("cpp_function::cpp_function(): Could not allocate function object"); } @@ -606,9 +592,9 @@ class cpp_function : public function { // chain. chain_start = rec; rec->next = chain; - auto rec_capsule - = reinterpret_borrow(((PyCFunctionObject *) m_ptr)->m_self); - rec_capsule.set_pointer(unique_rec.release()); + auto *py_func_rec + = (detail::function_record_PyObject *) PyCFunction_GET_SELF(m_ptr); + py_func_rec->cpp_func_rec = unique_rec.release(); guarded_strdup.release(); } else { // Or end of chain (normal behavior) @@ -682,6 +668,8 @@ class cpp_function : public function { } } + friend void detail::function_record_PyTypeObject_methods::tp_dealloc_impl(PyObject *); + /// When a cpp_function is GCed, release any memory allocated by pybind11 static void destruct(detail::function_record *rec, bool free_strings = true) { // If on Python 3.9, check the interpreter "MICRO" (patch) version. @@ -731,13 +719,11 @@ class cpp_function : public function { /// Main dispatch logic for calls to functions bound using pybind11 static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) { using namespace detail; - assert(isinstance(self)); + const function_record *overloads = function_record_ptr_from_PyObject(self); + assert(overloads != nullptr); /* Iterator over the list of potentially admissible overloads */ - const function_record *overloads = reinterpret_cast( - PyCapsule_GetPointer(self, get_function_record_capsule_name())), - *current_overload = overloads; - assert(overloads != nullptr); + const function_record *current_overload = overloads; /* Need to know how many arguments + keyword arguments there are to pick the right overload */ @@ -1211,6 +1197,17 @@ class cpp_function : public function { PYBIND11_NAMESPACE_BEGIN(detail) +PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods) + +// This implementation needs the definition of `class cpp_function`. +inline void tp_dealloc_impl(PyObject *self) { + auto *py_func_rec = (function_record_PyObject *) self; + cpp_function::destruct(py_func_rec->cpp_func_rec); + py_func_rec->cpp_func_rec = nullptr; +} + +PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods) + /// Instance creation function for all pybind11 types. It only allocates space for the /// C++ object, but doesn't call the constructor -- an `__init__` function must do that. extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) { @@ -2290,14 +2287,7 @@ class class_ : public detail::generic_type { if (!func_self) { throw error_already_set(); } - if (!isinstance(func_self)) { - return nullptr; - } - auto cap = reinterpret_borrow(func_self); - if (!detail::is_function_record_capsule(cap)) { - return nullptr; - } - return cap.get_pointer(); + return detail::function_record_ptr_from_PyObject(func_self.ptr()); } }; diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 6b9c7a4f..c52efd89 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1369,6 +1369,18 @@ class simple_collector; template class unpacking_collector; +inline object get_scope_module(handle scope) { + if (scope) { + if (hasattr(scope, "__module__")) { + return scope.attr("__module__"); + } + if (hasattr(scope, "__name__")) { + return scope.attr("__name__"); + } + } + return object(); +} + PYBIND11_NAMESPACE_END(detail) // TODO: After the deprecated constructors are removed, this macro can be simplified by diff --git a/tests/extra_python_package/test_files.py b/tests/extra_python_package/test_files.py index 02f23359..16418163 100644 --- a/tests/extra_python_package/test_files.py +++ b/tests/extra_python_package/test_files.py @@ -58,6 +58,7 @@ "include/pybind11/detail/cross_extension_shared_state.h", "include/pybind11/detail/descr.h", "include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h", + "include/pybind11/detail/function_record_pyobject.h", "include/pybind11/detail/init.h", "include/pybind11/detail/internals.h", "include/pybind11/detail/native_enum_data.h", diff --git a/tests/pybind11_tests.h b/tests/pybind11_tests.h index 3924cf05..7be58feb 100644 --- a/tests/pybind11_tests.h +++ b/tests/pybind11_tests.h @@ -59,7 +59,7 @@ class UnusualOpRef { using NonTrivialType = std::shared_ptr; // Almost any non-trivial type will do. // Overriding operator& should not break pybind11. NonTrivialType operator&() { return non_trivial_member; } - const NonTrivialType operator&() const { return non_trivial_member; } + NonTrivialType operator&() const { return non_trivial_member; } private: NonTrivialType non_trivial_member; diff --git a/tests/test_pickling.py b/tests/test_pickling.py index 859ac087..2d84ac9f 100644 --- a/tests/test_pickling.py +++ b/tests/test_pickling.py @@ -1,6 +1,5 @@ import copy import pickle -import re import pytest @@ -8,18 +7,41 @@ from pybind11_tests import pickling as m -def test_pickle_simple_callable(): +def test_assumptions(): + assert pickle.HIGHEST_PROTOCOL >= 0 + + +@pytest.mark.parametrize("protocol", range(pickle.HIGHEST_PROTOCOL + 1)) +def test_pickle_simple_callable(protocol): assert m.simple_callable() == 20220426 - if env.PYPY: - serialized = pickle.dumps(m.simple_callable) - deserialized = pickle.loads(serialized) - assert deserialized() == 20220426 - else: - # To document broken behavior: currently it fails universally with - # all C Python versions. - with pytest.raises(TypeError) as excinfo: - pickle.dumps(m.simple_callable) - assert re.search("can.*t pickle .*PyCapsule.* object", str(excinfo.value)) + serialized = pickle.dumps(m.simple_callable, protocol=protocol) + assert b"pybind11_tests.pickling" in serialized + assert b"simple_callable" in serialized + deserialized = pickle.loads(serialized) + assert deserialized() == 20220426 + assert deserialized is m.simple_callable + + # UNUSUAL: function record pickle roundtrip returns a module, not a function record object: + if not env.PYPY: + assert ( + pickle.loads(pickle.dumps(m.simple_callable.__self__, protocol=protocol)) + is m + ) + # This is not expected to create issues because the only purpose of + # `m.simple_callable.__self__` is to enable pickling: the only method it has is + # `__reduce_ex__`. Direct access for any other purpose is not supported. + # Note that `repr(m.simple_callable.__self__)` shows, e.g.: + # `` + # It is considered to be as much an implementation detail as the + # `pybind11::detail::function_record` C++ type is. + + # @rainwoodman suggested that the unusual pickle roundtrip behavior could be + # avoided by changing `reduce_ex_impl()` to produce, e.g.: + # `"__import__('importlib').import_module('pybind11_tests.pickling').simple_callable.__self__"` + # as the argument for the `eval()` function, and adding a getter to the + # `function_record_PyTypeObject` that returns `self`. However, the additional code complexity + # for this is deemed warranted only if the unusual pickle roundtrip behavior actually + # creates issues. @pytest.mark.parametrize("cls_name", ["Pickleable", "PickleableNew"])