Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Special handling of __setstate__ to support alternative pickle mechanisms. #30094

Merged
merged 7 commits into from
Jan 29, 2024
17 changes: 13 additions & 4 deletions include/pybind11/pybind11.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,11 @@ class cpp_function : public function {
// it alive.
auto *rec = unique_rec.get();

// Note the "__setstate__" manipulation below.
rec->is_constructor = rec->name != nullptr
&& (std::strcmp(rec->name, "__init__") == 0
|| std::strcmp(rec->name, "__setstate__") == 0);

// Keep track of strdup'ed strings, and clean them up as long as the function's capsule
// has not taken ownership yet (when `unique_rec.release()` is called).
// Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the
Expand All @@ -403,7 +408,14 @@ class cpp_function : public function {
strdup_guard guarded_strdup;

/* Create copies of all referenced C-style strings */
rec->name = guarded_strdup(rec->name ? rec->name : "");
if (rec->name == nullptr) {
rec->name = guarded_strdup("");
} else if (std::strcmp(rec->name, "__setstate__[non-constructor]") == 0) {
// See google/pywrapcc#30094 for background.
rec->name = guarded_strdup("__setstate__");
} else {
rec->name = guarded_strdup(rec->name);
}
if (rec->doc) {
rec->doc = guarded_strdup(rec->doc);
}
Expand All @@ -418,9 +430,6 @@ class cpp_function : public function {
}
}

rec->is_constructor = (std::strcmp(rec->name, "__init__") == 0)
|| (std::strcmp(rec->name, "__setstate__") == 0);

#if defined(PYBIND11_DETAILED_ERROR_MESSAGES) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING)
if (rec->is_constructor && !rec->is_new_style_constructor) {
const auto class_name
Expand Down
17 changes: 17 additions & 0 deletions tests/test_methods_and_attributes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import abc
import sys

import pytest
Expand Down Expand Up @@ -227,6 +228,22 @@ def test_metaclass_override():
assert isinstance(m.MetaclassOverride.__dict__["readonly"], int)


def test_abc_meta_incompatibility(): # Mostly to clearly expose the behavior.
with pytest.raises(TypeError) as exc_info:

class ExampleMandAABC(m.ExampleMandA, metaclass=abc.ABCMeta):
pass

assert "metaclass conflict" in str(exc_info.value)


def test_abc_meta_compatibility():
class MetaclassOverrideABC(m.MetaclassOverride, metaclass=abc.ABCMeta):
pass

assert type(MetaclassOverrideABC).__name__ == "ABCMeta"


def test_no_mixed_overloads():
from pybind11_tests import detailed_error_messages_enabled

Expand Down
51 changes: 51 additions & 0 deletions tests/test_pickling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
BSD-style license that can be found in the LICENSE file.
*/

#include <pybind11/stl.h>

#include "pybind11_tests.h"

#include <memory>
#include <stdexcept>
#include <utility>
#include <vector>

namespace exercise_trampoline {

Expand Down Expand Up @@ -60,6 +63,53 @@ void wrap(py::module m) {

} // namespace exercise_trampoline

namespace exercise_getinitargs_getstate_setstate {

// Exercise `__setstate__[non-constructor]` (see google/pywrapcc#30094), which
// was added to support a pickle protocol as established with Boost.Python
// (in 2002):
// https://www.boost.org/doc/libs/1_31_0/libs/python/doc/v2/pickle.html
// This mechanism was also adopted for PyCLIF:
// https://github.com/google/clif/blob/7d388e1de7db5beeb3d7429c18a2776d8188f44f/clif/testing/python/pickle_compatibility.clif
// The code below exercises the pickle protocol intentionally exactly as used
// in PyCLIF, to ensure that pybind11 stays fully compatible with existing
// client code relying on the long-established protocol. (It is impractical to
// change all such client code.)

class StoreTwoWithState {
public:
StoreTwoWithState(int v0, int v1) : values_{v0, v1}, state_{"blank"} {}
const std::vector<int> &GetInitArgs() const { return values_; }
const std::string &GetState() const { return state_; }
void SetState(const std::string &state) { state_ = state; }

private:
std::vector<int> values_;
std::string state_;
};

void wrap(py::module m) {
py::class_<StoreTwoWithState>(std::move(m), "StoreTwoWithState")
.def(py::init<int, int>())
.def("__getinitargs__",
[](const StoreTwoWithState &self) { return py::tuple(py::cast(self.GetInitArgs())); })
.def("__getstate__", &StoreTwoWithState::GetState)
.def(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused that both reduce_ex and setstate are provided in the same python class. I thought this would completely bypass setstate during unpickling?

Copy link
Contributor Author

@rwgk rwgk Jan 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this to the comment further up:

// The code below exercises the pickle protocol intentionally exactly as used
// in PyCLIF, to ensure that pybind11 stays fully compatible with existing
// client code relying on the long-established protocol. (It is impractical to
// change all such client code.)

The __reduce_ex__ does indeed not necessarily have to call __getinitargs__ and __getstate__, but I feel reducing the protocol as used in the wild just for the test would be a loss.

BTW: __reduce__ (Boost.Python) / __reduce_ex__ (PyCLIF) is the trick to make pickling robust for any Python version and pickle protocol. In theory __getinitargs__ and __getstate__ are not needed or could be replaced by another protocol, but in 2002 I decided to emulate what I found at that time in the Python documentation (for native types).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. This is to make a class that supports all different versions of the pickle protocol. My Python experience only started around 2010, so I do lack that part of background.

"__reduce_ex__",
[](py::handle self, int /*protocol*/) {
return py::make_tuple(self.attr("__class__"),
self.attr("__getinitargs__")(),
self.attr("__getstate__")());
},
py::arg("protocol") = -1)
.def(
"__setstate__[non-constructor]", // See google/pywrapcc#30094 for background.
[](StoreTwoWithState *self, const std::string &state) { self->SetState(state); },
py::arg("state"));
}

} // namespace exercise_getinitargs_getstate_setstate

TEST_SUBMODULE(pickling, m) {
m.def("simple_callable", []() { return 20220426; });

Expand Down Expand Up @@ -191,4 +241,5 @@ TEST_SUBMODULE(pickling, m) {
#endif

exercise_trampoline::wrap(m);
exercise_getinitargs_getstate_setstate::wrap(m);
}
20 changes: 20 additions & 0 deletions tests/test_pickling.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import pickle
import re

Expand Down Expand Up @@ -91,3 +92,22 @@ def test_roundtrip_simple_cpp_derived():
# Issue #3062: pickleable base C++ classes can incur object slicing
# if derived typeid is not registered with pybind11
assert not m.check_dynamic_cast_SimpleCppDerived(p2)


#
# exercise_getinitargs_getstate_setstate
#
def test_StoreTwoWithState():
obj = m.StoreTwoWithState(-38, 27)
assert obj.__getinitargs__() == (-38, 27)
assert obj.__getstate__() == "blank"
obj.__setstate__("blue")
reduced = obj.__reduce_ex__()
assert reduced == (m.StoreTwoWithState, (-38, 27), "blue")
cpy = copy.deepcopy(obj)
assert cpy.__reduce_ex__() == reduced
assert pickle.HIGHEST_PROTOCOL > 0 # To be sure the loop below makes sense.
for protocol in range(-1, pickle.HIGHEST_PROTOCOL + 1):
serialized = pickle.dumps(obj, protocol)
restored = pickle.loads(serialized)
assert restored.__reduce_ex__() == reduced
Loading