Skip to content

Improve object.__reduce_ex__ performance up to 20% #148269

@Bobronium

Description

@Bobronium

Feature or enhancement

Context

Currently, when object.__reduce_ex__ is called, it might forward the call to __reduce__.

For this to happen the following must be true:

  1. __reduce__ must be defined as a class attribute and must not be object.__reduce__
  2. __reduce__ must be resolvable through instance attribute lookup (i.e. getattr(self, "__reduce__") finds something)

Roughly, the following logic is run:

class object:
    def __reduce_ex__(self, protocol: int) -> tuple:
        """Python sketch of the current lookup order (illustrative, not real code)."""
        reduce = getattr(self, "__reduce__", None)  # <-- might not be necessary
        if reduce is not None:
            if getattr(type(self), "__reduce__", None) is not object.__reduce__:  # <-- real discriminator
                return reduce()
            # `reduce` above is thrown away, but we paid upfront cost to look it up
        return self._common_reduce(protocol)
Actual code (toggle visibility)

cpython/Objects/typeobject.c

Lines 8245 to 8277 in eab7dbd

/* Implementation of object.__reduce_ex__(protocol).
 *
 * Calls self.__reduce__() when type(self).__reduce__ is not the interpreter's
 * cached `objreduce` object; otherwise falls through to _common_reduce().
 * Returns NULL if either attribute lookup fails.
 */
static PyObject *
object___reduce_ex___impl(PyObject *self, int protocol)
/*[clinic end generated code: output=2e157766f6b50094 input=f326b43fb8a4c5ff]*/
{
PyObject *reduce;
/* Instance-level lookup is done first and unconditionally; a negative
 * return is propagated as an error, and `reduce` stays NULL when the
 * attribute is absent. */
if (PyObject_GetOptionalAttr(self, &_Py_ID(__reduce__), &reduce) < 0) {
return NULL;
}
if (reduce != NULL) {
PyObject *cls, *clsreduce;
int override;
cls = (PyObject *) Py_TYPE(self);
/* Class-level lookup: this is what decides whether __reduce__ was overridden. */
clsreduce = PyObject_GetAttr(cls, &_Py_ID(__reduce__));
if (clsreduce == NULL) {
/* Drop the instance-level reference before propagating the error. */
Py_DECREF(reduce);
return NULL;
}
PyInterpreterState *interp = _PyInterpreterState_GET();
/* Identity comparison against the per-interpreter cached `objreduce`. */
override = (clsreduce != _Py_INTERP_CACHED_OBJECT(interp, objreduce));
/* Only the identity was needed; release the class-level reference. */
Py_DECREF(clsreduce);
if (override) {
/* Overridden: call the attribute found on the instance, not the class one. */
PyObject *res = _PyObject_CallNoArgs(reduce);
Py_DECREF(reduce);
return res;
}
/* Not overridden: the instance-level lookup result is discarded unused. */
else
Py_DECREF(reduce);
}
return _common_reduce(self, protocol);
}

Looking at this, it's evident that we always need to check the first condition, but might not need to check the second one.

Proposal

  1. Look up the instance attribute only after a class-level override is confirmed
  2. Replace PyObject_GetAttr with _PyType_LookupRef — just a little speedup
def _PyType_Lookup(cls, name):
    for base in cls.__mro__:
        if name in base.__dict__:
            return base.__dict__[name]
    return None

class object:
    def __reduce_ex__(self, protocol: int) -> tuple:
        """Python sketch of the proposed lookup order (illustrative, not real code)."""
        # Class-level check first: it alone decides whether __reduce__ is overridden.
        if _PyType_Lookup(type(self), "__reduce__") is not object.__reduce__:
            # Only now pay for the instance-level lookup, whose result is actually used.
            reduce = getattr(self, "__reduce__", None)
            if reduce is not None:
                return reduce()
        return self._common_reduce(protocol)

Why not

The proposed change would make the following code behave differently (this is the only case I've been able to come up with where behavior differs):

class X:
    """Does not override __reduce__, but makes instance lookup of it raise."""

    def __getattribute__(self, name):
        # Only the "__reduce__" lookup fails; every other name uses the default lookup.
        if name == "__reduce__":
            raise RuntimeError("Boom!")
        return object.__getattribute__(self, name)

x = X()
x.__reduce_ex__(5)  # current implementation will raise, proposed will not

Descriptor behavior is not affected by the change, since descriptors are always defined as class attributes.

Potential speedup

| Benchmark | current | patched |
|---|---|---|
| default_reduce_ex | 297 ns | 250 ns: 1.19x faster |
| slots_default_reduce_ex | 283 ns | 239 ns: 1.19x faster |
| instance_shadow_reduce_ex | 278 ns | 250 ns: 1.11x faster |
| small_dataclass_reduce_ex | 299 ns | 252 ns: 1.19x faster |
| class_override_reduce_ex | 148 ns | 137 ns: 1.08x faster |
| class_override_getattribute_reduce_ex | 362 ns | 348 ns: 1.04x faster |
| pickle_dumps_default | 1.44 us | 1.29 us: 1.12x faster |
| pickle_dumps_class_override | 1.19 us | 1.15 us: 1.03x faster |
| pickle_small_dataclass | 1.50 us | 1.45 us: 1.04x faster |
| Geometric mean | (ref) | 1.10x faster |
Benchmark (toggle visibility)
"""
pyperf benchmarks for object.__reduce_ex__ behavior relevant to a CPython patch
that changes the lookup order from:

    1. self.__reduce__
    2. type(self).__reduce__

to:

    1. type(self).__reduce__
    2. self.__reduce__ only if the class actually overrides it

Usage:
    ./python bench_reduce_ex.py
    ./python bench_reduce_ex.py -o patched.json

Compare:
    python -m pyperf compare_to baseline.json patched.json
"""

import pickle
from dataclasses import dataclass
from functools import partial

import pyperf


class Default:
    """Plain class that does not override ``__reduce__``."""

    pass


# Fixture for the "default" benchmarks.
default_obj = Default()


class SlotsDefault:
    """Class with empty ``__slots__`` that does not override ``__reduce__``."""

    __slots__ = ()


# Fixture for the "slots_default" benchmarks.
slots_default_obj = SlotsDefault()


class DefaultWithInstanceReduce:
    """Plain class whose fixture instance gets a ``__reduce__`` set on it directly."""

    pass


# ``__reduce__`` exists only in the instance, not on the class, so there is no
# class-level override for this fixture.
instance_shadow_obj = DefaultWithInstanceReduce()
instance_shadow_obj.__reduce__ = lambda: (DefaultWithInstanceReduce, ())


class ClassOverride:
    """Class that overrides ``__reduce__`` at class level."""

    def __reduce__(self):
        """Reduce to a no-argument call of the class."""
        return ClassOverride, ()


# Fixture for the "class_override" benchmarks.
class_override_obj = ClassOverride()


class ClassOverrideWithGetattribute:
    """Class-level ``__reduce__`` override plus a Python-level ``__getattribute__``."""

    def __getattribute__(self, name):
        # Pure pass-through: the result is unchanged, but every attribute
        # lookup on the instance now goes through this Python-level method.
        return object.__getattribute__(self, name)

    def __reduce__(self):
        """Reduce to a no-argument call of the class."""
        return ClassOverrideWithGetattribute, ()


# Fixture for the "class_override_getattribute" benchmark.
class_override_getattribute_obj = ClassOverrideWithGetattribute()


# Small two-field dataclass that does not define __reduce__ itself.
@dataclass
class Data:
    x: int
    y: str


# Fixture for the "small_dataclass" benchmarks.
small_dataclass = Data(42, "foo")


def main():
    """Register every benchmark with a pyperf runner, in a fixed order.

    The first group times ``__reduce_ex__(4)`` called directly on each
    fixture; the second group times ``pickle.dumps`` on a subset of the same
    fixtures. Benchmark names and registration order are significant and are
    kept exactly as listed.
    """
    runner = pyperf.Runner()

    reduce_ex_cases = (
        ("default_reduce_ex", default_obj),
        ("slots_default_reduce_ex", slots_default_obj),
        ("instance_shadow_reduce_ex", instance_shadow_obj),
        ("small_dataclass_reduce_ex", small_dataclass),
        ("class_override_reduce_ex", class_override_obj),
        (
            "class_override_getattribute_reduce_ex",
            class_override_getattribute_obj,
        ),
    )
    for bench_name, target in reduce_ex_cases:
        # The bound method is looked up once here, outside the timed call.
        runner.bench_func(bench_name, partial(target.__reduce_ex__, 4))

    pickle_cases = (
        ("pickle_dumps_default", default_obj),
        ("pickle_dumps_slots_default", slots_default_obj),
        ("pickle_dumps_class_override", class_override_obj),
        ("pickle_small_dataclass", small_dataclass),
    )
    for bench_name, target in pickle_cases:
        runner.bench_func(bench_name, partial(pickle.dumps, target))

if __name__ == "__main__":
    main()

Has this already been discussed elsewhere?

This is a minor feature, which does not need previous discussion elsewhere

Links to previous discussion of this feature:

No response

Linked PRs

Metadata

Metadata

Assignees

No one assigned

    Labels

    interpreter-core(Objects, Python, Grammar, and Parser dirs)performancePerformance or resource usagetype-featureA feature request or enhancement

    Projects

    Status

    No status

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions