Skip to content

Commit 05679f3

Browse files
authored
gh-151722: Defer GC tracking in frozendict.copy() (#152230)
Fix _PyDict_Or() and frozendict.copy(): only track the frozendict by the GC once the dictionary is fully initialized. Functions modifying frozendict now ensures that the object is not tracked by the GC (in debug mode). * can_modify_dict() checks that _PyObject_GC_IS_TRACKED() is false for frozendicts. * dict_merge_api() makes sure that the dictionary is tracked by the GC.
1 parent 72cad14 commit 05679f3

1 file changed

Lines changed: 72 additions & 37 deletions

File tree

Objects/dictobject.c

Lines changed: 72 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,20 @@ static inline int
305305
can_modify_dict(PyDictObject *mp)
306306
{
307307
if (PyFrozenDict_Check(mp)) {
308+
// gh-151722: A frozendict must not be tracked by the GC
309+
// when it's being modified.
310+
assert(!_PyObject_GC_IS_TRACKED(mp));
311+
308312
// No locking required to modify a newly created frozendict
309313
// since it's only accessible from the current thread.
310-
return PyUnstable_Object_IsUniquelyReferenced(_PyObject_CAST(mp));
314+
assert(PyUnstable_Object_IsUniquelyReferenced(_PyObject_CAST(mp)));
311315
}
312316
else {
313317
// Locking is only required if the dictionary is not
314318
// uniquely referenced.
315319
ASSERT_DICT_LOCKED(mp);
316-
return 1;
317320
}
321+
return 1;
318322
}
319323
#endif
320324

@@ -937,7 +941,7 @@ free_values(PyDictValues *values, bool use_qsbr)
937941
static inline PyObject *
938942
new_dict_impl(PyDictObject *mp, PyDictKeysObject *keys,
939943
PyDictValues *values, Py_ssize_t used,
940-
int free_values_on_failure, int frozendict)
944+
int free_values_on_failure, int frozendict, int gc_track)
941945
{
942946
assert(keys != NULL);
943947
if (mp == NULL) {
@@ -956,12 +960,14 @@ new_dict_impl(PyDictObject *mp, PyDictKeysObject *keys,
956960
((PyFrozenDictObject *)mp)->ma_hash = -1;
957961
}
958962
ASSERT_CONSISTENT(mp);
959-
_PyObject_GC_TRACK(mp);
963+
if (gc_track) {
964+
_PyObject_GC_TRACK(mp);
965+
}
960966
return (PyObject *)mp;
961967
}
962968

963969
/* Consumes a reference to the keys object */
964-
static PyObject *
970+
static PyObject*
965971
new_dict(PyDictKeysObject *keys, PyDictValues *values,
966972
Py_ssize_t used, int free_values_on_failure)
967973
{
@@ -971,16 +977,30 @@ new_dict(PyDictKeysObject *keys, PyDictValues *values,
971977
}
972978
assert(mp == NULL || Py_IS_TYPE(mp, &PyDict_Type));
973979

974-
return new_dict_impl(mp, keys, values, used, free_values_on_failure, 0);
980+
return new_dict_impl(mp, keys, values, used, free_values_on_failure, 0, 1);
975981
}
976982

977983
/* Consumes a reference to the keys object */
978-
static PyObject *
979-
new_frozendict(PyDictKeysObject *keys, PyDictValues *values,
980-
Py_ssize_t used, int free_values_on_failure)
984+
static PyObject*
985+
new_dict_untracked(PyDictKeysObject *keys, PyDictValues *values,
986+
Py_ssize_t used, int free_values_on_failure)
987+
{
988+
PyDictObject *mp = _Py_FREELIST_POP(PyDictObject, dicts);
989+
if (mp == NULL) {
990+
mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
991+
}
992+
assert(mp == NULL || Py_IS_TYPE(mp, &PyDict_Type));
993+
994+
return new_dict_impl(mp, keys, values, used, free_values_on_failure, 0, 0);
995+
}
996+
997+
/* Consumes a reference to the keys object */
998+
static PyObject*
999+
new_frozendict_untracked(PyDictKeysObject *keys, PyDictValues *values,
1000+
Py_ssize_t used, int free_values_on_failure)
9811001
{
9821002
PyDictObject *mp = PyObject_GC_New(PyDictObject, &PyFrozenDict_Type);
983-
return new_dict_impl(mp, keys, values, used, free_values_on_failure, 1);
1003+
return new_dict_impl(mp, keys, values, used, free_values_on_failure, 1, 0);
9841004
}
9851005

9861006
static PyObject *
@@ -3471,7 +3491,6 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
34713491
}
34723492
if (PyFrozenDict_Check(d)) {
34733493
assert(can_modify_dict((PyDictObject*)d));
3474-
assert(!_PyObject_GC_IS_TRACKED(d));
34753494
}
34763495

34773496
if (PyDict_CheckExact(d)) {
@@ -4076,6 +4095,7 @@ merge_from_seq2_lock_held(PyObject *d, PyObject *seq2, int override)
40764095
assert(d != NULL);
40774096
assert(PyAnyDict_Check(d));
40784097
assert(seq2 != NULL);
4098+
assert(can_modify_dict((PyDictObject*)d));
40794099

40804100
it = PyObject_GetIter(seq2);
40814101
if (it == NULL)
@@ -4277,11 +4297,13 @@ dict_merge(PyObject *a, PyObject *b, int override, PyObject **dupkey)
42774297
assert(0 <= override && override <= 2);
42784298

42794299
PyDictObject *mp = _PyAnyDict_CAST(a);
4300+
42804301
int res = 0;
42814302
if (PyAnyDict_Check(b) && (Py_TYPE(b)->tp_iter == dict_iter)) {
42824303
PyDictObject *other = (PyDictObject*)b;
42834304
int res;
42844305
Py_BEGIN_CRITICAL_SECTION2(a, b);
4306+
assert(can_modify_dict(mp));
42854307
res = dict_dict_merge((PyDictObject *)a, other, override, dupkey);
42864308
ASSERT_CONSISTENT(a);
42874309
Py_END_CRITICAL_SECTION2();
@@ -4290,6 +4312,8 @@ dict_merge(PyObject *a, PyObject *b, int override, PyObject **dupkey)
42904312
else {
42914313
/* Do it the generic, slower way */
42924314
Py_BEGIN_CRITICAL_SECTION(a);
4315+
assert(can_modify_dict(mp));
4316+
42934317
PyObject *keys = PyMapping_Keys(b);
42944318
PyObject *iter;
42954319
PyObject *key, *value;
@@ -4382,9 +4406,7 @@ dict_merge_api(PyObject *a, PyObject *b, int override, PyObject **dupkey)
43824406
}
43834407

43844408
int res = dict_merge(a, b, override, dupkey);
4385-
if (PyDict_Check(a)) {
4386-
assert(_PyObject_GC_IS_TRACKED(a));
4387-
}
4409+
assert(_PyObject_GC_IS_TRACKED(a));
43884410
return res;
43894411
}
43904412

@@ -4442,7 +4464,7 @@ copy_values(PyDictValues *values)
44424464
}
44434465

44444466
static PyObject *
4445-
copy_lock_held(PyObject *o, int as_frozendict)
4467+
copy_lock_held_untracked(PyObject *o, int as_frozendict)
44464468
{
44474469
PyObject *copy;
44484470
PyDictObject *mp;
@@ -4455,12 +4477,15 @@ copy_lock_held(PyObject *o, int as_frozendict)
44554477
mp = (PyDictObject *)o;
44564478
if (mp->ma_used == 0) {
44574479
/* The dict is empty; just return a new dict. */
4480+
PyObject *d;
44584481
if (as_frozendict) {
4459-
return PyFrozenDict_New(NULL);
4482+
d = frozendict_new_untracked(&PyFrozenDict_Type);
44604483
}
44614484
else {
4462-
return PyDict_New();
4485+
d = dict_new_untracked(&PyDict_Type);
44634486
}
4487+
assert(!_PyObject_GC_IS_TRACKED(d));
4488+
return d;
44644489
}
44654490

44664491
if (_PyDict_HasSplitTable(mp)) {
@@ -4492,7 +4517,7 @@ copy_lock_held(PyObject *o, int as_frozendict)
44924517
PyFrozenDictObject *frozen = (PyFrozenDictObject *)split_copy;
44934518
frozen->ma_hash = -1;
44944519
}
4495-
_PyObject_GC_TRACK(split_copy);
4520+
assert(!_PyObject_GC_IS_TRACKED(split_copy));
44964521
return (PyObject *)split_copy;
44974522
}
44984523

@@ -4520,10 +4545,10 @@ copy_lock_held(PyObject *o, int as_frozendict)
45204545
}
45214546
PyDictObject *new;
45224547
if (as_frozendict) {
4523-
new = (PyDictObject *)new_frozendict(keys, NULL, 0, 0);
4548+
new = (PyDictObject *)new_frozendict_untracked(keys, NULL, 0, 0);
45244549
}
45254550
else {
4526-
new = (PyDictObject *)new_dict(keys, NULL, 0, 0);
4551+
new = (PyDictObject *)new_dict_untracked(keys, NULL, 0, 0);
45274552
}
45284553
if (new == NULL) {
45294554
/* In case of an error, new_dict()/new_frozendict() takes care of
@@ -4533,14 +4558,15 @@ copy_lock_held(PyObject *o, int as_frozendict)
45334558

45344559
new->ma_used = mp->ma_used;
45354560
ASSERT_CONSISTENT(new);
4561+
assert(!_PyObject_GC_IS_TRACKED(new));
45364562
return (PyObject *)new;
45374563
}
45384564

45394565
if (as_frozendict) {
4540-
copy = PyFrozenDict_New(NULL);
4566+
copy = frozendict_new_untracked(&PyFrozenDict_Type);
45414567
}
45424568
else {
4543-
copy = PyDict_New();
4569+
copy = dict_new_untracked(&PyDict_Type);
45444570
}
45454571
if (copy == NULL)
45464572
return NULL;
@@ -4549,9 +4575,7 @@ copy_lock_held(PyObject *o, int as_frozendict)
45494575
return NULL;
45504576
}
45514577

4552-
if (PyDict_Check(copy)) {
4553-
assert(_PyObject_GC_IS_TRACKED(copy));
4554-
}
4578+
assert(!_PyObject_GC_IS_TRACKED(copy));
45554579
return copy;
45564580
}
45574581

@@ -4565,25 +4589,28 @@ PyDict_Copy(PyObject *o)
45654589

45664590
PyObject *res;
45674591
Py_BEGIN_CRITICAL_SECTION(o);
4568-
res = copy_lock_held(o, 0);
4592+
res = copy_lock_held_untracked(o, 0);
45694593
Py_END_CRITICAL_SECTION();
4594+
if (res != NULL) {
4595+
_PyObject_GC_TRACK(res);
4596+
}
45704597
return res;
45714598
}
45724599

45734600
// Similar to PyDict_Copy(), but return a frozendict if the argument
45744601
// is a frozendict.
45754602
static PyObject *
4576-
anydict_copy(PyObject *o)
4603+
anydict_copy_untracked(PyObject *o)
45774604
{
45784605
assert(PyAnyDict_Check(o));
45794606

45804607
PyObject *res;
45814608
if (PyFrozenDict_Check(o)) {
4582-
res = copy_lock_held(o, 1);
4609+
res = copy_lock_held_untracked(o, 1);
45834610
}
45844611
else {
45854612
Py_BEGIN_CRITICAL_SECTION(o);
4586-
res = copy_lock_held(o, 0);
4613+
res = copy_lock_held_untracked(o, 0);
45874614
Py_END_CRITICAL_SECTION();
45884615
}
45894616
return res;
@@ -4598,13 +4625,16 @@ _PyDict_CopyAsDict(PyObject *o)
45984625

45994626
PyObject *res;
46004627
if (PyFrozenDict_Check(o)) {
4601-
res = copy_lock_held(o, 0);
4628+
res = copy_lock_held_untracked(o, 0);
46024629
}
46034630
else {
46044631
Py_BEGIN_CRITICAL_SECTION(o);
4605-
res = copy_lock_held(o, 0);
4632+
res = copy_lock_held_untracked(o, 0);
46064633
Py_END_CRITICAL_SECTION();
46074634
}
4635+
if (res != NULL) {
4636+
_PyObject_GC_TRACK(res);
4637+
}
46084638
return res;
46094639
}
46104640

@@ -5157,14 +5187,15 @@ _PyDict_Or(PyObject *self, PyObject *other)
51575187
if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) {
51585188
Py_RETURN_NOTIMPLEMENTED;
51595189
}
5160-
PyObject *new = anydict_copy(self);
5190+
PyObject *new = anydict_copy_untracked(self);
51615191
if (new == NULL) {
51625192
return NULL;
51635193
}
51645194
if (dict_update_arg(new, other)) {
51655195
Py_DECREF(new);
51665196
return NULL;
51675197
}
5198+
_PyObject_GC_TRACK(new);
51685199
return new;
51695200
}
51705201

@@ -5352,7 +5383,6 @@ dict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwds
53525383
if (self == NULL) {
53535384
return NULL;
53545385
}
5355-
assert(!_PyObject_GC_IS_TRACKED(self));
53565386
_PyObject_GC_TRACK(self);
53575387
return self;
53585388
}
@@ -5434,7 +5464,7 @@ frozendict_vectorcall(PyObject *type, PyObject * const*args,
54345464
}
54355465
}
54365466
}
5437-
assert(!_PyObject_GC_IS_TRACKED(self));
5467+
54385468
_PyObject_GC_TRACK(self);
54395469
return self;
54405470
}
@@ -6753,10 +6783,12 @@ dictitems_xor_lock_held(PyObject *d1, PyObject *d2)
67536783
ASSERT_DICT_LOCKED(d1);
67546784
ASSERT_DICT_LOCKED(d2);
67556785

6756-
PyObject *temp_dict = copy_lock_held(d1, 0);
6786+
PyObject *temp_dict = copy_lock_held_untracked(d1, 0);
67576787
if (temp_dict == NULL) {
67586788
return NULL;
67596789
}
6790+
_PyObject_GC_TRACK(temp_dict);
6791+
67606792
PyObject *result_set = PySet_New(NULL);
67616793
if (result_set == NULL) {
67626794
Py_CLEAR(temp_dict);
@@ -8488,7 +8520,6 @@ frozendict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
84888520
assert(kwds == NULL);
84898521
}
84908522

8491-
assert(!_PyObject_GC_IS_TRACKED(d));
84928523
_PyObject_GC_TRACK(d);
84938524
return d;
84948525
}
@@ -8533,7 +8564,11 @@ frozendict_copy_impl(PyFrozenDictObject *self)
85338564
return Py_NewRef(self);
85348565
}
85358566

8536-
return anydict_copy((PyObject*)self);
8567+
PyObject *copy = anydict_copy_untracked((PyObject*)self);
8568+
if (copy != NULL) {
8569+
_PyObject_GC_TRACK(copy);
8570+
}
8571+
return copy;
85378572
}
85388573

85398574

0 commit comments

Comments
 (0)