Certain types of container cannot participate in a reference cycle, and so do not need to be tracked by the garbage collector. Untracking these objects reduces the cost of garbage collections. However, determining which objects may be untracked is not free, and the costs must be weighed against the benefits for garbage collection.
There are two possible strategies for when to untrack a container:
i) When the container is created. ii) When the container is examined by the garbage collector.
Tuples containing only immutable objects (integers, strings etc, and recursively, tuples of immutable objects) do not need to be tracked. The interpreter creates a large number of tuples, many of which will not survive until garbage collection. It is therefore not worthwhile to untrack eligible tuples at creation time.
Instead, all tuples except the empty tuple are tracked when created. During garbage collection it is determined whether any surviving tuples can be untracked. A tuple can be untracked if all of its contents are already not tracked. Tuples are examined for untracking in all garbage collection cycles. It may take more than one cycle to untrack a tuple.
Dictionaries containing only immutable objects also do not need to be tracked. Dictionaries are untracked when created. If a tracked item is inserted into a dictionary (either as a key or value), the dictionary becomes tracked. During a full garbage collection (all generations), the collector will untrack any dictionaries whose contents are not tracked.
The module provides the Python function is_tracked(obj), which returns the CURRENT tracking status of the object. Subsequent garbage collections may change the tracking status of the object.
Untracking of certain containers was introduced in issue #4688, and the algorithm was refined in response to issue #14775. */
/* List node that links an object to its neighbours in a doubly linked list.
 * Both links are stored as uintptr_t values rather than typed pointers. */
typedef struct {
    // Pointer to next object in the list.
    // 0 means the object is not tracked
    uintptr_t _gc_next;

    // Pointer to previous object in the list.
    // Lowest two bits are used for flags documented later.
    uintptr_t _gc_prev;
} PyGC_Head;
structgc_generation { PyGC_Head head; int threshold; /* collection threshold */ int count; /* count of allocations or collections of younger generations */ };
if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); /* We're only interested in gc_refs for objects in the * generation being collected, which can be recognized * because only they have positive gc_refs. */ if (gc_is_collecting(gc)) { // 将该对象结点的引用计数减一 gc_decref(gc); } } return0; } staticinlinevoid gc_decref(PyGC_Head *g) { _PyObject_ASSERT_WITH_MSG(FROM_GC(g), gc_get_refs(g) > 0, "refcount is too small"); g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; }
staticvoid move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young);
PyGC_Head *last = GC_PREV(unreachable); last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); _PyGCHead_SET_PREV(gc, last); gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); unreachable->_gc_prev = (uintptr_t)gc; } gc = (PyGC_Head*)prev->_gc_next; } // young->_gc_prev must be last element remained in the list. young->_gc_prev = (uintptr_t)prev; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; }
/* A traversal callback for move_unreachable. */ // 重设被可达对象引用的对象的gc_refs为1 staticint visit_reachable(PyObject *op, PyGC_Head *reachable) { // 忽略gc_refs值<=0的结点对象 if (!_PyObject_IS_GC(op)) { return0; }
/* 回收的核心算法 */ static Py_ssize_t gc_collect_main(PyThreadState *tstate, int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) { int i; Py_ssize_t m = 0; /* 待收集的对象个数 */ Py_ssize_t n = 0; /* 不能被收集的不可达对象个数 */ PyGC_Head *young; /* 正在处理的这一代 */ PyGC_Head *old; /* young的下一代 */ PyGC_Head unreachable; /* 不可达链表 */ PyGC_Head finalizers; /* 实现__del__的对象 */ PyGC_Head *gc; _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
// GC状态体 GCState *gcstate = &tstate->interp->gc;
// gc_collect_main() must not be called before _PyGC_Init // or after _PyGC_Fini() assert(gcstate->garbage != NULL); assert(!_PyErr_Occurred(tstate));
#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS if (tstate->interp->config._isolated_interpreter) { // bpo-40533: The garbage collector must not be run on parallel on // Python objects shared by multiple interpreters. return 0; } #endif
if (PyDTrace_GC_START_ENABLED()) PyDTrace_GC_START(generation);
/* update collection and allocation counters */ // 将当前代的后一代的GC计数值 + 1 // 将当前代及其以前的GC计数值重置为0, 因为回收某一代时, 会一起回收该代之前的所有代的不可达结点 if (generation+1 < NUM_GENERATIONS) gcstate->generations[generation+1].count += 1; for (i = 0; i <= generation; i++) gcstate->generations[i].count = 0;
/* merge younger generations with one we are currently collecting */ // 将当前代与其之前的所有代的可收集结点的链表合并起来 for (i = 0; i < generation; i++) { gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); }
/* handy references */ // young: 当前代可收集链表的头部节点, old: 当前代的下一代可收集链表的头部结点 young = GEN_HEAD(gcstate, generation); if (generation < NUM_GENERATIONS-1) old = GEN_HEAD(gcstate, generation+1); else old = young; validate_list(old, collecting_clear_unreachable_clear);
untrack_tuples(young); /* Move reachable objects to next generation. */ // 将当前代的可达对象移入下一代中, 并将当前代的链表置空。 if (young != old) { if (generation == NUM_GENERATIONS - 2) { gcstate->long_lived_pending += gc_list_size(young); } gc_list_merge(young, old); } else { /* We only un-track dicts in full collections, to avoid quadratic dict build-up. See issue #14775. */ untrack_dicts(young); gcstate->long_lived_pending = 0; gcstate->long_lived_total = gc_list_size(young); }
/* All objects in unreachable are trash, but objects reachable from * legacy finalizers (e.g. tp_del) can't safely be deleted. */ gc_list_init(&finalizers); // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list.
// 去除不可达结点的标识, 最终移入finalizers链表 move_legacy_finalizers(&unreachable, &finalizers); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. */ move_legacy_finalizer_reachable(&finalizers);
/* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable);
/* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ PyGC_Head final_unreachable; handle_resurrected_objects(&unreachable, &final_unreachable, old);
/* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ m += gc_list_size(&final_unreachable);
/* Collect statistics on uncollectable objects found and print * debugging information. */ for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; if (gcstate->debug & DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } if (gcstate->debug & DEBUG_STATS) { double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1); PySys_WriteStderr( "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", n+m, n, d); }
/* Append instances in the uncollectable set to a Python * reachable list of garbage. The programmer has to deal with * this if they insist on creating this type of structure. */ handle_legacy_finalizers(tstate, gcstate, &finalizers, old); validate_list(old, collecting_clear_unreachable_clear);
/* Clear free list only during the collection of the highest * generation */ if (generation == NUM_GENERATIONS-1) { clear_freelists(tstate->interp); }
if (_PyErr_Occurred(tstate)) { if (nofail) { _PyErr_Clear(tstate); } else { _PyErr_WriteUnraisableMsg("in garbage collection", NULL); } }
/* Update stats */ // 更新 if (n_collected) { *n_collected = m; } if (n_uncollectable) { *n_uncollectable = n; }