From a53bb2ecd83e9fa394b17cce71e8473ec8059998 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 3 Apr 2026 22:36:02 +0100 Subject: [PATCH] gh-146527: Heap-allocate gc_stats to avoid bloating PyInterpreterState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gc_stats struct contains ring buffers of gc_generation_stats entries (11 young + 3×2 old on default builds). Embedding it inline in _gc_runtime_state, which is itself inline in PyInterpreterState, pushed fields like _gil.locked and threads.head to offsets beyond what out-of-process profilers and debuggers can reasonably read in a single buffer (e.g. offset 9384 for _gil.locked vs an 8 KiB read buffer). Heap-allocate generation_stats via PyMem_RawCalloc in _PyGC_Init and free it in _PyGC_Fini. This shrinks PyInterpreterState by ~1.6 KiB and keeps the GIL, thread-list, and other frequently-inspected fields at stable, low offsets. --- Include/internal/pycore_interp_structs.h | 2 +- Modules/gcmodule.c | 6 +++--- Python/gc.c | 15 +++++++++++---- Python/gc_free_threading.c | 11 +++++++++-- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f76d4f41c55119..c4b084642668a9 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -248,7 +248,7 @@ struct _gc_runtime_state { struct gc_generation old[2]; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; - struct gc_stats generation_stats; + struct gc_stats *generation_stats; /* true if we are currently running the collector */ int collecting; // The frame that started the current collection. It might be NULL even when diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index c21b61589bd261..8da28130e9da9a 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -347,9 +347,9 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. */ GCState *gcstate = get_gc_state(); - stats[0] = gcstate->generation_stats.young.items[gcstate->generation_stats.young.index]; - stats[1] = gcstate->generation_stats.old[0].items[gcstate->generation_stats.old[0].index]; - stats[2] = gcstate->generation_stats.old[1].items[gcstate->generation_stats.old[1].index]; + stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index]; + stats[1] = gcstate->generation_stats->old[0].items[gcstate->generation_stats->old[0].index]; + stats[2] = gcstate->generation_stats->old[1].items[gcstate->generation_stats->old[1].index]; PyObject *result = PyList_New(0); if (result == NULL) diff --git a/Python/gc.c b/Python/gc.c index 7bca40f6e3f58e..284ac725d37ac6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -177,6 +177,11 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats)); + if (gcstate->generation_stats == NULL) { + return _PyStatus_NO_MEMORY(); + } + gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -1398,13 +1403,13 @@ static struct gc_generation_stats * gc_get_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1]; buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; @@ -1415,12 +1420,12 @@ static struct gc_generation_stats * gc_get_prev_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1]; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } @@ -2299,6 +2304,8 @@ _PyGC_Fini(PyInterpreterState *interp) GCState *gcstate = &interp->gc; Py_CLEAR(gcstate->garbage); Py_CLEAR(gcstate->callbacks); + PyMem_RawFree(gcstate->generation_stats); + gcstate->generation_stats = NULL; /* Prevent a subtle bug that affects sub-interpreters that use basic * single-phase init extensions (m_size == -1). Those extensions cause objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 7ad60a73a56a69..4b46ca04f56b20 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1698,6 +1698,11 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats)); + if (gcstate->generation_stats == NULL) { + return _PyStatus_NO_MEMORY(); + } + gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -2387,12 +2392,12 @@ static struct gc_generation_stats * get_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1]; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } @@ -2831,6 +2836,8 @@ _PyGC_Fini(PyInterpreterState *interp) GCState *gcstate = &interp->gc; Py_CLEAR(gcstate->garbage); Py_CLEAR(gcstate->callbacks); + PyMem_RawFree(gcstate->generation_stats); + gcstate->generation_stats = NULL; /* We expect that none of this interpreters objects are shared with other interpreters.