From 55766fc0354e25621d922bb588a248196d124051 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 4 May 2026 14:24:40 -0700
Subject: [PATCH v4 2/3] Memory Pools.

A memory context can have a memory pool, which tracks the memory for
that context as well as all subcontexts in a single total, and also
holds an (unenforced) limit. Memory pools are automatically inherited
by child contexts.

Memory Pools solve two problems:

First, the previous implementation in MemoryContextMemAllocated()
recursively walked through subcontexts to total the mem_allocated
fields. For hash aggregates that used a child memory context for each
group, it was too slow. Now, it just reads the single total.

Second, with the block size doubling behavior, it was likely that a
single small chunk allocation could cause a block allocation that
overwhelmed a small value of work_mem. Hash Aggregation protected
against this by constraining maxBlockSize for known contexts, but not
their subcontexts. Now, subcontexts constrain the block size using the
inherited memory pool's limit.

Suggested-by: David Rowley <dgrowleyml@gmail.com>
Discussion: https://postgr.es/m/3579239.1775151498@sss.pgh.pa.us
---
 src/backend/utils/mmgr/aset.c         |   8 +-
 src/backend/utils/mmgr/bump.c         |   7 +-
 src/backend/utils/mmgr/generation.c   |   7 +-
 src/backend/utils/mmgr/mcxt.c         | 110 +++++++++++++++++++++++++-
 src/include/nodes/memnodes.h          |  16 ++++
 src/include/utils/memutils.h          |   1 +
 src/include/utils/memutils_internal.h |  76 +++++++++++++++++-
 src/tools/pgindent/typedefs.list      |   1 +
 8 files changed, 218 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 3ddef26a3d3..8981c128672 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -867,6 +867,7 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags,
 	Size		blksize;
 	Size		required_size;
 	Size		chunk_size;
+	Size		blockSizeLimit;
 
 	/* due to the keeper block set->blocks should always be valid */
 	Assert(set->blocks != NULL);
@@ -931,8 +932,11 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags,
 	 */
 	blksize = set->nextBlockSize;
 	set->nextBlockSize <<= 1;
-	if (set->nextBlockSize > set->maxBlockSize)
-		set->nextBlockSize = set->maxBlockSize;
+
+	blockSizeLimit = MemoryContextBlockSizeLimit(context, set->initBlockSize,
+												 set->maxBlockSize);
+	if (set->nextBlockSize > blockSizeLimit)
+		set->nextBlockSize = blockSizeLimit;
 
 	/* Choose the actual chunk size to allocate */
 	chunk_size = GetChunkSizeFromFreeListIdx(fidx);
diff --git a/src/backend/utils/mmgr/bump.c b/src/backend/utils/mmgr/bump.c
index 1deb73f451a..154ebdebd12 100644
--- a/src/backend/utils/mmgr/bump.c
+++ b/src/backend/utils/mmgr/bump.c
@@ -457,15 +457,18 @@ BumpAllocFromNewBlock(MemoryContext context, Size size, int flags,
 	BumpBlock  *block;
 	Size		blksize;
 	Size		required_size;
+	Size		blockSizeLimit;
 
 	/*
 	 * The first such block has size initBlockSize, and we double the space in
 	 * each succeeding block, but not more than maxBlockSize.
 	 */
 	blksize = set->nextBlockSize;
+	blockSizeLimit = MemoryContextBlockSizeLimit(context, set->initBlockSize,
+												 set->maxBlockSize);
 	set->nextBlockSize <<= 1;
-	if (set->nextBlockSize > set->maxBlockSize)
-		set->nextBlockSize = set->maxBlockSize;
+	if (set->nextBlockSize > blockSizeLimit)
+		set->nextBlockSize = blockSizeLimit;
 
 	/* we'll need space for the chunk, chunk hdr and block hdr */
 	required_size = chunk_size + Bump_CHUNKHDRSZ + Bump_BLOCKHDRSZ;
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index ddcd25c1e9d..cade3ae3fe9 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -488,15 +488,18 @@ GenerationAllocFromNewBlock(MemoryContext context, Size size, int flags,
 	GenerationBlock *block;
 	Size		blksize;
 	Size		required_size;
+	Size		blockSizeLimit;
 
 	/*
 	 * The first such block has size initBlockSize, and we double the space in
 	 * each succeeding block, but not more than maxBlockSize.
 	 */
 	blksize = set->nextBlockSize;
+	blockSizeLimit = MemoryContextBlockSizeLimit(context, set->initBlockSize,
+												 set->maxBlockSize);
 	set->nextBlockSize <<= 1;
-	if (set->nextBlockSize > set->maxBlockSize)
-		set->nextBlockSize = set->maxBlockSize;
+	if (set->nextBlockSize > blockSizeLimit)
+		set->nextBlockSize = blockSizeLimit;
 
 	/* we'll need space for the chunk, chunk hdr and block hdr */
 	required_size = chunk_size + Generation_CHUNKHDRSZ + Generation_BLOCKHDRSZ;
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 073bdb35d2a..472123612f2 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -664,6 +664,86 @@ MemoryContextSetIdentifier(MemoryContext context, const char *id)
 	context->ident = id;
 }
 
+/*
+ * Add a memory pool to an existing context. It may already be a member of an
+ * outer pool owned by an ancestor, but it may not already own its memory
+ * pool.
+ *
+ * Currently, the context must have no child contexts, but that restriction
+ * could be lifted if necessary.
+ */
+MemoryPool *
+MemoryContextCreatePool(MemoryContext context, Size limit)
+{
+	MemoryPool *pool = &context->pool;
+
+	Assert(MemoryContextIsValid(context));
+	Assert(limit > 0);
+	Assert(context->firstchild == NULL);
+	Assert(!MemoryContextOwnsPool(context));
+
+	/* outer may already be set */
+
+	pool->limit = limit;
+	pool->allocated = context->mem_allocated;
+
+	return pool;
+}
+
+/* helper for MemoryContextSetParent() */
+static void
+MemoryContextTransferPool(MemoryContext context, MemoryContext new_parent)
+{
+	MemoryPool *old_outer_pool = context->pool.outer;
+	MemoryPool *new_outer_pool = NULL;
+	Size		subtree_mem;
+
+	if (new_parent)
+		new_outer_pool = MemoryContextGetPool(new_parent);
+
+	if (!old_outer_pool && !new_outer_pool)
+		return;
+
+	if (old_outer_pool == new_outer_pool)
+		return;
+
+	/*
+	 * If the context's pool is inherited from some ancestor context, we need
+	 * to calculate the total for this subtree. If it's owned, we can just use
+	 * the pool's total.
+	 */
+	if (MemoryContextOwnsPool(context))
+		subtree_mem = context->pool.allocated;
+	else
+		subtree_mem = MemoryContextMemAllocated(context, true);
+
+	/* subtract from old subtree */
+	for (MemoryPool *pool = old_outer_pool; pool != NULL; pool = pool->outer)
+		pool->allocated -= subtree_mem;
+
+	/* add to new subtree */
+	for (MemoryPool *pool = new_outer_pool; pool != NULL; pool = pool->outer)
+		pool->allocated += subtree_mem;
+
+	/*
+	 * If it's not the owner of its pool, subcontexts could refer to pools
+	 * owned by the context's old ancestors. Search subtree for references to
+	 * old_outer_pool and replace with new_outer_pool. NB: old_outer_pool or
+	 * new_outer_pool might be NULL here.
+	 */
+	if (!MemoryContextOwnsPool(context))
+	{
+		for (MemoryContext cxt = context->firstchild; cxt != NULL;
+			 cxt = MemoryContextTraverseNext(cxt, context))
+		{
+			if (cxt->pool.outer == old_outer_pool)
+				cxt->pool.outer = new_outer_pool;
+		}
+	}
+
+	context->pool.outer = new_outer_pool;
+}
+
 /*
  * MemoryContextSetParent
  *		Change a context to belong to a new parent (or no parent).
@@ -692,6 +772,12 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
 	if (new_parent == context->parent)
 		return;
 
+	/*
+	 * This must happen here while we still have information about the
+	 * existing ancestor.
+	 */
+	MemoryContextTransferPool(context, new_parent);
+
 	/* Delink from existing parent, if any */
 	if (context->parent)
 	{
@@ -726,6 +812,10 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
 		context->prevchild = NULL;
 		context->nextchild = NULL;
 	}
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	MemoryContextCheck(context);
+#endif
 }
 
 /*
@@ -1090,18 +1180,30 @@ MemoryContextStatsPrint(MemoryContext context, void *passthru,
 								 level, name, stats_string, truncated_ident)));
 }
 
+#ifdef MEMORY_CONTEXT_CHECKING
+
+static void
+MemoryContextCheckPool(MemoryContext context)
+{
+	if (MemoryContextOwnsPool(context))
+		Assert(context->pool.allocated == MemoryContextMemAllocated(context, true));
+
+	if (context->parent)
+		Assert(context->pool.outer == MemoryContextGetPool(context->parent));
+}
+
 /*
  * MemoryContextCheck
  *		Check all chunks in the named context and its children.
  *
  * This is just a debugging utility, so it's not fancy.
  */
-#ifdef MEMORY_CONTEXT_CHECKING
 void
 MemoryContextCheck(MemoryContext context)
 {
 	Assert(MemoryContextIsValid(context));
 	context->methods->check(context);
+	MemoryContextCheckPool(context);
 
 	for (MemoryContext curr = context->firstchild;
 		 curr != NULL;
@@ -1109,8 +1211,10 @@ MemoryContextCheck(MemoryContext context)
 	{
 		Assert(MemoryContextIsValid(curr));
 		curr->methods->check(curr);
+		MemoryContextCheckPool(curr);
 	}
 }
+
 #endif
 
 /*
@@ -1166,6 +1270,9 @@ MemoryContextCreate(MemoryContext node,
 	node->parent = parent;
 	node->firstchild = NULL;
 	node->mem_allocated = 0;
+	node->pool.limit = 0;
+	node->pool.allocated = 0;
+	node->pool.outer = NULL;
 	node->prevchild = NULL;
 	node->name = name;
 	node->ident = NULL;
@@ -1180,6 +1287,7 @@ MemoryContextCreate(MemoryContext node,
 		parent->firstchild = node;
 		/* inherit allowInCritSection flag from parent */
 		node->allowInCritSection = parent->allowInCritSection;
+		node->pool.outer = MemoryContextGetPool(parent);
 	}
 	else
 	{
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index 4b24778fe1e..8c3bc7b7d30 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -114,6 +114,21 @@ typedef struct MemoryContextMethods
 } MemoryContextMethods;
 
 
+/*
+ * Memory Pools account for memory usage.
+ *
+ * If limit > 0, then this pool is owned by the containing context, and
+ * 'allocated' is valid. If limit == 0, the pool was inherited from the parent
+ * context, and 'allocated' is invalid.
+ */
+typedef struct MemoryPool
+{
+	Size		limit;			/* limit (not enforced) */
+	Size		allocated;		/* allocation total for subtree */
+	struct MemoryPool *outer;	/* outer containing pool */
+} MemoryPool;
+
+
 typedef struct MemoryContextData
 {
 	pg_node_attr(abstract)		/* there are no nodes of this type */
@@ -123,6 +138,7 @@ typedef struct MemoryContextData
 	bool		isReset;		/* T = no space allocated since last reset */
 	bool		allowInCritSection; /* allow palloc in critical section */
 	Size		mem_allocated;	/* track memory allocated for this context */
+	MemoryPool	pool;			/* accounting, if enabled */
 	const MemoryContextMethods *methods;	/* virtual function table */
 	MemoryContext parent;		/* NULL if no parent (toplevel context) */
 	MemoryContext firstchild;	/* head of linked list of children */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 11ab1717a16..e531876ba66 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -86,6 +86,7 @@ extern bool MemoryContextIsEmpty(MemoryContext context);
 extern Size MemoryContextMemAllocated(MemoryContext context, bool recurse);
 extern void MemoryContextMemConsumed(MemoryContext context,
 									 MemoryContextCounters *consumed);
+extern MemoryPool *MemoryContextCreatePool(MemoryContext owner, Size limit);
 extern void MemoryContextStats(MemoryContext context);
 extern void MemoryContextStatsDetail(MemoryContext context,
 									 int max_level, int max_children,
diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h
index 8f793f3c781..8cacd8abb8c 100644
--- a/src/include/utils/memutils_internal.h
+++ b/src/include/utils/memutils_internal.h
@@ -17,6 +17,7 @@
 #define MEMUTILS_INTERNAL_H
 
 #include "utils/memutils.h"
+#include "port/pg_bitutils.h"
 
 /* These functions implement the MemoryContext API for AllocSet context. */
 extern void *AllocSetAlloc(MemoryContext context, Size size, int flags);
@@ -157,16 +158,89 @@ extern void MemoryContextCreate(MemoryContext node,
 								MemoryContext parent,
 								const char *name);
 
+static inline bool
+MemoryPoolIsValid(MemoryPool *pool)
+{
+	if (pool->limit > 0)
+		return true;
+
+	Assert(pool->allocated == 0);
+	return false;
+}
+
+static inline bool
+MemoryContextOwnsPool(MemoryContext context)
+{
+	return MemoryPoolIsValid(&context->pool);
+}
+
+static inline MemoryPool *
+MemoryPoolOuter(MemoryPool *pool)
+{
+	Assert(pool->outer == NULL || MemoryPoolIsValid(pool->outer));
+	return pool->outer;
+}
+
+/* get owned or inherited pool for memory context, or NULL */
+static inline MemoryPool *
+MemoryContextGetPool(MemoryContext context)
+{
+	MemoryPool *pool;
+
+	if (MemoryContextOwnsPool(context))
+		pool = &context->pool;
+	else
+		pool = context->pool.outer;
+
+	Assert(pool == NULL || MemoryPoolIsValid(pool));
+	return pool;
+}
+
+/*
+ * If the context is part of a memory pool, calculate a reasonable max block
+ * size given the memory pool limit. This avoids a case where a small chunk
+ * request causes a large block allocation that overwhelms the limit.
+ * Calculated dynamically because the limit could change, e.g. during
+ * MemoryContextSetParent().
+ */
+static inline Size
+MemoryContextBlockSizeLimit(MemoryContext context, Size min, Size max)
+{
+	MemoryPool *pool = MemoryContextGetPool(context);
+	Size		maxBlockSize;
+
+	if (pool != NULL)
+	{
+		/* reasonable value chosen as 1/16th of the limit */
+		maxBlockSize = pg_prevpower2_size_t(pool->limit / 16);
+
+		maxBlockSize = Min(maxBlockSize, max);
+		maxBlockSize = Max(maxBlockSize, min);
+	}
+	else
+		maxBlockSize = max;
+
+	return maxBlockSize;
+}
+
 /*
  * MemoryContextUpdateAlloc()
  *
- * Update allocation total.
+ * Update allocation total for context and pool(s).
  */
 static inline void
 MemoryContextUpdateAlloc(MemoryContext context, ssize_t size)
 {
 	Assert(size >= 0 || -size <= context->mem_allocated);
 	context->mem_allocated += size;
+
+	for (MemoryPool *pool = MemoryContextGetPool(context);
+		 pool != NULL;
+		 pool = MemoryPoolOuter(pool))
+	{
+		Assert(size >= 0 || -size <= pool->allocated);
+		pool->allocated += size;
+	}
 }
 
 extern void *MemoryContextAllocationFailure(MemoryContext context, Size size,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 0abdb2d37e2..9fe2b799476 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1754,6 +1754,7 @@ MemoryContextData
 MemoryContextId
 MemoryContextMethodID
 MemoryContextMethods
+MemoryPool
 MemoryStatsPrintFunc
 MergeAction
 MergeActionState
-- 
2.43.0

