postgresql/contrib/pgstattuple/pgstatapprox.c

/*-------------------------------------------------------------------------
 *
 * pgstatapprox.c
 *		  Bloat estimation functions
 *
 * Copyright (c) 2014-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		  contrib/pgstattuple/pgstatapprox.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/pg_am_d.h"
#include "commands/vacuum.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
#include "utils/builtins.h"

PG_FUNCTION_INFO_V1(pgstattuple_approx);
PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);

Datum		pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);

typedef struct output_type
{
	uint64		table_len;
	uint64		scanned_percent;
	uint64		tuple_count;
	uint64		tuple_len;
	double		tuple_percent;
	uint64		dead_tuple_count;
	uint64		dead_tuple_len;
	double		dead_tuple_percent;
	uint64		free_space;
	double		free_percent;
} output_type;

#define NUM_OUTPUT_COLUMNS 10

/*
 * This function takes an already open relation and scans its pages,
 * skipping those that have the corresponding visibility map bit set.
 * For pages we skip, we find the free space from the free space map
 * and approximate tuple_len on that basis. For the others, we count
 * the exact number of dead tuples etc.
 *
 * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
 * we do not try to avoid skipping single pages.
 */
static void
statapprox_heap(Relation rel, output_type *stat)
{
	BlockNumber scanned,
				nblocks,
				blkno;
	Buffer		vmbuffer = InvalidBuffer;
	BufferAccessStrategy bstrategy;
	TransactionId OldestXmin;

	OldestXmin = GetOldestNonRemovableTransactionId(rel);
	bstrategy = GetAccessStrategy(BAS_BULKREAD);

	nblocks = RelationGetNumberOfBlocks(rel);
	scanned = 0;

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		Size		freespace;

		CHECK_FOR_INTERRUPTS();

		/*
		 * If the page has only visible tuples, then we can find out the free
		 * space from the FSM and move on.
		 */
		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
		{
			freespace = GetRecordedFreeSpace(rel, blkno);
			stat->tuple_len += BLCKSZ - freespace;
			stat->free_space += freespace;
			continue;
		}

		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, bstrategy);

		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		/*
		 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
		 * treat them as being free space for our purposes.
		 */
		if (!PageIsNew(page))
			stat->free_space += PageGetHeapFreeSpace(page);
		else
			stat->free_space += BLCKSZ - SizeOfPageHeaderData;

		/* We may count the page as scanned even if it's new/empty */
		scanned++;

		if (PageIsNew(page) || PageIsEmpty(page))
		{
			UnlockReleaseBuffer(buf);
			continue;
		}

		/*
		 * Look at each tuple on the page and decide whether it's live or
		 * dead, then count it and its size. Unlike lazy_scan_heap, we can
		 * afford to ignore problems and special cases.
		 */
		maxoff = PageGetMaxOffsetNumber(page);

		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;
			HeapTupleData tuple;

			itemid = PageGetItemId(page, offnum);

			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
				ItemIdIsDead(itemid))
			{
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);
			tuple.t_tableOid = RelationGetRelid(rel);

			/*
			 * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
			 * as "dead" while DELETE_IN_PROGRESS tuples are "live".  We don't
			 * bother distinguishing tuples inserted/deleted by our own
			 * transaction.
			 */
			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
			{
				case HEAPTUPLE_LIVE:
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					stat->tuple_len += tuple.t_len;
					stat->tuple_count++;
					break;
				case HEAPTUPLE_DEAD:
				case HEAPTUPLE_RECENTLY_DEAD:
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					stat->dead_tuple_len += tuple.t_len;
					stat->dead_tuple_count++;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}
		}

		UnlockReleaseBuffer(buf);
	}

	stat->table_len = (uint64) nblocks * BLCKSZ;

	/*
	 * We don't know how many tuples are in the pages we didn't scan, so
	 * extrapolate the live-tuple count to the whole table in the same way
	 * that VACUUM does.  (Like VACUUM, we're not taking a random sample, so
	 * just extrapolating linearly seems unsafe.)  There should be no dead
	 * tuples in all-visible pages, so no correction is needed for that, and
	 * we already accounted for the space in those pages, too.
	 */
	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
											   stat->tuple_count);

	/* It's not clear if we could get -1 here, but be safe. */
	stat->tuple_count = Max(stat->tuple_count, 0);

	/*
	 * Calculate percentages if the relation has one or more pages.
	 */
	if (nblocks != 0)
	{
		stat->scanned_percent = 100 * scanned / nblocks;
		stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
		stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
		stat->free_percent = 100.0 * stat->free_space / stat->table_len;
	}

	if (BufferIsValid(vmbuffer))
	{
		ReleaseBuffer(vmbuffer);
		vmbuffer = InvalidBuffer;
	}
}

/*
 * Returns estimated live/dead tuple statistics for the given relid.
 *
 * The superuser() check here must be kept as the library might be upgraded
 * without the extension being upgraded, meaning that in pre-1.5 installations
 * these functions could be called by any user.
 */
Datum
pgstattuple_approx(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use pgstattuple functions")));

	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
}

/*
 * As of pgstattuple version 1.5, we no longer need to check if the user
 * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
 * Users can then grant access to it based on their policies.
 *
 * Otherwise identical to pgstattuple_approx (above).
 */
Datum
pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);

	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
}

Datum
pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
{
	Relation	rel;
	output_type stat = {0};
	TupleDesc	tupdesc;
	bool		nulls[NUM_OUTPUT_COLUMNS];
	Datum		values[NUM_OUTPUT_COLUMNS];
	HeapTuple	ret;
	int			i = 0;

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
		elog(ERROR, "incorrect number of output arguments");

	rel = relation_open(relid, AccessShareLock);

	/*
	 * Reject attempts to read non-local temporary relations; we would be
	 * likely to get wrong data since we have no visibility into the owning
	 * session's local buffers.
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	/*
	 * We support only relation kinds with a visibility map and a free space
	 * map.
	 */
	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
		  rel->rd_rel->relkind == RELKIND_MATVIEW ||
		  rel->rd_rel->relkind == RELKIND_TOASTVALUE))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("relation \"%s\" is of wrong relation kind",
						RelationGetRelationName(rel)),
				 errdetail_relkind_not_supported(rel->rd_rel->relkind)));

	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("only heap AM is supported")));

	statapprox_heap(rel, &stat);

	relation_close(rel, AccessShareLock);

	memset(nulls, 0, sizeof(nulls));

	values[i++] = Int64GetDatum(stat.table_len);
	values[i++] = Float8GetDatum(stat.scanned_percent);
	values[i++] = Int64GetDatum(stat.tuple_count);
	values[i++] = Int64GetDatum(stat.tuple_len);
	values[i++] = Float8GetDatum(stat.tuple_percent);
	values[i++] = Int64GetDatum(stat.dead_tuple_count);
	values[i++] = Int64GetDatum(stat.dead_tuple_len);
	values[i++] = Float8GetDatum(stat.dead_tuple_percent);
	values[i++] = Int64GetDatum(stat.free_space);
	values[i++] = Float8GetDatum(stat.free_percent);

	ret = heap_form_tuple(tupdesc, values, nulls);
	return HeapTupleGetDatum(ret);
}
v15.2 2023-05-09 21:11:15 +00:00			`/*-------------------------------------------------------------------------`
			`*`
			`* pgstatapprox.c`
			`* Bloat estimation functions`
			`*`
			`* Copyright (c) 2014-2022, PostgreSQL Global Development Group`
			`*`
			`* IDENTIFICATION`
			`* contrib/pgstattuple/pgstatapprox.c`
			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#include "postgres.h"`

			`#include "access/heapam.h"`
			`#include "access/htup_details.h"`
			`#include "access/multixact.h"`
			`#include "access/relation.h"`
			`#include "access/transam.h"`
			`#include "access/visibilitymap.h"`
			`#include "access/xact.h"`
			`#include "catalog/namespace.h"`
			`#include "catalog/pg_am_d.h"`
			`#include "commands/vacuum.h"`
			`#include "funcapi.h"`
			`#include "miscadmin.h"`
			`#include "storage/bufmgr.h"`
			`#include "storage/freespace.h"`
			`#include "storage/lmgr.h"`
			`#include "storage/procarray.h"`
			`#include "utils/builtins.h"`

			`PG_FUNCTION_INFO_V1(pgstattuple_approx);`
			`PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);`

			`Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);`

			`typedef struct output_type`
			`{`
			`uint64 table_len;`
			`uint64 scanned_percent;`
			`uint64 tuple_count;`
			`uint64 tuple_len;`
			`double tuple_percent;`
			`uint64 dead_tuple_count;`
			`uint64 dead_tuple_len;`
			`double dead_tuple_percent;`
			`uint64 free_space;`
			`double free_percent;`
			`} output_type;`

			`#define NUM_OUTPUT_COLUMNS 10`

			`/*`
			`* This function takes an already open relation and scans its pages,`
			`* skipping those that have the corresponding visibility map bit set.`
			`* For pages we skip, we find the free space from the free space map`
			`* and approximate tuple_len on that basis. For the others, we count`
			`* the exact number of dead tuples etc.`
			`*`
			`* This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but`
			`* we do not try to avoid skipping single pages.`
			`*/`
			`static void`
			`statapprox_heap(Relation rel, output_type *stat)`
			`{`
			`BlockNumber scanned,`
			`nblocks,`
			`blkno;`
			`Buffer vmbuffer = InvalidBuffer;`
			`BufferAccessStrategy bstrategy;`
			`TransactionId OldestXmin;`

			`OldestXmin = GetOldestNonRemovableTransactionId(rel);`
			`bstrategy = GetAccessStrategy(BAS_BULKREAD);`

			`nblocks = RelationGetNumberOfBlocks(rel);`
			`scanned = 0;`

			`for (blkno = 0; blkno < nblocks; blkno++)`
			`{`
			`Buffer buf;`
			`Page page;`
			`OffsetNumber offnum,`
			`maxoff;`
			`Size freespace;`

			`CHECK_FOR_INTERRUPTS();`

			`/*`
			`* If the page has only visible tuples, then we can find out the free`
			`* space from the FSM and move on.`
			`*/`
			`if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))`
			`{`
			`freespace = GetRecordedFreeSpace(rel, blkno);`
			`stat->tuple_len += BLCKSZ - freespace;`
			`stat->free_space += freespace;`
			`continue;`
			`}`

			`buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,`
			`RBM_NORMAL, bstrategy);`

			`LockBuffer(buf, BUFFER_LOCK_SHARE);`

			`page = BufferGetPage(buf);`

			`/*`
			`* It's not safe to call PageGetHeapFreeSpace() on new pages, so we`
			`* treat them as being free space for our purposes.`
			`*/`
			`if (!PageIsNew(page))`
			`stat->free_space += PageGetHeapFreeSpace(page);`
			`else`
			`stat->free_space += BLCKSZ - SizeOfPageHeaderData;`

			`/* We may count the page as scanned even if it's new/empty */`
			`scanned++;`

			`if (PageIsNew(page) \|\| PageIsEmpty(page))`
			`{`
			`UnlockReleaseBuffer(buf);`
			`continue;`
			`}`

			`/*`
			`* Look at each tuple on the page and decide whether it's live or`
			`* dead, then count it and its size. Unlike lazy_scan_heap, we can`
			`* afford to ignore problems and special cases.`
			`*/`
			`maxoff = PageGetMaxOffsetNumber(page);`

			`for (offnum = FirstOffsetNumber;`
			`offnum <= maxoff;`
			`offnum = OffsetNumberNext(offnum))`
			`{`
			`ItemId itemid;`
			`HeapTupleData tuple;`

			`itemid = PageGetItemId(page, offnum);`

			`if (!ItemIdIsUsed(itemid) \|\| ItemIdIsRedirected(itemid) \|\|`
			`ItemIdIsDead(itemid))`
			`{`
			`continue;`
			`}`

			`Assert(ItemIdIsNormal(itemid));`

			`ItemPointerSet(&(tuple.t_self), blkno, offnum);`

			`tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);`
			`tuple.t_len = ItemIdGetLength(itemid);`
			`tuple.t_tableOid = RelationGetRelid(rel);`

			`/*`
			`* We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples`
			`* as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't`
			`* bother distinguishing tuples inserted/deleted by our own`
			`* transaction.`
			`*/`
			`switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))`
			`{`
			`case HEAPTUPLE_LIVE:`
			`case HEAPTUPLE_DELETE_IN_PROGRESS:`
			`stat->tuple_len += tuple.t_len;`
			`stat->tuple_count++;`
			`break;`
			`case HEAPTUPLE_DEAD:`
			`case HEAPTUPLE_RECENTLY_DEAD:`
			`case HEAPTUPLE_INSERT_IN_PROGRESS:`
			`stat->dead_tuple_len += tuple.t_len;`
			`stat->dead_tuple_count++;`
			`break;`
			`default:`
			`elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");`
			`break;`
			`}`
			`}`

			`UnlockReleaseBuffer(buf);`
			`}`

			`stat->table_len = (uint64) nblocks * BLCKSZ;`

			`/*`
			`* We don't know how many tuples are in the pages we didn't scan, so`
			`* extrapolate the live-tuple count to the whole table in the same way`
			`* that VACUUM does. (Like VACUUM, we're not taking a random sample, so`
			`* just extrapolating linearly seems unsafe.) There should be no dead`
			`* tuples in all-visible pages, so no correction is needed for that, and`
			`* we already accounted for the space in those pages, too.`
			`*/`
			`stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,`
			`stat->tuple_count);`

			`/* It's not clear if we could get -1 here, but be safe. */`
			`stat->tuple_count = Max(stat->tuple_count, 0);`

			`/*`
			`* Calculate percentages if the relation has one or more pages.`
			`*/`
			`if (nblocks != 0)`
			`{`
			`stat->scanned_percent = 100 * scanned / nblocks;`
			`stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;`
			`stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;`
			`stat->free_percent = 100.0 * stat->free_space / stat->table_len;`
			`}`

			`if (BufferIsValid(vmbuffer))`
			`{`
			`ReleaseBuffer(vmbuffer);`
			`vmbuffer = InvalidBuffer;`
			`}`
			`}`

			`/*`
			`* Returns estimated live/dead tuple statistics for the given relid.`
			`*`
			`* The superuser() check here must be kept as the library might be upgraded`
			`* without the extension being upgraded, meaning that in pre-1.5 installations`
			`* these functions could be called by any user.`
			`*/`
			`Datum`
			`pgstattuple_approx(PG_FUNCTION_ARGS)`
			`{`
			`Oid relid = PG_GETARG_OID(0);`

			`if (!superuser())`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),`
			`errmsg("must be superuser to use pgstattuple functions")));`

			`PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));`
			`}`

			`/*`
			`* As of pgstattuple version 1.5, we no longer need to check if the user`
			`* is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.`
			`* Users can then grant access to it based on their policies.`
			`*`
			`* Otherwise identical to pgstattuple_approx (above).`
			`*/`
			`Datum`
			`pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)`
			`{`
			`Oid relid = PG_GETARG_OID(0);`

			`PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));`
			`}`

			`Datum`
			`pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)`
			`{`
			`Relation rel;`
			`output_type stat = {0};`
			`TupleDesc tupdesc;`
			`bool nulls[NUM_OUTPUT_COLUMNS];`
			`Datum values[NUM_OUTPUT_COLUMNS];`
			`HeapTuple ret;`
			`int i = 0;`

			`if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)`
			`elog(ERROR, "return type must be a row type");`

			`if (tupdesc->natts != NUM_OUTPUT_COLUMNS)`
			`elog(ERROR, "incorrect number of output arguments");`

			`rel = relation_open(relid, AccessShareLock);`

			`/*`
			`* Reject attempts to read non-local temporary relations; we would be`
			`* likely to get wrong data since we have no visibility into the owning`
			`* session's local buffers.`
			`*/`
			`if (RELATION_IS_OTHER_TEMP(rel))`
			`ereport(ERROR,`
			`(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),`
			`errmsg("cannot access temporary tables of other sessions")));`

			`/*`
			`* We support only relation kinds with a visibility map and a free space`
			`* map.`
			`*/`
			`if (!(rel->rd_rel->relkind == RELKIND_RELATION \|\|`
			`rel->rd_rel->relkind == RELKIND_MATVIEW \|\|`
			`rel->rd_rel->relkind == RELKIND_TOASTVALUE))`
			`ereport(ERROR,`
			`(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),`
			`errmsg("relation \"%s\" is of wrong relation kind",`
			`RelationGetRelationName(rel)),`
			`errdetail_relkind_not_supported(rel->rd_rel->relkind)));`

			`if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)`
			`ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),`
			`errmsg("only heap AM is supported")));`

			`statapprox_heap(rel, &stat);`

			`relation_close(rel, AccessShareLock);`

			`memset(nulls, 0, sizeof(nulls));`

			`values[i++] = Int64GetDatum(stat.table_len);`
			`values[i++] = Float8GetDatum(stat.scanned_percent);`
			`values[i++] = Int64GetDatum(stat.tuple_count);`
			`values[i++] = Int64GetDatum(stat.tuple_len);`
			`values[i++] = Float8GetDatum(stat.tuple_percent);`
			`values[i++] = Int64GetDatum(stat.dead_tuple_count);`
			`values[i++] = Int64GetDatum(stat.dead_tuple_len);`
			`values[i++] = Float8GetDatum(stat.dead_tuple_percent);`
			`values[i++] = Int64GetDatum(stat.free_space);`
			`values[i++] = Float8GetDatum(stat.free_percent);`

			`ret = heap_form_tuple(tupdesc, values, nulls);`
			`return HeapTupleGetDatum(ret);`
			`}`