head	1.8;
access;
symbols
	release_4_2:1.8
	aix_ok:1.8;
locks; strict;
comment	@ * @;


1.8
date	91.07.03.19.43.03;	author mao;	state Exp;
branches;
next	1.7;

1.7
date	91.04.28.15.20.56;	author mao;	state Exp;
branches;
next	1.6;

1.6
date	91.04.24.18.44.20;	author mao;	state Exp;
branches;
next	1.5;

1.5
date	91.04.24.16.15.18;	author mao;	state Exp;
branches;
next	1.4;

1.4
date	91.04.21.21.41.39;	author mao;	state Exp;
branches;
next	1.3;

1.3
date	91.04.21.14.32.49;	author mao;	state Exp;
branches;
next	1.2;

1.2
date	91.04.20.16.03.02;	author mao;	state Exp;
branches;
next	1.1;

1.1
date	91.04.18.23.56.44;	author mao;	state Exp;
branches;
next	;


desc
@header file for new btree code
@


1.8
log
@change unique key generation, improve fanout
@
text
@/*
 *  btree.h -- header file for postgres btree access method implementation.
 *
 *	$Header: /users/mao/postgres/src/lib/H/access/RCS/nbtree.h,v 1.7 1991/04/28 15:20:56 mao Exp mao $
 */

/*
 *  BTPageOpaqueData -- At the end of every page, we store a pointer to
 *  both siblings in the tree.  See Lehman and Yao's paper for more info.
 *  In addition, we need to know what sort of page this is (leaf or internal),
 *  and whether the page is available for reuse.
 *
 *  Lehman and Yao's algorithm requires a ``high key'' on every page.  The
 *  high key on a page is guaranteed to be greater than or equal to any key
 *  that appears on this page.  Our insertion algorithm guarantees that we
 *  can use the initial least key on our right sibling as the high key.  We
 *  allocate space for the line pointer to the high key in the opaque data
 *  at the end of the page.
 *
 *  Rightmost pages in the tree have no high key.
 */

typedef struct BTPageOpaqueData {
	PageNumber	btpo_prev;
	PageNumber	btpo_next;
	uint16		btpo_flags;

#define BTP_LEAF	(1 << 0)
#define BTP_ROOT	(1 << 1)
#define BTP_FREE	(1 << 2)

} BTPageOpaqueData;

typedef BTPageOpaqueData	*BTPageOpaque;

/*
 *  ScanOpaqueData is used to remember which buffers we're currently
 *  examining in the scan.  We keep these buffers locked and pinned and
 *  recorded in the opaque entry of the scan in order to avoid doing a
 *  ReadBuffer() for every tuple in the index.  This avoids semop() calls,
 *  which are expensive.
 */

typedef struct BTScanOpaqueData {
    Buffer	btso_curbuf;
    Buffer	btso_mrkbuf;
} BTScanOpaqueData;

typedef BTScanOpaqueData	*BTScanOpaque;

/*
 *  BTItems are what we store in the btree.  Each item has an index tuple,
 *  including key and pointer values.  In addition, we must guarantee that
 *  all tuples in the index are unique, in order to satisfy some assumptions
 *  in Lehman and Yao.  The way that we do this is by generating a new OID
 *  for every insertion that we do in the tree.  This adds four bytes to the
 *  size of btree index tuples.
 */

typedef struct BTItemData {
	OID			bti_oid;
	IndexTupleData		bti_itup;
} BTItemData;

typedef BTItemData	*BTItem;

/*
 *  BTStackData -- As we descend a tree, we push the (key, pointer) pairs
 *  from internal nodes onto a private stack.  If we split a leaf, we use
 *  this stack to walk back up the tree and insert data into parent nodes
 *  (and possibly to split them, too).  Lehman and Yao's update algorithm
 *  guarantees that under no circumstances can our private stack give us
 *  an irredeemably bad picture up the tree.  Again, see the paper for
 *  details.
 */

typedef struct BTStackData {
	PageNumber		bts_blkno;
	OffsetIndex		bts_offset;
	BTItem			bts_btitem;
	struct BTStackData	*bts_parent;
} BTStackData;

typedef BTStackData	*BTStack;

/*
 *  We need to be able to tell the difference between read and write
 *  requests for pages, in order to do locking correctly.
 */

#define	BT_READ		0
#define	BT_WRITE	1

/*
 *  Similarly, the difference between insertion and non-insertion binary
 *  searches on a given page makes a difference when we're descending the
 *  tree.
 */

#define BT_INSERTION	0
#define BT_DESCENT	1

/*
 *  In general, the btree code tries to localize its knowledge about page
 *  layout to a couple of routines.  However, we need a special value to
 *  indicate "no page number" in those places where we expect page numbers.
 */

#define P_NONE		0

/*
 *  Strategy numbers -- ordering of these is <, <=, =, >=, > 
 */

#define BTLessStrategyNumber		1
#define BTLessEqualStrategyNumber	2
#define BTEqualStrategyNumber		3
#define BTGreaterEqualStrategyNumber	4
#define BTGreaterStrategyNumber		5
#define BTMaxStrategyNumber		5

/*
 *  When a new operator class is declared, we require that the user supply
 *  us with an amproc procudure for determining whether, for two keys a and
 *  b, a < b, a = b, or a > b.  This routine must return < 0, 0, > 0,
 *  respectively, in these three cases.  Since we only have one such proc
 *  in amproc, it's number 1.
 */

#define BTORDER_PROC	1

/* public routines */
char			*btgettuple();
InsertIndexResult	btinsert();

/* private routines */
InsertIndexResult	_bt_doinsert();
InsertIndexResult	_bt_insertonpg();
OffsetIndex		_bt_pgaddtup();
ScanKey			_bt_mkscankey();
BTStack			_bt_search();
BTStack			_bt_searchr();
void			_bt_relbuf();
Buffer			_bt_getbuf();
void			_bt_wrtbuf();
void			_bt_wrtnorelbuf();
bool			_bt_skeycmp();
OffsetIndex		_bt_binsrch();
OffsetIndex		_bt_firsteq();
Buffer			_bt_getstackbuf();
RetrieveIndexResult	_bt_first();
RetrieveIndexResult	_bt_next();
RetrieveIndexResult	_bt_endpoint();
bool			_bt_step();
bool			_bt_twostep();
StrategyNumber		_bt_getstrat();
bool			_bt_invokestrat();
BTItem			_bt_formitem();
bool			_bt_goesonpg();
void			_bt_regscan();
void			_bt_dropscan();
void			_bt_adjscans();
void			_bt_scandel();
bool			_bt_scantouched();
@


1.7
log
@cache current buffer in scans to avoid unnecessary calls to semop(); get
rid of BT_NONE access class, since this is no longer needed.
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.6 91/04/24 18:44:20 mao Exp Locker: mao $
d55 3
a57 5
 *  in Lehman and Yao.  The way that we do this is by storing the XID of the
 *  inserting transaction, and a sequence number, which tells us which index
 *  tuple insertion in that transaction this is.  With alignment, this is
 *  twelve bytes of space overhead in order to provide high concurrency and
 *  short-duration locks.
d61 1
a61 2
	TransactionIdData	bti_xid;
	uint32			bti_seqno;
a65 11

/*
 *  The following macros manage btitem sequence numbers for insertions.
 *  The global variable is in private space, so we won't have contention
 *  problems since we further disambiguate with XID.  However, this does
 *  mean that this algorithm is not easy to parallelize.
 */

extern uint32 BTCurrSeqNo;
#define	BTSEQ_INIT()	BTCurrSeqNo = 0
#define BTSEQ_GET()	BTCurrSeqNo++
@


1.6
log
@fix scan positioning code
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.5 91/04/24 16:15:18 mao Exp Locker: mao $
d37 15
d102 1
a102 2
 *  requests for pages, in order to do locking correctly.  If the access
 *  type is NONE, it means we still hold a lock from some earlier operation.
a106 1
#define BT_NONE		2
d174 5
@


1.5
log
@insertions, stack backtracking, walks right, and duplicate key handling
are now correct
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.4 91/04/21 21:41:39 mao Exp Locker: mao $
d156 1
@


1.4
log
@multi-level trees are close to working
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.3 91/04/21 14:32:49 mao Exp Locker: mao $
a31 1
	char		btpo_unused[7800];
d38 7
a44 5
 *  including key and pointer values.  In addition, BTItems have sequence
 *  numbers.  These sequence numbers are used to distinguish among multiple
 *  occurrences of a single key in the index.  Lehman and Yao disallow
 *  duplicate keys in their algorithm, so we use sequence numbers to guarantee
 *  that all tree entries are unique even when user data is not.
d48 3
a50 2
	uint32		bti_seqno;
	IndexTupleData	bti_itup;
d56 11
d138 2
a140 1
InsertIndexResult	_bt_insertonpg();
d158 2
@


1.3
log
@more extern decls for strategy management, searches
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.2 91/04/20 16:03:02 mao Exp Locker: mao $
d32 1
@


1.2
log
@single-page indices appear to work
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.1 91/04/18 23:56:44 mao Exp Locker: mao $
d133 1
a133 1
bool			_bt_skeyless();
d135 1
d140 1
d142 1
a142 1
bool			_bt_step();
@


1.1
log
@Initial revision
@
text
@d4 1
a4 1
 *	$Header$
d29 2
a30 1
#define BTP_FREE	(1 << 1)
d73 2
a74 1
 *  requests for pages, in order to do locking correctly.
d76 1
d79 1
d86 1
d101 2
a102 1
#define BTLessThanStrategyNumber	1
d119 5
a123 1
/* extern decls for routines used in this access method */
a124 1
InsertIndexResult	btinsert();
d132 1
d136 5
@
