head	1.6;
access;
symbols
	release_4_2:1.6
	aix_ok:1.6;
locks; strict;
comment	@ * @;


1.6
date	91.06.26.19.12.14;	author mao;	state Exp;
branches;
next	1.5;

1.5
date	91.06.12.08.57.36;	author mao;	state Exp;
branches;
next	1.4;

1.4
date	91.06.12.03.56.58;	author mao;	state Exp;
branches;
next	1.3;

1.3
date	91.06.11.07.41.44;	author mao;	state Exp;
branches;
next	1.2;

1.2
date	91.06.10.21.05.53;	author mao;	state Exp;
branches;
next	1.1;

1.1
date	91.05.28.14.34.00;	author mao;	state Exp;
branches;
next	;


desc
@no-overwrite btree header file
@


1.6
log
@compile-time switch for including nobtree code
@
text
@/*
 *  btree.h -- header file for postgres btree access method implementation.
 *
 *	$Header: /users/mao/nobtree/RCS/nobtree.h,v 1.5 1991/06/12 08:57:36 mao Exp $
 */

#ifdef NOBTREE

/* exactly one of these must be defined */
#undef	SHADOW
#undef	NORMAL
#define	REORG

/*
 *  NOBTPageOpaqueData -- At the end of every page, we store a pointer to
 *  both siblings in the tree.  See Lehman and Yao's paper for more info.
 *  In addition, we need to know what sort of page this is (leaf or internal),
 *  and whether the page is available for reuse.
 *
 *  Lehman and Yao's algorithm requires a ``high key'' on every page.  The
 *  high key on a page is guaranteed to be greater than or equal to any key
 *  that appears on this page.  Our insertion algorithm guarantees that we
 *  can use the initial least key on our right sibling as the high key.  We
 *  allocate space for the line pointer to the high key in the opaque data
 *  at the end of the page.
 *
 *  Rightmost pages in the tree have no high key.
 */

typedef struct NOBTPageOpaqueData {
#ifdef	SHADOW
	uint32		nobtpo_linktok;
	PageNumber	nobtpo_prev;
	PageNumber	nobtpo_oldprev;
	uint32		nobtpo_prevtok;
	PageNumber	nobtpo_next;
	PageNumber	nobtpo_oldnext;
	uint32		nobtpo_nexttok;
	uint32		nobtpo_repltok;
	PageNumber	nobtpo_replaced;
	uint16		nobtpo_flags;
#endif	/* SHADOW */
#ifdef	REORG
	uint32		nobtpo_linktok;	/* to guarantee one split/sync */
	PageNumber	nobtpo_prev;
	PageNumber	nobtpo_oldprev;
	uint32		nobtpo_prevtok;
	PageNumber	nobtpo_next;
	PageNumber	nobtpo_oldnext;
	uint32		nobtpo_nexttok;
	LocationIndex	nobtpo_oldlow;
	uint16		nobtpo_flags;
#endif	/* REORG */
#ifdef	NORMAL
	PageNumber	nobtpo_prev;
	PageNumber	nobtpo_next;
	uint16		nobtpo_flags;
	uint16		nobtpo_filler;
#endif	/* NORMAL */

#define NOBTP_LEAF	(1 << 0)
#define NOBTP_ROOT	(1 << 1)
#define NOBTP_SIBOK	(1 << 3)
#define NOBTP_FREE	(1 << 4)

} NOBTPageOpaqueData;

typedef NOBTPageOpaqueData	*NOBTPageOpaque;

/*
 *  ScanOpaqueData is used to remember which buffers we're currently
 *  examining in the scan.  We keep these buffers locked and pinned and
 *  recorded in the opaque entry of the scan in order to avoid doing a
 *  ReadBuffer() for every tuple in the index.  This avoids semop() calls,
 *  which are expensive.
 */

typedef struct NOBTScanOpaqueData {
    Buffer	nobtso_curbuf;
    Buffer	nobtso_mrkbuf;
} NOBTScanOpaqueData;

typedef NOBTScanOpaqueData	*NOBTScanOpaque;

typedef struct NOBTIItemData {
#ifdef	SHADOW
	PageNumber		nobtii_oldchild;
	PageNumber		nobtii_child;
	unsigned short		nobtii_info;
	unsigned short		nobtii_filler;	   /* must longalign key */
#endif	/* SHADOW */
#ifdef	REORG
	PageNumber		nobtii_child;
	unsigned short		nobtii_info;
#endif	/* REORG */
#ifdef	NORMAL
	PageNumber		nobtii_child;
	unsigned short		nobtii_info;
#endif	/* NORMAL */
	/* MORE DATA FOLLOWS AT END OF STRUCT */
} NOBTIItemData;

typedef NOBTIItemData	*NOBTIItem;

#define NOBTIItemGetSize(i)	((i)->nobtii_info & 0x1FFF)
#define NOBTIItemGetDSize(i)	((i).nobtii_info & 0x1FFF)

typedef struct NOBTLItemData {
	IndexTupleData		nobtli_itup;
	/* MORE DATA FOLLOWS AT END OF STRUCT */
} NOBTLItemData;

typedef NOBTLItemData	*NOBTLItem;

/*
 *  NOBTStackData -- As we descend a tree, we push the (key, pointer) pairs
 *  from internal nodes onto a private stack.  If we split a leaf, we use
 *  this stack to walk back up the tree and insert data into parent nodes
 *  (and possibly to split them, too).  Lehman and Yao's update algorithm
 *  guarantees that under no circumstances can our private stack give us
 *  an irredeemably bad picture up the tree.  Again, see the paper for
 *  details.
 *
 *  For the no-overwrite algorithm, we store the keys that bound the link
 *  we traverse in the tree in the stack.  When we reach the child, we use
 *  these keys to verify that we traversed an active link.
 */

typedef struct NOBTStackData {
	PageNumber		nobts_blkno;
	OffsetIndex		nobts_offset;
	NOBTIItem		nobts_btitem;
#ifdef	REORG
	NOBTIItem		nobts_nxtitem;
#endif	/* REORG */
#ifdef	SHADOW
	NOBTIItem		nobts_nxtitem;
#endif	/* SHADOW */
	struct NOBTStackData	*nobts_parent;
} NOBTStackData;

typedef NOBTStackData	*NOBTStack;

/*
 *  We need to be able to tell the difference between read and write
 *  requests for pages, in order to do locking correctly.
 */

#define	NOBT_READ	0
#define	NOBT_WRITE	1

/*
 *  Similarly, the difference between insertion and non-insertion binary
 *  searches on a given page makes a difference when we're descending the
 *  tree.
 */

#define NOBT_INSERTION	0
#define NOBT_DESCENT	1

/*
 *  In general, the btree code tries to localize its knowledge about page
 *  layout to a couple of routines.  However, we need a special value to
 *  indicate "no page number" in those places where we expect page numbers.
 */

#define P_NONE		0

/*
 *  Strategy numbers -- ordering of these is <, <=, =, >=, > 
 */

#define NOBTLessStrategyNumber		1
#define NOBTLessEqualStrategyNumber	2
#define NOBTEqualStrategyNumber		3
#define NOBTGreaterEqualStrategyNumber	4
#define NOBTGreaterStrategyNumber	5
#define NOBTMaxStrategyNumber		5

/*
 *  When a new operator class is declared, we require that the user supply
 *  us with an amproc procudure for determining whether, for two keys a and
 *  b, a < b, a = b, or a > b.  This routine must return < 0, 0, > 0,
 *  respectively, in these three cases.  Since we only have one such proc
 *  in amproc, it's number 1.
 */

#define NOBTORDER_PROC	1

/* public routines */
char			*nobtgettuple();
InsertIndexResult	nobtinsert();

/* private routines */
InsertIndexResult	_nobt_doinsert();
InsertIndexResult	_nobt_insertonpg();
OffsetIndex		_nobt_pgaddtup();
ScanKey			_nobt_mkscankey();
ScanKey			_nobt_imkscankey();
NOBTStack		_nobt_search();
NOBTStack		_nobt_searchr();
void			_nobt_relbuf();
Buffer			_nobt_getbuf();
void			_nobt_wrtbuf();
void			_nobt_wrtnorelbuf();
bool			_nobt_skeycmp();
OffsetIndex		_nobt_binsrch();
OffsetIndex		_nobt_firsteq();
Buffer			_nobt_getstackbuf();
RetrieveIndexResult	_nobt_first();
RetrieveIndexResult	_nobt_next();
RetrieveIndexResult	_nobt_endpoint();
bool			_nobt_step();
bool			_nobt_twostep();
StrategyNumber		_nobt_getstrat();
bool			_nobt_invokestrat();
NOBTLItem		_nobt_formitem();
NOBTIItem		_nobt_formiitem();
bool			_nobt_goesonpg();
void			_nobt_regscan();
void			_nobt_dropscan();
void			_nobt_adjscans();
void			_nobt_scandel();
bool			_nobt_scantouched();
OffsetIndex		_nobt_findsplitloc();

#endif /* NOBTREE */
@


1.5
log
@all three implementations work correctly, need to collect timings
@
text
@d4 1
a4 1
 *	$Header: /users/mao/nobtree/RCS/nobtree.h,v 1.4 1991/06/12 03:56:58 mao Exp mao $
d7 2
d226 2
@


1.4
log
@#defines for normal, shadow implemenentation; both impls work
@
text
@d4 1
a4 1
 *	$Header: /users/mao/nobtree/RCS/nobtree.h,v 1.3 1991/06/11 07:41:44 mao Exp mao $
d9 2
a10 2
#define	NORMAL
#undef	REORG
d39 1
d41 11
d90 4
d131 3
@


1.3
log
@works with minimal per-tuple, per-page overhead
@
text
@d4 1
a4 1
 *	$Header: /users/mao/nobtree/RCS/nobtree.h,v 1.2 1991/06/10 21:05:53 mao Exp mao $
d7 5
d29 1
d39 4
d44 2
a70 16
#ifdef notdef
/*
 *  NOBTItems are what we store in the btree.  The oldchild entry is used
 *  to store a pointer to the previous child page on internal items after
 *  a new one has been added by a page split.  It's necessary to store these
 *  in order to guarantee recoverability.
 */

typedef struct NOBTItemData {
	PageNumber		nobti_oldchild;
	uint16			nobti_filler;		/* force longalign */
	IndexTupleData		nobti_itup;
} NOBTItemData;

typedef NOBTItemData	*NOBTItem;
#else /* notdef */
d72 1
d77 5
a95 1
#endif /* notdef */
a113 4
#ifdef notdef
	NOBTItem		nobts_btitem;
	NOBTItem		nobts_nxtitem;
#else /* notdef */
d115 1
d117 1
a117 1
#endif /* notdef */
a129 5
#ifdef notdef
#define	NOBT_NEXTLNK	2
#define	NOBT_PREVLNK	3
#define	NOBT_LINKTOK	4
#endif /* notdef */
@


1.2
log
@structures are too big, but the code works
@
text
@d4 1
a4 1
 *	$Header: RCS/nobtree.h,v 1.1 91/05/28 14:34:00 mao Exp Locker: mao $
d59 1
a59 1
#ifndef notdef
a75 1
	uint16			nobtii_flags;
d79 1
d85 3
a88 1
	uint16			nobtli_flags;
d113 1
d116 4
d132 1
d136 1
d185 1
d203 2
a204 1
NOBTItem		_nobt_formitem();
d211 1
@


1.1
log
@Initial revision
@
text
@d4 1
a4 1
 *	$Header: RCS/nbtree.h,v 1.7 91/04/28 15:20:56 mao Exp $
d8 1
a8 1
 *  NONOBTPageOpaqueData -- At the end of every page, we store a pointer to
d23 2
a24 1
typedef struct NONOBTPageOpaqueData {
d26 2
d29 4
d37 2
a38 1
#define NOBTP_FREE	(1 << 2)
d40 1
a40 1
} NONOBTPageOpaqueData;
d42 1
a42 1
typedef NONOBTPageOpaqueData	*NOBTPageOpaque;
d52 1
a52 1
typedef struct NONOBTScanOpaqueData {
d55 1
a55 1
} NONOBTScanOpaqueData;
d57 1
a57 1
typedef NONOBTScanOpaqueData	*NOBTScanOpaque;
d59 1
d61 4
a64 8
 *  NOBTItems are what we store in the btree.  Each item has an index tuple,
 *  including key and pointer values.  In addition, we must guarantee that
 *  all tuples in the index are unique, in order to satisfy some assumptions
 *  in Lehman and Yao.  The way that we do this is by storing the XID of the
 *  inserting transaction, and a sequence number, which tells us which index
 *  tuple insertion in that transaction this is.  With alignment, this is
 *  twelve bytes of space overhead in order to provide high concurrency and
 *  short-duration locks.
d67 3
a69 3
typedef struct NONOBTItemData {
	TransactionIdData	nobti_xid;
	uint32			nobti_seqno;
d71 1
a71 1
} NONOBTItemData;
d73 17
a89 1
typedef NONOBTItemData	*NOBTItem;
d91 2
a92 6
/*
 *  The following macros manage btitem sequence numbers for insertions.
 *  The global variable is in private space, so we won't have contention
 *  problems since we further disambiguate with XID.  However, this does
 *  mean that this algorithm is not easy to parallelize.
 */
a93 4
extern uint32 NOBTCurrSeqNo;
#define	NOBTSEQ_INIT()	NOBTCurrSeqNo = 0
#define NOBTSEQ_GET()	NOBTCurrSeqNo++

d95 1
a95 1
 *  NONOBTStackData -- As we descend a tree, we push the (key, pointer) pairs
d102 4
d108 1
a108 1
typedef struct NONOBTStackData {
d111 4
a114 3
	NOBTItem			nobts_btitem;
	struct NONOBTStackData	*nobts_parent;
} NONOBTStackData;
d116 1
a116 1
typedef NONOBTStackData	*NOBTStack;
d123 1
a123 1
#define	NOBT_READ		0
d125 3
d154 1
a154 1
#define NOBTGreaterStrategyNumber		5
d176 2
a177 2
NOBTStack			_nobt_search();
NOBTStack			_nobt_searchr();
d193 1
a193 1
NOBTItem			_nobt_formitem();
@