head 1.6; access; symbols; locks; strict; comment @ * @; 1.6 date 91.06.26.19.12.14; author mao; state Exp; branches; next 1.5; 1.5 date 91.06.12.08.57.36; author mao; state Exp; branches; next 1.4; 1.4 date 91.06.12.03.56.58; author mao; state Exp; branches; next 1.3; 1.3 date 91.06.11.07.41.44; author mao; state Exp; branches; next 1.2; 1.2 date 91.06.10.21.05.53; author mao; state Exp; branches; next 1.1; 1.1 date 91.05.28.14.34.00; author mao; state Exp; branches; next ; desc @no-overwrite btree header file @ 1.6 log @compile-time switch for including nobtree code @ text @/* * btree.h -- header file for postgres btree access method implementation. * * $Header: /users/mao/nobtree/RCS/nobtree.h,v 1.5 1991/06/12 08:57:36 mao Exp $ */ #ifdef NOBTREE /* exactly one of these must be defined */ #undef SHADOW #undef NORMAL #define REORG /* * NOBTPageOpaqueData -- At the end of every page, we store a pointer to * both siblings in the tree. See Lehman and Yao's paper for more info. * In addition, we need to know what sort of page this is (leaf or internal), * and whether the page is available for reuse. * * Lehman and Yao's algorithm requires a ``high key'' on every page. The * high key on a page is guaranteed to be greater than or equal to any key * that appears on this page. Our insertion algorithm guarantees that we * can use the initial least key on our right sibling as the high key. We * allocate space for the line pointer to the high key in the opaque data * at the end of the page. * * Rightmost pages in the tree have no high key. */ typedef struct NOBTPageOpaqueData { #ifdef SHADOW uint32 nobtpo_linktok; PageNumber nobtpo_prev; PageNumber nobtpo_oldprev; uint32 nobtpo_prevtok; PageNumber nobtpo_next; PageNumber nobtpo_oldnext; uint32 nobtpo_nexttok; uint32 nobtpo_repltok; PageNumber nobtpo_replaced; uint16 nobtpo_flags; #endif /* SHADOW */ #ifdef REORG uint32 nobtpo_linktok; /* to guarantee one split/sync */ PageNumber nobtpo_prev; PageNumber nobtpo_oldprev; uint32 nobtpo_prevtok; PageNumber nobtpo_next; PageNumber nobtpo_oldnext; uint32 nobtpo_nexttok; LocationIndex nobtpo_oldlow; uint16 nobtpo_flags; #endif /* REORG */ #ifdef NORMAL PageNumber nobtpo_prev; PageNumber nobtpo_next; uint16 nobtpo_flags; uint16 nobtpo_filler; #endif /* NORMAL */ #define NOBTP_LEAF (1 << 0) #define NOBTP_ROOT (1 << 1) #define NOBTP_SIBOK (1 << 3) #define NOBTP_FREE (1 << 4) } NOBTPageOpaqueData; typedef NOBTPageOpaqueData *NOBTPageOpaque; /* * ScanOpaqueData is used to remember which buffers we're currently * examining in the scan. We keep these buffers locked and pinned and * recorded in the opaque entry of the scan in order to avoid doing a * ReadBuffer() for every tuple in the index. This avoids semop() calls, * which are expensive. */ typedef struct NOBTScanOpaqueData { Buffer nobtso_curbuf; Buffer nobtso_mrkbuf; } NOBTScanOpaqueData; typedef NOBTScanOpaqueData *NOBTScanOpaque; typedef struct NOBTIItemData { #ifdef SHADOW PageNumber nobtii_oldchild; PageNumber nobtii_child; unsigned short nobtii_info; unsigned short nobtii_filler; /* must longalign key */ #endif /* SHADOW */ #ifdef REORG PageNumber nobtii_child; unsigned short nobtii_info; #endif /* REORG */ #ifdef NORMAL PageNumber nobtii_child; unsigned short nobtii_info; #endif /* NORMAL */ /* MORE DATA FOLLOWS AT END OF STRUCT */ } NOBTIItemData; typedef NOBTIItemData *NOBTIItem; #define NOBTIItemGetSize(i) ((i)->nobtii_info & 0x1FFF) #define NOBTIItemGetDSize(i) ((i).nobtii_info & 0x1FFF) typedef struct NOBTLItemData { IndexTupleData nobtli_itup; /* MORE DATA FOLLOWS AT END OF STRUCT */ } NOBTLItemData; typedef NOBTLItemData *NOBTLItem; /* * NOBTStackData -- As we descend a tree, we push the (key, pointer) pairs * from internal nodes onto a private stack. If we split a leaf, we use * this stack to walk back up the tree and insert data into parent nodes * (and possibly to split them, too). Lehman and Yao's update algorithm * guarantees that under no circumstances can our private stack give us * an irredeemably bad picture up the tree. Again, see the paper for * details. * * For the no-overwrite algorithm, we store the keys that bound the link * we traverse in the tree in the stack. When we reach the child, we use * these keys to verify that we traversed an active link. */ typedef struct NOBTStackData { PageNumber nobts_blkno; OffsetIndex nobts_offset; NOBTIItem nobts_btitem; #ifdef REORG NOBTIItem nobts_nxtitem; #endif /* REORG */ #ifdef SHADOW NOBTIItem nobts_nxtitem; #endif /* SHADOW */ struct NOBTStackData *nobts_parent; } NOBTStackData; typedef NOBTStackData *NOBTStack; /* * We need to be able to tell the difference between read and write * requests for pages, in order to do locking correctly. */ #define NOBT_READ 0 #define NOBT_WRITE 1 /* * Similarly, the difference between insertion and non-insertion binary * searches on a given page makes a difference when we're descending the * tree. */ #define NOBT_INSERTION 0 #define NOBT_DESCENT 1 /* * In general, the btree code tries to localize its knowledge about page * layout to a couple of routines. However, we need a special value to * indicate "no page number" in those places where we expect page numbers. */ #define P_NONE 0 /* * Strategy numbers -- ordering of these is <, <=, =, >=, > */ #define NOBTLessStrategyNumber 1 #define NOBTLessEqualStrategyNumber 2 #define NOBTEqualStrategyNumber 3 #define NOBTGreaterEqualStrategyNumber 4 #define NOBTGreaterStrategyNumber 5 #define NOBTMaxStrategyNumber 5 /* * When a new operator class is declared, we require that the user supply * us with an amproc procudure for determining whether, for two keys a and * b, a < b, a = b, or a > b. This routine must return < 0, 0, > 0, * respectively, in these three cases. Since we only have one such proc * in amproc, it's number 1. */ #define NOBTORDER_PROC 1 /* public routines */ char *nobtgettuple(); InsertIndexResult nobtinsert(); /* private routines */ InsertIndexResult _nobt_doinsert(); InsertIndexResult _nobt_insertonpg(); OffsetIndex _nobt_pgaddtup(); ScanKey _nobt_mkscankey(); ScanKey _nobt_imkscankey(); NOBTStack _nobt_search(); NOBTStack _nobt_searchr(); void _nobt_relbuf(); Buffer _nobt_getbuf(); void _nobt_wrtbuf(); void _nobt_wrtnorelbuf(); bool _nobt_skeycmp(); OffsetIndex _nobt_binsrch(); OffsetIndex _nobt_firsteq(); Buffer _nobt_getstackbuf(); RetrieveIndexResult _nobt_first(); RetrieveIndexResult _nobt_next(); RetrieveIndexResult _nobt_endpoint(); bool _nobt_step(); bool _nobt_twostep(); StrategyNumber _nobt_getstrat(); bool _nobt_invokestrat(); NOBTLItem _nobt_formitem(); NOBTIItem _nobt_formiitem(); bool _nobt_goesonpg(); void _nobt_regscan(); void _nobt_dropscan(); void _nobt_adjscans(); void _nobt_scandel(); bool _nobt_scantouched(); OffsetIndex _nobt_findsplitloc(); #endif /* NOBTREE */ @ 1.5 log @all three implementations work correctly, need to collect timings @ text @d4 1 a4 1 * $Header: /users/mao/nobtree/RCS/nobtree.h,v 1.4 1991/06/12 03:56:58 mao Exp mao $ d7 2 d226 2 @ 1.4 log @#defines for normal, shadow implemenentation; both impls work @ text @d4 1 a4 1 * $Header: /users/mao/nobtree/RCS/nobtree.h,v 1.3 1991/06/11 07:41:44 mao Exp mao $ d9 2 a10 2 #define NORMAL #undef REORG d39 1 d41 11 d90 4 d131 3 @ 1.3 log @works with minimal per-tuple, per-page overhead @ text @d4 1 a4 1 * $Header: /users/mao/nobtree/RCS/nobtree.h,v 1.2 1991/06/10 21:05:53 mao Exp mao $ d7 5 d29 1 d39 4 d44 2 a70 16 #ifdef notdef /* * NOBTItems are what we store in the btree. The oldchild entry is used * to store a pointer to the previous child page on internal items after * a new one has been added by a page split. It's necessary to store these * in order to guarantee recoverability. */ typedef struct NOBTItemData { PageNumber nobti_oldchild; uint16 nobti_filler; /* force longalign */ IndexTupleData nobti_itup; } NOBTItemData; typedef NOBTItemData *NOBTItem; #else /* notdef */ d72 1 d77 5 a95 1 #endif /* notdef */ a113 4 #ifdef notdef NOBTItem nobts_btitem; NOBTItem nobts_nxtitem; #else /* notdef */ d115 1 d117 1 a117 1 #endif /* notdef */ a129 5 #ifdef notdef #define NOBT_NEXTLNK 2 #define NOBT_PREVLNK 3 #define NOBT_LINKTOK 4 #endif /* notdef */ @ 1.2 log @structures are too big, but the code works @ text @d4 1 a4 1 * $Header: RCS/nobtree.h,v 1.1 91/05/28 14:34:00 mao Exp Locker: mao $ d59 1 a59 1 #ifndef notdef a75 1 uint16 nobtii_flags; d79 1 d85 3 a88 1 uint16 nobtli_flags; d113 1 d116 4 d132 1 d136 1 d185 1 d203 2 a204 1 NOBTItem _nobt_formitem(); d211 1 @ 1.1 log @Initial revision @ text @d4 1 a4 1 * $Header: RCS/nbtree.h,v 1.7 91/04/28 15:20:56 mao Exp $ d8 1 a8 1 * NONOBTPageOpaqueData -- At the end of every page, we store a pointer to d23 2 a24 1 typedef struct NONOBTPageOpaqueData { d26 2 d29 4 d37 2 a38 1 #define NOBTP_FREE (1 << 2) d40 1 a40 1 } NONOBTPageOpaqueData; d42 1 a42 1 typedef NONOBTPageOpaqueData *NOBTPageOpaque; d52 1 a52 1 typedef struct NONOBTScanOpaqueData { d55 1 a55 1 } NONOBTScanOpaqueData; d57 1 a57 1 typedef NONOBTScanOpaqueData *NOBTScanOpaque; d59 1 d61 4 a64 8 * NOBTItems are what we store in the btree. Each item has an index tuple, * including key and pointer values. In addition, we must guarantee that * all tuples in the index are unique, in order to satisfy some assumptions * in Lehman and Yao. The way that we do this is by storing the XID of the * inserting transaction, and a sequence number, which tells us which index * tuple insertion in that transaction this is. With alignment, this is * twelve bytes of space overhead in order to provide high concurrency and * short-duration locks. d67 3 a69 3 typedef struct NONOBTItemData { TransactionIdData nobti_xid; uint32 nobti_seqno; d71 1 a71 1 } NONOBTItemData; d73 17 a89 1 typedef NONOBTItemData *NOBTItem; d91 2 a92 6 /* * The following macros manage btitem sequence numbers for insertions. * The global variable is in private space, so we won't have contention * problems since we further disambiguate with XID. However, this does * mean that this algorithm is not easy to parallelize. */ a93 4 extern uint32 NOBTCurrSeqNo; #define NOBTSEQ_INIT() NOBTCurrSeqNo = 0 #define NOBTSEQ_GET() NOBTCurrSeqNo++ d95 1 a95 1 * NONOBTStackData -- As we descend a tree, we push the (key, pointer) pairs d102 4 d108 1 a108 1 typedef struct NONOBTStackData { d111 4 a114 3 NOBTItem nobts_btitem; struct NONOBTStackData *nobts_parent; } NONOBTStackData; d116 1 a116 1 typedef NONOBTStackData *NOBTStack; d123 1 a123 1 #define NOBT_READ 0 d125 3 d154 1 a154 1 #define NOBTGreaterStrategyNumber 5 d176 2 a177 2 NOBTStack _nobt_search(); NOBTStack _nobt_searchr(); d193 1 a193 1 NOBTItem _nobt_formitem(); @