head 1.6; access; symbols; locks; strict; comment @ * @; 1.6 date 92.06.11.17.49.54; author mao; state Exp; branches; next 1.5; 1.5 date 91.10.29.00.12.55; author mao; state Exp; branches; next 1.4; 1.4 date 91.10.03.00.59.39; author mao; state Exp; branches; next 1.3; 1.3 date 91.08.22.06.34.41; author mao; state Exp; branches; next 1.2; 1.2 date 91.08.06.01.42.21; author mao; state Exp; branches; next 1.1; 1.1 date 91.08.03.00.27.14; author mao; state Exp; branches; next ; desc @structure defs and types for sony jukebox storage manager @ 1.6 log @blocks allocated to platters according to a more sensible policy @ text @/* * sj.h -- Private header file for the Sony jukebox storage manager. * * This file is only used if you're at Berkeley, and SONY_JUKEBOX * is defined. * * $Header: /private/mao/postgres/src/lib/H/storage/RCS/sj.h,v 1.5 1991/10/29 00:12:55 mao Exp mao $ */ #ifdef SONY_JUKEBOX /* * When the buffer pool requests a particular page, we load a group of * pages from the jukebox into the mag disk cache for efficiency. * SJCACHESIZE is the number of these groups in the disk cache. Every * group is represented by one entry in the shared memory cache. SJGRPSIZE * is the number of 8k pages in a group. */ #define SJCACHESIZE 64 /* # groups in mag disk cache */ #define SJGRPSIZE 16 /* # 8k pages in a group */ #define SJNBLKSIZE 20 /* # sizes cached in nblock cache */ #define SJPATHLEN 64 /* size of path to cache file */ /* misc constants */ #define SJCACHENAME "_sj_cache_" /* relative to $POSTGRESHOME/data */ #define SJMETANAME "_sj_meta_" /* relative to $POSTGRESHOME/data */ #define SJBLOCKNAME "_sj_nblocks_" /* relative to $POSTGRESHOME/data */ /* for allocations... */ #define SJNEWRELN 0 #define SJOLDRELN 1 /* * SJGroupDesc -- Descriptor block for a cache group. * * The first 1024 bytes in a group -- on a platter or in the magnetic * disk cache -- are a descriptor block. We choose 1024 bytes because * this is the native block size of the jukebox. 
*
 * This block includes a description of the data that appears in the
 * group, including relid, dbid, relname, dbname, and a unique OID
 * that we use to verify cache consistency on startup. SJGroupDesc
 * is the structure that contains this information. It resides at the
 * start of the 1024-byte block; the rest of the block is unused.
 */

typedef struct SJGroupDesc {
    long        sjgd_magic;     /* magic number; presumably SJGDMAGIC -- confirm */
    long        sjgd_version;   /* format version; presumably SJGDVERSION -- confirm */
    NameData    sjgd_dbname;    /* name of the database owning the group */
    NameData    sjgd_relname;   /* name of the relation owning the group */
    ObjectId    sjgd_dbid;      /* database OID */
    ObjectId    sjgd_relid;     /* relation OID */
    ObjectId    sjgd_plid;      /* presumably the platter OID -- confirm */
    long        sjgd_relblkno;  /* presumably relation block # of first block -- confirm */
    long        sjgd_jboffset;  /* presumably offset of the group on the platter -- confirm */
    long        sjgd_extentsz;  /* extent size; units not shown here -- confirm */
    ObjectId    sjgd_groupoid;  /* unique OID used to verify cache consistency */
} SJGroupDesc;

#define SJGDMAGIC       0x060362
#define SJGDVERSION     2

/* 1024, the same size as the descriptor block described above */
#define JBBLOCKSZ       1024

/* size of SJCacheBuf: SJGRPSIZE pages of BLCKSZ bytes plus one JBBLOCKSZ block */
#define SJBUFSIZE       ((BLCKSZ * SJGRPSIZE) + JBBLOCKSZ)

/* # of jb blocks in extent, i.e. SJBUFSIZE expressed in JBBLOCKSZ units */
#define SJEXTENTSZ      (SJBUFSIZE / JBBLOCKSZ)

/*
 * SJCacheTag -- Unique identifier for individual groups in the magnetic
 * disk cache.
 *
 * We use this identifier to query the shared memory cache metadata
 * when we want to find a particular group.
 */

typedef struct SJCacheTag {
    ObjectId    sjct_dbid;      /* database OID of this group */
    ObjectId    sjct_relid;     /* relation OID of this group */
    BlockNumber sjct_base;      /* number of first block in group */
} SJCacheTag;

/*
 * SJHashEntry -- The hash table code returns a pointer to a structure
 * that has this layout.
 */

typedef struct SJHashEntry {
    SJCacheTag  sjhe_tag;       /* cache tag -- hash key */
    int         sjhe_groupno;   /* which group this is in cache file */
} SJHashEntry;

/*
 * SJCacheHeader -- Header data for in-memory metadata cache.
 */

typedef struct SJCacheHeader {
    int         sjh_nentries;   /* presumably # of cache entries in use -- confirm */
    int         sjh_freehead;   /* head of the free list of groups */
    int         sjh_freetail;   /* tail of the free list of groups */
    uint32      sjh_flags;      /* SJH_* bits defined below */

#define SJH_INITING     (1 << 0)    /* presumably: initialization under way -- confirm */
#define SJH_INITED      (1 << 1)    /* presumably: initialization complete -- confirm */

#ifdef HAS_TEST_AND_SET
    slock_t     sjh_initlock;   /* initialization in progress lock */
#endif /* HAS_TEST_AND_SET */
} SJCacheHeader;

/*
 * SJCacheItem -- Cache item describing blocks on the magnetic disk cache.
* * An array of these is maintained in shared memory, with one entry * for every group that appears in the magnetic disk block cache. We * maintain a consistent copy of this array on magnetic disk whenever * we change the cache contents. This is because the magnetic disk * cache is persistent, and contains data that logically appears on the * jukebox between backend instances. * * The OID that appears in this structure is used to detect corruption * of the cache due to crashes during cache metadata update on disk. * When we detect corruption, we recover by marking the group free. We * are very careful to do this in a way that guarantees no data is lost, * and that does not require log processing. * * We keep a free list of groups to which no references exist. We * allocate groups off this list on demand. In general, references * to groups in the cache are very short-lived; we never return pointers * into private structures outside of the code that manages the cache. * The free list is maintained in LRU order, and the least-recently- * used group is allocated first. * * Groups on the jukebox include one page (the first) that describes the * group, including its dbid, relid, dbname, relname, and extent size. * This page also includes the OID described above. 
*/ typedef struct SJCacheItem { SJCacheTag sjc_tag; /* dbid, relid, group triple */ int sjc_freeprev; /* free list pointer */ int sjc_freenext; /* free list pointer */ int sjc_refcount; /* number of active refs */ ObjectId sjc_oid; /* OID of group */ ObjectId sjc_plid; /* platter OID for group */ NameData sjc_plname; /* platter name for group */ int sjc_jboffset; /* offset of first block */ uint8 sjc_gflags; /* flags for entire group */ uint8 sjc_flags[SJGRPSIZE]; /* flag bytes, 1 per block */ #define SJC_CLEAR (uint8) 0x0 #define SJC_MISSING (1 << 0) #define SJC_ONPLATTER (1 << 1) #define SJC_IOINPROG (1 << 7) #ifdef HAS_TEST_AND_SET slock_t sjc_iolock; /* transfer in progress */ #endif /* HAS_TEST_AND_SET */ } SJCacheItem; #define MUST_FLUSH(f) (!(((f) & SJC_ONPLATTER) || ((f) & SJC_MISSING))) #endif /* SONY_JUKEBOX */ @ 1.5 log @add nblock cache @ text @d7 1 a7 1 * $Header: RCS/sj.h,v 1.4 91/10/03 00:59:39 mao Exp Locker: mao $ d21 2 a22 2 #define SJGRPSIZE 10 /* # 8k pages in a group */ #define SJNBLKSIZE 10 /* # sizes cached in nblock cache */ d29 4 @ 1.4 log @get rid of dirty bit on pages and group descriptors; this turned out to contain no information. had to bump version number (twice) due to this change and some interim testing of cache sizes, too. 
@ text @d7 1 a7 1 * $Header: /local/mao/postgres/src/lib/H/storage/RCS/sj.h,v 1.3 1991/08/22 06:34:41 mao Exp mao $ d22 1 a28 3 /* bogus macros */ #define RelationSetLockForExtend(r) @ 1.3 log @don't need SJNBlock anymore @ text @d7 1 a7 1 * $Header: /local/mao/postgres/src/lib/H/storage/RCS/sj.h,v 1.2 1991/08/06 01:42:21 mao Exp mao $ d61 1 a61 1 #define SJGDVERSION 0 d158 2 a159 3 #define SJC_DIRTY (1 << 0) #define SJC_MISSING (1 << 1) #define SJC_ONPLATTER (1 << 2) d169 2 @ 1.2 log @use a free list, rather than walking the ordinary lru list looking for unpinned pages @ text @d7 1 a7 1 * $Header: /users/mao/postgres/src/lib/H/storage/RCS/sj.h,v 1.1 1991/08/03 00:27:14 mao Exp mao $ a169 16 /* * SJNBlock -- Linked list of count of blocks in relations. * * Computing a block count is so expensive that we cache the count * in local space when we've done the work. This is really a stupid * way to do it -- we'd rather do it in shared memory and have the * computed count survive transactions -- but this will work for now. */ typedef struct SJNBlock { ObjectId sjnb_dbid; ObjectId sjnb_relid; int sjnb_nblocks; struct SJNBlock *sjnb_next; } SJNBlock; @ 1.1 log @Initial revision @ text @d7 1 a7 1 * $Header$ a22 1 #define SJEXTENTSZ (SJBufSize / JBBLOCKSZ) /* # of jb blocks in extent */ a23 2 extern int SJBufSize; d27 1 d64 6 d100 2 a101 2 int sjh_lruhead; int sjh_lrutail; d131 6 a136 6 * Since we never return pointers to private data, we don't need to * maintain a free list or pin count on magnetic disk cache groups. * In shared memory, we maintain a list of groups in LRU order (offsets * from the start of cache metadata are stored in this structure). * When we need a group for data transfer, we use the least-recently-used * group's space, kicking it out to the platter if necessary. d145 2 a146 2 int sjc_lruprev; /* LRU list pointer */ int sjc_lrunext; /* LRU list pointer */ d181 1 @