bt_cursor.c File Reference

#include "db_config.h"
#include <sys/types.h>
#include <string.h>
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/btree.h"
#include "dbinc/lock.h"

Go to the source code of this file.

Defines

#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret)
#define ACQUIRE_COUPLE(dbc, mode, lpgno, lock, fpgno, pagep, ret)
#define ACQUIRE_CUR(dbc, mode, p, ret)
#define ACQUIRE_CUR_COUPLE(dbc, mode, p, ret)
#define ACQUIRE_WRITE_LOCK(dbc, ret)
#define DISCARD(dbc, ldiscard, lock, pagep, ret)
#define DISCARD_CUR(dbc, ret)
#define IS_CUR_DELETED(dbc)   IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx)
#define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)
#define IS_DELETED(dbp, page, indx)
#define IS_DUPLICATE(dbc, i1, i2)

Functions

static int __bam_bulk (DBC *dbc, DBT *data, u_int32_t flags)
int __bam_bulk_duplicates (DBC *dbc, db_pgno_t pgno, u_int8_t *dbuf, int32_t *keyoff, int32_t **offpp, u_int8_t **dpp, u_int32_t *spacep, int no_dup)
int __bam_bulk_overflow (DBC *dbc, u_int32_t len, db_pgno_t pgno, u_int8_t *dp)
static int __bam_c_close (DBC *dbc, db_pgno_t root_pgno, int *rmroot)
int __bam_c_count (DBC *dbc, db_recno_t *recnop)
static int __bam_c_del (DBC *dbc)
static int __bam_c_destroy (DBC *dbc)
int __bam_c_dup (DBC *orig_dbc, DBC *new_dbc)
static int __bam_c_first (DBC *dbc)
static int __bam_c_get (DBC *dbc, DBT *key, DBT *data, u_int32_t flags, db_pgno_t *pgnop)
static int __bam_c_getstack (DBC *dbc)
int __bam_c_init (DBC *dbc, DBTYPE dbtype)
static int __bam_c_last (DBC *dbc)
static int __bam_c_next (DBC *dbc, int initial_move, int deleted_okay)
static int __bam_c_physdel (DBC *dbc)
static int __bam_c_prev (DBC *dbc)
static int __bam_c_put (DBC *dbc, DBT *key, DBT *data, u_int32_t flags, db_pgno_t *pgnop)
int __bam_c_refresh (DBC *dbc)
int __bam_c_rget (DBC *dbc, DBT *data)
static int __bam_c_search (DBC *dbc, db_pgno_t root_pgno, const DBT *key, u_int32_t flags, int *exactp)
static int __bam_c_writelock (DBC *dbc)
static int __bam_get_prev (DBC *dbc)
static int __bam_getboth_finddatum (DBC *dbc, DBT *data, u_int32_t flags)
static int __bam_getbothc (DBC *dbc, DBT *data)
static int __bam_isopd (DBC *dbc, db_pgno_t *pgnop)
static int __bam_bulk __P ((DBC *, DBT *, u_int32_t))

Variables

static const char revid [] = "$Id: bt_cursor.c,v 11.147 2002/08/13 20:46:07 ubell Exp $"


Define Documentation

#define ACQUIRE dbc,
mode,
lpgno,
lock,
fpgno,
pagep,
ret   ) 
 

Value:

{               \
        DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf;                          \
        if ((pagep) != NULL) {                                          \
                ret = __mpf->put(__mpf, pagep, 0);                      \
                pagep = NULL;                                           \
        } else                                                          \
                ret = 0;                                                \
        if ((ret) == 0 && STD_LOCKING(dbc))                             \
                ret = __db_lget(dbc, LCK_COUPLE, lpgno, mode, 0, &(lock));\
        if ((ret) == 0)                                                 \
                ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep));         \
}

Definition at line 59 of file bt_cursor.c.

Referenced by __bam_c_close(), and __bam_c_search().

#define ACQUIRE_COUPLE dbc,
mode,
lpgno,
lock,
fpgno,
pagep,
ret   ) 
 

Value:

{       \
        DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf;                          \
        if ((pagep) != NULL) {                                          \
                ret = __mpf->put(__mpf, pagep, 0);                      \
                pagep = NULL;                                           \
        } else                                                          \
                ret = 0;                                                \
        if ((ret) == 0 && STD_LOCKING(dbc))                             \
                ret = __db_lget(dbc,                                    \
                    LCK_COUPLE_ALWAYS, lpgno, mode, 0, &(lock));        \
        if ((ret) == 0)                                                 \
                ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep));         \
}

Definition at line 73 of file bt_cursor.c.

#define ACQUIRE_CUR dbc,
mode,
p,
ret   ) 
 

Value:

{                               \
        BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal;           \
        ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret);          \
        if ((ret) == 0) {                                               \
                __cp->pgno = p;                                         \
                __cp->lock_mode = (mode);                               \
        }                                                               \
}

Definition at line 89 of file bt_cursor.c.

Referenced by __bam_c_del(), __bam_c_next(), and __bam_c_prev().

#define ACQUIRE_CUR_COUPLE dbc,
mode,
p,
ret   ) 
 

Value:

{                               \
        BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal;           \
        ACQUIRE_COUPLE(dbc, mode, p, __cp->lock, p, __cp->page, ret);   \
        if ((ret) == 0) {                                               \
                __cp->pgno = p;                                         \
                __cp->lock_mode = (mode);                               \
        }                                                               \
}

Definition at line 104 of file bt_cursor.c.

Referenced by __bam_c_first(), and __bam_c_last().

#define ACQUIRE_WRITE_LOCK dbc,
ret   ) 
 

Value:

{                                       \
        BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal;           \
        ret = 0;                                                        \
        if (STD_LOCKING(dbc) &&                                         \
            __cp->lock_mode != DB_LOCK_WRITE &&                         \
            ((ret) = __db_lget(dbc,                                     \
            LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0,                    \
            __cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) == 0)           \
                __cp->lock_mode = DB_LOCK_WRITE;                        \
}

Definition at line 120 of file bt_cursor.c.

Referenced by __bam_c_first(), __bam_c_last(), __bam_c_put(), and __bam_c_writelock().

#define DISCARD dbc,
ldiscard,
lock,
pagep,
ret   ) 
 

Value:

{                       \
        DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf;                          \
        int __t_ret;                                                    \
        if ((pagep) != NULL) {                                          \
                ret = __mpf->put(__mpf, pagep, 0);                      \
                pagep = NULL;                                           \
        } else                                                          \
                ret = 0;                                                \
        if (ldiscard)                                                   \
                __t_ret = __LPUT((dbc), lock);                          \
        else                                                            \
                __t_ret = __TLPUT((dbc), lock);                         \
        if (__t_ret != 0 && (ret) == 0)                                 \
                ret = __t_ret;                                          \
}

Definition at line 133 of file bt_cursor.c.

Referenced by __bam_c_search().

#define DISCARD_CUR dbc,
ret   ) 
 

Value:

{                                               \
        BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal;           \
        DISCARD(dbc, 0, __cp->lock, __cp->page, ret);                   \
        if ((ret) == 0)                                                 \
                __cp->lock_mode = DB_LOCK_NG;                           \
}

Definition at line 151 of file bt_cursor.c.

Referenced by __bam_c_close(), __bam_c_put(), and __bam_c_search().

#define IS_CUR_DELETED dbc   )     IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx)
 

Definition at line 164 of file bt_cursor.c.

Referenced by __bam_c_first(), __bam_c_get(), __bam_c_last(), __bam_c_next(), __bam_c_prev(), and __bam_getboth_finddatum().

#define IS_CUR_DUPLICATE dbc,
orig_pgno,
orig_indx   ) 
 

Value:

(F_ISSET(dbc, DBC_OPD) ||                                       \
            (orig_pgno == (dbc)->internal->pgno &&                      \
            IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx)))

Definition at line 180 of file bt_cursor.c.

Referenced by __bam_c_get().

#define IS_DELETED dbp,
page,
indx   ) 
 

Value:

B_DISSET(GET_BKEYDATA(dbp, page,                                \
            (indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type)

Definition at line 160 of file bt_cursor.c.

Referenced by __bam_bulk(), __bam_bulk_duplicates(), and __bam_c_put().

#define IS_DUPLICATE dbc,
i1,
i2   ) 
 

Value:

(P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] ==       \
             P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2])

Definition at line 176 of file bt_cursor.c.

Referenced by __bam_c_count(), __bam_c_put(), __bam_getboth_finddatum(), and __bam_getbothc().


Function Documentation

static int __bam_bulk DBC dbc,
DBT data,
u_int32_t  flags
[static]
 

Definition at line 1028 of file bt_cursor.c.

References __bam_bulk_duplicates(), __bam_bulk_overflow(), __bam_c_next(), __bam_get_prev(), ALIGN, B_DUPLICATE, B_OVERFLOW, B_TYPE, __db_dbt::data, DB_BTREE, DB_MULTIPLE_KEY, DB_NEXT_DUP, DB_NEXT_NODUP, DB_NOTFOUND, DB_OPFLAGS_MASK, DB_RECNO, DBC_TRANSIENT, F_ISSET, GET_BKEYDATA, HOFFSET, IS_DELETED, _bkeydata::len, LF_ISSET, memcpy, NULL, NUM_ENT, P_INDX, P_INP, _boverflow::pgno, __cursor::recno, RECNO_OOB, __db_dbt::size, size, SSZA, _boverflow::tlen, _bkeydata::type, and __db_dbt::ulen.

Referenced by __bam_c_init().

01032 {
01033         BKEYDATA *bk;
01034         BOVERFLOW *bo;
01035         BTREE_CURSOR *cp;
01036         PAGE *pg;
01037         db_indx_t *inp, indx, pg_keyoff;
01038         int32_t  *endp, key_off, *offp, *saveoffp;
01039         u_int8_t *dbuf, *dp, *np;
01040         u_int32_t key_size, size, space;
01041         int adj, is_key, need_pg, next_key, no_dup;
01042         int pagesize, rec_key, ret;
01043 
01044         ret = 0;
01045         key_off = 0;
01046         size = 0;
01047         pagesize = dbc->dbp->pgsize;
01048         cp = (BTREE_CURSOR *)dbc->internal;
01049 
01050         /*
01051          * dp tracks the beginging of the page in the buffer.
01052          * np is the next place to copy things into the buffer.
01053          * dbuf always stays at the beging of the buffer.
01054          */
01055         dbuf = data->data;
01056         np = dp = dbuf;
01057 
01058         /* Keep track of space that is left.  There is a termination entry */
01059         space = data->ulen;
01060         space -= sizeof(*offp);
01061 
01062         /* Build the offset/size table from the end up. */
01063         endp = (int32_t *)((u_int8_t *)dbuf + data->ulen);
01064         endp--;
01065         offp = endp;
01066 
01067         key_size = 0;
01068 
01069         /*
01070          * Distinguish between BTREE and RECNO.
01071          * There are no keys in RECNO.  If MULTIPLE_KEY is specified
01072          * then we return the record numbers.
01073          * is_key indicates that multiple btree keys are returned.
01074          * rec_key is set if we are returning record numbers.
01075          * next_key is set if we are going after the next key rather than dup.
01076          */
01077         if (dbc->dbtype == DB_BTREE) {
01078                 is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1: 0;
01079                 rec_key = 0;
01080                 next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
01081                 adj = 2;
01082         } else {
01083                 is_key = 0;
01084                 rec_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
01085                 next_key = LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
01086                 adj = 1;
01087         }
01088         no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
01089 
01090 next_pg:
01091         indx = cp->indx;
01092         pg = cp->page;
01093 
01094         inp = P_INP(dbc->dbp, pg);
01095         /* The current page is not yet in the buffer. */
01096         need_pg = 1;
01097 
01098         /*
01099          * Keep track of the offset of the current key on the page.
01100          * If we are returning keys, set it to 0 first so we force
01101          * the copy of the key to the buffer.
01102          */
01103         pg_keyoff = 0;
01104         if (is_key == 0)
01105                 pg_keyoff = inp[indx];
01106 
01107         do {
01108                 if (IS_DELETED(dbc->dbp, pg, indx)) {
01109                         if (dbc->dbtype != DB_RECNO)
01110                                 continue;
01111 
01112                         cp->recno++;
01113                         /*
01114                          * If we are not returning recnos then we
01115                          * need to fill in every slot so the user
01116                          * can calculate the record numbers.
01117                          */
01118                         if (rec_key != 0)
01119                                 continue;
01120 
01121                         space -= 2 * sizeof(*offp);
01122                         /* Check if space as underflowed. */
01123                         if (space > data->ulen)
01124                                 goto back_up;
01125 
01126                         /* Just mark the empty recno slots. */
01127                         *offp-- = 0;
01128                         *offp-- = 0;
01129                         continue;
01130                 }
01131 
01132                 /*
01133                  * Check to see if we have a new key.
01134                  * If so, then see if we need to put the
01135                  * key on the page.  If its already there
01136                  * then we just point to it.
01137                  */
01138                 if (is_key && pg_keyoff != inp[indx]) {
01139                         bk = GET_BKEYDATA(dbc->dbp, pg, indx);
01140                         if (B_TYPE(bk->type) == B_OVERFLOW) {
01141                                 bo = (BOVERFLOW *)bk;
01142                                 size = key_size = bo->tlen;
01143                                 if (key_size > space)
01144                                         goto get_key_space;
01145                                 if ((ret = __bam_bulk_overflow(dbc,
01146                                     bo->tlen, bo->pgno, np)) != 0)
01147                                         return (ret);
01148                                 space -= key_size;
01149                                 key_off = (int32_t)(np - dbuf);
01150                                 np += key_size;
01151                         } else {
01152                                 if (need_pg) {
01153                                         dp = np;
01154                                         size = pagesize - HOFFSET(pg);
01155                                         if (space < size) {
01156 get_key_space:
01157                                                 /* Nothing added, then error. */
01158                                                 if (offp == endp) {
01159                                                         data->size =
01160                                                             ALIGN(size +
01161                                                             pagesize,
01162                                                             sizeof(u_int32_t));
01163                                                         return (ENOMEM);
01164                                                 }
01165                                                 /*
01166                                                  * We need to back up to the
01167                                                  * last record put into the
01168                                                  * buffer so that it is
01169                                                  * CURRENT.
01170                                                  */
01171                                                 if (indx != 0)
01172                                                         indx -= P_INDX;
01173                                                 else {
01174                                                         if ((ret =
01175                                                             __bam_get_prev(
01176                                                             dbc)) != 0)
01177                                                                 return (ret);
01178                                                         indx = cp->indx;
01179                                                         pg = cp->page;
01180                                                 }
01181                                                 break;
01182                                         }
01183                                         /*
01184                                          * Move the data part of the page
01185                                          * to the buffer.
01186                                          */
01187                                         memcpy(dp,
01188                                            (u_int8_t *)pg + HOFFSET(pg), size);
01189                                         need_pg = 0;
01190                                         space -= size;
01191                                         np += size;
01192                                 }
01193                                 key_size = bk->len;
01194                                 key_off = (int32_t)(inp[indx] - HOFFSET(pg)
01195                                     + dp - dbuf + SSZA(BKEYDATA, data));
01196                                 pg_keyoff = inp[indx];
01197                         }
01198                 }
01199 
01200                 /*
01201                  * Reserve space for the pointers and sizes.
01202                  * Either key/data pair or just for a data item.
01203                  */
01204                 space -= (is_key ? 4 : 2) * sizeof(*offp);
01205                 if (rec_key)
01206                         space -= sizeof(*offp);
01207 
01208                 /* Check to see if space has underflowed. */
01209                 if (space > data->ulen)
01210                         goto back_up;
01211 
01212                 /*
01213                  * Determine if the next record is in the
01214                  * buffer already or if it needs to be copied in.
01215                  * If we have an off page dup, then copy as many
01216                  * as will fit into the buffer.
01217                  */
01218                 bk = GET_BKEYDATA(dbc->dbp, pg, indx + adj - 1);
01219                 if (B_TYPE(bk->type) == B_DUPLICATE) {
01220                         bo = (BOVERFLOW *)bk;
01221                         if (is_key) {
01222                                 *offp-- = key_off;
01223                                 *offp-- = key_size;
01224                         }
01225                         /*
01226                          * We pass the offset of the current key.
01227                          * On return we check to see if offp has
01228                          * moved to see if any data fit.
01229                          */
01230                         saveoffp = offp;
01231                         if ((ret = __bam_bulk_duplicates(dbc, bo->pgno,
01232                             dbuf, is_key ? offp + P_INDX : NULL,
01233                             &offp, &np, &space, no_dup)) != 0) {
01234                                 if (ret == ENOMEM) {
01235                                         size = space;
01236                                         /* If nothing was added, then error. */
01237                                         if (offp == saveoffp) {
01238                                                 offp += 2;
01239                                                 goto back_up;
01240                                         }
01241                                         goto get_space;
01242                                 }
01243                                 return (ret);
01244                         }
01245                 } else if (B_TYPE(bk->type) == B_OVERFLOW) {
01246                         bo = (BOVERFLOW *)bk;
01247                         size = bo->tlen;
01248                         if (size > space)
01249                                 goto back_up;
01250                         if ((ret =
01251                             __bam_bulk_overflow(dbc,
01252                                 bo->tlen, bo->pgno, np)) != 0)
01253                                 return (ret);
01254                         space -= size;
01255                         if (is_key) {
01256                                 *offp-- = key_off;
01257                                 *offp-- = key_size;
01258                         } else if (rec_key)
01259                                 *offp-- = cp->recno;
01260                         *offp-- = (int32_t)(np - dbuf);
01261                         np += size;
01262                         *offp-- = size;
01263                 } else {
01264                         if (need_pg) {
01265                                 dp = np;
01266                                 size = pagesize - HOFFSET(pg);
01267                                 if (space < size) {
01268 back_up:
01269                                         /*
01270                                          * Back up the index so that the
01271                                          * last record in the buffer is CURRENT
01272                                          */
01273                                         if (indx >= adj)
01274                                                 indx -= adj;
01275                                         else {
01276                                                 if ((ret =
01277                                                     __bam_get_prev(dbc)) != 0 &&
01278                                                     ret != DB_NOTFOUND)
01279                                                         return (ret);
01280                                                 indx = cp->indx;
01281                                                 pg = cp->page;
01282                                         }
01283                                         if (dbc->dbtype == DB_RECNO)
01284                                                 cp->recno--;
01285 get_space:
01286                                         /*
01287                                          * See if we put anything in the
01288                                          * buffer or if we are doing a DBP->get
01289                                          * did we get all of the data.
01290                                          */
01291                                         if (offp >=
01292                                             (is_key ? &endp[-1] : endp) ||
01293                                             F_ISSET(dbc, DBC_TRANSIENT)) {
01294                                                 data->size = ALIGN(size +
01295                                                     data->ulen - space,
01296                                                     sizeof(u_int32_t));
01297                                                 return (ENOMEM);
01298                                         }
01299                                         break;
01300                                 }
01301                                 memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
01302                                 need_pg = 0;
01303                                 space -= size;
01304                                 np += size;
01305                         }
01306                         /*
01307                          * Add the offsets and sizes to the end of the buffer.
01308                          * First add the key info then the data info.
01309                          */
01310                         if (is_key) {
01311                                 *offp-- = key_off;
01312                                 *offp-- = key_size;
01313                         } else if (rec_key)
01314                                 *offp-- = cp->recno;
01315                         *offp-- = (int32_t)(inp[indx + adj - 1] - HOFFSET(pg)
01316                             + dp - dbuf + SSZA(BKEYDATA, data));
01317                         *offp-- = bk->len;
01318                 }
01319                 if (dbc->dbtype == DB_RECNO)
01320                         cp->recno++;
01321                 else if (no_dup) {
01322                         while (indx + adj < NUM_ENT(pg) &&
01323                             pg_keyoff == inp[indx + adj])
01324                                 indx += adj;
01325                 }
01326         /*
01327          * Stop when we either run off the page or we
01328          * move to the next key and we are not returning mulitple keys.
01329          */
01330         } while ((indx += adj) < NUM_ENT(pg) &&
01331             (next_key || pg_keyoff == inp[indx]));
01332 
01333         /* If we are off the page then try to the next page. */
01334         if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
01335                 cp->indx = indx;
01336                 ret = __bam_c_next(dbc, 0, 1);
01337                 if (ret == 0)
01338                         goto next_pg;
01339                 if (ret != DB_NOTFOUND)
01340                         return (ret);
01341         }
01342 
01343         /*
01344          * If we did a DBP->get we must error if we did not return
01345          * all the data for the current key because there is
01346          * no way to know if we did not get it all, nor any
01347          * interface to fetch the balance.
01348          */
01349 
01350         if (ret == 0 &&
01351             F_ISSET(dbc, DBC_TRANSIENT) && pg_keyoff == inp[indx]) {
01352                 data->size = (data->ulen - space) + size;
01353                 return (ENOMEM);
01354         }
01355         /*
01356          * Must leave the index pointing at the last record fetched.
01357          * If we are not fetching keys, we may have stepped to the
01358          * next key.
01359          */
01360         if (next_key || pg_keyoff == inp[indx])
01361                 cp->indx = indx;
01362         else
01363                 cp->indx = indx - P_INDX;
01364 
01365         if (rec_key == 1)
01366                 *offp = (u_int32_t) RECNO_OOB;
01367         else
01368                 *offp = (u_int32_t) -1;
01369         return (0);
01370 }

int __bam_bulk_duplicates DBC dbc,
db_pgno_t  pgno,
u_int8_t dbuf,
int32_t keyoff,
int32_t **  offpp,
u_int8_t **  dpp,
u_int32_t spacep,
int  no_dup
 

Definition at line 1405 of file bt_cursor.c.

References __bam_bulk_overflow(), __bam_c_next(), __bam_c_prev(), __db_c_newopd(), B_OVERFLOW, B_TYPE, data, DB_FIRST, DB_NOTFOUND, DB_RECNO, __dbc::dbp, dbp, __dbc::dbtype, GET_BKEYDATA, HOFFSET, __dbc::internal, IS_DELETED, key, _bkeydata::len, memcpy, NULL, NUM_ENT, P_INP, _boverflow::pgno, __db::pgsize, __cursor::recno, size, SSZA, _boverflow::tlen, and _bkeydata::type.

Referenced by __bam_bulk(), and __ham_bulk().

01413 {
01414         DB *dbp;
01415         BKEYDATA *bk;
01416         BOVERFLOW *bo;
01417         BTREE_CURSOR *cp;
01418         DBC *opd;
01419         DBT key, data;
01420         PAGE *pg;
01421         db_indx_t indx, *inp;
01422         int32_t *offp;
01423         u_int32_t size, space;
01424         u_int8_t *dp, *np;
01425         int first, need_pg, pagesize, ret, t_ret;
01426 
01427         ret = 0;
01428 
01429         dbp = dbc->dbp;
01430         cp = (BTREE_CURSOR *)dbc->internal;
01431         opd = cp->opd;
01432 
01433         if (opd == NULL) {
01434                 if ((ret = __db_c_newopd(dbc, pgno, NULL, &opd)) != 0)
01435                         return (ret);
01436                 cp->opd = opd;
01437                 if ((ret = opd->c_am_get(opd,
01438                     &key, &data, DB_FIRST, NULL)) != 0)
01439                         return (ret);
01440         }
01441 
01442         pagesize = opd->dbp->pgsize;
01443         cp = (BTREE_CURSOR *)opd->internal;
01444         space = *spacep;
01445         /* Get current offset slot. */
01446         offp = *offpp;
01447 
01448         /*
01449          * np is the next place to put data.
01450          * dp is the begining of the current page in the buffer.
01451          */
01452         np = dp = *dpp;
01453         first = 1;
01454         indx = cp->indx;
01455 
01456         do {
01457                 /* Fetch the current record.  No initial move. */
01458                 if ((ret = __bam_c_next(opd, 0, 0)) != 0)
01459                         break;
01460                 pg = cp->page;
01461                 indx = cp->indx;
01462                 inp = P_INP(dbp, pg);
01463                 /* We need to copy the page to the buffer. */
01464                 need_pg = 1;
01465 
01466                 do {
01467                         if (IS_DELETED(dbp, pg, indx))
01468                                 goto contin;
01469                         bk = GET_BKEYDATA(dbp, pg, indx);
01470                         space -= 2 * sizeof(*offp);
01471                         /* Allocate space for key if needed. */
01472                         if (first == 0 && keyoff != NULL)
01473                                 space -= 2 * sizeof(*offp);
01474 
01475                         /* Did space underflow? */
01476                         if (space > *spacep) {
01477                                 ret = ENOMEM;
01478                                 if (first == 1) {
01479                                         space = *spacep + -(int32_t)space;
01480                                         if (need_pg)
01481                                                 space += pagesize - HOFFSET(pg);
01482                                 }
01483                                 break;
01484                         }
01485                         if (B_TYPE(bk->type) == B_OVERFLOW) {
01486                                 bo = (BOVERFLOW *)bk;
01487                                 size = bo->tlen;
01488                                 if (size > space) {
01489                                         ret = ENOMEM;
01490                                         if (first == 1) {
01491                                                 space = *spacep + size;
01492                                         }
01493                                         break;
01494                                 }
01495                                 if (first == 0 && keyoff != NULL) {
01496                                         *offp-- = keyoff[0];
01497                                         *offp-- = keyoff[-1];
01498                                 }
01499                                 if ((ret = __bam_bulk_overflow(dbc,
01500                                     bo->tlen, bo->pgno, np)) != 0)
01501                                         return (ret);
01502                                 space -= size;
01503                                 *offp-- = (int32_t)(np - dbuf);
01504                                 np += size;
01505                         } else {
01506                                 if (need_pg) {
01507                                         dp = np;
01508                                         size = pagesize - HOFFSET(pg);
01509                                         if (space < size) {
01510                                                 ret = ENOMEM;
01511                                                 /* Return space required. */
01512                                                 if (first == 1) {
01513                                                         space = *spacep + size;
01514                                                 }
01515                                                 break;
01516                                         }
01517                                         memcpy(dp,
01518                                             (u_int8_t *)pg + HOFFSET(pg), size);
01519                                         need_pg = 0;
01520                                         space -= size;
01521                                         np += size;
01522                                 }
01523                                 if (first == 0 && keyoff != NULL) {
01524                                         *offp-- = keyoff[0];
01525                                         *offp-- = keyoff[-1];
01526                                 }
01527                                 size = bk->len;
01528                                 *offp-- = (int32_t)(inp[indx] - HOFFSET(pg)
01529                                     + dp - dbuf + SSZA(BKEYDATA, data));
01530                         }
01531                         *offp-- = size;
01532                         first = 0;
01533                         if (no_dup)
01534                                 break;
01535 contin:
01536                         indx++;
01537                         if (opd->dbtype == DB_RECNO)
01538                                 cp->recno++;
01539                 } while (indx < NUM_ENT(pg));
01540                 if (no_dup)
01541                         break;
01542                 cp->indx = indx;
01543 
01544         } while (ret == 0);
01545 
01546         /* Return the updated information. */
01547         *spacep = space;
01548         *offpp = offp;
01549         *dpp = np;
01550 
01551         /*
01552          * If we ran out of space back up the pointer.
01553          * If we did not return any dups or reached the end, close the opd.
01554          */
01555         if (ret == ENOMEM) {
01556                 if (opd->dbtype == DB_RECNO) {
01557                         if (--cp->recno == 0)
01558                                 goto close_opd;
01559                 } else if (indx != 0)
01560                         cp->indx--;
01561                 else {
01562                         t_ret = __bam_c_prev(opd);
01563                         if (t_ret == DB_NOTFOUND)
01564                                 goto close_opd;
01565                         if (t_ret != 0)
01566                                 ret = t_ret;
01567                 }
01568         } else if (keyoff == NULL && ret == DB_NOTFOUND) {
01569                 cp->indx--;
01570                 if (opd->dbtype == DB_RECNO)
01571                         --cp->recno;
01572         } else if (indx == 0 || ret == DB_NOTFOUND) {
01573 close_opd:
01574                 opd->c_close(opd);
01575                 ((BTREE_CURSOR *)dbc->internal)->opd = NULL;
01576         }
01577         if (ret == DB_NOTFOUND)
01578                 ret = 0;
01579 
01580         return (ret);
01581 }

int __bam_bulk_overflow DBC dbc,
u_int32_t  len,
db_pgno_t  pgno,
u_int8_t dp
 

Definition at line 1380 of file bt_cursor.c.

References __db_goff(), DB_DBT_USERMEM, F_SET, memset, and NULL.

Referenced by __bam_bulk(), __bam_bulk_duplicates(), and __ham_bulk().

01385 {
01386         DBT dbt;
01387 
01388         memset(&dbt, 0, sizeof(dbt));
01389         F_SET(&dbt, DB_DBT_USERMEM);
01390         dbt.ulen = len;
01391         dbt.data = (void *)dp;
01392         return (__db_goff(dbc->dbp, &dbt, len, pgno, NULL, NULL));
01393 }

static int __bam_c_close DBC dbc,
db_pgno_t  root_pgno,
int *  rmroot
[static]
 

Definition at line 305 of file bt_cursor.c.

References __bam_c_physdel(), __bam_ca_delete(), __db_free(), __db_unknown_type(), __lock_downgrade(), __ram_ca_delete(), ACQUIRE, C_DELETED, CDB_LOCKING, DB_BTREE, DB_LOCK_IWRITE, DB_LOCK_UPGRADE, DB_LOCK_WRITE, DB_RECNO, DBC_OPD, DBC_WRITECURSOR, DBC_WRITEDUP, __db::dbenv, dbp, __dbc::dbtype, DISCARD_CUR, err, F_ISSET, GET_BOVERFLOW, h, __dbc::internal, lock, __db::mpf, mpf, NULL, NUM_ENT, O_INDX, and PGNO_INVALID.

Referenced by __bam_c_init().

00309 {
00310         BTREE_CURSOR *cp, *cp_opd, *cp_c;
00311         DB *dbp;
00312         DBC *dbc_opd, *dbc_c;
00313         DB_MPOOLFILE *mpf;
00314         PAGE *h;
00315         int cdb_lock, ret, t_ret;
00316 
00317         dbp = dbc->dbp;
00318         mpf = dbp->mpf;
00319         cp = (BTREE_CURSOR *)dbc->internal;
00320         cp_opd = (dbc_opd = cp->opd) == NULL ?
00321             NULL : (BTREE_CURSOR *)dbc_opd->internal;
00322         cdb_lock = ret = 0;
00323 
00324         /*
00325          * There are 3 ways this function is called:
00326          *
00327          * 1. Closing a primary cursor: we get called with a pointer to a
00328          *    primary cursor that has a NULL opd field.  This happens when
00329          *    closing a btree/recno database cursor without an associated
00330          *    off-page duplicate tree.
00331          *
00332          * 2. Closing a primary and an off-page duplicate cursor stack: we
00333          *    get called with a pointer to the primary cursor which has a
00334          *    non-NULL opd field.  This happens when closing a btree cursor
00335          *    into database with an associated off-page btree/recno duplicate
00336          *    tree. (It can't be a primary recno database, recno databases
00337          *    don't support duplicates.)
00338          *
00339          * 3. Closing an off-page duplicate cursor stack: we get called with
00340          *    a pointer to the off-page duplicate cursor.  This happens when
00341          *    closing a non-btree database that has an associated off-page
00342          *    btree/recno duplicate tree or for a btree database when the
00343          *    opd tree is not empty (root_pgno == PGNO_INVALID).
00344          *
00345          * If either the primary or off-page duplicate cursor deleted a btree
00346          * key/data pair, check to see if the item is still referenced by a
00347          * different cursor.  If it is, confirm that cursor's delete flag is
00348          * set and leave it to that cursor to do the delete.
00349          *
00350          * NB: The test for == 0 below is correct.  Our caller already removed
00351          * our cursor argument from the active queue, we won't find it when we
00352          * search the queue in __bam_ca_delete().
00353          * NB: It can't be true that both the primary and off-page duplicate
00354          * cursors have deleted a btree key/data pair.  Either the primary
00355          * cursor may have deleted an item and there's no off-page duplicate
00356          * cursor, or there's an off-page duplicate cursor and it may have
00357          * deleted an item.
00358          *
00359          * Primary recno databases aren't an issue here.  Recno keys are either
00360          * deleted immediately or never deleted, and do not have to be handled
00361          * here.
00362          *
00363          * Off-page duplicate recno databases are an issue here, cases #2 and
00364          * #3 above can both be off-page recno databases.  The problem is the
00365          * same as the final problem for off-page duplicate btree databases.
00366          * If we no longer need the off-page duplicate tree, we want to remove
00367          * it.  For off-page duplicate btrees, we are done with the tree when
00368          * we delete the last item it contains, i.e., there can be no further
00369          * references to it when it's empty.  For off-page duplicate recnos,
00370          * we remove items from the tree as the application calls the remove
00371          * function, so we are done with the tree when we close the last cursor
00372          * that references it.
00373          *
00374          * We optionally take the root page number from our caller.  If the
00375          * primary database is a btree, we can get it ourselves because dbc
00376          * is the primary cursor.  If the primary database is not a btree,
00377          * the problem is that we may be dealing with a stack of pages.  The
00378          * cursor we're using to do the delete points at the bottom of that
00379          * stack and we need the top of the stack.
00380          */
00381         if (F_ISSET(cp, C_DELETED)) {
00382                 dbc_c = dbc;
00383                 switch (dbc->dbtype) {
00384                 case DB_BTREE:                          /* Case #1, #3. */
00385                         if (__bam_ca_delete(dbp, cp->pgno, cp->indx, 1) == 0)
00386                                 goto lock;
00387                         goto done;
00388                 case DB_RECNO:
00389                         if (!F_ISSET(dbc, DBC_OPD))     /* Case #1. */
00390                                 goto done;
00391                                                         /* Case #3. */
00392                         if (__ram_ca_delete(dbp, cp->root) == 0)
00393                                 goto lock;
00394                         goto done;
00395                 default:
00396                         return (__db_unknown_type(dbp->dbenv,
00397                                 "__bam_c_close", dbc->dbtype));
00398                 }
00399         }
00400 
00401         if (dbc_opd == NULL)
00402                 goto done;
00403 
00404         if (F_ISSET(cp_opd, C_DELETED)) {               /* Case #2. */
00405                 /*
00406                  * We will not have been provided a root page number.  Acquire
00407                  * one from the primary database.
00408                  */
00409                 if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0)
00410                         goto err;
00411                 root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno;
00412                 if ((ret = mpf->put(mpf, h, 0)) != 0)
00413                         goto err;
00414 
00415                 dbc_c = dbc_opd;
00416                 switch (dbc_opd->dbtype) {
00417                 case DB_BTREE:
00418                         if (__bam_ca_delete(
00419                             dbp, cp_opd->pgno, cp_opd->indx, 1) == 0)
00420                                 goto lock;
00421                         goto done;
00422                 case DB_RECNO:
00423                         if (__ram_ca_delete(dbp, cp_opd->root) == 0)
00424                                 goto lock;
00425                         goto done;
00426                 default:
00427                         return (__db_unknown_type(dbp->dbenv,
00428                                 "__bam_c_close", dbc->dbtype));
00429                 }
00430         }
00431         goto done;
00432 
00433 lock:   cp_c = (BTREE_CURSOR *)dbc_c->internal;
00434 
00435         /*
00436          * If this is CDB, upgrade the lock if necessary.  While we acquired
00437          * the write lock to logically delete the record, we released it when
00438          * we returned from that call, and so may not be holding a write lock
00439          * at the moment.  NB: to get here in CDB we must either be holding a
00440          * write lock or be the only cursor that is permitted to acquire write
00441          * locks.  The reason is that there can never be more than a single CDB
00442          * write cursor (that cursor cannot be dup'd), and so that cursor must
00443          * be closed and the item therefore deleted before any other cursor
00444          * could acquire a reference to this item.
00445          *
00446          * Note that dbc may be an off-page dup cursor;  this is the sole
00447          * instance in which an OPD cursor does any locking, but it's necessary
00448          * because we may be closed by ourselves without a parent cursor
00449          * handy, and we have to do a lock upgrade on behalf of somebody.
00450          * If this is the case, the OPD has been given the parent's locking
00451          * info in __db_c_get--the OPD is also a WRITEDUP.
00452          */
00453         if (CDB_LOCKING(dbp->dbenv)) {
00454                 if (F_ISSET(dbc, DBC_WRITEDUP | DBC_WRITECURSOR)) {
00455                         if ((ret = dbp->dbenv->lock_get(
00456                             dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
00457                             &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
00458                                 goto err;
00459                         cdb_lock = 1;
00460                 }
00461                 if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
00462                         goto err;
00463 
00464                 goto delete;
00465         }
00466 
00467         /*
00468          * The variable dbc_c has been initialized to reference the cursor in
00469          * which we're going to do the delete.  Initialize the cursor's page
00470          * and lock structures as necessary.
00471          *
00472          * First, we may not need to acquire any locks.  If we're in case #3,
00473          * that is, the primary database isn't a btree database, our caller
00474          * is responsible for acquiring any necessary locks before calling us.
00475          */
00476         if (F_ISSET(dbc, DBC_OPD)) {
00477                 if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
00478                         goto err;
00479                 goto delete;
00480         }
00481 
00482         /*
00483          * Otherwise, acquire a write lock.  If the cursor that did the initial
00484          * logical deletion (and which had a write lock) is not the same as the
00485          * cursor doing the physical deletion (which may have only ever had a
00486          * read lock on the item), we need to upgrade.  The confusion comes as
00487          * follows:
00488          *
00489          *      C1      created, acquires item read lock
00490          *      C2      dup C1, create C2, also has item read lock.
00491          *      C1      acquire write lock, delete item
00492          *      C1      close
00493          *      C2      close, needs a write lock to physically delete item.
00494          *
00495          * If we're in a TXN, we know that C2 will be able to acquire the write
00496          * lock, because no locker other than the one shared by C1 and C2 can
00497          * acquire a write lock -- the original write lock C1 acquire was never
00498          * discarded.
00499          *
00500          * If we're not in a TXN, it's nastier.  Other cursors might acquire
00501          * read locks on the item after C1 closed, discarding its write lock,
00502          * and such locks would prevent C2 from acquiring a read lock.  That's
00503          * OK, though, we'll simply wait until we can acquire a read lock, or
00504          * we'll deadlock.  (Which better not happen, since we're not in a TXN.)
00505          *
00506          * Lock the primary database page, regardless of whether we're deleting
00507          * an item on a primary database page or an off-page duplicates page.
00508          */
00509         ACQUIRE(dbc, DB_LOCK_WRITE,
00510             cp->pgno, cp_c->lock, cp_c->pgno, cp_c->page, ret);
00511         if (ret != 0)
00512                 goto err;
00513 
00514 delete: /*
00515          * If the delete occurred in a btree, delete the on-page physical item
00516          * referenced by the cursor.
00517          */
00518         if (dbc_c->dbtype == DB_BTREE && (ret = __bam_c_physdel(dbc_c)) != 0)
00519                 goto err;
00520 
00521         /*
00522          * If we're not working in an off-page duplicate tree, then we're
00523          * done.
00524          */
00525         if (!F_ISSET(dbc_c, DBC_OPD) || root_pgno == PGNO_INVALID)
00526                 goto done;
00527 
00528         /*
00529          * We may have just deleted the last element in the off-page duplicate
00530          * tree, and closed the last cursor in the tree.  For an off-page btree
00531          * there are no other cursors in the tree by definition, if the tree is
00532          * empty.  For an off-page recno we know we have closed the last cursor
00533          * in the tree because the __ram_ca_delete call above returned 0 only
00534          * in that case.  So, if the off-page duplicate tree is empty at this
00535          * point, we want to remove it.
00536          */
00537         if ((ret = mpf->get(mpf, &root_pgno, 0, &h)) != 0)
00538                 goto err;
00539         if (NUM_ENT(h) == 0) {
00540                 if ((ret = __db_free(dbc, h)) != 0)
00541                         goto err;
00542         } else {
00543                 if ((ret = mpf->put(mpf, h, 0)) != 0)
00544                         goto err;
00545                 goto done;
00546         }
00547 
00548         /*
00549          * When removing the tree, we have to do one of two things.  If this is
00550          * case #2, that is, the primary tree is a btree, delete the key that's
00551          * associated with the tree from the btree leaf page.  We know we are
00552          * the only reference to it and we already have the correct lock.  We
00553          * detect this case because the cursor that was passed to us references
00554          * an off-page duplicate cursor.
00555          *
00556          * If this is case #3, that is, the primary tree isn't a btree, pass
00557          * the information back to our caller, it's their job to do cleanup on
00558          * the primary page.
00559          */
00560         if (dbc_opd != NULL) {
00561                 if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
00562                         goto err;
00563                 if ((ret = __bam_c_physdel(dbc)) != 0)
00564                         goto err;
00565         } else
00566                 *rmroot = 1;
00567 err:
00568 done:   /*
00569          * Discard the page references and locks, and confirm that the stack
00570          * has been emptied.
00571          */
00572         if (dbc_opd != NULL) {
00573                 DISCARD_CUR(dbc_opd, t_ret);
00574                 if (t_ret != 0 && ret == 0)
00575                         ret = t_ret;
00576         }
00577         DISCARD_CUR(dbc, t_ret);
00578         if (t_ret != 0 && ret == 0)
00579                 ret = t_ret;
00580 
00581         /* Downgrade any CDB lock we acquired. */
00582         if (cdb_lock)
00583                 (void)__lock_downgrade(
00584                     dbp->dbenv, &dbc->mylock, DB_LOCK_IWRITE, 0);
00585 
00586         return (ret);
00587 }

int __bam_c_count DBC dbc,
db_recno_t recnop
 

Definition at line 610 of file bt_cursor.c.

References dbp, IS_DUPLICATE, __db::mpf, mpf, NULL, NUM_ENT, P_INDX, RE_NREC, and top.

Referenced by __db_c_count().

00613 {
00614         BTREE_CURSOR *cp;
00615         DB *dbp;
00616         DB_MPOOLFILE *mpf;
00617         db_indx_t indx, top;
00618         db_recno_t recno;
00619         int ret;
00620 
00621         dbp = dbc->dbp;
00622         mpf = dbp->mpf;
00623         cp = (BTREE_CURSOR *)dbc->internal;
00624 
00625         /*
00626          * Called with the top-level cursor that may reference an off-page
00627          * duplicates page.  If it's a set of on-page duplicates, get the
00628          * page and count.  Otherwise, get the root page of the off-page
00629          * duplicate tree, and use the count.  We don't have to acquire any
00630          * new locks, we have to have a read lock to even get here.
00631          */
00632         if (cp->opd == NULL) {
00633                 if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
00634                         return (ret);
00635 
00636                 /*
00637                  * Move back to the beginning of the set of duplicates and
00638                  * then count forward.
00639                  */
00640                 for (indx = cp->indx;; indx -= P_INDX)
00641                         if (indx == 0 ||
00642                             !IS_DUPLICATE(dbc, indx, indx - P_INDX))
00643                                 break;
00644                 for (recno = 1, top = NUM_ENT(cp->page) - P_INDX;
00645                     indx < top; ++recno, indx += P_INDX)
00646                         if (!IS_DUPLICATE(dbc, indx, indx + P_INDX))
00647                                 break;
00648                 *recnop = recno;
00649         } else {
00650                 if ((ret =
00651                     mpf->get(mpf, &cp->opd->internal->root, 0, &cp->page)) != 0)
00652                         return (ret);
00653 
00654                 *recnop = RE_NREC(cp->page);
00655         }
00656 
00657         ret = mpf->put(mpf, cp->page, 0);
00658         cp->page = NULL;
00659 
00660         return (ret);
00661 }

static int __bam_c_del DBC dbc  )  [static]
 

Definition at line 668 of file bt_cursor.c.

References __bam_adjust(), __bam_c_getstack(), __bam_ca_delete(), __bam_cdel_log(), __bam_stkrel(), ACQUIRE_CUR, B_DSET, C_DELETED, C_RECNUM, __cursor::csp, DB_ASSERT, DB_KEYEMPTY, DB_LOCK_WRITE, DB_MPOOL_DIRTY, DBC_LOGGING, dbp, err, F_ISSET, GET_BKEYDATA, LSN, LSN_NOT_LOGGED, __db::mpf, mpf, NULL, O_INDX, P_LBTREE, _epg::page, PGNO, and TYPE.

Referenced by __bam_c_init().

00670 {
00671         BTREE_CURSOR *cp;
00672         DB *dbp;
00673         DB_MPOOLFILE *mpf;
00674         int ret, t_ret;
00675 
00676         dbp = dbc->dbp;
00677         mpf = dbp->mpf;
00678         cp = (BTREE_CURSOR *)dbc->internal;
00679         ret = 0;
00680 
00681         /* If the item was already deleted, return failure. */
00682         if (F_ISSET(cp, C_DELETED))
00683                 return (DB_KEYEMPTY);
00684 
00685         /*
00686          * This code is always called with a page lock but no page.
00687          */
00688         DB_ASSERT(cp->page == NULL);
00689 
00690         /*
00691          * We don't physically delete the record until the cursor moves, so
00692          * we have to have a long-lived write lock on the page instead of a
00693          * a long-lived read lock.  Note, we have to have a read lock to even
00694          * get here.
00695          *
00696          * If we're maintaining record numbers, we lock the entire tree, else
00697          * we lock the single page.
00698          */
00699         if (F_ISSET(cp, C_RECNUM)) {
00700                 if ((ret = __bam_c_getstack(dbc)) != 0)
00701                         goto err;
00702                 cp->page = cp->csp->page;
00703         } else {
00704                 ACQUIRE_CUR(dbc, DB_LOCK_WRITE, cp->pgno, ret);
00705                 if (ret != 0)
00706                         goto err;
00707         }
00708 
00709         /* Log the change. */
00710         if (DBC_LOGGING(dbc)) {
00711                 if ((ret = __bam_cdel_log(dbp, dbc->txn, &LSN(cp->page), 0,
00712                     PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0)
00713                         goto err;
00714         } else
00715                 LSN_NOT_LOGGED(LSN(cp->page));
00716 
00717         /* Set the intent-to-delete flag on the page. */
00718         if (TYPE(cp->page) == P_LBTREE)
00719                 B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx + O_INDX)->type);
00720         else
00721                 B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type);
00722 
00723         /* Mark the page dirty. */
00724         ret = mpf->set(mpf, cp->page, DB_MPOOL_DIRTY);
00725 
00726 err:    /*
00727          * If we've been successful so far and the tree has record numbers,
00728          * adjust the record counts.  Either way, release acquired page(s).
00729          */
00730         if (F_ISSET(cp, C_RECNUM)) {
00731                 if (ret == 0)
00732                         ret = __bam_adjust(dbc, -1);
00733                 (void)__bam_stkrel(dbc, 0);
00734         } else
00735                 if (cp->page != NULL &&
00736                     (t_ret = mpf->put(mpf, cp->page, 0)) != 0 && ret == 0)
00737                         ret = t_ret;
00738 
00739         cp->page = NULL;
00740 
00741         /* Update the cursors last, after all chance of failure is past. */
00742         if (ret == 0)
00743                 (void)__bam_ca_delete(dbp, cp->pgno, cp->indx, 1);
00744 
00745         return (ret);
00746 }

static int __bam_c_destroy DBC dbc  )  [static]
 

Definition at line 594 of file bt_cursor.c.

References __os_free().

Referenced by __bam_c_init().

00596 {
00597         /* Discard the structures. */
00598         __os_free(dbc->dbp->dbenv, dbc->internal);
00599 
00600         return (0);
00601 }

int __bam_c_dup DBC orig_dbc,
DBC new_dbc
 

Definition at line 756 of file bt_cursor.c.

References __db_lget(), __cursor::flags, LOCK_ISSET, new(), NULL, __cursor::ovflsize, and __cursor::recno.

Referenced by __db_c_idup().

00758 {
00759         BTREE_CURSOR *orig, *new;
00760         int ret;
00761 
00762         orig = (BTREE_CURSOR *)orig_dbc->internal;
00763         new = (BTREE_CURSOR *)new_dbc->internal;
00764 
00765         /*
00766          * If we're holding a lock we need to acquire a copy of it, unless
00767          * we're in a transaction.  We don't need to copy any lock we're
00768          * holding inside a transaction because all the locks are retained
00769          * until the transaction commits or aborts.
00770          */
00771         if (LOCK_ISSET(orig->lock) && orig_dbc->txn == NULL) {
00772                 if ((ret = __db_lget(new_dbc,
00773                     0, new->pgno, new->lock_mode, 0, &new->lock)) != 0)
00774                         return (ret);
00775         }
00776         new->ovflsize = orig->ovflsize;
00777         new->recno = orig->recno;
00778         new->flags = orig->flags;
00779 
00780         return (0);
00781 }

static int __bam_c_first DBC dbc  )  [static]
 

Definition at line 2091 of file bt_cursor.c.

References __bam_c_next(), ACQUIRE_CUR_COUPLE, ACQUIRE_WRITE_LOCK, DB_LOCK_READ, DBC_RMW, F_ISSET, GET_BINTERNAL, IS_CUR_DELETED, ISLEAF, and NUM_ENT.

Referenced by __bam_c_get().

02093 {
02094         BTREE_CURSOR *cp;
02095         db_pgno_t pgno;
02096         int ret;
02097 
02098         cp = (BTREE_CURSOR *)dbc->internal;
02099         ret = 0;
02100 
02101         /* Walk down the left-hand side of the tree. */
02102         for (pgno = cp->root;;) {
02103                 ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
02104                 if (ret != 0)
02105                         return (ret);
02106 
02107                 /* If we find a leaf page, we're done. */
02108                 if (ISLEAF(cp->page))
02109                         break;
02110 
02111                 pgno = GET_BINTERNAL(dbc->dbp, cp->page, 0)->pgno;
02112         }
02113 
02114         /* If we want a write lock instead of a read lock, get it now. */
02115         if (F_ISSET(dbc, DBC_RMW)) {
02116                 ACQUIRE_WRITE_LOCK(dbc, ret);
02117                 if (ret != 0)
02118                         return (ret);
02119         }
02120 
02121         cp->indx = 0;
02122 
02123         /* If on an empty page or a deleted record, move to the next one. */
02124         if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))
02125                 if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
02126                         return (ret);
02127 
02128         return (0);
02129 }

static int __bam_c_get DBC dbc,
DBT key,
DBT data,
u_int32_t  flags,
db_pgno_t pgnop
[static]
 

Definition at line 788 of file bt_cursor.c.

References __bam_c_first(), __bam_c_last(), __bam_c_next(), __bam_c_prev(), __bam_c_search(), __bam_getboth_finddatum(), __bam_getbothc(), __bam_isopd(), __db_unknown_flag(), C_DELETED, DB_CURRENT, DB_DBT_ISSET, DB_FIRST, DB_GET_BOTH, DB_GET_BOTH_RANGE, DB_GET_BOTHC, DB_KEYEMPTY, DB_LAST, DB_NEXT, DB_NEXT_DUP, DB_NEXT_NODUP, DB_NOTFOUND, DB_PREV, DB_PREV_NODUP, DB_SET, DB_SET_RANGE, DB_SET_RECNO, DBC_OPD, __db::dbenv, dbp, err, F_CLR, F_ISSET, F_SET, IS_CUR_DELETED, IS_CUR_DUPLICATE, __db::mpf, mpf, NULL, NUM_ENT, and PGNO_INVALID.

Referenced by __bam_c_init().

00793 {
00794         BTREE_CURSOR *cp;
00795         DB *dbp;
00796         DB_MPOOLFILE *mpf;
00797         db_pgno_t orig_pgno;
00798         db_indx_t orig_indx;
00799         int exact, newopd, ret;
00800 
00801         dbp = dbc->dbp;
00802         mpf = dbp->mpf;
00803         cp = (BTREE_CURSOR *)dbc->internal;
00804         orig_pgno = cp->pgno;
00805         orig_indx = cp->indx;
00806 
00807         newopd = 0;
00808         switch (flags) {
00809         case DB_CURRENT:
00810                 /* It's not possible to return a deleted record. */
00811                 if (F_ISSET(cp, C_DELETED)) {
00812                         ret = DB_KEYEMPTY;
00813                         goto err;
00814                 }
00815 
00816                 /*
00817                  * Acquire the current page.  We have at least a read-lock
00818                  * already.  The caller may have set DB_RMW asking for a
00819                  * write lock, but upgrading to a write lock has no better
00820                  * chance of succeeding now instead of later, so don't try.
00821                  */
00822                 if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
00823                         goto err;
00824                 break;
00825         case DB_FIRST:
00826                 newopd = 1;
00827                 if ((ret = __bam_c_first(dbc)) != 0)
00828                         goto err;
00829                 break;
00830         case DB_GET_BOTH:
00831         case DB_GET_BOTH_RANGE:
00832                 /*
00833                  * There are two ways to get here based on DBcursor->c_get
00834                  * with the DB_GET_BOTH/DB_GET_BOTH_RANGE flags set:
00835                  *
00836                  * 1. Searching a sorted off-page duplicate tree: do a tree
00837                  * search.
00838                  *
00839                  * 2. Searching btree: do a tree search.  If it returns a
00840                  * reference to off-page duplicate tree, return immediately
00841                  * and let our caller deal with it.  If the search doesn't
00842                  * return a reference to off-page duplicate tree, continue
00843                  * with an on-page search.
00844                  */
00845                 if (F_ISSET(dbc, DBC_OPD)) {
00846                         if ((ret = __bam_c_search(
00847                             dbc, PGNO_INVALID, data, flags, &exact)) != 0)
00848                                 goto err;
00849                         if (flags == DB_GET_BOTH) {
00850                                 if (!exact) {
00851                                         ret = DB_NOTFOUND;
00852                                         goto err;
00853                                 }
00854                                 break;
00855                         }
00856 
00857                         /*
00858                          * We didn't require an exact match, so the search may
00859                          * may have returned an entry past the end of the page,
00860                          * or we may be referencing a deleted record.  If so,
00861                          * move to the next entry.
00862                          */
00863                         if ((cp->indx == NUM_ENT(cp->page) ||
00864                             IS_CUR_DELETED(dbc)) &&
00865                             (ret = __bam_c_next(dbc, 1, 0)) != 0)
00866                                 goto err;
00867                 } else {
00868                         if ((ret = __bam_c_search(
00869                             dbc, PGNO_INVALID, key, flags, &exact)) != 0)
00870                                 return (ret);
00871                         if (!exact) {
00872                                 ret = DB_NOTFOUND;
00873                                 goto err;
00874                         }
00875 
00876                         if (pgnop != NULL && __bam_isopd(dbc, pgnop)) {
00877                                 newopd = 1;
00878                                 break;
00879                         }
00880                         if ((ret =
00881                             __bam_getboth_finddatum(dbc, data, flags)) != 0)
00882                                 goto err;
00883                 }
00884                 break;
00885         case DB_GET_BOTHC:
00886                 if ((ret = __bam_getbothc(dbc, data)) != 0)
00887                         goto err;
00888                 break;
00889         case DB_LAST:
00890                 newopd = 1;
00891                 if ((ret = __bam_c_last(dbc)) != 0)
00892                         goto err;
00893                 break;
00894         case DB_NEXT:
00895                 newopd = 1;
00896                 if (cp->pgno == PGNO_INVALID) {
00897                         if ((ret = __bam_c_first(dbc)) != 0)
00898                                 goto err;
00899                 } else
00900                         if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
00901                                 goto err;
00902                 break;
00903         case DB_NEXT_DUP:
00904                 if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
00905                         goto err;
00906                 if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) {
00907                         ret = DB_NOTFOUND;
00908                         goto err;
00909                 }
00910                 break;
00911         case DB_NEXT_NODUP:
00912                 newopd = 1;
00913                 if (cp->pgno == PGNO_INVALID) {
00914                         if ((ret = __bam_c_first(dbc)) != 0)
00915                                 goto err;
00916                 } else
00917                         do {
00918                                 if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
00919                                         goto err;
00920                         } while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx));
00921                 break;
00922         case DB_PREV:
00923                 newopd = 1;
00924                 if (cp->pgno == PGNO_INVALID) {
00925                         if ((ret = __bam_c_last(dbc)) != 0)
00926                                 goto err;
00927                 } else
00928                         if ((ret = __bam_c_prev(dbc)) != 0)
00929                                 goto err;
00930                 break;
00931         case DB_PREV_NODUP:
00932                 newopd = 1;
00933                 if (cp->pgno == PGNO_INVALID) {
00934                         if ((ret = __bam_c_last(dbc)) != 0)
00935                                 goto err;
00936                 } else
00937                         do {
00938                                 if ((ret = __bam_c_prev(dbc)) != 0)
00939                                         goto err;
00940                         } while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx));
00941                 break;
00942         case DB_SET:
00943         case DB_SET_RECNO:
00944                 newopd = 1;
00945                 if ((ret = __bam_c_search(dbc,
00946                     PGNO_INVALID, key, flags, &exact)) != 0)
00947                         goto err;
00948                 break;
00949         case DB_SET_RANGE:
00950                 newopd = 1;
00951                 if ((ret = __bam_c_search(dbc,
00952                     PGNO_INVALID, key, flags, &exact)) != 0)
00953                         goto err;
00954 
00955                 /*
00956                  * As we didn't require an exact match, the search function
00957                  * may have returned an entry past the end of the page.  Or,
00958                  * we may be referencing a deleted record.  If so, move to
00959                  * the next entry.
00960                  */
00961                 if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc))
00962                         if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
00963                                 goto err;
00964                 break;
00965         default:
00966                 ret = __db_unknown_flag(dbp->dbenv, "__bam_c_get", flags);
00967                 goto err;
00968         }
00969 
00970         /*
00971          * We may have moved to an off-page duplicate tree.  Return that
00972          * information to our caller.
00973          */
00974         if (newopd && pgnop != NULL)
00975                 (void)__bam_isopd(dbc, pgnop);
00976 
00977         /*
00978          * Don't return the key, it was passed to us (this is true even if the
00979          * application defines a compare function returning equality for more
00980          * than one key value, since in that case which actual value we store
00981          * in the database is undefined -- and particularly true in the case of
00982          * duplicates where we only store one key value).
00983          */
00984         if (flags == DB_GET_BOTH ||
00985             flags == DB_GET_BOTH_RANGE || flags == DB_SET)
00986                 F_SET(key, DB_DBT_ISSET);
00987 
00988 err:    /*
00989          * Regardless of whether we were successful or not, if the cursor
00990          * moved, clear the delete flag, DBcursor->c_get never references
00991          * a deleted key, if it moved at all.
00992          */
00993         if (F_ISSET(cp, C_DELETED) &&
00994             (cp->pgno != orig_pgno || cp->indx != orig_indx))
00995                 F_CLR(cp, C_DELETED);
00996 
00997         return (ret);
00998 }

static int __bam_c_getstack DBC dbc  )  [static]
 

Definition at line 2732 of file bt_cursor.c.

References __bam_search(), __db_ret(), __db_dbt::data, dbp, err, h, memset, __db::mpf, mpf, NULL, PGNO_INVALID, and S_KEYFIRST.

Referenced by __bam_c_del(), and __bam_c_put().

02734 {
02735         BTREE_CURSOR *cp;
02736         DB *dbp;
02737         DBT dbt;
02738         DB_MPOOLFILE *mpf;
02739         PAGE *h;
02740         int exact, ret, t_ret;
02741 
02742         dbp = dbc->dbp;
02743         mpf = dbp->mpf;
02744         cp = (BTREE_CURSOR *)dbc->internal;
02745 
02746         /*
02747          * Get the page with the current item on it.  The caller of this
02748          * routine has to already hold a read lock on the page, so there
02749          * is no additional lock to acquire.
02750          */
02751         if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0)
02752                 return (ret);
02753 
02754         /* Get a copy of a key from the page. */
02755         memset(&dbt, 0, sizeof(DBT));
02756         if ((ret = __db_ret(dbp,
02757             h, 0, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
02758                 goto err;
02759 
02760         /* Get a write-locked stack for the page. */
02761         exact = 0;
02762         ret = __bam_search(dbc, PGNO_INVALID,
02763             &dbt, S_KEYFIRST, 1, NULL, &exact);
02764 
02765 err:    /* Discard the key and the page. */
02766         if ((t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0)
02767                 ret = t_ret;
02768 
02769         return (ret);
02770 }

int __bam_c_init DBC dbc,
DBTYPE  dbtype
 

Definition at line 192 of file bt_cursor.c.

References __bam_bulk(), __bam_c_close(), __bam_c_del(), __bam_c_destroy(), __bam_c_get(), __bam_c_put(), __bam_c_writelock(), __db_c_close(), __db_c_count(), __db_c_del(), __db_c_dup(), __db_c_get(), __db_c_pget(), __db_c_put(), __os_malloc(), __ram_c_del(), __ram_c_get(), __ram_c_put(), DB_BTREE, dbenv, and NULL.

Referenced by __db_icursor().

00195 {
00196         DB_ENV *dbenv;
00197         int ret;
00198 
00199         dbenv = dbc->dbp->dbenv;
00200 
00201         /* Allocate/initialize the internal structure. */
00202         if (dbc->internal == NULL && (ret =
00203             __os_malloc(dbenv, sizeof(BTREE_CURSOR), &dbc->internal)) != 0)
00204                 return (ret);
00205 
00206         /* Initialize methods. */
00207         dbc->c_close = __db_c_close;
00208         dbc->c_count = __db_c_count;
00209         dbc->c_del = __db_c_del;
00210         dbc->c_dup = __db_c_dup;
00211         dbc->c_get = dbc->c_real_get = __db_c_get;
00212         dbc->c_pget = __db_c_pget;
00213         dbc->c_put = __db_c_put;
00214         if (dbtype == DB_BTREE) {
00215                 dbc->c_am_bulk = __bam_bulk;
00216                 dbc->c_am_close = __bam_c_close;
00217                 dbc->c_am_del = __bam_c_del;
00218                 dbc->c_am_destroy = __bam_c_destroy;
00219                 dbc->c_am_get = __bam_c_get;
00220                 dbc->c_am_put = __bam_c_put;
00221                 dbc->c_am_writelock = __bam_c_writelock;
00222         } else {
00223                 dbc->c_am_bulk = __bam_bulk;
00224                 dbc->c_am_close = __bam_c_close;
00225                 dbc->c_am_del = __ram_c_del;
00226                 dbc->c_am_destroy = __bam_c_destroy;
00227                 dbc->c_am_get = __ram_c_get;
00228                 dbc->c_am_put = __ram_c_put;
00229                 dbc->c_am_writelock = __bam_c_writelock;
00230         }
00231 
00232         return (0);
00233 }

static int __bam_c_last DBC dbc  )  [static]
 

Definition at line 2136 of file bt_cursor.c.

References __bam_c_prev(), ACQUIRE_CUR_COUPLE, ACQUIRE_WRITE_LOCK, DB_LOCK_READ, DBC_RMW, F_ISSET, GET_BINTERNAL, IS_CUR_DELETED, ISLEAF, NUM_ENT, O_INDX, P_INDX, P_LBTREE, and TYPE.

Referenced by __bam_c_get().

02138 {
02139         BTREE_CURSOR *cp;
02140         db_pgno_t pgno;
02141         int ret;
02142 
02143         cp = (BTREE_CURSOR *)dbc->internal;
02144         ret = 0;
02145 
02146         /* Walk down the right-hand side of the tree. */
02147         for (pgno = cp->root;;) {
02148                 ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
02149                 if (ret != 0)
02150                         return (ret);
02151 
02152                 /* If we find a leaf page, we're done. */
02153                 if (ISLEAF(cp->page))
02154                         break;
02155 
02156                 pgno = GET_BINTERNAL(dbc->dbp, cp->page,
02157                     NUM_ENT(cp->page) - O_INDX)->pgno;
02158         }
02159 
02160         /* If we want a write lock instead of a read lock, get it now. */
02161         if (F_ISSET(dbc, DBC_RMW)) {
02162                 ACQUIRE_WRITE_LOCK(dbc, ret);
02163                 if (ret != 0)
02164                         return (ret);
02165         }
02166 
02167         cp->indx = NUM_ENT(cp->page) == 0 ? 0 :
02168             NUM_ENT(cp->page) -
02169             (TYPE(cp->page) == P_LBTREE ? P_INDX : O_INDX);
02170 
02171         /* If on an empty page or a deleted record, move to the previous one. */
02172         if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))
02173                 if ((ret = __bam_c_prev(dbc)) != 0)
02174                         return (ret);
02175 
02176         return (0);
02177 }

static int __bam_c_next DBC dbc,
int  initial_move,
int  deleted_okay
[static]
 

Definition at line 2184 of file bt_cursor.c.

References ACQUIRE_CUR, DB_BTREE, DB_LOCK_NG, DB_LOCK_READ, DB_LOCK_WRITE, DB_NOTFOUND, DBC_OPD, DBC_RMW, F_ISSET, IS_CUR_DELETED, lock_mode, NEXT_PGNO, NULL, NUM_ENT, O_INDX, P_INDX, and PGNO_INVALID.

Referenced by __bam_bulk(), __bam_bulk_duplicates(), __bam_c_first(), and __bam_c_get().

02187 {
02188         BTREE_CURSOR *cp;
02189         db_indx_t adjust;
02190         db_lockmode_t lock_mode;
02191         db_pgno_t pgno;
02192         int ret;
02193 
02194         cp = (BTREE_CURSOR *)dbc->internal;
02195         ret = 0;
02196 
02197         /*
02198          * We're either moving through a page of duplicates or a btree leaf
02199          * page.
02200          *
02201          * !!!
02202          * This code handles empty pages and pages with only deleted entries.
02203          */
02204         if (F_ISSET(dbc, DBC_OPD)) {
02205                 adjust = O_INDX;
02206                 lock_mode = DB_LOCK_NG;
02207         } else {
02208                 adjust = dbc->dbtype == DB_BTREE ? P_INDX : O_INDX;
02209                 lock_mode =
02210                     F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
02211         }
02212         if (cp->page == NULL) {
02213                 ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
02214                 if (ret != 0)
02215                         return (ret);
02216         }
02217 
02218         if (initial_move)
02219                 cp->indx += adjust;
02220 
02221         for (;;) {
02222                 /*
02223                  * If at the end of the page, move to a subsequent page.
02224                  *
02225                  * !!!
02226                  * Check for >= NUM_ENT.  If the original search landed us on
02227                  * NUM_ENT, we may have incremented indx before the test.
02228                  */
02229                 if (cp->indx >= NUM_ENT(cp->page)) {
02230                         if ((pgno
02231                             = NEXT_PGNO(cp->page)) == PGNO_INVALID)
02232                                 return (DB_NOTFOUND);
02233 
02234                         ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
02235                         if (ret != 0)
02236                                 return (ret);
02237                         cp->indx = 0;
02238                         continue;
02239                 }
02240                 if (!deleted_okay && IS_CUR_DELETED(dbc)) {
02241                         cp->indx += adjust;
02242                         continue;
02243                 }
02244                 break;
02245         }
02246         return (0);
02247 }

static int __bam_c_physdel DBC dbc  )  [static]
 

Definition at line 2540 of file bt_cursor.c.

References __bam_ca_di(), __bam_ditem(), __bam_dpages(), __bam_search(), __bam_stkrel(), __db_lget(), __db_pgfmt(), __db_ret(), BT_STK_POP, BT_STK_PUSH, __cursor::csp, DB_AM_REVSPLITOFF, DB_LOCK_WRITE, DBC_OPD, __db::dbenv, dbp, F_ISSET, GET_BINTERNAL, GET_RINTERNAL, h, ISLEAF, key, LEAFLEVEL, ndb_logevent::level, lock, memset, __db::mpf, mpf, NULL, NUM_ENT, P_IBTREE, P_IRECNO, P_LBTREE, _epg::page, _db_page::pgno, PGNO, PGNO_INVALID, S_WRPAIR, __cursor::sp, STK_NOLOCK, and TYPE.

Referenced by __bam_c_close().

02542 {
02543         BTREE_CURSOR *cp;
02544         DB *dbp;
02545         DBT key;
02546         DB_LOCK lock;
02547         DB_MPOOLFILE *mpf;
02548         PAGE *h;
02549         db_pgno_t pgno;
02550         int delete_page, empty_page, exact, level, ret;
02551 
02552         dbp = dbc->dbp;
02553         mpf = dbp->mpf;
02554         cp = (BTREE_CURSOR *)dbc->internal;
02555         delete_page = empty_page = ret = 0;
02556 
02557         /* If the page is going to be emptied, consider deleting it. */
02558         delete_page = empty_page =
02559             NUM_ENT(cp->page) == (TYPE(cp->page) == P_LBTREE ? 2 : 1);
02560 
02561         /*
02562          * Check if the application turned off reverse splits.  Applications
02563          * can't turn off reverse splits in off-page duplicate trees, that
02564          * space will never be reused unless the exact same key is specified.
02565          */
02566         if (delete_page &&
02567             !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_AM_REVSPLITOFF))
02568                 delete_page = 0;
02569 
02570         /*
02571          * We never delete the last leaf page.  (Not really true -- we delete
02572          * the last leaf page of off-page duplicate trees, but that's handled
02573          * by our caller, not down here.)
02574          */
02575         if (delete_page && cp->pgno == cp->root)
02576                 delete_page = 0;
02577 
02578         /*
02579          * To delete a leaf page other than an empty root page, we need a
02580          * copy of a key from the page.  Use the 0th page index since it's
02581          * the last key the page held.
02582          *
02583          * !!!
02584          * Note that because __bam_c_physdel is always called from a cursor
02585          * close, it should be safe to use the cursor's own "my_rkey" memory
02586          * to temporarily hold this key.  We shouldn't own any returned-data
02587          * memory of interest--if we do, we're in trouble anyway.
02588          */
02589         if (delete_page) {
02590                 memset(&key, 0, sizeof(DBT));
02591                 if ((ret = __db_ret(dbp, cp->page,
02592                     0, &key, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
02593                         return (ret);
02594         }
02595 
02596         /*
02597          * Delete the items.  If page isn't empty, we adjust the cursors.
02598          *
02599          * !!!
02600          * The following operations to delete a page may deadlock.  The easy
02601          * scenario is if we're deleting an item because we're closing cursors
02602          * because we've already deadlocked and want to call txn->abort.  If
02603          * we fail due to deadlock, we'll leave a locked, possibly empty page
02604          * in the tree, which won't be empty long because we'll undo the delete
02605          * when we undo the transaction's modifications.
02606          *
02607          * !!!
02608          * Delete the key item first, otherwise the on-page duplicate checks
02609          * in __bam_ditem() won't work!
02610          */
02611         if (TYPE(cp->page) == P_LBTREE) {
02612                 if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
02613                         return (ret);
02614                 if (!empty_page)
02615                         if ((ret = __bam_ca_di(dbc,
02616                             PGNO(cp->page), cp->indx, -1)) != 0)
02617                                 return (ret);
02618         }
02619         if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
02620                 return (ret);
02621         if (!empty_page)
02622                 if ((ret = __bam_ca_di(dbc, PGNO(cp->page), cp->indx, -1)) != 0)
02623                         return (ret);
02624 
02625         /* If we're not going to try and delete the page, we're done. */
02626         if (!delete_page)
02627                 return (0);
02628 
02629         /*
02630          * Call __bam_search to reacquire the empty leaf page, but this time
02631          * get both the leaf page and it's parent, locked.  Jump back up the
02632          * tree, until we have the top pair of pages that we want to delete.
02633          * Once we have the top page that we want to delete locked, lock the
02634          * underlying pages and check to make sure they're still empty.  If
02635          * they are, delete them.
02636          */
02637         for (level = LEAFLEVEL;; ++level) {
02638                 /* Acquire a page and its parent, locked. */
02639                 if ((ret = __bam_search(dbc, PGNO_INVALID,
02640                     &key, S_WRPAIR, level, NULL, &exact)) != 0)
02641                         return (ret);
02642 
02643                 /*
02644                  * If we reach the root or the parent page isn't going to be
02645                  * empty when we delete one record, stop.
02646                  */
02647                 h = cp->csp[-1].page;
02648                 if (h->pgno == cp->root || NUM_ENT(h) != 1)
02649                         break;
02650 
02651                 /* Discard the stack, retaining no locks. */
02652                 (void)__bam_stkrel(dbc, STK_NOLOCK);
02653         }
02654 
02655         /*
02656          * Move the stack pointer one after the last entry, we may be about
02657          * to push more items onto the page stack.
02658          */
02659         ++cp->csp;
02660 
02661         /*
02662          * cp->csp[-2].page is now the parent page, which we may or may not be
02663          * going to delete, and cp->csp[-1].page is the first page we know we
02664          * are going to delete.  Walk down the chain of pages, acquiring pages
02665          * until we've acquired a leaf page.  Generally, this shouldn't happen;
02666          * we should only see a single internal page with one item and a single
02667          * leaf page with no items.  The scenario where we could see something
02668          * else is if reverse splits were turned off for awhile and then turned
02669          * back on.  That could result in all sorts of strangeness, e.g., empty
02670          * pages in the tree, trees that looked like linked lists, and so on.
02671          *
02672          * !!!
02673          * Sheer paranoia: if we find any pages that aren't going to be emptied
02674          * by the delete, someone else added an item while we were walking the
02675          * tree, and we discontinue the delete.  Shouldn't be possible, but we
02676          * check regardless.
02677          */
02678         for (h = cp->csp[-1].page;;) {
02679                 if (ISLEAF(h)) {
02680                         if (NUM_ENT(h) != 0)
02681                                 break;
02682                         break;
02683                 } else
02684                         if (NUM_ENT(h) != 1)
02685                                 break;
02686 
02687                 /*
02688                  * Get the next page, write lock it and push it onto the stack.
02689                  * We know it's index 0, because it can only have one element.
02690                  */
02691                 switch (TYPE(h)) {
02692                 case P_IBTREE:
02693                         pgno = GET_BINTERNAL(dbp, h, 0)->pgno;
02694                         break;
02695                 case P_IRECNO:
02696                         pgno = GET_RINTERNAL(dbp, h, 0)->pgno;
02697                         break;
02698                 default:
02699                         return (__db_pgfmt(dbp->dbenv, PGNO(h)));
02700                 }
02701 
02702                 if ((ret =
02703                     __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &lock)) != 0)
02704                         break;
02705                 if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
02706                         break;
02707                 BT_STK_PUSH(dbp->dbenv, cp, h, 0, lock, DB_LOCK_WRITE, ret);
02708                 if (ret != 0)
02709                         break;
02710         }
02711 
02712         /* Adjust the cursor stack to reference the last page on the stack. */
02713         BT_STK_POP(cp);
02714 
02715         /*
02716          * If everything worked, delete the stack, otherwise, release the
02717          * stack and page locks without further damage.
02718          */
02719         if (ret == 0)
02720                 ret = __bam_dpages(dbc, cp->sp);
02721         else
02722                 (void)__bam_stkrel(dbc, 0);
02723 
02724         return (ret);
02725 }

static int __bam_c_prev DBC dbc  )  [static]
 

Definition at line 2254 of file bt_cursor.c.

References ACQUIRE_CUR, DB_BTREE, DB_LOCK_NG, DB_LOCK_READ, DB_LOCK_WRITE, DB_NOTFOUND, DBC_OPD, DBC_RMW, F_ISSET, IS_CUR_DELETED, lock_mode, NULL, NUM_ENT, O_INDX, P_INDX, PGNO_INVALID, and PREV_PGNO.

Referenced by __bam_bulk_duplicates(), __bam_c_get(), __bam_c_last(), and __bam_get_prev().

02256 {
02257         BTREE_CURSOR *cp;
02258         db_indx_t adjust;
02259         db_lockmode_t lock_mode;
02260         db_pgno_t pgno;
02261         int ret;
02262 
02263         cp = (BTREE_CURSOR *)dbc->internal;
02264         ret = 0;
02265 
02266         /*
02267          * We're either moving through a page of duplicates or a btree leaf
02268          * page.
02269          *
02270          * !!!
02271          * This code handles empty pages and pages with only deleted entries.
02272          */
02273         if (F_ISSET(dbc, DBC_OPD)) {
02274                 adjust = O_INDX;
02275                 lock_mode = DB_LOCK_NG;
02276         } else {
02277                 adjust = dbc->dbtype == DB_BTREE ? P_INDX : O_INDX;
02278                 lock_mode =
02279                     F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
02280         }
02281         if (cp->page == NULL) {
02282                 ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
02283                 if (ret != 0)
02284                         return (ret);
02285         }
02286 
02287         for (;;) {
02288                 /* If at the beginning of the page, move to a previous one. */
02289                 if (cp->indx == 0) {
02290                         if ((pgno =
02291                             PREV_PGNO(cp->page)) == PGNO_INVALID)
02292                                 return (DB_NOTFOUND);
02293 
02294                         ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
02295                         if (ret != 0)
02296                                 return (ret);
02297 
02298                         if ((cp->indx = NUM_ENT(cp->page)) == 0)
02299                                 continue;
02300                 }
02301 
02302                 /* Ignore deleted records. */
02303                 cp->indx -= adjust;
02304                 if (IS_CUR_DELETED(dbc))
02305                         continue;
02306 
02307                 break;
02308         }
02309         return (0);
02310 }

static int __bam_c_put DBC dbc,
DBT key,
DBT data,
u_int32_t  flags,
db_pgno_t pgnop
[static]
 

Definition at line 1759 of file bt_cursor.c.

References __bam_c_getstack(), __bam_c_search(), __bam_cmp(), __bam_iitem(), __bam_isopd(), __bam_split(), __bam_stkrel(), __db_duperr(), __db_ret(), __db_unknown_flag(), ACQUIRE_WRITE_LOCK, arg, BT_STK_POP, C_DELETED, C_RECNUM, cmp, __cursor::csp, DB_AFTER, DB_AM_DUP, DB_BEFORE, DB_CURRENT, DB_KEYFIRST, DB_KEYLAST, DB_NEEDSPLIT, DB_NODUPDATA, DBC_OPD, __db::dbenv, dbp, DISCARD_CUR, err, F_CLR, F_ISSET, IS_DELETED, IS_DUPLICATE, memset, __db::mpf, mpf, NULL, NUM_ENT, O_INDX, P_INDX, P_INP, _epg::page, split(), stack, STK_CLRDBC, and STK_NOLOCK.

Referenced by __bam_c_init().

01764 {
01765         BTREE_CURSOR *cp;
01766         DB *dbp;
01767         DBT dbt;
01768         DB_MPOOLFILE *mpf;
01769         db_pgno_t root_pgno;
01770         u_int32_t iiop;
01771         int cmp, exact, ret, stack;
01772         void *arg;
01773 
01774         dbp = dbc->dbp;
01775         mpf = dbp->mpf;
01776         cp = (BTREE_CURSOR *)dbc->internal;
01777         root_pgno = cp->root;
01778 
01779 split:  ret = stack = 0;
01780         switch (flags) {
01781         case DB_AFTER:
01782         case DB_BEFORE:
01783         case DB_CURRENT:
01784                 iiop = flags;
01785 
01786                 /*
01787                  * If the Btree has record numbers (and we're not replacing an
01788                  * existing record), we need a complete stack so that we can
01789                  * adjust the record counts.  The check for flags == DB_CURRENT
01790                  * is superfluous but left in for clarity.  (If C_RECNUM is set
01791                  * we know that flags must be DB_CURRENT, as DB_AFTER/DB_BEFORE
01792                  * are illegal in a Btree unless it's configured for duplicates
01793                  * and you cannot configure a Btree for both record renumbering
01794                  * and duplicates.)
01795                  */
01796                 if (flags == DB_CURRENT &&
01797                     F_ISSET(cp, C_RECNUM) && F_ISSET(cp, C_DELETED)) {
01798                         if ((ret = __bam_c_getstack(dbc)) != 0)
01799                                 goto err;
01800                         /*
01801                          * Initialize the cursor from the stack.  Don't take
01802                          * the page number or page index, they should already
01803                          * be set.
01804                          */
01805                         cp->page = cp->csp->page;
01806                         cp->lock = cp->csp->lock;
01807                         cp->lock_mode = cp->csp->lock_mode;
01808 
01809                         stack = 1;
01810                         break;
01811                 }
01812 
01813                 /* Acquire the current page with a write lock. */
01814                 ACQUIRE_WRITE_LOCK(dbc, ret);
01815                 if (ret != 0)
01816                         goto err;
01817                 if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
01818                         goto err;
01819                 break;
01820         case DB_KEYFIRST:
01821         case DB_KEYLAST:
01822         case DB_NODUPDATA:
01823                 /*
01824                  * Searching off-page, sorted duplicate tree: do a tree search
01825                  * for the correct item; __bam_c_search returns the smallest
01826                  * slot greater than the key, use it.
01827                  *
01828                  * See comment below regarding where we can start the search.
01829                  */
01830                 if (F_ISSET(dbc, DBC_OPD)) {
01831                         if ((ret = __bam_c_search(dbc,
01832                             F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno,
01833                             data, flags, &exact)) != 0)
01834                                 goto err;
01835                         stack = 1;
01836 
01837                         /* Disallow "sorted" duplicate duplicates. */
01838                         if (exact) {
01839                                 if (IS_DELETED(dbp, cp->page, cp->indx)) {
01840                                         iiop = DB_CURRENT;
01841                                         break;
01842                                 }
01843                                 ret = __db_duperr(dbp, flags);
01844                                 goto err;
01845                         }
01846                         iiop = DB_BEFORE;
01847                         break;
01848                 }
01849 
01850                 /*
01851                  * Searching a btree.
01852                  *
01853                  * If we've done a split, we can start the search from the
01854                  * parent of the split page, which __bam_split returned
01855                  * for us in root_pgno, unless we're in a Btree with record
01856                  * numbering.  In that case, we'll need the true root page
01857                  * in order to adjust the record count.
01858                  */
01859                 if ((ret = __bam_c_search(dbc,
01860                     F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, key,
01861                     flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
01862                     DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
01863                         goto err;
01864                 stack = 1;
01865 
01866                 /*
01867                  * If we don't have an exact match, __bam_c_search returned
01868                  * the smallest slot greater than the key, use it.
01869                  */
01870                 if (!exact) {
01871                         iiop = DB_KEYFIRST;
01872                         break;
01873                 }
01874 
01875                 /*
01876                  * If duplicates aren't supported, replace the current item.
01877                  * (If implementing the DB->put function, our caller already
01878                  * checked the DB_NOOVERWRITE flag.)
01879                  */
01880                 if (!F_ISSET(dbp, DB_AM_DUP)) {
01881                         iiop = DB_CURRENT;
01882                         break;
01883                 }
01884 
01885                 /*
01886                  * If we find a matching entry, it may be an off-page duplicate
01887                  * tree.  Return the page number to our caller, we need a new
01888                  * cursor.
01889                  */
01890                 if (pgnop != NULL && __bam_isopd(dbc, pgnop))
01891                         goto done;
01892 
01893                 /* If the duplicates aren't sorted, move to the right slot. */
01894                 if (dbp->dup_compare == NULL) {
01895                         if (flags == DB_KEYFIRST)
01896                                 iiop = DB_BEFORE;
01897                         else
01898                                 for (;; cp->indx += P_INDX)
01899                                         if (cp->indx + P_INDX >=
01900                                             NUM_ENT(cp->page) ||
01901                                             !IS_DUPLICATE(dbc, cp->indx,
01902                                             cp->indx + P_INDX)) {
01903                                                 iiop = DB_AFTER;
01904                                                 break;
01905                                         }
01906                         break;
01907                 }
01908 
01909                 /*
01910                  * We know that we're looking at the first of a set of sorted
01911                  * on-page duplicates.  Walk the list to find the right slot.
01912                  */
01913                 for (;; cp->indx += P_INDX) {
01914                         if ((ret = __bam_cmp(dbp, data, cp->page,
01915                             cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
01916                                 goto err;
01917                         if (cmp < 0) {
01918                                 iiop = DB_BEFORE;
01919                                 break;
01920                         }
01921 
01922                         /* Disallow "sorted" duplicate duplicates. */
01923                         if (cmp == 0) {
01924                                 if (IS_DELETED(dbp, cp->page, cp->indx)) {
01925                                         iiop = DB_CURRENT;
01926                                         break;
01927                                 }
01928                                 ret = __db_duperr(dbp, flags);
01929                                 goto err;
01930                         }
01931 
01932                         if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
01933                             P_INP(dbp, ((PAGE *)cp->page))[cp->indx] !=
01934                             P_INP(dbp, ((PAGE *)cp->page))[cp->indx + P_INDX]) {
01935                                 iiop = DB_AFTER;
01936                                 break;
01937                         }
01938                 }
01939                 break;
01940         default:
01941                 ret = __db_unknown_flag(dbp->dbenv, "__bam_c_put", flags);
01942                 goto err;
01943         }
01944 
01945         switch (ret = __bam_iitem(dbc, key, data, iiop, 0)) {
01946         case 0:
01947                 break;
01948         case DB_NEEDSPLIT:
01949                 /*
01950                  * To split, we need a key for the page.  Either use the key
01951                  * argument or get a copy of the key from the page.
01952                  */
01953                 if (flags == DB_AFTER ||
01954                     flags == DB_BEFORE || flags == DB_CURRENT) {
01955                         memset(&dbt, 0, sizeof(DBT));
01956                         if ((ret = __db_ret(dbp, cp->page, 0, &dbt,
01957                             &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
01958                                 goto err;
01959                         arg = &dbt;
01960                 } else
01961                         arg = F_ISSET(dbc, DBC_OPD) ? data : key;
01962 
01963                 /*
01964                  * Discard any locks and pinned pages (the locks are discarded
01965                  * even if we're running with transactions, as they lock pages
01966                  * that we're sorry we ever acquired).  If stack is set and the
01967                  * cursor entries are valid, they point to the same entries as
01968                  * the stack, don't free them twice.
01969                  */
01970                 if (stack)
01971                         ret = __bam_stkrel(dbc, STK_CLRDBC | STK_NOLOCK);
01972                 else
01973                         DISCARD_CUR(dbc, ret);
01974                 if (ret != 0)
01975                         goto err;
01976 
01977                 /* Split the tree. */
01978                 if ((ret = __bam_split(dbc, arg, &root_pgno)) != 0)
01979                         return (ret);
01980 
01981                 goto split;
01982         default:
01983                 goto err;
01984         }
01985 
01986 err:
01987 done:   /*
01988          * Discard any pages pinned in the tree and their locks, except for
01989          * the leaf page.  Note, the leaf page participated in any stack we
01990          * acquired, and so we have to adjust the stack as necessary.  If
01991          * there was only a single page on the stack, we don't have to free
01992          * further stack pages.
01993          */
01994         if (stack && BT_STK_POP(cp) != NULL)
01995                 (void)__bam_stkrel(dbc, 0);
01996 
01997         /*
01998          * Regardless of whether we were successful or not, clear the delete
01999          * flag.  If we're successful, we either moved the cursor or the item
02000          * is no longer deleted.  If we're not successful, then we're just a
02001          * copy, no need to have the flag set.
02002          */
02003         F_CLR(cp, C_DELETED);
02004 
02005         return (ret);
02006 }

int __bam_c_refresh DBC dbc  ) 
 

Definition at line 242 of file bt_cursor.c.

References B_MINKEY_TO_OVFLSIZE, __db::bt_internal, C_RECNUM, C_RENUMBER, __cursor::csp, DB_AM_RECNUM, DB_AM_RENUMBER, DB_LOCK_NG, DB_RECNO, DBC_OPD, dbp, __cursor::esp, F_ISSET, F_SET, __cursor::flags, INVALID_ORDER, LOCK_INIT, __cursor::order, __cursor::ovflsize, PGNO_INVALID, __db::pgsize, __cursor::recno, RECNO_OOB, __cursor::sp, and __cursor::stack.

Referenced by __db_icursor().

00244 {
00245         BTREE *t;
00246         BTREE_CURSOR *cp;
00247         DB *dbp;
00248 
00249         dbp = dbc->dbp;
00250         t = dbp->bt_internal;
00251         cp = (BTREE_CURSOR *)dbc->internal;
00252 
00253         /*
00254          * If our caller set the root page number, it's because the root was
00255          * known.  This is always the case for off page dup cursors.  Else,
00256          * pull it out of our internal information.
00257          */
00258         if (cp->root == PGNO_INVALID)
00259                 cp->root = t->bt_root;
00260 
00261         LOCK_INIT(cp->lock);
00262         cp->lock_mode = DB_LOCK_NG;
00263 
00264         cp->sp = cp->csp = cp->stack;
00265         cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
00266 
00267         /*
00268          * The btree leaf page data structures require that two key/data pairs
00269          * (or four items) fit on a page, but other than that there's no fixed
00270          * requirement.  The btree off-page duplicates only require two items,
00271          * to be exact, but requiring four for them as well seems reasonable.
00272          *
00273          * Recno uses the btree bt_ovflsize value -- it's close enough.
00274          */
00275         cp->ovflsize = B_MINKEY_TO_OVFLSIZE(
00276             dbp,  F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize);
00277 
00278         cp->recno = RECNO_OOB;
00279         cp->order = INVALID_ORDER;
00280         cp->flags = 0;
00281 
00282         /* Initialize for record numbers. */
00283         if (F_ISSET(dbc, DBC_OPD) ||
00284             dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) {
00285                 F_SET(cp, C_RECNUM);
00286 
00287                 /*
00288                  * All btrees that support record numbers, optionally standard
00289                  * recno trees, and all off-page duplicate recno trees have
00290                  * mutable record numbers.
00291                  */
00292                 if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) ||
00293                     F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER))
00294                         F_SET(cp, C_RENUMBER);
00295         }
00296 
00297         return (0);
00298 }

int __bam_c_rget DBC dbc,
DBT data
 

Definition at line 2015 of file bt_cursor.c.

References __bam_search(), __bam_stkrel(), __db_ret(), __db_retcopy(), __db_dbt::data, DBC_RMW, __db::dbenv, dbp, err, F_ISSET, memset, __db::mpf, mpf, NULL, PGNO_INVALID, S_FIND, and S_FIND_WR.

Referenced by __db_c_get().

02018 {
02019         BTREE_CURSOR *cp;
02020         DB *dbp;
02021         DBT dbt;
02022         DB_MPOOLFILE *mpf;
02023         db_recno_t recno;
02024         int exact, ret;
02025 
02026         dbp = dbc->dbp;
02027         mpf = dbp->mpf;
02028         cp = (BTREE_CURSOR *)dbc->internal;
02029 
02030         /*
02031          * Get the page with the current item on it.
02032          * Get a copy of the key.
02033          * Release the page, making sure we don't release it twice.
02034          */
02035         if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
02036                 return (ret);
02037         memset(&dbt, 0, sizeof(DBT));
02038         if ((ret = __db_ret(dbp, cp->page,
02039             cp->indx, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
02040                 goto err;
02041         ret = mpf->put(mpf, cp->page, 0);
02042         cp->page = NULL;
02043         if (ret != 0)
02044                 return (ret);
02045 
02046         if ((ret = __bam_search(dbc, PGNO_INVALID, &dbt,
02047             F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
02048             1, &recno, &exact)) != 0)
02049                 goto err;
02050 
02051         ret = __db_retcopy(dbp->dbenv, data,
02052             &recno, sizeof(recno), &dbc->rdata->data, &dbc->rdata->ulen);
02053 
02054         /* Release the stack. */
02055 err:    __bam_stkrel(dbc, 0);
02056 
02057         return (ret);
02058 }

static int __bam_c_search DBC dbc,
db_pgno_t  root_pgno,
const DBT key,
u_int32_t  flags,
int *  exactp
[static]
 

Definition at line 2317 of file bt_cursor.c.

References __bam_cmp(), __bam_rsearch(), __bam_search(), __db_unknown_flag(), __ram_getno(), ACQUIRE, __db::bt_internal, BT_STK_CLR, BT_STK_ENTER, C_RECNUM, cmp, __cursor::csp, DB_GET_BOTH, DB_GET_BOTH_RANGE, DB_KEYFIRST, DB_KEYLAST, DB_LOCK_WRITE, DB_NODUPDATA, DB_SET, DB_SET_RANGE, DB_SET_RECNO, DBC_RMW, __db::dbenv, dbp, DISCARD, DISCARD_CUR, F_ISSET, h, NEXT_PGNO, _db_page::next_pgno, NULL, NUM_ENT, P_INDX, P_INP, P_LBTREE, _epg::page, PGNO_INVALID, PREV_PGNO, _db_page::prev_pgno, S_DUPFIRST, S_EXACT, S_FIND, S_FIND_WR, S_KEYFIRST, S_KEYLAST, S_READ, S_WRITE, and TYPE.

Referenced by __bam_c_get(), __bam_c_put(), and __bam_getbothc().

02323 {
02324         BTREE *t;
02325         BTREE_CURSOR *cp;
02326         DB *dbp;
02327         PAGE *h;
02328         db_indx_t indx, *inp;
02329         db_pgno_t bt_lpgno;
02330         db_recno_t recno;
02331         u_int32_t sflags;
02332         int cmp, ret;
02333 
02334         dbp = dbc->dbp;
02335         cp = (BTREE_CURSOR *)dbc->internal;
02336         t = dbp->bt_internal;
02337         ret = 0;
02338 
02339         /*
02340          * Find an entry in the database.  Discard any lock we currently hold,
02341          * we're going to search the tree.
02342          */
02343         DISCARD_CUR(dbc, ret);
02344         if (ret != 0)
02345                 return (ret);
02346 
02347         switch (flags) {
02348         case DB_SET_RECNO:
02349                 if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
02350                         return (ret);
02351                 sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND) | S_EXACT;
02352                 if ((ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp)) != 0)
02353                         return (ret);
02354                 break;
02355         case DB_SET:
02356         case DB_GET_BOTH:
02357                 sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND) | S_EXACT;
02358                 goto search;
02359         case DB_GET_BOTH_RANGE:
02360                 sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND);
02361                 goto search;
02362         case DB_SET_RANGE:
02363                 sflags =
02364                     (F_ISSET(dbc, DBC_RMW) ? S_WRITE : S_READ) | S_DUPFIRST;
02365                 goto search;
02366         case DB_KEYFIRST:
02367                 sflags = S_KEYFIRST;
02368                 goto fast_search;
02369         case DB_KEYLAST:
02370         case DB_NODUPDATA:
02371                 sflags = S_KEYLAST;
02372 fast_search:    /*
02373                  * If the application has a history of inserting into the first
02374                  * or last pages of the database, we check those pages first to
02375                  * avoid doing a full search.
02376                  *
02377                  * If the tree has record numbers, we need a complete stack so
02378                  * that we can adjust the record counts, so fast_search isn't
02379                  * possible.
02380                  */
02381                 if (F_ISSET(cp, C_RECNUM))
02382                         goto search;
02383 
02384                 /*
02385                  * !!!
02386                  * We do not mutex protect the t->bt_lpgno field, which means
02387                  * that it can only be used in an advisory manner.  If we find
02388                  * page we can use, great.  If we don't, we don't care, we do
02389                  * it the slow way instead.  Regardless, copy it into a local
02390                  * variable, otherwise we might acquire a lock for a page and
02391                  * then read a different page because it changed underfoot.
02392                  */
02393                 bt_lpgno = t->bt_lpgno;
02394 
02395                 /*
02396                  * If the tree has no history of insertion, do it the slow way.
02397                  */
02398                 if (bt_lpgno == PGNO_INVALID)
02399                         goto search;
02400 
02401                 /* Lock and retrieve the page on which we last inserted. */
02402                 h = NULL;
02403                 ACQUIRE(dbc,
02404                     DB_LOCK_WRITE, bt_lpgno, cp->lock, bt_lpgno, h, ret);
02405                 if (ret != 0)
02406                         goto fast_miss;
02407 
02408                 inp = P_INP(dbp, h);
02409                 /*
02410                  * It's okay if the page type isn't right or it's empty, it
02411                  * just means that the world changed.
02412                  */
02413                 if (TYPE(h) != P_LBTREE || NUM_ENT(h) == 0)
02414                         goto fast_miss;
02415 
02416                 /*
02417                  * What we do here is test to see if we're at the beginning or
02418                  * end of the tree and if the new item sorts before/after the
02419                  * first/last page entry.  We don't try and catch inserts into
02420                  * the middle of the tree (although we could, as long as there
02421                  * were two keys on the page and we saved both the index and
02422                  * the page number of the last insert).
02423                  */
02424                 if (h->next_pgno == PGNO_INVALID) {
02425                         indx = NUM_ENT(h) - P_INDX;
02426                         if ((ret = __bam_cmp(dbp,
02427                             key, h, indx, t->bt_compare, &cmp)) != 0)
02428                                 return (ret);
02429 
02430                         if (cmp < 0)
02431                                 goto try_begin;
02432                         if (cmp > 0) {
02433                                 indx += P_INDX;
02434                                 goto fast_hit;
02435                         }
02436 
02437                         /*
02438                          * Found a duplicate.  If doing DB_KEYLAST, we're at
02439                          * the correct position, otherwise, move to the first
02440                          * of the duplicates.  If we're looking at off-page
02441                          * duplicates, duplicate duplicates aren't permitted,
02442                          * so we're done.
02443                          */
02444                         if (flags == DB_KEYLAST)
02445                                 goto fast_hit;
02446                         for (;
02447                             indx > 0 && inp[indx - P_INDX] == inp[indx];
02448                             indx -= P_INDX)
02449                                 ;
02450                         goto fast_hit;
02451                 }
02452 try_begin:      if (h->prev_pgno == PGNO_INVALID) {
02453                         indx = 0;
02454                         if ((ret = __bam_cmp(dbp,
02455                             key, h, indx, t->bt_compare, &cmp)) != 0)
02456                                 return (ret);
02457 
02458                         if (cmp > 0)
02459                                 goto fast_miss;
02460                         if (cmp < 0)
02461                                 goto fast_hit;
02462 
02463                         /*
02464                          * Found a duplicate.  If doing DB_KEYFIRST, we're at
02465                          * the correct position, otherwise, move to the last
02466                          * of the duplicates.  If we're looking at off-page
02467                          * duplicates, duplicate duplicates aren't permitted,
02468                          * so we're done.
02469                          */
02470                         if (flags == DB_KEYFIRST)
02471                                 goto fast_hit;
02472                         for (;
02473                             indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
02474                             inp[indx] == inp[indx + P_INDX];
02475                             indx += P_INDX)
02476                                 ;
02477                         goto fast_hit;
02478                 }
02479                 goto fast_miss;
02480 
02481 fast_hit:       /* Set the exact match flag, we may have found a duplicate. */
02482                 *exactp = cmp == 0;
02483 
02484                 /*
02485                  * Insert the entry in the stack.  (Our caller is likely to
02486                  * call __bam_stkrel() after our return.)
02487                  */
02488                 BT_STK_CLR(cp);
02489                 BT_STK_ENTER(dbp->dbenv,
02490                     cp, h, indx, cp->lock, cp->lock_mode, ret);
02491                 if (ret != 0)
02492                         return (ret);
02493                 break;
02494 
02495 fast_miss:      /*
02496                  * This was not the right page, so we do not need to retain
02497                  * the lock even in the presence of transactions.
02498                  */
02499                 DISCARD(dbc, 1, cp->lock, h, ret);
02500                 if (ret != 0)
02501                         return (ret);
02502 
02503 search:         if ((ret = __bam_search(dbc, root_pgno,
02504                     key, sflags, 1, NULL, exactp)) != 0)
02505                         return (ret);
02506                 break;
02507         default:
02508                 return (__db_unknown_flag(dbp->dbenv, "__bam_c_search", flags));
02509         }
02510 
02511         /* Initialize the cursor from the stack. */
02512         cp->page = cp->csp->page;
02513         cp->pgno = cp->csp->page->pgno;
02514         cp->indx = cp->csp->indx;
02515         cp->lock = cp->csp->lock;
02516         cp->lock_mode = cp->csp->lock_mode;
02517 
02518         /*
02519          * If we inserted a key into the first or last slot of the tree,
02520          * remember where it was so we can do it more quickly next time.
02521          * If there are duplicates and we are inserting into the last slot,
02522          * the cursor will point _to_ the last item, not after it, which
02523          * is why we subtract P_INDX below.
02524          */
02525         if (TYPE(cp->page) == P_LBTREE &&
02526             (flags == DB_KEYFIRST || flags == DB_KEYLAST))
02527                 t->bt_lpgno =
02528                     (NEXT_PGNO(cp->page) == PGNO_INVALID &&
02529                     cp->indx >= NUM_ENT(cp->page) - P_INDX) ||
02530                     (PREV_PGNO(cp->page) == PGNO_INVALID &&
02531                     cp->indx == 0) ? cp->pgno : PGNO_INVALID;
02532         return (0);
02533 }

static int __bam_c_writelock DBC dbc  )  [static]
 

Definition at line 2065 of file bt_cursor.c.

References ACQUIRE_WRITE_LOCK, and DB_LOCK_WRITE.

Referenced by __bam_c_init().

02067 {
02068         BTREE_CURSOR *cp;
02069         int ret;
02070 
02071         cp = (BTREE_CURSOR *)dbc->internal;
02072 
02073         if (cp->lock_mode == DB_LOCK_WRITE)
02074                 return (0);
02075 
02076         /*
02077          * When writing to an off-page duplicate tree, we need to have the
02078          * appropriate page in the primary tree locked.  The general DBC
02079          * code calls us first with the primary cursor so we can acquire the
02080          * appropriate lock.
02081          */
02082         ACQUIRE_WRITE_LOCK(dbc, ret);
02083         return (ret);
02084 }

static int __bam_get_prev DBC dbc  )  [static]
 

Definition at line 1001 of file bt_cursor.c.

References __bam_c_prev(), __bam_isopd(), __db_c_newopd(), data, DB_LAST, key, and NULL.

Referenced by __bam_bulk().

01003 {
01004         BTREE_CURSOR *cp;
01005         DBT key, data;
01006         db_pgno_t pgno;
01007         int ret;
01008 
01009         if ((ret = __bam_c_prev(dbc)) != 0)
01010                 return (ret);
01011 
01012         if (__bam_isopd(dbc, &pgno)) {
01013                 cp = (BTREE_CURSOR *)dbc->internal;
01014                 if ((ret = __db_c_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0)
01015                         return (ret);
01016                 if ((ret = cp->opd->c_am_get(cp->opd,
01017                     &key, &data, DB_LAST, NULL)) != 0)
01018                         return (ret);
01019         }
01020 
01021         return (0);
01022 }

static int __bam_getboth_finddatum DBC dbc,
DBT data,
u_int32_t  flags
[static]
 

Definition at line 1659 of file bt_cursor.c.

References __bam_cmp(), __bam_defcmp(), cmp, DB_GET_BOTH, DB_GET_BOTH_RANGE, DB_NOTFOUND, dbp, IS_CUR_DELETED, IS_DUPLICATE, NULL, NUM_ENT, O_INDX, P_INDX, and top.

Referenced by __bam_c_get(), and __bam_getbothc().

01663 {
01664         BTREE_CURSOR *cp;
01665         DB *dbp;
01666         db_indx_t base, lim, top;
01667         int cmp, ret;
01668 
01669         dbp = dbc->dbp;
01670         cp = (BTREE_CURSOR *)dbc->internal;
01671 
01672         /*
01673          * Called (sometimes indirectly) from DBC->get to search on-page data
01674          * item(s) for a matching value.  If the original flag was DB_GET_BOTH
01675          * or DB_GET_BOTH_RANGE, the cursor is set to the first undeleted data
01676          * item for the key.  If the original flag was DB_GET_BOTHC, the cursor
01677          * argument is set to the first data item we can potentially return.
01678          * In both cases, there may or may not be additional duplicate data
01679          * items to search.
01680          *
01681          * If the duplicates are not sorted, do a linear search.
01682          */
01683         if (dbp->dup_compare == NULL) {
01684                 for (;; cp->indx += P_INDX) {
01685                         if (!IS_CUR_DELETED(dbc) &&
01686                             (ret = __bam_cmp(dbp, data, cp->page,
01687                             cp->indx + O_INDX, __bam_defcmp, &cmp)) != 0)
01688                                 return (ret);
01689                         if (cmp == 0)
01690                                 return (0);
01691 
01692                         if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
01693                             !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
01694                                 break;
01695                 }
01696                 return (DB_NOTFOUND);
01697         }
01698 
01699         /*
01700          * If the duplicates are sorted, do a binary search.  The reason for
01701          * this is that large pages and small key/data pairs result in large
01702          * numbers of on-page duplicates before they get pushed off-page.
01703          *
01704          * Find the top and bottom of the duplicate set.  Binary search
01705          * requires at least two items, don't loop if there's only one.
01706          */
01707         for (base = top = cp->indx; top < NUM_ENT(cp->page); top += P_INDX)
01708                 if (!IS_DUPLICATE(dbc, cp->indx, top))
01709                         break;
01710         if (base == (top - P_INDX)) {
01711                 if  ((ret = __bam_cmp(dbp, data,
01712                     cp->page, cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
01713                         return (ret);
01714                 return (cmp == 0 ||
01715                     (cmp < 0 && flags == DB_GET_BOTH_RANGE) ? 0 : DB_NOTFOUND);
01716         }
01717 
01718         for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
01719                 cp->indx = base + ((lim >> 1) * P_INDX);
01720                 if ((ret = __bam_cmp(dbp, data, cp->page,
01721                     cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
01722                         return (ret);
01723                 if (cmp == 0) {
01724                         /*
01725                          * XXX
01726                          * No duplicate duplicates in sorted duplicate sets,
01727                          * so there can be only one.
01728                          */
01729                         if (!IS_CUR_DELETED(dbc))
01730                                 return (0);
01731                         break;
01732                 }
01733                 if (cmp > 0) {
01734                         base = cp->indx + P_INDX;
01735                         --lim;
01736                 }
01737         }
01738 
01739         /* No match found; if we're looking for an exact match, we're done. */
01740         if (flags == DB_GET_BOTH)
01741                 return (DB_NOTFOUND);
01742 
01743         /*
01744          * Base is the smallest index greater than the data item, may be zero
01745          * or a last + O_INDX index, and may be deleted.  Find an undeleted
01746          * item.
01747          */
01748         cp->indx = base;
01749         while (cp->indx < top && IS_CUR_DELETED(dbc))
01750                 cp->indx += P_INDX;
01751         return (cp->indx < top ? 0 : DB_NOTFOUND);
01752 }

static int __bam_getbothc DBC dbc,
DBT data
[static]
 

Definition at line 1588 of file bt_cursor.c.

References __bam_c_search(), __bam_cmp(), __bam_defcmp(), __bam_getboth_finddatum(), cmp, DB_GET_BOTH, DB_NOTFOUND, DBC_OPD, dbp, F_ISSET, IS_DUPLICATE, __db::mpf, mpf, NULL, NUM_ENT, P_INDX, and PGNO_INVALID.

Referenced by __bam_c_get().

01591 {
01592         BTREE_CURSOR *cp;
01593         DB *dbp;
01594         DB_MPOOLFILE *mpf;
01595         int cmp, exact, ret;
01596 
01597         dbp = dbc->dbp;
01598         mpf = dbp->mpf;
01599         cp = (BTREE_CURSOR *)dbc->internal;
01600 
01601         /*
01602          * Acquire the current page.  We have at least a read-lock
01603          * already.  The caller may have set DB_RMW asking for a
01604          * write lock, but upgrading to a write lock has no better
01605          * chance of succeeding now instead of later, so don't try.
01606          */
01607         if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
01608                 return (ret);
01609 
01610         /*
01611          * An off-page duplicate cursor.  Search the remaining duplicates
01612          * for one which matches (do a normal btree search, then verify
01613          * that the retrieved record is greater than the original one).
01614          */
01615         if (F_ISSET(dbc, DBC_OPD)) {
01616                 /*
01617                  * Check to make sure the desired item comes strictly after
01618                  * the current position;  if it doesn't, return DB_NOTFOUND.
01619                  */
01620                 if ((ret = __bam_cmp(dbp, data, cp->page, cp->indx,
01621                     dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare,
01622                     &cmp)) != 0)
01623                         return (ret);
01624 
01625                 if (cmp <= 0)
01626                         return (DB_NOTFOUND);
01627 
01628                 /* Discard the current page, we're going to do a full search. */
01629                 if ((ret = mpf->put(mpf, cp->page, 0)) != 0)
01630                         return (ret);
01631                 cp->page = NULL;
01632 
01633                 return (__bam_c_search(dbc,
01634                     PGNO_INVALID, data, DB_GET_BOTH, &exact));
01635         }
01636 
01637         /*
01638          * We're doing a DBC->c_get(DB_GET_BOTHC) and we're already searching
01639          * a set of on-page duplicates (either sorted or unsorted).  Continue
01640          * a linear search from after the current position.
01641          *
01642          * (Note that we could have just finished a "set" of one duplicate,
01643          * i.e. not a duplicate at all, but the following check will always
01644          * return DB_NOTFOUND in this case, which is the desired behavior.)
01645          */
01646         if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
01647             !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
01648                 return (DB_NOTFOUND);
01649         cp->indx += P_INDX;
01650 
01651         return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH));
01652 }

static int __bam_isopd DBC dbc,
db_pgno_t pgnop
[static]
 

Definition at line 2778 of file bt_cursor.c.

References B_DUPLICATE, B_TYPE, GET_BOVERFLOW, O_INDX, P_LBTREE, _boverflow::pgno, _boverflow::type, and TYPE.

Referenced by __bam_c_get(), __bam_c_put(), and __bam_get_prev().

02781 {
02782         BOVERFLOW *bo;
02783 
02784         if (TYPE(dbc->internal->page) != P_LBTREE)
02785                 return (0);
02786 
02787         bo = GET_BOVERFLOW(dbc->dbp,
02788             dbc->internal->page, dbc->internal->indx + O_INDX);
02789         if (B_TYPE(bo->type) == B_DUPLICATE) {
02790                 *pgnop = bo->pgno;
02791                 return (1);
02792         }
02793         return (0);
02794 }

static int __bam_getboth_finddatum __P (DBC *, DBT *, u_int32_t  )  [static]
 


Variable Documentation

const char revid[] = "$Id: bt_cursor.c,v 11.147 2002/08/13 20:46:07 ubell Exp $" [static]
 

Definition at line 11 of file bt_cursor.c.


Generated on Wed Jul 20 21:06:33 2005 for MySQL 5.0.9 Beta by  doxygen 1.4.3