ibis::bin Class Reference

The equality encoded bitmap index with binning. More...

#include <ibin.h>

Inheritance diagram for ibis::bin:

ibis::index ibis::ambit ibis::bak ibis::bak2 ibis::egale ibis::fuge ibis::mesa ibis::pack ibis::pale ibis::range ibis::zone

List of all members.

Public Types

typedef std::map< double,
granule * > 
granuleMap

Public Member Functions

long append (const array_t< uint32_t > &ind)
 Append a list of integers representing bin numbers.
long append (const ibis::bin &tail)
 Append the tail to this index.
virtual long append (const char *dt, const char *df, uint32_t nnew)
 Extend the index.
 bin (const ibis::column *c, const char *f, const std::vector< double > &bd)
 bin (const ibis::column *c, const char *f, const array_t< double > &bd)
 bin (const ibis::column *c, ibis::fileManager::storage *st, uint32_t offset=8)
 bin (const ibis::column *c=0, const char *f=0)
 Construct a bitmap index from current data.
 bin (const ibis::bin &rhs)
virtual void binBoundaries (std::vector< double > &) const
 The functions binBoundaries and binWeights return the bin boundaries and the counts of each bin, respectively.
virtual void binWeights (std::vector< uint32_t > &) const
long checkBin (const ibis::qRange &cmp, uint32_t jbin, const ibis::bitvector &mask, ibis::bitvector &res) const
 Candidate check using the binned values.
long checkBin (const ibis::qRange &cmp, uint32_t jbin, ibis::bitvector &res) const
 Candidate check using the binned values.
virtual int contractRange (ibis::qContinuousRange &rng) const
virtual int64_t estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const
virtual int64_t estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask) const
virtual int64_t estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr) const
virtual void estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
virtual void estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
virtual void estimate (const ibis::bin &idx2, const ibis::rangeJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Estimate the number of hits for nonsymmetric joins.
virtual int64_t estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const
virtual void estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Evaluating a join condition with one (likely composite) index.
virtual void estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
virtual void estimate (const ibis::rangeJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Estimate the hits for symmetric joins.
virtual uint32_t estimate (const ibis::qContinuousRange &expr) const
 Returns an upper bound on the number of hits.
virtual void estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
 Computes an approximation of hits as a pair of lower and upper bounds.
virtual double estimateCost (const ibis::qDiscreteRange &expr) const
virtual double estimateCost (const ibis::qContinuousRange &expr) const
 Estimate the cost of evaluating a range condition.
virtual long evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const
 To evaluate the exact hits.
virtual int expandRange (ibis::qContinuousRange &rng) const
 The functions expandRange and contractRange expand or contract the boundaries of a range condition so that the new range will have exact answers using the function estimate.
virtual long getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const
 Cumulative distribution of the data.
virtual long getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const
 Binned distribution of the data.
virtual double getMax () const
 The maximum value recorded in the index.
virtual double getMin () const
 The minimum value recorded in the index.
virtual double getSum () const
 Compute the approximate sum of all the values indexed.
array_t< uint32_t > * indices (const ibis::bitvector &mask) const
virtual const char * name () const
 Returns the name of the index, similar to the function type, but returns a string instead.
virtual uint32_t numBins () const
virtual void print (std::ostream &out) const
 Prints human readable information.
int read (int fdes, uint32_t offset, const char *fname)
 Read an ibis::bin embedded inside a file.
virtual int read (ibis::fileManager::storage *st)
 Reconstructs an index from an array of bytes.
virtual int read (const char *idxfile)
 Reconstructs an index from the named file.
virtual void speedTest (std::ostream &out) const
 Time some logical operations and print out their speed.
virtual INDEX_TYPE type () const
 Returns an index type identifier.
virtual float undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
 Mark the position of the rows that can not be decided with this index.
virtual int write (const char *dt) const
 Save index to a file.

Protected Member Functions

void addBounds (double lbd, double rbd, uint32_t nbins, uint32_t eqw)
virtual void adjustLength (uint32_t nrows)
 bin (const ibis::column *c, const uint32_t nbits, ibis::fileManager::storage *st, uint32_t offset=8)
 A constructor to handle the common portion of multicomponent encodings.
template<typename E>
void binning (const array_t< E > &varr, const array_t< double > &bd)
template<typename E>
void binning (const array_t< E > &varr)
void binning (const char *f)
 Read the data file and partition the values into bins according to the specified bin boundary.
void binning (const char *f, const array_t< double > &bd)
void binning (const char *f, const std::vector< double > &bd)
 Generate bins according to the specified boundaries.
template<typename E>
void binningT (const char *fname)
 Read the data file, partition the values, and write out the bin ordered data with .bin suffix.
long binOrder (const char *fname) const
template<typename E>
long binOrderT (const char *fname) const
 Write bin-ordered values.
template<typename E>
long checkBin0 (const ibis::qRange &cmp, uint32_t jbin, ibis::bitvector &res) const
template<typename E>
long checkBin1 (const ibis::qRange &cmp, uint32_t jbin, const ibis::bitvector &mask, ibis::bitvector &res) const
virtual void clear ()
 Clear the existing content.
virtual double computeSum () const
template<typename E>
void construct (const array_t< E > &varr)
void convertGranules (granuleMap &gmap)
void divideBitmaps (const std::vector< ibis::bitvector * > &bms, std::vector< unsigned > &parts) const
virtual void locate (const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1, uint32_t &hit0, uint32_t &hit1) const
virtual void locate (const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1) const
virtual uint32_t locate (const double &val) const
template<typename E>
void mapGranules (const array_t< E > &, granuleMap &gmap) const
uint32_t parseNbins () const
unsigned parsePrec () const
unsigned parseScale () const
void printGranules (std::ostream &out, const granuleMap &gmap) const
void readBinBoundaries (const char *name, uint32_t nb)
void scanAndPartition (const char *, unsigned, uint32_t nbins=0)
 The optional argument nbins can either be set outside or set to be the return value of function parseNbins.
template<typename E>
void scanAndPartition (const array_t< E > &, unsigned)
template<typename E>
void setBoundaries (const array_t< E > &varr)
void setBoundaries (array_t< double > &bnds, const ibis::bin &idx1, const array_t< uint32_t > cnt1, const array_t< uint32_t > cnt0) const
void setBoundaries (array_t< double > &bnds, const ibis::bin &bin0) const
void setBoundaries (const char *f)
 Set bin boundaries.
void swap (bin &rhs)
int write (int fptr) const

Protected Attributes

array_t< double > bounds
 The nominal boundaries.
array_t< double > maxval
 The maximal values in each bin.
array_t< double > minval
 The minimal values in each bin.
uint32_t nobs
 Number of bitvectors.

Friends

class ibis::ambit
class ibis::band
class ibis::mesa
class ibis::mesh
class ibis::pack
class ibis::pale
class ibis::range
class ibis::zone

Classes

struct  granule
 A data structure to assist the mapping of values to lower precisions. More...


Detailed Description

The equality encoded bitmap index with binning.

The exact bin boundary assignment is controlled by indexing options '<binning ... />'.

The 0th bit vector represents x < bounds[0]; The (nobs-1)st bit vector represents x >= bounds[nobs-2]; The ith bit vector represents bounds[i-1] <= x < bounds[i], (0 < i < nobs-1).


Member Function Documentation

void ibis::bin::binBoundaries ( std::vector< double > &   )  const [virtual]

The functions binBoundaries and binWeights return the bin boundaries and the counts of each bin, respectively.

Reimplemented from ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::egale, ibis::bak, and ibis::bak2.

Referenced by getCumulativeDistribution(), and getDistribution().

void ibis::bin::binning ( const char *  f  )  [protected]

void ibis::bin::binning ( const char *  f,
const std::vector< double > &  bd 
) [protected]

Generate bins according to the specified boundaries.

This version of the binning function takes externally specified bin boundaries -- if the array is too small to be valid, it uses the default option.

Note:
This function does not attempt to clear the content of the current data structure; the caller is responsible for this task!

References array_t< T >::back(), bounds, ibis::BYTE, ibis::index::col, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::column::name(), ibis::part::nRows(), ibis::index::nrows, ibis::column::partition(), array_t< T >::push_back(), array_t< T >::resize(), setBoundaries(), ibis::SHORT, array_t< T >::size(), ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.

Referenced by ibis::moins::append(), ibis::entre::append(), ibis::egale::append(), append(), and bin().

template<typename E>
void ibis::bin::binningT ( const char *  fname  )  [inline, protected]

long ibis::bin::checkBin ( const ibis::qRange cmp,
uint32_t  jbin,
const ibis::bitvector mask,
ibis::bitvector res 
) const

long ibis::bin::checkBin ( const ibis::qRange cmp,
uint32_t  jbin,
ibis::bitvector res 
) const

void ibis::bin::clear (  )  [protected, virtual]

void ibis::bin::estimate ( const ibis::rangeJoin expr,
ibis::bitvector64 lower,
ibis::bitvector64 upper 
) const [virtual]

Estimate the hits for symmetric joins.

Evaluate the range join condition using the ibis::bin index.

Record the definite hits in lower, and all possible hits in upper. NOTE: upper includes all entries in lower.

References ibis::index::activate(), ibis::bitvector64::clear(), ibis::bitvector64::cnt(), ibis::index::col, ibis::horometer::CPUTime(), ibis::compRange::term::eval(), ibis::rangeJoin::getRange(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector64::set(), ibis::bitvector64::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::compRange::term::termType().

void ibis::bin::estimate ( const ibis::qContinuousRange expr,
ibis::bitvector lower,
ibis::bitvector upper 
) const [virtual]

Computes an approximation of hits as a pair of lower and upper bounds.

Parameters:
expr the query expression to be evaluated.
lower a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits.
upper a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer.

Reimplemented from ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, and ibis::entre.

References ibis::index::activate(), ibis::index::bits, ibis::bitvector::clear(), ibis::bitvector::copy(), ibis::gVerbose, ibis::bitvector::set(), and ibis::index::sumBits().

Referenced by estimate().

long ibis::bin::evaluate ( const ibis::qContinuousRange expr,
ibis::bitvector hits 
) const [virtual]

int ibis::bin::expandRange ( ibis::qContinuousRange rng  )  const [virtual]

The functions expandRange and contractRange expand or contract the boundaries of a range condition so that the new range will have exact answers using the function estimate.

The default implementation provided does nothing since this is only meaningful for indices based on bins.

Reimplemented from ibis::index.

Reimplemented in ibis::range, ibis::bak, and ibis::bak2.

References ibis::qContinuousRange::leftBound(), ibis::qContinuousRange::leftOperator(), minval, ibis::qContinuousRange::rightBound(), ibis::qContinuousRange::rightOperator(), and array_t< T >::size().

long ibis::bin::getCumulativeDistribution ( std::vector< double > &  bds,
std::vector< uint32_t > &  cts 
) const [virtual]

Cumulative distribution of the data.

A brute-force approach to get an accurate cumulative distribution.

Reimplemented from ibis::index.

References binBoundaries(), ibis::util::logger::buffer(), ibis::index::col, ibis::column::logMessage(), and ibis::column::upperBound().

long ibis::bin::getDistribution ( std::vector< double > &  bbs,
std::vector< uint32_t > &  cts 
) const [virtual]

Binned distribution of the data.

A brute-force approach to get an accurate distribution.

Reimplemented from ibis::index.

References binBoundaries(), ibis::util::logger::buffer(), ibis::index::col, and ibis::column::logMessage().

double ibis::bin::getSum (  )  const [virtual]

Compute the approximate sum of all the values indexed.

If it decides that computing the sum directly from the vertical partition is more efficient, it will return NaN immediately.

Reimplemented from ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pack, ibis::egale, ibis::moins, and ibis::entre.

References ibis::index::col, ibis::column::elementSize(), ibis::part::nRows(), and ibis::column::partition().

virtual const char* ibis::bin::name (  )  const [inline, virtual]

Returns the name of the index, similar to the function type, but returns a string instead.

Implements ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, and ibis::bak2.

Referenced by binningT(), binOrderT(), and read().

void ibis::bin::print ( std::ostream &  out  )  const [virtual]

Prints human readable information.

Outputs information about the index as text to the specified output stream.

Implements ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, and ibis::bak2.

References ibis::index::bits, ibis::index::col, ibis::gVerbose, maxval, minval, ibis::column::name(), ibis::part::name(), and ibis::column::partition().

Referenced by append(), and bin().

int ibis::bin::read ( int  fdes,
uint32_t  start,
const char *  fn 
)

int ibis::bin::read ( ibis::fileManager::storage st  )  [virtual]

int ibis::bin::read ( const char *  name  )  [virtual]

void ibis::bin::scanAndPartition ( const char *  f,
unsigned  eqw,
uint32_t  nbins = 0 
) [protected]

void ibis::bin::setBoundaries ( const char *  f  )  [protected]

Set bin boundaries.

Parse the index specification to determine the bin boundaries and store the result in member variable bounds.

The bin specification can be of the following, where all fields are optional.

  •  equal([_-]?)[weight|length|ratio] 
    
  •  no=xxx|nbins=xxx|bins:(\[begin, end, no=xxx\))+ 
    
  •  <binning (start=begin end=end nbins=xxx scale=[linear|log])* /> 
    
  •  <binning binFile=file-name[, nbins=xxx] /> 
    

The bin specification can be read from the column object, the table object containing the column, or the global ibis::gParameters object under the name of table-name.column-name.index. If no index specification is found, it builds approximate equal weight bins.

Note:
If equal weight is specified, it takes precedence over all other specifications.

References ibis::util::logger::buffer(), ibis::index::col, ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::column::indexSpec(), ibis::column::logMessage(), ibis::column::logWarning(), ibis::column::lowerBound(), ibis::column::type(), and ibis::column::upperBound().

Referenced by ibis::moins::append(), ibis::entre::append(), ibis::egale::append(), append(), bin(), and binning().

float ibis::bin::undecidable ( const ibis::qContinuousRange expr,
ibis::bitvector iffy 
) const [virtual]

Mark the position of the rows that can not be decided with this index.

Parameters:
expr the range conditions to be evaluated.
iffy the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions.

Reimplemented from ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, and ibis::egale.

References ibis::index::activate(), ibis::index::bits, ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), minval, ibis::qContinuousRange::rightBound(), and ibis::bitvector::set().

int ibis::bin::write ( const char *  name  )  const [virtual]

Save index to a file.

Outputs the index in a compact binary format to the named file or directory. The index file contains a header that can be identified by the function isIndex.

Implements ibis::index.

Reimplemented in ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, and ibis::bak2.

References ibis::index::activate(), array_t< T >::back(), array_t< T >::begin(), ibis::index::BINNING, ibis::index::bits, ibis::index::col, ibis::index::fname, ibis::gVerbose, ibis::index::indexFileName(), ibis::column::logMessage(), ibis::column::logWarning(), maxval, and minval.

Referenced by append(), and ibis::fuge::write().


The documentation for this class was generated from the following files:
Make It A Bit Faster
Disclaimers
FastBit source code
FastBit mailing list archive
Maintainer of this page