说明:
我实现了下面的类LabSetInt64(见下面的代码)。
这样做的目的是操纵大集大整数的(高达10M的值)尽可能快。 我的主要需求集中在:
- !关键:获取一个集合的大小/基数尽可能快
- !重要:如果能够通过一套非常快速迭代
因此,从下起执行,但仍存在两个点,我想和你商量。
该“popcount()”懒实施
int LabSetInt64::popcount(uint64_t x) {
int count;
for (count=0; x; count++)
x &= x-1;
return count;
}
我知道我选的实施是在市场上最慢的一个,但我无法找到我自己一个更好的。 所以,如果你可以点我到一个更好的(64位,当然)...
我听说了一个非常有效的算法计算基数:在“MIT HAKMEM 169”算法>>
// MIT HAKMEM.
// MIT HAKMEM Count is funky. Consider a 3 bit number as being 4a+2b+c. If we
// shift it right 1 bit, we have 2a+b. Subtracting this from the original gives
// 2a+b+c. If we right-shift the original 3-bit number by two bits, we get a,
// and so with another subtraction we have a+b+c, which is the number of bits
// in the original number. How is this insight employed? The first assignment
// statement in the routine computes tmp. Consider the octal representation of
// tmp. Each digit in the octal representation is simply the number of 1¡äs in
// the corresponding three bit positions in n. The last return statement sums
// these octal digits to produce the final answer. The key idea is to add
// adjacent pairs of octal digits together and then compute the remainder
// modulus 63. This is accomplished by right-shifting tmp by three bits, adding
// it to tmp itself and ANDing with a suitable mask. This yields a number in
// which groups of six adjacent bits (starting from the LSB) contain the number
// of 1¡äs among those six positions in n. This number modulo 63 yields the
// final answer. For 64-bit numbers, we would have to add triples of octal
// digits and use modulus 1023. This is HACKMEM 169, as used in X11 sources.
// Source: MIT AI Lab memo, late 1970¡äs.
// http://gurmeet.net/2008/08/05/fast-bit-counting-routines/
int CheckMIT(unsigned int n)
{
/* works for 32-bit numbers only */
/* fix last line for 64-bit numbers */
register unsigned int tmp;
tmp = n - ((n >> 1) & 033333333333)
- ((n >> 2) & 011111111111);
// the remainder of 63 for i equals the sum of 7 octal numbers which
// consititutes the 32-bit integer.
// For example, 03456 % 63 == 034 + 056
return ((tmp + (tmp >> 3)) & 030707070707) % 63;
}
我上面的算法问题是,我不明白它足以把它变成“uint64_t中”(64位)版本(尽管一些指令去做)。
所以,如果你们中的一个可以帮助我完成这项任务(或至少帮助我理解),这将是真棒。
这里是LabSetInt64.h:
/*
* LabSetInt64.h
*
* Created on: Feb 20, 2013
* Author: golgauth
*/
#ifndef LABSETINT64_H_
#define LABSETINT64_H_
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <stdint.h>
#include <limits>
#include <algorithm>
#include <LabTimeUtils.h>
using namespace std;
namespace elps {
// Lets assume we need at most 1M distinct indices in our sets
#define DEFAULT_SIZE_IN_BITS 1000000
class LabSetInt64
{
public:
LabSetInt64();
LabSetInt64(int sizeinbits);
LabSetInt64(const LabSetInt64 &);
~LabSetInt64();
void Clear();
void Resize(int sizeinbits);
void Set(int i);
void UnSet(int i);
void Set(int i, bool b);
bool Find(int i);
int Cardinality();
int NextSetBit(int i);
void Print();
/**
* Should have been "=" operator, but this overload is not compatible
* with Cython, so we'll use "<<" instead.
* @param
* @return
*/
const LabSetInt64 & operator << ( const LabSetInt64 & );
LabSetInt64 operator ~ () const;
LabSetInt64 operator + ( const LabSetInt64 & );
LabSetInt64 operator * ( const LabSetInt64 & );
LabSetInt64 operator - ( const LabSetInt64 & );
LabSetInt64 operator ^ ( const LabSetInt64 & );
private:
uint64_t *data;
int data_len;
int bits_size;
int popcount(uint64_t x);
int nb_trailing_zeros(uint64_t v);
};
} /* namespace elps */
#endif /* LABSETINT64_H_ */
这里的LabSetInt64.cpp:
/*
* LabSetInt64.cpp
*
* Created on: Feb 20, 2013
* Author: golgauth
*/
#include "LabSetInt64.h"
namespace elps {
/** PUBLICS **/
LabSetInt64::LabSetInt64() : LabSetInt64(DEFAULT_SIZE_IN_BITS) {
}
LabSetInt64::LabSetInt64(int sizeinbits) {
bits_size = sizeinbits;
data_len = (bits_size + 63) / 64;
data = new uint64_t[data_len];
}
LabSetInt64::LabSetInt64(const LabSetInt64& src) {
bits_size = src.bits_size;
data_len = src.data_len;
data = new uint64_t[data_len];
for( int i = 0; i < data_len; i++ ) // copy into this set
data[i] = src.data[i];
}
LabSetInt64::~LabSetInt64() {
}
void LabSetInt64::Clear() {
std::fill_n(data, data_len, 0);
}
void LabSetInt64::Resize(int sizeinbits) {
bits_size = sizeinbits + 1;
int size = (bits_size + 63) / 64;
uint64_t *new_data = new uint64_t[size];
memcpy( new_data, data, data_len * sizeof(uint64_t) );
data_len = size;
delete [] data;
data = new_data;
}
void LabSetInt64::Set(int i) {
data[i / 64] |= (1l << (i % 64));
}
void LabSetInt64::UnSet(int i) {
data[i / 64] &= ~(1l << (i % 64));
}
void LabSetInt64::Set(int i, bool b) {
if(b) Set(i); else UnSet(i);
}
bool LabSetInt64::Find(int i) {
return ((data[i / 64]) & (1l << (i % 64))); // binary AND;
}
int LabSetInt64::Cardinality() {
int sum = 0;
for(int i=0; i<data_len; i++)
sum += popcount(data[i]);
//sum += __builtin_popcount(data[i]);
return sum;
}
int LabSetInt64::NextSetBit(int i) {
int x = i / 64;
long w = data[x];
w >>= (i % 64);
if (w != 0) {
return i + nb_trailing_zeros(w);
}
++x;
for (; x < data_len; ++x) {
if (data[x] != 0) {
return x * 64 + nb_trailing_zeros(data[x]);
}
}
return -1;
}
void LabSetInt64::Print() {
int cur_id = this->NextSetBit(0);
cout << "\nSet size : " << this->Cardinality() << endl;
cout << "{ ";
while (cur_id != -1)
{
cout << (cur_id) << " ";
cur_id = this->NextSetBit(cur_id+1);
}
cout << "}" << endl;;
}
/** PRIVATES **/
//This is better when most bits in x are 0
//It uses 3 arithmetic operations and one comparison/branch per "1" bit in x.
int LabSetInt64::popcount(uint64_t x) {
int count;
for (count=0; x; count++)
x &= x-1;
return count;
}
// output: c will count v's trailing zero bits,
// so if v is 1101000 (base 2), then c will be 3
int LabSetInt64::nb_trailing_zeros(uint64_t v) {
int c;
if (v)
{
v = (v ^ (v - 1)) >> 1; // Set v's trailing 0s to 1s and zero rest
for (c = 0; v; c++) {
v >>= 1;
}
}
else
c = 8 * sizeof(v);
return c;
}
/** ***************************************************************************
********************************* OPERATORS *******************************
*******************************************************************************/
/** PUBLICS **/
/*******************************************************************************
* TODO >> May be better to use "DEFAULT_SIZE_IN_BITS" instead of "data_len" *
* in all the operators !!! *
* *
* => For now, we assume that all the Sets are created with the default *
* constructor (aka : bit_size = DEFAULT_SIZE_IN_BITS) *
*******************************************************************************/
/**
* "operator = " assigns the right hand side to this set.
* returns: nothing
*/
const LabSetInt64 &LabSetInt64::operator << ( const LabSetInt64 &rhs )
{
if( &rhs != this ) // avoid self assignment
{
this->Resize(rhs.bits_size - 1);
for( int i = 0; i < data_len; i++ ) // copy into this set
data[i] = rhs.data[i];
}
return *this; // enable x << y << z;
}
/**
* "operator ~ " performs set complement operation (not).
* returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator ~ () const
{
LabSetInt64 rv;
for( int i = 0; i < data_len; i++ )
rv.data[i] = ~data[i]; // bitwise complement
return rv;
}
/**
* "operator + " performs set union operation (or).
* returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator + ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( int i = 0; i < data_len; i++ )
rv.data[i] = data[i] | rhs.data[i]; // bitwise OR
return rv;
}
/**
* "operator * " performs set intersection operation (and).
* returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator * ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( int i = 0; i < data_len; i++ )
rv.data[i] = data[i] & rhs.data[i]; // bitwise AND
return rv;
}
/**
* "operator - " performs set difference operation.
* returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator - ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( int i = 0; i < data_len; i++ )
rv.data[i] = data[i] & ( ~rhs.data[i] ); // bitwise a AND ~b
return rv;
}
/**
* "operator ^ " performs set symmetric difference operation (xor).
* returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator ^ ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( int i = 0; i < data_len; i++ )
rv.data[i] = data[i] ^ rhs.data[i]; // bitwise XOR
return rv;
}
/** *****************************************************************************
*********************************************************************************/
} /* namespace elps */
对于执行的其余部分,我对表演很满意,但再次,任何好的意见将非常非常欢迎和好评。
非常感谢您的时间。
编辑:好了,毕竟我不是完全由“疏”的解决方案,因为相信我所需要的并,交,差的特点,这会是这样,我想,有很多比我的“按位”版本更昂贵(需要遍历通过一个潜在的巨大的项目的量),所以我仍然有兴趣在获得关于如何将上面的“MIT HAKMEM”算法把64位的一些帮助。 非常感谢。
编辑:该版本包含一些小的瑕疵。 请在源代码更好看波纹管给出。
在他的网站上,拉斯考克斯介绍一个非常简单的整组数据结构具有O(n)的迭代(而不是O(U),其中U是一些最大)和O(1)插入,删除和计数。 它不使用位摆弄。 它也倾向于使用更多的空间,但是可以扩展(通过简单地使用具有O(1)摊销追加std::vector
而非纯阵列)。
(我会粘贴示例代码,但我认为考克斯的描述是不够明晰,我只希望引入错误。)
于是,经过在研究一些其他的库( BitMagic和一些C ++和Java实现),以及大量bechmarkings的:
(我拒绝了,因为需要快速结合运营商的“疏”选项)...
我去了一些有趣的改进。
主要的改进是,我现在保持在飞行的基数(意思是快速计算不再是一个大问题)。
改进:
- 迭代(这是我的第一个问题)仍然比(BitMagic)慢了一点,但确定。 我的表现与BitMagic的对比证实,我的课没有那么糟糕,毕竟...但BitMagic的目的主要是“节省内存压缩的使用”,并且比升压慢得多打交道时(我想避免在这里)大载体。
- 显著改善我们计算基数的方式。
更好的迭代:
inline int nb_trailing_zeros(uint64_t v) {
if (v & 1l) return 0; // Fast check if no trailing zeros
// Avoids a useless call to __builtin_ctzl()
else return __builtin_ctzl(v);
}
两个改进:使用gcc的内置函数用于计算尾随零,避免调用它当块开始以1显著改善性能的同时迭代。
更好的计数:
内置的使用sse4.2
popcount功能:
inline int popcount(uint64_t x) {
// int count;
// for (count=0; x; count++)
// x &= x-1;
// return count;
return _mm_popcnt_u64(x); //__builtin_popcount(x) is ok too
// Thanks to @cschwan for pointing me on this
}
“_mm_popcnt_u64” 需要额外的编译选项 “-m64 -msse4.2”(而不是 “__builtin_popcount”)
附加功能,如:
int LabSetInt64::CU ( const LabSetInt64 &rhs ) {
int sum = 0;
for(int i=0; i<data_len; i++)
sum += popcount(data[i] | rhs.data[i]);
return sum;
}
不是进行操作,然后计算速度更快。
“的Optim计数”选项( CNT_OPT
模式):看到新类版本波纹管
减慢一点点的组合操作(只有几个,没有真正的后顾之忧......)
更快的插入:
通过仅如果没有找到插入:
void LabSetInt64::Set(int i) {
int n = i / 64;
uint64_t shift1l = 1l << (i % 64);
if ( !(data[n] & shift1l) ) { // Not found
data[n] |= shift1l;
if (CNT_OPT) ++cardinality;
}
}
这是更快,首先检查该位已经设置,比在任何情况下,设置它。 (最零点,我们尝试插入,最让我们节省时间)。 演出中设置非集位的情况下仍然相当不变。
增加了一些更快的运算符:
const LabSetInt64 & LabSetInt64::operator += ( const LabSetInt64 & rhs) {
for( int i = 0; i < data_len; i++ )
data[i] = data[i] | rhs.data[i]; // bitwise OR
return *this;
}
A + = B现在比快得多A = A + B(节省要返回一个新的向量的实例化)
增加了一些方便的功能和运算符:看到新类版本波纹管
[ Header
]
/**
* If the set to 1, will be optimized for counting (cardinality updated on the fly).
* WARNING : Optimizing for counting will slow down a little the following features :
* - Binary operation : (~, +, *, -, ^)
*/
#define CNT_OPT 1
#define DEFAULT_SIZE_IN_BITS 1000000
//This is better when most bits in x are 0
inline unsigned int popcount(uint64_t x) {
// // It uses 3 arithmetic operations and one comparison/branch per "1" bit in x.
// int count;
// for (count=0; x; count++)
// x &= x-1;
// return count;
return _mm_popcnt_u64(x); //__builtin_popcountl(x); //
}
/**
* Counts trailing zeros starting from a given point
* @param v Input to count trailing zero bits
* @return c will count v's trailing zero bits,
so if v is 1101000 (base 2), then c will be 3
*/
inline int nb_trailing_zeros(uint64_t v) {
if (v & 1l) return 0; // Fast check if no trailing zeros
// Avoids a useless call to __builtin_ctzl()
else return __builtin_ctzl(v);
}
class LabSetInt64
{
public:
LabSetInt64();
/**
* Instantiates a bitset with a given maximum size.
* @param sizeinbits Maximum size of the population
*/
LabSetInt64(unsigned int sizeinbits);
LabSetInt64(const LabSetInt64 &);
virtual ~LabSetInt64();
void Clear();
void Resize(unsigned int sizeinbits);
/**
* WARNING : no check is perform whether i is in the correct range :
* i MUST be in [0, size_in_bits-1]
*/
void Set(int i);
void UnSet(int i);
// void Set(int i, bool b);
int Cardinality();
bool Find(int i);
void Print();
int GetFirst();
int ChooseOne();
int GetAt(int n);
const LabSetInt64 & operator = ( const LabSetInt64 & );
/**
* All the following operators assume that left and right operands
* have the same size "size_in_bits" (enough for our needs,
* since memory usage is not really a problem for us).
*/
LabSetInt64 operator ~ () const; /** Inversion **/
LabSetInt64 operator + ( const LabSetInt64 & ); /** Union **/
LabSetInt64 operator * ( const LabSetInt64 & ); /** Intersection **/
LabSetInt64 operator - ( const LabSetInt64 & ); /** Difference **/
LabSetInt64 operator ^ ( const LabSetInt64 & ); /** Symmetrical D. **/
/**
* Comparison.
*/
bool operator == ( const LabSetInt64 & ) const;
bool operator != ( const LabSetInt64 & ) const;
/**
* Convenient, moreover :
* (A += B) is actually a lot faster than (A = A + B)...
* [ No intermediary storage ]
*/
const LabSetInt64 & operator += ( const LabSetInt64 & );
const LabSetInt64 & operator *= ( const LabSetInt64 & );
const LabSetInt64 & operator -= ( const LabSetInt64 & );
const LabSetInt64 & operator ^= ( const LabSetInt64 & );
const LabSetInt64 & INV();
/**
* Gets the population count of the union.
* Faster than realizing the union, then calling Cardinality().
* [ No intermediary storage ]
*/
int CU ( const LabSetInt64 & );
int CI ( const LabSetInt64 & );
int CD ( const LabSetInt64 & );
int CSD ( const LabSetInt64 & );
/**
* Basic iterator facility.
*/
class iter {
public:
iter(LabSetInt64 & bv) { pos_ = -1; bv_ = &bv; }
void reset() { pos_ = -1; }
int end() { return -2; }
int next() { pos_ = bv_->next_set_bit(pos_); return pos_; }
private:
int pos_;
LabSetInt64 * bv_;
};
private:
uint64_t * data_;
unsigned int data_len_;
unsigned int size_in_bits_;
unsigned int cardinality_;
void init_bitset(unsigned int sizeinbits);
int calc_cardinality();
int next_set_bit(int i);
};
[ Implementation
]
LabSetInt64::LabSetInt64() {
init_bitset(DEFAULT_SIZE_IN_BITS);
}
LabSetInt64::LabSetInt64(unsigned int sizeinbits) {
init_bitset(sizeinbits);
}
void LabSetInt64::init_bitset(unsigned int sizeinbits) {
// +1 (the last higher weighted bit is 0 : allows to stop when iterating)
// See example in "Print()" function
size_in_bits_ = sizeinbits + 1;
data_len_ = (size_in_bits_ + 63) / 64;
data_ = new uint64_t[data_len_];
if (CNT_OPT) cardinality_ = 0;
Clear(); // Start filled with 0s. // Necessary...
}
LabSetInt64::LabSetInt64(const LabSetInt64& src) {
size_in_bits_ = src.size_in_bits_;
data_len_ = src.data_len_;
data_ = new uint64_t[data_len_];
memcpy( data_, src.data_, src.data_len_ * sizeof(uint64_t) );
cardinality_ = src.cardinality_;
}
LabSetInt64::~LabSetInt64() {
if (data_ != NULL) delete[] data_;
}
void LabSetInt64::Clear() {
std::fill_n(data_, data_len_, 0);
if (CNT_OPT) cardinality_ = 0;
}
void LabSetInt64::Resize(unsigned int sizeinbits) {
bool truncated;
unsigned int new_sizeinbits = sizeinbits + 1;
if (size_in_bits_ != new_sizeinbits)
{
truncated = (size_in_bits_ > new_sizeinbits);
size_in_bits_ = new_sizeinbits;
unsigned int new_size = (size_in_bits_ + 63) / 64;
if (new_size != data_len_) // Resize only if storage size changed
{
uint64_t *new_data = new uint64_t[new_size];
if (!truncated) // Clear : required only if extended
std::fill_n(new_data, new_size, 0);
memcpy( new_data, data_, std::min(data_len_, new_size) * sizeof(uint64_t) );
data_len_ = new_size;
delete [] data_;
data_ = new_data;
}
//--
if (truncated) {
// Clear the bits beyond the maximum size
unsigned int begin = sizeinbits;
unsigned int end = data_len_ * 64;
for ( unsigned int i = begin; i<end; ++i ) UnSet(i);
// Update cardinality
if (CNT_OPT) cardinality_ = calc_cardinality();
}
}
}
void LabSetInt64::Set(int i) {
int n = i / 64;
uint64_t shift1l = 1l << (i % 64);
if ( !(data_[n] & shift1l) ) { // Not found
data_[n] |= shift1l;
if (CNT_OPT) ++cardinality_;
}
}
void LabSetInt64::UnSet(int i) {
int n = i / 64;
uint64_t shift1l = 1l << (i % 64);
if ( data_[n] & shift1l ) { // Found
data_[n] &= ~shift1l;
if (CNT_OPT) --cardinality_;
}
}
//void LabSetInt64::Set(int i, bool b) {
// if(b) Set(i); else UnSet(i);
//}
bool LabSetInt64::Find(int i) {
return ((data_[i / 64]) & (1l << (i % 64)));
}
int LabSetInt64::Cardinality() {
if (CNT_OPT) return cardinality_;
else return calc_cardinality();
}
int LabSetInt64::calc_cardinality() {
unsigned int sum = 0;
for(unsigned int i=0; i<data_len_; i++)
sum += popcount(data_[i]);
//sum += bitcount64_4way(data[i], data[i+1], data[i+2], data[i+3]);
return sum;
}
int LabSetInt64::next_set_bit(int i) {
++i;
unsigned int x = i / 64;
uint64_t w = data_[x];
w >>= (i % 64);
if (w != 0) {
return i + nb_trailing_zeros(w);
}
++x;
for (; x < data_len_; ++x) {
if (data_[x] != 0) {
return x * 64 + nb_trailing_zeros(data_[x]);
}
}
return -2;
}
void LabSetInt64::Print() {
cout << "\nSet size : " << this->Cardinality() << endl;
cout << "{ ";
int cnt = 0;
iter it(*this);
for (int val = it.next(); val != it.end(); val = it.next())
{
cout << (val) << " ";
if ((++cnt) % 30 == 0) cout << endl;
}
cout << "}" << endl;
}
int LabSetInt64::GetFirst() {
return this->next_set_bit(-1);
}
int LabSetInt64::ChooseOne() {
// Get lucky...
int id = rand() % this->size_in_bits_;
// Otherwise...
if (!Find(id))
id = GetAt(rand() % this->Cardinality());
return id;
}
int LabSetInt64::GetAt(int n) {
int cnt = 0, val;
iter it(*this);
while ((val = it.next()) != it.end() && cnt++ != n) { }
return val;
}
// *************************************************
/*----------------------------------------------------------------------------*
** "operator = " assigns the right hand side to this set.
**
** returns: nothing
*/
const LabSetInt64 &LabSetInt64::operator = ( const LabSetInt64 &rhs )
{
if( &rhs != this ) // avoid self assignment
{
this->Resize(rhs.size_in_bits_ - 1);
memcpy( data_, rhs.data_, rhs.data_len_ * sizeof(uint64_t) );
if (CNT_OPT) cardinality_ = rhs.cardinality_;
}
return *this; // enable x = y = z;
}
/*----------------------------------------------------------------------------*
** "operator ~ " performs set complement operation (not).
**
** returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator ~ () const
{
LabSetInt64 rv;
unsigned int i;
for( i = 0; i < data_len_; i++ ) {
rv.data_[i] = ~data_[i]; // bitwise complement
if (CNT_OPT) rv.cardinality_ += popcount(rv.data_[i]);
}
rv.size_in_bits_ = size_in_bits_;
// Clear the bits beyond the maximum size
unsigned int begin = rv.size_in_bits_ - 1;
unsigned int end = data_len_ * 64;
for ( i = begin; i<end; ++i ) rv.UnSet(i);
return rv;
}
/*----------------------------------------------------------------------------*
** "operator + " performs set union operation (or).
**
** returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator + ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( unsigned int i = 0; i < data_len_; i++ ) {
rv.data_[i] = data_[i] | rhs.data_[i]; // bitwise OR
if (CNT_OPT) rv.cardinality_ += popcount(rv.data_[i]);
}
return rv;
}
/*----------------------------------------------------------------------------*
** "operator * " performs set intersection operation (and).
**
** returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator * ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( unsigned int i = 0; i < data_len_; i++ ) {
rv.data_[i] = data_[i] & rhs.data_[i]; // bitwise AND
if (CNT_OPT) rv.cardinality_ += popcount(rv.data_[i]);
}
return rv;
}
/*----------------------------------------------------------------------------*
** "operator - " performs set difference operation.
**
** returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator - ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( unsigned int i = 0; i < data_len_; i++ ) {
rv.data_[i] = data_[i] & ( ~rhs.data_[i] ); // bitwise a AND ~b
if (CNT_OPT) rv.cardinality_ += popcount(rv.data_[i]);
}
return rv;
}
/*----------------------------------------------------------------------------*
** "operator ^ " performs set symmetric difference operation (xor).
**
** returns: pointer to set
*/
LabSetInt64 LabSetInt64::operator ^ ( const LabSetInt64 &rhs )
{
LabSetInt64 rv;
for( unsigned int i = 0; i < data_len_; i++ ) {
rv.data_[i] = data_[i] ^ rhs.data_[i]; // bitwise XOR
if (CNT_OPT) rv.cardinality_ += popcount(rv.data_[i]);
}
return rv;
}
bool LabSetInt64::operator == ( const LabSetInt64 &rhs ) const {
if ((CNT_OPT) && (cardinality_ != rhs.cardinality_)) return false;
for( unsigned int i = 0; i < data_len_; i++ )
{
if (data_[i] != rhs.data_[i]) {
return false;
}
}
return true;
}
bool LabSetInt64::operator != ( const LabSetInt64 &rhs ) const {
return !(*this == rhs);
}
const LabSetInt64 & LabSetInt64::operator += ( const LabSetInt64 & rhs) {
if (CNT_OPT) cardinality_ = 0;
for( unsigned int i = 0; i < data_len_; i++ ) {
data_[i] = data_[i] | rhs.data_[i];
if (CNT_OPT) cardinality_ += popcount(data_[i]);
}
return *this;
}
const LabSetInt64 & LabSetInt64::operator *= ( const LabSetInt64 & rhs) {
if (CNT_OPT) cardinality_ = 0;
for( unsigned int i = 0; i < data_len_; i++ ) {
data_[i] = data_[i] & rhs.data_[i];
if (CNT_OPT) cardinality_ += popcount(data_[i]);
}
return *this;
}
const LabSetInt64 & LabSetInt64::operator -= ( const LabSetInt64 & rhs) {
if (CNT_OPT) cardinality_ = 0;
for( unsigned int i = 0; i < data_len_; i++ ) {
data_[i] = data_[i] & ( ~rhs.data_[i] );
if (CNT_OPT) cardinality_ += popcount(data_[i]);
}
return *this;
}
const LabSetInt64 & LabSetInt64::operator ^= ( const LabSetInt64 & rhs) {
if (CNT_OPT) cardinality_ = 0;
for( unsigned int i = 0; i < data_len_; i++ ) {
data_[i] = data_[i] ^ rhs.data_[i];
if (CNT_OPT) cardinality_ += popcount(data_[i]);
}
return *this;
}
// *************************************************
const LabSetInt64 & LabSetInt64::INV() {
unsigned int i;
if (CNT_OPT) cardinality_ = 0;
for( i = 0; i < data_len_; i++ ) {
data_[i] = ~data_[i];
if (CNT_OPT) cardinality_ += popcount(data_[i]);
}
// Clear the bits beyond the maximum size
unsigned int begin = size_in_bits_ - 1;
unsigned int end = data_len_ * 64;
for ( i = begin; i<end; ++i ) UnSet(i);
return *this;
}
// *************************************************
int LabSetInt64::CU ( const LabSetInt64 & rhs ) {
int sum = 0;
for( unsigned int i=0; i < data_len_; i++ )
sum += popcount(data_[i] | rhs.data_[i]);
return sum;
}
int LabSetInt64::CI ( const LabSetInt64 & rhs ) {
int sum = 0;
for( unsigned int i=0; i < data_len_; i++ )
sum += popcount(data_[i] & rhs.data_[i]);
return sum;
}
int LabSetInt64::CD ( const LabSetInt64 & rhs ) {
int sum = 0;
for( unsigned int i=0; i < data_len_; i++ )
sum += popcount(data_[i] & (~rhs.data_[i]));
return sum;
}
int LabSetInt64::CSD ( const LabSetInt64 & rhs ) {
int sum = 0;
for( unsigned int i=0; i < data_len_; i++ )
sum += popcount(data_[i] ^ rhs.data_[i]);
return sum;
}
注意可移植性:
一个有些体系结构(如Windows 7
+ MinGW
),其中long
是只有32位,取代在上面的代码中,所有出现:
-
1l
带1ll
-
__builtin_popcountl
与__builtin_popcountll
-
__builtin_ctzl
与__builtin_ctzll
这将确保你总是使用64位数字( long long
)。
好吧,我希望这是信息,它可以为一些人一个很好的起点。