From boost doc,
This results in near-optimal code generation; the performance of
BOOST_FOREACH is usually within a few percent of the equivalent
hand-coded loop.
I guess using macros and non standard typeof operator, we can generate exactly equivalent one. What feature of BOOST_FOREACH makes it not exact?
Edit:
My version:
#define EACH(it,v) \
for(typeof(v.begin()) it = v.begin();it != v.end(); ++it)
//use this if you want a const_iterator from a non-const container
#define CONST_EACH(it,v) \
typedef typeof(v) v_type; \
typedef const v_type& const_type; \
for(typeof(static_cast<const_type>(v).begin()) it = static_cast<const_type>(v).begin(); it != static_cast<const_type>(v).end(); ++it)
I am trying to write a version without any overhead. This uses non-standard typeof and gives iterator instead of value_type. Am I missing anything here?
Boost foreach is far from trivial. with gcc 4.6:
int main()
{
std::string hello( "Hello, world!" );
BOOST_FOREACH( char ch, hello )
{
std::cout << ch;
}
return 0;
}
generates a lot of cases probed with A?B:C
.
int main()
{
std::string hello( "Hello, world!" );
if (
boost::foreach_detail_::auto_any_t _foreach_col9 =
boost::foreach_detail_::contain( (hello) , (true ? 0 :
boost::foreach_detail_::or_(
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(
boost::foreach_detail_::is_array_(hello)) , (true ? 0 :
boost::foreach_detail_::is_rvalue_( (true ?
boost::foreach_detail_::make_probe(hello) : (hello)), 0))) ,
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(boost_foreach_is_noncopyable(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)) , boost_foreach_is_lightweight_proxy(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)))))) {} else if (
boost::foreach_detail_::auto_any_t _foreach_cur9 =
boost::foreach_detail_::begin( _foreach_col9 , (true ? 0 :
boost::foreach_detail_::encode_type(hello,
boost::foreach_detail_::is_const_(hello))) , (true ? 0 :
boost::foreach_detail_::or_(
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(
boost::foreach_detail_::is_array_(hello)) , (true ? 0 :
boost::foreach_detail_::is_rvalue_( (true ?
boost::foreach_detail_::make_probe(hello) : (hello)), 0))) ,
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(boost_foreach_is_noncopyable(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)) , boost_foreach_is_lightweight_proxy(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)))))) {} else if (
boost::foreach_detail_::auto_any_t _foreach_end9 =
boost::foreach_detail_::end( _foreach_col9 , (true ? 0 :
boost::foreach_detail_::encode_type(hello,
boost::foreach_detail_::is_const_(hello))) , (true ? 0 :
boost::foreach_detail_::or_(
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(
boost::foreach_detail_::is_array_(hello)) , (true ? 0 :
boost::foreach_detail_::is_rvalue_( (true ?
boost::foreach_detail_::make_probe(hello) : (hello)), 0))) ,
boost::foreach_detail_::and_(
boost::foreach_detail_::not_(boost_foreach_is_noncopyable(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)) , boost_foreach_is_lightweight_proxy(
boost::foreach_detail_::to_ptr(hello) , boost_foreach_argument_dependent_lookup_hack_value)))))) {} else for (bool _foreach_continue9 = true; _foreach_continue9 && !
boost::foreach_detail_::done( _foreach_cur9 , _foreach_end9 , (true ? 0 :
boost::foreach_detail_::encode_type(hello,
boost::foreach_detail_::is_const_(hello)))); _foreach_continue9 ?
boost::foreach_detail_::next( _foreach_cur9 , (true ? 0 :
boost::foreach_detail_::encode_type(hello,
boost::foreach_detail_::is_const_(hello)))) : (void)0) if (
boost::foreach_detail_::set_false(_foreach_continue9)) {} else for (char ch =
boost::foreach_detail_::deref( _foreach_cur9 , (true ? 0 :
boost::foreach_detail_::encode_type(hello,
boost::foreach_detail_::is_const_(hello)))); !_foreach_continue9; _foreach_continue9 = true)
{
std::cout << ch;
}
return 0;
}
There are so many possibly types of things you may want to loop over. With c++11 all these tricks are not required anymore, as you can loop over almost anything with
for(auto const &a: something){ .. }
or
for(auto a=begin(something);a!=end(something);++i){ .. }
Why not ask your favorite compiler ?
Let us use a simple test case (to avoid the clutter):
#include <cstring>
#include <cstdio>
#include <boost/foreach.hpp>
char const* HelloWorld = "Hello, world!\n";
void simplefor() {
for(char const* it = HelloWorld, *end = HelloWorld + strlen(HelloWorld);
it != end;
++it)
{
printf("%c", *it);
}
}
void foreach() {
BOOST_FOREACH( char ch, HelloWorld )
{
printf("%c", ch);
}
}
With these commands we retrieve the LLVM IR:
~/projects$ clang++ -O2 -c -I/usr/lib/Boost/1-39-0-1/include/ test.cpp -emit-llvm
~/projects$ llvm-dis test.o -show-annotations
Which gives for simple:
define void @_Z9simpleforv() nounwind uwtable {
%1 = load i8** @HelloWorld, align 8, !tbaa !0 ; [#uses=3 type=i8*]
%2 = tail call i64 @strlen(i8* %1) nounwind readonly ; [#uses=2 type=i64]
%3 = getelementptr inbounds i8* %1, i64 %2 ; [#uses=1 type=i8*]
%4 = icmp eq i64 %2, 0 ; [#uses=1 type=i1]
br i1 %4, label %._crit_edge, label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %0
%it.01 = phi i8* [ %7, %.lr.ph ], [ %1, %0 ] ; [#uses=2 type=i8*]
%5 = load i8* %it.01, align 1, !tbaa !1 ; [#uses=1 type=i8]
%6 = sext i8 %5 to i32 ; [#uses=1 type=i32]
%putchar = tail call i32 @putchar(i32 %6) nounwind ; [#uses=0 type=i32]
%7 = getelementptr inbounds i8* %it.01, i64 1 ; [#uses=2 type=i8*]
%8 = icmp eq i8* %7, %3 ; [#uses=1 type=i1]
br i1 %8, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %0
ret void
}
and for BOOST_FOREACH
:
; [#uses=0]
define void @_Z7foreachv() nounwind uwtable {
%1 = load i8** @HelloWorld, align 8, !tbaa !0 ; [#uses=1 type=i8*]
br label %2
; <label>:2 ; preds = %.preheader, %0
%.in = phi i8* [ %6, %.preheader ], [ %1, %0 ] ; [#uses=2 type=i8*]
%3 = load i8* %.in, align 1, !tbaa !1 ; [#uses=2 type=i8]
%4 = icmp eq i8 %3, 0 ; [#uses=1 type=i1]
br i1 %4, label %.critedge, label %.preheader
.preheader: ; preds = %2
%5 = sext i8 %3 to i32 ; [#uses=1 type=i32]
%putchar = tail call i32 @putchar(i32 %5) nounwind ; [#uses=0 type=i32]
%6 = getelementptr inbounds i8* %.in, i64 1 ; [#uses=1 type=i8*]
br label %2
.critedge: ; preds = %2
ret void
}
I can say there are more instructions for the simple case but less branches (one per iteration instead of two), but I would be hard pressed to pin down the performance from there.
But of course... it does not matter any longer! Hail C++11:
void bestfor() {
for(char const ch: HelloWorld) {
printf("%c", ch);
}
}
I believe that some of the trickery that BOOST_FOREACH employs in order to support natural loop syntax may generate superfluous copies of objects.
If you hand code your loop, you can take advantage of known to you (but not necessarily the compiler or boost_foreach) properties of your iterators and ranges. So you can probably do better.
It also relies heavily on being able to detect certain properties of classes at compile time, and if it can't (i.e. the compiler can't support the template mechanisms it uses), it has to defer this till runtime. This is obviously not as efficient as the results you'd get with hand coding (where you're likely to just know).
I think it's mainly because of aliasing: when you use (const) references, the compiler has a harder time figuring out that some variables don't alias, and generates less optimal code.