Discussion:
virtual operator () vs. std::function<>
(too old to reply)
Bonita Montero
2024-07-24 09:12:43 UTC
Permalink
#include <atomic>
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>

using namespace std;
using namespace chrono;

// Type-erased adder: the global std::function object whose call cost the
// benchmark in main() measures.
function<int ( int, int )> fn = []( int lhs, int rhs ) -> int
{
    return lhs + rhs;
};

// Minimal polymorphic callable; its virtual operator () is the
// virtual-dispatch case that the benchmark compares against std::function.
struct V
{
    // V is a polymorphic type owned through a pointer, so give it a virtual
    // destructor: destroying an object of a derived class through a V*
    // would otherwise be undefined behaviour.
    virtual ~V() = default;

    // Returns the sum of a and b.
    virtual int operator ()( int a, int b )
    {
        return a + b;
    }
};

// Heap-allocated V instance; main() calls its virtual operator () through
// this pointer.
unique_ptr<V> v = make_unique<V>();

// Benchmarks a call through the global std::function object `fn` against a
// virtual call through the global pointer `v`.
//
// Every measurement runs 100 rounds of 1'000 x 1'000 calls and prints the
// duration of the fastest round in milliseconds. As a round makes 10^6
// calls, that figure is numerically the number of nanoseconds per call.
// The accumulated sum is returned as the exit status (presumably so that
// the results of the calls stay observable).
int main()
{
    // Unsigned accumulator: every round adds 999'000'000, so a signed int
    // overflows (undefined behaviour) in the third round. Unsigned
    // arithmetic of the same width wraps around with defined behaviour.
    unsigned sum = 0;
    auto bench = [&]<typename Fn>( char const *what, Fn fn )
    {
        using hr_t = high_resolution_clock;
        using dur_t = hr_t::duration;
        dur_t tMin = dur_t::max();
        for( int turn = 100; turn--; )
        {
            auto start = hr_t::now();
            for( int a = 1'000; a--; )
                for( int b = 1'000; b--; )
                    sum += static_cast<unsigned>( fn( a, b ) );
            dur_t dur = hr_t::now() - start;
            // keep the fastest of the 100 rounds
            tMin = tMin > dur ? dur : tMin;
        }
        cout << what << duration_cast<nanoseconds>( tMin ).count() / 1.0e6 << endl;
    };
    bench( "function<int ( int, int )>: ", []( int a, int b ) { return ::fn( a, b ); } );
    bench( "virtual function: ", []( int a, int b ) { return (*::v)( a, b ); } );
    return static_cast<int>( sum );
}

I thought that the virtual call would be somewhat faster because
function<> checks for an empty function<> object and throws accordingly,
but on my Zen4 system both the virtual operator () and the function<>
take about 1.1 nanoseconds per call.
Paavo Helde
2024-07-24 09:51:32 UTC
Permalink
Post by Bonita Montero
#include <iostream>
#include <functional>
#include <chrono>
#include <atomic>
using namespace std;
using namespace chrono;
// Type-erased adder: the global std::function object whose call cost the
// benchmark in main() measures.
function<int ( int, int )> fn = []( int lhs, int rhs ) -> int
{
    return lhs + rhs;
};
// Minimal polymorphic callable; its virtual operator () is the
// virtual-dispatch case that the benchmark compares against std::function.
struct V
{
    // V is a polymorphic type owned through a pointer, so give it a virtual
    // destructor: destroying an object of a derived class through a V*
    // would otherwise be undefined behaviour.
    virtual ~V() = default;

    // Returns the sum of a and b.
    virtual int operator ()( int a, int b )
    {
        return a + b;
    }
};
// Heap-allocated V instance; main() calls its virtual operator () through
// this pointer.
unique_ptr<V> v = make_unique<V>();
int main()
{
    int sum = 0;
    auto bench = [&]<typename Fn>( char const *what, Fn fn )
    {
        using hr_t = high_resolution_clock;
        using dur_t = hr_t::duration;
        dur_t tMin = dur_t::max();
        for( int turn = 100; turn--; )
        {
            auto start = hr_t::now();
            for( int a = 1'000; a--; )
                for( int b = 1'000; b--; )
                    sum += fn( a, b );
            dur_t dur = hr_t::now() - start;
            tMin = tMin > dur ? dur : tMin;
        }
        cout << what << duration_cast<nanoseconds>( tMin ).count() /
1.0e6 << endl;
    };
    bench( "function<int ( int, int )>: ", []( int a, int b ) { return
::fn( a, b ); } );
    bench( "virtual function: ", []( int a, int b ) { return (*::v)( a,
b ); } );
    return sum;
}
I thought that the virtual call would be somewhat faster because
function<> checks for an empty function<> object and throws accordingly,
but on my Zen4 system both the virtual operator () and the function<>
take about 1.1 nanoseconds per call.
What about template lambda without mentioning std::function()? I have
heard rumors that std::function contains a lot of overhead.

I tried to add such a template lambda version, seems faster:

function<int ( int, int )>: 1.4104
virtual function: 1.1389
template lambda: 0.2689
Bonita Montero
2024-07-24 11:32:00 UTC
Permalink
Post by Paavo Helde
Post by Bonita Montero
#include <iostream>
#include <functional>
#include <chrono>
#include <atomic>
using namespace std;
using namespace chrono;
// Type-erased adder: the global std::function object whose call cost the
// benchmark in main() measures.
function<int ( int, int )> fn = []( int lhs, int rhs ) -> int
{
     return lhs + rhs;
};
// Minimal polymorphic callable; its virtual operator () is the
// virtual-dispatch case that the benchmark compares against std::function.
struct V
{
     // V is a polymorphic type owned through a pointer, so give it a virtual
     // destructor: destroying an object of a derived class through a V*
     // would otherwise be undefined behaviour.
     virtual ~V() = default;

     // Returns the sum of a and b.
     virtual int operator ()( int a, int b )
     {
         return a + b;
     }
};
// Heap-allocated V instance; main() calls its virtual operator () through
// this pointer.
unique_ptr<V> v = make_unique<V>();
int main()
{
     int sum = 0;
     auto bench = [&]<typename Fn>( char const *what, Fn fn )
     {
         using hr_t = high_resolution_clock;
         using dur_t = hr_t::duration;
         dur_t tMin = dur_t::max();
         for( int turn = 100; turn--; )
         {
             auto start = hr_t::now();
             for( int a = 1'000; a--; )
                 for( int b = 1'000; b--; )
                     sum += fn( a, b );
             dur_t dur = hr_t::now() - start;
             tMin = tMin > dur ? dur : tMin;
         }
         cout << what << duration_cast<nanoseconds>( tMin ).count() /
1.0e6 << endl;
     };
     bench( "function<int ( int, int )>: ", []( int a, int b ) {
return ::fn( a, b ); } );
     bench( "virtual function: ", []( int a, int b ) { return (*::v)(
a, b ); } );
     return sum;
}
I thought that the virtual call would be somewhat faster because
function<> checks for an empty function<> object and throws accordingly,
but on my Zen4 system both the virtual operator () and the function<>
take about 1.1 nanoseconds per call.
What about template lambda without mentioning std::function()?
I wanted to test runtime-polymorphism and not compile-time polymorphism.
Post by Paavo Helde
I have heard rumors that std::function contains a lot of overhead.
Each call is about 1.1ns on my Zen4-computer.
If you need runtime polymorphism you can't replace it with compile-time
polymorphism. But I also use the latter wherever it applies.
Post by Paavo Helde
function<int ( int, int )>: 1.4104
virtual function: 1.1389
template lambda: 0.2689
Andrey Tarasevich
2024-07-25 02:26:10 UTC
Permalink
Post by Paavo Helde
What about template lambda without mentioning std::function()? I have
heard rumors that std::function contains a lot of overhead.
But that's the whole point of the test. The type-erasure technique used
inside `std::function<>` will introduce overhead, which should be
similar/comparable to virtual call overhead.
--
Best regards,
Andrey
Bonita Montero
2024-08-10 18:30:30 UTC
Permalink
Post by Andrey Tarasevich
But that's the whole point of the test. The type-erasure technique
used inside `std::function<>` will introduce overhead, which should
be similar/comparable to virtual call overhead.
As I've shown, the call takes about one nanosecond. That's only an
overhead if the function is very small and called extremely often.

Bonita Montero
2024-07-25 16:24:48 UTC
Permalink
I further improved the code and now I'm also testing C functions,
ref'd function<> objects, C functions attached to a function<>
object and compile-time polymorphism:

#include <iostream>
#include <functional>
#include <chrono>
#include <atomic>
#include <memory>

using namespace std;
using namespace chrono;

// Minimal polymorphic callable; its virtual operator () is the
// virtual-dispatch case of the benchmark.
struct V
{
    // V is a polymorphic type owned through a pointer, so give it a virtual
    // destructor: destroying an object of a derived class through a V*
    // would otherwise be undefined behaviour.
    virtual ~V() = default;

    // Returns the sum of a and b.
    virtual int operator ()( int a, int b )
    {
        return a + b;
    }
};

// Initialised to 0 and only ever read (relaxed) by `adder` in this listing.
// NOTE(review): presumably it exists so the compiler cannot fold the adder
// into a constant expression - confirm with the author.
static atomic_int x{ 0 };

// The callable under test: a + b plus the current value of x.
auto adder = []( int lhs, int rhs ) -> int
{
    return lhs + rhs + ::x.load( memory_order_relaxed );
};

// The same callable in the different wrappers that main() benchmarks:
// a function<> holding a copy of the closure, ...
function<int ( int, int )> fn = adder;
// ... a function<> holding a reference_wrapper to the global closure, ...
function<int ( int, int )> fnRef = ref( adder );
// ... a function<> holding the closure converted to a plain function pointer, ...
function<int ( int, int )> fnC = +adder;
// ... and that plain function pointer stored in an atomic.
atomic<int (*)( int, int )> cFn{ +adder };
// Heap-allocated V instance; main() calls its virtual operator () through
// this pointer.
unique_ptr<V> v = make_unique<V>();

// Benchmarks six ways of calling the same adder: through the global
// function<> objects `fn`, `fnRef` and `fnC`, through the virtual
// operator () of `*v`, through the function pointer loaded from `cFn`, and
// by passing the closure `adder` directly as the template argument.
//
// Every measurement runs 100 rounds of 1'000 x 1'000 calls and prints the
// duration of the fastest round in milliseconds. As a round makes 10^6
// calls, that figure is numerically the number of nanoseconds per call.
// The accumulated sum is returned as the exit status (presumably so that
// the results of the calls stay observable).
int main()
{
    // Unsigned accumulator: every round adds about 10^9 (999'000'000 while
    // x is 0), so a signed int overflows (undefined behaviour) in the third
    // round. Unsigned arithmetic of the same width wraps around with
    // defined behaviour.
    unsigned sum = 0;
    auto bench = [&]<typename Fn>( char const *what, Fn fn )
    {
        using hr_t = high_resolution_clock;
        using dur_t = hr_t::duration;
        dur_t tMin = dur_t::max();
        for( int turn = 100; turn--; )
        {
            auto start = hr_t::now();
            for( int a = 1'000; a--; )
                for( int b = 1'000; b--; )
                    sum += static_cast<unsigned>( fn( a, b ) );
            dur_t dur = hr_t::now() - start;
            // keep the fastest of the 100 rounds
            tMin = tMin > dur ? dur : tMin;
        }
        cout << what << duration_cast<nanoseconds>( tMin ).count() / 1.0e6 << endl;
    };
    bench( "function<int ( int, int )>: ", []( int a, int b ) { return ::fn( a, b ); } );
    bench( "function<int ( int, int )> ref'd: ", []( int a, int b ) { return ::fnRef( a, b ); } );
    bench( "function<int ( int, int )> C'd: ", []( int a, int b ) { return ::fnC( a, b ); } );
    bench( "virtual function: ", []( int a, int b ) { return (*::v)( a, b ); } );
    bench( "c-function: ", []( int a, int b ) { return ::cFn.load( memory_order_relaxed )( a, b ); } );
    bench( "compile-time polymorphism: ", ::adder );
    return static_cast<int>( sum );
}

These are the results on a single 5.7 GHz Zen4 core:

function<int ( int, int )>: 0.914
function<int ( int, int )> ref'd: 1.1117
function<int ( int, int )> C'd: 1.1175
virtual function: 0.9358
c-function: 0.9273
compile-time polymorphism: 0.3906
Loading...