Discussion:
10 ^ N
(too old to reply)
Bonita Montero
2024-04-11 09:05:44 UTC
Permalink
I tried to use futexes for my pow10-function.
Is the usage correct ?

#include <bit>
#include <atomic>
#include <array>
#include <mutex>
#include "xmath.h"

using namespace std;

double pow10( int64_t exp )
{
constexpr uint64_t EXP_MASK = 0x7FFull << 52;
// table for binary exponentation with 10 ^ (2 ^ N)
static array<double, 64> tenPows;
// table initialized ?
if( static atomic_bool once( false ); !once.load( memory_order_acquire ) )
{
once.wait( false, memory_order_acquire );
if( !once.load( memory_order_relaxed ) )
{
// no: calculate table
for( double p10x2xN = 10.0; double &pow : tenPows )
pow = p10x2xN,
p10x2xN *= p10x2xN;
// set initialized flag with release semantics
once.store( true, memory_order_release );
once.notify_all();
}
}
// begin with 1.0 since x ^ 0 = 1
double result = 1.0;
// unsigned exponent
uint64_t uExp = exp >= 0 ? exp : -exp;
// highest set bit of exponent
size_t bit = 63 - countl_zero( uExp );
// bit mask to highest set bit
uint64_t mask = 1ull << bit;
// loop as long as there are bits in unsigned exponent
for( ; uExp; uExp &= ~mask, mask >>= 1, --bit )
// bit set ?
if( uExp & mask )
{
// yes: multiply result by 10 ^ (bit + 1)
result *= tenPows[bit];
// overlow ?
if( (bit_cast<uint64_t>( result ) & EXP_MASK) == EXP_MASK )
// yes: result wouldn't change furhter; stop
break;
}
// return 1 / result if exponent is negative
return exp >= 0 ? result : 1.0 / result;
};
Paavo Helde
2024-04-11 12:49:10 UTC
Permalink
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
Maybe, but it's not needed and makes the code more complex, redundant
and potentially a bit slower.

C++ statics' initialization is guaranteed to be thread-safe since C++11.
Just put your initialization logic into a separate function and use this
for initializing your static variable. C++ will perform any needed
multi-thread locking automatically, adding ones own explicit locks will
not make it faster.
Bonita Montero
2024-04-11 13:07:30 UTC
Permalink
Post by Paavo Helde
Maybe, but it's not needed and makes the code more complex,
redundant and potentially a bit slower.
I asked if the code uses double-checked locking correct since
I've implemented it with a mutex so far and not with a futex
and not if the code is hard to read for some people.
Bonita Montero
2024-04-12 19:59:39 UTC
Permalink
Post by Paavo Helde
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
Maybe, but it's not needed and makes the code more complex, redundant
and potentially a bit slower.
C++ statics' initialization is guaranteed to be thread-safe since C++11.
Just put your initialization logic into a separate function and use this
for initializing your static variable. C++ will perform any needed
multi-thread locking automatically, adding ones own explicit locks will
not make it faster.
I just simplified it:

double pow10( int64_t exp )
{
constexpr uint64_t EXP_MASK = 0x7FFull << 52;
constexpr double INF = numeric_limits<double>::infinity();
static double const rawTenPows[64] alignas(CL_SIZE)
{
0x1.4000000000000p+3,
0x1.9000000000000p+6,
0x1.3880000000000p+13,
0x1.7d78400000000p+26,
0x1.1c37937e08000p+53,
0x1.3b8b5b5056e17p+106,
0x1.84f03e93ff9f6p+212,
0x1.27748f9301d33p+425,
0x1.54fdd7f73bf3fp+850,
INF, INF, INF, INF, INF, INF, INF, INF, INF, INF,
INF, INF, INF, INF, INF, INF, INF, INF, INF, INF,
INF, INF, INF, INF, INF, INF, INF, INF, INF, INF,
INF, INF, INF, INF, INF, INF, INF, INF, INF, INF,
INF, INF, INF, INF, INF, INF, INF, INF, INF, INF,
INF, INF, INF, INF, INF
};
span tenPows( rawTenPows );
// begin with 1.0 since x ^ 0 = 1
double result = 1.0;
// unsigned exponent
uint64_t uExp = exp >= 0 ? exp : -exp;
// iterator to highest exponent
auto itExp = tenPows.rbegin();
// as long as there are bits set in uExp
for( size_t gap ; uExp; uExp <<= gap, uExp <<= 1 )
{
// exponent bits gap
gap = countl_zero( uExp );
// get next exponent
itExp += gap;
// multiply result by next pow10 exponent
result *= *itExp++;
// overlow / underflow ?
if( (bit_cast<uint64_t>( result ) & EXP_MASK) == EXP_MASK ) [[unlikely]]
// yes: result wouldn't change furhter; stop
return exp >= 0 ? result : 0.0;
}
return exp >= 0 ? result : 1.0 / result;
}
Chris M. Thomasson
2024-04-20 04:26:49 UTC
Permalink
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
#include <bit>
#include <atomic>
#include <array>
#include <mutex>
#include "xmath.h"
using namespace std;
double pow10( int64_t exp )
{
    constexpr uint64_t EXP_MASK = 0x7FFull << 52;
    // table for binary exponentation with 10 ^ (2 ^ N)
    static array<double, 64> tenPows;
    // table initialized ?
    if( static atomic_bool once( false ); !once.load(
memory_order_acquire ) )
    {
        once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate when
a futex wait returns. Kind of akin to a condvar.
Post by Bonita Montero
        if( !once.load( memory_order_relaxed ) )
        {
            // no: calculate table
            for( double p10x2xN = 10.0; double &pow : tenPows )
                pow = p10x2xN,
                p10x2xN *= p10x2xN;
            // set initialized flag with release semantics
            once.store( true, memory_order_release );
            once.notify_all();
        }
    }
    // begin with 1.0 since x ^ 0 = 1
    double result = 1.0;
    // unsigned exponent
    uint64_t uExp = exp >= 0 ? exp : -exp;
    // highest set bit of exponent
    size_t bit = 63 - countl_zero( uExp );
    // bit mask to highest set bit
    uint64_t mask = 1ull << bit;
    // loop as long as there are bits in unsigned exponent
    for( ; uExp; uExp &= ~mask, mask >>= 1, --bit )
        // bit set ?
        if( uExp & mask )
        {
            // yes: multiply result by 10 ^ (bit + 1)
            result *= tenPows[bit];
            // overlow ?
            if( (bit_cast<uint64_t>( result ) & EXP_MASK) == EXP_MASK )
                // yes: result wouldn't change furhter; stop
                break;
        }
    // return 1 / result if exponent is negative
    return exp >= 0 ? result : 1.0 / result;
};
Chris M. Thomasson
2024-04-20 04:29:27 UTC
Permalink
Post by Chris M. Thomasson
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
#include <bit>
#include <atomic>
#include <array>
#include <mutex>
#include "xmath.h"
using namespace std;
double pow10( int64_t exp )
{
     constexpr uint64_t EXP_MASK = 0x7FFull << 52;
     // table for binary exponentation with 10 ^ (2 ^ N)
     static array<double, 64> tenPows;
     // table initialized ?
     if( static atomic_bool once( false ); !once.load(
memory_order_acquire ) )
     {
         once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate when
a futex wait returns. Kind of akin to a condvar.
[...]

I need to focus on the code, but I think I can see some issues just at a
glance. Just because a futex wait returns does not mean that condition
it true.

Your code is rather odd for sure. Are you sure you know what you are doing?
Bonita Montero
2024-04-20 05:17:59 UTC
Permalink
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate when
a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Chris M. Thomasson
2024-04-20 09:47:16 UTC
Permalink
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Humm... Think if three threads hit once.wait, and the all get woken up
spuriously, once is still false. Now all three threads do your

if( !once.load( memory_order_relaxed ) )

once is false, and all three threads go into your init code at the same
time. What am I missing here?


if( static atomic_bool once( false ); !once.load( memory_order_acquire ) )
{
// three threads A, B and C wait.
once.wait( false, memory_order_acquire );


// A, B, get here on a spurious wake.


// oh shit!
if( !once.load( memory_order_relaxed ) )
{

// all three threads are going to see once as being false
// they are all in the init code!
// you are now fucked!

What am I missing?
Chris M. Thomasson
2024-04-20 09:53:57 UTC
Permalink
Post by Chris M. Thomasson
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Humm... Think if three threads hit once.wait, and the all get woken up
spuriously, once is still false. Now all three threads do your
if( !once.load( memory_order_relaxed ) )
once is false, and all three threads go into your init code at the same
time. What am I missing here?
 if( static atomic_bool once( false ); !once.load( memory_order_acquire
) )
    {
        // three threads A, B and C wait.
        once.wait( false, memory_order_acquire );
        // A, B, get here on a spurious wake.
        // oh shit!
        if( !once.load( memory_order_relaxed ) )
        {
              // all three threads are going to see once as being false
              // they are all in the init code!
              // you are now fucked!
What am I missing?
You probably want something like this pseudo-code, untested typed in the
newsreader:
________________________
atomic<int> g_state = 0;

if (! g_state.exchange(1, relaxed))
{
// critical section

// do your thing...

g_state.store(2, release);
g_state.notify_all(relaxed);
}

else
{
while (g_state.load(acquire) != 2)
{
g_state.wait(1, relaxed);
}
}
________________________

?
Chris M. Thomasson
2024-04-20 09:58:04 UTC
Permalink
Post by Chris M. Thomasson
Post by Chris M. Thomasson
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Humm... Think if three threads hit once.wait, and the all get woken up
spuriously, once is still false. Now all three threads do your
if( !once.load( memory_order_relaxed ) )
once is false, and all three threads go into your init code at the
same time. What am I missing here?
  if( static atomic_bool once( false ); !once.load(
memory_order_acquire ) )
     {
         // three threads A, B and C wait.
         once.wait( false, memory_order_acquire );
         // A, B, get here on a spurious wake.
         // oh shit!
         if( !once.load( memory_order_relaxed ) )
         {
               // all three threads are going to see once as being false
               // they are all in the init code!
               // you are now fucked!
What am I missing?
You probably want something like this pseudo-code, untested typed in the
________________________
atomic<int> g_state = 0;
if (! g_state.exchange(1, relaxed))
Yikes! That should be a CAS

if (g_state.cas(0, 1))

We want to set it to 1 if and only if it was 0 before, or do nothing.

Sorry about that! God damn it! lol.
Post by Chris M. Thomasson
{
    // critical section
    // do your thing...
    g_state.store(2, release);
    g_state.notify_all(relaxed);
}
else
{
    while (g_state.load(acquire) != 2)
    {
        g_state.wait(1, relaxed);
    }
}
________________________
?
Chris M. Thomasson
2024-04-20 22:55:04 UTC
Permalink
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Still not 100% sure if atomic::wait is allowed to return spuriously. The
docs seem to say that it cannot? That means that it must do a double
check in a loop in its logic. This is a lot different than normal futex
behavior.

https://en.cppreference.com/w/cpp/atomic/atomic/wait
Chris M. Thomasson
2024-04-20 22:57:18 UTC
Permalink
Post by Chris M. Thomasson
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Still not 100% sure if atomic::wait is allowed to return spuriously. The
docs seem to say that it cannot? That means that it must do a double
check in a loop in its logic. This is a lot different than normal futex
behavior.
https://en.cppreference.com/w/cpp/atomic/atomic/wait
Still, your logic is still going to have problems.
Chris M. Thomasson
2024-04-22 02:26:42 UTC
Permalink
Post by Chris M. Thomasson
Post by Chris M. Thomasson
Post by Bonita Montero
Post by Chris M. Thomasson
Post by Bonita Montero
once.wait( false, memory_order_acquire );
Any problems with spurious wakes? You need to recheck the predicate
when a futex wait returns. ....
Post by Bonita Montero
if( !once.load( memory_order_relaxed ) )
What's that ?
Still not 100% sure if atomic::wait is allowed to return spuriously.
The docs seem to say that it cannot? That means that it must do a
double check in a loop in its logic. This is a lot different than
normal futex behavior.
https://en.cppreference.com/w/cpp/atomic/atomic/wait
Still, your logic is still going to have problems.
Correct your program Bonita?
Chris M. Thomasson
2024-04-20 10:12:51 UTC
Permalink
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
[...]

I coded up a quick and dirty little test for a futex based double
checked lock.

Can you run it?
______________________________
#include <iostream>
#include <functional>
#include <atomic>
#include <thread>
#include <chrono>

#include <cstdlib>


// Ughh, some quick test params...
#define CT_L2_CACHE 128UL
#define CT_THREAD_N 8UL
#define CT_ITER_N 10000000UL


struct alignas(CT_L2_CACHE) ct_double_checked_lock
{
std::atomic<unsigned int> m_state = 0;
int m_var = 0;

void init()
{
unsigned int state = 0;

if (m_state.compare_exchange_strong(
state,
1,
std::memory_order_acquire))
{
// critical section
std::this_thread::yield();
m_var += 123;
std::this_thread::yield();

m_state.store(2, std::memory_order_release);
m_state.notify_all();
}

else
{
while (m_state.load(std::memory_order_acquire) != 2)
{
m_state.wait(1, std::memory_order_relaxed);
}
}
}
};


struct ct_shared
{
ct_double_checked_lock m_dcl;

bool check_state()
{
if (m_dcl.m_var != 123)
{
std::cout << "GOD DAMN IT!!!\n" << std::endl;
return false;
}

return true;
}
};


void ct_thread(ct_shared& shared)
{
for (unsigned long i = 0; i < CT_ITER_N; ++i)
{
shared.m_dcl.init();
}
}


int main()
{
std::cout << "ct_c++20_futex_test\n\n";
std::cout << "__________________________________\n\n" << std::endl;

{
ct_shared shared;

std::cout << "Creating Threads..." << std::endl;

std::thread threads[CT_THREAD_N];

for (unsigned long i = 0; i < CT_THREAD_N; ++i)
{
threads[i] = std::thread(ct_thread, std::ref(shared));
}

std::cout << "Running...\n" << std::endl;

for (unsigned long i = 0; i < CT_THREAD_N; ++i)
{
threads[i].join();
}

if (! shared.check_state())
{
std::cout << "OH SHIT!!!!! The crap sure hit the fan!!!\n";
}
}

std::cout << "Complete!" << std::endl;

return EXIT_SUCCESS;
}
______________________________
Chris M. Thomasson
2024-04-21 00:13:26 UTC
Permalink
Post by Chris M. Thomasson
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
[...]
I coded up a quick and dirty little test for a futex based double
checked lock.
Can you run it?
______________________________
#include <iostream>
#include <functional>
#include <atomic>
#include <thread>
#include <chrono>
#include <cstdlib>
// Ughh, some quick test params...
#define CT_L2_CACHE 128UL
#define CT_THREAD_N 8UL
#define CT_ITER_N 10000000UL
struct alignas(CT_L2_CACHE) ct_double_checked_lock
{
    std::atomic<unsigned int> m_state = 0;
    int m_var = 0;
    void init()
    {
        unsigned int state = 0;
        if (m_state.compare_exchange_strong(
                state,
                1,
                std::memory_order_acquire))
        {
            // critical section
            std::this_thread::yield();
            m_var += 123;
            std::this_thread::yield();
            m_state.store(2, std::memory_order_release);
            m_state.notify_all();
        }
        else
        {
            while (m_state.load(std::memory_order_acquire) != 2)
            {
                m_state.wait(1, std::memory_order_relaxed);
            }
^^^^^^^^^^^^^^^^^^^^

Now according to these docs:

https://en.cppreference.com/w/cpp/atomic/atomic/wait

It says that an underlying impl may encounter spurious wakes. However,
it also says this, which changes my previous view of futexes. C++ is a
bit different wrt:
_________________
These functions are guaranteed to return only if value has changed, even
if underlying implementation unblocks spuriously.
_________________

Oh, wow. So, this would mean I can get around that while loop in a slow
path of my DCL code. This is going against my previous knowledge of
futexes. C++ might as well have a std::wait_strong/weak akin to
compare_exchange_strong/weak... Anyway, so I should be able to get rid
of that loop:
_____________
while (m_state.load(std::memory_order_acquire) != 2)
{
m_state.wait(1, std::memory_order_relaxed);
}
_____________

Into just:
_____________
if (m_state.load(std::memory_order_acquire) != 2)
{
// slow path
m_state.wait(1, std::memory_order_relaxed);

// no need to recheck because m_state == 2 here for sure.
}
_____________

Humm... This goes against my futex ways! If C++ really does do its own
internal check and automatically handles any spurious wakes from the
underlying impl, then, well. Okay. Just not what I expected.
Post by Chris M. Thomasson
        }
    }
};
[...]
Chris M. Thomasson
2024-04-21 00:20:23 UTC
Permalink
On 4/20/2024 5:13 PM, Chris M. Thomasson wrote:
[...]
Post by Chris M. Thomasson
Oh, wow. So, this would mean I can get around that while loop in a slow
path of my DCL code. This is going against my previous knowledge of
futexes. C++ might as well have a std::wait_strong/weak akin to
compare_exchange_strong/weak... Anyway, so I should be able to get rid
_____________
while (m_state.load(std::memory_order_acquire) != 2)
{
    m_state.wait(1, std::memory_order_relaxed);
}
_____________
_____________
if (m_state.load(std::memory_order_acquire) != 2)
{
   // slow path
   m_state.wait(1, std::memory_order_relaxed);
^^^^^^^^^^^^^^

This wait would need memory_order_acquire, not relaxed. I am not used to
actually embedding a membar into a futex wait call. Afaict, the C++
futex sepcs are at a slightly higher level than traditional futex impls.

Good thing its just code in the newsreader. I need to implement this in
real code. If c++ does what those specs says they do, then it should
work, and that while loop can be omitted.
Post by Chris M. Thomasson
   // no need to recheck because m_state == 2 here for sure.
}
_____________
Humm... This goes against my futex ways! If C++ really does do its own
internal check and automatically handles any spurious wakes from the
underlying impl, then, well. Okay. Just not what I expected.
Post by Chris M. Thomasson
         }
     }
};
[...]
Chris M. Thomasson
2024-04-21 00:31:39 UTC
Permalink
On 4/20/2024 5:20 PM, Chris M. Thomasson wrote:
[...]
Post by Chris M. Thomasson
Humm... This goes against my futex ways! If C++ really does do its own
internal check and automatically handles any spurious wakes from the
underlying impl, then, well. Okay. Just not what I expected.
Actually, I can refine this further since std::atomic::wait is specified
to not return spuriously. The updated value from the failed
compare_exchange_strong can be used. Perhaps something like:
__________________
struct alignas(CT_L2_CACHE) ct_double_checked_lock
{
std::atomic<unsigned int> m_state = 0;
int m_var = 0;

void init()
{
unsigned int state = 0;

if (m_state.compare_exchange_strong(
state,
1,
std::memory_order_acquire))
{
// critical section
std::this_thread::yield();
m_var += 123;
std::this_thread::yield();

m_state.store(2, std::memory_order_release);
m_state.notify_all();
}

else if (state != 2)
{
m_state.wait(1, std::memory_order_acquire);
}
}
};
__________________

That should work? This goes against my old school futex thinking. ;^)
Chris M. Thomasson
2024-04-20 23:06:18 UTC
Permalink
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
#include <bit>
#include <atomic>
#include <array>
#include <mutex>
#include "xmath.h"
using namespace std;
double pow10( int64_t exp )
{
    constexpr uint64_t EXP_MASK = 0x7FFull << 52;
    // table for binary exponentation with 10 ^ (2 ^ N)
    static array<double, 64> tenPows;
    // table initialized ?
    if( static atomic_bool once( false ); !once.load(
memory_order_acquire ) )
    {
Say three threads got here. Now, they only block if once is false, which
Post by Bonita Montero
        once.wait( false, memory_order_acquire );
Why would they not wait forever right here? Spurious aside for a moment...


You have some interesting issues.
Post by Bonita Montero
        if( !once.load( memory_order_relaxed ) )
        {
            // no: calculate table
            for( double p10x2xN = 10.0; double &pow : tenPows )
                pow = p10x2xN,
                p10x2xN *= p10x2xN;
            // set initialized flag with release semantics
            once.store( true, memory_order_release );
            once.notify_all();
        }
    }
    // begin with 1.0 since x ^ 0 = 1
    double result = 1.0;
    // unsigned exponent
    uint64_t uExp = exp >= 0 ? exp : -exp;
    // highest set bit of exponent
    size_t bit = 63 - countl_zero( uExp );
    // bit mask to highest set bit
    uint64_t mask = 1ull << bit;
    // loop as long as there are bits in unsigned exponent
    for( ; uExp; uExp &= ~mask, mask >>= 1, --bit )
        // bit set ?
        if( uExp & mask )
        {
            // yes: multiply result by 10 ^ (bit + 1)
            result *= tenPows[bit];
            // overlow ?
            if( (bit_cast<uint64_t>( result ) & EXP_MASK) == EXP_MASK )
                // yes: result wouldn't change furhter; stop
                break;
        }
    // return 1 / result if exponent is negative
    return exp >= 0 ? result : 1.0 / result;
};
Chris M. Thomasson
2024-04-29 20:24:08 UTC
Permalink
Post by Bonita Montero
I tried to use futexes for my pow10-function.
Is the usage correct ?
[...]

No.

Loading...