Martin

Interview question, part 5

Posted on February 11, 2019March 14, 2019 by Martin

Find the starting position(s) of “runs” (consecutive characters) in the input from left to right.

Example input and output:

Input: 0000000000
Runs: 0

Input: 0101010101
Runs:

Input: 1111111111
Runs: 0

Input: 0110110110
Runs: 1 4 7

Input: 1110000011
Runs: 0 3 8

Input: 0010011010
Runs: 0 3 5

Program ended with exit code: 1

The answer:

#include <iostream>
using namespace std;

void print_runs(const char* input)
{
    size_t offset = 0;
    size_t index = 0;
    
    while(char current = input[index])
    {
        char next = input[index + 1];
        if(next && next == current)
        {
            cout << " " << offset;
            while(input[index] && current == input[index]) ++index;
            offset = index;
        }
        else
        {
            offset = ++index;
        }
    }
}

int main(int argv, char** argc)
{
    const char* inputs[] =
    {
        "0000000000",
        "0101010101",
        "1111111111",
        "0110110110",
        "1110000011",
        "0010011010"
    };

    for(size_t index = 0; index < (sizeof(inputs) / sizeof(*inputs)); ++index)
    {
        cout << "Input: " << inputs[index] << endl;
        cout << "Runs :";
        print_runs(inputs[index]);
        cout << endl << endl;
    }

    return 1;
}

#include <iostream>

using namespace std;

void print_runs(const char* input)

{

size_t offset = 0;

size_t index = 0;

while(char current = input[index])

{

char next = input[index + 1];

if(next && next == current)

{

cout << " " << offset;

while(input[index] && current == input[index]) ++index;

offset = index;

}

else

{

offset = ++index;

}

int main(int argv, char** argc)

{

const char* inputs[] =

{

"0000000000",

"0101010101",

"1111111111",

"0110110110",

"1110000011",

"0010011010"

};

for(size_t index = 0; index < (sizeof(inputs) / sizeof(*inputs)); ++index)

{

cout << "Input: " << inputs[index] << endl;

cout << "Runs :";

print_runs(inputs[index]);

cout << endl << endl;

}

return 1;

}

Interview question, part 4

Posted on February 11, 2019March 28, 2021 by Martin

I dug up another one from years ago 🙂 This one asked to write a program that:

Prints all binary numbers that do not contain consecutive bits set.

My approach was to brute force over all possible numbers and test each one with a sliding 0b11 bit mask for 2 consecutive bits set.

The answer:

#include <iostream>
#include <limits>
#include <bitset>
#include <cstdint>

using namespace std;

int main(int argc, char** argv)
{
    using type = uint8_t;

    for(type number = 0; number < numeric_limits<type>::max(); ++number)
    {
        bool print = true;

        for(type shift = 0; shift <= numeric_limits<type>::digits - 2; ++shift)
        {
            static type mask = 0b11;
            if((number & (mask << shift)) == (mask << shift))
            {
                print = false;
                break;
            }
        }

        if(print)
            cout << bitset<numeric_limits<type>::digits>(number) << endl;
    }

    return 1;
}

#include <iostream>

#include <limits>

#include <bitset>

#include <cstdint>

using namespace std;

int main(int argc, char** argv)

{

using type = uint8_t;

for(type number = 0; number < numeric_limits<type>::max(); ++number)

{

bool print = true;

for(type shift = 0; shift <= numeric_limits<type>::digits - 2; ++shift)

{

static type mask = 0b11;

if((number & (mask << shift)) == (mask << shift))

{

print = false;

break;

}

if(print)

cout << bitset<numeric_limits<type>::digits>(number) << endl;

}

return 1;

}

Interview question, part 3

Posted on February 10, 2019March 28, 2021 by Martin

I was given this piece of code to analyze and point out what’s wrong with it:

struct Base
{
    Base() { bar(); }
    virtual void foo() = 0;
    void bar() { foo(); }
};

struct Derived final : public Base
{
    virtual void foo() override {}
};

int main(int argc, char** argv)
{
    Derived d;
    return 1;
}

struct Base

{

Base() { bar(); }

virtual void foo() = 0;

void bar() { foo(); }

};

struct Derived final : public Base

{

virtual void foo() override {}

};

int main(int argc, char** argv)

{

Derived d;

return 1;

}

I was also able to compile and execute this program on my Mac; output from Xcode gives away the answer:

libc++abi.dylib: Pure virtual function called!
Program ended with exit code: 9
Program output.

But why? The constructor of Base calls void bar(), which in turn calls pure virtual void foo() = 0. The Derived overrides foo() so what’s the problem here?
Inside the constructor of Base, the type of this pointer is… Base. So the call to foo() inside bar() resolved to Base version of foo(), which is a pure virtual function lacking implementation. This is illegal and the compiler puts a check inside Base::foo() virtual function table pointing to __cxa_pure_virtual. That in turn calls __pthread_kill terminating the program.

__cxa_pure_virtual disassembly:

libc++abi.dylib`__cxa_pure_virtual:
    0x7fff723e8730 <+0>:  pushq  %rbp
    0x7fff723e8731 <+1>:  movq   %rsp, %rbp
    0x7fff723e8734 <+4>:  leaq   0x1f2f(%rip), %rdi        ; "Pure virtual function called!"
    0x7fff723e873b <+11>: xorl   %eax, %eax
    0x7fff723e873d <+13>: callq  0x7fff723dc14a            ; abort_message

libc++abi.dylib`__cxa_pure_virtual:

0x7fff723e8730 <+0>: pushq %rbp

0x7fff723e8731 <+1>: movq %rsp, %rbp

0x7fff723e8734 <+4>: leaq 0x1f2f(%rip), %rdi ; "Pure virtual function called!"

0x7fff723e873b <+11>: xorl %eax, %eax

0x7fff723e873d <+13>: callq 0x7fff723dc14a ; abort_message

UPDATE:

Item #9 From Effective C++.

Interview question, part 2

Posted on February 10, 2019March 14, 2019 by Martin

Another interview question I was asked in the past:

Given only a stack, implement a queue.

I remember thinking at first that it cannot be done! My mind for some reason imagined that I am limited to only one stack. I told the interviewer that I don’t think it can be done with one stack. His response was: I didn’t say anything about having only one stack. That’s when it hit me: if I use two stacks, one for pushing, one for popping, and move the elements between them, it can be done!

When pushing, items go on stack 1. So the bottom of stack 1 is the front of the queue. So how do we get to the bottom element? We pop all the items form stack 1 and push them onto stack 2. This effectively reverses the order of elements and the top of stack 2 becomes the front of the queue. We can then pop off the queue by popping off of stack 2. We do this reversal of elements on every push and pop of the queue. Yea it’s inefficient, but it works 🙂 Honestly I don’t know why you would ever want to implement a queue this way, but nevertheless it was a great interview question!

The answer:

#include <iostream>
#include <stack>
using namespace std;

mutex cout_lock;
#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

template<typename T>
class queue
{
public:
    void push(const T& item)
    {
        spill(m_pop, m_push);
        m_push.push(item);
    }

    void pop(T& item)
    {
        spill(m_push, m_pop);
        item = m_pop.top();
        m_pop.pop();
    }

    bool empty() const noexcept
    {
        return m_push.empty() && m_pop.empty();
    }

private:
    void spill(stack<T>& from, stack<T>& to)
    {
        while(!from.empty())
        {
            to.push(from.top());
            from.pop();
        }
    }

    stack<T> m_push;
    stack<T> m_pop;
};

int main(int argc, char** argv)
{
    queue<int> q;

    q.push(1);
    q.push(2);
    q.push(3);

    while(!q.empty())
    {
        int item;
        q.pop(item);
        trace(item);
    }

    return 1;
}

#include <iostream>

#include <stack>

using namespace std;

mutex cout_lock;

#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

template<typename T>

class queue

{

public:

void push(const T& item)

{

spill(m_pop, m_push);

m_push.push(item);

}

void pop(T& item)

{

spill(m_push, m_pop);

item = m_pop.top();

m_pop.pop();

}

bool empty() const noexcept

{

return m_push.empty() && m_pop.empty();

}

private:

void spill(stack<T>& from, stack<T>& to)

{

while(!from.empty())

{

to.push(from.top());

from.pop();

}

stack<T> m_push;

stack<T> m_pop;

};

int main(int argc, char** argv)

{

queue<int> q;

q.push(1);

q.push(2);

q.push(3);

while(!q.empty())

{

int item;

q.pop(item);

trace(item);

}

return 1;

}

1
2
3
Program output.

Interview question, part 1

Posted on February 10, 2019March 14, 2019 by Martin

When interviewing for a job in the past, I was asked the following question:

Write a multithreaded program that prints a sequence of characters like this: A, B, A, B, A, B… Synchronize properly so it never prints the same character more than once in a row.

Thinking quickly on my feet 🙂 I realized the trick is to use an event object (I will be using this one). But wait, one event object is not enough. The real trick is to use two event objects! Where thread 1 waits on event 1 to be signaled, then prints ‘A’, and finally signals event 2; and repeats the sequence in a loop. Thread 2 waits on event 2, then prints ‘B’, and finally signals event 1. Ditto loop. That should do it!

The answer:

#include <iostream>
#include <mutex>
#include <thread>
#include "event.h"
using namespace std;

mutex cout_lock;
#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

const int COUNT = 3;

int main(int argc, char** argv)
{
    auto_event e1, e2;

    thread t1([&](){
        for(int i = 0; i < COUNT; ++i)
        {
            e1.wait();
            trace("A");
            e2.signal();
        }
    });

    thread t2([&](){
        for(int i = 0; i < COUNT; ++i)
        {
            e2.wait();
            trace("B");
            e1.signal();
        }
    });

    e1.signal();

    t1.join();
    t2.join();

    return 1;
}

#include <iostream>

#include <mutex>

#include <thread>

#include "event.h"

using namespace std;

mutex cout_lock;

#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

const int COUNT = 3;

int main(int argc, char** argv)

{

auto_event e1, e2;

thread t1([&](){

for(int i = 0; i < COUNT; ++i)

{

e1.wait();

trace("A");

e2.signal();

}

});

thread t2([&](){

for(int i = 0; i < COUNT; ++i)

{

e2.wait();

trace("B");

e1.signal();

}

});

e1.signal();

t1.join();

t2.join();

return 1;

}

A
B
A
B
A
B
Program output.

Template concepts, sort of

Posted on February 9, 2019March 28, 2021 by Martin

Back to the queue class from previous posts (here and here)… I realized things can be done better: I want the queue to work with move-constructible and move-assignable types as well as copyable types, and do so automatically. This way the push and pop methods can use std::move to insert and remove elements from the queue. I also want it to work with no-throw constructible and assignable types; this way the push and pop methods can take a more optimized path that does not deal with exceptions. So I realized that I need four versions of push and pop methods: 1) copy-constructible, 2) no-throw copy-constructible, 3) move-constructible, 4) no-throw move-constructible. All fine and dandy, but how do we select the right version at compile time based on type T that the queue holds?
In C++20 we will get template concepts that will allow us to do just that sort of selection at compile time. In the meantime we have to make do with std::enable_if (see here) and other type traits.
Finally, for completeness of interface, I want to add a pop method that returns an item rather than taking an output reference parameter, and declares proper noexcept value depending on type T.

Below is the no-throw-movable pop method; it’s declared noexcept and lacks exception handling code (possibly making it faster at runtime):

template<typename Q = T>
typename std::enable_if<
    std::is_move_assignable<Q>::value and
    std::is_nothrow_move_assignable<Q>::value, void>::type
pop(T& item) noexcept
{
    m_fullSlots.wait();
    {
        std::lock_guard<std::mutex> lock(m_cs);
        item = std::move(m_data[m_popIndex]);
        m_data[m_popIndex].~T();
        m_popIndex = ++m_popIndex % m_size;
        --m_count;
    }
    m_openSlots.post();
}

template<typename Q = T>

typename std::enable_if<

std::is_move_assignable<Q>::value and

std::is_nothrow_move_assignable<Q>::value, void>::type

pop(T& item) noexcept

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

item = std::move(m_data[m_popIndex]);

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

And here is the new pop method; notice its noexcept value is dependent on which version of pop it uses, and it is deduced at compile time 🙂

T pop() noexcept(std::is_nothrow_invocable_r<void, decltype(&blocking_queue<T>::pop<T>), T&>::value)
{
    T item;
    pop(item);
    return item;
}

T pop() noexcept(std::is_nothrow_invocable_r<void, decltype(&blocking_queue<T>::pop<T>), T&>::value)

{

T item;

pop(item);

return item;

}

Complete listing:

#pragma once

#include <mutex>
#include <utility>
#include <type_traits>
#include "semaphore.h"

template<typename T>
class blocking_queue
{
public:
	blocking_queue(unsigned int size)
	: m_size(size), m_pushIndex(0), m_popIndex(0), m_count(0),
	m_data((T*)operator new(size * sizeof(T))),
	m_openSlots(size), m_fullSlots(0) {}

	blocking_queue(const blocking_queue&) = delete;
	blocking_queue(blocking_queue&&) = delete;
	blocking_queue& operator = (const blocking_queue&) = delete;
	blocking_queue& operator = (blocking_queue&&) = delete;

	~blocking_queue() noexcept
	{
		while (m_count--)
		{
			m_data[m_popIndex].~T();
			m_popIndex = ++m_popIndex % m_size;
		}
		operator delete(m_data);
	}

    template<typename Q = T>
    typename std::enable_if<
        std::is_copy_constructible<Q>::value and
        std::is_nothrow_copy_constructible<Q>::value, void>::type
    push(const T& item) noexcept
    {
        m_openSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            new (m_data + m_pushIndex) T (item);
            m_pushIndex = ++m_pushIndex % m_size;
            ++m_count;
        }
        m_fullSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        std::is_copy_constructible<Q>::value and not
        std::is_nothrow_copy_constructible<Q>::value, void>::type
	push(const T& item)
	{
		m_openSlots.wait();
		{
			std::lock_guard<std::mutex> lock(m_cs);
			try
			{
				new (m_data + m_pushIndex) T (item);
			}
			catch (...)
			{
				m_openSlots.post();
				throw;
			}
			m_pushIndex = ++m_pushIndex % m_size;
            ++m_count;
		}
		m_fullSlots.post();
	}

    template<typename Q = T>
    typename std::enable_if<
        std::is_move_constructible<Q>::value and
        std::is_nothrow_move_constructible<Q>::value, void>::type
    push(T&& item) noexcept
    {
        m_openSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            new (m_data + m_pushIndex) T (std::move(item));
            m_pushIndex = ++m_pushIndex % m_size;
            ++m_count;
        }
        m_fullSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        std::is_move_constructible<Q>::value and not
        std::is_nothrow_move_constructible<Q>::value, void>::type
    push(T&& item)
    {
        m_openSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            try
            {
                new (m_data + m_pushIndex) T (std::move(item));
            }
            catch (...)
            {
                m_openSlots.post();
                throw;
            }
            m_pushIndex = ++m_pushIndex % m_size;
            ++m_count;
        }
        m_fullSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        not std::is_move_assignable<Q>::value and
        std::is_nothrow_copy_assignable<Q>::value, void>::type
    pop(T& item) noexcept
    {
        m_fullSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            item = m_data[m_popIndex];
            m_data[m_popIndex].~T();
            m_popIndex = ++m_popIndex % m_size;
            --m_count;
        }
        m_openSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        not std::is_move_assignable<Q>::value and not
        std::is_nothrow_copy_assignable<Q>::value, void>::type
    pop(T& item)
    {
        m_fullSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            try
            {
                item = m_data[m_popIndex];
            }
            catch (...)
            {
                m_fullSlots.post();
                throw;
            }
            m_data[m_popIndex].~T();
            m_popIndex = ++m_popIndex % m_size;
            --m_count;
        }
        m_openSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        std::is_move_assignable<Q>::value and
        std::is_nothrow_move_assignable<Q>::value, void>::type
    pop(T& item) noexcept
    {
        m_fullSlots.wait();
        {
            std::lock_guard<std::mutex> lock(m_cs);
            item = std::move(m_data[m_popIndex]);
            m_data[m_popIndex].~T();
            m_popIndex = ++m_popIndex % m_size;
            --m_count;
        }
        m_openSlots.post();
    }

    template<typename Q = T>
    typename std::enable_if<
        std::is_move_assignable<Q>::value and not
        std::is_nothrow_move_assignable<Q>::value, void>::type
	pop(T& item)
	{
		m_fullSlots.wait();
		{
			std::lock_guard<std::mutex> lock(m_cs);
			try
			{
                item = std::move(m_data[m_popIndex]);
			}
			catch (...)
			{
				m_fullSlots.post();
				throw;
			}
			m_data[m_popIndex].~T();
			m_popIndex = ++m_popIndex % m_size;
            --m_count;
		}
		m_openSlots.post();
	}

    T pop() noexcept(std::is_nothrow_invocable_r<void, decltype(&blocking_queue<T>::pop<T>), T&>::value)
    {
        T item;
        pop(item);
        return item;
    }

    bool empty() const noexcept
    {
        std::lock_guard<std::mutex> lock(m_cs);
        return m_count == 0;
    }

    bool size() const noexcept
    {
        std::lock_guard<std::mutex> lock(m_cs);
        return m_count;
    }

    unsigned int max_size() const noexcept
    {
        return m_size;
    }

private:
	const unsigned int m_size;
	unsigned int m_pushIndex;
	unsigned int m_popIndex;
    unsigned int m_count;
	T* m_data;

    fast_semaphore m_openSlots;
	fast_semaphore m_fullSlots;
    std::mutex m_cs;
};

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

#pragma once

#include <mutex>

#include <utility>

#include <type_traits>

#include "semaphore.h"

template<typename T>

class blocking_queue

{

public:

blocking_queue(unsigned int size)

: m_size(size), m_pushIndex(0), m_popIndex(0), m_count(0),

m_data((T*)operator new(size * sizeof(T))),

m_openSlots(size), m_fullSlots(0) {}

blocking_queue(const blocking_queue&) = delete;

blocking_queue(blocking_queue&&) = delete;

blocking_queue& operator = (const blocking_queue&) = delete;

blocking_queue& operator = (blocking_queue&&) = delete;

~blocking_queue() noexcept

{

while (m_count--)

{

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

}

operator delete(m_data);

}

template<typename Q = T>

typename std::enable_if<

std::is_copy_constructible<Q>::value and

std::is_nothrow_copy_constructible<Q>::value, void>::type

push(const T& item) noexcept

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

new (m_data + m_pushIndex) T (item);

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

template<typename Q = T>

typename std::enable_if<

std::is_copy_constructible<Q>::value and not

std::is_nothrow_copy_constructible<Q>::value, void>::type

push(const T& item)

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

new (m_data + m_pushIndex) T (item);

}

catch (...)

{

m_openSlots.post();

throw;

}

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

template<typename Q = T>

typename std::enable_if<

std::is_move_constructible<Q>::value and

std::is_nothrow_move_constructible<Q>::value, void>::type

push(T&& item) noexcept

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

new (m_data + m_pushIndex) T (std::move(item));

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

template<typename Q = T>

typename std::enable_if<

std::is_move_constructible<Q>::value and not

std::is_nothrow_move_constructible<Q>::value, void>::type

push(T&& item)

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

new (m_data + m_pushIndex) T (std::move(item));

}

catch (...)

{

m_openSlots.post();

throw;

}

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

template<typename Q = T>

typename std::enable_if<

not std::is_move_assignable<Q>::value and

std::is_nothrow_copy_assignable<Q>::value, void>::type

pop(T& item) noexcept

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

item = m_data[m_popIndex];

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

template<typename Q = T>

typename std::enable_if<

not std::is_move_assignable<Q>::value and not

std::is_nothrow_copy_assignable<Q>::value, void>::type

pop(T& item)

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

item = m_data[m_popIndex];

}

catch (...)

{

m_fullSlots.post();

throw;

}

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

template<typename Q = T>

typename std::enable_if<

std::is_move_assignable<Q>::value and

std::is_nothrow_move_assignable<Q>::value, void>::type

pop(T& item) noexcept

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

item = std::move(m_data[m_popIndex]);

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

template<typename Q = T>

typename std::enable_if<

std::is_move_assignable<Q>::value and not

std::is_nothrow_move_assignable<Q>::value, void>::type

pop(T& item)

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

item = std::move(m_data[m_popIndex]);

}

catch (...)

{

m_fullSlots.post();

throw;

}

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

T pop() noexcept(std::is_nothrow_invocable_r<void, decltype(&blocking_queue<T>::pop<T>), T&>::value)

{

T item;

pop(item);

return item;

}

bool empty() const noexcept

{

std::lock_guard<std::mutex> lock(m_cs);

return m_count == 0;

}

bool size() const noexcept

{

std::lock_guard<std::mutex> lock(m_cs);

return m_count;

}

unsigned int max_size() const noexcept

{

return m_size;

}

private:

const unsigned int m_size;

unsigned int m_pushIndex;

unsigned int m_popIndex;

unsigned int m_count;

T* m_data;

fast_semaphore m_openSlots;

fast_semaphore m_fullSlots;

std::mutex m_cs;

};

Event objects

Posted on February 8, 2019March 28, 2021 by Martin

No, not the type where you subscribe to an event using an interface or a callback. The type where you wait on an event to be signaled, just like Windows Event Objects.
Thanks to many suggestions from the kind people on Reddit’s r/cpp I came up with 2 classes: manual_event that can be waited on, and when signaled stays signaled until it’s reset, unblocking all waiting threads. And auto_event that resets to non-signaled state when signaled, and unblocks only one waiting thread.
The below implementation is far from final and I am open to any suggestions from people experienced in multi-threaded programming.

The usage example:

#include <iostream>
#include <thread>
#include <mutex>
#include "event.h"
using namespace std;

mutex cout_lock;
#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

const int COUNT = 3;

void manual()
{
    manual_event e;
    
    for(int i = 0; i < COUNT; ++i)
        thread([&](){
            trace("manual " << this_thread::get_id() << " blocked");
            e.wait();
            trace("manual " << this_thread::get_id() << " unblocked");
        }).detach();
    
    this_thread::sleep_for(500ms);
    e.signal();
    this_thread::sleep_for(500ms);
    e.reset();
    
    for(int i = 0; i < COUNT; ++i)
        thread([&](){
            trace("manual " << this_thread::get_id() << " blocked");
            e.wait();
            trace("manual " << this_thread::get_id() << " unblocked");
        }).detach();
    
    this_thread::sleep_for(500ms);
    e.signal();
    this_thread::sleep_for(500ms);
}

void automatic()
{
    auto_event e;

    for(int i = 0; i < COUNT; ++i)
        thread([&](){
            trace("auto " << this_thread::get_id() << " blocked");
            e.wait();
            trace("auto " << this_thread::get_id() << " unblocked");
        }).detach();

    for(int i = 0; i < COUNT; ++i)
    {
        this_thread::sleep_for(500ms);
        e.signal();
    }

    this_thread::sleep_for(500ms);
}

int main(int argc, char** argv)
{
    manual();
    automatic();
    return 1;
}

#include <iostream>

#include <thread>

#include <mutex>

#include "event.h"

using namespace std;

mutex cout_lock;

#define trace(x) { scoped_lock<mutex> lock(cout_lock); cout << x << endl; }

const int COUNT = 3;

void manual()

{

manual_event e;

for(int i = 0; i < COUNT; ++i)

thread([&](){

trace("manual " << this_thread::get_id() << " blocked");

e.wait();

trace("manual " << this_thread::get_id() << " unblocked");

}).detach();

this_thread::sleep_for(500ms);

e.signal();

this_thread::sleep_for(500ms);

e.reset();

for(int i = 0; i < COUNT; ++i)

thread([&](){

trace("manual " << this_thread::get_id() << " blocked");

e.wait();

trace("manual " << this_thread::get_id() << " unblocked");

}).detach();

this_thread::sleep_for(500ms);

e.signal();

this_thread::sleep_for(500ms);

}

void automatic()

{

auto_event e;

for(int i = 0; i < COUNT; ++i)

thread([&](){

trace("auto " << this_thread::get_id() << " blocked");

e.wait();

trace("auto " << this_thread::get_id() << " unblocked");

}).detach();

for(int i = 0; i < COUNT; ++i)

{

this_thread::sleep_for(500ms);

e.signal();

}

this_thread::sleep_for(500ms);

}

int main(int argc, char** argv)

{

manual();

automatic();

return 1;

}

The event.h:

#pragma once

#include <mutex>
#include <condition_variable>

class manual_event
{
public:
    explicit manual_event(bool signaled = false) noexcept
    : m_signaled(signaled) {}

    void signal() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_signaled = true;
        m_cv.notify_all();
    }

    void wait() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [&](){ return m_signaled != false; });
    }

    void reset() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_signaled = false;
    }

private:
    bool m_signaled = false;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

class auto_event
{
public:
    explicit auto_event(bool signaled = false) noexcept
    : m_signaled(signaled) {}

    void signal() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_signaled = true;
        m_cv.notify_one();
    }

    void wait() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [&](){ return m_signaled != false; });
        m_signaled = false;
    }

private:
    bool m_signaled = false;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

#pragma once

#include <mutex>

#include <condition_variable>

class manual_event

{

public:

explicit manual_event(bool signaled = false) noexcept

: m_signaled(signaled) {}

void signal() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_signaled = true;

m_cv.notify_all();

}

void wait() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_cv.wait(lock, [&](){ return m_signaled != false; });

}

void reset() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_signaled = false;

}

private:

bool m_signaled = false;

std::mutex m_mutex;

std::condition_variable m_cv;

};

class auto_event

{

public:

explicit auto_event(bool signaled = false) noexcept

: m_signaled(signaled) {}

void signal() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_signaled = true;

m_cv.notify_one();

}

void wait() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_cv.wait(lock, [&](){ return m_signaled != false; });

m_signaled = false;

}

private:

bool m_signaled = false;

std::mutex m_mutex;

std::condition_variable m_cv;

};

Class mechanics

Posted on February 7, 2019March 28, 2021 by Martin

A friend asked me to make a post about the mechanics of virtual functions in C++. I thought about it for a few days and decided to write a broader post about several topics dealing with classes and what happens under the hood when we use member functions, inheritance, and virtual functions.

Member functions. You can think of member functions as no different than static members, or functions defined outside the class, except the compiler adds a hidden first parameter this. That’s how a member function is able to operate on an instance of a class. So when you write this:

class C
{
public:
    void MemFun(int arg) {}
};

C c;
c.MemFun(1);

class C

{

public:

void MemFun(int arg) {}

};

C c;

c.MemFun(1);

What you are really getting is this:

class C {};
void C_MemFun(C* _this_, int arg) {}

C c;
C_MemFun(&c, 1);

class C {};

void C_MemFun(C* _this_, int arg) {}

C c;

C_MemFun(&c, 1);

A compiler generated C_MemFun with extra parameter of type C*.

Inheritance. Here I want to briefly mention the order in which constructors and destructors are called: if D derives from B and you create an instance of D, the constructors will be executed in order: B’s first, D’s second. So when the control enters D’s constructor, B part of the class has been initialized already. The opposite is true for destructors. When you delete an instance of D, the destructors will be executed in order: D’s first, B’s second. The code at the end of this post will demonstrate it. BTW I’m skipping over virtual and pure virtual destructors in this post since that’s a topic that could easily become its own blog post 😉

Virtual functions. Several things happen when we declare a virtual function: for each class the compiler creates a hidden table called virtual function table. Pointers to virtual functions are stored in this table. Next each instance of a class gets a hidden member variable called virtual function table pointer that, as the name implies, points at the virtual function table for that particular class. So an instance of B has a hidden member pointing at B’s virtual function table, and an instance of D… ditto. When you create virtual functions the compiler generates the following dispatch code:

template<typename T, typename... A>
void VirtualDispatch(T* _this_, int VirtualFuncNum, A&&... args)
{
    _this_->VirtualTablePtr[VirtualFuncNum](_this_, forward<A>(args)...);
}

template<typename T, typename... A>

void VirtualDispatch(T* _this_, int VirtualFuncNum, A&&... args)

{

_this_->VirtualTablePtr[VirtualFuncNum](_this_, forward<A>(args)...);

}

For an instance of type T called _this_, it does a lookup in the VirtualTablePtr for virtual function number VirtualFuncNum, and calls it with _this_as the first argument plus whatever extra parameters it accepts. Depending on the type of _this_, VirtualTablePtr will point at a different virtual function table and that’s more or less how we get runtime polymorphism in C++ 🙂

Complete listing:

#include <iostream>
#include <utility>
#include <cstdlib>
using namespace std;

#define VIRTUAL_FUNCTION_1 0
#define VIRTUAL_FUNCTION_2 1

typedef void(*VirtualFunctionPtr)(void*, int arg);

struct Base
{
    static void BaseConstructor(void* _this_)
    {
        ((Base*)_this_)->VirtualTablePtr = BaseVirtualTable;
        ((Base*)_this_)->Member1 = rand() % 100;
        cout << "BaseConstructor(" << _this_ <<
            ", Member1 = " << ((Base*)_this_)->Member1 << ")" << endl;
    }
    
    static void BaseDestructor(void* _this_)
    {
        cout << "BaseDestructor(" << _this_ << ")" << endl;
    }

    static void BaseVirtualFunction_1(void* _this_, int arg)
    {
        cout << "Base(Member1 = " << ((Base*)_this_)->Member1 <<
            ")::BaseVirtualFunction_1(" << arg << ")" << endl;
    }
    
    static void BaseVirtualFunction_2(void* _this_, int arg)
    {
        cout << "Base(Member1 = " << ((Base*)_this_)->Member1 <<
            ")::BaseVirtualFunction_2(" << arg << ")"  << endl;
    }

    VirtualFunctionPtr* VirtualTablePtr;
    int Member1;
    static VirtualFunctionPtr BaseVirtualTable[3];
};

VirtualFunctionPtr Base::BaseVirtualTable[3] =
{
    &Base::BaseVirtualFunction_1,
    &Base::BaseVirtualFunction_2
};

struct Derived
{
    static void DerivedConstructor(void* _this_)
    {
        Base::BaseConstructor(_this_);
        ((Derived*)_this_)->VirtualTablePtr = DerivedVirtualTable;
        ((Derived*)_this_)->Member2 = rand() % 100;
        cout << "DerivedConstructor(" << _this_ <<
            ", Member2 = " << ((Derived*)_this_)->Member2 << ")" << endl;
    }
    
    static void DerivedDestructor(void* _this_)
    {
        cout << "DerivedDestructor(" << _this_ << ")" << endl;
        Base::BaseDestructor(_this_);
    }

    static void DerivedVirtualFunction_1(void* _this_, int arg)
    {
        cout << "Derived(Member1 = " << ((Base*)_this_)->Member1 <<
            ", Member2 = " << ((Derived*)_this_)->Member2 <<
            ")::DerivedVirtualFunction_1(" << arg << ")"  << endl;
    }
    
    static void DerivedVirtualFunction_2(void* _this_, int arg)
    {
        cout << "Derived(Member1 = " << ((Base*)_this_)->Member1 <<
            ", Member2 = " << ((Derived*)_this_)->Member2 <<
            ")::DerivedVirtualFunction_2(" << arg << ")"  << endl;
    }

    VirtualFunctionPtr* VirtualTablePtr;
    int __Space_for_Base_Member1__;
    int Member2;
    static VirtualFunctionPtr DerivedVirtualTable[3];
};

VirtualFunctionPtr Derived::DerivedVirtualTable[3] =
{
    &Derived::DerivedVirtualFunction_1,
    &Derived::DerivedVirtualFunction_2
};

template<typename T, typename... A>
void VirtualDispatch(T* _this_, int VirtualFuncNum, A&&... args)
{
    _this_->VirtualTablePtr[VirtualFuncNum](_this_, forward<A>(args)...);
}

int main(int argc, char** argv)
{
    srand((unsigned int)time(NULL));

    cout << "===> Base start <===" << endl;
    Base* base = (Base*)operator new(sizeof(Base));
    Base::BaseConstructor(base);

    VirtualDispatch(base, VIRTUAL_FUNCTION_1, rand() % 100);
    VirtualDispatch(base, VIRTUAL_FUNCTION_2, rand() % 100);

    Base::BaseDestructor(base);
    operator delete(base);
    cout << "===> Base end <===" << endl << endl;

    cout << "===> Derived start <===" << endl;
    Base* derived = (Base*)operator new(sizeof(Derived));
    Derived::DerivedConstructor(derived);

    VirtualDispatch(derived, VIRTUAL_FUNCTION_1, rand() % 100);
    VirtualDispatch(derived, VIRTUAL_FUNCTION_2, rand() % 100);

    Derived::DerivedDestructor(derived);
    operator delete(derived);
    cout << "===> Derived end <===" << endl << endl;

    return 1;
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

#include <iostream>

#include <utility>

#include <cstdlib>

using namespace std;

#define VIRTUAL_FUNCTION_1 0

#define VIRTUAL_FUNCTION_2 1

typedef void(*VirtualFunctionPtr)(void*, int arg);

struct Base

{

static void BaseConstructor(void* _this_)

{

((Base*)_this_)->VirtualTablePtr = BaseVirtualTable;

((Base*)_this_)->Member1 = rand() % 100;

cout << "BaseConstructor(" << _this_ <<

", Member1 = " << ((Base*)_this_)->Member1 << ")" << endl;

}

static void BaseDestructor(void* _this_)

{

cout << "BaseDestructor(" << _this_ << ")" << endl;

}

static void BaseVirtualFunction_1(void* _this_, int arg)

{

cout << "Base(Member1 = " << ((Base*)_this_)->Member1 <<

")::BaseVirtualFunction_1(" << arg << ")" << endl;

}

static void BaseVirtualFunction_2(void* _this_, int arg)

{

cout << "Base(Member1 = " << ((Base*)_this_)->Member1 <<

")::BaseVirtualFunction_2(" << arg << ")" << endl;

}

VirtualFunctionPtr* VirtualTablePtr;

int Member1;

static VirtualFunctionPtr BaseVirtualTable[3];

};

VirtualFunctionPtr Base::BaseVirtualTable[3] =

{

&Base::BaseVirtualFunction_1,

&Base::BaseVirtualFunction_2

};

struct Derived

{

static void DerivedConstructor(void* _this_)

{

Base::BaseConstructor(_this_);

((Derived*)_this_)->VirtualTablePtr = DerivedVirtualTable;

((Derived*)_this_)->Member2 = rand() % 100;

cout << "DerivedConstructor(" << _this_ <<

", Member2 = " << ((Derived*)_this_)->Member2 << ")" << endl;

}

static void DerivedDestructor(void* _this_)

{

cout << "DerivedDestructor(" << _this_ << ")" << endl;

Base::BaseDestructor(_this_);

}

static void DerivedVirtualFunction_1(void* _this_, int arg)

{

cout << "Derived(Member1 = " << ((Base*)_this_)->Member1 <<

", Member2 = " << ((Derived*)_this_)->Member2 <<

")::DerivedVirtualFunction_1(" << arg << ")" << endl;

}

static void DerivedVirtualFunction_2(void* _this_, int arg)

{

cout << "Derived(Member1 = " << ((Base*)_this_)->Member1 <<

", Member2 = " << ((Derived*)_this_)->Member2 <<

")::DerivedVirtualFunction_2(" << arg << ")" << endl;

}

VirtualFunctionPtr* VirtualTablePtr;

int __Space_for_Base_Member1__;

int Member2;

static VirtualFunctionPtr DerivedVirtualTable[3];

};

VirtualFunctionPtr Derived::DerivedVirtualTable[3] =

{

&Derived::DerivedVirtualFunction_1,

&Derived::DerivedVirtualFunction_2

};

template<typename T, typename... A>

void VirtualDispatch(T* _this_, int VirtualFuncNum, A&&... args)

{

_this_->VirtualTablePtr[VirtualFuncNum](_this_, forward<A>(args)...);

}

int main(int argc, char** argv)

{

srand((unsigned int)time(NULL));

cout << "===> Base start <===" << endl;

Base* base = (Base*)operator new(sizeof(Base));

Base::BaseConstructor(base);

VirtualDispatch(base, VIRTUAL_FUNCTION_1, rand() % 100);

VirtualDispatch(base, VIRTUAL_FUNCTION_2, rand() % 100);

Base::BaseDestructor(base);

operator delete(base);

cout << "===> Base end <===" << endl << endl;

cout << "===> Derived start <===" << endl;

Base* derived = (Base*)operator new(sizeof(Derived));

Derived::DerivedConstructor(derived);

VirtualDispatch(derived, VIRTUAL_FUNCTION_1, rand() % 100);

VirtualDispatch(derived, VIRTUAL_FUNCTION_2, rand() % 100);

Derived::DerivedDestructor(derived);

operator delete(derived);

cout << "===> Derived end <===" << endl << endl;

return 1;

}

===> Base start <===
BaseConstructor(0x1005845d0, Member1 = 29)
Base(Member1 = 29)::BaseVirtualFunction_1(59)
Base(Member1 = 29)::BaseVirtualFunction_2(52)
BaseDestructor(0x1005845d0)
===> Base end <===

===> Derived start <===
BaseConstructor(0x10060b6a0, Member1 = 52)
DerivedConstructor(0x10060b6a0, Member2 = 80)
Derived(Member1 = 52, Member2 = 80)::DerivedVirtualFunction_1(40)
Derived(Member1 = 52, Member2 = 80)::DerivedVirtualFunction_2(79)
DerivedDestructor(0x10060b6a0)
BaseDestructor(0x10060b6a0)
===> Derived end <===
Program output.

Refactoring std::auto_ptr

Posted on February 5, 2019March 28, 2021 by Martin

A colleague at work was recently tasked with refactoring some legacy code and came across the good old std::auto_ptr. In C++0x it has been deprecated (and later removed outright; I can’t even compile std::auto_ptr code in Visual Studio 2017 nor Xcode 10.1) so it was his job to rewrite it in the modern C++ way. The code that my colleague came across had another problem. Can you spot it?

std::auto_ptr<T> p1(new T[3]);

1	std::auto_ptr<T> p1(new T[3]);

std::auto_ptr was never meant to hold pointers to arrays. So the code above, assuming it didn’t crash, was leaking memory (regardless of what it holds, std::auto_ptr always calls delete in its destructor; delete[] was needed in the code above).

So how do we refactor legacy std::auto_ptr code into modern C++? The answer is std::unique_ptr(https://en.cppreference.com/w/cpp/memory/unique_ptr). A std::auto_ptr holding a pointer to a single object of type T, in modern C++, becomes:

auto p2 = std::make_unique<T>();

1	auto p2 = std::make_unique<T>();

std::make_unique can forward constructor parameters to T::T, like this:

auto p3 = std::make_unique<T>("string parameter");

1	auto p3 = std::make_unique<T>("string parameter");

And an array of T’s of size N becomes:

auto p4 = std::make_unique<T[]>(N);

1	auto p4 = std::make_unique<T[]>(N);

Note that for std::make_unique to be able to create an array of T’s, T must have a T::T() (default constructor; or a constructor with all parameters having default values: T::T(int x = 0)).

Fast semaphore

Posted on February 5, 2019March 28, 2021 by Martin

I received an interesting piece of code during my recent discussion about the blocking queue: a fast semaphore implementation. The fast_semaphore class uses a regular C++ semaphore as fallback for waiting and unblocking the thread(s) while doing its magic using an atomic variable and memory fences of the new C++ memory model (see here and here).

I present to you, my shortened version of, Fast-Semaphore by Joe Seigh; C++ Implementation by Chris Thomasson:

#pragma once

#include <mutex>
#include <atomic>
#include <condition_variable>
#include <cassert>

class semaphore
{
public:
    semaphore(int count) noexcept
    : m_count(count) { assert(count > -1); }

    void post() noexcept
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            ++m_count;
        }
        m_cv.notify_one();
    }

    void wait() noexcept
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [&]() { return m_count != 0; });
        --m_count;
    }

private:
    int m_count;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

class fast_semaphore
{
public:
    fast_semaphore(int count) noexcept
    : m_count(count), m_semaphore(0) {}

    void post()
    {
        std::atomic_thread_fence(std::memory_order_release);
        int count = m_count.fetch_add(1, std::memory_order_relaxed);
        if (count < 0)
            m_semaphore.post();
    }

    void wait()
    {
        int count = m_count.fetch_sub(1, std::memory_order_relaxed);
        if (count < 1)
            m_semaphore.wait();
        std::atomic_thread_fence(std::memory_order_acquire);
    }

private:
    std::atomic<int> m_count;
    semaphore m_semaphore;
};

#pragma once

#include <mutex>

#include <atomic>

#include <condition_variable>

#include <cassert>

class semaphore

{

public:

semaphore(int count) noexcept

: m_count(count) { assert(count > -1); }

void post() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

++m_count;

}

m_cv.notify_one();

}

void wait() noexcept

{

std::unique_lock<std::mutex> lock(m_mutex);

m_cv.wait(lock, [&]() { return m_count != 0; });

--m_count;

}

private:

int m_count;

std::mutex m_mutex;

std::condition_variable m_cv;

};

class fast_semaphore

{

public:

fast_semaphore(int count) noexcept

: m_count(count), m_semaphore(0) {}

void post()

{

std::atomic_thread_fence(std::memory_order_release);

int count = m_count.fetch_add(1, std::memory_order_relaxed);

if (count < 0)

m_semaphore.post();

}

void wait()

{

int count = m_count.fetch_sub(1, std::memory_order_relaxed);

if (count < 1)

m_semaphore.wait();

std::atomic_thread_fence(std::memory_order_acquire);

}

private:

std::atomic<int> m_count;

semaphore m_semaphore;

};

Better blocking queue

Posted on February 3, 2019March 28, 2021 by Martin

In the previous post we discussed a multi-threaded blocking queue who’s implementation was lacking: it was not exception safe. It left the semaphore counts wrongly modified in the presence of exceptions emitted by T’s copy constructor or assignment operator.

Below is a corrected void push(const T item) method:

void push(const T& item)
{
	m_openSlots.wait();
	{
		std::lock_guard<std::mutex> lock(m_cs);
		try
		{
			new (m_data + m_pushIndex) T (item);
		}
		catch (...)
		{
			m_openSlots.post();
			throw;
		}
		m_pushIndex = ++m_pushIndex % m_size;
		++m_count;
	}
	m_fullSlots.post();
}

void push(const T& item)

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

new (m_data + m_pushIndex) T (item);

}

catch (...)

{

m_openSlots.post();

throw;

}

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

In the corrected version we first catch any exception possibly emitted by T’s copy constructor then adjust back the open-slot semaphore. Since the push operation failed the number of open and full slots has not changed. Finally we re-throw the exception.

Below is a corrected void pop(T item) method:

void pop(T& item)
{
	m_fullSlots.wait();
	{
		std::lock_guard<std::mutex> lock(m_cs);
		try
		{
			item = m_data[m_popIndex];
		}
		catch (...)
		{
			m_fullSlots.post();
			throw;
		}
		m_data[m_popIndex].~T();
		m_popIndex = ++m_popIndex % m_size;
		--m_count;
	}
	m_openSlots.post();
}

void pop(T& item)

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

try

{

item = m_data[m_popIndex];

}

catch (...)

{

m_fullSlots.post();

throw;

}

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

In the corrected version we first catch any exception possibly emitted by T’s assignment operator then adjust back the full-slot semaphore. Since the pop operation failed the number of open and full slots has not changed. Finally we re-throw the exception.

We now have a robust queue template class that can handle misbehaving T’s 🙂

Blocking queue

Posted on February 3, 2019March 28, 2021 by Martin

Where produce-consumer pattern is present it is often the case that one is faster that the other: a parsing producer reads records faster than a processing consumer; a disk reading producer is faster than network sending consumer.
Producer and consumer often communicate by queues: the producer will put items on a queue while the consumer will pop items off a queue. What happens when the queue becomes full, or empty? One approach of the producer is to try to put an item on a queue and if it’s full yield the thread and repeat. Similarly the consumer can try to pop an item off a queue and if it’s empty, ditto. This approach of try-fail-yield can unnecessarily burn CPU cycles in tight loops that constantly try to put or pop items off a queue.
Another approach is to temporarily grow the queue, but that doesn’t scale well. When do we stop growing? And once we stop we have to fall back onto the try-fail-yield method.

What if we could implement a blocking queue: a queue who’s put operation blocks when the queue if full, and unblocks only when another thread pops an item off the queue. Similarly a queue who’s pop operation blocks when the queue is empty, and unblocks only when another thread puts an item on the queue. An example of using such a queue would look like this (notice a fast producer and slow consumer in the code below):

#include <iostream>
#include <mutex>
#include <thread>
#include "queue.h"
using namespace std;

const int COUNT = 10;

int main(int argc, char** argv)
{
    blocking_queue<int> q(5);
    mutex cout_lock;

    thread producer([&]() {
        for(int i = 1; i <= COUNT; ++i)
        {
            q.push(i);
            {
                scoped_lock<mutex> lock(cout_lock);
                cout << "push v = " << i << endl;
            }
        }
    });

    thread consumer([&]() {
        for(int i = 1; i <= COUNT; ++i)
        {
            this_thread::sleep_for(1s);
            int v;
            q.pop(v);
            {
                scoped_lock<mutex> lock(cout_lock);
                cout << "pop  v = " << v << endl;
            }
        }
    });

    producer.join();
    consumer.join();

    return 1;
}

#include <iostream>

#include <mutex>

#include <thread>

#include "queue.h"

using namespace std;

const int COUNT = 10;

int main(int argc, char** argv)

{

blocking_queue<int> q(5);

mutex cout_lock;

thread producer([&]() {

for(int i = 1; i <= COUNT; ++i)

{

q.push(i);

{

scoped_lock<mutex> lock(cout_lock);

cout << "push v = " << i << endl;

}

});

thread consumer([&]() {

for(int i = 1; i <= COUNT; ++i)

{

this_thread::sleep_for(1s);

int v;

q.pop(v);

{

scoped_lock<mutex> lock(cout_lock);

cout << "pop v = " << v << endl;

}

});

producer.join();

consumer.join();

return 1;

}

Notice no try-fail-yield code in the example above. The put operation of the fast producer simply blocks until the slow consumer makes more room on the queue. The output of the program is as we expect; the fast producer fills up the queue then blocks, a second later the consumer starts to slowly pop items of the queue; they go in tandem for a while until the producer exits and the consumer drains the queue:

push v = 1
push v = 2
push v = 3
push v = 4
push v = 5
pop v = 1
push v = 6
pop v = 2
push v = 7
pop v = 3
push v = 8
pop v = 4
push v = 9
pop v = 5
push v = 10
pop v = 6
pop v = 7
pop v = 8
pop v = 9
pop v = 10
Program ended with exit code: 1

The trick to implementing such a queue is the ability to count both open and full slots of the queue, and block. A semaphore is a perfect mechanism to do just that 🙂 In fact we need two semaphores: one to count the open slots, and another to count the full slots. The open-slot semaphore starts with a count equal to the size of the queue. The full-slot semaphore starts with a count of zero. A push operation waits on the open-slot semaphore and signals the full-slot semaphore. A pop operation waits on the full-slot semaphore and signals the open-slot semaphore.

The blocking queue implementation below uses Boost semaphores to count and std::mutex to protect the critical section. In the next post I will show how to make it safe in the presence of exceptions; currently it misbehaves if T’s copy constructor or assignment operator throw (it assumes T’s destructor will never throw).

queue.h

#pragma once

#include <mutex>
#include <boost/interprocess/sync/interprocess_semaphore.hpp>

template<typename T>
class blocking_queue
{
public:
	blocking_queue(unsigned int size)
	: m_size(size), m_pushIndex(0), m_popIndex(0), m_count(0),
	m_data((T*)operator new(size * sizeof(T))),
	m_openSlots(size), m_fullSlots(0) {}

	blocking_queue(const blocking_queue&) = delete;
	blocking_queue(blocking_queue&&) = delete;
	blocking_queue& operator = (const blocking_queue&) = delete;
	blocking_queue& operator = (blocking_queue&&) = delete;

	~blocking_queue()
	{
		while (m_count--)
		{
			m_data[m_popIndex].~T();
			m_popIndex = ++m_popIndex % m_size;
		}
		operator delete(m_data);
	}

	void push(const T& item)
	{
		m_openSlots.wait();
		{
			std::lock_guard<std::mutex> lock(m_cs);
			new (m_data + m_pushIndex) T (item);
			m_pushIndex = ++m_pushIndex % m_size;
            ++m_count;
		}
		m_fullSlots.post();
	}

	void pop(T& item)
	{
		m_fullSlots.wait();
		{
			std::lock_guard<std::mutex> lock(m_cs);
			item = m_data[m_popIndex];
			m_data[m_popIndex].~T();
			m_popIndex = ++m_popIndex % m_size;
            --m_count;
		}
		m_openSlots.post();
	}

    bool empty()
    {
        std::lock_guard<std::mutex> lock(m_cs);
        return m_count == 0;
    }

private:
	unsigned int m_size;
	unsigned int m_pushIndex;
	unsigned int m_popIndex;
    unsigned int m_count;
	T* m_data;

    boost::interprocess::interprocess_semaphore m_openSlots;
	boost::interprocess::interprocess_semaphore m_fullSlots;
    std::mutex m_cs;
};

#pragma once

#include <mutex>

#include <boost/interprocess/sync/interprocess_semaphore.hpp>

template<typename T>

class blocking_queue

{

public:

blocking_queue(unsigned int size)

: m_size(size), m_pushIndex(0), m_popIndex(0), m_count(0),

m_data((T*)operator new(size * sizeof(T))),

m_openSlots(size), m_fullSlots(0) {}

blocking_queue(const blocking_queue&) = delete;

blocking_queue(blocking_queue&&) = delete;

blocking_queue& operator = (const blocking_queue&) = delete;

blocking_queue& operator = (blocking_queue&&) = delete;

~blocking_queue()

{

while (m_count--)

{

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

}

operator delete(m_data);

}

void push(const T& item)

{

m_openSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

new (m_data + m_pushIndex) T (item);

m_pushIndex = ++m_pushIndex % m_size;

++m_count;

}

m_fullSlots.post();

}

void pop(T& item)

{

m_fullSlots.wait();

{

std::lock_guard<std::mutex> lock(m_cs);

item = m_data[m_popIndex];

m_data[m_popIndex].~T();

m_popIndex = ++m_popIndex % m_size;

--m_count;

}

m_openSlots.post();

}

bool empty()

{

std::lock_guard<std::mutex> lock(m_cs);

return m_count == 0;

}

private:

unsigned int m_size;

unsigned int m_pushIndex;

unsigned int m_popIndex;

unsigned int m_count;

T* m_data;

boost::interprocess::interprocess_semaphore m_openSlots;

boost::interprocess::interprocess_semaphore m_fullSlots;

std::mutex m_cs;

};

P.S. It was suggested to me that the use of boost::interprocess::interprocess_semaphore is a heavy-handed approach. I agree. I only used it to keep the example code small and uncluttered with more utility classes. In production you should have a lightweight semaphore class that uses a mutex and a condition variable. Like this 🙂 …

#pragma once

#include <mutex>
#include <condition_variable>

class semaphore
{
public:
    semaphore(unsigned int count) : m_count(count) {}
    semaphore(const semaphore&&) = delete;
    semaphore(semaphore&&) = delete;
    semaphore& operator = (const semaphore&) = delete;
    semaphore& operator = (semaphore&&) = delete;
    ~semaphore() = default;
    
    void post()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        ++m_count;
        m_cv.notify_one();
    }
    
    void wait()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [&]{ return m_count > 0; });
        --m_count;
    }

private:
    std::mutex m_mutex;
    std::condition_variable m_cv;
    unsigned int m_count;
};

#pragma once

#include <mutex>

#include <condition_variable>

class semaphore

{

public:

semaphore(unsigned int count) : m_count(count) {}

semaphore(const semaphore&&) = delete;

semaphore(semaphore&&) = delete;

semaphore& operator = (const semaphore&) = delete;

semaphore& operator = (semaphore&&) = delete;

~semaphore() = default;

void post()

{

std::unique_lock<std::mutex> lock(m_mutex);

++m_count;

m_cv.notify_one();

}

void wait()

{

std::unique_lock<std::mutex> lock(m_mutex);

m_cv.wait(lock, [&]{ return m_count > 0; });

--m_count;

}

private:

std::mutex m_mutex;

std::condition_variable m_cv;

unsigned int m_count;

};

RAII

Posted on February 2, 2019March 28, 2021 by Martin

I know the topic of RAII has been blogged about plenty of times before. Still, I want to present to you my take on it 🙂 Recently I created a policy-based generic RAII template for holding various types of resources (pointers, file handles, mutexes, etc). The nice thing about my implementation is that in order to acquire and release a new type of a resource you only have to define simple Acquire and Release policy template classes and plug them as parameters to the RAII template. Here’s how you can use it with std::mutex:

template<typename T> struct LockPolicy { static void Execute(T t) { t.lock(); } };
template<typename T> struct UnlockPolicy { static void Execute(T t) { t.unlock(); } };
template<typename T> using scope_lock = RAII<T&, LockPolicy, UnlockPolicy>;

std::mutex m;
{
    scope_lock<std::mutex> lock(m);
}

template<typename T> struct LockPolicy { static void Execute(T t) { t.lock(); } };

template<typename T> struct UnlockPolicy { static void Execute(T t) { t.unlock(); } };

template<typename T> using scope_lock = RAII<T&, LockPolicy, UnlockPolicy>;

std::mutex m;

{

scope_lock<std::mutex> lock(m);

}

In the code above we declare 2 policy classes: LockPolicy which calls .lock(); on type T, and UnlockPolicy which calls .unlock(); on type T. Next we declare scope_lock to be a template which will hold type T by reference and apply LockPolicy::Execute(T t); and UnlockPolicy::Execute(T t); in the constructor and destructor of RAII. This way we can use scope_lock with any object that has .lock(); and .unlock(); methods.

As an exercise in writing policy classes let’s use RAII template to hold and automatically delete or delete[] pointers and pointers to arrays:

template<typename T> struct NoOpPolicy { static void Execute(T) {} };

template<typename T> struct PointerReleasePolicy { static void Execute(T ptr) { delete ptr; } };
template<typename T> struct ArrayReleasePolicy { static void Execute(T ptr) { delete[] ptr; } };

template<typename T> using ptr_handle_t = RAII<T*, NoOpPolicy, PointerReleasePolicy>;
template<typename T> using arr_ptr_handle_t = RAII<T*, NoOpPolicy, ArrayReleasePolicy>;

{
    ptr_handle_t<int> p1 = new int;
    arr_ptr_handle_t<int> p2 = new int [2];
    
    *p1 = 0xDEADBEEF;
    p2[1] = 0x8BADF00D;
}

template<typename T> struct NoOpPolicy { static void Execute(T) {} };

template<typename T> struct PointerReleasePolicy { static void Execute(T ptr) { delete ptr; } };

template<typename T> struct ArrayReleasePolicy { static void Execute(T ptr) { delete[] ptr; } };

template<typename T> using ptr_handle_t = RAII<T*, NoOpPolicy, PointerReleasePolicy>;

template<typename T> using arr_ptr_handle_t = RAII<T*, NoOpPolicy, ArrayReleasePolicy>;

{

ptr_handle_t<int> p1 = new int;

arr_ptr_handle_t<int> p2 = new int [2];

*p1 = 0xDEADBEEF;

p2[1] = 0x8BADF00D;

}

First we need a policy that does nothing; let’s call it NoOpPolicy. That is because nothing needs to happen to a pointer in the constructor of RAII; it’s already allocated. Next we declare two policies: PointerReleasePolicy which calls delete on type T, and ArrayReleasePolicy which calls delete[] on type T. Finally we define ptr_handle_t to be a RAII template which holds a pointer to T, applies NoOpPolicy::Execute(T t); in its constructor and PointerReleasePolicy::Execute(T t); in its destructor. We do the same for arr_ptr_handle_t except using ArrayReleasePolicy.

Complete listing:

#include <mutex>

template<
    typename T,
    template<typename> typename AcquirePolicy,
    template<typename> typename ReleasePolicy
>
class RAII
{
public:
    typedef T  val_type;
    typedef T& ref_type;
    
    RAII(val_type h) : m_handle(h) { AcquirePolicy<T>::Execute(m_handle); }
    RAII(const RAII&) = delete;
    RAII(RAII&&) = delete;
    RAII& operator = (const RAII&) = delete;
    ~RAII() { ReleasePolicy<T>::Execute(m_handle); }
    
    constexpr operator ref_type () { return m_handle; }
    constexpr operator ref_type () const { return m_handle; }
    
private:
    val_type m_handle;
};

template<typename T> struct LockPolicy { static void Execute(T t) { t.lock(); } };
template<typename T> struct UnlockPolicy { static void Execute(T t) { t.unlock(); } };
template<typename T> using scope_lock = RAII<T&, LockPolicy, UnlockPolicy>;

template<typename T> struct NoOpPolicy { static void Execute(T) {} };

template<typename T> struct PointerReleasePolicy { static void Execute(T ptr) { delete ptr; } };
template<typename T> struct ArrayReleasePolicy { static void Execute(T ptr) { delete[] ptr; } };

template<typename T> using arr_ptr_handle_t = RAII<T*, NoOpPolicy, ArrayReleasePolicy>;
template<typename T> using ptr_handle_t = RAII<T*, NoOpPolicy, PointerReleasePolicy>;

int main(int argc, char** argv)
{
    
    std::mutex m;
    scope_lock<std::mutex> lock(m);
 
    ptr_handle_t<int> p1 = new int;
    arr_ptr_handle_t<int> p2 = new int [2];
     
    *p1 = 0xDEADBEEF;
    p2[1] = 0x8BADF00D;

    return 1;
}

#include <mutex>

template<

typename T,

template<typename> typename AcquirePolicy,

template<typename> typename ReleasePolicy

class RAII

{

public:

typedef T val_type;

typedef T& ref_type;

RAII(val_type h) : m_handle(h) { AcquirePolicy<T>::Execute(m_handle); }

RAII(const RAII&) = delete;

RAII(RAII&&) = delete;

RAII& operator = (const RAII&) = delete;

~RAII() { ReleasePolicy<T>::Execute(m_handle); }

constexpr operator ref_type () { return m_handle; }

constexpr operator ref_type () const { return m_handle; }

private:

val_type m_handle;