Instead of using a semaphore (at least directly) or having main explicitly wake up a thread to get some work done, I've always used a thread-safe queue. When main wants a worker thread to do something, it pushes a description of the job to be done onto the queue. The worker threads each just do a job, then try to pop another job from the queue, and end up suspended until there's a job in the queue for them to do:
The code for the queue looks like this:
#ifndef QUEUE_H_INCLUDED
#define QUEUE_H_INCLUDED
#include <windows.h>
template<class T, unsigned max = 256>
class queue {
HANDLE space_avail; // at least one slot empty
HANDLE data_avail; // at least one slot full
CRITICAL_SECTION mutex; // protect buffer, in_pos, out_pos
T buffer[max];
long in_pos, out_pos;
public:
queue() : in_pos(0), out_pos(0) {
space_avail = CreateSemaphore(NULL, max, max, NULL);
data_avail = CreateSemaphore(NULL, 0, max, NULL);
InitializeCriticalSection(&mutex);
}
void push(T data) {
WaitForSingleObject(space_avail, INFINITE);
EnterCriticalSection(&mutex);
buffer[in_pos] = data;
in_pos = (in_pos + 1) % max;
LeaveCriticalSection(&mutex);
ReleaseSemaphore(data_avail, 1, NULL);
}
T pop() {
WaitForSingleObject(data_avail,INFINITE);
EnterCriticalSection(&mutex);
T retval = buffer[out_pos];
out_pos = (out_pos + 1) % max;
LeaveCriticalSection(&mutex);
ReleaseSemaphore(space_avail, 1, NULL);
return retval;
}
~queue() {
DeleteCriticalSection(&mutex);
CloseHandle(data_avail);
CloseHandle(space_avail);
}
};
#endif
And a rough equivalent of your code in the threads to use it looks something like this. I didn't sort out exactly what your thread function was doing, but it was something with summing square roots, and apparently you're more interested in the thread synch than what the threads actually do, for the moment.
Edit: (based on comment):
If you need main()
to wait for some tasks to finish, do some more work, then assign more tasks, it's generally best to handle that by putting an event (for example) into each task, and have your thread function set the events. Revised code to do that would look like this (note that the queue code isn't affected):
#include "queue.hpp"
#include <iostream>
#include <process.h>
#include <math.h>
#include <vector>
struct task {
int val;
HANDLE e;
task() : e(CreateEvent(NULL, 0, 0, NULL)) { }
task(int i) : val(i), e(CreateEvent(NULL, 0, 0, NULL)) {}
};
void process(void *p) {
queue<task> &q = *static_cast<queue<task> *>(p);
task t;
while ( -1 != (t=q.pop()).val) {
std::cout << t.val << "
";
SetEvent(t.e);
}
}
int main() {
queue<task> jobs;
enum { thread_count = 4 };
enum { task_count = 10 };
std::vector<HANDLE> threads;
std::vector<HANDLE> events;
std::cout << "Creating thread pool" << std::endl;
for (int t=0; t<thread_count; ++t)
threads.push_back((HANDLE)_beginthread(process, 0, &jobs));
std::cout << "Thread pool Waiting" << std::endl;
std::cout << "First round of tasks" << std::endl;
for (int i=0; i<task_count; ++i) {
task t(i+1);
events.push_back(t.e);
jobs.push(t);
}
WaitForMultipleObjects(events.size(), &events[0], TRUE, INFINITE);
events.clear();
std::cout << "Second round of tasks" << std::endl;
for (int i=0; i<task_count; ++i) {
task t(i+20);
events.push_back(t.e);
jobs.push(t);
}
WaitForMultipleObjects(events.size(), &events[0], true, INFINITE);
events.clear();
for (int j=0; j<thread_count; ++j)
jobs.push(-1);
WaitForMultipleObjects(threads.size(), &threads[0], TRUE, INFINITE);
return 0;
}