There is no need for threads to achieve your goal, all you need are small improvements to the isPrime()
function:
- test even numbers explicitly
- only test odd numbers in the loop
- stop the loop when
i > num / i
.
- return
0
for 0
and 1
Here is a modified version:
#include <stdio.h>
#include <stdlib.h>
int isPrime(int num) {
int i;
if ((num & 1) == 0)
return num == 2;
for (i = 3; i <= num / i; i += 2) {
if (num % i == 0)
return 0;
}
return num != 1;
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Too few arguments
");
printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>
");
exit(0);
}
int randomPivot = atoi(argv[1]);
int numOfRandomNumbers = atoi(argv[2]);
long sum = 0;
long primeCounter = 0;
srand(randomPivot); //init rundom generator
for (int i = 0; i < numOfRandomNumbers; i++) {
int random = rand();
if (isPrime(random)) {
sum = sum + random;
primeCounter++;
}
}
printf("%ld,%ld
", sum, primeCounter);
return 0;
}
Output on my 2015 slow laptop:
~/dev/stackoverflow > time ./primeCalc 0 1000000
50915866465059,48687
real 0m4.151s
user 0m4.119s
sys 0m0.011s
To further improve the performance, we can precompute the prime numbers less or equal to the square root of RAND_MAX
, reducing the number of divisions for large prime numbers. Furthermore, the loop test can use a mutiplication as the prime factor candidates are all below square root of INT_MAX
:
#include <stdio.h>
#include <stdlib.h>
static int primes[4793]; /* There are 4792 primes <= sqrt(0x7fffffff) */
int isPrime(int num) {
if ((num & 1) == 0)
return num == 2;
for (int *p = primes + 1; *p && (*p) * (*p) <= num; p++) {
if (num % *p == 0)
return 0;
}
return num != 1;
}
void initPrimes(int max) {
int i = 0, p;
primes[i++] = 2;
for (p = 3; p <= max / p; p += 2) {
if (isPrime(p))
primes[i++] = p;
}
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Too few arguments
");
printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>
");
exit(0);
}
int randomPivot = atoi(argv[1]);
int numOfRandomNumbers = atoi(argv[2]);
long sum = 0;
long primeCounter = 0;
initPrimes(RAND_MAX);
srand(randomPivot); //init rundom generator
for (int i = 0; i < numOfRandomNumbers; i++) {
int random = rand();
if (isPrime(random)) {
sum = sum + random;
primeCounter++;
}
}
printf("%ld,%ld
", sum, primeCounter);
return 0;
}
Output:
~/dev/stackoverflow > ./primeCalc1 0 1000000
50915866465059,48687
real 0m0.580s
user 0m0.566s
sys 0m0.004s
This result is consistent with your teacher's result, yet without threads.
You can further improve the timings using threads but there is a catch: for your test, you want to compute the same random numbers, which is not guaranteed if rand()
is called from different threads. You thus need to compute the random numbers in the main thread and store them in an array, then use separate threads separate parts of the array and combine the results. I suggest you store the arguments and results in a separate structure for each thread and pass a pointer to it as the opaque argument.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
static int primes[4793]; /* There are 4792 primes <= sqrt(0x7fffffff) */
int isPrime(int num) {
if ((num & 1) == 0)
return num == 2;
for (int *p = primes + 1; *p && (*p) * (*p) <= num; p++) {
if (num % *p == 0)
return 0;
}
return num != 1;
}
void initPrimes(int max) {
int i = 0, p;
primes[i++] = 2;
for (p = 3; p <= max / p; p += 2) {
if (isPrime(p))
primes[i++] = p;
}
}
struct primeCalcArgs {
pthread_t tid;
int *array;
int start, end;
long sum, count;
};
void *primeCalcTask(void *opaque) {
struct primeCalcArgs *p = opaque;
int *array = p->array;
p->sum = 0;
p->count = 0;
for (int i = p->start; i < p->end; i++) {
if (isPrime(array[i])) {
p->sum += array[i];
p->count++;
}
}
return NULL;
}
int main(int argc, char *argv[]) {
if (argc < 3) {
fprintf(stderr, "primeCalc: Too few arguments
");
fprintf(stderr, "usage: ./primeCalc <prime pivot> <num of random numbers> [<nb of threads>]
");
return 1;
}
int randomPivot = atoi(argv[1]);
int numOfRandomNumbers = atoi(argv[2]);
int numOfThreads = argc > 3 ? atoi(argv[3]) : 1;
long sum = 0;
long count = 0;
if (numOfThreads < 1)
numOfThreads = 1;
if (numOfRandomNumbers > 0) {
int i, n;
initPrimes(RAND_MAX);
int *array = malloc(sizeof(*array) * numOfRandomNumbers);
if (!array) {
fprintf(stderr, "primeCalc: cannot allocate random number array
");
return 1;
}
srand(randomPivot); //init rundom generator
for (i = 0; i < numOfRandomNumbers; i++) {
array[i] = rand();
}
struct primeCalcArgs args[numOfThreads];
for (n = 0; n < numOfThreads; n++) {
args[n].array = array;
args[n].start = (long long)numOfRandomNumbers * n / numOfThreads;
args[n].end = (long long)numOfRandomNumbers * (n + 1) / numOfThreads;
args[n].sum = 0;
args[n].count = 0;
if (pthread_create(&args[n].tid, NULL, primeCalcTask, &args[n])) {
fprintf(stderr, "primeCalc: cannot create thread %d
", n + 1);
break;
}
}
for (i = 0; i < n; i++) {
pthread_join(args[i].tid, NULL);
sum += args[i].sum;
count += args[i].count;
}
free(array);
}
printf("%ld,%ld
", sum, count);
return 0;
}
Output:
~/dev/stackoverflow > time ./primeCalc 0 1000000
50915866465059,48687
real 0m0.581s
user 0m0.565s
sys 0m0.007s
~/dev/stackoverflow > time ./primeCalc 0 1000000 2
50915866465059,48687
real 0m0.293s
user 0m0.552s
sys 0m0.006s
~/dev/stackoverflow > time ./primeCalc 0 1000000 3
50915866465059,48687
real 0m0.286s
user 0m0.739s
sys 0m0.007s
~/dev/stackoverflow > time ./primeCalc 0 1000000 4
50915866465059,48687
real 0m0.242s
user 0m0.846s
sys 0m0.006s
~/dev/stackoverflow > time ./primeCalc 0 1000000 5
50915866465059,48687
real 0m0.245s
user 0m0.845s
sys 0m0.008s
~/dev/stackoverflow > time ./primeCalc 0 1000000 10
50915866465059,48687
real 0m0.281s
user 0m0.858s
sys 0m0.008s
~/dev/stackoverflow > time ./primeCalc 0 1000000 100
50915866465059,48687
real 0m0.279s
user 0m0.846s
sys 0m0.012s
~/dev/stackoverflow > time ./primeCalc 0 1000000 10000
50915866465059,48687
real 0m1.274s
user 0m0.752s
sys 0m1.663s
As you can see, my laptop has full 2 cores: the real time is exactly half for 2 threads. It gets marginal improvements from its hyper-threading capability for 3 and 4 threads but no improvement for more than 4 threads. Performance actually degrades for extra threads and I am even surprised that requesting 10000 threads succeeds and still performs reasonably well.
For larger counts of random numbers, I would suggest using a sieve to compute a bitmap of all primes up to RAND_MAX
, adding a fixed overhead to the whole process. On my laptop, this sieve takes one or two seconds and makes the prime test immediate, leaving just the overhead of the random number generator.