I think I have found a way to clamp a double
into an integer in a standard-conforming fashion (this is not really what the question is about, but it helps a lot). First, we need to see why the obvious code is not correct.
// INCORRECT CODE
uint64_t double_to_uint64 (double x)
{
if (x < 0.0) {
return 0;
}
if (x > UINT64_MAX) {
return UINT64_MAX;
}
return x;
}
The problem here is that in the second comparison, UINT64_MAX
is being implicitly converted to double
. The C standard does not specify exactly how this conversion works, only that it is to be rounded up or down to a representable value. This means that the second comparison may be false, even if should mathematically be true (which can happen when UINT64_MAX
is rounded up, and 'x' is mathematically between UINT64_MAX
and (double)UINT64_MAX
). As such, the conversion of double
to uint64_t
can result in undefined behavior in that edge case.
Surprisingly, the solution is very simple. Consider that while UINT64_MAX
may not be exactly representable in a double
, UINT64_MAX+1
, being a power of two (and not too large), certainly is. So, if we first round the input to an integer, the comparison x > UINT64_MAX
is equivalent to x >= UINT64_MAX+1
, except for possible overflow in the addition. We can fix the overflow by using ldexp
instead of adding one to UINT64_MAX
. That being said, the following code should be correct.
/* Input: a double 'x', which must not be NaN.
* Output: If 'x' is lesser than zero, then zero;
* otherwise, if 'x' is greater than UINT64_MAX, then UINT64_MAX;
* otherwise, 'x', rounded down to an integer.
*/
uint64_t double_to_uint64 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 64)) {
return UINT64_MAX;
}
return y;
}
Now, to back to your question: is x
is exactly representable in an uint64_t
? Only if it was neither rounded nor clamped.
/* Input: a double 'x', which must not be NaN.
* Output: If 'x' is exactly representable in an uint64_t,
* then 1, otherwise 0.
*/
int double_representable_in_uint64 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 64));
}
The same algorithm can be used for integers of different size, and also for signed integers with a minor modification. The code that follows does some very basic tests of the uint32_t
and uint64_t
versions (only false positives can possibly be caught), but is also suitable for manual examination of the edge cases.
#include <inttypes.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include <stdio.h>
uint32_t double_to_uint32 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 32)) {
return UINT32_MAX;
}
return y;
}
uint64_t double_to_uint64 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 64)) {
return UINT64_MAX;
}
return y;
}
int double_representable_in_uint32 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 32));
}
int double_representable_in_uint64 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 64));
}
int main ()
{
{
printf("Testing 32-bit
");
for (double x = 4294967295.999990; x < 4294967296.000017; x = nextafter(x, INFINITY)) {
uint32_t y = double_to_uint32(x);
int representable = double_representable_in_uint32(x);
printf("%f -> %" PRIu32 " representable=%d
", x, y, representable);
assert(!representable || (double)(uint32_t)x == x);
}
}
{
printf("Testing 64-bit
");
double x = ldexp(1.0, 64) - 40000.0;
for (double x = 18446744073709510656.0; x < 18446744073709629440.0; x = nextafter(x, INFINITY)) {
uint64_t y = double_to_uint64(x);
int representable = double_representable_in_uint64(x);
printf("%f -> %" PRIu64 " representable=%d
", x, y, representable);
assert(!representable || (double)(uint64_t)x == x);
}
}
}