diyfp.h Source File

diyfp.h Source File#

Composable Kernel: diyfp.h Source File
diyfp.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18
19#ifndef RAPIDJSON_DIYFP_H_
20#define RAPIDJSON_DIYFP_H_
21
22#include "../rapidjson.h"
23#include "clzll.h"
24#include <limits>
25
26#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27#include <intrin.h>
28#if !defined(_ARM64EC_)
29#pragma intrinsic(_umul128)
30#else
31#pragma comment(lib, "softintrin")
32#endif
33#endif
34
36namespace internal {
37
38#ifdef __GNUC__
39RAPIDJSON_DIAG_PUSH
40RAPIDJSON_DIAG_OFF(effc++)
41#endif
42
43#ifdef __clang__
44RAPIDJSON_DIAG_PUSH
45RAPIDJSON_DIAG_OFF(padded)
46#endif
47
48struct DiyFp
49{
50 DiyFp() : f(), e() {}
51
52 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
53
54 explicit DiyFp(double d)
55 {
56 union
57 {
58 double d;
59 uint64_t u64;
60 } u = {d};
61
62 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
63 uint64_t significand = (u.u64 & kDpSignificandMask);
64 if(biased_e != 0)
65 {
66 f = significand + kDpHiddenBit;
67 e = biased_e - kDpExponentBias;
68 }
69 else
70 {
71 f = significand;
72 e = kDpMinExponent + 1;
73 }
74 }
75
76 DiyFp operator-(const DiyFp& rhs) const { return DiyFp(f - rhs.f, e); }
77
78 DiyFp operator*(const DiyFp& rhs) const
79 {
80#if defined(_MSC_VER) && defined(_M_AMD64)
81 uint64_t h;
82 uint64_t l = _umul128(f, rhs.f, &h);
83 if(l & (uint64_t(1) << 63)) // rounding
84 h++;
85 return DiyFp(h, e + rhs.e + 64);
86#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && \
87 defined(__x86_64__)
88 __extension__ typedef unsigned __int128 uint128;
89 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
90 uint64_t h = static_cast<uint64_t>(p >> 64);
91 uint64_t l = static_cast<uint64_t>(p);
92 if(l & (uint64_t(1) << 63)) // rounding
93 h++;
94 return DiyFp(h, e + rhs.e + 64);
95#else
96 const uint64_t M32 = 0xFFFFFFFF;
97 const uint64_t a = f >> 32;
98 const uint64_t b = f & M32;
99 const uint64_t c = rhs.f >> 32;
100 const uint64_t d = rhs.f & M32;
101 const uint64_t ac = a * c;
102 const uint64_t bc = b * c;
103 const uint64_t ad = a * d;
104 const uint64_t bd = b * d;
105 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
106 tmp += 1U << 31;
107 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
108#endif
109 }
110
112 {
113 int s = static_cast<int>(clzll(f));
114 return DiyFp(f << s, e - s);
115 }
116
118 {
119 DiyFp res = *this;
120 while(!(res.f & (kDpHiddenBit << 1)))
121 {
122 res.f <<= 1;
123 res.e--;
124 }
126 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
127 return res;
128 }
129
130 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const
131 {
132 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
133 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
134 mi.f <<= mi.e - pl.e;
135 mi.e = pl.e;
136 *plus = pl;
137 *minus = mi;
138 }
139
140 double ToDouble() const
141 {
142 union
143 {
144 double d;
145 uint64_t u64;
146 } u;
149 {
150 // Underflow.
151 return 0.0;
152 }
153 if(e >= kDpMaxExponent)
154 {
155 // Overflow.
156 return std::numeric_limits<double>::infinity();
157 }
158 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0)
159 ? 0
160 : static_cast<uint64_t>(e + kDpExponentBias);
161 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
162 return u.d;
163 }
164
165 static const int kDiySignificandSize = 64;
166 static const int kDpSignificandSize = 52;
167 static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
168 static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
169 static const int kDpMinExponent = -kDpExponentBias;
170 static const int kDpDenormalExponent = -kDpExponentBias + 1;
171 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
172 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
173 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
174
176 int e;
177};
178
179inline DiyFp GetCachedPowerByIndex(size_t index)
180{
181 // 10^-348, 10^-340, ..., 10^340
182 static const uint64_t kCachedPowers_F[] = {
183 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
184 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
185 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
186 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
187 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
188 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
189 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
190 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
191 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
192 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
193 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
194 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
195 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
196 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
197 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
198 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
199 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
200 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
201 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
202 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
203 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
204 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
205 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
206 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
207 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
208 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
209 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
210 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
211 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
212 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
213 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
214 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
215 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
216 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
217 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
218 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
219 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
220 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
221 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
222 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
223 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
224 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
225 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
226 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)};
227 static const int16_t kCachedPowers_E[] = {
228 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901,
229 -874, -847, -821, -794, -768, -741, -715, -688, -661, -635, -608, -582, -555,
230 -529, -502, -475, -449, -422, -396, -369, -343, -316, -289, -263, -236, -210,
231 -183, -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, 109, 136,
232 162, 189, 216, 242, 269, 295, 322, 348, 375, 402, 428, 455, 481,
233 508, 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827,
234 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
235 RAPIDJSON_ASSERT(index < 87);
236 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
237}
238
239inline DiyFp GetCachedPower(int e, int* K)
240{
241
242 // int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
243 double dk =
244 (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
245 int k = static_cast<int>(dk);
246 if(dk - k > 0.0)
247 k++;
248
249 unsigned index = static_cast<unsigned>((k >> 3) + 1);
250 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
251
252 return GetCachedPowerByIndex(index);
253}
254
255inline DiyFp GetCachedPower10(int exp, int* outExp)
256{
257 RAPIDJSON_ASSERT(exp >= -348);
258 unsigned index = static_cast<unsigned>(exp + 348) / 8u;
259 *outExp = -348 + static_cast<int>(index) * 8;
260 return GetCachedPowerByIndex(index);
261}
262
263#ifdef __GNUC__
264RAPIDJSON_DIAG_POP
265#endif
266
267#ifdef __clang__
268RAPIDJSON_DIAG_POP
269RAPIDJSON_DIAG_OFF(padded)
270#endif
271
272} // namespace internal
274
275#endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:451
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
Definition allocators.h:459
DiyFp GetCachedPowerByIndex(size_t index)
Definition diyfp.h:179
DiyFp GetCachedPower10(int exp, int *outExp)
Definition diyfp.h:255
DiyFp GetCachedPower(int e, int *K)
Definition diyfp.h:239
uint32_t clzll(uint64_t x)
Definition clzll.h:32
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1517
common definitions and configuration
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition rapidjson.h:326
signed short int16_t
Definition stdint.h:122
unsigned __int64 uint64_t
Definition stdint.h:136
Definition diyfp.h:49
static const int kDpSignificandSize
Definition diyfp.h:166
uint64_t f
Definition diyfp.h:175
static const int kDpExponentBias
Definition diyfp.h:167
DiyFp NormalizeBoundary() const
Definition diyfp.h:117
static const uint64_t kDpHiddenBit
Definition diyfp.h:173
static const int kDpMaxExponent
Definition diyfp.h:168
static const uint64_t kDpSignificandMask
Definition diyfp.h:172
DiyFp operator*(const DiyFp &rhs) const
Definition diyfp.h:78
static const int kDpDenormalExponent
Definition diyfp.h:170
DiyFp(uint64_t fp, int exp)
Definition diyfp.h:52
static const int kDpMinExponent
Definition diyfp.h:169
DiyFp operator-(const DiyFp &rhs) const
Definition diyfp.h:76
DiyFp Normalize() const
Definition diyfp.h:111
static const uint64_t kDpExponentMask
Definition diyfp.h:171
static const int kDiySignificandSize
Definition diyfp.h:165
double ToDouble() const
Definition diyfp.h:140
DiyFp(double d)
Definition diyfp.h:54
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition diyfp.h:130
DiyFp()
Definition diyfp.h:50
int e
Definition diyfp.h:176