00001 /* 00002 * Copyright (c) 2005 Josef Cejka 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * - Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * - Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * - The name of the author may not be used to endorse or promote products 00015 * derived from this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00018 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00020 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00021 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00022 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00023 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00024 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00026 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00027 */ 00028 00035 #include "sftypes.h" 00036 #include "conversion.h" 00037 #include "comparison.h" 00038 #include "common.h" 00039 00040 float64 convertFloat32ToFloat64(float32 a) 00041 { 00042 float64 result; 00043 uint64_t frac; 00044 00045 result.parts.sign = a.parts.sign; 00046 result.parts.fraction = a.parts.fraction; 00047 result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 00048 00049 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) { 00050 result.parts.exp = 0x7FF; 00051 /* TODO; check if its correct for SigNaNs*/ 00052 return result; 00053 }; 00054 00055 result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS); 00056 if (a.parts.exp == 0) { 00057 /* normalize denormalized numbers */ 00058 00059 if (result.parts.fraction == 0ll) { /* fix zero */ 00060 result.parts.exp = 0ll; 00061 return result; 00062 } 00063 00064 frac = result.parts.fraction; 00065 00066 while (!(frac & (0x10000000000000ll))) { 00067 frac <<= 1; 00068 --result.parts.exp; 00069 }; 00070 00071 ++result.parts.exp; 00072 result.parts.fraction = frac; 00073 }; 00074 00075 return result; 00076 00077 } 00078 00079 float32 convertFloat64ToFloat32(float64 a) 00080 { 00081 float32 result; 00082 int32_t exp; 00083 uint64_t frac; 00084 00085 result.parts.sign = a.parts.sign; 00086 00087 if (isFloat64NaN(a)) { 00088 00089 result.parts.exp = 0xFF; 00090 00091 if (isFloat64SigNaN(a)) { 00092 result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */ 00093 return result; 00094 } 00095 00096 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */ 00097 return result; 00098 }; 00099 00100 if (isFloat64Infinity(a)) { 00101 result.parts.fraction = 0; 00102 result.parts.exp = 0xFF; 00103 return result; 00104 }; 00105 00106 exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS; 00107 00108 if (exp >= 0xFF) { 00109 /*FIXME: overflow*/ 00110 result.parts.fraction = 0; 00111 result.parts.exp = 0xFF; 00112 return result; 00113 00114 } else if (exp <= 0 ) { 00115 00116 /* underflow or denormalized */ 00117 00118 result.parts.exp = 0; 00119 00120 exp *= -1; 00121 if (exp > FLOAT32_FRACTION_SIZE ) { 00122 /* FIXME: underflow */ 00123 result.parts.fraction = 0; 00124 return result; 00125 }; 00126 00127 /* denormalized */ 00128 00129 frac = a.parts.fraction; 00130 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */ 00131 00132 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1); 00133 00134 while (exp > 0) { 00135 --exp; 00136 frac >>= 1; 00137 }; 00138 result.parts.fraction = frac; 00139 00140 return result; 00141 }; 00142 00143 result.parts.exp = exp; 00144 result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 00145 return result; 00146 } 00147 00148 00153 static uint32_t _float32_to_uint32_helper(float32 a) 00154 { 00155 uint32_t frac; 00156 00157 if (a.parts.exp < FLOAT32_BIAS) { 00158 /*TODO: rounding*/ 00159 return 0; 00160 } 00161 00162 frac = a.parts.fraction; 00163 00164 frac |= FLOAT32_HIDDEN_BIT_MASK; 00165 /* shift fraction to left so hidden bit will be the most significant bit */ 00166 frac <<= 32 - FLOAT32_FRACTION_SIZE - 1; 00167 00168 frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1; 00169 if ((a.parts.sign == 1) && (frac != 0)) { 00170 frac = ~frac; 00171 ++frac; 00172 } 00173 00174 return frac; 00175 } 00176 00177 /* Convert float to unsigned int32 00178 * FIXME: Im not sure what to return if overflow/underflow happens 00179 * - now its the biggest or the smallest int 00180 */ 00181 uint32_t float32_to_uint32(float32 a) 00182 { 00183 if (isFloat32NaN(a)) 00184 return UINT32_MAX; 00185 00186 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) { 00187 if (a.parts.sign) 00188 return UINT32_MIN; 00189 00190 return UINT32_MAX; 00191 } 00192 00193 return _float32_to_uint32_helper(a); 00194 } 00195 00196 /* Convert float to signed int32 00197 * FIXME: Im not sure what to return if overflow/underflow happens 00198 * - now its the biggest or the smallest int 00199 */ 00200 int32_t float32_to_int32(float32 a) 00201 { 00202 if (isFloat32NaN(a)) 00203 return INT32_MAX; 00204 00205 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) { 00206 if (a.parts.sign) 00207 return INT32_MIN; 00208 00209 return INT32_MAX; 00210 } 00211 00212 return _float32_to_uint32_helper(a); 00213 } 00214 00215 00220 static uint64_t _float64_to_uint64_helper(float64 a) 00221 { 00222 uint64_t frac; 00223 00224 if (a.parts.exp < FLOAT64_BIAS) { 00225 /*TODO: rounding*/ 00226 return 0; 00227 } 00228 00229 frac = a.parts.fraction; 00230 00231 frac |= FLOAT64_HIDDEN_BIT_MASK; 00232 /* shift fraction to left so hidden bit will be the most significant bit */ 00233 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 00234 00235 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1; 00236 if ((a.parts.sign == 1) && (frac != 0)) { 00237 frac = ~frac; 00238 ++frac; 00239 } 00240 00241 return frac; 00242 } 00243 00244 /* Convert float to unsigned int64 00245 * FIXME: Im not sure what to return if overflow/underflow happens 00246 * - now its the biggest or the smallest int 00247 */ 00248 uint64_t float64_to_uint64(float64 a) 00249 { 00250 if (isFloat64NaN(a)) 00251 return UINT64_MAX; 00252 00253 00254 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 00255 if (a.parts.sign) 00256 return UINT64_MIN; 00257 00258 return UINT64_MAX; 00259 } 00260 00261 return _float64_to_uint64_helper(a); 00262 } 00263 00264 /* Convert float to signed int64 00265 * FIXME: Im not sure what to return if overflow/underflow happens 00266 * - now its the biggest or the smallest int 00267 */ 00268 int64_t float64_to_int64(float64 a) 00269 { 00270 if (isFloat64NaN(a)) 00271 return INT64_MAX; 00272 00273 00274 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 00275 if (a.parts.sign) 00276 return INT64_MIN; 00277 00278 return INT64_MAX; 00279 } 00280 00281 return _float64_to_uint64_helper(a); 00282 } 00283 00284 00285 00286 00287 00292 static uint64_t _float32_to_uint64_helper(float32 a) 00293 { 00294 uint64_t frac; 00295 00296 if (a.parts.exp < FLOAT32_BIAS) { 00297 /*TODO: rounding*/ 00298 return 0; 00299 } 00300 00301 frac = a.parts.fraction; 00302 00303 frac |= FLOAT32_HIDDEN_BIT_MASK; 00304 /* shift fraction to left so hidden bit will be the most significant bit */ 00305 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 00306 00307 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1; 00308 if ((a.parts.sign == 1) && (frac != 0)) { 00309 frac = ~frac; 00310 ++frac; 00311 } 00312 00313 return frac; 00314 } 00315 00316 /* Convert float to unsigned int64 00317 * FIXME: Im not sure what to return if overflow/underflow happens 00318 * - now its the biggest or the smallest int 00319 */ 00320 uint64_t float32_to_uint64(float32 a) 00321 { 00322 if (isFloat32NaN(a)) 00323 return UINT64_MAX; 00324 00325 00326 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 00327 if (a.parts.sign) 00328 return UINT64_MIN; 00329 00330 return UINT64_MAX; 00331 } 00332 00333 return _float32_to_uint64_helper(a); 00334 } 00335 00336 /* Convert float to signed int64 00337 * FIXME: Im not sure what to return if overflow/underflow happens 00338 * - now its the biggest or the smallest int 00339 */ 00340 int64_t float32_to_int64(float32 a) 00341 { 00342 if (isFloat32NaN(a)) 00343 return INT64_MAX; 00344 00345 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 00346 if (a.parts.sign) 00347 return INT64_MIN; 00348 00349 return INT64_MAX; 00350 } 00351 00352 return _float32_to_uint64_helper(a); 00353 } 00354 00355 00356 /* Convert float64 to unsigned int32 00357 * FIXME: Im not sure what to return if overflow/underflow happens 00358 * - now its the biggest or the smallest int 00359 */ 00360 uint32_t float64_to_uint32(float64 a) 00361 { 00362 if (isFloat64NaN(a)) 00363 return UINT32_MAX; 00364 00365 00366 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 00367 if (a.parts.sign) 00368 return UINT32_MIN; 00369 00370 return UINT32_MAX; 00371 } 00372 00373 return (uint32_t) _float64_to_uint64_helper(a); 00374 } 00375 00376 /* Convert float64 to signed int32 00377 * FIXME: Im not sure what to return if overflow/underflow happens 00378 * - now its the biggest or the smallest int 00379 */ 00380 int32_t float64_to_int32(float64 a) 00381 { 00382 if (isFloat64NaN(a)) 00383 return INT32_MAX; 00384 00385 00386 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 00387 if (a.parts.sign) 00388 return INT32_MIN; 00389 00390 return INT32_MAX; 00391 } 00392 00393 return (int32_t) _float64_to_uint64_helper(a); 00394 } 00395 00400 float32 uint32_to_float32(uint32_t i) 00401 { 00402 int counter; 00403 int32_t exp; 00404 float32 result; 00405 00406 result.parts.sign = 0; 00407 result.parts.fraction = 0; 00408 00409 counter = countZeroes32(i); 00410 00411 exp = FLOAT32_BIAS + 32 - counter - 1; 00412 00413 if (counter == 32) { 00414 result.binary = 0; 00415 return result; 00416 } 00417 00418 if (counter > 0) { 00419 i <<= counter - 1; 00420 } else { 00421 i >>= 1; 00422 } 00423 00424 roundFloat32(&exp, &i); 00425 00426 result.parts.fraction = i >> 7; 00427 result.parts.exp = exp; 00428 00429 return result; 00430 } 00431 00432 float32 int32_to_float32(int32_t i) 00433 { 00434 float32 result; 00435 00436 if (i < 0) { 00437 result = uint32_to_float32((uint32_t)(-i)); 00438 } else { 00439 result = uint32_to_float32((uint32_t)i); 00440 } 00441 00442 result.parts.sign = i < 0; 00443 00444 return result; 00445 } 00446 00447 00448 float32 uint64_to_float32(uint64_t i) 00449 { 00450 int counter; 00451 int32_t exp; 00452 uint32_t j; 00453 float32 result; 00454 00455 result.parts.sign = 0; 00456 result.parts.fraction = 0; 00457 00458 counter = countZeroes64(i); 00459 00460 exp = FLOAT32_BIAS + 64 - counter - 1; 00461 00462 if (counter == 64) { 00463 result.binary = 0; 00464 return result; 00465 } 00466 00467 /* Shift all to the first 31 bits (31. will be hidden 1)*/ 00468 if (counter > 33) { 00469 i <<= counter - 1 - 32; 00470 } else { 00471 i >>= 1 + 32 - counter; 00472 } 00473 00474 j = (uint32_t)i; 00475 roundFloat32(&exp, &j); 00476 00477 result.parts.fraction = j >> 7; 00478 result.parts.exp = exp; 00479 return result; 00480 } 00481 00482 float32 int64_to_float32(int64_t i) 00483 { 00484 float32 result; 00485 00486 if (i < 0) { 00487 result = uint64_to_float32((uint64_t)(-i)); 00488 } else { 00489 result = uint64_to_float32((uint64_t)i); 00490 } 00491 00492 result.parts.sign = i < 0; 00493 00494 return result; 00495 } 00496 00501 float64 uint32_to_float64(uint32_t i) 00502 { 00503 int counter; 00504 int32_t exp; 00505 float64 result; 00506 uint64_t frac; 00507 00508 result.parts.sign = 0; 00509 result.parts.fraction = 0; 00510 00511 counter = countZeroes32(i); 00512 00513 exp = FLOAT64_BIAS + 32 - counter - 1; 00514 00515 if (counter == 32) { 00516 result.binary = 0; 00517 return result; 00518 } 00519 00520 frac = i; 00521 frac <<= counter + 32 - 1; 00522 00523 roundFloat64(&exp, &frac); 00524 00525 result.parts.fraction = frac >> 10; 00526 result.parts.exp = exp; 00527 00528 return result; 00529 } 00530 00531 float64 int32_to_float64(int32_t i) 00532 { 00533 float64 result; 00534 00535 if (i < 0) { 00536 result = uint32_to_float64((uint32_t)(-i)); 00537 } else { 00538 result = uint32_to_float64((uint32_t)i); 00539 } 00540 00541 result.parts.sign = i < 0; 00542 00543 return result; 00544 } 00545 00546 00547 float64 uint64_to_float64(uint64_t i) 00548 { 00549 int counter; 00550 int32_t exp; 00551 float64 result; 00552 00553 result.parts.sign = 0; 00554 result.parts.fraction = 0; 00555 00556 counter = countZeroes64(i); 00557 00558 exp = FLOAT64_BIAS + 64 - counter - 1; 00559 00560 if (counter == 64) { 00561 result.binary = 0; 00562 return result; 00563 } 00564 00565 if (counter > 0) { 00566 i <<= counter - 1; 00567 } else { 00568 i >>= 1; 00569 } 00570 00571 roundFloat64(&exp, &i); 00572 00573 result.parts.fraction = i >> 10; 00574 result.parts.exp = exp; 00575 return result; 00576 } 00577 00578 float64 int64_to_float64(int64_t i) 00579 { 00580 float64 result; 00581 00582 if (i < 0) { 00583 result = uint64_to_float64((uint64_t)(-i)); 00584 } else { 00585 result = uint64_to_float64((uint64_t)i); 00586 } 00587 00588 result.parts.sign = i < 0; 00589 00590 return result; 00591 } 00592