00001 /*
00002 Copyright (C) 2000 by Andrew Zabolotny (Intel version)
00003 Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version)
00004 Fast computation of sqrt(x) and 1/sqrt(x)
00005
00006 This library is free software; you can redistribute it and/or
00007 modify it under the terms of the GNU Library General Public
00008 License as published by the Free Software Foundation; either
00009 version 2 of the License, or (at your option) any later version.
00010
00011 This library is distributed in the hope that it will be useful,
00012 but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00014 Library General Public License for more details.
00015
00016 You should have received a copy of the GNU Library General Public
00017 License along with this library; if not, write to the Free
00018 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019 */
00020
00021 // Uncomment the following line to define CS_NO_QSQRT if you experience
00022 // mysterious problems with CS which you think are related to this
00023 // version of sqrt not behaving properly. If you find something like
00024 // that I'd like to be notified of this so we can make sure this really
00025 // is the problem.
00026 //#define CS_NO_QSQRT
00027
00028 #ifndef __QSQRT_H__
00029 #define __QSQRT_H__
00030
00031 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC)
00032
00033 /*
00034 NB: Single-precision floating-point format (32 bits):
00035 SEEEEEEE.EMMMMMMM.MMMMMMMM.MMMMMMMM
00036 S: Sign (0 - positive, 1 - negative)
00037 E: Exponent (plus 127, 8 bits)
00038 M: Mantissa (23 bits)
00039 */
00040
00049 static inline float qsqrt (float x)
00050 {
00051 float ret;
00052
00053 // Original C++ formulae:
00054 // float tmp = x;
00055 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1;
00056 // double h = x * 0.5;
00057 // double a = tmp;
00058 // a *= 1.5 - a * a * h;
00059 // a *= 1.5 - a * a * h;
00060 // return a * x;
00061
00062 __asm__ (
00063 "flds %1\n" // x
00064 "movl $0xbe6f0000,%%eax\n"
00065 "subl %1,%%eax\n"
00066 "shrl $1,%%eax\n"
00067 "movl %%eax,%1\n"
00068 "flds %2\n" // x 0.5
00069 "fmul %%st(1)\n" // x h
00070 "flds %3\n" // x h 1.5
00071 "flds %1\n" // x h 1.5 a
00072 "fld %%st\n" // x h 1.5 a a
00073 "fmul %%st\n" // x h 1.5 a a*a
00074 "fmul %%st(3)\n" // x h 1.5 a a*a*h
00075 "fsubr %%st(2)\n" // x h 1.5 a 1.5-a*a*h
00076 "fmulp %%st(1)\n" // x h 1.5 a
00077 "fld %%st\n" // x h 1.5 a a
00078 "fmul %%st\n" // x h 1.5 a a*a
00079 "fmulp %%st(3)\n" // x a*a*h 1.5 a
00080 "fxch\n" // x a*a*h a 1.5
00081 "fsubp %%st,%%st(2)\n" // x 1.5-a*a*h a
00082 "fmulp %%st(1)\n" // x a
00083 "fmulp %%st(1)\n" // a
00084 : "=&t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00085 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00086 );
00087 return ret;
00088 }
00089
00097 static inline float qisqrt (float x)
00098 {
00099 float ret;
00100 __asm__ (
00101 "flds %1\n" // x
00102 "movl $0xbe6f0000,%%eax\n"
00103 "subl %1,%%eax\n"
00104 "shrl $1,%%eax\n"
00105 "movl %%eax,%1\n"
00106 "flds %2\n" // x 0.5
00107 "fmulp %%st(1)\n" // h
00108 "flds %3\n" // h 1.5
00109 "flds %1\n" // h 1.5 a
00110 "fld %%st\n" // h 1.5 a a
00111 "fmul %%st\n" // h 1.5 a a*a
00112 "fmul %%st(3)\n" // h 1.5 a a*a*h
00113 "fsubr %%st(2)\n" // h 1.5 a 1.5-a*a*h
00114 "fmulp %%st(1)\n" // h 1.5 a
00115 "fld %%st\n" // h 1.5 a a
00116 "fmul %%st\n" // h 1.5 a a*a
00117 "fmulp %%st(3)\n" // a*a*h 1.5 a
00118 "fxch\n" // a*a*h a 1.5
00119 "fsubp %%st,%%st(2)\n" // 1.5-a*a*h a
00120 "fmulp %%st(1)\n" // a
00121 : "=t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00122 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00123 );
00124 return ret;
00125 }
00126
00127 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_POWERPC) && defined (COMP_GCC)
00128
00136 static inline float qsqrt(float x)
00137 {
00138 float y0 = 0.0;
00139
00140 if (x != 0.0)
00141 {
00142 float x0 = x * 0.5f;
00143
00144 asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00145
00146 y0 = y0 * (1.5f - x0 * y0 * y0);
00147 y0 = (y0 * (1.5f - x0 * y0 * y0)) * x;
00148 };
00149
00150 return y0;
00151 };
00152
00157 static inline float qisqrt(float x)
00158 {
00159 float x0 = x * 0.5f;
00160 float y0;
00161 asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00162
00163 y0 = y0 * (1.5f - x0 * y0 * y0);
00164 y0 = y0 * (1.5f - x0 * y0 * y0);
00165
00166 return y0;
00167 };
00168
00169 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_VC)
00170
00171 #include <math.h>
00172
00173 #define qsqrt(x) sqrtf(x)
00174 #define qisqrt(x) (1.0f / sqrtf(x))
00175
00176 #else
00177
00178 #include <math.h>
00179 #define qsqrt(x) sqrt(x)
00180 #define qisqrt(x) (1.0/sqrt(x))
00181
00182 #endif
00183
00184 #endif // __QSQRT_H__