Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ShFuncImpl.hpp

00001 // Sh: A GPU metaprogramming language. 00002 // 00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory 00004 // Project administrator: Michael D. McCool 00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa, 00006 // Bryan Chan, Michael D. McCool 00007 // 00008 // This software is provided 'as-is', without any express or implied 00009 // warranty. In no event will the authors be held liable for any damages 00010 // arising from the use of this software. 00011 // 00012 // Permission is granted to anyone to use this software for any purpose, 00013 // including commercial applications, and to alter it and redistribute it 00014 // freely, subject to the following restrictions: 00015 // 00016 // 1. The origin of this software must not be misrepresented; you must 00017 // not claim that you wrote the original software. If you use this 00018 // software in a product, an acknowledgment in the product documentation 00019 // would be appreciated but is not required. 00020 // 00021 // 2. Altered source versions must be plainly marked as such, and must 00022 // not be misrepresented as being the original software. 00023 // 00024 // 3. This notice may not be removed or altered from any source 00025 // distribution. 00027 #ifndef SHUTIL_FUNCIMPL_HPP 00028 #define SHUTIL_FUNCIMPL_HPP 00029 00030 #include <cmath> 00031 #include <numeric> 00032 #include "ShAttrib.hpp" 00033 #include "ShSwizzle.hpp" 00034 #include "ShVariable.hpp" 00035 #include "ShFunc.hpp" 00036 #include "ShLib.hpp" 00037 00038 namespace ShUtil { 00039 00040 using namespace SH; 00041 00042 template<int N, typename T> 00043 ShGeneric<N, T> smoothstep(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b, 00044 const ShGeneric<N, T> x) { 00045 ShGeneric<N, T> t = (x - a) / (b - a); 00046 // TODO fix this for other types 00047 t = clamp(t, 0.0f, 1.0f); 00048 return t * t * mad(-2.0f, t, ShConstAttrib1f(3.0f)); 00049 } 00050 00053 template<int N, typename T> 00054 ShGeneric<1, T> distance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) { 00055 ShGeneric<N, T> diff = abs(a - b); 00056 return sqrt(dot(diff, diff)); 00057 } 00058 00062 template<int N, typename T> 00063 ShGeneric<1, T> lOneDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) { 00064 //TODO should use dot product with vector 1 00065 ShGeneric<N, T> diff = abs(a - b); 00066 return dot(fillcast<N>(1.0f), diff); 00067 } 00068 00072 template<int N, typename T> 00073 ShGeneric<1, T> lInfDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) { 00074 ShGeneric<N, T> diff = abs(a - b); 00075 ShGeneric<1, T> result = max(diff(0), diff(1)); 00076 for(int i = 2; i < N; ++i) result = max(result, diff(i)); 00077 return result; 00078 } 00079 00080 00081 static const int LCG_REPS = 5; // total instructions for hashlcg will be LCG_REPS * 2 + 2 00088 // TODO: may not work as intended on 24-bit floats 00089 // since there may not be enough precision 00090 template<int N, typename T> 00091 ShGeneric<N, T> hashlcg(const ShGeneric<N, T>& p) { 00092 ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01); 00093 00094 // TODO fix this for long tuples 00095 ShGeneric<N, T> a = fillcast<N>( 00096 ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0), 00097 std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0))); 00098 ShGeneric<N, T> m = fillcast<N>( 00099 ShConstAttrib4f(std::sqrt(2.0), 1.0 / M_PI, std::sqrt(3.0), 00100 std::exp(-1.0))); 00101 00102 for(int i = 0; i < LCG_REPS; ++i) result = frac(mad(result, a, m)); 00103 return result; 00104 } 00105 00106 00107 static const int MRG_REPS = 2; // total instructions for hashmrg will be MRG_REPS * N * 2 + 2 00120 template<int N, typename T> 00121 ShGeneric<N, T> hashmrg(const ShGeneric<N, T>& p) { 00122 ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01); 00123 ShGeneric<N, T> a = fillcast<N>( 00124 ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0), 00125 std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0))); 00126 00127 for(int i = 0; i < MRG_REPS; ++i) { 00128 for(int j = 0; j < N; ++j) { 00129 result(j) = frac(dot(result, a)); 00130 } 00131 } 00132 return result; 00133 } 00134 00135 template<int N, ShBindingType Binding, typename T> 00136 ShAttrib<N, Binding, T> evenOddSort(const ShAttrib<N, Binding, T>& v) { 00137 ShAttrib<N, Binding, T> result = v; 00138 groupEvenOddSort<1>(&result); 00139 return result; 00140 } 00141 00146 template<int S, int N, ShBindingType Binding, typename T> 00147 void groupEvenOddSort(ShAttrib<N, Binding, T> v[]) { 00148 const int NE = (N + 1) / 2; // number of even elements 00149 const int NO = N / 2; // number of odd elements 00150 const int NU = NO; // number of components to compare for (2i, 2i+1) comparisons 00151 const int ND = NE - 1; // number of componnets to compare for (2i, 2i-1) comparisons 00152 00153 int i, j; 00154 // hold even/odd temps and condition code for (2i, 2i+1) "up" and (2i, 2i-1) "down" comparisons 00155 ShAttrib<NU, SH_TEMP, T> eu, ou, ccu; 00156 ShAttrib<ND, SH_TEMP, T> ed, od, ccd; 00157 00158 // even and odd swizzle (elms 0..NE-1 are the "even" subsequence, NE..N-1 "odd") 00159 int eswiz[NE], oswiz[NO]; 00160 for(i = 0; i < NE; ++i) eswiz[i] = i; 00161 for(i = 0; i < NO; ++i) oswiz[i] = NE + i; 00162 00163 for(i = 0; i < NE; ++i) { 00164 // TODO note the interesting syntax (does appear to be C++ standard) 00165 // that's required so that the gcc parser does 00166 // not crap out on the template swiz code 00167 // 00168 // if this doesn't work on other platforms, we may have to 00169 // rewrite the swiz function 00170 00171 // compare 2i, 2i+1 00172 eu = v[0].template swiz<NU>(eswiz); 00173 ou = v[0].template swiz<NU>(oswiz); 00174 if (S > 1) ccu = eu < ou; 00175 v[0].template swiz<NU>(eswiz) = min(eu, ou); 00176 v[0].template swiz<NU>(oswiz) = max(eu, ou); 00177 00178 for(j = 1; j < S; ++j) { 00179 eu = v[j].template swiz<NU>(eswiz); 00180 ou = v[j].template swiz<NU>(oswiz); 00181 v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou); 00182 v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu); 00183 } 00184 00185 // compare 2i, 2i-1 00186 ed = v[0].template swiz<ND>(eswiz + 1); 00187 od = v[0].template swiz<ND>(oswiz); 00188 if (S > 1) ccd = ed > od; 00189 v[0].template swiz<ND>(eswiz + 1) = max(ed, od); 00190 v[0].template swiz<ND>(oswiz) = min(ed, od); 00191 00192 for(j = 1; j < S; ++j) { 00193 ed = v[j].template swiz<ND>(eswiz + 1); 00194 od = v[j].template swiz<ND>(oswiz); 00195 v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od); 00196 v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed); 00197 } 00198 } 00199 00200 // reswizzle "even" to 0, 2, 4,... "odd" to 1, 3, 5, .. 00201 int resultEswiz[NE], resultOswiz[NO]; 00202 for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2; 00203 for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1; 00204 for(i = 0; i < S; ++i) { 00205 ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz); 00206 v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz); 00207 v[i].template swiz<NE>(resultEswiz) = evens; 00208 } 00209 } 00210 00211 template<int S, int N, ShBindingType Binding, typename T> 00212 void groupBitonicSort(ShAttrib<N, Binding, T> v[]) { 00213 static const int C = N >> 1; // number of comparators 00214 00215 int i, j; 00216 int log2N, ceilN; 00217 int strideBit, flipBit, stride, flip; 00218 bool doFlip; 00219 int swiz[2][C]; 00220 ShAttrib<C, SH_TEMP, T> temp[2], condition; 00221 00222 for(log2N = 0; N > (1 << log2N); log2N++); // figure out ceil(log_2(N)) 00223 ceilN = 1 << log2N; 00224 00225 std::cout << "N = " << N << ", ceilN = " << ceilN << ", log2N = " << log2N << std::endl; 00226 00227 for(flipBit = 1; flipBit < log2N; ++flipBit) { 00228 flip = 1 << flipBit; 00229 for(strideBit = flipBit - 1; strideBit >= 0; --strideBit) { 00230 stride = 1 << strideBit; 00231 // prepare the swiz for this pass 00232 doFlip = false; 00233 for(i = j = 0; i < N - stride; ++j) { 00234 // handle non-power of 2 N (see http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm) 00235 if(i >= ceilN - N) { 00236 swiz[0][j] = i + (doFlip ? stride : 0); 00237 swiz[1][j] = i + (doFlip ? 0 : stride); 00238 } 00239 ++i; 00240 if(i % stride == 0) i += stride; 00241 if(i % flip == 0) doFlip = !doFlip; 00242 } 00243 std::cout << "Comparators:" << std::endl; 00244 for(i = 0; i < C; ++i) std::cout << " " << swiz[0][i] << " " << swiz[1][i] << std::endl; 00245 00246 // do the comparison using the first element 00247 for(i = 0; i < 2; ++i) temp[i] = v[0].template swiz<C>(swiz[i]); 00248 if(S > 1) condition = temp[0] < temp[1]; 00249 v[0].template swiz<C>(swiz[0]) = min(temp[0], temp[1]); 00250 v[0].template swiz<C>(swiz[1]) = max(temp[0], temp[1]); 00251 00252 // sort remaining elements based on first element comparison 00253 for(i = 1; i < S; ++i) { 00254 for(j = 0; j < 2; ++j) temp[j] = v[j].template swiz<C>(swiz[j]); 00255 v[i].template swiz<C>(swiz[0]) = cond(condition, temp[0], temp[1]); 00256 v[i].template swiz<C>(swiz[1]) = cond(condition, temp[1], temp[0]); 00257 } 00258 } 00259 } 00260 } 00261 00265 template<typename T> 00266 ShGeneric<3, T> changeBasis(const ShGeneric<3, T> &b0, 00267 const ShGeneric<3, T> &b1, const ShGeneric<3, T> &b2, const ShGeneric<3, T> &v) { 00268 ShAttrib<3, SH_TEMP, T> result; 00269 result(0) = b0 | v; 00270 result(1) = b1 | v; 00271 result(2) = b2 | v; 00272 return result; 00273 } 00274 00275 00276 } 00277 00278 #endif // SHUTIL_FUNCIMPL_HPP

Generated on Fri Nov 5 16:51:19 2004 for Sh by doxygen 1.3.7