Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ShLibMiscImpl.hpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright 2003-2005 Serious Hack Inc.
00004 // 
00005 // This software is provided 'as-is', without any express or implied
00006 // warranty. In no event will the authors be held liable for any damages
00007 // arising from the use of this software.
00008 // 
00009 // Permission is granted to anyone to use this software for any purpose,
00010 // including commercial applications, and to alter it and redistribute it
00011 // freely, subject to the following restrictions:
00012 // 
00013 // 1. The origin of this software must not be misrepresented; you must
00014 // not claim that you wrote the original software. If you use this
00015 // software in a product, an acknowledgment in the product documentation
00016 // would be appreciated but is not required.
00017 // 
00018 // 2. Altered source versions must be plainly marked as such, and must
00019 // not be misrepresented as being the original software.
00020 // 
00021 // 3. This notice may not be removed or altered from any source
00022 // distribution.
00024 #ifndef SHLIBMISCIMPL_HPP
00025 #define SHLIBMISCIMPL_HPP
00026 
00027 #include "ShLibMisc.hpp"
00028 #include "ShInstructions.hpp"
00029 #include "ShProgram.hpp"
00030 
00031 namespace SH {
00032 
00033 template<int M, int N, typename T> 
00034 ShGeneric<M, T> cast(const ShGeneric<N, T>& a)
00035 {
00036   int copySize = std::min(M, N);
00037   ShAttrib<M, SH_TEMP, T> result;
00038 
00039   int* indices = new int[copySize];
00040   for(int i = 0; i < copySize; ++i) indices[i] = i;
00041   if(M < N) {
00042     result = a.template swiz<M>(indices);
00043   } else if( M > N ) {
00044     result.template swiz<N>(indices) = a;
00045   } else { // M == N
00046     shASN(result, a);
00047   }
00048   delete [] indices;
00049   return result;
00050 }
00051 
00052 template<int M> 
00053 inline
00054 ShGeneric<M, double> cast(double a)
00055 {
00056   return cast<M>(ShAttrib<1, SH_CONST, double>(a));
00057 }
00058 
00059 template<int M, int N, typename T> 
00060 ShGeneric<M, T> fillcast(const ShGeneric<N, T>& a)
00061 {
00062   if( M <= N ) return cast<M>(a);
00063   int indices[M];
00064   for(int i = 0; i < M; ++i) indices[i] = i >= N ? N - 1 : i;
00065   return a.template swiz<M>(indices);
00066 }
00067 
00068 template<int M> 
00069 inline
00070 ShGeneric<M, double> fillcast(double a)
00071 {
00072   return fillcast<M>(ShAttrib<1, SH_CONST, double>(a));
00073 }
00074 
00075 template<int M, int N, typename T1, typename T2> 
00076 ShGeneric<M+N, CT1T2> join(const ShGeneric<M, T1>& a, const ShGeneric<N, T2>& b)
00077 {
00078   int indices[M+N];
00079   for(int i = 0; i < M+N; ++i) indices[i] = i; 
00080   ShAttrib<M+N, SH_TEMP, CT1T2> result;
00081   result.template swiz<M>(indices) = a;
00082   result.template swiz<N>(indices + M) = b;
00083   return result;
00084 }
00085 
00086 template<int M, int N, int O, typename T1, typename T2, typename T3> 
00087 ShGeneric<M+N+O, CT1T2T3> join(const ShGeneric<M, T1>& a, 
00088                                const ShGeneric<N, T2> &b, 
00089                                const ShGeneric<O, T3> &c)
00090 {
00091   int indices[M+N+O];
00092   for(int i = 0; i < M+N+O; ++i) indices[i] = i; 
00093   ShAttrib<M+N+O, SH_TEMP, CT1T2T3> result;
00094   result.template swiz<M>(indices) = a;
00095   result.template swiz<N>(indices + M) = b;
00096   result.template swiz<N>(indices + M + N) = c;
00097   return result;
00098 }
00099 
00100 template<int M, int N, int O, int P, typename T1, typename T2, typename T3, typename T4> 
00101 ShGeneric<M+N+O+P, CT1T2T3T4> join(const ShGeneric<M, T1>& a, 
00102                                    const ShGeneric<N, T2> &b, 
00103                                    const ShGeneric<O, T3> &c, 
00104                                    const ShGeneric<P, T4> &d)
00105 {
00106   int indices[M+N+O+P];
00107   for(int i = 0; i < M+N+O+P; ++i) indices[i] = i; 
00108   ShAttrib<M+N+O+P, SH_TEMP, CT1T2T3T4> result;
00109   result.template swiz<M>(indices) = a;
00110   result.template swiz<N>(indices + M) = b;
00111   result.template swiz<N>(indices + M + N) = c;
00112   result.template swiz<N>(indices + M + N + O) = d;
00113   return result;
00114 }
00115 
00116 template<int N, typename T>
00117 inline
00118 void discard(const ShGeneric<N, T>& c)
00119 {
00120   shKIL(c);
00121 }
00122 
00123 template<int N, typename T>
00124 inline
00125 void kill(const ShGeneric<N, T>& c)
00126 {
00127   discard(c);
00128 }
00129 
00130 template<int N, typename T> 
00131 ShGeneric<N, T> sort(const ShGeneric<N, T>& a)
00132 {
00133   ShGeneric<N, T> result(a);
00134   groupsort<1>(&result);
00135   return result;
00136 }
00137 
00138 template<int S, typename VarType>
00139 void groupsort(VarType v[]) {
00140   const int N = VarType::typesize;
00141   typedef typename VarType::storage_type T;
00142 
00143   const int NE = (N + 1) / 2; // number of even elements
00144   const int NO = N / 2; // number of odd elements
00145   const int NU = NO; // number of components to compare for (2i, 2i+1) comparisons
00146   const int ND = NE - 1; // number of componnets to compare for (2i, 2i-1) comparisons
00147 
00148   int i, j;
00149   // hold even/odd temps and condition code for (2i, 2i+1) "up" and (2i, 2i-1) "down" comparisons 
00150 
00151   ShAttrib<NU, SH_TEMP, T> eu, ou, ccu; 
00152   ShAttrib<ND, SH_TEMP, T> ed, od, ccd; 
00153 
00154   // even and odd swizzle (elms 0..NE-1 are the "even" subsequence, NE..N-1 "odd")
00155   int eswiz[NE], oswiz[NO]; 
00156   for(i = 0; i < NE; ++i) eswiz[i] = i;
00157   for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00158 
00159   for(i = 0; i < NE; ++i) { 
00160     // compare 2i, 2i+1
00161     eu = v[0].template swiz<NU>(eswiz);
00162     ou = v[0].template swiz<NU>(oswiz);
00163     if (S > 1) ccu = eu < ou; 
00164     v[0].template swiz<NU>(eswiz) = min(eu, ou); 
00165     v[0].template swiz<NU>(oswiz) = max(eu, ou); 
00166 
00167     for(j = 1; j < S; ++j) {
00168       eu = v[j].template swiz<NU>(eswiz);
00169       ou = v[j].template swiz<NU>(oswiz);
00170       v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou); 
00171       v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu); 
00172     }
00173 
00174     // compare 2i, 2i-1
00175     ed = v[0].template swiz<ND>(eswiz + 1);
00176     od = v[0].template swiz<ND>(oswiz);
00177     if (S > 1) ccd = ed > od; 
00178     v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00179     v[0].template swiz<ND>(oswiz) = min(ed, od);
00180 
00181     for(j = 1; j < S; ++j) {
00182       ed = v[j].template swiz<ND>(eswiz + 1);
00183       od = v[j].template swiz<ND>(oswiz);
00184       v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od); 
00185       v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed); 
00186     }
00187   }
00188 
00189   // reswizzle "even" to 0, 2, 4,... "odd" to 1, 3, 5, ..
00190   int resultEswiz[NE], resultOswiz[NO]; 
00191   for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00192   for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1; 
00193   for(i = 0; i < S; ++i) {
00194     ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00195     v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00196     v[i].template swiz<NE>(resultEswiz) = evens;
00197   }
00198 }
00199 
00200 template<typename T>
00201 inline
00202 ShProgram freeze(const ShProgram& p,
00203                  const T& uniform)
00204 {
00205   return (p >> uniform) << (T::ConstType)(uniform);
00206 }
00207 
00208 template<int N, int M, typename T1, typename T2>
00209 ShGeneric<N, CT1T2> poly(const ShGeneric<N, T1>& a, const ShGeneric<M, T2>& b)
00210 {
00211   ShAttrib<N, SH_TEMP, CT1T2> t;
00212 
00213   for (int i=0; i < N; i++) {
00214     ShGeneric<1, CT1T2> r_i = t[i];
00215     ShGeneric<1, T1> a_i = a[i];
00216 
00217     // Uses Horner's rule
00218     r_i = b[M - 1];
00219     for (int j = M - 1; j > 0; j--) {
00220       r_i = mad(a_i, r_i, b[j-1]);
00221     }
00222   }
00223 
00224   return t;
00225 }
00226 
00227 }
00228 
00229 #endif

Generated on Wed Jun 15 18:12:40 2005 for Sh by  doxygen 1.4.3-20050530