Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ShLibMiscImpl.hpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright 2003-2005 Serious Hack Inc.
00004 // 
00005 // This library is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 //
00010 // This library is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with this library; if not, write to the Free Software
00017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
00018 // MA  02110-1301, USA
00020 #ifndef SHLIBMISCIMPL_HPP
00021 #define SHLIBMISCIMPL_HPP
00022 
00023 #include "ShLibMisc.hpp"
00024 #include "ShInstructions.hpp"
00025 #include "ShProgram.hpp"
00026 
00027 namespace SH {
00028 
00029 template<int M, int N, typename T> 
00030 ShGeneric<M, T> cast(const ShGeneric<N, T>& a)
00031 {
00032   int copySize = std::min(M, N);
00033   ShAttrib<M, SH_TEMP, T> result;
00034 
00035   int* indices = new int[copySize];
00036   for(int i = 0; i < copySize; ++i) indices[i] = i;
00037   if(M < N) {
00038     result = a.template swiz<M>(indices);
00039   } else if( M > N ) {
00040     result.template swiz<N>(indices) = a;
00041   } else { // M == N
00042     shASN(result, a);
00043   }
00044   delete [] indices;
00045   return result;
00046 }
00047 
00048 template<int M> 
00049 inline
00050 ShGeneric<M, double> cast(double a)
00051 {
00052   return cast<M>(ShAttrib<1, SH_CONST, double>(a));
00053 }
00054 
00055 template<int M, int N, typename T> 
00056 ShGeneric<M, T> fillcast(const ShGeneric<N, T>& a)
00057 {
00058   if( M <= N ) return cast<M>(a);
00059   int indices[M];
00060   for(int i = 0; i < M; ++i) indices[i] = i >= N ? N - 1 : i;
00061   return a.template swiz<M>(indices);
00062 }
00063 
00064 template<int M> 
00065 inline
00066 ShGeneric<M, double> fillcast(double a)
00067 {
00068   return fillcast<M>(ShAttrib<1, SH_CONST, double>(a));
00069 }
00070 
00071 template<int M, int N, typename T1, typename T2> 
00072 ShGeneric<M+N, CT1T2> join(const ShGeneric<M, T1>& a, const ShGeneric<N, T2>& b)
00073 {
00074   int indices[M+N];
00075   for(int i = 0; i < M+N; ++i) indices[i] = i; 
00076   ShAttrib<M+N, SH_TEMP, CT1T2> result;
00077   result.template swiz<M>(indices) = a;
00078   result.template swiz<N>(indices + M) = b;
00079   return result;
00080 }
00081 
00082 template<int M, typename T> 
00083 ShGeneric<M+1, T> join(const T& a, const ShGeneric<M, T>& b)
00084 {
00085   return join(ShAttrib<1, SH_CONST, T>(a), b);
00086 }
00087 
00088 template<int M, typename T> 
00089 ShGeneric<M+1, T> join(const ShGeneric<M, T>& a, const T& b)
00090 {
00091   return join(a, ShAttrib<1, SH_CONST, T>(b));
00092 }
00093 
00094 template<int M, int N, int O, typename T1, typename T2, typename T3> 
00095 ShGeneric<M+N+O, CT1T2T3> join(const ShGeneric<M, T1>& a, 
00096                                const ShGeneric<N, T2> &b, 
00097                                const ShGeneric<O, T3> &c)
00098 {
00099   int indices[M+N+O];
00100   for(int i = 0; i < M+N+O; ++i) indices[i] = i; 
00101   ShAttrib<M+N+O, SH_TEMP, CT1T2T3> result;
00102   result.template swiz<M>(indices) = a;
00103   result.template swiz<N>(indices + M) = b;
00104   result.template swiz<N>(indices + M + N) = c;
00105   return result;
00106 }
00107 
00108 template<int M, int N, int O, int P, typename T1, typename T2, typename T3, typename T4> 
00109 ShGeneric<M+N+O+P, CT1T2T3T4> join(const ShGeneric<M, T1>& a, 
00110                                    const ShGeneric<N, T2> &b, 
00111                                    const ShGeneric<O, T3> &c, 
00112                                    const ShGeneric<P, T4> &d)
00113 {
00114   int indices[M+N+O+P];
00115   for(int i = 0; i < M+N+O+P; ++i) indices[i] = i; 
00116   ShAttrib<M+N+O+P, SH_TEMP, CT1T2T3T4> result;
00117   result.template swiz<M>(indices) = a;
00118   result.template swiz<N>(indices + M) = b;
00119   result.template swiz<N>(indices + M + N) = c;
00120   result.template swiz<N>(indices + M + N + O) = d;
00121   return result;
00122 }
00123 
00124 template<int N, typename T>
00125 inline
00126 void discard(const ShGeneric<N, T>& c)
00127 {
00128   shKIL(c);
00129 }
00130 
00131 template<int N, typename T>
00132 inline
00133 void kill(const ShGeneric<N, T>& c)
00134 {
00135   discard(c);
00136 }
00137 
00138 template<int S, typename VarType>
00139 void groupsort(VarType v[]) {
00140   const int N = VarType::typesize;
00141   typedef typename VarType::storage_type T;
00142 
00143   const int NE = (N + 1) / 2; // number of even elements
00144   const int NO = N / 2; // number of odd elements
00145   const int NU = NO; // number of components to compare for (2i, 2i+1) comparisons
00146   const int ND = NE - 1; // number of componnets to compare for (2i, 2i-1) comparisons
00147 
00148   int i, j;
00149   // hold even/odd temps and condition code for (2i, 2i+1) "up" and (2i, 2i-1) "down" comparisons 
00150 
00151   ShAttrib<NU, SH_TEMP, T> eu, ou, ccu; 
00152   ShAttrib<ND, SH_TEMP, T> ed, od, ccd; 
00153 
00154   // even and odd swizzle (elms 0..NE-1 are the "even" subsequence, NE..N-1 "odd")
00155   int eswiz[NE], oswiz[NO]; 
00156   for(i = 0; i < NE; ++i) eswiz[i] = i;
00157   for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00158 
00159   for(i = 0; i < NE; ++i) { 
00160     // compare 2i, 2i+1
00161     eu = v[0].template swiz<NU>(eswiz);
00162     ou = v[0].template swiz<NU>(oswiz);
00163     if (S > 1) ccu = eu < ou; 
00164     v[0].template swiz<NU>(eswiz) = min(eu, ou); 
00165     v[0].template swiz<NU>(oswiz) = max(eu, ou); 
00166 
00167     for(j = 1; j < S; ++j) {
00168       eu = v[j].template swiz<NU>(eswiz);
00169       ou = v[j].template swiz<NU>(oswiz);
00170       v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou); 
00171       v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu); 
00172     }
00173 
00174     // compare 2i, 2i-1
00175     ed = v[0].template swiz<ND>(eswiz + 1);
00176     od = v[0].template swiz<ND>(oswiz);
00177     if (S > 1) ccd = ed > od; 
00178     v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00179     v[0].template swiz<ND>(oswiz) = min(ed, od);
00180 
00181     for(j = 1; j < S; ++j) {
00182       ed = v[j].template swiz<ND>(eswiz + 1);
00183       od = v[j].template swiz<ND>(oswiz);
00184       v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od); 
00185       v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed); 
00186     }
00187   }
00188 
00189   // reswizzle "even" to 0, 2, 4,... "odd" to 1, 3, 5, ..
00190   int resultEswiz[NE], resultOswiz[NO]; 
00191   for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00192   for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1; 
00193   for(i = 0; i < S; ++i) {
00194     ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00195     v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00196     v[i].template swiz<NE>(resultEswiz) = evens;
00197   }
00198 }
00199 
00200 template<int N, typename T> 
00201 ShGeneric<N, T> sort(const ShGeneric<N, T>& a)
00202 {
00203   ShGeneric<N, T> result(a);
00204   groupsort<1>(&result);
00205   return result;
00206 }
00207 
00208 template<typename T>
00209 inline
00210 ShProgram freeze(const ShProgram& p,
00211                  const T& uniform)
00212 {
00213   return (p >> uniform) << (T::ConstType)(uniform);
00214 }
00215 
00216 template<int N, int M, typename T1, typename T2>
00217 ShGeneric<N, CT1T2> poly(const ShGeneric<N, T1>& a, const ShGeneric<M, T2>& b)
00218 {
00219   ShAttrib<N, SH_TEMP, CT1T2> t;
00220 
00221   for (int i=0; i < N; i++) {
00222     ShGeneric<1, CT1T2> r_i = t[i];
00223     ShGeneric<1, T1> a_i = a[i];
00224 
00225     // Uses Horner's rule
00226     r_i = b[M - 1];
00227     for (int j = M - 1; j > 0; j--) {
00228       r_i = mad(a_i, r_i, b[j-1]);
00229     }
00230   }
00231 
00232   return t;
00233 }
00234 
00235 }
00236 
00237 #endif

Generated on Thu Jul 28 17:33:03 2005 for Sh by  doxygen 1.4.3-20050530