00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00020 #ifndef SHLIBMISCIMPL_HPP
00021 #define SHLIBMISCIMPL_HPP
00022
00023 #include "ShLibMisc.hpp"
00024 #include "ShInstructions.hpp"
00025 #include "ShProgram.hpp"
00026
00027 namespace SH {
00028
00029 template<int M, int N, typename T>
00030 ShGeneric<M, T> cast(const ShGeneric<N, T>& a)
00031 {
00032 int copySize = std::min(M, N);
00033 ShAttrib<M, SH_TEMP, T> result;
00034
00035 int* indices = new int[copySize];
00036 for(int i = 0; i < copySize; ++i) indices[i] = i;
00037 if(M < N) {
00038 result = a.template swiz<M>(indices);
00039 } else if( M > N ) {
00040 result.template swiz<N>(indices) = a;
00041 } else {
00042 shASN(result, a);
00043 }
00044 delete [] indices;
00045 return result;
00046 }
00047
00048 template<int M>
00049 inline
00050 ShGeneric<M, double> cast(double a)
00051 {
00052 return cast<M>(ShAttrib<1, SH_CONST, double>(a));
00053 }
00054
00055 template<int M, int N, typename T>
00056 ShGeneric<M, T> fillcast(const ShGeneric<N, T>& a)
00057 {
00058 if( M <= N ) return cast<M>(a);
00059 int indices[M];
00060 for(int i = 0; i < M; ++i) indices[i] = i >= N ? N - 1 : i;
00061 return a.template swiz<M>(indices);
00062 }
00063
00064 template<int M>
00065 inline
00066 ShGeneric<M, double> fillcast(double a)
00067 {
00068 return fillcast<M>(ShAttrib<1, SH_CONST, double>(a));
00069 }
00070
00071 template<int M, int N, typename T1, typename T2>
00072 ShGeneric<M+N, CT1T2> join(const ShGeneric<M, T1>& a, const ShGeneric<N, T2>& b)
00073 {
00074 int indices[M+N];
00075 for(int i = 0; i < M+N; ++i) indices[i] = i;
00076 ShAttrib<M+N, SH_TEMP, CT1T2> result;
00077 result.template swiz<M>(indices) = a;
00078 result.template swiz<N>(indices + M) = b;
00079 return result;
00080 }
00081
00082 template<int M, typename T>
00083 ShGeneric<M+1, T> join(const T& a, const ShGeneric<M, T>& b)
00084 {
00085 return join(ShAttrib<1, SH_CONST, T>(a), b);
00086 }
00087
00088 template<int M, typename T>
00089 ShGeneric<M+1, T> join(const ShGeneric<M, T>& a, const T& b)
00090 {
00091 return join(a, ShAttrib<1, SH_CONST, T>(b));
00092 }
00093
00094 template<int M, int N, int O, typename T1, typename T2, typename T3>
00095 ShGeneric<M+N+O, CT1T2T3> join(const ShGeneric<M, T1>& a,
00096 const ShGeneric<N, T2> &b,
00097 const ShGeneric<O, T3> &c)
00098 {
00099 int indices[M+N+O];
00100 for(int i = 0; i < M+N+O; ++i) indices[i] = i;
00101 ShAttrib<M+N+O, SH_TEMP, CT1T2T3> result;
00102 result.template swiz<M>(indices) = a;
00103 result.template swiz<N>(indices + M) = b;
00104 result.template swiz<N>(indices + M + N) = c;
00105 return result;
00106 }
00107
00108 template<int M, int N, int O, int P, typename T1, typename T2, typename T3, typename T4>
00109 ShGeneric<M+N+O+P, CT1T2T3T4> join(const ShGeneric<M, T1>& a,
00110 const ShGeneric<N, T2> &b,
00111 const ShGeneric<O, T3> &c,
00112 const ShGeneric<P, T4> &d)
00113 {
00114 int indices[M+N+O+P];
00115 for(int i = 0; i < M+N+O+P; ++i) indices[i] = i;
00116 ShAttrib<M+N+O+P, SH_TEMP, CT1T2T3T4> result;
00117 result.template swiz<M>(indices) = a;
00118 result.template swiz<N>(indices + M) = b;
00119 result.template swiz<N>(indices + M + N) = c;
00120 result.template swiz<N>(indices + M + N + O) = d;
00121 return result;
00122 }
00123
00124 template<int N, typename T>
00125 inline
00126 void discard(const ShGeneric<N, T>& c)
00127 {
00128 shKIL(c);
00129 }
00130
00131 template<int N, typename T>
00132 inline
00133 void kill(const ShGeneric<N, T>& c)
00134 {
00135 discard(c);
00136 }
00137
00138 template<int S, typename VarType>
00139 void groupsort(VarType v[]) {
00140 const int N = VarType::typesize;
00141 typedef typename VarType::storage_type T;
00142
00143 const int NE = (N + 1) / 2;
00144 const int NO = N / 2;
00145 const int NU = NO;
00146 const int ND = NE - 1;
00147
00148 int i, j;
00149
00150
00151 ShAttrib<NU, SH_TEMP, T> eu, ou, ccu;
00152 ShAttrib<ND, SH_TEMP, T> ed, od, ccd;
00153
00154
00155 int eswiz[NE], oswiz[NO];
00156 for(i = 0; i < NE; ++i) eswiz[i] = i;
00157 for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00158
00159 for(i = 0; i < NE; ++i) {
00160
00161 eu = v[0].template swiz<NU>(eswiz);
00162 ou = v[0].template swiz<NU>(oswiz);
00163 if (S > 1) ccu = eu < ou;
00164 v[0].template swiz<NU>(eswiz) = min(eu, ou);
00165 v[0].template swiz<NU>(oswiz) = max(eu, ou);
00166
00167 for(j = 1; j < S; ++j) {
00168 eu = v[j].template swiz<NU>(eswiz);
00169 ou = v[j].template swiz<NU>(oswiz);
00170 v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou);
00171 v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu);
00172 }
00173
00174
00175 ed = v[0].template swiz<ND>(eswiz + 1);
00176 od = v[0].template swiz<ND>(oswiz);
00177 if (S > 1) ccd = ed > od;
00178 v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00179 v[0].template swiz<ND>(oswiz) = min(ed, od);
00180
00181 for(j = 1; j < S; ++j) {
00182 ed = v[j].template swiz<ND>(eswiz + 1);
00183 od = v[j].template swiz<ND>(oswiz);
00184 v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od);
00185 v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed);
00186 }
00187 }
00188
00189
00190 int resultEswiz[NE], resultOswiz[NO];
00191 for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00192 for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1;
00193 for(i = 0; i < S; ++i) {
00194 ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00195 v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00196 v[i].template swiz<NE>(resultEswiz) = evens;
00197 }
00198 }
00199
00200 template<int N, typename T>
00201 ShGeneric<N, T> sort(const ShGeneric<N, T>& a)
00202 {
00203 ShGeneric<N, T> result(a);
00204 groupsort<1>(&result);
00205 return result;
00206 }
00207
00208 template<typename T>
00209 inline
00210 ShProgram freeze(const ShProgram& p,
00211 const T& uniform)
00212 {
00213 return (p >> uniform) << (T::ConstType)(uniform);
00214 }
00215
00216 template<int N, int M, typename T1, typename T2>
00217 ShGeneric<N, CT1T2> poly(const ShGeneric<N, T1>& a, const ShGeneric<M, T2>& b)
00218 {
00219 ShAttrib<N, SH_TEMP, CT1T2> t;
00220
00221 for (int i=0; i < N; i++) {
00222 ShGeneric<1, CT1T2> r_i = t[i];
00223 ShGeneric<1, T1> a_i = a[i];
00224
00225
00226 r_i = b[M - 1];
00227 for (int j = M - 1; j > 0; j--) {
00228 r_i = mad(a_i, r_i, b[j-1]);
00229 }
00230 }
00231
00232 return t;
00233 }
00234
00235 }
00236
00237 #endif