00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00024 #ifndef SHLIBMISCIMPL_HPP
00025 #define SHLIBMISCIMPL_HPP
00026
00027 #include "ShLibMisc.hpp"
00028 #include "ShInstructions.hpp"
00029 #include "ShProgram.hpp"
00030
00031 namespace SH {
00032
00033 template<int M, int N, typename T>
00034 ShGeneric<M, T> cast(const ShGeneric<N, T>& a)
00035 {
00036 int copySize = std::min(M, N);
00037 ShAttrib<M, SH_TEMP, T> result;
00038
00039 int* indices = new int[copySize];
00040 for(int i = 0; i < copySize; ++i) indices[i] = i;
00041 if(M < N) {
00042 result = a.template swiz<M>(indices);
00043 } else if( M > N ) {
00044 result.template swiz<N>(indices) = a;
00045 } else {
00046 shASN(result, a);
00047 }
00048 delete [] indices;
00049 return result;
00050 }
00051
00052 template<int M>
00053 inline
00054 ShGeneric<M, double> cast(double a)
00055 {
00056 return cast<M>(ShAttrib<1, SH_CONST, double>(a));
00057 }
00058
00059 template<int M, int N, typename T>
00060 ShGeneric<M, T> fillcast(const ShGeneric<N, T>& a)
00061 {
00062 if( M <= N ) return cast<M>(a);
00063 int indices[M];
00064 for(int i = 0; i < M; ++i) indices[i] = i >= N ? N - 1 : i;
00065 return a.template swiz<M>(indices);
00066 }
00067
00068 template<int M>
00069 inline
00070 ShGeneric<M, double> fillcast(double a)
00071 {
00072 return fillcast<M>(ShAttrib<1, SH_CONST, double>(a));
00073 }
00074
00075 template<int M, int N, typename T1, typename T2>
00076 ShGeneric<M+N, CT1T2> join(const ShGeneric<M, T1>& a, const ShGeneric<N, T2>& b)
00077 {
00078 int indices[M+N];
00079 for(int i = 0; i < M+N; ++i) indices[i] = i;
00080 ShAttrib<M+N, SH_TEMP, CT1T2> result;
00081 result.template swiz<M>(indices) = a;
00082 result.template swiz<N>(indices + M) = b;
00083 return result;
00084 }
00085
00086 template<int M, int N, int O, typename T1, typename T2, typename T3>
00087 ShGeneric<M+N+O, CT1T2T3> join(const ShGeneric<M, T1>& a,
00088 const ShGeneric<N, T2> &b,
00089 const ShGeneric<O, T3> &c)
00090 {
00091 int indices[M+N+O];
00092 for(int i = 0; i < M+N+O; ++i) indices[i] = i;
00093 ShAttrib<M+N+O, SH_TEMP, CT1T2T3> result;
00094 result.template swiz<M>(indices) = a;
00095 result.template swiz<N>(indices + M) = b;
00096 result.template swiz<N>(indices + M + N) = c;
00097 return result;
00098 }
00099
00100 template<int M, int N, int O, int P, typename T1, typename T2, typename T3, typename T4>
00101 ShGeneric<M+N+O+P, CT1T2T3T4> join(const ShGeneric<M, T1>& a,
00102 const ShGeneric<N, T2> &b,
00103 const ShGeneric<O, T3> &c,
00104 const ShGeneric<P, T4> &d)
00105 {
00106 int indices[M+N+O+P];
00107 for(int i = 0; i < M+N+O+P; ++i) indices[i] = i;
00108 ShAttrib<M+N+O+P, SH_TEMP, CT1T2T3T4> result;
00109 result.template swiz<M>(indices) = a;
00110 result.template swiz<N>(indices + M) = b;
00111 result.template swiz<N>(indices + M + N) = c;
00112 result.template swiz<N>(indices + M + N + O) = d;
00113 return result;
00114 }
00115
00116 template<int N, typename T>
00117 inline
00118 void discard(const ShGeneric<N, T>& c)
00119 {
00120 shKIL(c);
00121 }
00122
00123 template<int N, typename T>
00124 inline
00125 void kill(const ShGeneric<N, T>& c)
00126 {
00127 discard(c);
00128 }
00129
00130 template<int N, typename T>
00131 ShGeneric<N, T> sort(const ShGeneric<N, T>& a)
00132 {
00133 ShGeneric<N, T> result(a);
00134 groupsort<1>(&result);
00135 return result;
00136 }
00137
00138 template<int S, typename VarType>
00139 void groupsort(VarType v[]) {
00140 const int N = VarType::typesize;
00141 typedef typename VarType::storage_type T;
00142
00143 const int NE = (N + 1) / 2;
00144 const int NO = N / 2;
00145 const int NU = NO;
00146 const int ND = NE - 1;
00147
00148 int i, j;
00149
00150
00151 ShAttrib<NU, SH_TEMP, T> eu, ou, ccu;
00152 ShAttrib<ND, SH_TEMP, T> ed, od, ccd;
00153
00154
00155 int eswiz[NE], oswiz[NO];
00156 for(i = 0; i < NE; ++i) eswiz[i] = i;
00157 for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00158
00159 for(i = 0; i < NE; ++i) {
00160
00161 eu = v[0].template swiz<NU>(eswiz);
00162 ou = v[0].template swiz<NU>(oswiz);
00163 if (S > 1) ccu = eu < ou;
00164 v[0].template swiz<NU>(eswiz) = min(eu, ou);
00165 v[0].template swiz<NU>(oswiz) = max(eu, ou);
00166
00167 for(j = 1; j < S; ++j) {
00168 eu = v[j].template swiz<NU>(eswiz);
00169 ou = v[j].template swiz<NU>(oswiz);
00170 v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou);
00171 v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu);
00172 }
00173
00174
00175 ed = v[0].template swiz<ND>(eswiz + 1);
00176 od = v[0].template swiz<ND>(oswiz);
00177 if (S > 1) ccd = ed > od;
00178 v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00179 v[0].template swiz<ND>(oswiz) = min(ed, od);
00180
00181 for(j = 1; j < S; ++j) {
00182 ed = v[j].template swiz<ND>(eswiz + 1);
00183 od = v[j].template swiz<ND>(oswiz);
00184 v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od);
00185 v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed);
00186 }
00187 }
00188
00189
00190 int resultEswiz[NE], resultOswiz[NO];
00191 for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00192 for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1;
00193 for(i = 0; i < S; ++i) {
00194 ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00195 v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00196 v[i].template swiz<NE>(resultEswiz) = evens;
00197 }
00198 }
00199
00200 template<typename T>
00201 inline
00202 ShProgram freeze(const ShProgram& p,
00203 const T& uniform)
00204 {
00205 return (p >> uniform) << (T::ConstType)(uniform);
00206 }
00207
00208 template<int N, int M, typename T1, typename T2>
00209 ShGeneric<N, CT1T2> poly(const ShGeneric<N, T1>& a, const ShGeneric<M, T2>& b)
00210 {
00211 ShAttrib<N, SH_TEMP, CT1T2> t;
00212
00213 for (int i=0; i < N; i++) {
00214 ShGeneric<1, CT1T2> r_i = t[i];
00215 ShGeneric<1, T1> a_i = a[i];
00216
00217
00218 r_i = b[M - 1];
00219 for (int j = M - 1; j > 0; j--) {
00220 r_i = mad(a_i, r_i, b[j-1]);
00221 }
00222 }
00223
00224 return t;
00225 }
00226
00227 }
00228
00229 #endif