Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ArbEmit.cpp

00001 // Sh: A GPU metaprogramming language. 00002 // 00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory 00004 // Project administrator: Michael D. McCool 00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa, 00006 // Michael D. McCool 00007 // 00008 // This software is provided 'as-is', without any express or implied 00009 // warranty. In no event will the authors be held liable for any damages 00010 // arising from the use of this software. 00011 // 00012 // Permission is granted to anyone to use this software for any purpose, 00013 // including commercial applications, and to alter it and redistribute it 00014 // freely, subject to the following restrictions: 00015 // 00016 // 1. The origin of this software must not be misrepresented; you must 00017 // not claim that you wrote the original software. If you use this 00018 // software in a product, an acknowledgment in the product documentation 00019 // would be appreciated but is not required. 00020 // 00021 // 2. Altered source versions must be plainly marked as such, and must 00022 // not be misrepresented as being the original software. 00023 // 00024 // 3. This notice may not be removed or altered from any source 00025 // distribution. 00027 #include "ArbCode.hpp" 00028 #include <algorithm> 00029 #include <cmath> 00030 #include "ShDebug.hpp" 00031 #include "ShError.hpp" 00032 00033 #ifdef WIN32 00034 namespace { 00035 double log2(double x) { return log(x)/log(2.0); } 00036 } 00037 #endif 00038 00039 namespace shgl { 00040 00041 using namespace SH; 00042 00043 // Transformations 00044 namespace { 00045 const unsigned int scalarize = 0x01; // Split into scalar instructions 00046 const unsigned int swap_sources = 0x02; // Swap first and second sources 00047 const unsigned int negate_first = 0x04; // Negate first source 00048 const unsigned int delay_mask = 0x08; // Do writemasking in separate step 00049 }; 00050 00051 struct ArbMapping { 00052 ShOperation sh_op; 00053 unsigned int filters; 00054 00055 unsigned int transforms; 00056 ArbOp arb_op; 00057 00058 typedef void (ArbCode::*ArbFunction)(const ShStatement&); 00059 ArbFunction function; 00060 }; 00061 00062 ArbMapping ArbCode::table[] = { 00063 {SH_OP_ASN, SH_ARB_ANY, 0, SH_ARB_MOV, 0}, 00064 00065 // Arithmetic 00066 {SH_OP_ADD, SH_ARB_ANY, 0, SH_ARB_ADD, 0}, 00067 {SH_OP_NEG, SH_ARB_ANY, negate_first, SH_ARB_MOV, 0}, 00068 {SH_OP_MUL, SH_ARB_ANY, 0, SH_ARB_MUL, 0}, 00069 00070 // Removed this temporarily because of a bug in the NV drivers 00071 //{SH_OP_DIV, SH_ARB_NVFP2, scalarize, SH_ARB_DIV, 0}, 00072 00073 {SH_OP_DIV, SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_div}, 00074 {SH_OP_POW, SH_ARB_ANY, scalarize, SH_ARB_POW, 0}, 00075 {SH_OP_RCP, SH_ARB_ANY, scalarize, SH_ARB_RCP, 0}, 00076 {SH_OP_RSQ, SH_ARB_ANY, scalarize, SH_ARB_RSQ, 0}, 00077 {SH_OP_SQRT, SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_sqrt}, 00078 00079 {SH_OP_LRP, SH_ARB_FP, 0, SH_ARB_LRP, 0}, 00080 {SH_OP_LRP, SH_ARB_VP, 0, SH_ARB_FUN, &ArbCode::emit_lerp}, 00081 {SH_OP_MAD, SH_ARB_ANY, 0, SH_ARB_MAD, 0}, 00082 00083 // Sum/product of components 00084 {SH_OP_CMUL, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_cmul}, 00085 {SH_OP_CSUM, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_csum}, 00086 00087 // Dot product 00088 {SH_OP_DOT, SH_ARB_VEC1, 0, SH_ARB_MUL, 0}, 00089 {SH_OP_DOT, SH_ARB_VEC3, 0, SH_ARB_DP3, 0}, 00090 {SH_OP_DOT, SH_ARB_VEC4, 0, SH_ARB_DP4, 0}, 00091 {SH_OP_DOT, SH_ARB_VEC2 | SH_ARB_NVFP2, 0, SH_ARB_DP2, 0}, 00092 {SH_OP_DOT, SH_ARB_VEC2, 0, SH_ARB_FUN, &ArbCode::emit_dot2}, 00093 00094 // Boolean 00095 {SH_OP_SLT, SH_ARB_ANY, 0, SH_ARB_SLT, 0}, 00096 {SH_OP_SGE, SH_ARB_ANY, 0, SH_ARB_SGE, 0}, 00097 00098 {SH_OP_SLE, SH_ARB_NVVP2, 0, SH_ARB_SLE, 0}, 00099 {SH_OP_SLE, SH_ARB_NVFP, 0, SH_ARB_SLE, 0}, 00100 {SH_OP_SLE, SH_ARB_ANY, swap_sources, SH_ARB_SGE, 0}, 00101 00102 {SH_OP_SGT, SH_ARB_NVVP2, 0, SH_ARB_SGT, 0}, 00103 {SH_OP_SGT, SH_ARB_NVFP, 0, SH_ARB_SGT, 0}, 00104 {SH_OP_SGT, SH_ARB_ANY, swap_sources, SH_ARB_SLT, 0}, 00105 00106 {SH_OP_SEQ, SH_ARB_NVVP2, 0, SH_ARB_SEQ, 0}, 00107 {SH_OP_SEQ, SH_ARB_NVFP, 0, SH_ARB_SEQ, 0}, 00108 {SH_OP_SEQ, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_eq}, 00109 00110 {SH_OP_SNE, SH_ARB_NVVP2, 0, SH_ARB_SNE, 0}, 00111 {SH_OP_SNE, SH_ARB_NVFP, 0, SH_ARB_SNE, 0}, 00112 {SH_OP_SNE, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_eq}, 00113 00114 // Clamping 00115 {SH_OP_ABS, SH_ARB_ANY, 0, SH_ARB_ABS, 0}, 00116 {SH_OP_CEIL, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_ceil}, 00117 {SH_OP_FLR, SH_ARB_ANY, 0, SH_ARB_FLR, 0}, 00118 {SH_OP_FRAC, SH_ARB_ANY, 0, SH_ARB_FRC, 0}, 00119 {SH_OP_MOD, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_mod}, 00120 {SH_OP_MAX, SH_ARB_ANY, 0, SH_ARB_MAX, 0}, 00121 {SH_OP_MIN, SH_ARB_ANY, 0, SH_ARB_MIN, 0}, 00122 {SH_OP_SGN, SH_ARB_NVVP2, 0, SH_ARB_SSG, 0}, 00123 {SH_OP_SGN, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_sgn}, 00124 00125 // Trig 00126 {SH_OP_ACOS, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_invtrig}, 00127 {SH_OP_ASIN, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_invtrig}, 00128 /* TODO 00129 {SH_OP_ATAN, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_invtrig}, 00130 {SH_OP_ATAN2, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_invtrig}, 00131 */ 00132 {SH_OP_COS, SH_ARB_FP, scalarize, SH_ARB_COS, 0}, 00133 {SH_OP_COS, SH_ARB_VP, 0, SH_ARB_FUN, &ArbCode::emit_trig}, 00134 {SH_OP_SIN, SH_ARB_FP, scalarize, SH_ARB_SIN, 0}, 00135 {SH_OP_SIN, SH_ARB_VP, 0, SH_ARB_FUN, &ArbCode::emit_trig}, 00136 {SH_OP_TAN, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_tan}, 00137 00138 // Derivatives 00139 {SH_OP_DX, SH_ARB_NVFP, 0, SH_ARB_DDX, 0}, 00140 {SH_OP_DY, SH_ARB_NVFP, 0, SH_ARB_DDY, 0}, 00141 00142 // Expontential 00143 {SH_OP_EXP2, SH_ARB_ANY, scalarize, SH_ARB_EX2, 0}, 00144 {SH_OP_LOG2, SH_ARB_ANY, scalarize, SH_ARB_LG2, 0}, 00145 {SH_OP_EXP, SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_exp}, 00146 {SH_OP_LOG, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_log}, 00147 {SH_OP_EXP10, SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_exp}, 00148 {SH_OP_LOG10, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_log}, 00149 00150 // Geometric 00151 {SH_OP_NORM, SH_ARB_NVFP2 | SH_ARB_VEC3, 0, SH_ARB_NRM, 0}, 00152 {SH_OP_NORM, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_norm}, 00153 {SH_OP_XPD, SH_ARB_ANY | SH_ARB_VEC3, 0, SH_ARB_XPD, 0}, 00154 00155 // Texture 00156 {SH_OP_TEX, SH_ARB_NVVP3, 0, SH_ARB_FUN, &ArbCode::emit_tex}, 00157 {SH_OP_TEX, SH_ARB_FP, 0, SH_ARB_FUN, &ArbCode::emit_tex}, 00158 {SH_OP_TEXI, SH_ARB_NVVP3, 0, SH_ARB_FUN, &ArbCode::emit_tex}, 00159 {SH_OP_TEXI, SH_ARB_FP, 0, SH_ARB_FUN, &ArbCode::emit_tex}, 00160 00161 {SH_OP_TEXD, SH_ARB_NVFP, 0, SH_ARB_FUN, &ArbCode::emit_tex}, 00162 00163 // Misc. 00164 {SH_OP_COND, SH_ARB_NVFP, 0, SH_ARB_FUN, &ArbCode::emit_nvcond}, 00165 {SH_OP_COND, SH_ARB_NVVP2, 0, SH_ARB_FUN, &ArbCode::emit_nvcond}, 00166 {SH_OP_COND, SH_ARB_ANY, negate_first, SH_ARB_CMP, 0}, 00167 {SH_OP_KIL, SH_ARB_FP, 0, SH_ARB_FUN, &ArbCode::emit_kil}, 00168 00169 {SH_OP_ASN, SH_ARB_END, 0, SH_ARB_FUN, 0} 00170 }; 00171 00172 void ArbCode::emit(const ShStatement& stmt) 00173 { 00174 int maxlen = 0; // Maximum tuple length over all sources 00175 for (int i = 0; i < opInfo[stmt.op].arity; i++) { 00176 if (stmt.src[i].size() > maxlen) maxlen = stmt.src[i].size(); 00177 } 00178 00179 unsigned int match = m_environment; 00180 switch(maxlen) { 00181 case 1: match |= SH_ARB_VEC1; break; 00182 case 2: match |= SH_ARB_VEC2; break; 00183 case 3: match |= SH_ARB_VEC3; break; 00184 case 4: match |= SH_ARB_VEC4; break; 00185 } 00186 00187 ArbMapping* mapping; 00188 00189 for (mapping = table; mapping->filters != SH_ARB_END; mapping++) { 00190 if (mapping->sh_op != stmt.op) continue; 00191 if ((mapping->filters & match) != mapping->filters) continue; 00192 break; 00193 } 00194 if (mapping->filters == SH_ARB_END) { 00195 shError(ShException(std::string("ARB Code: Unknown operation ") + opInfo[stmt.op].name)); 00196 return; 00197 } 00198 00199 ShStatement actual = stmt; 00200 00201 if (mapping->transforms & swap_sources) { 00202 ShVariable tmp(actual.src[0]); 00203 actual.src[0] = actual.src[1]; 00204 actual.src[1] = tmp; 00205 } 00206 if (mapping->transforms & negate_first) { 00207 actual.src[0] = -actual.src[0]; 00208 } 00209 00210 std::list<ShStatement> stmts; 00211 if ((mapping->transforms & scalarize) && maxlen > 1) { 00212 for (int i = 0; i < maxlen; i++) { 00213 ShStatement scalar = actual; 00214 scalar.dest = scalar.dest(i); 00215 for (int j = 0; j < opInfo[actual.op].arity; j++) { 00216 scalar.src[j] = scalar.src[j](std::min(i, scalar.src[j].size() - 1)); 00217 } 00218 stmts.push_back(scalar); 00219 } 00220 } else { 00221 stmts.push_back(actual); 00222 } 00223 00224 if (mapping->transforms & delay_mask) { 00225 for (std::list<ShStatement>::iterator I = stmts.begin(); I != stmts.end(); ++I) { 00226 if (I->dest.swizzle().identity()) continue; 00227 00228 ShVariable realdest(I->dest); 00229 ShVariable tmp(new ShVariableNode(SH_TEMP, 4)); 00230 I->dest = tmp; 00231 00232 ShStatement mask(realdest, SH_OP_ASN, tmp); 00233 00234 std::list<ShStatement>::iterator next = I; ++next; 00235 I = stmts.insert(next, mask); 00236 } 00237 } 00238 00239 for (std::list<ShStatement>::const_iterator I = stmts.begin(); I != stmts.end(); ++I) { 00240 if (mapping->arb_op == SH_ARB_FUN) { 00241 (this->*(mapping->function))(*I); 00242 } else { 00243 // HACK for delay_mask to work. 00244 ArbOp op = (I->op == SH_OP_ASN) ? SH_ARB_MOV : mapping->arb_op; 00245 switch (opInfo[I->op].arity) { 00246 case 0: 00247 m_instructions.push_back(ArbInst(op, I->dest)); 00248 break; 00249 case 1: 00250 m_instructions.push_back(ArbInst(op, I->dest, 00251 I->src[0])); 00252 break; 00253 case 2: 00254 m_instructions.push_back(ArbInst(op, I->dest, 00255 I->src[0], I->src[1])); 00256 break; 00257 case 3: 00258 m_instructions.push_back(ArbInst(op, I->dest, 00259 I->src[0], I->src[1], I->src[2])); 00260 break; 00261 } 00262 } 00263 } 00264 } 00265 00266 void ArbCode::emit_div(const ShStatement& stmt) 00267 { 00268 ShVariable rcp(new ShVariableNode(SH_TEMP, 1)); 00269 m_instructions.push_back(ArbInst(SH_ARB_RCP, rcp, stmt.src[1])); 00270 m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, stmt.src[0], rcp)); 00271 } 00272 00273 void ArbCode::emit_sqrt(const ShStatement& stmt) 00274 { 00275 ShVariable rsq(new ShVariableNode(SH_TEMP, 1)); 00276 m_instructions.push_back(ArbInst(SH_ARB_RSQ, rsq, stmt.src[0])); 00277 m_instructions.push_back(ArbInst(SH_ARB_RCP, stmt.dest, rsq)); 00278 } 00279 00280 void ArbCode::emit_lerp(const ShStatement& stmt) 00281 { 00282 // lerp(f,a,b)= f*a + (1-f)*b = f*(a-b) + b 00283 00284 ShVariable t(new ShVariableNode(SH_TEMP, stmt.src[1].size())); 00285 m_instructions.push_back(ArbInst(SH_ARB_ADD, t, stmt.src[1], -stmt.src[2])); 00286 m_instructions.push_back(ArbInst(SH_ARB_MAD, stmt.dest, stmt.src[0], t, stmt.src[2])); 00287 } 00288 00289 void ArbCode::emit_dot2(const ShStatement& stmt) 00290 { 00291 ShVariable mul(new ShVariableNode(SH_TEMP, 2)); 00292 m_instructions.push_back(ArbInst(SH_ARB_MUL, mul, stmt.src[0], stmt.src[1])); 00293 m_instructions.push_back(ArbInst(SH_ARB_ADD, stmt.dest, mul(0), mul(1))); 00294 } 00295 00296 void ArbCode::emit_eq(const ShStatement& stmt) 00297 { 00298 ShVariable t1(new ShVariableNode(SH_TEMP, stmt.dest.size())); 00299 ShVariable t2(new ShVariableNode(SH_TEMP, stmt.dest.size())); 00300 00301 ArbOp op; 00302 if (stmt.op == SH_OP_SEQ) { 00303 op = SH_ARB_SGE; 00304 } else if (stmt.op == SH_OP_SNE) { 00305 op = SH_ARB_SLT; 00306 } else { 00307 SH_DEBUG_ASSERT(false); 00308 } 00309 00310 m_instructions.push_back(ArbInst(SH_ARB_SGE, t1, stmt.src[0], stmt.src[1])); 00311 m_instructions.push_back(ArbInst(SH_ARB_SGE, t2, stmt.src[1], stmt.src[0])); 00312 m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, t1, t2)); 00313 } 00314 00315 void ArbCode::emit_ceil(const ShStatement& stmt) 00316 { 00317 m_instructions.push_back(ArbInst(SH_ARB_FLR, stmt.dest, -stmt.src[0])); 00318 m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest, -stmt.dest)); 00319 } 00320 00321 void ArbCode::emit_mod(const ShStatement& stmt) 00322 { 00323 // TODO - is this really optimal? 00324 ShVariable t1(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00325 ShVariable t2(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00326 00327 // result = x - sign(x/y)*floor(abs(x/y))*y 00328 emit(ShStatement(t1, stmt.src[0], SH_OP_DIV, stmt.src[1])); 00329 m_instructions.push_back(ArbInst(SH_ARB_ABS, t2, t1)); 00330 emit(ShStatement(t1, SH_OP_SGN, t1)); 00331 m_instructions.push_back(ArbInst(SH_ARB_FLR, t2, t2)); 00332 m_instructions.push_back(ArbInst(SH_ARB_MUL, t1, t1, t2)); 00333 m_instructions.push_back(ArbInst(SH_ARB_MUL, t1, t1, stmt.src[1])); 00334 m_instructions.push_back(ArbInst(SH_ARB_SUB, stmt.dest, stmt.src[0], t1)); 00335 } 00336 00337 void ArbCode::emit_trig(const ShStatement& stmt) 00338 { 00339 ShVariableNode::ValueType c0_values[] = 00340 { 0.0, 0.5, 1.0, 0.0 }; 00341 ShVariable c0(new ShVariableNode(SH_CONST, 4)); 00342 c0.setValues(c0_values); 00343 ShVariableNode::ValueType c1_values[] = 00344 { 0.25, -9.0, 0.75, 1.0/(2.0*M_PI) }; 00345 ShVariable c1(new ShVariableNode(SH_CONST, 4)); 00346 c1.setValues(c1_values); 00347 ShVariableNode::ValueType c2_values[] = 00348 { 24.9808039603, -24.9808039603, -60.1458091736, 60.1458091736 }; 00349 ShVariable c2(new ShVariableNode(SH_CONST, 4)); 00350 c2.setValues(c2_values); 00351 ShVariableNode::ValueType c3_values[] = 00352 { 85.4537887573, -85.4537887573, -64.9393539429, 64.9393539429 }; 00353 ShVariable c3(new ShVariableNode(SH_CONST, 4)); 00354 c3.setValues(c3_values); 00355 ShVariableNode::ValueType c4_values[] = 00356 { 19.7392082214, -19.7392082214, -1.0, 1.0 }; 00357 ShVariable c4(new ShVariableNode(SH_CONST, 4)); 00358 c4.setValues(c4_values); 00359 m_shader->constants.push_back(c0.node()); 00360 m_shader->constants.push_back(c1.node()); 00361 m_shader->constants.push_back(c2.node()); 00362 m_shader->constants.push_back(c3.node()); 00363 m_shader->constants.push_back(c4.node()); 00364 00365 ShVariable r0(new ShVariableNode(SH_TEMP, 4)); 00366 ShVariable r1(new ShVariableNode(SH_TEMP, 4)); 00367 ShVariable r2(new ShVariableNode(SH_TEMP, 4)); 00368 ShVariable rs(new ShVariableNode(SH_TEMP, 4)); 00369 00370 if (stmt.op == SH_OP_SIN) { 00371 m_instructions.push_back(ArbInst(SH_ARB_MAD, rs, c1(3,3,3,3), stmt.src[0], -c1(0,0,0,0))); 00372 } else if (stmt.op == SH_OP_COS) { 00373 m_instructions.push_back(ArbInst(SH_ARB_MUL, rs, c1(3,3,3,3), stmt.src[0])); 00374 } else { 00375 SH_DEBUG_ASSERT(false); 00376 } 00377 m_instructions.push_back(ArbInst(SH_ARB_FRC, rs, rs)); 00378 for (int i = 0; i < stmt.src[0].size(); i++) { 00379 m_instructions.push_back(ArbInst(SH_ARB_SLT, r2(0), rs(i), c1(0))); 00380 m_instructions.push_back(ArbInst(SH_ARB_SGE, r2(1,2), rs(i,i), c1(1,2))); 00381 m_instructions.push_back(ArbInst(SH_ARB_DP3, r2(1), r2(0,1,2), c4(2,3,2))); 00382 m_instructions.push_back(ArbInst(SH_ARB_ADD, r0(0,1,2), -rs(i,i,i), c0(0,1,2))); 00383 m_instructions.push_back(ArbInst(SH_ARB_MUL, r0, r0, r0)); 00384 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, c2(0,1,0,1), r0, c2(2,3,2,3))); 00385 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c3(0,1,0,1))); 00386 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c3(2,3,2,3))); 00387 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c4(0,1,0,1))); 00388 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c4(2,3,2,3))); 00389 m_instructions.push_back(ArbInst(SH_ARB_DP3, r0(0), r1(0,1,2), -r2(0,1,2))); 00390 m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest(i), r0(0))); 00391 } 00392 } 00393 00394 void ArbCode::emit_invtrig(const ShStatement& stmt) 00395 { 00396 ShVariableNode::ValueType c0_values[] = 00397 { 0.0, 1.570796327, -0.5860008052, 0.5860008052 }; 00398 ShVariable c0(new ShVariableNode(SH_CONST, 4)); 00399 c0.setValues(c0_values); 00400 ShVariableNode::ValueType c1_values[] = 00401 { 1.571945105, -1.571945105, -1.669668977, 1.669668977 }; 00402 ShVariable c1(new ShVariableNode(SH_CONST, 4)); 00403 c1.setValues(c1_values); 00404 ShVariableNode::ValueType c2_values[] = 00405 { 0.8999841642, -0.8999841642, -0.6575341673, 0.6575341673 }; 00406 ShVariable c2(new ShVariableNode(SH_CONST, 4)); 00407 c2.setValues(c2_values); 00408 ShVariableNode::ValueType c3_values[] = 00409 { 1.012386649, -1.012386649, 0.9998421793, -0.9998421793 }; 00410 ShVariable c3(new ShVariableNode(SH_CONST, 4)); 00411 c3.setValues(c3_values); 00412 ShVariableNode::ValueType c4_values[] = 00413 { 1.0, -1.0, 1.0, -1.0 }; 00414 ShVariable c4(new ShVariableNode(SH_CONST, 4)); 00415 c4.setValues(c4_values); 00416 m_shader->constants.push_back(c0.node()); 00417 m_shader->constants.push_back(c1.node()); 00418 m_shader->constants.push_back(c2.node()); 00419 m_shader->constants.push_back(c3.node()); 00420 m_shader->constants.push_back(c4.node()); 00421 00422 ShVariable r0(new ShVariableNode(SH_TEMP, 4)); 00423 ShVariable r1(new ShVariableNode(SH_TEMP, 4)); 00424 ShVariable r2(new ShVariableNode(SH_TEMP, 4)); 00425 ShVariable offset(new ShVariableNode(SH_TEMP, 4)); 00426 ShVariable output(new ShVariableNode(SH_TEMP, stmt.dest.size())); 00427 m_instructions.push_back(ArbInst(SH_ARB_ABS, r0, stmt.src[0])); 00428 m_instructions.push_back(ArbInst(SH_ARB_MAD, offset, -r0, r0, c4(0,0,0,0))); 00429 00430 m_instructions.push_back(ArbInst(SH_ARB_MOV, r2, c0(0,1,0,1))); 00431 for (int i = 0; i < stmt.src[0].size(); i++) { 00432 m_instructions.push_back(ArbInst(SH_ARB_SLT, r2(1), stmt.src[0](i), c0(0))); 00433 m_instructions.push_back(ArbInst(SH_ARB_SGE, r2(0), stmt.src[0](i), c0(0))); 00434 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), c0(2,3), r0(i,i), c1(0,1))); 00435 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c1(2,3))); 00436 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c2(0,1))); 00437 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c2(2,3))); 00438 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c3(0,1))); 00439 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c3(2,3))); 00440 m_instructions.push_back(ArbInst(SH_ARB_RSQ,offset(i), offset(i))); 00441 m_instructions.push_back(ArbInst(SH_ARB_RCP,offset(i), offset(i))); 00442 m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), c4(1,0), offset(i,i), r1(0,1))); 00443 if (stmt.op == SH_OP_ACOS) { 00444 m_instructions.push_back(ArbInst(SH_ARB_DP3, output(i), r1(0,1,2), r2(0,1,2))); 00445 } else { 00446 m_instructions.push_back(ArbInst(SH_ARB_DP3, stmt.dest(i), r1(0,1,2), r2(0,1,2))); 00447 } 00448 } 00449 if (stmt.op == SH_OP_ACOS) { 00450 m_instructions.push_back(ArbInst(SH_ARB_ADD, stmt.dest, -output, c0(1,1,1,1))); 00451 } 00452 } 00453 00454 void ArbCode::emit_tan(const ShStatement& stmt) 00455 { 00456 ShVariable tmp1(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00457 ShVariable tmp2(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00458 00459 emit(ShStatement(tmp1, SH_OP_COS, stmt.src[0])); 00460 emit(ShStatement(tmp1, SH_OP_RCP, tmp1)); 00461 emit(ShStatement(tmp2, SH_OP_SIN, stmt.src[0])); 00462 00463 m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp1, tmp2)); 00464 } 00465 00466 void ArbCode::emit_exp(const ShStatement& stmt) 00467 { 00468 float basef = (stmt.op == SH_OP_EXP ? M_E : 10.0f); 00469 00470 ShVariable base(new ShVariableNode(SH_CONST, 1)); 00471 base.setValues(&basef); 00472 m_shader->constants.push_back(base.node()); 00473 00474 m_instructions.push_back(ArbInst(SH_ARB_POW, stmt.dest, base, stmt.src[0])); 00475 } 00476 00477 void ArbCode::emit_log(const ShStatement& stmt) 00478 { 00479 float scalef = 1.0/log2((stmt.op == SH_OP_LOG ? M_E : 10.0f)); 00480 00481 ShVariable scale(new ShVariableNode(SH_CONST, 1)); 00482 scale.setValues(&scalef); 00483 m_shader->constants.push_back(scale.node()); 00484 00485 ShVariable tmp(new ShVariableNode(SH_TEMP, stmt.dest.size())); 00486 00487 emit(ShStatement(tmp, SH_OP_LOG2, stmt.src[0])); 00488 m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp, scale)); 00489 } 00490 00491 void ArbCode::emit_norm(const ShStatement& stmt) 00492 { 00493 ShVariable tmp(new ShVariableNode(SH_TEMP, 1)); 00494 emit(ShStatement(tmp, stmt.src[0], SH_OP_DOT, stmt.src[0])); 00495 m_instructions.push_back(ArbInst(SH_ARB_RSQ, tmp, tmp)); 00496 m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp, stmt.src[0])); 00497 } 00498 00499 void ArbCode::emit_sgn(const ShStatement& stmt) 00500 { 00501 ShVariable tmp(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00502 m_instructions.push_back(ArbInst(SH_ARB_ABS, tmp, stmt.src[0])); 00503 emit(ShStatement(stmt.dest, stmt.src[0], SH_OP_DIV, tmp)); 00504 } 00505 00506 void ArbCode::emit_tex(const ShStatement& stmt) 00507 { 00508 bool delay = false; 00509 ShVariable tmpdest; 00510 ShVariable tmpsrc; 00511 00512 if (!stmt.dest.swizzle().identity()) { 00513 tmpdest = ShVariable(new ShVariableNode(SH_TEMP, 4)); 00514 tmpsrc = tmpdest; 00515 delay = true; 00516 } 00517 00518 ShTextureNodePtr tnode = shref_dynamic_cast<ShTextureNode>(stmt.src[0].node()); 00519 00520 SH_DEBUG_ASSERT(tnode); 00521 00522 if (tnode->size() == 2) { 00523 // Special case for LUMINANCE_ALPHA 00524 if (!delay) { 00525 tmpdest = ShVariable(new ShVariableNode(SH_TEMP, 4)); 00526 tmpsrc = tmpdest; 00527 } 00528 tmpsrc = tmpsrc(0,3); 00529 delay = true; 00530 } 00531 00532 if (stmt.op == SH_OP_TEXD) { 00533 SH_DEBUG_ASSERT(tnode->dims() == SH_TEXTURE_2D); 00534 m_instructions.push_back(ArbInst(SH_ARB_TXD, 00535 (delay ? tmpdest : stmt.dest), stmt.src[1], stmt.src[0], 00536 stmt.src[2](0,1), stmt.src[2](2,3))); 00537 } else { 00538 m_instructions.push_back(ArbInst(SH_ARB_TEX, 00539 (delay ? tmpdest : stmt.dest), stmt.src[1], stmt.src[0])); 00540 } 00541 if (delay) emit(ShStatement(stmt.dest, SH_OP_ASN, tmpsrc)); 00542 } 00543 00544 void ArbCode::emit_nvcond(const ShStatement& stmt) 00545 { 00546 00547 ShVariable dummy(new ShVariableNode(SH_TEMP, stmt.src[0].size())); 00548 ArbInst updatecc(SH_ARB_MOV, dummy, stmt.src[0]); 00549 updatecc.update_cc = true; 00550 m_instructions.push_back(updatecc); 00551 00552 /* 00553 ShSwizzle ccswiz = stmt.src[0].swizzle(); 00554 if (ccswiz.size() == 1) { 00555 int indices[4]; 00556 for (int i = 0; i < stmt.dest.size(); i++) { 00557 indices[i] = 0; 00558 } 00559 ccswiz *= ShSwizzle(1, stmt.dest.size(), indices); 00560 } 00561 */ 00562 if (stmt.dest != stmt.src[1]) { 00563 ArbInst movt(SH_ARB_MOV, stmt.dest, stmt.src[1]); 00564 movt.ccode = ArbInst::GT; 00565 movt.ccswiz = stmt.src[0].swizzle(); 00566 m_instructions.push_back(movt); 00567 } 00568 if (stmt.dest != stmt.src[2]) { 00569 ArbInst movf(SH_ARB_MOV, stmt.dest, stmt.src[2]); 00570 movf.ccode = ArbInst::LE; 00571 movf.ccswiz = stmt.src[0].swizzle(); 00572 m_instructions.push_back(movf); 00573 } 00574 } 00575 00576 void ArbCode::emit_csum(const ShStatement& stmt) 00577 { 00578 ShVariableNode::ValueType* c1_values = new ShVariableNode::ValueType[stmt.src[0].size()]; 00579 for (int i = 0; i < stmt.src[0].size(); i++) c1_values[i] = 1.0; 00580 ShVariable c1(new ShVariableNode(SH_CONST, stmt.src[0].size())); 00581 c1.setValues(c1_values); 00582 m_shader->constants.push_back(c1.node()); 00583 00584 emit(ShStatement(stmt.dest, stmt.src[0], SH_OP_DOT, c1)); 00585 } 00586 00587 void ArbCode::emit_cmul(const ShStatement& stmt) 00588 { 00589 ShVariable prod(new ShVariableNode(SH_TEMP, 1)); 00590 00591 // TODO: Could use vector mul here. 00592 00593 m_instructions.push_back(ArbInst(SH_ARB_MOV, prod, stmt.src[0](0))); 00594 for (int i = 1; i < stmt.src[0].size(); i++) { 00595 m_instructions.push_back(ArbInst(SH_ARB_MUL, prod, stmt.src[0](i))); 00596 } 00597 m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest, prod)); 00598 } 00599 00600 void ArbCode::emit_kil(const ShStatement& stmt) 00601 { 00602 m_instructions.push_back(ArbInst(SH_ARB_KIL, -stmt.src[0])); 00603 } 00604 00605 }

Generated on Mon Oct 18 14:17:38 2004 for Sh by doxygen 1.3.7