Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ShSmBackend.cpp

00001 // Sh: A GPU metaprogramming language. 00002 // 00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory 00004 // Project administrator: Michael D. McCool 00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa, 00006 // Michael D. McCool 00007 // 00008 // This software is provided 'as-is', without any express or implied 00009 // warranty. In no event will the authors be held liable for any damages 00010 // arising from the use of this software. 00011 // 00012 // Permission is granted to anyone to use this software for any purpose, 00013 // including commercial applications, and to alter it and redistribute it 00014 // freely, subject to the following restrictions: 00015 // 00016 // 1. The origin of this software must not be misrepresented; you must 00017 // not claim that you wrote the original software. If you use this 00018 // software in a product, an acknowledgment in the product documentation 00019 // would be appreciated but is not required. 00020 // 00021 // 2. Altered source versions must be plainly marked as such, and must 00022 // not be misrepresented as being the original software. 00023 // 00024 // 3. This notice may not be removed or altered from any source 00025 // distribution. 00027 #include "ShSmBackend.hpp" 00028 #include <iostream> 00029 #include <sstream> 00030 #include "ShEnvironment.hpp" 00031 #include "ShContext.hpp" 00032 #include "ShDebug.hpp" 00033 #include "ShError.hpp" 00034 #include "ShOptimizations.hpp" 00035 #include "ShLinearAllocator.hpp" 00036 #include "ShCtrlGraph.hpp" 00037 #include "ShVariable.hpp" 00038 #include "ShStream.hpp" 00039 00040 namespace ShSm { 00041 00042 const int SmMaxTR = 256; 00043 00044 using namespace SH; 00045 00046 static SH::ShPointer<Backend> instance = new Backend(); 00047 static std::string* opNames; 00048 00049 void initOps(); 00050 00051 std::string SmRegister::print() const 00052 { 00053 std::ostringstream stream; 00054 switch(type) { 00055 case SHSM_REG_INPUT: 00056 stream << "iR"; 00057 break; 00058 case SHSM_REG_OUTPUT: 00059 stream << "oR"; 00060 break; 00061 case SHSM_REG_TEMP: 00062 stream << "tR"; 00063 break; 00064 case SHSM_REG_CONST: 00065 stream << "cR"; 00066 break; 00067 case SHSM_REG_TEXTURE: 00068 stream << index; 00069 return stream.str(); 00070 } 00071 stream << "[" << index << "]"; 00072 00073 return stream.str(); 00074 } 00075 00076 BackendCode::BackendCode(ShPointer<Backend> backend, const ShProgram& shader, 00077 const std::string& target) 00078 : m_backend(backend), m_shader(shader), m_originalShader(shader), 00079 m_smShader(0), m_target(target), 00080 m_maxCR(0), m_maxTR(0), m_maxIR(0), m_maxOR(0), m_maxTex(0), 00081 m_cR(0), m_tR(0), m_iR(0), m_oR(0) 00082 { 00083 } 00084 00085 BackendCode::~BackendCode() 00086 { 00087 delete [] m_cR; 00088 delete [] m_tR; 00089 delete [] m_iR; 00090 delete [] m_oR; 00091 00092 for (TextureNodeMap::iterator I = m_textureMap.begin(); I != m_textureMap.end(); ++I) { 00093 smDeleteTexture(I->second); 00094 } 00095 } 00096 00097 bool BackendCode::allocateRegister(const SH::ShVariableNodePtr& var) 00098 { 00099 if (!var) return true; 00100 if (var->kind() != SH_TEMP) return true; // ignore anything but temporaries 00101 if (var->uniform()) return true; 00102 00103 if (m_tempRegs.empty()) { 00104 SH_DEBUG_WARN("Oh no, out of registers!"); 00105 return false; 00106 } 00107 00108 int idx = m_tempRegs.front(); 00109 m_tempRegs.pop_front(); 00110 00111 if (idx + 1 > m_maxTR) m_maxTR = idx + 1; 00112 m_registers[var] = SmRegister(SHSM_REG_TEMP, idx); 00113 return true; 00114 } 00115 00116 void BackendCode::freeRegister(const SH::ShVariableNodePtr& var) 00117 { 00118 if (!var) return; 00119 if (var->kind() != SH_TEMP) return; // ignore anything but temporaries 00120 if (var->uniform()) return; 00121 00122 if (m_registers.find(var) == m_registers.end()) { 00123 shError( ShBackendException( "Cannot find register to free" ) ); 00124 } 00125 m_tempRegs.push_front(m_registers[var].index); 00126 } 00127 00128 int shSmTarget(const std::string& target) 00129 { 00130 if (target == "gpu:vertex") return 0; 00131 if (target == "gpu:fragment") return 1; 00132 return -1; 00133 } 00134 00135 void BackendCode::upload() 00136 { 00137 SH_DEBUG_PRINT("Uploading shader"); 00138 m_smShader = smDeclareShader(shSmTarget(m_target)); 00139 smShaderBegin(m_smShader); 00140 00141 for (int i = 0; i < m_maxTR; i++) m_tR[i] = smReg(); 00142 for (int i = 0; i < m_maxIR; i++) m_iR[i] = smInputReg(i); 00143 for (int i = 0; i < m_maxOR; i++) m_oR[i] = smOutputReg(i); 00144 for (int i = 0; i < m_maxCR; i++) m_cR[i] = smConstantReg(i); 00145 00146 int i = 0; 00147 for (SmInstList::const_iterator I = m_instructions.begin(); I != m_instructions.end(); 00148 ++I) { 00149 if (I->op == OP_TEX) { 00150 SH_DEBUG_PRINT("Adding an OP_TEX"); 00151 if (getReg(I->src2.node()).type != SHSM_REG_TEXTURE) { 00152 shError( ShBackendException( "src regster for OP_TEX is not a texture register" ) ); 00153 } 00154 smInstr(OP_TEX, getSmReg(I->dest), getSmReg(I->src1), getReg(I->src2.node()).index); 00155 } else if (I->src1.null()) { 00156 smInstr(I->op, getSmReg(I->dest)); 00157 } else if (I->src2.null()) { 00158 smInstr(I->op, getSmReg(I->dest), getSmReg(I->src1)); 00159 } else if (I->src3.null()) { 00160 smInstr(I->op, getSmReg(I->dest), getSmReg(I->src1), getSmReg(I->src2)); 00161 } else { 00162 smInstr(I->op, getSmReg(I->dest), getSmReg(I->src1), getSmReg(I->src2), getSmReg(I->src3)); 00163 } 00164 i++; 00165 } 00166 SH_DEBUG_PRINT(i << " instructions uploaded."); 00167 smShaderEnd(); 00168 } 00169 00170 void BackendCode::bind() 00171 { 00172 if (!m_smShader) upload(); 00173 00174 SH_DEBUG_PRINT("Binding shader"); 00175 00176 smBindShader(m_smShader); 00177 SH::ShEnvironment::boundShaders()[m_target] = m_originalShader; 00178 00179 // Initialize constants 00180 for (RegMap::const_iterator I = m_registers.begin(); I != m_registers.end(); ++I) { 00181 ShVariableNodePtr node = I->first; 00182 SmRegister reg = I->second; 00183 if (node->hasValues() && reg.type == SHSM_REG_CONST) { 00184 float values[4]; 00185 int i; 00186 for (i = 0; i < node->size(); i++) { 00187 values[i] = node->getValue(i); 00188 } 00189 for (; i < 4; i++) { 00190 values[i] = 0.0; 00191 } 00192 smModLocalConstant(shSmTarget(m_target), reg.index, smTuple(values[0], values[1], values[2], values[3])); 00193 } 00194 } 00195 00196 SH_DEBUG_PRINT("Uploading textures..."); 00197 for (ShProgramNode::VarList::const_iterator I = m_shader->textures.begin(); I != m_shader->textures.end(); 00198 ++I) { 00199 ShDataTextureNodePtr texture = *I; 00200 if (!texture) { 00201 SH_DEBUG_WARN((*I)->name() << " is not a valid texture!"); 00202 continue; 00203 } 00204 if (m_textureMap.find(texture) == m_textureMap.end()) { 00205 ShDataMemoryObjectPtr texmem = texture->mem(); 00206 if( !texmem ) { 00207 SH_DEBUG_WARN((*I)->name() << " has invalid texture data!"); 00208 continue; 00209 } 00210 // TODO: Other types of textures. 00211 00212 m_textureMap[texture] = smNewTexture2DRect(texture->width(), texture->height(), 00213 texture->elements(), SM_FLOAT); 00214 SMtexture smTex = m_textureMap[texture]; 00215 smTexImage2D(smTex, 0, const_cast<float*>(texmem->data())); 00216 00217 // TODO: Capabilities 00218 smTexWrapS(smTex, SM_REPEAT); 00219 smTexWrapT(smTex, SM_REPEAT); 00220 } else { 00221 SH_DEBUG_PRINT("Texture already allocated"); 00222 } 00223 SH_DEBUG_PRINT("Binding texture " << m_textureMap[texture] << " to texture unit " << getReg(texture).index); 00224 smBindTexture(shSmTarget(m_target), getReg(texture).index, m_textureMap[texture]); 00225 } 00226 } 00227 00228 std::string BackendCode::printVar(const ShVariable& var) 00229 { 00230 const char* swizChars = "xyzw"; 00231 00232 std::string out; 00233 00234 if (var.neg()) out += "-"; 00235 00236 out += getReg(var.node()).print(); 00237 00238 if (var.node()->kind() == SH_TEXTURE) return out; 00239 00240 if (var.swizzle().size()) { 00241 out += "[\""; 00242 for (int i = 0; i < std::min(var.swizzle().size(), 4); i++) { 00243 out += swizChars[var.swizzle()[i]]; 00244 } 00245 out += "\"]"; 00246 } else { 00247 if (var.size() < 4) { 00248 out += "[\""; 00249 for (int i = 0; i < var.size(); i++) out += swizChars[i]; 00250 out += "\"]"; 00251 } 00252 } 00253 return out; 00254 } 00255 00256 void BackendCode::updateUniform(const ShVariableNodePtr& uniform) 00257 { 00258 if (!haveReg(uniform)) return; 00259 00260 SmRegister reg = getReg(uniform); 00261 00262 float values[4]; 00263 int i; 00264 for (i = 0; i < uniform->size(); i++) { 00265 values[i] = uniform->getValue(i); 00266 } 00267 for (; i < 4; i++) { 00268 values[i] = 0.0; 00269 } 00270 smModLocalConstant(shSmTarget(m_target), reg.index, smTuple(values[0], values[1], values[2], values[3])); 00271 } 00272 00273 void BackendCode::generate() { 00274 // Transform code to be ARB_fragment_program compatible 00275 m_shader = cloneProgram(m_originalShader); 00276 ShEnvironment::shader = m_shader; 00277 ShTransformer transform(m_shader); 00278 00279 transform.convertInputOutput(); 00280 transform.splitTuples(4, m_splits); 00281 00282 if(transform.changed()) { 00283 optimize(m_shader); 00284 m_shader->collectVariables(); 00285 } else { 00286 m_shader = m_originalShader; 00287 ShEnvironment::shader = m_shader; 00288 } 00289 00290 shader->ctrlGraph->entry->clearMarked(); 00291 generateNode(code, entry); 00292 shader->ctrlGraph->entry->clearMarked(); 00293 00294 code->allocRegs(); 00295 } 00296 00297 std::ostream& BackendCode::print(std::ostream& out) 00298 { 00299 out << "SMshader shader = smDeclareShader(" << shSmTarget(m_target) << ");" << std::endl; 00300 out << "{" << std::endl; 00301 out << "smShaderBegin(shader);" << std::endl; 00302 out << std::endl; 00303 out << "// Register declarations" << std::endl; 00304 out << "SMreg tR[" << m_maxTR << "];" << std::endl; 00305 out << "for (int i = 0; i < " << m_maxTR << "; i++) tR[i] = smReg();" << std::endl; 00306 out << "SMreg iR[" << m_maxIR << "];" << std::endl; 00307 for (int i = 0; i < m_maxIR; i++) out << "iR[" << i << "] = smInputReg(" << i << ");" << std::endl; 00308 out << "SMreg oR[" << m_maxOR << "];" << std::endl; 00309 for (int i = 0; i < m_maxOR; i++) out << "oR[" << i << "] = smOutputReg(" << i << ");" << std::endl; 00310 out << "SMreg cR[" << m_maxCR << "];" << std::endl; 00311 for (int i = 0; i < m_maxCR; i++) out << "cR[" << i << "] = smConstantReg(" << i << ");" << std::endl; 00312 00313 out << std::endl; 00314 out << "// Shader body" << std::endl; 00315 #ifdef SH_DEBUG 00316 int i = 0; 00317 #endif 00318 for (SmInstList::const_iterator I = m_instructions.begin(); I != m_instructions.end(); 00319 ++I) { 00320 out << "sm" << opNames[I->op] << "(" << printVar(I->dest); 00321 if (!I->src1.null()) { 00322 out << ", " << printVar(I->src1); 00323 } 00324 if (!I->src2.null()) { 00325 out << ", " << printVar(I->src2); 00326 } 00327 if (!I->src3.null()) { 00328 out << ", " << printVar(I->src3); 00329 } 00330 out << ");"; 00331 #ifdef SH_DEBUG 00332 out << " // PC = " << i++; 00333 #endif 00334 out << std::endl; 00335 } 00336 out << std::endl; 00337 out << "smShaderEnd();" << std::endl; 00338 out << "}" << std::endl; 00339 00340 out << std::endl; 00341 out << "// Initialize constant registers" << std::endl; 00342 00343 // Set constants. 00344 // This should really only happen at bind time. 00345 // The question is: how useful is printing the code out in the 00346 // first place if the constant values aren't really accessible. 00347 // I guess printing out the code is more of a debugging tool than anything. 00348 for (RegMap::const_iterator I = m_registers.begin(); I != m_registers.end(); ++I) { 00349 ShVariableNodePtr node = I->first; 00350 SmRegister reg = I->second; 00351 if (node->hasValues() && reg.type == SHSM_REG_CONST) { 00352 out << "smModLocalConstant(" << shSmTarget(m_target) << ", " << reg.index << ", smTuple("; 00353 for (int i = 0; i < node->size(); i++) { 00354 if (i) out << ", "; 00355 out << node->getValue(i); 00356 } 00357 out << "));" << std::endl; 00358 } 00359 } 00360 return out; 00361 } 00362 00363 std::ostream& BackendCode::printInputOutputFormat(std::ostream& out) { 00364 // TODO implement this 00365 return out; 00366 } 00367 00368 void BackendCode::genScalarVectorInst( SH::ShVariable dest, SH::ShVariable op1, 00369 SH::ShVariable op2, Operation opcode ) { 00370 if (op1.size() != 1 || op2.size() != 1) { 00371 if (op1.size() == 1) { 00372 int* swizzle = new int[op2.size()]; 00373 for (int i = 0; i < op2.size(); i++) swizzle[i] = 0; 00374 m_instructions.push_back(SmInstruction((Operation)opcode, dest, 00375 op1(op2.size(), swizzle), op2)); 00376 delete [] swizzle; 00377 return; 00378 } else if (op2.size() == 1) { 00379 int* swizzle = new int[op1.size()]; 00380 for (int i = 0; i < op1.size(); i++) swizzle[i] = 0; 00381 m_instructions.push_back(SmInstruction((Operation)opcode, dest, op1, 00382 op2(op1.size(), swizzle))); 00383 delete [] swizzle; 00384 return; 00385 } 00386 } 00387 m_instructions.push_back(SmInstruction((Operation)opcode, dest, op1, op2)); 00388 } 00389 00390 void BackendCode::addBasicBlock(const ShBasicBlockPtr& block) 00391 { 00392 for (ShBasicBlock::ShStmtList::const_iterator I = block->begin(); 00393 I != block->end(); ++I) { 00394 const ShStatement& stmt = *I; 00395 switch (stmt.op) { 00396 // TODO: Check number and dimensions of args! 00397 case SH_OP_ASN: 00398 m_instructions.push_back(SmInstruction(OP_MOV, stmt.dest, stmt.src[0])); 00399 break; 00400 case SH_OP_ADD: 00401 m_instructions.push_back(SmInstruction(OP_ADD, stmt.dest, stmt.src[0], stmt.src[1])); 00402 break; 00403 case SH_OP_MUL: 00404 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_MUL); 00405 case SH_OP_DIV: 00406 { 00407 ShVariable rcp(new ShVariableNode(SH_TEMP, stmt.src[1].size())); 00408 m_instructions.push_back(SmInstruction(OP_RCP, rcp, stmt.src[1])); 00409 00410 if (rcp.size() == 1 && stmt.src[0].size() != 1) { 00411 int* swizzle = new int[stmt.src[0].size()]; 00412 for (int i = 0; i < stmt.src[0].size(); i++) swizzle[i] = 0; 00413 m_instructions.push_back(SmInstruction(OP_MUL, stmt.dest, stmt.src[0], 00414 rcp(stmt.src[0].size(), swizzle))); 00415 delete [] swizzle; 00416 } else { 00417 m_instructions.push_back(SmInstruction(OP_MUL, stmt.dest, stmt.src[0], rcp)); 00418 } 00419 break; 00420 } 00421 case SH_OP_SLT: 00422 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SLT); 00423 break; 00424 case SH_OP_SLE: 00425 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SLE); 00426 break; 00427 case SH_OP_SGT: 00428 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SGT); 00429 break; 00430 case SH_OP_SGE: 00431 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SGE); 00432 break; 00433 case SH_OP_SEQ: 00434 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SEQ); 00435 break; 00436 case SH_OP_SNE: 00437 genScalarVectorInst(stmt.dest, stmt.src[0], stmt.src[1], OP_SNE); 00438 break; 00439 case SH_OP_ABS: 00440 m_instructions.push_back(SmInstruction(OP_ABS, stmt.dest, stmt.src[0])); 00441 break; 00442 case SH_OP_CEIL: 00443 m_instructions.push_back(SmInstruction(OP_CEIL, stmt.dest, stmt.src[0])); 00444 break; 00445 case SH_OP_COS: 00446 m_instructions.push_back(SmInstruction(OP_COS, stmt.dest, stmt.src[0])); 00447 break; 00448 case SH_OP_DOT: 00449 { 00450 ShVariable left = stmt.src[0]; 00451 ShVariable right = stmt.src[1]; 00452 00453 // expand left/right if they are scalar 00454 if( left.size() < right.size() ) { 00455 int *swizzle = new int[ right.size() ]; 00456 for( int i = 0; i < right.size(); ++i ) swizzle[i] = 0; 00457 left = left( right.size(), swizzle ); 00458 delete swizzle; 00459 } else if( right.size() < left.size() ) { 00460 int *swizzle = new int[ left.size() ]; 00461 for( int i = 0; i < left.size(); ++i ) swizzle[i] = 0; 00462 right = right( left.size(), swizzle ); 00463 delete swizzle; 00464 } 00465 00466 if (left.size() == 3) { 00467 m_instructions.push_back(SmInstruction(OP_DP3, stmt.dest, left, right)); 00468 } else if (left.size() == 4) { 00469 m_instructions.push_back(SmInstruction(OP_DP4, stmt.dest, left, right)); 00470 } else if (left.size() == 1) { 00471 m_instructions.push_back(SmInstruction(OP_MUL, stmt.dest, left, right)); 00472 } else { 00473 ShVariable mul(new ShVariableNode(SH_TEMP, left.size())); 00474 m_instructions.push_back(SmInstruction(OP_MUL, mul, left, right)); 00475 m_instructions.push_back(SmInstruction(OP_ADD, stmt.dest, mul(0), mul(1))); 00476 for (int i = 2; i < left.size(); i++) { 00477 m_instructions.push_back(SmInstruction(OP_ADD, stmt.dest, stmt.dest, mul(i))); 00478 } 00479 } 00480 break; 00481 } 00482 case SH_OP_FLR: 00483 m_instructions.push_back(SmInstruction(OP_FLR, stmt.dest, stmt.src[0])); 00484 break; 00485 case SH_OP_FRAC: 00486 m_instructions.push_back(SmInstruction(OP_FRC, stmt.dest, stmt.src[0])); 00487 break; 00488 case SH_OP_LRP: 00489 { 00490 if (stmt.src[0].size() == 1 && stmt.src[1].size() != 1) { 00491 int* swizzle = new int[stmt.src[1].size()]; 00492 for (int i = 0; i < stmt.src[1].size(); i++) swizzle[i] = 0; 00493 m_instructions.push_back(SmInstruction(OP_LRP, stmt.dest, stmt.src[0](stmt.src[1].size(),swizzle), stmt.src[1], stmt.src[2])); 00494 delete [] swizzle; 00495 } else { 00496 m_instructions.push_back(SmInstruction(OP_LRP, stmt.dest, stmt.src[0], stmt.src[1], stmt.src[2])); 00497 } 00498 } 00499 break; 00500 case SH_OP_MAD: 00501 { 00502 if (stmt.src[0].size() != 1 || stmt.src[1].size() != 1) { 00503 if (stmt.src[0].size() == 1) { 00504 int* swizzle = new int[stmt.src[1].size()]; 00505 for (int i = 0; i < stmt.src[1].size(); i++) swizzle[i] = 0; 00506 m_instructions.push_back(SmInstruction(OP_MAD, stmt.dest, 00507 stmt.src[0](stmt.src[1].size(), swizzle), stmt.src[1], stmt.src[2])); 00508 delete [] swizzle; 00509 break; 00510 } else if (stmt.src[1].size() == 1) { 00511 int* swizzle = new int[stmt.src[0].size()]; 00512 for (int i = 0; i < stmt.src[0].size(); i++) swizzle[i] = 0; 00513 m_instructions.push_back(SmInstruction(OP_MAD, stmt.dest, stmt.src[0], 00514 stmt.src[1](stmt.src[0].size(), swizzle), stmt.src[2])); 00515 delete [] swizzle; 00516 break; 00517 } 00518 } 00519 00520 m_instructions.push_back(SmInstruction(OP_MAD, stmt.dest, stmt.src[0], stmt.src[1], stmt.src[2])); 00521 break; 00522 } 00523 00524 case SH_OP_MAX: 00525 m_instructions.push_back(SmInstruction(OP_MAX, stmt.dest, stmt.src[0], stmt.src[1])); 00526 break; 00527 case SH_OP_MIN: 00528 m_instructions.push_back(SmInstruction(OP_MIN, stmt.dest, stmt.src[0], stmt.src[1])); 00529 break; 00530 case SH_OP_MOD: 00531 if (stmt.src[1].size() == 1 && stmt.src[0].size() != 1) { 00532 int* swizzle = new int[stmt.src[0].size()]; 00533 for (int i = 0; i < stmt.src[0].size(); i++) swizzle[i] = 0; 00534 m_instructions.push_back(SmInstruction(OP_FMOD, stmt.dest, stmt.src[0], 00535 stmt.src[1](stmt.src[0].size(), swizzle))); 00536 delete [] swizzle; 00537 } else { 00538 m_instructions.push_back(SmInstruction(OP_FMOD, stmt.dest, stmt.src[0], stmt.src[1])); 00539 } 00540 break; 00541 case SH_OP_POW: 00542 for (int i = 0; i < stmt.src[0].size(); i++) { 00543 m_instructions.push_back(SmInstruction(OP_POW, stmt.dest(i), stmt.src[0](i), stmt.src[1](i))); 00544 } 00545 break; 00546 case SH_OP_SIN: 00547 m_instructions.push_back(SmInstruction(OP_SIN, stmt.dest, stmt.src[0])); 00548 break; 00549 case SH_OP_SQRT: 00550 { 00551 int rsize = stmt.src[0].size(); 00552 ShVariable rsq(new ShVariableNode(SH_TEMP, rsize)); 00553 m_instructions.push_back(SmInstruction(OP_RSQ, rsq, stmt.src[0])); 00554 m_instructions.push_back(SmInstruction(OP_RCP, stmt.dest, rsq)); 00555 break; 00556 } 00557 case SH_OP_NORM: 00558 m_instructions.push_back(SmInstruction(OP_NORM, stmt.dest, stmt.src[0])); 00559 break; 00560 case SH_OP_TEX: 00561 { 00562 ShTextureNodePtr texture = stmt.src[0].node(); 00563 if (!texture) break; 00564 // TODO: Check texture for not mipmapped 00565 00566 /* 00567 // Scale the lookup as necessary 00568 ShVariableNodePtr scale = new ShVariableNode(SH_CONST, stmt.src[1].size()); 00569 00570 scale->setValue(0, texture->width()); 00571 if (stmt.src[1].size() >= 2) scale->setValue(1, texture->height()); 00572 00573 ShVariable scaled(new ShVariableNode(SH_TEMP, stmt.src[1].size())); 00574 m_instructions.push_back(SmInstruction(OP_MUL, scaled, stmt.src[1], ShVariable(scale))); 00575 */ 00576 m_instructions.push_back(SmInstruction(OP_TEX, stmt.dest, stmt.src[1], stmt.src[0])); 00577 break; 00578 } 00579 case SH_OP_XPD: 00580 m_instructions.push_back(SmInstruction(OP_XPD, stmt.dest, stmt.src[0], stmt.src[1])); 00581 break; 00582 case SH_OP_COND: 00583 if (stmt.src[0].size() == 1 && stmt.src[1].size() != 1) { 00584 int* swizzle = new int[stmt.src[1].size()]; 00585 for (int i = 0; i < stmt.src[1].size(); i++) swizzle[i] = 0; 00586 m_instructions.push_back(SmInstruction(OP_CMP, stmt.dest, -stmt.src[0](stmt.src[1].size(), swizzle), 00587 stmt.src[1], stmt.src[2])); 00588 delete [] swizzle; 00589 } else { 00590 m_instructions.push_back(SmInstruction(OP_CMP, stmt.dest, -stmt.src[0], stmt.src[1], stmt.src[2])); 00591 } 00592 break; 00593 default: 00594 // TODO: other ops 00595 SH_DEBUG_WARN(opInfo[stmt.op].name << " not implement in SM backend"); 00596 break; 00597 } 00598 } 00599 } 00600 00601 void BackendCode::allocRegs() 00602 { 00603 for (ShProgramNode::VarList::const_iterator I = m_shader->inputs.begin(); 00604 I != m_shader->inputs.end(); ++I) { 00605 getReg(*I); 00606 } 00607 for (ShProgramNode::VarList::const_iterator I = m_shader->outputs.begin(); 00608 I != m_shader->outputs.end(); ++I) { 00609 getReg(*I); 00610 } 00611 00612 m_tempRegs.clear(); 00613 m_maxTR = 0; 00614 for (int i = 0; i < SmMaxTR; i++) { 00615 m_tempRegs.push_back(i); 00616 } 00617 00618 ShLinearAllocator allocator(ShBackendCodePtr(this)); 00619 00620 for (std::size_t i = 0; i < m_instructions.size(); i++) { 00621 SmInstruction instr = m_instructions[i]; 00622 getReg(instr.dest.node()); 00623 allocator.mark(instr.dest.node(), i); 00624 getReg(instr.src1.node()); 00625 allocator.mark(instr.src1.node(), i); 00626 getReg(instr.src2.node()); 00627 allocator.mark(instr.src2.node(), i); 00628 getReg(instr.src3.node()); 00629 allocator.mark(instr.src3.node(), i); 00630 } 00631 allocator.allocate(); 00632 00633 m_tempRegs.clear(); 00634 00635 m_tR = new SMreg[m_maxTR]; 00636 m_cR = new SMreg[m_maxCR]; 00637 m_iR = new SMreg[m_maxIR]; 00638 m_oR = new SMreg[m_maxOR]; 00639 } 00640 00641 bool BackendCode::haveReg(const SH::ShVariableNodePtr& var) 00642 { 00643 return m_registers.find(var) != m_registers.end(); 00644 } 00645 00646 SmRegister BackendCode::getReg(const SH::ShVariableNodePtr& var) 00647 { 00648 if (!var) return SmRegister(SHSM_REG_TEMP, -1); 00649 RegMap::const_iterator I = m_registers.find(var); 00650 if (I != m_registers.end()) return I->second; 00651 00652 if (var->uniform() && var->kind() != SH_TEXTURE) { 00653 m_registers[var] = SmRegister(SHSM_REG_CONST, m_maxCR++); 00654 return m_registers[var]; 00655 } 00656 00657 if (var->kind() == SH_TEMP) return SmRegister(SHSM_REG_TEMP, -1); 00658 00659 switch (var->kind()) { 00660 case SH_INPUT: 00661 m_registers[var] = SmRegister(SHSM_REG_INPUT, m_maxIR++); 00662 break; 00663 case SH_OUTPUT: 00664 m_registers[var] = SmRegister(SHSM_REG_OUTPUT, m_maxOR++); 00665 break; 00666 case SH_TEMP: 00667 break; 00668 case SH_CONST: 00669 m_registers[var] = SmRegister(SHSM_REG_CONST, m_maxCR++); 00670 break; 00671 case SH_TEXTURE: 00672 SH_DEBUG_PRINT("Allocating texture unit " << m_maxTex); 00673 m_registers[var] = SmRegister(SHSM_REG_TEXTURE, m_maxTex++); 00674 break; 00675 } 00676 return m_registers[var]; 00677 } 00678 00679 SMreg BackendCode::getSmReg(const SH::ShVariable& var) 00680 { 00681 SmRegister reg = getReg(var.node()); 00682 if (reg.index < 0) { 00683 SH_DEBUG_WARN("Could not obtain register!"); 00684 return SMreg(); // TODO: Something better? 00685 } 00686 00687 SMreg smReg; 00688 // Get the storage register 00689 switch (reg.type) { 00690 case SHSM_REG_INPUT: 00691 smReg = m_iR[reg.index]; 00692 break; 00693 case SHSM_REG_OUTPUT: 00694 smReg = m_oR[reg.index]; 00695 break; 00696 case SHSM_REG_TEMP: 00697 smReg = m_tR[reg.index]; 00698 break; 00699 case SHSM_REG_CONST: 00700 smReg = m_cR[reg.index]; 00701 break; 00702 case SHSM_REG_TEXTURE: 00703 return SMreg(); 00704 break; 00705 default: 00706 SH_DEBUG_WARN("Unknown register type " << (int)reg.type); 00707 break; 00708 } 00709 00710 // Swizzling 00711 const char* swizChars = "xyzw"; 00712 std::string swizzle; 00713 if (var.swizzle().size()) { 00714 for (int i = 0; i < std::min(var.swizzle().size(), 4); i++) { 00715 swizzle += swizChars[var.swizzle()[i]]; 00716 } 00717 } else { 00718 if (var.size() < 4) { 00719 for (int i = 0; i < var.size(); i++) swizzle += swizChars[i]; 00720 } 00721 } 00722 if (!swizzle.empty()) smReg = smReg[swizzle.c_str()]; 00723 00724 // Negation 00725 if (var.neg()) smReg = -smReg; 00726 00727 return smReg; 00728 } 00729 00730 Backend::Backend() 00731 { 00732 SH_DEBUG_PRINT("SM Backend loaded."); 00733 initOps(); 00734 } 00735 00736 Backend::~Backend() 00737 { 00738 delete opNames; 00739 SH_DEBUG_PRINT("SM Backend unloaded."); 00740 } 00741 00742 void Backend::generateNode(BackendCodePtr& code, const ShCtrlGraphNodePtr& node) 00743 { 00744 if (node->marked()) return; 00745 node->mark(); 00746 if (node->block) code->addBasicBlock(node->block); 00747 for (std::vector<ShCtrlGraphBranch>::const_iterator I = node->successors.begin(); 00748 I != node->successors.end(); ++I) { 00749 // TODO: generate branch 00750 } 00751 if (node->follower) { 00752 // TODO: generate jump 00753 generateNode(code, node->follower); 00754 } 00755 for (std::vector<ShCtrlGraphBranch>::const_iterator I = node->successors.begin(); 00756 I != node->successors.end(); ++I) { 00757 generateNode(code, I->node); 00758 } 00759 } 00760 00761 ShBackendCodePtr Backend::generateCode(const std::string& target, const ShProgram& shader) 00762 { 00763 00764 BackendCodePtr code = new BackendCode(this, shader, target); 00765 code->generate(); 00766 00767 return code; 00768 } 00769 00770 std::string Backend::name() const 00771 { 00772 return "sm"; 00773 } 00774 00775 void initOps() 00776 { 00777 opNames = new std::string[256]; 00778 opNames[OP_ABS] = "ABS"; 00779 opNames[OP_ADD] = "ADD"; 00780 opNames[OP_BRA] = "BRA"; 00781 opNames[OP_CAL] = "CAL"; 00782 opNames[OP_CEIL] = "CEIL"; 00783 opNames[OP_CMP] = "CMP"; 00784 opNames[OP_COS] = "COS"; 00785 opNames[OP_DDX] = "DDX"; 00786 opNames[OP_DDY] = "DDY"; 00787 opNames[OP_DP3] = "DP3"; 00788 opNames[OP_DP4] = "DP4"; 00789 opNames[OP_DPH] = "DPH"; 00790 opNames[OP_DST] = "DST"; 00791 opNames[OP_EX2] = "EX2"; 00792 opNames[OP_EXP] = "EXP"; 00793 opNames[OP_FLR] = "FLR"; 00794 opNames[OP_FMOD] = "FMOD"; 00795 opNames[OP_FRC] = "FRC"; 00796 opNames[OP_KIL] = "KIL"; 00797 opNames[OP_LG2] = "LG2"; 00798 opNames[OP_LIT] = "LIT"; 00799 opNames[OP_LOG] = "LOG"; 00800 opNames[OP_LRP] = "LRP"; 00801 opNames[OP_MAD] = "MAD"; 00802 opNames[OP_MAX] = "MAX"; 00803 opNames[OP_MIN] = "MIN"; 00804 opNames[OP_MOV] = "MOV"; 00805 opNames[OP_MUL] = "MUL"; 00806 opNames[OP_NORM] = "NORM"; 00807 opNames[OP_OUT] = "OUT"; 00808 opNames[OP_POW] = "POW"; 00809 opNames[OP_RCC] = "RCC"; 00810 opNames[OP_RCP] = "RCP"; 00811 opNames[OP_RET] = "RET"; 00812 opNames[OP_RFL] = "RFL"; 00813 opNames[OP_RSQ] = "RSQ"; 00814 opNames[OP_SEQ] = "SEQ"; 00815 opNames[OP_SCS] = "SCS"; 00816 opNames[OP_SFL] = "SFL"; 00817 opNames[OP_SGE] = "SGE"; 00818 opNames[OP_SGT] = "SGT"; 00819 opNames[OP_SIN] = "SIN"; 00820 opNames[OP_SLE] = "SLE"; 00821 opNames[OP_SLT] = "SLT"; 00822 opNames[OP_SNE] = "SNE"; 00823 opNames[OP_SSG] = "SSG"; 00824 opNames[OP_STR] = "STR"; 00825 opNames[OP_SUB] = "SUB"; 00826 opNames[OP_TEX] = "TEX"; 00827 opNames[OP_TXB] = "TXB"; 00828 opNames[OP_TXD] = "TXD"; 00829 opNames[OP_TXP] = "TXP"; 00830 opNames[OP_X2D] = "X2D"; 00831 opNames[OP_XPD] = "XPD"; 00832 opNames[OP_HLT] = "HLT"; 00833 } 00834 00835 void Backend::execute(const ShProgram& program, ShStream& dest) 00836 { 00837 // TODO: NOT YET IMPLEMENTED 00838 } 00839 00840 }

Generated on Mon Oct 18 14:17:40 2004 for Sh by doxygen 1.3.7