Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ArbCode.cpp

00001 // Sh: A GPU metaprogramming language. 00002 // 00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory 00004 // Project administrator: Michael D. McCool 00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa, 00006 // Michael D. McCool 00007 // 00008 // This software is provided 'as-is', without any express or implied 00009 // warranty. In no event will the authors be held liable for any damages 00010 // arising from the use of this software. 00011 // 00012 // Permission is granted to anyone to use this software for any purpose, 00013 // including commercial applications, and to alter it and redistribute it 00014 // freely, subject to the following restrictions: 00015 // 00016 // 1. The origin of this software must not be misrepresented; you must 00017 // not claim that you wrote the original software. If you use this 00018 // software in a product, an acknowledgment in the product documentation 00019 // would be appreciated but is not required. 00020 // 00021 // 2. Altered source versions must be plainly marked as such, and must 00022 // not be misrepresented as being the original software. 00023 // 00024 // 3. This notice may not be removed or altered from any source 00025 // distribution. 00027 #include "ArbCode.hpp" 00028 #include <iostream> 00029 #include <sstream> 00030 #include <cmath> 00031 #include <bitset> 00032 00033 #include "ShVariable.hpp" 00034 #include "ShDebug.hpp" 00035 #include "ShLinearAllocator.hpp" 00036 #include "ShInternals.hpp" 00037 #include "ShOptimizations.hpp" 00038 #include "ShEnvironment.hpp" 00039 #include "ShContext.hpp" 00040 #include "ShTextureNode.hpp" 00041 #include "ShSyntax.hpp" 00042 #include "ArbReg.hpp" 00043 #include "Arb.hpp" 00044 #include "ShAttrib.hpp" 00045 #include "ShError.hpp" 00046 00047 namespace shgl { 00048 00049 using namespace SH; 00050 00051 #define shGlProgramStringARB glProgramStringARB 00052 #define shGlActiveTextureARB glActiveTextureARB 00053 #define shGlProgramLocalParameter4fvARB glProgramLocalParameter4fvARB 00054 #define shGlProgramEnvParameter4fvARB glProgramEnvParameter4fvARB 00055 #define shGlGetProgramivARB glGetProgramivARB 00056 #define shGlGenProgramsARB glGenProgramsARB 00057 #define shGlDeleteProgramsARB glDeleteProgramsARB 00058 #define shGlBindProgramARB glBindProgramARB 00059 00060 struct ArbBindingSpecs { 00061 ArbRegBinding binding; 00062 int maxBindings; 00063 ShSemanticType semanticType; 00064 bool allowGeneric; 00065 }; 00066 00067 ArbBindingSpecs arbVertexAttribBindingSpecs[] = { 00068 {SH_ARB_REG_VERTEXPOS, 1, SH_POSITION, false}, 00069 {SH_ARB_REG_VERTEXNRM, 1, SH_NORMAL, false}, 00070 {SH_ARB_REG_VERTEXCOL, 1, SH_COLOR, false}, 00071 {SH_ARB_REG_VERTEXTEX, 8, SH_TEXCOORD, true}, 00072 {SH_ARB_REG_VERTEXFOG, 1, SH_ATTRIB, true}, 00073 {SH_ARB_REG_NONE, 0, SH_ATTRIB, true} 00074 }; 00075 00076 ArbBindingSpecs arbFragmentAttribBindingSpecs[] = { 00077 {SH_ARB_REG_FRAGMENTPOS, 1, SH_POSITION, false}, 00078 {SH_ARB_REG_FRAGMENTCOL, 1, SH_COLOR, false}, 00079 {SH_ARB_REG_FRAGMENTTEX, 8, SH_TEXCOORD, true}, 00080 {SH_ARB_REG_FRAGMENTFOG, 1, SH_ATTRIB, true}, 00081 {SH_ARB_REG_NONE, 0, SH_ATTRIB, true} 00082 }; 00083 00084 ArbBindingSpecs arbVertexOutputBindingSpecs[] = { 00085 {SH_ARB_REG_RESULTPOS, 1, SH_POSITION, false}, 00086 {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, false}, 00087 {SH_ARB_REG_RESULTTEX, 8, SH_TEXCOORD, true}, 00088 {SH_ARB_REG_RESULTFOG, 1, SH_ATTRIB, true}, 00089 {SH_ARB_REG_RESULTPTS, 1, SH_ATTRIB, true}, 00090 {SH_ARB_REG_NONE, 0, SH_ATTRIB} 00091 }; 00092 00093 ArbBindingSpecs arbFragmentOutputBindingSpecs[] = { 00094 {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, true}, 00095 {SH_ARB_REG_RESULTDPT, 1, SH_ATTRIB, false}, 00096 {SH_ARB_REG_NONE, 0, SH_ATTRIB} 00097 }; 00098 00099 ArbBindingSpecs* arbBindingSpecs(bool output, const std::string& unit) 00100 { 00101 if (unit == "vertex") 00102 return output ? arbVertexOutputBindingSpecs : arbVertexAttribBindingSpecs; 00103 if (unit == "fragment") 00104 return output ? arbFragmentOutputBindingSpecs : arbFragmentAttribBindingSpecs; 00105 return 0; 00106 } 00107 00108 using namespace SH; 00109 00110 ArbCode::ArbCode(const ShProgramNodeCPtr& shader, const std::string& unit, 00111 TextureStrategy* textures) 00112 : m_textures(textures), m_shader(0), m_originalShader(shader), m_unit(unit), 00113 m_numTemps(0), m_numInputs(0), m_numOutputs(0), m_numParams(0), m_numConsts(0), 00114 m_numTextures(0), m_programId(0), m_environment(0), m_max_label(0) 00115 { 00116 if (unit == "fragment") m_environment |= SH_ARB_FP; 00117 if (unit == "vertex") m_environment |= SH_ARB_VP; 00118 00119 const GLubyte* extensions = glGetString(GL_EXTENSIONS); 00120 00121 std::string extstr(reinterpret_cast<const char*>(extensions)); 00122 00123 if (unit == "fragment") { 00124 if (extstr.find("NV_fragment_program_option") != std::string::npos) { 00125 m_environment |= SH_ARB_NVFP; 00126 } 00127 if (extstr.find("NV_fragment_program2") != std::string::npos) { 00128 m_environment |= SH_ARB_NVFP2; 00129 } 00130 if (extstr.find("ATI_draw_buffers") != std::string::npos) { 00131 m_environment |= SH_ARB_ATIDB; 00132 } 00133 } 00134 if (unit == "vertex") { 00135 if (extstr.find("NV_vertex_program2_option") != std::string::npos) { 00136 m_environment |= SH_ARB_NVVP2; 00137 } 00138 if (extstr.find("NV_vertex_program3") != std::string::npos) { 00139 m_environment |= SH_ARB_NVVP3; 00140 } 00141 } 00142 } 00143 00144 ArbCode::~ArbCode() 00145 { 00146 } 00147 00148 void ArbCode::generate() 00149 { 00150 // Transform code to be ARB_fragment_program compatible 00151 m_shader = m_originalShader->clone(); 00152 ShContext::current()->enter(m_shader); 00153 ShTransformer transform(m_shader); 00154 00155 transform.convertInputOutput(); 00156 transform.splitTuples(4, m_splits); 00157 transform.convertTextureLookups(); 00158 00159 if(transform.changed()) { 00160 optimize(m_shader); 00161 m_shader->collectVariables(); 00162 } else { 00163 m_shader = shref_const_cast<ShProgramNode>(m_originalShader); 00164 ShContext::current()->exit(); 00165 ShContext::current()->enter(m_shader); 00166 } 00167 00168 if (m_environment & SH_ARB_NVFP2) { 00169 // In NV_fragment_program2, we actually generate structured code. 00170 ShStructural str(m_shader->ctrlGraph); 00171 00172 genStructNode(str.head()); 00173 00174 } else { 00175 m_shader->ctrlGraph->entry()->clearMarked(); 00176 genNode(m_shader->ctrlGraph->entry()); 00177 00178 if (m_environment & SH_ARB_NVVP2) { 00179 m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(m_shader->ctrlGraph->exit()))); 00180 } 00181 } 00182 m_shader->ctrlGraph->entry()->clearMarked(); 00183 allocRegs(); 00184 00185 ShContext::current()->exit(); 00186 } 00187 00188 bool ArbCode::allocateRegister(const ShVariableNodePtr& var) 00189 { 00190 if (!var) return true; 00191 if (var->kind() != SH_TEMP) return true; 00192 if (var->uniform()) return true; 00193 00194 if (m_tempRegs.empty()) { 00195 shError(ShException("ARB Backend: Out of registers")); 00196 return false; 00197 } 00198 00199 int idx = m_tempRegs.front(); 00200 m_tempRegs.pop_front(); 00201 if (idx + 1 > m_numTemps) m_numTemps = idx + 1; 00202 m_registers[var] = new ArbReg(SH_ARB_REG_TEMP, idx); 00203 m_reglist.push_back(m_registers[var]); 00204 00205 return true; 00206 } 00207 00208 void ArbCode::freeRegister(const ShVariableNodePtr& var) 00209 { 00210 if (!var) return; 00211 if (var->kind() != SH_TEMP) return; 00212 if (var->uniform()) return; 00213 00214 SH_DEBUG_ASSERT(m_registers.find(var) != m_registers.end()); 00215 m_tempRegs.push_front(m_registers[var]->index); 00216 } 00217 00218 void ArbCode::upload() 00219 { 00220 if (!m_programId) { 00221 SH_GL_CHECK_ERROR(shGlGenProgramsARB(1, &m_programId)); 00222 } 00223 00224 SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId)); 00225 00226 std::ostringstream out; 00227 print(out); 00228 std::string text = out.str(); 00229 shGlProgramStringARB(arbTarget(m_unit), GL_PROGRAM_FORMAT_ASCII_ARB, 00230 (GLsizei)text.size(), text.c_str()); 00231 int error = glGetError(); 00232 if (error == GL_INVALID_OPERATION) { 00233 int pos = -1; 00234 SH_GL_CHECK_ERROR(glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos)); 00235 if (pos >= 0){ 00236 const unsigned char* message = glGetString(GL_PROGRAM_ERROR_STRING_ARB); 00237 SH_DEBUG_WARN("Error at character " << pos); 00238 SH_DEBUG_WARN("Message: " << message); 00239 while (pos >= 0 && text[pos] != '\n') pos--; 00240 if (pos > 0) pos++; 00241 SH_DEBUG_WARN("Code: " << text.substr(pos, text.find('\n', pos))); 00242 } 00243 } 00244 if (error != GL_NO_ERROR) { 00245 SH_DEBUG_ERROR("Error uploading ARB program (" << m_unit << "): " << error); 00246 SH_DEBUG_ERROR("shGlProgramStringARB(" << arbTarget(m_unit) 00247 << ", GL_PROGRAM_FORMAT_ASCII_ARB, " << (GLsizei)text.size() << 00248 ", <program text>);"); 00249 } 00250 } 00251 00252 void ArbCode::bind() 00253 { 00254 if (!m_programId) { 00255 upload(); 00256 } 00257 00258 SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId)); 00259 00260 00261 ShContext::current()->set_binding(std::string("arb:") + m_unit, 00262 shref_const_cast<ShProgramNode>(m_originalShader)); 00263 00264 // Initialize constants 00265 for (RegMap::const_iterator I = m_registers.begin(); I != m_registers.end(); ++I) { 00266 ShVariableNodePtr node = I->first; 00267 ArbReg reg = *I->second; 00268 if (node->hasValues() && reg.type == SH_ARB_REG_PARAM) { 00269 updateUniform(node); 00270 } 00271 } 00272 // Make sure all textures are loaded. 00273 00274 bindTextures(); 00275 } 00276 00277 void ArbCode::updateUniform(const ShVariableNodePtr& uniform) 00278 { 00279 int i; 00280 00281 if (!uniform) return; 00282 00283 RegMap::const_iterator I = m_registers.find(uniform); 00284 if (I == m_registers.end()) { // perhaps uniform was split 00285 if( m_splits.count(uniform) > 0 ) { 00286 ShTransformer::VarNodeVec &splitVec = m_splits[uniform]; 00287 int offset = 0; 00288 for(ShTransformer::VarNodeVec::iterator it = splitVec.begin(); 00289 it != splitVec.end(); offset += (*it)->size(), ++it) { 00290 for(i = 0; i < (*it)->size(); ++i) { 00291 (*it)->setValue(i, uniform->getValue(i + offset)); 00292 } 00293 updateUniform(*it); 00294 } 00295 } 00296 return; 00297 } 00298 00299 ShTextureNodePtr tex = shref_dynamic_cast<ShTextureNode>(uniform); 00300 if (tex) { 00301 return; 00302 } 00303 00304 const ArbReg& reg = *I->second; 00305 00306 float values[4]; 00307 for (i = 0; i < uniform->size(); i++) { 00308 values[i] = (float)uniform->getValue(i); 00309 } 00310 for (; i < 4; i++) { 00311 values[i] = 0.0; 00312 } 00313 00314 if (reg.type != SH_ARB_REG_PARAM) return; 00315 switch(reg.binding) { 00316 case SH_ARB_REG_PARAMLOC: 00317 SH_GL_CHECK_ERROR(shGlProgramLocalParameter4fvARB(arbTarget(m_unit), reg.bindingIndex, values)); 00318 break; 00319 case SH_ARB_REG_PARAMENV: 00320 SH_GL_CHECK_ERROR(shGlProgramEnvParameter4fvARB(arbTarget(m_unit), reg.bindingIndex, values)); 00321 break; 00322 default: 00323 return; 00324 } 00325 } 00326 00327 std::ostream& ArbCode::printVar(std::ostream& out, bool dest, const ShVariable& var, 00328 bool collectingOp, const ShSwizzle& destSwiz = ShSwizzle(4)) const 00329 { 00330 RegMap::const_iterator I = m_registers.find(var.node()); 00331 if (I == m_registers.end()) { 00332 out << "<no reg for " << var.name() << ">"; 00333 return out; 00334 } 00335 const ArbReg& reg = *I->second; 00336 00337 // Negation 00338 if (var.neg()) out << '-'; 00339 00340 // Register name 00341 out << reg; 00342 00343 // Swizzling 00344 const char* swizChars = "xyzw"; 00345 out << "."; 00346 if (dest) { 00347 bool masked[4] = {false, false, false, false}; 00348 for (int i = 0; i < var.swizzle().size(); i++) { 00349 masked[var.swizzle()[i]] = true; 00350 } 00351 for (int i = 0; i < 4; i++) { 00352 if (masked[i]) out << swizChars[i]; 00353 } 00354 } else if (var.swizzle().size() == 1) { 00355 out << swizChars[var.swizzle()[0]]; 00356 } else if (collectingOp) { 00357 for (int i = 0; i < 4; i++) { 00358 out << swizChars[i < var.swizzle().size() ? var.swizzle()[i] : i]; 00359 } 00360 } else { 00361 for (int i = 0; i < 4; i++) { 00362 int j; 00363 for (j = 0; j < destSwiz.size(); j++) { 00364 if (destSwiz[j] == i) break; 00365 } 00366 if (j == destSwiz.size()) j = i; 00367 out << swizChars[j < var.size() ? var.swizzle()[j] : j]; 00368 } 00369 } 00370 00371 return out; 00372 } 00373 00374 struct LineNumberer { 00375 LineNumberer() { line = 0; } 00376 int line; 00377 }; 00378 00379 std::ostream& operator<<(std::ostream& out, LineNumberer& l) 00380 { 00381 out << " # " << ++l.line << std::endl; 00382 return out; 00383 } 00384 00385 bool ArbCode::printSamplingInstruction(std::ostream& out, const ArbInst& instr) const 00386 { 00387 if (instr.op != SH_ARB_TEX && instr.op != SH_ARB_TXP && instr.op != SH_ARB_TXB 00388 && instr.op != SH_ARB_TXD) 00389 return false; 00390 00391 ShTextureNodePtr texture = shref_dynamic_cast<ShTextureNode>(instr.src[1].node()); 00392 RegMap::const_iterator texRegIt = m_registers.find(instr.src[1].node()); 00393 if (texRegIt == m_registers.end()) { 00394 SH_DEBUG_PRINT("Unallocated texture found."); 00395 SH_DEBUG_PRINT("Operation = " << arbOpInfo[instr.op].name); 00396 SH_DEBUG_PRINT("Destination* = " << instr.dest.node().object()); 00397 if (instr.dest.node()) { 00398 SH_DEBUG_PRINT("Destination = " << instr.dest.name()); 00399 } 00400 SH_DEBUG_PRINT("Texture pointer = " << texture.object()); 00401 if (texture) { 00402 SH_DEBUG_PRINT("Texture = " << texture->name()); 00403 } 00404 out << " INVALID TEX INSTRUCTION;"; 00405 return true; 00406 } 00407 //SH_DEBUG_ASSERT(texRegIt != m_registers.end()); 00408 00409 const ArbReg& texReg = *texRegIt->second; 00410 00411 out << " "; 00412 out << arbOpInfo[instr.op].name << " "; 00413 printVar(out, true, instr.dest, false) << ", "; 00414 printVar(out, false, instr.src[0], true, instr.dest.swizzle()) << ", "; 00415 if (instr.op == SH_ARB_TXD) { 00416 printVar(out, false, instr.src[2], true, instr.dest.swizzle()) << ", "; 00417 printVar(out, false, instr.src[3], true, instr.dest.swizzle()) << ", "; 00418 } 00419 out << "texture[" << texReg.index << "], "; 00420 switch (texture->dims()) { 00421 case SH_TEXTURE_1D: 00422 out << "1D"; 00423 break; 00424 case SH_TEXTURE_2D: 00425 out << "2D"; 00426 break; 00427 case SH_TEXTURE_3D: 00428 out << "3D"; 00429 break; 00430 case SH_TEXTURE_CUBE: 00431 out << "CUBE"; 00432 break; 00433 case SH_TEXTURE_RECT: 00434 out << "RECT"; 00435 break; 00436 } 00437 out << ";"; 00438 return true; 00439 } 00440 00441 std::ostream& ArbCode::print(std::ostream& out) 00442 { 00443 LineNumberer endl; 00444 const char* swizChars = "xyzw"; 00445 00446 // Print version header 00447 if (m_unit == "vertex") { 00448 out << "!!ARBvp1.0" << endl; 00449 if (m_environment & SH_ARB_NVVP3) out << "OPTION NV_vertex_program3;" << endl; 00450 else if (m_environment & SH_ARB_NVVP2) out << "OPTION NV_vertex_program2;" << endl; 00451 } 00452 if (m_unit == "fragment") { 00453 out << "!!ARBfp1.0" << endl; 00454 00455 if (m_environment & SH_ARB_NVFP2) out << "OPTION NV_fragment_program2;" << endl; 00456 else if (m_environment & SH_ARB_NVFP) out << "OPTION NV_fragment_program;" << endl; 00457 00458 if (m_environment & SH_ARB_ATIDB) out << "OPTION ATI_draw_buffers;" << endl; 00459 } 00460 00461 // Print register declarations 00462 00463 for (RegList::const_iterator I = m_reglist.begin(); 00464 I != m_reglist.end(); ++I) { 00465 if ((*I)->type == SH_ARB_REG_TEMP) continue; 00466 if ((*I)->type == SH_ARB_REG_TEXTURE) continue; 00467 out << " "; 00468 (*I)->printDecl(out); 00469 out << endl; 00470 } 00471 if (m_numTemps) { 00472 out << " TEMP "; 00473 for (int i = 0; i < m_numTemps; i++) { 00474 if (i > 0) out << ", "; 00475 out << ArbReg(SH_ARB_REG_TEMP, i); 00476 } 00477 out << ";" << endl; 00478 } 00479 00480 out << endl; 00481 00482 // Print instructions 00483 for (ArbInstList::const_iterator I = m_instructions.begin(); 00484 I != m_instructions.end(); ++I) { 00485 if (I->op == SH_ARB_LABEL) { 00486 out << "label" << I->label << ": "; 00487 } else if (I->op == SH_ARB_ELSE) { 00488 out << " ELSE;"; 00489 } else if (I->op == SH_ARB_ENDIF) { 00490 out << " ENDIF;"; 00491 } else if (I->op == SH_ARB_BRA) { 00492 if (I->src[0].node()) { 00493 out << " MOVC "; 00494 printVar(out, true, I->src[0], false); 00495 out << ", "; 00496 printVar(out, false, I->src[0], false, I->src[0].swizzle()); 00497 out << ";" << endl; 00498 } 00499 out << " BRA label" << I->label; 00500 if (I->src[0].node()) { 00501 out << " (GT"; 00502 out << "."; 00503 for (int i = 0; i < I->src[0].swizzle().size(); i++) { 00504 out << swizChars[I->src[0].swizzle()[i]]; 00505 } 00506 out << ")"; 00507 } 00508 out << ";"; 00509 } else if (I->op == SH_ARB_REP) { 00510 out << " REP "; 00511 printVar(out, false, I->src[0], false, I->src[0].swizzle()); 00512 out << ";"; 00513 } else if (I->op == SH_ARB_BRK) { 00514 if (I->src[0].node()) { 00515 out << " MOVC "; 00516 printVar(out, true, I->src[0], false); 00517 out << ", "; 00518 printVar(out, false, I->src[0], false, I->src[0].swizzle()); 00519 out << ";" << endl; 00520 } 00521 out << " BRK "; 00522 if (I->src[0].node()) { 00523 out << " ("; 00524 if (I->invert) { 00525 out << "LE"; 00526 } else { 00527 out << "GT"; 00528 } 00529 out << "."; 00530 for (int i = 0; i < I->src[0].swizzle().size(); i++) { 00531 out << swizChars[I->src[0].swizzle()[i]]; 00532 } 00533 out << ")"; 00534 } 00535 out << ";"; 00536 } else if (I->op == SH_ARB_ENDREP) { 00537 out << " ENDREP;"; 00538 } else if (I->op == SH_ARB_IF) { 00539 if (I->src[0].node()) { 00540 out << " MOVC "; 00541 printVar(out, true, I->src[0], false); 00542 out << ", "; 00543 printVar(out, false, I->src[0], false, I->src[0].swizzle()); 00544 out << ";" << endl; 00545 } 00546 out << " IF "; 00547 if (I->src[0].node()) { 00548 out << "GT"; 00549 out << "."; 00550 for (int i = 0; i < I->src[0].swizzle().size(); i++) { 00551 out << swizChars[I->src[0].swizzle()[i]]; 00552 } 00553 } else { 00554 out << "TR"; 00555 } 00556 out << ";"; 00557 } else if (!printSamplingInstruction(out, *I)) { 00558 out << " "; 00559 out << arbOpInfo[I->op].name; 00560 if (I->update_cc) out << "C"; 00561 out << " "; 00562 printVar(out, true, I->dest, arbOpInfo[I->op].collectingOp); 00563 if (I->ccode != ArbInst::NOCC) { 00564 out << " ("; 00565 out << arbCCnames[I->ccode]; 00566 out << "."; 00567 for (int i = 0; i < 4; i++) { 00568 out << swizChars[(i < I->ccswiz.size() ? I->ccswiz[i] 00569 : (I->ccswiz.size() == 1 ? I->ccswiz[0] : i))]; 00570 } 00571 out << ") "; 00572 } 00573 for (int i = 0; i < arbOpInfo[I->op].arity; i++) { 00574 out << ", "; 00575 printVar(out, false, I->src[i], arbOpInfo[I->op].collectingOp, I->dest.swizzle()); 00576 } 00577 out << ';'; 00578 } 00579 out << " # "; 00580 if (I->dest.node() && I->dest.has_name()) { 00581 out << "d=" << I->dest.name() << " "; 00582 } 00583 for (int i = 0; i < ArbInst::max_num_sources; i++) { 00584 if (I->src[i].node() && I->src[i].has_name()) { 00585 out << "s[" << i << "]=" << I->src[i].name() << " "; 00586 } 00587 } 00588 out << endl; 00589 } 00590 00591 out << "END" << endl; 00592 return out; 00593 } 00594 00595 std::ostream& ArbCode::printInputOutputFormat(std::ostream& out) { 00596 ShProgramNode::VarList::const_iterator I; 00597 out << "Inputs:" << std::endl; 00598 for (I = m_shader->inputs.begin(); I != m_shader->inputs.end(); ++I) { 00599 out << " "; 00600 m_registers[*I]->printDecl(out); 00601 out << std::endl; 00602 } 00603 00604 out << "Outputs:" << std::endl; 00605 for (I = m_shader->outputs.begin(); I != m_shader->outputs.end(); ++I) { 00606 out << " "; 00607 m_registers[*I]->printDecl(out); 00608 out << std::endl; 00609 } 00610 return out; 00611 } 00612 00613 int ArbCode::getLabel(ShCtrlGraphNodePtr node) 00614 { 00615 if (m_label_map.find(node) == m_label_map.end()) { 00616 m_label_map[node] = m_max_label++; 00617 } 00618 return m_label_map[node]; 00619 } 00620 00621 void ArbCode::genNode(ShCtrlGraphNodePtr node) 00622 { 00623 if (!node || node->marked()) return; 00624 node->mark(); 00625 00626 if (node == m_shader->ctrlGraph->exit()) return; 00627 00628 if (m_environment & SH_ARB_NVVP2) { 00629 m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(node))); 00630 } 00631 00632 if (node->block) for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin(); 00633 I != node->block->end(); ++I) { 00634 const ShStatement& stmt = *I; 00635 emit(stmt); 00636 } 00637 00638 if (m_environment & SH_ARB_NVVP2) { 00639 for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin(); 00640 I != node->successors.end(); I++) { 00641 m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(I->node), I->cond)); 00642 } 00643 m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(node->follower))); 00644 for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin(); 00645 I != node->successors.end(); I++) { 00646 genNode(I->node); 00647 } 00648 } 00649 00650 genNode(node->follower); 00651 } 00652 00653 void ArbCode::genStructNode(const ShStructuralNodePtr& node) 00654 { 00655 if (!node) return; 00656 00657 if (node->type == ShStructuralNode::UNREDUCED) { 00658 ShBasicBlockPtr block = node->cfg_node->block; 00659 if (block) for (ShBasicBlock::ShStmtList::const_iterator I = block->begin(); 00660 I != block->end(); ++I) { 00661 const ShStatement& stmt = *I; 00662 emit(stmt); 00663 } 00664 } else if (node->type == ShStructuralNode::BLOCK) { 00665 for (ShStructuralNode::StructNodeList::const_iterator I = node->structnodes.begin(); 00666 I != node->structnodes.end(); ++I) { 00667 genStructNode(*I); 00668 } 00669 } else if (node->type == ShStructuralNode::IFELSE) { 00670 ShStructuralNodePtr header = node->structnodes.front(); 00671 // TODO Check that header->successors is only two. 00672 ShVariable cond; 00673 ShStructuralNodePtr ifnode, elsenode; 00674 for (ShStructuralNode::SuccessorList::iterator I = header->succs.begin(); 00675 I != header->succs.end(); ++I) { 00676 if (I->first.node()) { 00677 ifnode = I->second; 00678 cond = I->first; 00679 } else { 00680 elsenode = I->second; 00681 } 00682 } 00683 genStructNode(header); 00684 m_instructions.push_back(ArbInst(SH_ARB_IF, ShVariable(), cond)); { 00685 genStructNode(ifnode); 00686 } m_instructions.push_back(ArbInst(SH_ARB_ELSE, ShVariable())); { 00687 genStructNode(elsenode); 00688 } m_instructions.push_back(ArbInst(SH_ARB_ENDIF, ShVariable())); 00689 } else if (node->type == ShStructuralNode::WHILELOOP) { 00690 ShStructuralNodePtr header = node->structnodes.front(); 00691 00692 ShVariable cond = header->succs.front().first; 00693 00694 ShStructuralNodePtr body = node->structnodes.back(); 00695 00696 ShVariable maxloop(new ShVariableNode(SH_CONST, 1)); 00697 float maxloopval = 255.0; 00698 maxloop.setValues(&maxloopval); 00699 m_shader->constants.push_back(maxloop.node()); 00700 m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop)); 00701 genStructNode(header); 00702 ArbInst brk(SH_ARB_BRK, ShVariable(), cond); 00703 brk.invert = true; 00704 m_instructions.push_back(brk); 00705 genStructNode(body); 00706 00707 m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable())); 00708 } else if (node->type == ShStructuralNode::SELFLOOP) { 00709 ShStructuralNodePtr loopnode = node->structnodes.front(); 00710 00711 bool condexit = true; // true if the condition causes us to exit the 00712 // loop, rather than continue it 00713 ShVariable cond; 00714 for (ShStructuralNode::SuccessorList::iterator I = loopnode->succs.begin(); 00715 I != loopnode->succs.end(); ++I) { 00716 if (I->first.node()) { 00717 if (I->second == loopnode) condexit = false; else condexit = true; 00718 cond = I->first; 00719 } 00720 } 00721 00722 ShVariable maxloop(new ShVariableNode(SH_CONST, 1)); 00723 float maxloopval = 255.0; 00724 maxloop.setValues(&maxloopval); 00725 m_shader->constants.push_back(maxloop.node()); 00726 m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop)); 00727 genStructNode(loopnode); 00728 ArbInst brk(SH_ARB_BRK, ShVariable(), cond); 00729 if (!condexit) { 00730 brk.invert = true; 00731 } 00732 m_instructions.push_back(brk); 00733 m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable())); 00734 } 00735 } 00736 00737 void ArbCode::allocRegs() 00738 { 00739 ArbLimits limits(m_unit); 00740 00741 allocInputs(limits); 00742 00743 allocOutputs(limits); 00744 00745 for (ShProgramNode::VarList::const_iterator I = m_shader->uniforms.begin(); 00746 I != m_shader->uniforms.end(); ++I) { 00747 allocParam(limits, *I); 00748 } 00749 00750 allocConsts(limits); 00751 00752 allocTemps(limits); 00753 00754 allocTextures(limits); 00755 } 00756 00757 void ArbCode::bindSpecial(const ShProgramNode::VarList::const_iterator& begin, 00758 const ShProgramNode::VarList::const_iterator& end, 00759 const ArbBindingSpecs& specs, 00760 std::vector<int>& bindings, 00761 ArbRegType type, int& num) 00762 { 00763 bindings.push_back(0); 00764 00765 if (specs.semanticType == SH_ATTRIB) return; 00766 00767 for (ShProgramNode::VarList::const_iterator I = begin; I != end; ++I) { 00768 ShVariableNodePtr node = *I; 00769 00770 if (m_registers.find(node) != m_registers.end()) continue; 00771 if (node->specialType() != specs.semanticType) continue; 00772 00773 m_registers[node] = new ArbReg(type, num++, node->name()); 00774 m_registers[node]->binding = specs.binding; 00775 m_registers[node]->bindingIndex = bindings.back(); 00776 m_reglist.push_back(m_registers[node]); 00777 00778 bindings.back()++; 00779 if (bindings.back() == specs.maxBindings) break; 00780 } 00781 } 00782 00783 void ArbCode::allocInputs(const ArbLimits& limits) 00784 { 00785 // First, try to assign some "special" output register bindings 00786 for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) { 00787 bindSpecial(m_shader->inputs.begin(), m_shader->inputs.end(), 00788 arbBindingSpecs(false, m_unit)[i], m_inputBindings, 00789 SH_ARB_REG_ATTRIB, m_numInputs); 00790 } 00791 00792 for (ShProgramNode::VarList::const_iterator I = m_shader->inputs.begin(); 00793 I != m_shader->inputs.end(); ++I) { 00794 ShVariableNodePtr node = *I; 00795 if (m_registers.find(node) != m_registers.end()) continue; 00796 m_registers[node] = new ArbReg(SH_ARB_REG_ATTRIB, m_numInputs++, node->name()); 00797 m_reglist.push_back(m_registers[node]); 00798 00799 // Binding 00800 for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) { 00801 const ArbBindingSpecs& specs = arbBindingSpecs(false, m_unit)[i]; 00802 00803 if (specs.allowGeneric && m_inputBindings[i] < specs.maxBindings) { 00804 m_registers[node]->binding = specs.binding; 00805 m_registers[node]->bindingIndex = m_inputBindings[i]; 00806 m_inputBindings[i]++; 00807 break; 00808 } 00809 } 00810 } 00811 } 00812 00813 void ArbCode::allocOutputs(const ArbLimits& limits) 00814 { 00815 // First, try to assign some "special" output register bindings 00816 for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) { 00817 bindSpecial(m_shader->outputs.begin(), m_shader->outputs.end(), 00818 arbBindingSpecs(true, m_unit)[i], m_outputBindings, 00819 SH_ARB_REG_OUTPUT, m_numOutputs); 00820 } 00821 00822 for (ShProgramNode::VarList::const_iterator I = m_shader->outputs.begin(); 00823 I != m_shader->outputs.end(); ++I) { 00824 ShVariableNodePtr node = *I; 00825 if (m_registers.find(node) != m_registers.end()) continue; 00826 m_registers[node] = new ArbReg(SH_ARB_REG_OUTPUT, m_numOutputs++, node->name()); 00827 m_reglist.push_back(m_registers[node]); 00828 00829 // Binding 00830 for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) { 00831 const ArbBindingSpecs& specs = arbBindingSpecs(true, m_unit)[i]; 00832 00833 if (specs.allowGeneric && m_outputBindings[i] < specs.maxBindings) { 00834 m_registers[node]->binding = specs.binding; 00835 m_registers[node]->bindingIndex = m_outputBindings[i]; 00836 m_outputBindings[i]++; 00837 break; 00838 } 00839 } 00840 } 00841 } 00842 00843 void ArbCode::allocParam(const ArbLimits& limits, const ShVariableNodePtr& node) 00844 { 00845 // TODO: Check if we reached maximum 00846 if (m_registers.find(node) != m_registers.end()) return; 00847 m_registers[node] = new ArbReg(SH_ARB_REG_PARAM, m_numParams, node->name()); 00848 m_registers[node]->binding = SH_ARB_REG_PARAMLOC; 00849 m_registers[node]->bindingIndex = m_numParams; 00850 m_reglist.push_back(m_registers[node]); 00851 m_numParams++; 00852 } 00853 00854 void ArbCode::allocConsts(const ArbLimits& limits) 00855 { 00856 for (ShProgramNode::VarList::const_iterator I = m_shader->constants.begin(); 00857 I != m_shader->constants.end(); ++I) { 00858 ShVariableNodePtr node = *I; 00859 00860 // TODO: improve efficiency 00861 RegMap::const_iterator J; 00862 for (J = m_registers.begin(); J != m_registers.end(); ++J) { 00863 if (J->second->type != SH_ARB_REG_CONST) continue; 00864 int f = 0; 00865 for (int i = 0; i < node->size(); i++) { 00866 if (J->second->values[i] == node->getValue(i)) f++; 00867 } 00868 if (f == node->size()) break; 00869 } 00870 if (J == m_registers.end()) { 00871 m_registers[node] = new ArbReg(SH_ARB_REG_CONST, m_numConsts, node->name()); 00872 m_reglist.push_back(m_registers[node]); 00873 for (int i = 0; i < 4; i++) { 00874 m_registers[node]->values[i] = (float)(i < node->size() ? node->getValue(i) : 0.0); 00875 } 00876 m_numConsts++; 00877 } else { 00878 m_registers[node] = J->second; 00879 } 00880 } 00881 } 00882 00883 bool mark(ShLinearAllocator& allocator, ShVariableNodePtr node, int i) 00884 { 00885 if (!node) return false; 00886 if (node->kind() != SH_TEMP) return false; 00887 if (node->hasValues()) return false; 00888 allocator.mark(node, i); 00889 return true; 00890 } 00891 00892 bool markable(ShVariableNodePtr node) 00893 { 00894 if (!node) return false; 00895 if (node->kind() != SH_TEMP) return false; 00896 if (node->hasValues()) return false; 00897 return true; 00898 } 00899 00900 struct ArbScope { 00901 ArbScope(int start) 00902 : start(start) 00903 { 00904 } 00905 00906 typedef std::map< ShVariableNode*, std::bitset<4> > UsageMap; 00907 00908 typedef std::set<ShVariableNode*> MarkList; 00909 00910 MarkList need_mark; // Need to mark at end of loop 00911 int start; // location where loop started 00912 UsageMap usage_map; 00913 UsageMap write_map; // locations last written to 00914 }; 00915 00916 void ArbCode::allocTemps(const ArbLimits& limits) 00917 { 00918 00919 typedef std::list<ArbScope> ScopeStack; 00920 ScopeStack scopestack; 00921 00922 ShLinearAllocator allocator(this); 00923 00924 // { 00925 // ScopeStack scopestack; 00926 // // First do a backwards traversal to find loop nodes that need to be 00927 // // marked due to later uses of assignments 00928 // std::map<ShVariableNode*, int> last_use; 00929 00930 // for (int i = (int)m_instructions.size() - 1; i >= 0; --i) { 00931 // ArbInst instr = m_instructions[i]; 00932 // if (instr.op == SH_ARB_ENDREP) { 00933 // scopestack.push_back((int)i); 00934 // } 00935 // if (instr.op == SH_ARB_REP) { 00936 // const ArbScope& scope = scopestack.back(); 00937 // for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin(); 00938 // I != scope.need_mark.end(); ++I) { 00939 // mark(allocator, *I, (int)i); 00940 // } 00941 // scopestack.pop_back(); 00942 // } 00943 00944 // if (markable(instr.dest.node())) { 00945 // if (last_use.find(instr.dest.node().object()) == last_use.end()) continue; 00946 // for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) { 00947 // ArbScope& scope = *S; 00948 // // Note scope.start == location of ENDREP 00949 // // TODO: Consider sub-components in last_use update and here. 00950 // if (last_use[instr.dest.node().object()] > scope.start) { 00951 // mark(allocator, instr.dest.node().object(), scope.start); 00952 // scope.need_mark.insert(instr.dest.node().object()); 00953 // } 00954 // } 00955 // } 00956 00957 // for (int j = 0; j < ArbInst::max_num_sources; j++) { 00958 // if (!markable(instr.src[j].node())) continue; 00959 00960 // if (last_use.find(instr.src[j].node().object()) == last_use.end()) { 00961 // last_use[instr.src[j].node().object()] = i; 00962 // } 00963 // } 00964 // } 00965 // } 00966 00967 { 00968 ScopeStack scopestack; 00969 // First do a backwards traversal to find loop nodes that need to be 00970 // marked due to later uses of assignments 00971 00972 // push root stack 00973 00974 scopestack.push_back(m_instructions.size() - 1); 00975 00976 for (int i = (int)m_instructions.size() - 1; i >= 0; --i) { 00977 ArbInst instr = m_instructions[i]; 00978 if (instr.op == SH_ARB_ENDREP) { 00979 scopestack.push_back((int)i); 00980 } 00981 if (instr.op == SH_ARB_REP) { 00982 const ArbScope& scope = scopestack.back(); 00983 for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin(); 00984 I != scope.need_mark.end(); ++I) { 00985 mark(allocator, *I, (int)i); 00986 } 00987 scopestack.pop_back(); 00988 } 00989 00990 if (markable(instr.dest.node())) { 00991 std::bitset<4> writemask; 00992 for (int k = 0; k < instr.dest.size(); k++) { 00993 writemask[instr.dest.swizzle()[k]] = true; 00994 } 00995 std::bitset<4> used; 00996 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) { 00997 ArbScope& scope = *S; 00998 00999 if ((used & writemask).any()) { 01000 mark(allocator, instr.dest.node().object(), scope.start); 01001 scope.need_mark.insert(instr.dest.node().object()); 01002 } 01003 01004 used |= scope.usage_map[instr.dest.node().object()]; 01005 } 01006 01007 ArbScope& scope = scopestack.back(); 01008 scope.usage_map[instr.dest.node().object()] &= ~writemask; 01009 } 01010 01011 for (int j = 0; j < ArbInst::max_num_sources; j++) { 01012 if (!markable(instr.src[j].node())) continue; 01013 std::bitset<4> usemask; 01014 for (int k = 0; k < instr.src[j].size(); k++) { 01015 usemask[instr.src[j].swizzle()[k]] = true; 01016 } 01017 ArbScope& scope = scopestack.back(); 01018 scope.usage_map[instr.src[j].node().object()] |= usemask; 01019 } 01020 } 01021 } 01022 01023 for (std::size_t i = 0; i < m_instructions.size(); i++) { 01024 ArbInst instr = m_instructions[i]; 01025 if (instr.op == SH_ARB_REP) { 01026 scopestack.push_back((int)i); 01027 } 01028 if (instr.op == SH_ARB_ENDREP) { 01029 const ArbScope& scope = scopestack.back(); 01030 for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin(); 01031 I != scope.need_mark.end(); ++I) { 01032 mark(allocator, *I, (int)i); 01033 } 01034 scopestack.pop_back(); 01035 } 01036 01037 if (mark(allocator, instr.dest.node(), (int)i)) { 01038 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) { 01039 ArbScope& scope = *S; 01040 std::bitset<4> writemask; 01041 for (int k = 0; k < instr.dest.size(); k++) { 01042 writemask[instr.dest.swizzle()[k]] = true; 01043 } 01044 // TODO: Only change the writemask for scopes that see this 01045 // write unconditionally 01046 // I.e. don't change it if the scope is outside an if 01047 // statement, or a post-BRK REP scope. 01048 scope.write_map[instr.dest.node().object()] |= writemask; 01049 01050 } 01051 } 01052 01053 for (int j = 0; j < ArbInst::max_num_sources; j++) { 01054 if (mark(allocator, instr.src[j].node(), (int)i)) { 01055 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) { 01056 ArbScope& scope = *S; 01057 // Mark uses that weren't recently written to. 01058 std::bitset<4> usemask; 01059 for (int k = 0; k < instr.src[j].size(); k++) { 01060 usemask[instr.src[j].swizzle()[k]] = true; 01061 } 01062 if ((usemask & ~scope.write_map[instr.src[j].node().object()]).any()) { 01063 mark(allocator, instr.src[j].node(), scope.start); 01064 scope.need_mark.insert(instr.src[j].node().object()); 01065 } 01066 } 01067 } 01068 } 01069 } 01070 01071 m_tempRegs.clear(); 01072 m_numTemps = 0; 01073 for (int i = 0; i < limits.temps(); i++) { 01074 m_tempRegs.push_back(i); 01075 } 01076 01077 allocator.allocate(); 01078 01079 m_tempRegs.clear(); 01080 } 01081 01082 void ArbCode::allocTextures(const ArbLimits& limits) 01083 { 01084 for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin(); 01085 I != m_shader->textures.end(); ++I) { 01086 ShTextureNodePtr node = *I; 01087 int index; 01088 index = m_numTextures; 01089 m_registers[node] = new ArbReg(SH_ARB_REG_TEXTURE, index, node->name()); 01090 m_reglist.push_back(m_registers[node]); 01091 m_numTextures++; 01092 } 01093 } 01094 01095 void ArbCode::bindTextures() 01096 { 01097 for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin(); 01098 I != m_shader->textures.end(); ++I) { 01099 m_textures->bindTexture(*I, GL_TEXTURE0 + m_registers[*I]->index); 01100 } 01101 } 01102 01103 }

Generated on Mon Oct 18 14:17:38 2004 for Sh by doxygen 1.3.7