Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ArbCode.cpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory
00004 // Project administrator: Michael D. McCool
00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa,
00006 //          Michael D. McCool
00007 // 
00008 // This software is provided 'as-is', without any express or implied
00009 // warranty. In no event will the authors be held liable for any damages
00010 // arising from the use of this software.
00011 // 
00012 // Permission is granted to anyone to use this software for any purpose,
00013 // including commercial applications, and to alter it and redistribute it
00014 // freely, subject to the following restrictions:
00015 // 
00016 // 1. The origin of this software must not be misrepresented; you must
00017 // not claim that you wrote the original software. If you use this
00018 // software in a product, an acknowledgment in the product documentation
00019 // would be appreciated but is not required.
00020 // 
00021 // 2. Altered source versions must be plainly marked as such, and must
00022 // not be misrepresented as being the original software.
00023 // 
00024 // 3. This notice may not be removed or altered from any source
00025 // distribution.
00027 #include "ArbCode.hpp"
00028 #include <iostream>
00029 #include <sstream>
00030 #include <cmath>
00031 #include <bitset>
00032 
00033 #include "ShVariable.hpp"
00034 #include "ShDebug.hpp"
00035 #include "ShLinearAllocator.hpp"
00036 #include "ShInternals.hpp"
00037 #include "ShOptimizations.hpp"
00038 #include "ShEnvironment.hpp"
00039 #include "ShContext.hpp"
00040 #include "ShTypeInfo.hpp"
00041 #include "ShVariant.hpp"
00042 #include "ShTextureNode.hpp"
00043 #include "ShSyntax.hpp"
00044 #include "ArbReg.hpp"
00045 #include "Arb.hpp"
00046 #include "ShAttrib.hpp"
00047 #include "ShCastManager.hpp"
00048 #include "ShError.hpp"
00049 
00050 namespace shgl {
00051 
00052 using namespace SH;
00053 
// Local aliases for the ARB program entry points. Every GL program call in
// this file goes through one of these shGl* names.
#define shGlGenProgramsARB              glGenProgramsARB
#define shGlDeleteProgramsARB           glDeleteProgramsARB
#define shGlBindProgramARB              glBindProgramARB
#define shGlProgramStringARB            glProgramStringARB
#define shGlGetProgramivARB             glGetProgramivARB
#define shGlProgramLocalParameter4fvARB glProgramLocalParameter4fvARB
#define shGlProgramEnvParameter4fvARB   glProgramEnvParameter4fvARB
#define shGlActiveTextureARB            glActiveTextureARB
00062 
00063 struct ArbBindingSpecs {
00064   ArbRegBinding binding;
00065   int maxBindings;
00066   ShSemanticType semanticType;
00067   bool allowGeneric;
00068 };
00069 
00070 ArbBindingSpecs arbVertexAttribBindingSpecs[] = {
00071   {SH_ARB_REG_VERTEXPOS, 1, SH_POSITION, false},
00072   {SH_ARB_REG_VERTEXNRM, 1, SH_NORMAL, false},
00073   {SH_ARB_REG_VERTEXCOL, 1, SH_COLOR, false},
00074   {SH_ARB_REG_VERTEXTEX, 8, SH_TEXCOORD, true},
00075   {SH_ARB_REG_VERTEXFOG, 1, SH_ATTRIB, true},
00076   {SH_ARB_REG_NONE, 0, SH_ATTRIB, true}
00077 };
00078 
00079 ArbBindingSpecs arbFragmentAttribBindingSpecs[] = {
00080   {SH_ARB_REG_FRAGMENTPOS, 1, SH_POSITION, false},
00081   {SH_ARB_REG_FRAGMENTCOL, 1, SH_COLOR, false},
00082   {SH_ARB_REG_FRAGMENTTEX, 8, SH_TEXCOORD, true},
00083   {SH_ARB_REG_FRAGMENTFOG, 1, SH_ATTRIB, true},
00084   {SH_ARB_REG_NONE, 0, SH_ATTRIB, true}
00085 };
00086 
00087 ArbBindingSpecs arbVertexOutputBindingSpecs[] = {
00088   {SH_ARB_REG_RESULTPOS, 1, SH_POSITION, false},
00089   {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, false},
00090   {SH_ARB_REG_RESULTTEX, 8, SH_TEXCOORD, true},
00091   {SH_ARB_REG_RESULTFOG, 1, SH_ATTRIB, true},
00092   {SH_ARB_REG_RESULTPTS, 1, SH_ATTRIB, true},
00093   {SH_ARB_REG_NONE, 0, SH_ATTRIB}
00094 };
00095 
00096 ArbBindingSpecs arbFragmentOutputBindingSpecs[] = {
00097   {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, true},
00098   {SH_ARB_REG_RESULTDPT, 1, SH_ATTRIB, false},
00099   {SH_ARB_REG_NONE, 0, SH_ATTRIB}
00100 };
00101 
00102 ArbBindingSpecs* arbBindingSpecs(bool output, const std::string& unit)
00103 {
00104   if (unit == "vertex")
00105     return output ? arbVertexOutputBindingSpecs : arbVertexAttribBindingSpecs;
00106   if (unit == "fragment")
00107     return output ? arbFragmentOutputBindingSpecs : arbFragmentAttribBindingSpecs;
00108   return 0;
00109 }
00110 
00111 using namespace SH;
00112 
00113 ArbCode::ArbCode(const ShProgramNodeCPtr& shader, const std::string& unit,
00114                  TextureStrategy* texture)
00115   : m_texture(texture), m_shader(0), m_originalShader(0), m_unit(unit),
00116     m_numTemps(0), m_numHalfTemps(0), m_numInputs(0), m_numOutputs(0), m_numParams(0), m_numParamBindings(0),
00117     m_numConsts(0),
00118     m_numTextures(0), m_programId(0), m_environment(0), m_max_label(0),
00119     m_address_register(new ShVariableNode(SH_TEMP, 1, SH_FLOAT))
00120 {
00121   m_originalShader =  const_cast<ShProgramNode*>(shader.object());
00122 
00123   if (unit == "fragment") m_environment |= SH_ARB_FP;
00124   if (unit == "vertex") m_environment |= SH_ARB_VP;
00125 
00126   const GLubyte* extensions = glGetString(GL_EXTENSIONS);
00127   if(extensions) { // DEBUGGING
00128     std::string extstr(reinterpret_cast<const char*>(extensions));
00129 
00130     if (unit == "fragment") {
00131       if (extstr.find("NV_fragment_program_option") != std::string::npos) {
00132         m_environment |= SH_ARB_NVFP;
00133       }
00134       if (extstr.find("NV_fragment_program2") != std::string::npos) {
00135         m_environment |= SH_ARB_NVFP2;
00136       }
00137       if (extstr.find("ATI_draw_buffers") != std::string::npos) {
00138         m_environment |= SH_ARB_ATIDB;
00139       }
00140     }
00141     if (unit == "vertex") {
00142       if (extstr.find("NV_vertex_program2_option") != std::string::npos) {
00143         m_environment |= SH_ARB_NVVP2;
00144       }
00145       if (extstr.find("NV_vertex_program3") != std::string::npos) {
00146         m_environment |= SH_ARB_NVVP3;
00147       }
00148     }
00149   }
00150 
00151   // initialize m_convertMap
00152   m_convertMap[SH_DOUBLE] = SH_FLOAT; 
00153 
00154   bool halfSupport = m_environment & (SH_ARB_NVFP | SH_ARB_NVFP2);
00155   if(!halfSupport) m_convertMap[SH_HALF] = SH_FLOAT;
00156 
00157   m_convertMap[SH_INT] = SH_FLOAT;
00158   m_convertMap[SH_SHORT] = halfSupport ? SH_HALF: SH_FLOAT;
00159   m_convertMap[SH_BYTE] = halfSupport ? SH_HALF: SH_FLOAT;
00160   m_convertMap[SH_UINT] = SH_FLOAT;
00161   m_convertMap[SH_USHORT] = halfSupport ? SH_HALF: SH_FLOAT;
00162   m_convertMap[SH_UBYTE] = halfSupport ? SH_HALF: SH_FLOAT;
00163 
00164   m_convertMap[SH_FINT] = SH_FLOAT;
00165   m_convertMap[SH_FSHORT] = SH_FLOAT;
00166   m_convertMap[SH_FBYTE] = halfSupport ? SH_HALF : SH_FLOAT;
00167   m_convertMap[SH_FUINT] = SH_FLOAT;
00168   m_convertMap[SH_FUSHORT] = SH_FLOAT;
00169   m_convertMap[SH_FUBYTE] = halfSupport ? SH_HALF : SH_FLOAT;
00170 }
00171 
00172 ArbCode::~ArbCode()
00173 {
00174  if (m_shader != m_originalShader)
00175    {
00176    delete m_shader;
00177    }
00178 }
00179 
00180 void ArbCode::generate()
00181 {
00182   // Transform code to be ARB_fragment_program compatible
00183   ShProgramNodePtr temp_shader = m_originalShader->clone();
00184   m_shader = temp_shader.object();
00185   m_shader->acquireRef();
00186   temp_shader = NULL;
00187 
00188   ShContext::current()->enter(m_shader);
00189   ShTransformer transform(m_shader);
00190 
00191 
00192   transform.convertInputOutput(); 
00193   transform.splitTuples(4, m_splits);
00194   transform.convertTextureLookups();
00195   transform.convertToFloat(m_convertMap);
00196   
00197   if(transform.changed()) {
00198     optimize(m_shader);
00199     m_shader->collectVariables();
00200   } else {
00201     m_shader->releaseRef();
00202     m_shader = m_originalShader;
00203     ShContext::current()->exit();
00204     ShContext::current()->enter(m_shader);
00205   }
00206 
00207   try {
00208     if (m_environment & SH_ARB_NVFP2) {
00209       // In NV_fragment_program2, we actually generate structured code.
00210       ShStructural str(m_shader->ctrlGraph);
00211 
00212       genStructNode(str.head());
00213     
00214     } else {
00215       m_shader->ctrlGraph->entry()->clearMarked();
00216       genNode(m_shader->ctrlGraph->entry());
00217     
00218       if (m_environment & SH_ARB_NVVP2) {
00219         m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(m_shader->ctrlGraph->exit())));
00220       }
00221     }
00222     m_shader->ctrlGraph->entry()->clearMarked();
00223     allocRegs();
00224   } catch (...) {
00225     m_shader->ctrlGraph->entry()->clearMarked();
00226     ShContext::current()->exit();
00227     throw;
00228   }
00229   ShContext::current()->exit();
00230 }
00231 
00232 bool ArbCode::allocateRegister(const ShVariableNodePtr& var)
00233 {
00234   if (!var) return true;
00235   if (var->kind() != SH_TEMP) return true;
00236   if (var->uniform()) return true;
00237 
00238   if (m_tempRegs.empty()) {
00239     // This gets caught around allocTemps.
00240     throw 1; // yes, it's hacky. Instead we should throw a different
00241              // type, or store the limit information.
00242   }
00243 
00244   int idx = m_tempRegs.front();
00245   m_tempRegs.pop_front();
00246   if(var->valueType() == SH_HALF) {
00247     if (idx + 1 > m_numHalfTemps) m_numHalfTemps = idx + 1;
00248     m_registers[var] = new ArbReg(SH_ARB_REG_HALF_TEMP, idx);
00249   } else {
00250     if (idx + 1 > m_numTemps) m_numTemps = idx + 1;
00251     m_registers[var] = new ArbReg(SH_ARB_REG_TEMP, idx);
00252   }
00253   m_reglist.push_back(m_registers[var]);
00254   
00255   return true;
00256 }
00257 
00258 void ArbCode::freeRegister(const ShVariableNodePtr& var)
00259 {
00260   if (!var) return;
00261   if (var->kind() != SH_TEMP) return;
00262   if (var->uniform()) return;
00263 
00264   SH_DEBUG_ASSERT(m_registers.find(var) != m_registers.end());
00265   m_tempRegs.push_front(m_registers[var]->index);
00266 }
00267 
00268 void ArbCode::upload()
00269 {
00270   if (!m_programId) {
00271     SH_GL_CHECK_ERROR(shGlGenProgramsARB(1, &m_programId));
00272   }
00273 
00274   SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId));
00275   
00276   std::ostringstream out;
00277   print(out);
00278   std::string text = out.str();
00279   shGlProgramStringARB(arbTarget(m_unit), GL_PROGRAM_FORMAT_ASCII_ARB,
00280                        (GLsizei)text.size(), text.c_str());
00281   int error = glGetError();
00282   std::ostringstream error_os;
00283   if (error == GL_NO_ERROR) return;
00284   
00285   error_os << "Failed to upload ARB program." << std::endl;
00286   if (error == GL_INVALID_OPERATION) {
00287     error_os << "Program error:" << std::endl;
00288     int pos = -1;
00289     SH_GL_CHECK_ERROR(glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos));
00290     if (pos >= 0){
00291       const unsigned char* message = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
00292       error_os << "Error at character " << pos << std::endl;
00293       error_os << "Driver Message: " << message << std::endl;
00294       while (pos >= 0 && text[pos] != '\n') pos--;
00295       if (pos > 0) pos++;
00296       error_os << "Code: " << text.substr(pos, text.find('\n', pos)) << std::endl;
00297     }
00298   } else {
00299     error_os << "Unknown error." << std::endl;
00300   }
00301   shError(ArbException(error_os.str()));
00302 }
00303 
00304 void ArbCode::bind()
00305 {
00306   if (!m_programId) {
00307     upload();
00308   }
00309   
00310   SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId));
00311   
00312   ShContext::current()->set_binding(std::string("arb:") + m_unit, ShProgram(m_originalShader));
00313 
00314   // Initialize constants
00315   for (RegMap::const_iterator I = m_registers.begin(); I != m_registers.end(); ++I) {
00316     ShVariableNodePtr node = I->first;
00317     ArbReg reg = *I->second;
00318     if (node->hasValues() && reg.type == SH_ARB_REG_PARAM) {
00319       updateUniform(node);
00320     }
00321   }
00322   // Make sure all textures are loaded.
00323 
00324   bindTextures();
00325 }
00326 
00327 void ArbCode::update()
00328 {
00329   bindTextures();
00330 }
00331 
00332 void ArbCode::updateUniform(const ShVariableNodePtr& uniform)
00333 {
00334   int i;
00335 
00336   if (!uniform) return;
00337 
00338   if (!uniform->meta("opengl:readonly").empty())
00339     {
00340     return;
00341     }
00342 
00343   ShVariantCPtr uniformVariant = uniform->getVariant();
00344   RegMap::const_iterator I = m_registers.find(uniform);
00345   if (I == m_registers.end()) { // perhaps uniform was split
00346     if( m_splits.count(uniform) > 0 ) {
00347       ShTransformer::VarNodeVec &splitVec = m_splits[uniform];
00348 
00349       int offset = 0;
00350       int copySwiz[4];
00351       for(ShTransformer::VarNodeVec::iterator it = splitVec.begin();
00352           it != splitVec.end(); offset += (*it)->size(), ++it) {
00353         // TODO switch to properly swizzled version
00354         for(i = 0; i < (*it)->size(); ++i) copySwiz[i] = i + offset;
00355         (*it)->setVariant(uniformVariant->get(false,
00356             ShSwizzle(uniform->size(), (*it)->size(), copySwiz))); 
00357         updateUniform(*it);
00358       }
00359     } 
00360     return;
00361   }
00362 
00363   ShTextureNodePtr tex = shref_dynamic_cast<ShTextureNode>(uniform);
00364   if (tex) {
00365     return;
00366   }
00367     
00368   const ArbReg& reg = *I->second;
00369   
00370   // @todo type remove the two copies done below
00371   // (although it probably won't matter with all the other work we're doing...
00372   // cast to float 
00373   float values[4];
00374   ShPointer<ShDataVariant<float, SH_HOST> > floatVariant = 
00375     new ShDataVariant<float, SH_HOST>(uniform->size()); 
00376   floatVariant->set(uniformVariant);
00377 
00378   for (i = 0; i < uniform->size(); i++) {
00379     // TODO clean this up and handle different types
00380     values[i] = (*floatVariant)[i]; 
00381   }
00382   for (; i < 4; i++) {
00383     values[i] = 0.0;
00384   }
00385   
00386   if (reg.type != SH_ARB_REG_PARAM) return;
00387   switch(reg.binding.type) {
00388   case SH_ARB_REG_PROGRAMLOC:
00389     SH_GL_CHECK_ERROR(shGlProgramLocalParameter4fvARB(arbTarget(m_unit), reg.binding.index, values));
00390     break;
00391   case SH_ARB_REG_PROGRAMENV:
00392     SH_GL_CHECK_ERROR(shGlProgramEnvParameter4fvARB(arbTarget(m_unit), reg.binding.index, values));
00393     break;
00394   case SH_ARB_REG_STATE:
00395     SH_DEBUG_WARN("Updating uniforms bound to OpenGL state is not currently supported.");
00396   default:
00397     return;
00398   }
00399 }
00400 
00401 std::ostream& ArbCode::printVar(std::ostream& out, bool dest, const ShVariable& var,
00402                                 bool collectingOp, const ShSwizzle& destSwiz = ShSwizzle(4),
00403                                 bool do_swiz = true) const
00404 {
00405   RegMap::const_iterator I = m_registers.find(var.node());
00406   if (I == m_registers.end()) {
00407     out << "<no reg for " << var.name() << ">";
00408     return out;
00409   }
00410   const ArbReg& reg = *I->second;
00411 
00412   // Negation
00413   if (var.neg()) out << '-';
00414 
00415   // Register name
00416   out << reg;
00417 
00418   if (do_swiz) {
00419     // Swizzling
00420     const char* swizChars = "xyzw";
00421     out << ".";
00422     if (dest) {
00423       bool masked[4] = {false, false, false, false};
00424       for (int i = 0; i < var.swizzle().size(); i++) {
00425         masked[var.swizzle()[i]] = true;
00426       }
00427       for (int i = 0; i < 4; i++) {
00428         if (masked[i]) out << swizChars[i];
00429       }
00430     } else if (var.swizzle().size() == 1) {
00431       out << swizChars[var.swizzle()[0]];
00432     } else if (collectingOp) {
00433       for (int i = 0; i < 4; i++) {
00434         out << swizChars[i < var.swizzle().size() ? var.swizzle()[i] : i];
00435       }
00436     } else {
00437       for (int i = 0; i < 4; i++) {
00438         int j;
00439         for (j = 0; j < destSwiz.size(); j++) {
00440           if (destSwiz[j] == i) break;
00441         }
00442         if (j == destSwiz.size()) j = i;
00443         out << swizChars[j < var.size() ? var.swizzle()[j] : j];
00444       }
00445     }
00446   }
00447   
00448   return out;
00449 }
00450 
/// Running line counter, starting at zero. Streaming one into an ostream
/// ends the current line with a numbered comment (see operator<< below).
struct LineNumberer {
  LineNumberer() : line(0) {}
  int line;
};

/// Advance the counter of @p l, then write " # N" (N being the new count)
/// followed by std::endl, which also flushes @p out.
std::ostream& operator<<(std::ostream& out, LineNumberer& l)
{
  l.line += 1;
  out << " # " << l.line << std::endl;
  return out;
}
00461 
00462 bool ArbCode::printSamplingInstruction(std::ostream& out, const ArbInst& instr) const
00463 {
00464   if (instr.op != SH_ARB_TEX && instr.op != SH_ARB_TXP && instr.op != SH_ARB_TXB
00465       && instr.op != SH_ARB_TXD)
00466     return false;
00467 
00468   ShTextureNodePtr texture = shref_dynamic_cast<ShTextureNode>(instr.src[1].node());
00469   RegMap::const_iterator texRegIt = m_registers.find(instr.src[1].node());
00470   if (texRegIt == m_registers.end()) {
00471     SH_DEBUG_PRINT("Unallocated texture found.");
00472     SH_DEBUG_PRINT("Operation = " << arbOpInfo[instr.op].name);
00473     SH_DEBUG_PRINT("Destination* = " << instr.dest.node().object());
00474     if (instr.dest.node()) {
00475       SH_DEBUG_PRINT("Destination = " << instr.dest.name());
00476     }
00477     SH_DEBUG_PRINT("Texture pointer = " << texture.object());
00478     if (texture) {
00479       SH_DEBUG_PRINT("Texture = " << texture->name());
00480     }
00481     out << "  INVALID TEX INSTRUCTION;";
00482     return true;
00483   }
00484   //SH_DEBUG_ASSERT(texRegIt != m_registers.end());
00485 
00486   const ArbReg& texReg = *texRegIt->second;
00487   
00488   out << "  ";
00489   out << arbOpInfo[instr.op].name << " ";
00490   printVar(out, true, instr.dest, false) << ", ";
00491   printVar(out, false, instr.src[0], true, instr.dest.swizzle()) << ", ";
00492   if (instr.op == SH_ARB_TXD) {
00493     printVar(out, false, instr.src[2], true, instr.dest.swizzle()) << ", ";
00494     printVar(out, false, instr.src[3], true, instr.dest.swizzle()) << ", ";
00495   }
00496   out << "texture[" << texReg.index << "], ";
00497   switch (texture->dims()) {
00498   case SH_TEXTURE_1D:
00499     out << "1D";
00500     break;
00501   case SH_TEXTURE_2D:
00502     out << "2D";
00503     break;
00504   case SH_TEXTURE_3D:
00505     out << "3D";
00506     break;
00507   case SH_TEXTURE_CUBE:
00508     out << "CUBE";
00509     break;
00510   case SH_TEXTURE_RECT:
00511     out << "RECT";
00512     break;
00513   }
00514   out << ";";
00515   return true;
00516 }
00517 
00518 std::ostream& ArbCode::print(std::ostream& out)
00519 {
00520   LineNumberer endl;
00521   const char* swizChars = "xyzw";
00522 
00523   // Print version header
00524   if (m_unit == "vertex") {
00525     out << "!!ARBvp1.0" << endl;
00526     if (m_environment & SH_ARB_NVVP3) out << "OPTION NV_vertex_program3;" << endl;
00527     else if (m_environment & SH_ARB_NVVP2) out << "OPTION NV_vertex_program2;" << endl;
00528   }
00529   if (m_unit == "fragment") {
00530     out << "!!ARBfp1.0" << endl;
00531 
00532     if (m_environment & SH_ARB_NVFP2) out << "OPTION NV_fragment_program2;" << endl;
00533     else if (m_environment & SH_ARB_NVFP) out << "OPTION NV_fragment_program;" << endl;
00534 
00535     if (m_environment & SH_ARB_ATIDB) out << "OPTION ATI_draw_buffers;" << endl;
00536   }
00537   
00538   // Print register declarations
00539   
00540   for (RegList::const_iterator I = m_reglist.begin();
00541        I != m_reglist.end(); ++I) {
00542     if ((*I)->type == SH_ARB_REG_TEMP) continue;
00543     if ((*I)->type == SH_ARB_REG_HALF_TEMP) continue;
00544     if ((*I)->type == SH_ARB_REG_TEXTURE) continue;
00545     out << "  ";
00546     (*I)->printDecl(out);
00547     out << endl;
00548   }
00549   if (m_numTemps +  m_numHalfTemps > 0) {
00550     out << "  TEMP ";
00551     for (int i = 0; i < m_numTemps; i++) {
00552       if (i > 0) out << ", ";
00553       out << ArbReg(SH_ARB_REG_TEMP, i);
00554     }
00555     if(m_numTemps > 0 && m_numHalfTemps > 0) out << ", ";
00556     for (int i = 0; i < m_numHalfTemps; i++) {
00557       if (i > 0) out << ", ";
00558       out << ArbReg(SH_ARB_REG_HALF_TEMP, i);
00559     }
00560     out << ";" << endl;
00561   }
00562 
00563   out << endl;
00564   
00565   // Print instructions
00566   for (ArbInstList::const_iterator I = m_instructions.begin();
00567        I != m_instructions.end(); ++I) {
00568     if (I->op == SH_ARB_LABEL) {
00569       out << "label" << I->label << ": ";
00570     } else if (I->op == SH_ARB_ELSE) {
00571       out << "  ELSE;";
00572     } else if (I->op == SH_ARB_ENDIF) {
00573       out << "  ENDIF;";
00574     } else if (I->op == SH_ARB_BRA) {
00575       if (I->src[0].node()) {
00576         out << "  MOVC ";
00577         printVar(out, true, I->src[0], false);
00578         out << ", ";
00579         printVar(out, false, I->src[0], false, I->src[0].swizzle());
00580         out << ";" << endl;
00581       }
00582       out << "  BRA label" << I->label;
00583       if (I->src[0].node()) {
00584         out << "  (GT";
00585         out << ".";
00586         for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00587           out << swizChars[I->src[0].swizzle()[i]];
00588         }
00589         out << ")";
00590       }
00591       out << ";";
00592     } else if (I->op == SH_ARB_REP) {
00593       out << "  REP ";
00594       printVar(out, false, I->src[0], false, I->src[0].swizzle());
00595       out << ";";
00596     } else if (I->op == SH_ARB_BRK) {
00597       if (I->src[0].node()) {
00598         out << "  MOVC ";
00599         printVar(out, true, I->src[0], false);
00600         out << ", ";
00601         printVar(out, false, I->src[0], false, I->src[0].swizzle());
00602         out << ";" << endl;
00603       }
00604       out << "  BRK ";
00605       if (I->src[0].node()) {
00606         out << " (";
00607         if (I->invert) {
00608           out << "LE";
00609         } else {
00610           out << "GT";
00611         }
00612         out << ".";
00613         for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00614           out << swizChars[I->src[0].swizzle()[i]];
00615         }
00616         out << ")";
00617       }
00618       out << ";";
00619     } else if (I->op == SH_ARB_ENDREP) {
00620       out << "  ENDREP;";
00621     } else if (I->op == SH_ARB_IF) {
00622       if (I->src[0].node()) {
00623         out << "  MOVC ";
00624         printVar(out, true, I->src[0], false);
00625         out << ", ";
00626         printVar(out, false, I->src[0], false, I->src[0].swizzle());
00627         out << ";" << endl;
00628       }
00629       out << "  IF ";
00630       if (I->src[0].node()) {
00631         out << "GT";
00632         out << ".";
00633         for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00634           out << swizChars[I->src[0].swizzle()[i]];
00635         }
00636       } else {
00637         out << "TR";
00638       }
00639       out << ";";
00640     } else if (I->op == SH_ARB_ARRAYMOV) {
00641       out << "  MOV ";
00642       printVar(out, true, I->dest, false);
00643       out << ", ";
00644       printVar(out, false, I->src[0], false, ShSwizzle(4), false);
00645       out << "[";
00646       printVar(out, false, I->src[1], false);
00647       out << "]";
00648       out << ";";
00649     } else if (!printSamplingInstruction(out, *I)) {
00650       out << "  ";
00651       out << arbOpInfo[I->op].name;
00652       if (I->update_cc) out << "C";
00653       out << " ";
00654       printVar(out, true, I->dest, arbOpInfo[I->op].collectingOp);
00655       if (I->ccode != ArbInst::NOCC) {
00656         out << " (";
00657         out << arbCCnames[I->ccode];
00658         out << ".";
00659         for (int i = 0; i < 4; i++) {
00660           out << swizChars[(i < I->ccswiz.size() ? I->ccswiz[i]
00661                             : (I->ccswiz.size() == 1 ? I->ccswiz[0] : i))];
00662         }
00663         out << ") ";
00664       }
00665       for (int i = 0; i < arbOpInfo[I->op].arity; i++) {
00666         out << ", ";
00667         printVar(out, false, I->src[i], arbOpInfo[I->op].collectingOp, I->dest.swizzle());
00668       }
00669       out << ';';
00670     }
00671     out << " # ";
00672     if (I->dest.node() && I->dest.has_name()) {
00673       out << "d=" << I->dest.name() << " ";
00674     }
00675     for (int i = 0; i < ArbInst::max_num_sources; i++) {
00676       if (I->src[i].node()  && I->src[i].has_name()) {
00677         out << "s[" << i << "]=" << I->src[i].name() << " ";
00678       }
00679     }
00680     out << endl;
00681   }
00682 
00683   out << "END" << endl;
00684   return out;
00685 }
00686 
00687 std::ostream& ArbCode::describe_interface(std::ostream& out) {
00688   ShProgramNode::VarList::const_iterator I;
00689   out << "Inputs:" << std::endl;
00690   for (I = m_shader->inputs.begin(); I != m_shader->inputs.end(); ++I) {
00691     out << "  ";
00692     m_registers[*I]->printDecl(out);
00693     out << std::endl;
00694   }
00695 
00696   out << "Outputs:" << std::endl;
00697   for (I = m_shader->outputs.begin(); I != m_shader->outputs.end(); ++I) {
00698     out << "  ";
00699     m_registers[*I]->printDecl(out);
00700     out << std::endl;
00701   }
00702   return out;
00703 }
00704 
00705 int ArbCode::getLabel(ShCtrlGraphNodePtr node)
00706 {
00707   if (m_label_map.find(node) == m_label_map.end()) {
00708     m_label_map[node] = m_max_label++;
00709   }
00710   return m_label_map[node];
00711 }
00712 
00713 void ArbCode::genNode(ShCtrlGraphNodePtr node)
00714 {
00715   if (!node || node->marked()) return;
00716   node->mark();
00717 
00718   if (node == m_shader->ctrlGraph->exit()) return;
00719   
00720   if (m_environment & SH_ARB_NVVP2) {
00721     m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(node)));
00722   }
00723   
00724   if (node->block) for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin();
00725        I != node->block->end(); ++I) {
00726     const ShStatement& stmt = *I;
00727     emit(stmt);
00728   }
00729 
00730   if (m_environment & SH_ARB_NVVP2) {
00731     for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin();
00732         I != node->successors.end(); I++) {
00733       m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(I->node), I->cond));
00734     }
00735     if(!node->successors.empty() || node->follower->marked()) { // else it's next anyway, no need for bra
00736       m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(node->follower)));
00737     }
00738     for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin();
00739         I != node->successors.end(); I++) {
00740       genNode(I->node);
00741     }
00742   }
00743 
00744   genNode(node->follower);
00745 }
00746 
00747 void ArbCode::genStructNode(const ShStructuralNodePtr& node)
00748 {
00749   if (!node) return;
00750 
00751   if (node->type == ShStructuralNode::UNREDUCED) {
00752     ShBasicBlockPtr block = node->cfg_node->block;
00753     if (block) for (ShBasicBlock::ShStmtList::const_iterator I = block->begin();
00754                     I != block->end(); ++I) {
00755       const ShStatement& stmt = *I;
00756       emit(stmt);
00757     }
00758   } else if (node->type == ShStructuralNode::BLOCK) {
00759     for (ShStructuralNode::StructNodeList::const_iterator I = node->structnodes.begin();
00760          I != node->structnodes.end(); ++I) {
00761       genStructNode(*I);
00762     }
00763   } else if (node->type == ShStructuralNode::IFELSE) {
00764     ShStructuralNodePtr header = node->structnodes.front();
00765     // TODO Check that header->successors is only two.
00766     ShVariable cond;
00767     ShStructuralNodePtr ifnode, elsenode;
00768     for (ShStructuralNode::SuccessorList::iterator I = header->succs.begin();
00769          I != header->succs.end(); ++I) {
00770       if (I->first.node()) {
00771         ifnode = I->second;
00772         cond = I->first;
00773       } else {
00774         elsenode = I->second;
00775       }
00776     }
00777     genStructNode(header);
00778     m_instructions.push_back(ArbInst(SH_ARB_IF, ShVariable(), cond)); {
00779       genStructNode(ifnode);
00780     } m_instructions.push_back(ArbInst(SH_ARB_ELSE, ShVariable())); {
00781       genStructNode(elsenode);
00782     } m_instructions.push_back(ArbInst(SH_ARB_ENDIF, ShVariable()));
00783   } else if (node->type == ShStructuralNode::WHILELOOP) {
00784     ShStructuralNodePtr header = node->structnodes.front();
00785 
00786     ShVariable cond = header->succs.front().first;
00787     
00788     ShStructuralNodePtr body = node->structnodes.back();
00789 
00790     float maxloopval = 255.0f;
00791     ShConstAttrib1f maxloop(maxloopval);
00792 
00793     m_shader->constants.push_back(maxloop.node());
00794     m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop));
00795     genStructNode(header);
00796     ArbInst brk(SH_ARB_BRK, ShVariable(), cond);
00797     brk.invert = true;
00798     m_instructions.push_back(brk);
00799     genStructNode(body);
00800     
00801     m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable()));
00802   } else if (node->type == ShStructuralNode::SELFLOOP) {
00803     ShStructuralNodePtr loopnode = node->structnodes.front();
00804 
00805     bool condexit = true; // true if the condition causes us to exit the
00806                           // loop, rather than continue it
00807     ShVariable cond;
00808     for (ShStructuralNode::SuccessorList::iterator I = loopnode->succs.begin();
00809          I != loopnode->succs.end(); ++I) {
00810       if (I->first.node()) {
00811         if (I->second == loopnode) condexit = false; else condexit = true;
00812         cond = I->first;
00813       }
00814     }
00815     
00816     float maxloopval = 255.0f;
00817     ShConstAttrib1f maxloop(maxloopval);
00818 
00819     m_shader->constants.push_back(maxloop.node());
00820     m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop));
00821     genStructNode(loopnode);
00822     ArbInst brk(SH_ARB_BRK, ShVariable(), cond);
00823     if (!condexit) {
00824       brk.invert = true;
00825     } 
00826     m_instructions.push_back(brk);
00827     m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable()));
00828   }
00829 }
00830 
00831 void ArbCode::allocRegs()
00832 {
00833   ArbLimits limits(m_unit);
00834   
00835   allocInputs(limits);
00836   
00837   allocOutputs(limits);
00838 
00839   for (ShProgramNode::PaletteList::const_iterator I = m_shader->palettes_begin();
00840        I != m_shader->palettes_end(); ++I) {
00841     allocPalette(limits, *I);
00842   }
00843   
00844   for (ShProgramNode::VarList::const_iterator I = m_shader->uniforms_begin();
00845        I != m_shader->uniforms_end(); ++I) {
00846     allocParam(limits, *I);
00847   }
00848 
00849   allocConsts(limits);
00850 
00851   try {
00852     allocTemps(limits, false);
00853     bool halfSupport = m_environment & (SH_ARB_NVFP | SH_ARB_NVFP2);
00854     if(halfSupport) {
00855       allocTemps(limits, true);
00856     }
00857   } catch (int) {
00858     std::ostringstream os;
00859     os << "Out of temporary registers (" << limits.temps()
00860        << " were available)";
00861     throw ArbException(os.str());
00862   } catch (...) {
00863     throw;
00864   }
00865 
00866   // Allocate array register
00867   if (m_shader->palettes_begin() != m_shader->palettes.end()) {
00868     m_registers[m_address_register.node()] = new ArbReg(SH_ARB_REG_ADDRESS, 0);
00869     m_reglist.push_back(m_registers[m_address_register.node()]);
00870   }
00871 
00872   allocTextures(limits);
00873 }
00874 
00875 void ArbCode::bindSpecial(const ShProgramNode::VarList::const_iterator& begin,
00876                           const ShProgramNode::VarList::const_iterator& end,
00877                           const ArbBindingSpecs& specs, 
00878                           std::vector<int>& bindings,
00879                           ArbRegType type, int& num)
00880 {
00881   bindings.push_back(0);
00882   
00883   if (specs.semanticType == SH_ATTRIB) return;
00884   
00885   for (ShProgramNode::VarList::const_iterator I = begin; I != end; ++I) {
00886     ShVariableNodePtr node = *I;
00887     
00888     if (m_registers.find(node) != m_registers.end()) continue;
00889     if (node->specialType() != specs.semanticType) continue;
00890     
00891     m_registers[node] = new ArbReg(type, num++, node->name());
00892     m_registers[node]->binding.type = specs.binding;
00893     m_registers[node]->binding.index = bindings.back();
00894     m_reglist.push_back(m_registers[node]);
00895     
00896     bindings.back()++;
00897     if (bindings.back() == specs.maxBindings) break;
00898   }    
00899 }
00900 
00901 void ArbCode::allocInputs(const ArbLimits& limits)
00902 {
00903   // First, try to assign some "special" output register bindings
00904   for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00905     bindSpecial(m_shader->inputs.begin(), m_shader->inputs.end(),
00906                 arbBindingSpecs(false, m_unit)[i], m_inputBindings,
00907                 SH_ARB_REG_ATTRIB, m_numInputs);
00908   }
00909   
00910   for (ShProgramNode::VarList::const_iterator I = m_shader->inputs.begin();
00911        I != m_shader->inputs.end(); ++I) {
00912     ShVariableNodePtr node = *I;
00913     if (m_registers.find(node) != m_registers.end()) continue;
00914     m_registers[node] = new ArbReg(SH_ARB_REG_ATTRIB, m_numInputs++, node->name());
00915     m_reglist.push_back(m_registers[node]);
00916 
00917     // Binding
00918     for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00919       const ArbBindingSpecs& specs = arbBindingSpecs(false, m_unit)[i];
00920 
00921       if (specs.allowGeneric && m_inputBindings[i] < specs.maxBindings) {
00922         m_registers[node]->binding.type = specs.binding;
00923         m_registers[node]->binding.index = m_inputBindings[i];
00924         m_inputBindings[i]++;
00925         break;
00926       }
00927     }
00928   }
00929 }
00930 
00931 void ArbCode::allocOutputs(const ArbLimits& limits)
00932 {
00933   // First, try to assign some "special" output register bindings
00934   for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00935     bindSpecial(m_shader->outputs.begin(), m_shader->outputs.end(),
00936                 arbBindingSpecs(true, m_unit)[i], m_outputBindings,
00937                 SH_ARB_REG_OUTPUT, m_numOutputs);
00938   }
00939   
00940   for (ShProgramNode::VarList::const_iterator I = m_shader->outputs.begin();
00941        I != m_shader->outputs.end(); ++I) {
00942     ShVariableNodePtr node = *I;
00943     if (m_registers.find(node) != m_registers.end()) continue;
00944     m_registers[node] = new ArbReg(SH_ARB_REG_OUTPUT, m_numOutputs++, node->name());
00945     m_reglist.push_back(m_registers[node]);
00946     
00947     // Binding
00948     for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00949       const ArbBindingSpecs& specs = arbBindingSpecs(true, m_unit)[i];
00950 
00951       if (specs.allowGeneric && m_outputBindings[i] < specs.maxBindings) {
00952         m_registers[node]->binding.type = specs.binding;
00953         m_registers[node]->binding.index = m_outputBindings[i];
00954         m_outputBindings[i]++;
00955         break;
00956       }
00957     }
00958   }
00959 }
00960 
00961 void ArbCode::allocParam(const ArbLimits& limits, const ShVariableNodePtr& node)
00962 {
00963   // TODO: Check if we reached maximum
00964   if (m_registers.find(node) != m_registers.end()) return;
00965 
00966   m_registers[node] = new ArbReg(SH_ARB_REG_PARAM, m_numParams, node->name());
00967 
00968   if (!node->meta("opengl:state").empty())
00969     {
00970     m_registers[node]->binding.type = SH_ARB_REG_STATE;
00971     m_registers[node]->binding.name = node->meta("opengl:state");
00972     } 
00973   else
00974     {
00975     m_registers[node]->binding.type = SH_ARB_REG_PROGRAMLOC;
00976     m_registers[node]->binding.index = m_numParamBindings++;
00977     }
00978 
00979   m_reglist.push_back(m_registers[node]);
00980   m_numParams++;
00981 }
00982 
00983 void ArbCode::allocPalette(const ArbLimits& limits, const ShPaletteNodePtr& palette)
00984 {
00985   if (m_registers.find(palette) != m_registers.end()) return;
00986 
00987   m_registers[palette] = new ArbReg(SH_ARB_REG_PARAM, m_numParams, palette->name());
00988   m_registers[palette]->binding.type = SH_ARB_REG_PROGRAMLOC;
00989   m_registers[palette]->binding.index = m_numParamBindings;
00990   m_registers[palette]->binding.count = palette->palette_length();
00991   m_reglist.push_back(m_registers[palette]);
00992   m_numParams++;
00993   
00994   for (std::size_t i = 0; i < palette->palette_length(); i++) {
00995     ShVariableNodePtr node = palette->get_node(i);
00996     SH_DEBUG_ASSERT(m_registers.find(node) == m_registers.end());
00997     m_registers[node] = new ArbReg(SH_ARB_REG_PARAM, m_numParams + i, node->name());
00998     m_registers[node]->binding.type = SH_ARB_REG_PROGRAMLOC;
00999     m_registers[node]->binding.index = m_numParamBindings + i;
01000     m_reglist.push_back(m_registers[node]);
01001   }
01002 
01003   m_numParams += palette->palette_length();
01004   m_numParamBindings += palette->palette_length();
01005 }
01006 
01007 void ArbCode::allocConsts(const ArbLimits& limits)
01008 {
01009   for (ShProgramNode::VarList::const_iterator I = m_shader->constants.begin();
01010        I != m_shader->constants.end(); ++I) {
01011     ShVariableNodePtr node = *I;
01012 
01013     // @todo type don't really need to copy here, or shold use the
01014     // ArbReg's array...but whatever
01015     ShPointer<ShDataVariant<float, SH_HOST> > variant =
01016         new ShDataVariant<float, SH_HOST>(node->size());
01017     variant->set(node->getVariant());
01018 
01019     // TODO: improve efficiency
01020     RegMap::const_iterator J;
01021     for (J = m_registers.begin(); J != m_registers.end(); ++J) {
01022       if (J->second->type != SH_ARB_REG_CONST) continue;
01023       int f = 0;
01024       // TODO handle other stuff
01025       for (int i = 0; i < node->size(); i++) {
01026         if (J->second->binding.values[i] == (*variant)[i]) f++;
01027       }
01028       if (f == node->size()) break;
01029     }
01030     if (J == m_registers.end()) {
01031       m_registers[node] = new ArbReg(SH_ARB_REG_CONST, m_numConsts, node->name());
01032       m_reglist.push_back(m_registers[node]);
01033       for (int i = 0; i < 4; i++) {
01034         m_registers[node]->binding.values[i] = (float)(i < node->size() ? (*variant)[i] : 0.0);
01035       }
01036       m_numConsts++;
01037     } else {
01038       m_registers[node] = J->second;
01039     }
01040   }
01041 }
01042 
01043 bool mark(ShLinearAllocator& allocator, ShVariableNodePtr node, int i, bool half)
01044 {
01045   if (!node) return false;
01046   if (node->kind() != SH_TEMP) return false;
01047   if (half && (node->valueType() != SH_HALF)) return false; 
01048   if (node->hasValues()) return false;
01049   allocator.mark(node, i);
01050   return true;
01051 }
01052 
01053 bool markable(ShVariableNodePtr node, bool half)
01054 {
01055   if (!node) return false;
01056   if (node->kind() != SH_TEMP) return false;
01057   if (half && (node->valueType() != SH_HALF)) return false; 
01058   if (node->hasValues()) return false;
01059   return true;
01060 }
01061 
01062 struct ArbScope {
01063   ArbScope(int start)
01064     : start(start)
01065   {
01066   }
01067   
01068   typedef std::map< ShVariableNode*, std::bitset<4> > UsageMap;
01069 
01070   typedef std::set<ShVariableNode*> MarkList;
01071   
01072   MarkList need_mark; // Need to mark at end of loop
01073   int start; // location where loop started
01074   UsageMap usage_map;
01075   UsageMap write_map; // locations last written to
01076 };
01077 
01078 // @todo type
01079 // cheap hack with half to allocate temps twice, once for non-half temporaries
01080 // once for half temporaries
01081 void ArbCode::allocTemps(const ArbLimits& limits, bool half)
01082 {
01083 
01084   typedef std::list<ArbScope> ScopeStack;
01085   ScopeStack scopestack;
01086   
01087   ShLinearAllocator allocator(this);
01088 
01089 //   {
01090 //     ScopeStack scopestack;
01091 //     // First do a backwards traversal to find loop nodes that need to be
01092 //     // marked due to later uses of assignments
01093 //     std::map<ShVariableNode*, int> last_use;
01094     
01095 //     for (int i = (int)m_instructions.size() - 1; i >= 0; --i) {
01096 //       ArbInst instr = m_instructions[i];
01097 //       if (instr.op == SH_ARB_ENDREP) {
01098 //         scopestack.push_back((int)i);
01099 //       }
01100 //       if (instr.op == SH_ARB_REP) {
01101 //         const ArbScope& scope = scopestack.back();
01102 //         for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin();
01103 //              I != scope.need_mark.end(); ++I) {
01104 //           mark(allocator, *I, (int)i);
01105 //         }
01106 //         scopestack.pop_back();
01107 //       }
01108 
01109 //       if (markable(instr.dest.node())) {
01110 //         if (last_use.find(instr.dest.node().object()) == last_use.end()) continue;
01111 //         for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01112 //           ArbScope& scope = *S;
01113 //           // Note scope.start == location of ENDREP
01114 //           // TODO: Consider sub-components in last_use update and here.
01115 //           if (last_use[instr.dest.node().object()] > scope.start) {
01116 //             mark(allocator, instr.dest.node().object(), scope.start);
01117 //             scope.need_mark.insert(instr.dest.node().object());
01118 //           }
01119 //         }
01120 //       }
01121       
01122 //       for (int j = 0; j < ArbInst::max_num_sources; j++) {
01123 //         if (!markable(instr.src[j].node())) continue;
01124         
01125 //         if (last_use.find(instr.src[j].node().object()) == last_use.end()) {
01126 //           last_use[instr.src[j].node().object()] = i;
01127 //         }
01128 //       }
01129 //     }
01130 //   }
01131 
01132   {
01133     ScopeStack scopestack;
01134     // First do a backwards traversal to find loop nodes that need to be
01135     // marked due to later uses of assignments
01136 
01137     // push root stack
01138 
01139     scopestack.push_back(m_instructions.size() - 1);
01140     
01141     for (int i = (int)m_instructions.size() - 1; i >= 0; --i) {
01142       ArbInst instr = m_instructions[i];
01143       if (instr.op == SH_ARB_ENDREP) {
01144         scopestack.push_back((int)i);
01145       }
01146       if (instr.op == SH_ARB_REP) {
01147         const ArbScope& scope = scopestack.back();
01148         for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin();
01149              I != scope.need_mark.end(); ++I) {
01150           mark(allocator, *I, (int)i, half);
01151         }
01152         scopestack.pop_back();
01153       }
01154 
01155       if (markable(instr.dest.node(), half)) {
01156         std::bitset<4> writemask;
01157         for (int k = 0; k < instr.dest.size(); k++) {
01158           writemask[instr.dest.swizzle()[k]] = true;
01159         }
01160         std::bitset<4> used;
01161         for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01162           ArbScope& scope = *S;
01163 
01164           if ((used & writemask).any()) {
01165             mark(allocator, instr.dest.node().object(), scope.start, half);
01166             scope.need_mark.insert(instr.dest.node().object());
01167           }
01168           
01169           used |= scope.usage_map[instr.dest.node().object()];
01170         }
01171 
01172         ArbScope& scope = scopestack.back();
01173         scope.usage_map[instr.dest.node().object()] &= ~writemask;
01174       }
01175       
01176       for (int j = 0; j < ArbInst::max_num_sources; j++) {
01177         if (!markable(instr.src[j].node(), half)) continue;
01178         std::bitset<4> usemask;
01179         for (int k = 0; k < instr.src[j].size(); k++) {
01180           usemask[instr.src[j].swizzle()[k]] = true;
01181         }
01182         ArbScope& scope = scopestack.back();
01183         scope.usage_map[instr.src[j].node().object()] |= usemask;
01184       }
01185     }
01186   }
01187   
01188   for (std::size_t i = 0; i < m_instructions.size(); i++) {
01189     ArbInst instr = m_instructions[i];
01190     if (instr.op == SH_ARB_REP) {
01191       scopestack.push_back((int)i);
01192     }
01193     if (instr.op == SH_ARB_ENDREP) {
01194       const ArbScope& scope = scopestack.back();
01195       for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin();
01196            I != scope.need_mark.end(); ++I) {
01197         mark(allocator, *I, (int)i, half);
01198       }
01199       scopestack.pop_back();
01200     }
01201 
01202     if (mark(allocator, instr.dest.node(), (int)i, half)) {
01203       for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01204         ArbScope& scope = *S;
01205         std::bitset<4> writemask;
01206         for (int k = 0; k < instr.dest.size(); k++) {
01207           writemask[instr.dest.swizzle()[k]] = true;
01208         }
01209         // TODO: Only change the writemask for scopes that see this
01210         // write unconditionally
01211         // I.e. don't change it if the scope is outside an if
01212         // statement, or a post-BRK REP scope.
01213         scope.write_map[instr.dest.node().object()] |= writemask;
01214 
01215       }        
01216     }
01217     
01218     for (int j = 0; j < ArbInst::max_num_sources; j++) {
01219       if (mark(allocator, instr.src[j].node(), (int)i, half)) {
01220         for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01221           ArbScope& scope = *S;
01222           // Mark uses that weren't recently written to.
01223           std::bitset<4> usemask;
01224           for (int k = 0; k < instr.src[j].size(); k++) {
01225             usemask[instr.src[j].swizzle()[k]] = true;
01226           }
01227           if ((usemask & ~scope.write_map[instr.src[j].node().object()]).any()) {
01228             mark(allocator, instr.src[j].node(), scope.start, half);
01229             scope.need_mark.insert(instr.src[j].node().object());
01230           }
01231         }
01232       }
01233     }
01234   }
01235   
01236   m_tempRegs.clear();
01237   int limit;
01238   if(half) {
01239     m_numHalfTemps = 0;
01240     limit = limits.halftemps(); 
01241   } else {
01242     m_numTemps = 0;
01243     limit = limits.temps();
01244   }
01245 
01246   for (int i = 0; i < limit; i++) {
01247     m_tempRegs.push_back(i);
01248   }
01249   
01250   allocator.allocate();
01251   
01252   m_tempRegs.clear();
01253 }
01254 
01255 void ArbCode::allocTextures(const ArbLimits& limits)
01256 {
01257   for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin();
01258        I != m_shader->textures.end(); ++I) {
01259     ShTextureNodePtr node = *I;
01260     int index;
01261     index = m_numTextures;
01262     m_registers[node] = new ArbReg(SH_ARB_REG_TEXTURE, index, node->name());
01263     m_reglist.push_back(m_registers[node]);
01264     m_numTextures++;
01265   }
01266 }
01267 
01268 void ArbCode::bindTextures()
01269 {
01270   for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin();
01271        I != m_shader->textures.end(); ++I) {
01272     m_texture->bindTexture(*I, GL_TEXTURE0 + m_registers[*I]->index);
01273   }
01274 }
01275 
01276 }

Generated on Mon Jan 24 18:36:29 2005 for Sh by  doxygen 1.4.1