00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00027 #include "ArbCode.hpp"
00028 #include <iostream>
00029 #include <sstream>
00030 #include <cmath>
00031 #include <bitset>
00032
00033 #include "ShVariable.hpp"
00034 #include "ShDebug.hpp"
00035 #include "ShLinearAllocator.hpp"
00036 #include "ShInternals.hpp"
00037 #include "ShOptimizations.hpp"
00038 #include "ShEnvironment.hpp"
00039 #include "ShContext.hpp"
00040 #include "ShTypeInfo.hpp"
00041 #include "ShVariant.hpp"
00042 #include "ShTextureNode.hpp"
00043 #include "ShSyntax.hpp"
00044 #include "ArbReg.hpp"
00045 #include "Arb.hpp"
00046 #include "ShAttrib.hpp"
00047 #include "ShCastManager.hpp"
00048 #include "ShError.hpp"
00049
00050 namespace shgl {
00051
00052 using namespace SH;
00053
00054 #define shGlProgramStringARB glProgramStringARB
00055 #define shGlActiveTextureARB glActiveTextureARB
00056 #define shGlProgramLocalParameter4fvARB glProgramLocalParameter4fvARB
00057 #define shGlProgramEnvParameter4fvARB glProgramEnvParameter4fvARB
00058 #define shGlGetProgramivARB glGetProgramivARB
00059 #define shGlGenProgramsARB glGenProgramsARB
00060 #define shGlDeleteProgramsARB glDeleteProgramsARB
00061 #define shGlBindProgramARB glBindProgramARB
00062
00063 struct ArbBindingSpecs {
00064 ArbRegBinding binding;
00065 int maxBindings;
00066 ShSemanticType semanticType;
00067 bool allowGeneric;
00068 };
00069
00070 ArbBindingSpecs arbVertexAttribBindingSpecs[] = {
00071 {SH_ARB_REG_VERTEXPOS, 1, SH_POSITION, false},
00072 {SH_ARB_REG_VERTEXNRM, 1, SH_NORMAL, false},
00073 {SH_ARB_REG_VERTEXCOL, 1, SH_COLOR, false},
00074 {SH_ARB_REG_VERTEXTEX, 8, SH_TEXCOORD, true},
00075 {SH_ARB_REG_VERTEXFOG, 1, SH_ATTRIB, true},
00076 {SH_ARB_REG_NONE, 0, SH_ATTRIB, true}
00077 };
00078
00079 ArbBindingSpecs arbFragmentAttribBindingSpecs[] = {
00080 {SH_ARB_REG_FRAGMENTPOS, 1, SH_POSITION, false},
00081 {SH_ARB_REG_FRAGMENTCOL, 1, SH_COLOR, false},
00082 {SH_ARB_REG_FRAGMENTTEX, 8, SH_TEXCOORD, true},
00083 {SH_ARB_REG_FRAGMENTFOG, 1, SH_ATTRIB, true},
00084 {SH_ARB_REG_NONE, 0, SH_ATTRIB, true}
00085 };
00086
00087 ArbBindingSpecs arbVertexOutputBindingSpecs[] = {
00088 {SH_ARB_REG_RESULTPOS, 1, SH_POSITION, false},
00089 {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, false},
00090 {SH_ARB_REG_RESULTTEX, 8, SH_TEXCOORD, true},
00091 {SH_ARB_REG_RESULTFOG, 1, SH_ATTRIB, true},
00092 {SH_ARB_REG_RESULTPTS, 1, SH_ATTRIB, true},
00093 {SH_ARB_REG_NONE, 0, SH_ATTRIB}
00094 };
00095
00096 ArbBindingSpecs arbFragmentOutputBindingSpecs[] = {
00097 {SH_ARB_REG_RESULTCOL, 1, SH_COLOR, true},
00098 {SH_ARB_REG_RESULTDPT, 1, SH_ATTRIB, false},
00099 {SH_ARB_REG_NONE, 0, SH_ATTRIB}
00100 };
00101
00102 ArbBindingSpecs* arbBindingSpecs(bool output, const std::string& unit)
00103 {
00104 if (unit == "vertex")
00105 return output ? arbVertexOutputBindingSpecs : arbVertexAttribBindingSpecs;
00106 if (unit == "fragment")
00107 return output ? arbFragmentOutputBindingSpecs : arbFragmentAttribBindingSpecs;
00108 return 0;
00109 }
00110
00111 using namespace SH;
00112
00113 ArbCode::ArbCode(const ShProgramNodeCPtr& shader, const std::string& unit,
00114 TextureStrategy* texture)
00115 : m_texture(texture), m_shader(0), m_originalShader(0), m_unit(unit),
00116 m_numTemps(0), m_numHalfTemps(0), m_numInputs(0), m_numOutputs(0), m_numParams(0), m_numParamBindings(0),
00117 m_numConsts(0),
00118 m_numTextures(0), m_programId(0), m_environment(0), m_max_label(0),
00119 m_address_register(new ShVariableNode(SH_TEMP, 1, SH_FLOAT))
00120 {
00121 m_originalShader = const_cast<ShProgramNode*>(shader.object());
00122
00123 if (unit == "fragment") m_environment |= SH_ARB_FP;
00124 if (unit == "vertex") m_environment |= SH_ARB_VP;
00125
00126 const GLubyte* extensions = glGetString(GL_EXTENSIONS);
00127 if(extensions) {
00128 std::string extstr(reinterpret_cast<const char*>(extensions));
00129
00130 if (unit == "fragment") {
00131 if (extstr.find("NV_fragment_program_option") != std::string::npos) {
00132 m_environment |= SH_ARB_NVFP;
00133 }
00134 if (extstr.find("NV_fragment_program2") != std::string::npos) {
00135 m_environment |= SH_ARB_NVFP2;
00136 }
00137 if (extstr.find("ATI_draw_buffers") != std::string::npos) {
00138 m_environment |= SH_ARB_ATIDB;
00139 }
00140 }
00141 if (unit == "vertex") {
00142 if (extstr.find("NV_vertex_program2_option") != std::string::npos) {
00143 m_environment |= SH_ARB_NVVP2;
00144 }
00145 if (extstr.find("NV_vertex_program3") != std::string::npos) {
00146 m_environment |= SH_ARB_NVVP3;
00147 }
00148 }
00149 }
00150
00151
00152 m_convertMap[SH_DOUBLE] = SH_FLOAT;
00153
00154 bool halfSupport = m_environment & (SH_ARB_NVFP | SH_ARB_NVFP2);
00155 if(!halfSupport) m_convertMap[SH_HALF] = SH_FLOAT;
00156
00157 m_convertMap[SH_INT] = SH_FLOAT;
00158 m_convertMap[SH_SHORT] = halfSupport ? SH_HALF: SH_FLOAT;
00159 m_convertMap[SH_BYTE] = halfSupport ? SH_HALF: SH_FLOAT;
00160 m_convertMap[SH_UINT] = SH_FLOAT;
00161 m_convertMap[SH_USHORT] = halfSupport ? SH_HALF: SH_FLOAT;
00162 m_convertMap[SH_UBYTE] = halfSupport ? SH_HALF: SH_FLOAT;
00163
00164 m_convertMap[SH_FINT] = SH_FLOAT;
00165 m_convertMap[SH_FSHORT] = SH_FLOAT;
00166 m_convertMap[SH_FBYTE] = halfSupport ? SH_HALF : SH_FLOAT;
00167 m_convertMap[SH_FUINT] = SH_FLOAT;
00168 m_convertMap[SH_FUSHORT] = SH_FLOAT;
00169 m_convertMap[SH_FUBYTE] = halfSupport ? SH_HALF : SH_FLOAT;
00170 }
00171
00172 ArbCode::~ArbCode()
00173 {
00174 if (m_shader != m_originalShader)
00175 {
00176 delete m_shader;
00177 }
00178 }
00179
00180 void ArbCode::generate()
00181 {
00182
00183 ShProgramNodePtr temp_shader = m_originalShader->clone();
00184 m_shader = temp_shader.object();
00185 m_shader->acquireRef();
00186 temp_shader = NULL;
00187
00188 ShContext::current()->enter(m_shader);
00189 ShTransformer transform(m_shader);
00190
00191
00192 transform.convertInputOutput();
00193 transform.splitTuples(4, m_splits);
00194 transform.convertTextureLookups();
00195 transform.convertToFloat(m_convertMap);
00196
00197 if(transform.changed()) {
00198 optimize(m_shader);
00199 m_shader->collectVariables();
00200 } else {
00201 m_shader->releaseRef();
00202 m_shader = m_originalShader;
00203 ShContext::current()->exit();
00204 ShContext::current()->enter(m_shader);
00205 }
00206
00207 try {
00208 if (m_environment & SH_ARB_NVFP2) {
00209
00210 ShStructural str(m_shader->ctrlGraph);
00211
00212 genStructNode(str.head());
00213
00214 } else {
00215 m_shader->ctrlGraph->entry()->clearMarked();
00216 genNode(m_shader->ctrlGraph->entry());
00217
00218 if (m_environment & SH_ARB_NVVP2) {
00219 m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(m_shader->ctrlGraph->exit())));
00220 }
00221 }
00222 m_shader->ctrlGraph->entry()->clearMarked();
00223 allocRegs();
00224 } catch (...) {
00225 m_shader->ctrlGraph->entry()->clearMarked();
00226 ShContext::current()->exit();
00227 throw;
00228 }
00229 ShContext::current()->exit();
00230 }
00231
00232 bool ArbCode::allocateRegister(const ShVariableNodePtr& var)
00233 {
00234 if (!var) return true;
00235 if (var->kind() != SH_TEMP) return true;
00236 if (var->uniform()) return true;
00237
00238 if (m_tempRegs.empty()) {
00239
00240 throw 1;
00241
00242 }
00243
00244 int idx = m_tempRegs.front();
00245 m_tempRegs.pop_front();
00246 if(var->valueType() == SH_HALF) {
00247 if (idx + 1 > m_numHalfTemps) m_numHalfTemps = idx + 1;
00248 m_registers[var] = new ArbReg(SH_ARB_REG_HALF_TEMP, idx);
00249 } else {
00250 if (idx + 1 > m_numTemps) m_numTemps = idx + 1;
00251 m_registers[var] = new ArbReg(SH_ARB_REG_TEMP, idx);
00252 }
00253 m_reglist.push_back(m_registers[var]);
00254
00255 return true;
00256 }
00257
00258 void ArbCode::freeRegister(const ShVariableNodePtr& var)
00259 {
00260 if (!var) return;
00261 if (var->kind() != SH_TEMP) return;
00262 if (var->uniform()) return;
00263
00264 SH_DEBUG_ASSERT(m_registers.find(var) != m_registers.end());
00265 m_tempRegs.push_front(m_registers[var]->index);
00266 }
00267
00268 void ArbCode::upload()
00269 {
00270 if (!m_programId) {
00271 SH_GL_CHECK_ERROR(shGlGenProgramsARB(1, &m_programId));
00272 }
00273
00274 SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId));
00275
00276 std::ostringstream out;
00277 print(out);
00278 std::string text = out.str();
00279 shGlProgramStringARB(arbTarget(m_unit), GL_PROGRAM_FORMAT_ASCII_ARB,
00280 (GLsizei)text.size(), text.c_str());
00281 int error = glGetError();
00282 std::ostringstream error_os;
00283 if (error == GL_NO_ERROR) return;
00284
00285 error_os << "Failed to upload ARB program." << std::endl;
00286 if (error == GL_INVALID_OPERATION) {
00287 error_os << "Program error:" << std::endl;
00288 int pos = -1;
00289 SH_GL_CHECK_ERROR(glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos));
00290 if (pos >= 0){
00291 const unsigned char* message = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
00292 error_os << "Error at character " << pos << std::endl;
00293 error_os << "Driver Message: " << message << std::endl;
00294 while (pos >= 0 && text[pos] != '\n') pos--;
00295 if (pos > 0) pos++;
00296 error_os << "Code: " << text.substr(pos, text.find('\n', pos)) << std::endl;
00297 }
00298 } else {
00299 error_os << "Unknown error." << std::endl;
00300 }
00301 shError(ArbException(error_os.str()));
00302 }
00303
00304 void ArbCode::bind()
00305 {
00306 if (!m_programId) {
00307 upload();
00308 }
00309
00310 SH_GL_CHECK_ERROR(shGlBindProgramARB(arbTarget(m_unit), m_programId));
00311
00312 ShContext::current()->set_binding(std::string("arb:") + m_unit, ShProgram(m_originalShader));
00313
00314
00315 for (RegMap::const_iterator I = m_registers.begin(); I != m_registers.end(); ++I) {
00316 ShVariableNodePtr node = I->first;
00317 ArbReg reg = *I->second;
00318 if (node->hasValues() && reg.type == SH_ARB_REG_PARAM) {
00319 updateUniform(node);
00320 }
00321 }
00322
00323
00324 bindTextures();
00325 }
00326
00327 void ArbCode::update()
00328 {
00329 bindTextures();
00330 }
00331
00332 void ArbCode::updateUniform(const ShVariableNodePtr& uniform)
00333 {
00334 int i;
00335
00336 if (!uniform) return;
00337
00338 if (!uniform->meta("opengl:readonly").empty())
00339 {
00340 return;
00341 }
00342
00343 ShVariantCPtr uniformVariant = uniform->getVariant();
00344 RegMap::const_iterator I = m_registers.find(uniform);
00345 if (I == m_registers.end()) {
00346 if( m_splits.count(uniform) > 0 ) {
00347 ShTransformer::VarNodeVec &splitVec = m_splits[uniform];
00348
00349 int offset = 0;
00350 int copySwiz[4];
00351 for(ShTransformer::VarNodeVec::iterator it = splitVec.begin();
00352 it != splitVec.end(); offset += (*it)->size(), ++it) {
00353
00354 for(i = 0; i < (*it)->size(); ++i) copySwiz[i] = i + offset;
00355 (*it)->setVariant(uniformVariant->get(false,
00356 ShSwizzle(uniform->size(), (*it)->size(), copySwiz)));
00357 updateUniform(*it);
00358 }
00359 }
00360 return;
00361 }
00362
00363 ShTextureNodePtr tex = shref_dynamic_cast<ShTextureNode>(uniform);
00364 if (tex) {
00365 return;
00366 }
00367
00368 const ArbReg& reg = *I->second;
00369
00370
00371
00372
00373 float values[4];
00374 ShPointer<ShDataVariant<float, SH_HOST> > floatVariant =
00375 new ShDataVariant<float, SH_HOST>(uniform->size());
00376 floatVariant->set(uniformVariant);
00377
00378 for (i = 0; i < uniform->size(); i++) {
00379
00380 values[i] = (*floatVariant)[i];
00381 }
00382 for (; i < 4; i++) {
00383 values[i] = 0.0;
00384 }
00385
00386 if (reg.type != SH_ARB_REG_PARAM) return;
00387 switch(reg.binding.type) {
00388 case SH_ARB_REG_PROGRAMLOC:
00389 SH_GL_CHECK_ERROR(shGlProgramLocalParameter4fvARB(arbTarget(m_unit), reg.binding.index, values));
00390 break;
00391 case SH_ARB_REG_PROGRAMENV:
00392 SH_GL_CHECK_ERROR(shGlProgramEnvParameter4fvARB(arbTarget(m_unit), reg.binding.index, values));
00393 break;
00394 case SH_ARB_REG_STATE:
00395 SH_DEBUG_WARN("Updating uniforms bound to OpenGL state is not currently supported.");
00396 default:
00397 return;
00398 }
00399 }
00400
00401 std::ostream& ArbCode::printVar(std::ostream& out, bool dest, const ShVariable& var,
00402 bool collectingOp, const ShSwizzle& destSwiz = ShSwizzle(4),
00403 bool do_swiz = true) const
00404 {
00405 RegMap::const_iterator I = m_registers.find(var.node());
00406 if (I == m_registers.end()) {
00407 out << "<no reg for " << var.name() << ">";
00408 return out;
00409 }
00410 const ArbReg& reg = *I->second;
00411
00412
00413 if (var.neg()) out << '-';
00414
00415
00416 out << reg;
00417
00418 if (do_swiz) {
00419
00420 const char* swizChars = "xyzw";
00421 out << ".";
00422 if (dest) {
00423 bool masked[4] = {false, false, false, false};
00424 for (int i = 0; i < var.swizzle().size(); i++) {
00425 masked[var.swizzle()[i]] = true;
00426 }
00427 for (int i = 0; i < 4; i++) {
00428 if (masked[i]) out << swizChars[i];
00429 }
00430 } else if (var.swizzle().size() == 1) {
00431 out << swizChars[var.swizzle()[0]];
00432 } else if (collectingOp) {
00433 for (int i = 0; i < 4; i++) {
00434 out << swizChars[i < var.swizzle().size() ? var.swizzle()[i] : i];
00435 }
00436 } else {
00437 for (int i = 0; i < 4; i++) {
00438 int j;
00439 for (j = 0; j < destSwiz.size(); j++) {
00440 if (destSwiz[j] == i) break;
00441 }
00442 if (j == destSwiz.size()) j = i;
00443 out << swizChars[j < var.size() ? var.swizzle()[j] : j];
00444 }
00445 }
00446 }
00447
00448 return out;
00449 }
00450
// Counter used as a numbered end-of-line marker when printing programs.
struct LineNumberer {
  LineNumberer() : line(0) {}
  int line;  // number of the most recently finished line (0 before the first)
};

// Finishes the current line with " # <n>", where n is the incremented line
// count, followed by std::endl.
std::ostream& operator<<(std::ostream& out, LineNumberer& l)
{
  l.line += 1;
  return out << " # " << l.line << std::endl;
}
00461
00462 bool ArbCode::printSamplingInstruction(std::ostream& out, const ArbInst& instr) const
00463 {
00464 if (instr.op != SH_ARB_TEX && instr.op != SH_ARB_TXP && instr.op != SH_ARB_TXB
00465 && instr.op != SH_ARB_TXD)
00466 return false;
00467
00468 ShTextureNodePtr texture = shref_dynamic_cast<ShTextureNode>(instr.src[1].node());
00469 RegMap::const_iterator texRegIt = m_registers.find(instr.src[1].node());
00470 if (texRegIt == m_registers.end()) {
00471 SH_DEBUG_PRINT("Unallocated texture found.");
00472 SH_DEBUG_PRINT("Operation = " << arbOpInfo[instr.op].name);
00473 SH_DEBUG_PRINT("Destination* = " << instr.dest.node().object());
00474 if (instr.dest.node()) {
00475 SH_DEBUG_PRINT("Destination = " << instr.dest.name());
00476 }
00477 SH_DEBUG_PRINT("Texture pointer = " << texture.object());
00478 if (texture) {
00479 SH_DEBUG_PRINT("Texture = " << texture->name());
00480 }
00481 out << " INVALID TEX INSTRUCTION;";
00482 return true;
00483 }
00484
00485
00486 const ArbReg& texReg = *texRegIt->second;
00487
00488 out << " ";
00489 out << arbOpInfo[instr.op].name << " ";
00490 printVar(out, true, instr.dest, false) << ", ";
00491 printVar(out, false, instr.src[0], true, instr.dest.swizzle()) << ", ";
00492 if (instr.op == SH_ARB_TXD) {
00493 printVar(out, false, instr.src[2], true, instr.dest.swizzle()) << ", ";
00494 printVar(out, false, instr.src[3], true, instr.dest.swizzle()) << ", ";
00495 }
00496 out << "texture[" << texReg.index << "], ";
00497 switch (texture->dims()) {
00498 case SH_TEXTURE_1D:
00499 out << "1D";
00500 break;
00501 case SH_TEXTURE_2D:
00502 out << "2D";
00503 break;
00504 case SH_TEXTURE_3D:
00505 out << "3D";
00506 break;
00507 case SH_TEXTURE_CUBE:
00508 out << "CUBE";
00509 break;
00510 case SH_TEXTURE_RECT:
00511 out << "RECT";
00512 break;
00513 }
00514 out << ";";
00515 return true;
00516 }
00517
00518 std::ostream& ArbCode::print(std::ostream& out)
00519 {
00520 LineNumberer endl;
00521 const char* swizChars = "xyzw";
00522
00523
00524 if (m_unit == "vertex") {
00525 out << "!!ARBvp1.0" << endl;
00526 if (m_environment & SH_ARB_NVVP3) out << "OPTION NV_vertex_program3;" << endl;
00527 else if (m_environment & SH_ARB_NVVP2) out << "OPTION NV_vertex_program2;" << endl;
00528 }
00529 if (m_unit == "fragment") {
00530 out << "!!ARBfp1.0" << endl;
00531
00532 if (m_environment & SH_ARB_NVFP2) out << "OPTION NV_fragment_program2;" << endl;
00533 else if (m_environment & SH_ARB_NVFP) out << "OPTION NV_fragment_program;" << endl;
00534
00535 if (m_environment & SH_ARB_ATIDB) out << "OPTION ATI_draw_buffers;" << endl;
00536 }
00537
00538
00539
00540 for (RegList::const_iterator I = m_reglist.begin();
00541 I != m_reglist.end(); ++I) {
00542 if ((*I)->type == SH_ARB_REG_TEMP) continue;
00543 if ((*I)->type == SH_ARB_REG_HALF_TEMP) continue;
00544 if ((*I)->type == SH_ARB_REG_TEXTURE) continue;
00545 out << " ";
00546 (*I)->printDecl(out);
00547 out << endl;
00548 }
00549 if (m_numTemps + m_numHalfTemps > 0) {
00550 out << " TEMP ";
00551 for (int i = 0; i < m_numTemps; i++) {
00552 if (i > 0) out << ", ";
00553 out << ArbReg(SH_ARB_REG_TEMP, i);
00554 }
00555 if(m_numTemps > 0 && m_numHalfTemps > 0) out << ", ";
00556 for (int i = 0; i < m_numHalfTemps; i++) {
00557 if (i > 0) out << ", ";
00558 out << ArbReg(SH_ARB_REG_HALF_TEMP, i);
00559 }
00560 out << ";" << endl;
00561 }
00562
00563 out << endl;
00564
00565
00566 for (ArbInstList::const_iterator I = m_instructions.begin();
00567 I != m_instructions.end(); ++I) {
00568 if (I->op == SH_ARB_LABEL) {
00569 out << "label" << I->label << ": ";
00570 } else if (I->op == SH_ARB_ELSE) {
00571 out << " ELSE;";
00572 } else if (I->op == SH_ARB_ENDIF) {
00573 out << " ENDIF;";
00574 } else if (I->op == SH_ARB_BRA) {
00575 if (I->src[0].node()) {
00576 out << " MOVC ";
00577 printVar(out, true, I->src[0], false);
00578 out << ", ";
00579 printVar(out, false, I->src[0], false, I->src[0].swizzle());
00580 out << ";" << endl;
00581 }
00582 out << " BRA label" << I->label;
00583 if (I->src[0].node()) {
00584 out << " (GT";
00585 out << ".";
00586 for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00587 out << swizChars[I->src[0].swizzle()[i]];
00588 }
00589 out << ")";
00590 }
00591 out << ";";
00592 } else if (I->op == SH_ARB_REP) {
00593 out << " REP ";
00594 printVar(out, false, I->src[0], false, I->src[0].swizzle());
00595 out << ";";
00596 } else if (I->op == SH_ARB_BRK) {
00597 if (I->src[0].node()) {
00598 out << " MOVC ";
00599 printVar(out, true, I->src[0], false);
00600 out << ", ";
00601 printVar(out, false, I->src[0], false, I->src[0].swizzle());
00602 out << ";" << endl;
00603 }
00604 out << " BRK ";
00605 if (I->src[0].node()) {
00606 out << " (";
00607 if (I->invert) {
00608 out << "LE";
00609 } else {
00610 out << "GT";
00611 }
00612 out << ".";
00613 for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00614 out << swizChars[I->src[0].swizzle()[i]];
00615 }
00616 out << ")";
00617 }
00618 out << ";";
00619 } else if (I->op == SH_ARB_ENDREP) {
00620 out << " ENDREP;";
00621 } else if (I->op == SH_ARB_IF) {
00622 if (I->src[0].node()) {
00623 out << " MOVC ";
00624 printVar(out, true, I->src[0], false);
00625 out << ", ";
00626 printVar(out, false, I->src[0], false, I->src[0].swizzle());
00627 out << ";" << endl;
00628 }
00629 out << " IF ";
00630 if (I->src[0].node()) {
00631 out << "GT";
00632 out << ".";
00633 for (int i = 0; i < I->src[0].swizzle().size(); i++) {
00634 out << swizChars[I->src[0].swizzle()[i]];
00635 }
00636 } else {
00637 out << "TR";
00638 }
00639 out << ";";
00640 } else if (I->op == SH_ARB_ARRAYMOV) {
00641 out << " MOV ";
00642 printVar(out, true, I->dest, false);
00643 out << ", ";
00644 printVar(out, false, I->src[0], false, ShSwizzle(4), false);
00645 out << "[";
00646 printVar(out, false, I->src[1], false);
00647 out << "]";
00648 out << ";";
00649 } else if (!printSamplingInstruction(out, *I)) {
00650 out << " ";
00651 out << arbOpInfo[I->op].name;
00652 if (I->update_cc) out << "C";
00653 out << " ";
00654 printVar(out, true, I->dest, arbOpInfo[I->op].collectingOp);
00655 if (I->ccode != ArbInst::NOCC) {
00656 out << " (";
00657 out << arbCCnames[I->ccode];
00658 out << ".";
00659 for (int i = 0; i < 4; i++) {
00660 out << swizChars[(i < I->ccswiz.size() ? I->ccswiz[i]
00661 : (I->ccswiz.size() == 1 ? I->ccswiz[0] : i))];
00662 }
00663 out << ") ";
00664 }
00665 for (int i = 0; i < arbOpInfo[I->op].arity; i++) {
00666 out << ", ";
00667 printVar(out, false, I->src[i], arbOpInfo[I->op].collectingOp, I->dest.swizzle());
00668 }
00669 out << ';';
00670 }
00671 out << " # ";
00672 if (I->dest.node() && I->dest.has_name()) {
00673 out << "d=" << I->dest.name() << " ";
00674 }
00675 for (int i = 0; i < ArbInst::max_num_sources; i++) {
00676 if (I->src[i].node() && I->src[i].has_name()) {
00677 out << "s[" << i << "]=" << I->src[i].name() << " ";
00678 }
00679 }
00680 out << endl;
00681 }
00682
00683 out << "END" << endl;
00684 return out;
00685 }
00686
00687 std::ostream& ArbCode::describe_interface(std::ostream& out) {
00688 ShProgramNode::VarList::const_iterator I;
00689 out << "Inputs:" << std::endl;
00690 for (I = m_shader->inputs.begin(); I != m_shader->inputs.end(); ++I) {
00691 out << " ";
00692 m_registers[*I]->printDecl(out);
00693 out << std::endl;
00694 }
00695
00696 out << "Outputs:" << std::endl;
00697 for (I = m_shader->outputs.begin(); I != m_shader->outputs.end(); ++I) {
00698 out << " ";
00699 m_registers[*I]->printDecl(out);
00700 out << std::endl;
00701 }
00702 return out;
00703 }
00704
00705 int ArbCode::getLabel(ShCtrlGraphNodePtr node)
00706 {
00707 if (m_label_map.find(node) == m_label_map.end()) {
00708 m_label_map[node] = m_max_label++;
00709 }
00710 return m_label_map[node];
00711 }
00712
00713 void ArbCode::genNode(ShCtrlGraphNodePtr node)
00714 {
00715 if (!node || node->marked()) return;
00716 node->mark();
00717
00718 if (node == m_shader->ctrlGraph->exit()) return;
00719
00720 if (m_environment & SH_ARB_NVVP2) {
00721 m_instructions.push_back(ArbInst(SH_ARB_LABEL, getLabel(node)));
00722 }
00723
00724 if (node->block) for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin();
00725 I != node->block->end(); ++I) {
00726 const ShStatement& stmt = *I;
00727 emit(stmt);
00728 }
00729
00730 if (m_environment & SH_ARB_NVVP2) {
00731 for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin();
00732 I != node->successors.end(); I++) {
00733 m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(I->node), I->cond));
00734 }
00735 if(!node->successors.empty() || node->follower->marked()) {
00736 m_instructions.push_back(ArbInst(SH_ARB_BRA, getLabel(node->follower)));
00737 }
00738 for(std::vector<SH::ShCtrlGraphBranch>::iterator I = node->successors.begin();
00739 I != node->successors.end(); I++) {
00740 genNode(I->node);
00741 }
00742 }
00743
00744 genNode(node->follower);
00745 }
00746
00747 void ArbCode::genStructNode(const ShStructuralNodePtr& node)
00748 {
00749 if (!node) return;
00750
00751 if (node->type == ShStructuralNode::UNREDUCED) {
00752 ShBasicBlockPtr block = node->cfg_node->block;
00753 if (block) for (ShBasicBlock::ShStmtList::const_iterator I = block->begin();
00754 I != block->end(); ++I) {
00755 const ShStatement& stmt = *I;
00756 emit(stmt);
00757 }
00758 } else if (node->type == ShStructuralNode::BLOCK) {
00759 for (ShStructuralNode::StructNodeList::const_iterator I = node->structnodes.begin();
00760 I != node->structnodes.end(); ++I) {
00761 genStructNode(*I);
00762 }
00763 } else if (node->type == ShStructuralNode::IFELSE) {
00764 ShStructuralNodePtr header = node->structnodes.front();
00765
00766 ShVariable cond;
00767 ShStructuralNodePtr ifnode, elsenode;
00768 for (ShStructuralNode::SuccessorList::iterator I = header->succs.begin();
00769 I != header->succs.end(); ++I) {
00770 if (I->first.node()) {
00771 ifnode = I->second;
00772 cond = I->first;
00773 } else {
00774 elsenode = I->second;
00775 }
00776 }
00777 genStructNode(header);
00778 m_instructions.push_back(ArbInst(SH_ARB_IF, ShVariable(), cond)); {
00779 genStructNode(ifnode);
00780 } m_instructions.push_back(ArbInst(SH_ARB_ELSE, ShVariable())); {
00781 genStructNode(elsenode);
00782 } m_instructions.push_back(ArbInst(SH_ARB_ENDIF, ShVariable()));
00783 } else if (node->type == ShStructuralNode::WHILELOOP) {
00784 ShStructuralNodePtr header = node->structnodes.front();
00785
00786 ShVariable cond = header->succs.front().first;
00787
00788 ShStructuralNodePtr body = node->structnodes.back();
00789
00790 float maxloopval = 255.0f;
00791 ShConstAttrib1f maxloop(maxloopval);
00792
00793 m_shader->constants.push_back(maxloop.node());
00794 m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop));
00795 genStructNode(header);
00796 ArbInst brk(SH_ARB_BRK, ShVariable(), cond);
00797 brk.invert = true;
00798 m_instructions.push_back(brk);
00799 genStructNode(body);
00800
00801 m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable()));
00802 } else if (node->type == ShStructuralNode::SELFLOOP) {
00803 ShStructuralNodePtr loopnode = node->structnodes.front();
00804
00805 bool condexit = true;
00806
00807 ShVariable cond;
00808 for (ShStructuralNode::SuccessorList::iterator I = loopnode->succs.begin();
00809 I != loopnode->succs.end(); ++I) {
00810 if (I->first.node()) {
00811 if (I->second == loopnode) condexit = false; else condexit = true;
00812 cond = I->first;
00813 }
00814 }
00815
00816 float maxloopval = 255.0f;
00817 ShConstAttrib1f maxloop(maxloopval);
00818
00819 m_shader->constants.push_back(maxloop.node());
00820 m_instructions.push_back(ArbInst(SH_ARB_REP, ShVariable(), maxloop));
00821 genStructNode(loopnode);
00822 ArbInst brk(SH_ARB_BRK, ShVariable(), cond);
00823 if (!condexit) {
00824 brk.invert = true;
00825 }
00826 m_instructions.push_back(brk);
00827 m_instructions.push_back(ArbInst(SH_ARB_ENDREP, ShVariable()));
00828 }
00829 }
00830
00831 void ArbCode::allocRegs()
00832 {
00833 ArbLimits limits(m_unit);
00834
00835 allocInputs(limits);
00836
00837 allocOutputs(limits);
00838
00839 for (ShProgramNode::PaletteList::const_iterator I = m_shader->palettes_begin();
00840 I != m_shader->palettes_end(); ++I) {
00841 allocPalette(limits, *I);
00842 }
00843
00844 for (ShProgramNode::VarList::const_iterator I = m_shader->uniforms_begin();
00845 I != m_shader->uniforms_end(); ++I) {
00846 allocParam(limits, *I);
00847 }
00848
00849 allocConsts(limits);
00850
00851 try {
00852 allocTemps(limits, false);
00853 bool halfSupport = m_environment & (SH_ARB_NVFP | SH_ARB_NVFP2);
00854 if(halfSupport) {
00855 allocTemps(limits, true);
00856 }
00857 } catch (int) {
00858 std::ostringstream os;
00859 os << "Out of temporary registers (" << limits.temps()
00860 << " were available)";
00861 throw ArbException(os.str());
00862 } catch (...) {
00863 throw;
00864 }
00865
00866
00867 if (m_shader->palettes_begin() != m_shader->palettes.end()) {
00868 m_registers[m_address_register.node()] = new ArbReg(SH_ARB_REG_ADDRESS, 0);
00869 m_reglist.push_back(m_registers[m_address_register.node()]);
00870 }
00871
00872 allocTextures(limits);
00873 }
00874
00875 void ArbCode::bindSpecial(const ShProgramNode::VarList::const_iterator& begin,
00876 const ShProgramNode::VarList::const_iterator& end,
00877 const ArbBindingSpecs& specs,
00878 std::vector<int>& bindings,
00879 ArbRegType type, int& num)
00880 {
00881 bindings.push_back(0);
00882
00883 if (specs.semanticType == SH_ATTRIB) return;
00884
00885 for (ShProgramNode::VarList::const_iterator I = begin; I != end; ++I) {
00886 ShVariableNodePtr node = *I;
00887
00888 if (m_registers.find(node) != m_registers.end()) continue;
00889 if (node->specialType() != specs.semanticType) continue;
00890
00891 m_registers[node] = new ArbReg(type, num++, node->name());
00892 m_registers[node]->binding.type = specs.binding;
00893 m_registers[node]->binding.index = bindings.back();
00894 m_reglist.push_back(m_registers[node]);
00895
00896 bindings.back()++;
00897 if (bindings.back() == specs.maxBindings) break;
00898 }
00899 }
00900
00901 void ArbCode::allocInputs(const ArbLimits& limits)
00902 {
00903
00904 for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00905 bindSpecial(m_shader->inputs.begin(), m_shader->inputs.end(),
00906 arbBindingSpecs(false, m_unit)[i], m_inputBindings,
00907 SH_ARB_REG_ATTRIB, m_numInputs);
00908 }
00909
00910 for (ShProgramNode::VarList::const_iterator I = m_shader->inputs.begin();
00911 I != m_shader->inputs.end(); ++I) {
00912 ShVariableNodePtr node = *I;
00913 if (m_registers.find(node) != m_registers.end()) continue;
00914 m_registers[node] = new ArbReg(SH_ARB_REG_ATTRIB, m_numInputs++, node->name());
00915 m_reglist.push_back(m_registers[node]);
00916
00917
00918 for (int i = 0; arbBindingSpecs(false, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00919 const ArbBindingSpecs& specs = arbBindingSpecs(false, m_unit)[i];
00920
00921 if (specs.allowGeneric && m_inputBindings[i] < specs.maxBindings) {
00922 m_registers[node]->binding.type = specs.binding;
00923 m_registers[node]->binding.index = m_inputBindings[i];
00924 m_inputBindings[i]++;
00925 break;
00926 }
00927 }
00928 }
00929 }
00930
00931 void ArbCode::allocOutputs(const ArbLimits& limits)
00932 {
00933
00934 for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00935 bindSpecial(m_shader->outputs.begin(), m_shader->outputs.end(),
00936 arbBindingSpecs(true, m_unit)[i], m_outputBindings,
00937 SH_ARB_REG_OUTPUT, m_numOutputs);
00938 }
00939
00940 for (ShProgramNode::VarList::const_iterator I = m_shader->outputs.begin();
00941 I != m_shader->outputs.end(); ++I) {
00942 ShVariableNodePtr node = *I;
00943 if (m_registers.find(node) != m_registers.end()) continue;
00944 m_registers[node] = new ArbReg(SH_ARB_REG_OUTPUT, m_numOutputs++, node->name());
00945 m_reglist.push_back(m_registers[node]);
00946
00947
00948 for (int i = 0; arbBindingSpecs(true, m_unit)[i].binding != SH_ARB_REG_NONE; i++) {
00949 const ArbBindingSpecs& specs = arbBindingSpecs(true, m_unit)[i];
00950
00951 if (specs.allowGeneric && m_outputBindings[i] < specs.maxBindings) {
00952 m_registers[node]->binding.type = specs.binding;
00953 m_registers[node]->binding.index = m_outputBindings[i];
00954 m_outputBindings[i]++;
00955 break;
00956 }
00957 }
00958 }
00959 }
00960
00961 void ArbCode::allocParam(const ArbLimits& limits, const ShVariableNodePtr& node)
00962 {
00963
00964 if (m_registers.find(node) != m_registers.end()) return;
00965
00966 m_registers[node] = new ArbReg(SH_ARB_REG_PARAM, m_numParams, node->name());
00967
00968 if (!node->meta("opengl:state").empty())
00969 {
00970 m_registers[node]->binding.type = SH_ARB_REG_STATE;
00971 m_registers[node]->binding.name = node->meta("opengl:state");
00972 }
00973 else
00974 {
00975 m_registers[node]->binding.type = SH_ARB_REG_PROGRAMLOC;
00976 m_registers[node]->binding.index = m_numParamBindings++;
00977 }
00978
00979 m_reglist.push_back(m_registers[node]);
00980 m_numParams++;
00981 }
00982
00983 void ArbCode::allocPalette(const ArbLimits& limits, const ShPaletteNodePtr& palette)
00984 {
00985 if (m_registers.find(palette) != m_registers.end()) return;
00986
00987 m_registers[palette] = new ArbReg(SH_ARB_REG_PARAM, m_numParams, palette->name());
00988 m_registers[palette]->binding.type = SH_ARB_REG_PROGRAMLOC;
00989 m_registers[palette]->binding.index = m_numParamBindings;
00990 m_registers[palette]->binding.count = palette->palette_length();
00991 m_reglist.push_back(m_registers[palette]);
00992 m_numParams++;
00993
00994 for (std::size_t i = 0; i < palette->palette_length(); i++) {
00995 ShVariableNodePtr node = palette->get_node(i);
00996 SH_DEBUG_ASSERT(m_registers.find(node) == m_registers.end());
00997 m_registers[node] = new ArbReg(SH_ARB_REG_PARAM, m_numParams + i, node->name());
00998 m_registers[node]->binding.type = SH_ARB_REG_PROGRAMLOC;
00999 m_registers[node]->binding.index = m_numParamBindings + i;
01000 m_reglist.push_back(m_registers[node]);
01001 }
01002
01003 m_numParams += palette->palette_length();
01004 m_numParamBindings += palette->palette_length();
01005 }
01006
01007 void ArbCode::allocConsts(const ArbLimits& limits)
01008 {
01009 for (ShProgramNode::VarList::const_iterator I = m_shader->constants.begin();
01010 I != m_shader->constants.end(); ++I) {
01011 ShVariableNodePtr node = *I;
01012
01013
01014
01015 ShPointer<ShDataVariant<float, SH_HOST> > variant =
01016 new ShDataVariant<float, SH_HOST>(node->size());
01017 variant->set(node->getVariant());
01018
01019
01020 RegMap::const_iterator J;
01021 for (J = m_registers.begin(); J != m_registers.end(); ++J) {
01022 if (J->second->type != SH_ARB_REG_CONST) continue;
01023 int f = 0;
01024
01025 for (int i = 0; i < node->size(); i++) {
01026 if (J->second->binding.values[i] == (*variant)[i]) f++;
01027 }
01028 if (f == node->size()) break;
01029 }
01030 if (J == m_registers.end()) {
01031 m_registers[node] = new ArbReg(SH_ARB_REG_CONST, m_numConsts, node->name());
01032 m_reglist.push_back(m_registers[node]);
01033 for (int i = 0; i < 4; i++) {
01034 m_registers[node]->binding.values[i] = (float)(i < node->size() ? (*variant)[i] : 0.0);
01035 }
01036 m_numConsts++;
01037 } else {
01038 m_registers[node] = J->second;
01039 }
01040 }
01041 }
01042
01043 bool mark(ShLinearAllocator& allocator, ShVariableNodePtr node, int i, bool half)
01044 {
01045 if (!node) return false;
01046 if (node->kind() != SH_TEMP) return false;
01047 if (half && (node->valueType() != SH_HALF)) return false;
01048 if (node->hasValues()) return false;
01049 allocator.mark(node, i);
01050 return true;
01051 }
01052
01053 bool markable(ShVariableNodePtr node, bool half)
01054 {
01055 if (!node) return false;
01056 if (node->kind() != SH_TEMP) return false;
01057 if (half && (node->valueType() != SH_HALF)) return false;
01058 if (node->hasValues()) return false;
01059 return true;
01060 }
01061
01062 struct ArbScope {
01063 ArbScope(int start)
01064 : start(start)
01065 {
01066 }
01067
01068 typedef std::map< ShVariableNode*, std::bitset<4> > UsageMap;
01069
01070 typedef std::set<ShVariableNode*> MarkList;
01071
01072 MarkList need_mark;
01073 int start;
01074 UsageMap usage_map;
01075 UsageMap write_map;
01076 };
01077
01078
01079
01080
01081 void ArbCode::allocTemps(const ArbLimits& limits, bool half)
01082 {
01083
01084 typedef std::list<ArbScope> ScopeStack;
01085 ScopeStack scopestack;
01086
01087 ShLinearAllocator allocator(this);
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132 {
01133 ScopeStack scopestack;
01134
01135
01136
01137
01138
01139 scopestack.push_back(m_instructions.size() - 1);
01140
01141 for (int i = (int)m_instructions.size() - 1; i >= 0; --i) {
01142 ArbInst instr = m_instructions[i];
01143 if (instr.op == SH_ARB_ENDREP) {
01144 scopestack.push_back((int)i);
01145 }
01146 if (instr.op == SH_ARB_REP) {
01147 const ArbScope& scope = scopestack.back();
01148 for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin();
01149 I != scope.need_mark.end(); ++I) {
01150 mark(allocator, *I, (int)i, half);
01151 }
01152 scopestack.pop_back();
01153 }
01154
01155 if (markable(instr.dest.node(), half)) {
01156 std::bitset<4> writemask;
01157 for (int k = 0; k < instr.dest.size(); k++) {
01158 writemask[instr.dest.swizzle()[k]] = true;
01159 }
01160 std::bitset<4> used;
01161 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01162 ArbScope& scope = *S;
01163
01164 if ((used & writemask).any()) {
01165 mark(allocator, instr.dest.node().object(), scope.start, half);
01166 scope.need_mark.insert(instr.dest.node().object());
01167 }
01168
01169 used |= scope.usage_map[instr.dest.node().object()];
01170 }
01171
01172 ArbScope& scope = scopestack.back();
01173 scope.usage_map[instr.dest.node().object()] &= ~writemask;
01174 }
01175
01176 for (int j = 0; j < ArbInst::max_num_sources; j++) {
01177 if (!markable(instr.src[j].node(), half)) continue;
01178 std::bitset<4> usemask;
01179 for (int k = 0; k < instr.src[j].size(); k++) {
01180 usemask[instr.src[j].swizzle()[k]] = true;
01181 }
01182 ArbScope& scope = scopestack.back();
01183 scope.usage_map[instr.src[j].node().object()] |= usemask;
01184 }
01185 }
01186 }
01187
01188 for (std::size_t i = 0; i < m_instructions.size(); i++) {
01189 ArbInst instr = m_instructions[i];
01190 if (instr.op == SH_ARB_REP) {
01191 scopestack.push_back((int)i);
01192 }
01193 if (instr.op == SH_ARB_ENDREP) {
01194 const ArbScope& scope = scopestack.back();
01195 for (ArbScope::MarkList::const_iterator I = scope.need_mark.begin();
01196 I != scope.need_mark.end(); ++I) {
01197 mark(allocator, *I, (int)i, half);
01198 }
01199 scopestack.pop_back();
01200 }
01201
01202 if (mark(allocator, instr.dest.node(), (int)i, half)) {
01203 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01204 ArbScope& scope = *S;
01205 std::bitset<4> writemask;
01206 for (int k = 0; k < instr.dest.size(); k++) {
01207 writemask[instr.dest.swizzle()[k]] = true;
01208 }
01209
01210
01211
01212
01213 scope.write_map[instr.dest.node().object()] |= writemask;
01214
01215 }
01216 }
01217
01218 for (int j = 0; j < ArbInst::max_num_sources; j++) {
01219 if (mark(allocator, instr.src[j].node(), (int)i, half)) {
01220 for (ScopeStack::iterator S = scopestack.begin(); S != scopestack.end(); ++S) {
01221 ArbScope& scope = *S;
01222
01223 std::bitset<4> usemask;
01224 for (int k = 0; k < instr.src[j].size(); k++) {
01225 usemask[instr.src[j].swizzle()[k]] = true;
01226 }
01227 if ((usemask & ~scope.write_map[instr.src[j].node().object()]).any()) {
01228 mark(allocator, instr.src[j].node(), scope.start, half);
01229 scope.need_mark.insert(instr.src[j].node().object());
01230 }
01231 }
01232 }
01233 }
01234 }
01235
01236 m_tempRegs.clear();
01237 int limit;
01238 if(half) {
01239 m_numHalfTemps = 0;
01240 limit = limits.halftemps();
01241 } else {
01242 m_numTemps = 0;
01243 limit = limits.temps();
01244 }
01245
01246 for (int i = 0; i < limit; i++) {
01247 m_tempRegs.push_back(i);
01248 }
01249
01250 allocator.allocate();
01251
01252 m_tempRegs.clear();
01253 }
01254
01255 void ArbCode::allocTextures(const ArbLimits& limits)
01256 {
01257 for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin();
01258 I != m_shader->textures.end(); ++I) {
01259 ShTextureNodePtr node = *I;
01260 int index;
01261 index = m_numTextures;
01262 m_registers[node] = new ArbReg(SH_ARB_REG_TEXTURE, index, node->name());
01263 m_reglist.push_back(m_registers[node]);
01264 m_numTextures++;
01265 }
01266 }
01267
01268 void ArbCode::bindTextures()
01269 {
01270 for (ShProgramNode::TexList::const_iterator I = m_shader->textures.begin();
01271 I != m_shader->textures.end(); ++I) {
01272 m_texture->bindTexture(*I, GL_TEXTURE0 + m_registers[*I]->index);
01273 }
01274 }
01275
01276 }