Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ArbEmit.cpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory
00004 // Project administrator: Michael D. McCool
00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa,
00006 //          Michael D. McCool
00007 // 
00008 // This software is provided 'as-is', without any express or implied
00009 // warranty. In no event will the authors be held liable for any damages
00010 // arising from the use of this software.
00011 // 
00012 // Permission is granted to anyone to use this software for any purpose,
00013 // including commercial applications, and to alter it and redistribute it
00014 // freely, subject to the following restrictions:
00015 // 
00016 // 1. The origin of this software must not be misrepresented; you must
00017 // not claim that you wrote the original software. If you use this
00018 // software in a product, an acknowledgment in the product documentation
00019 // would be appreciated but is not required.
00020 // 
00021 // 2. Altered source versions must be plainly marked as such, and must
00022 // not be misrepresented as being the original software.
00023 // 
00024 // 3. This notice may not be removed or altered from any source
00025 // distribution.
00027 #include "ArbCode.hpp"
00028 #include <algorithm>
00029 #include <cmath>
00030 #include "ShDebug.hpp"
00031 #include "ShError.hpp"
00032 #include "ShAttrib.hpp"
00033 #include "ShTypeInfo.hpp"
00034 
#ifdef WIN32
namespace {
// Base-2 logarithm used on WIN32 builds, derived from the natural
// logarithm through the change-of-base identity log2(x) = ln(x) / ln(2).
double log2(double x)
{
  const double ln_of_two = log(2.0);
  return log(x) / ln_of_two;
}
}
#endif
00040 
00041 namespace shgl {
00042 
00043 using namespace SH;
00044 
00045 // Transformations
namespace {
// Bit flags that can be OR-ed together in ArbMapping::transforms; emit()
// tests each bit and rewrites the statement accordingly before lowering it.
const unsigned int scalarize    = 1u << 0; // Split into scalar instructions
const unsigned int swap_sources = 1u << 1; // Swap first and second sources
const unsigned int negate_first = 1u << 2; // Negate first source
const unsigned int delay_mask   = 1u << 3; // Do writemasking in separate step
}
00052 
00053 struct ArbMapping {
00054   ShOperation sh_op;
00055   unsigned int filters;
00056   
00057   unsigned int transforms;
00058   ArbOp arb_op;
00059 
00060   typedef void (ArbCode::*ArbFunction)(const ShStatement&);
00061   ArbFunction function;
00062 };
00063 
00064 ArbMapping ArbCode::table[] = {
00065   {SH_OP_ASN, SH_ARB_ANY, 0, SH_ARB_MOV, 0},
00066 
00067   // Arithmetic
00068   {SH_OP_ADD,  SH_ARB_ANY,   0,            SH_ARB_ADD, 0},
00069   {SH_OP_NEG,  SH_ARB_ANY,   negate_first, SH_ARB_MOV, 0},
00070   {SH_OP_MUL,  SH_ARB_ANY,   0,            SH_ARB_MUL, 0},
00071 
00072   // Removed this temporarily because of a bug in the NV drivers
00073   //{SH_OP_DIV,  SH_ARB_NVFP2, scalarize,    SH_ARB_DIV, 0},
00074   
00075   {SH_OP_DIV,  SH_ARB_ANY,   scalarize,    SH_ARB_FUN, &ArbCode::emit_div},
00076   {SH_OP_POW,  SH_ARB_ANY,   scalarize,    SH_ARB_POW, 0},
00077   {SH_OP_RCP,  SH_ARB_ANY,   scalarize,    SH_ARB_RCP, 0},
00078   {SH_OP_RSQ,  SH_ARB_ANY,   scalarize,    SH_ARB_RSQ, 0},
00079   {SH_OP_SQRT, SH_ARB_ANY,   scalarize,    SH_ARB_FUN, &ArbCode::emit_sqrt},
00080 
00081   {SH_OP_LRP, SH_ARB_FP,  0, SH_ARB_LRP, 0},
00082   {SH_OP_LRP, SH_ARB_VP,  0, SH_ARB_FUN, &ArbCode::emit_lerp},
00083   {SH_OP_MAD, SH_ARB_ANY, 0, SH_ARB_MAD, 0},
00084 
00085   // Sum/product of components
00086   {SH_OP_CMUL, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_cmul},
00087   {SH_OP_CSUM, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_csum},
00088   
00089   // Dot product
00090   {SH_OP_DOT, SH_ARB_VEC1,                0,         SH_ARB_MUL, 0},
00091   {SH_OP_DOT, SH_ARB_VEC3,                0,         SH_ARB_DP3, 0},
00092   {SH_OP_DOT, SH_ARB_VEC4,                0,         SH_ARB_DP4, 0},
00093   {SH_OP_DOT, SH_ARB_VEC2 | SH_ARB_NVFP2, 0,         SH_ARB_DP2, 0},
00094   {SH_OP_DOT, SH_ARB_VEC2,                0,         SH_ARB_FUN, &ArbCode::emit_dot2},
00095 
00096   // Boolean
00097   {SH_OP_SLT, SH_ARB_ANY,   0,            SH_ARB_SLT, 0},
00098   {SH_OP_SGE, SH_ARB_ANY,   0,            SH_ARB_SGE, 0},
00099 
00100   {SH_OP_SLE, SH_ARB_NVVP2, 0,            SH_ARB_SLE, 0},
00101   {SH_OP_SLE, SH_ARB_NVFP,  0,            SH_ARB_SLE, 0},
00102   {SH_OP_SLE, SH_ARB_ANY,   swap_sources, SH_ARB_SGE, 0},
00103 
00104   {SH_OP_SGT, SH_ARB_NVVP2, 0,            SH_ARB_SGT, 0},
00105   {SH_OP_SGT, SH_ARB_NVFP,  0,            SH_ARB_SGT, 0},
00106   {SH_OP_SGT, SH_ARB_ANY,   swap_sources, SH_ARB_SLT, 0},
00107 
00108   {SH_OP_SEQ, SH_ARB_NVVP2, 0,            SH_ARB_SEQ, 0},
00109   {SH_OP_SEQ, SH_ARB_NVFP,  0,            SH_ARB_SEQ, 0},
00110   {SH_OP_SEQ, SH_ARB_ANY,   0,            SH_ARB_FUN, &ArbCode::emit_eq},
00111 
00112   {SH_OP_SNE, SH_ARB_NVVP2, 0,            SH_ARB_SNE, 0},
00113   {SH_OP_SNE, SH_ARB_NVFP,  0,            SH_ARB_SNE, 0},
00114   {SH_OP_SNE, SH_ARB_ANY,   0,            SH_ARB_FUN, &ArbCode::emit_eq},
00115 
00116   // Clamping
00117   {SH_OP_ABS,  SH_ARB_ANY, 0, SH_ARB_ABS, 0},
00118   {SH_OP_CEIL, SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_ceil},
00119   {SH_OP_FLR,  SH_ARB_ANY, 0, SH_ARB_FLR, 0},
00120   {SH_OP_FRAC, SH_ARB_ANY, 0, SH_ARB_FRC, 0},
00121   {SH_OP_MOD,  SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_mod},
00122   {SH_OP_MAX,  SH_ARB_ANY, 0, SH_ARB_MAX, 0},
00123   {SH_OP_MIN,  SH_ARB_ANY, 0, SH_ARB_MIN, 0},
00124   {SH_OP_SGN,  SH_ARB_NVVP2, 0, SH_ARB_SSG, 0},
00125   {SH_OP_SGN,  SH_ARB_ANY, 0, SH_ARB_FUN, &ArbCode::emit_sgn},
00126   
00127   // Trig
00128   {SH_OP_ACOS,  SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_invtrig},
00129   {SH_OP_ASIN,  SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_invtrig},
00130   /* TODO
00131   {SH_OP_ATAN,  SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_invtrig},
00132   {SH_OP_ATAN2, SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_invtrig},
00133   */
00134   {SH_OP_COS,   SH_ARB_FP,  scalarize, SH_ARB_COS, 0},
00135   {SH_OP_COS,   SH_ARB_VP,  0,         SH_ARB_FUN, &ArbCode::emit_trig},
00136   {SH_OP_SIN,   SH_ARB_FP,  scalarize, SH_ARB_SIN, 0},
00137   {SH_OP_SIN,   SH_ARB_VP,  0,         SH_ARB_FUN, &ArbCode::emit_trig},
00138   {SH_OP_TAN,   SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_tan},
00139 
00140   // Derivatives
00141   {SH_OP_DX, SH_ARB_NVFP, 0, SH_ARB_DDX, 0},
00142   {SH_OP_DY, SH_ARB_NVFP, 0, SH_ARB_DDY, 0},
00143 
00144   // Expontential
00145   {SH_OP_EXP2,  SH_ARB_ANY, scalarize, SH_ARB_EX2, 0},
00146   {SH_OP_LOG2,  SH_ARB_ANY, scalarize, SH_ARB_LG2, 0},
00147   {SH_OP_EXP,   SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_exp},
00148   {SH_OP_LOG,   SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_log},
00149   {SH_OP_EXP10, SH_ARB_ANY, scalarize, SH_ARB_FUN, &ArbCode::emit_exp},
00150   {SH_OP_LOG10, SH_ARB_ANY, 0,         SH_ARB_FUN, &ArbCode::emit_log},
00151 
00152   // Geometric
00153   {SH_OP_NORM, SH_ARB_NVFP2 | SH_ARB_VEC3, 0, SH_ARB_NRM, 0},
00154   {SH_OP_NORM, SH_ARB_ANY,                 0, SH_ARB_FUN, &ArbCode::emit_norm},
00155   {SH_OP_XPD,  SH_ARB_ANY | SH_ARB_VEC3,   0, SH_ARB_XPD, 0},
00156 
00157   // Texture
00158   {SH_OP_TEX,  SH_ARB_NVVP3, 0, SH_ARB_FUN, &ArbCode::emit_tex},
00159   {SH_OP_TEX,  SH_ARB_FP,    0, SH_ARB_FUN, &ArbCode::emit_tex},
00160   {SH_OP_TEXI, SH_ARB_NVVP3, 0, SH_ARB_FUN, &ArbCode::emit_tex},
00161   {SH_OP_TEXI, SH_ARB_FP,    0, SH_ARB_FUN, &ArbCode::emit_tex},
00162 
00163   {SH_OP_TEXD,  SH_ARB_NVFP,  0, SH_ARB_FUN, &ArbCode::emit_tex},
00164   
00165   // Misc.
00166   {SH_OP_COND, SH_ARB_NVFP, 0, SH_ARB_FUN, &ArbCode::emit_nvcond},
00167   {SH_OP_COND, SH_ARB_NVVP2, 0, SH_ARB_FUN, &ArbCode::emit_nvcond},
00168   {SH_OP_COND, SH_ARB_ANY, negate_first, SH_ARB_CMP, 0},
00169   {SH_OP_KIL,  SH_ARB_FP,  0, SH_ARB_FUN, &ArbCode::emit_kil},
00170 
00171   {SH_OP_PAL,  SH_ARB_VP, 0, SH_ARB_FUN, &ArbCode::emit_pal},
00172 
00173   {SH_OP_ASN, SH_ARB_END, 0, SH_ARB_FUN, 0}
00174 };
00175 
00176 void ArbCode::emit(const ShStatement& stmt)
00177 {
00178   int maxlen = 0; // Maximum tuple length over all sources
00179   for (int i = 0; i < opInfo[stmt.op].arity; i++) {
00180     if (stmt.src[i].size() > maxlen) maxlen = stmt.src[i].size();
00181   }
00182 
00183   unsigned int match = m_environment;
00184   switch(maxlen) {
00185   case 1: match |= SH_ARB_VEC1; break;
00186   case 2: match |= SH_ARB_VEC2; break;
00187   case 3: match |= SH_ARB_VEC3; break;
00188   case 4: match |= SH_ARB_VEC4; break;
00189   }
00190   
00191   ArbMapping* mapping;
00192 
00193   for (mapping = table; mapping->filters != SH_ARB_END; mapping++) {
00194     if (mapping->sh_op != stmt.op) continue;
00195     if ((mapping->filters & match) != mapping->filters) continue;
00196     break;
00197   }
00198   if (mapping->filters == SH_ARB_END) {
00199     shError(ShException(std::string("ARB Code: Unknown operation ") + opInfo[stmt.op].name));
00200     return;
00201   }
00202 
00203   ShStatement actual = stmt;
00204   
00205   if (mapping->transforms & swap_sources) {
00206     ShVariable tmp(actual.src[0]);
00207     actual.src[0] = actual.src[1];
00208     actual.src[1] = tmp;
00209   }
00210   if (mapping->transforms & negate_first) {
00211     actual.src[0] = -actual.src[0];
00212   }
00213   
00214   std::list<ShStatement> stmts;
00215   if ((mapping->transforms & scalarize) && maxlen > 1) {
00216     for (int i = 0; i < maxlen; i++) {
00217       ShStatement scalar = actual;
00218       scalar.dest = scalar.dest(i);
00219       for (int j = 0; j < opInfo[actual.op].arity; j++) {
00220         scalar.src[j] = scalar.src[j](std::min(i, scalar.src[j].size() - 1));
00221       }
00222       stmts.push_back(scalar);
00223     }
00224   } else {
00225     stmts.push_back(actual);
00226   }
00227 
00228   if (mapping->transforms & delay_mask) {
00229     for (std::list<ShStatement>::iterator I = stmts.begin(); I != stmts.end(); ++I) {
00230       if (I->dest.swizzle().identity()) continue;
00231 
00232       ShVariable realdest(I->dest);
00233       ShVariable tmp(I->dest.node()->clone(SH_TEMP, SH_ATTRIB, 4));
00234       I->dest = tmp;
00235 
00236       ShStatement mask(realdest, SH_OP_ASN, tmp);
00237 
00238       std::list<ShStatement>::iterator next = I; ++next;
00239       I = stmts.insert(next, mask);
00240     }
00241   }
00242 
00243   for (std::list<ShStatement>::const_iterator I = stmts.begin(); I != stmts.end(); ++I) {
00244     if (mapping->arb_op == SH_ARB_FUN) {
00245       (this->*(mapping->function))(*I);
00246     } else {
00247       // HACK for delay_mask to work.
00248       ArbOp op = (I->op == SH_OP_ASN) ? SH_ARB_MOV : mapping->arb_op;
00249       switch (opInfo[I->op].arity) {
00250       case 0:
00251         m_instructions.push_back(ArbInst(op, I->dest));
00252         break;
00253       case 1:
00254         m_instructions.push_back(ArbInst(op, I->dest,
00255                                          I->src[0]));
00256         break;
00257       case 2:
00258         m_instructions.push_back(ArbInst(op, I->dest,
00259                                          I->src[0], I->src[1]));
00260         break;
00261       case 3:
00262         m_instructions.push_back(ArbInst(op, I->dest,
00263                                          I->src[0], I->src[1], I->src[2]));
00264         break;
00265       }
00266     }
00267   }
00268 }
00269 
00270 void ArbCode::emit_div(const ShStatement& stmt)
00271 {
00272   // @todo type should handle other types (half-floats, fixed point)
00273   ShVariable rcp(new ShVariableNode(SH_TEMP, 1, SH_FLOAT));
00274   m_instructions.push_back(ArbInst(SH_ARB_RCP, rcp, stmt.src[1]));
00275   m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, stmt.src[0], rcp));
00276 }
00277 
00278 void ArbCode::emit_sqrt(const ShStatement& stmt)
00279 {
00280   ShVariable rsq(new ShVariableNode(SH_TEMP, 1, SH_FLOAT));
00281   m_instructions.push_back(ArbInst(SH_ARB_RSQ, rsq, stmt.src[0]));
00282   m_instructions.push_back(ArbInst(SH_ARB_RCP, stmt.dest, rsq));
00283 }
00284 
00285 void ArbCode::emit_lerp(const ShStatement& stmt)
00286 {
00287   // lerp(f,a,b)= f*a + (1-f)*b = f*(a-b) + b
00288   
00289   ShVariable t(new ShVariableNode(SH_TEMP, stmt.src[1].size(), SH_FLOAT));
00290   m_instructions.push_back(ArbInst(SH_ARB_ADD, t, stmt.src[1], -stmt.src[2]));
00291   m_instructions.push_back(ArbInst(SH_ARB_MAD, stmt.dest, stmt.src[0], t, stmt.src[2]));
00292 }
00293 
00294 void ArbCode::emit_dot2(const ShStatement& stmt)
00295 {
00296   ShVariable mul(new ShVariableNode(SH_TEMP, 2, SH_FLOAT));
00297   m_instructions.push_back(ArbInst(SH_ARB_MUL, mul, stmt.src[0], stmt.src[1]));
00298   m_instructions.push_back(ArbInst(SH_ARB_ADD, stmt.dest, mul(0), mul(1)));
00299 }
00300 
00301 void ArbCode::emit_eq(const ShStatement& stmt)
00302 {
00303   ShVariable t1(new ShVariableNode(SH_TEMP, stmt.dest.size(), SH_FLOAT));
00304   ShVariable t2(new ShVariableNode(SH_TEMP, stmt.dest.size(), SH_FLOAT));
00305 
00306   ArbOp op;
00307   if (stmt.op == SH_OP_SEQ) {
00308     op = SH_ARB_SGE;
00309   } else if (stmt.op == SH_OP_SNE) {
00310     op = SH_ARB_SLT;
00311   } else {
00312     SH_DEBUG_ASSERT(false);
00313   }
00314   
00315   m_instructions.push_back(ArbInst(SH_ARB_SGE, t1, stmt.src[0], stmt.src[1]));
00316   m_instructions.push_back(ArbInst(SH_ARB_SGE, t2, stmt.src[1], stmt.src[0]));
00317   m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, t1, t2));
00318 }
00319 
00320 void ArbCode::emit_ceil(const ShStatement& stmt)
00321 {
00322   m_instructions.push_back(ArbInst(SH_ARB_FLR, stmt.dest, -stmt.src[0])); 
00323   m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest, -stmt.dest));
00324 }
00325 
00326 void ArbCode::emit_mod(const ShStatement& stmt)
00327 {
00328   // TODO - is this really optimal?
00329   ShVariable t1(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00330   ShVariable t2(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00331   
00332   // result = x - sign(x/y)*floor(abs(x/y))*y
00333   emit(ShStatement(t1, stmt.src[0], SH_OP_DIV, stmt.src[1]));
00334   m_instructions.push_back(ArbInst(SH_ARB_ABS, t2, t1));
00335   emit(ShStatement(t1, SH_OP_SGN, t1));
00336   m_instructions.push_back(ArbInst(SH_ARB_FLR, t2, t2)); 
00337   m_instructions.push_back(ArbInst(SH_ARB_MUL, t1, t1, t2)); 
00338   m_instructions.push_back(ArbInst(SH_ARB_MUL, t1, t1, stmt.src[1])); 
00339   m_instructions.push_back(ArbInst(SH_ARB_SUB, stmt.dest, stmt.src[0], t1)); 
00340 }
00341 
00342 void ArbCode::emit_trig(const ShStatement& stmt)
00343 {
00344   // Use float constants.  Conversions will take place as necessary
00345   ShConstAttrib4f c0(0.0, 0.5, 1.0, 0.0);
00346   ShConstAttrib4f c1(0.25, -9.0, 0.75, 1.0/(2.0*M_PI));
00347   ShConstAttrib4f c2(24.9808039603, -24.9808039603, -60.1458091736, 60.1458091736);
00348   ShConstAttrib4f c3(85.4537887573, -85.4537887573, -64.9393539429, 64.9393539429);
00349   ShConstAttrib4f c4(19.7392082214, -19.7392082214, -1.0, 1.0);
00350 
00351   m_shader->constants.push_back(c0.node());
00352   m_shader->constants.push_back(c1.node());
00353   m_shader->constants.push_back(c2.node());
00354   m_shader->constants.push_back(c3.node());
00355   m_shader->constants.push_back(c4.node());
00356   
00357   ShVariable r0(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00358   ShVariable r1(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00359   ShVariable r2(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00360   ShVariable rs(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00361   
00362   if (stmt.op == SH_OP_SIN) {
00363     m_instructions.push_back(ArbInst(SH_ARB_MAD, rs, c1(3,3,3,3), stmt.src[0], -c1(0,0,0,0)));
00364   } else if (stmt.op == SH_OP_COS) {
00365     m_instructions.push_back(ArbInst(SH_ARB_MUL, rs, c1(3,3,3,3), stmt.src[0]));
00366   } else {
00367     SH_DEBUG_ASSERT(false);
00368   }
00369   m_instructions.push_back(ArbInst(SH_ARB_FRC, rs, rs));
00370   for (int i = 0; i < stmt.src[0].size(); i++) {
00371     m_instructions.push_back(ArbInst(SH_ARB_SLT, r2(0), rs(i), c1(0)));
00372     m_instructions.push_back(ArbInst(SH_ARB_SGE, r2(1,2), rs(i,i), c1(1,2)));
00373     m_instructions.push_back(ArbInst(SH_ARB_DP3, r2(1), r2(0,1,2), c4(2,3,2)));
00374     m_instructions.push_back(ArbInst(SH_ARB_ADD, r0(0,1,2), -rs(i,i,i), c0(0,1,2)));
00375     m_instructions.push_back(ArbInst(SH_ARB_MUL, r0, r0, r0));
00376     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, c2(0,1,0,1), r0, c2(2,3,2,3)));
00377     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c3(0,1,0,1)));
00378     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c3(2,3,2,3)));
00379     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c4(0,1,0,1)));
00380     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1, r1, r0, c4(2,3,2,3)));
00381     m_instructions.push_back(ArbInst(SH_ARB_DP3, r0(0), r1(0,1,2), -r2(0,1,2)));
00382     m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest(i), r0(0)));
00383   }
00384 }
00385 
00386 void ArbCode::emit_invtrig(const ShStatement& stmt)
00387 {
00388   // Use float constants.  Conversions will take place to other types as necessary 
00389   ShConstAttrib4f c0(0.0, 1.570796327, -0.5860008052, 0.5860008052);
00390   ShConstAttrib4f c1(1.571945105, -1.571945105, -1.669668977, 1.669668977);
00391   ShConstAttrib4f c2(0.8999841642, -0.8999841642, -0.6575341673, 0.6575341673);
00392   ShConstAttrib4f c3(1.012386649, -1.012386649, 0.9998421793, -0.9998421793);
00393   ShConstAttrib4f c4(1.0, -1.0, 1.0, -1.0);
00394 
00395   m_shader->constants.push_back(c0.node());
00396   m_shader->constants.push_back(c1.node());
00397   m_shader->constants.push_back(c2.node());
00398   m_shader->constants.push_back(c3.node());
00399   m_shader->constants.push_back(c4.node());
00400   
00401   ShVariable r0(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00402   ShVariable r1(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00403   ShVariable r2(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00404   ShVariable offset(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00405   ShVariable output(new ShVariableNode(SH_TEMP, stmt.dest.size(), SH_FLOAT));
00406   m_instructions.push_back(ArbInst(SH_ARB_ABS, r0, stmt.src[0]));
00407   m_instructions.push_back(ArbInst(SH_ARB_MAD, offset, -r0, r0, c4(0,0,0,0)));
00408   
00409   m_instructions.push_back(ArbInst(SH_ARB_MOV, r2, c0(0,1,0,1)));
00410   for (int i = 0; i < stmt.src[0].size(); i++) {
00411     m_instructions.push_back(ArbInst(SH_ARB_SLT, r2(1), stmt.src[0](i), c0(0)));
00412     m_instructions.push_back(ArbInst(SH_ARB_SGE, r2(0), stmt.src[0](i), c0(0)));
00413     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), c0(2,3), r0(i,i), c1(0,1)));
00414     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c1(2,3)));
00415     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c2(0,1)));
00416     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c2(2,3)));
00417     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c3(0,1)));
00418     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), r1(0,1), r0(i,i), c3(2,3)));
00419     m_instructions.push_back(ArbInst(SH_ARB_RSQ,offset(i), offset(i)));
00420     m_instructions.push_back(ArbInst(SH_ARB_RCP,offset(i), offset(i)));
00421     m_instructions.push_back(ArbInst(SH_ARB_MAD, r1(0,1), c4(1,0), offset(i,i), r1(0,1)));
00422     if (stmt.op == SH_OP_ACOS) {
00423       m_instructions.push_back(ArbInst(SH_ARB_DP3, output(i), r1(0,1,2), r2(0,1,2)));
00424     } else {
00425       m_instructions.push_back(ArbInst(SH_ARB_DP3, stmt.dest(i), r1(0,1,2), r2(0,1,2)));
00426     }
00427   }
00428   if (stmt.op == SH_OP_ACOS) {
00429     m_instructions.push_back(ArbInst(SH_ARB_ADD, stmt.dest, -output, c0(1,1,1,1)));
00430   }
00431 }
00432 
00433 void ArbCode::emit_tan(const ShStatement& stmt)
00434 {
00435   ShVariable tmp1(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00436   ShVariable tmp2(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00437 
00438   emit(ShStatement(tmp1, SH_OP_COS, stmt.src[0]));
00439   emit(ShStatement(tmp1, SH_OP_RCP, tmp1));
00440   emit(ShStatement(tmp2, SH_OP_SIN, stmt.src[0]));
00441   
00442   m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp1, tmp2));
00443 }
00444 
00445 void ArbCode::emit_exp(const ShStatement& stmt)
00446 {
00447   float basef = (stmt.op == SH_OP_EXP ? M_E : 10.0f);
00448 
00449   ShConstAttrib1f base(basef);
00450   m_shader->constants.push_back(base.node());
00451 
00452   m_instructions.push_back(ArbInst(SH_ARB_POW, stmt.dest, base, stmt.src[0]));
00453 }
00454 
00455 void ArbCode::emit_log(const ShStatement& stmt)
00456 {
00457   float scalef = 1.0/log2((stmt.op == SH_OP_LOG ? M_E : 10.0f));
00458 
00459   ShConstAttrib1f scale(scalef);
00460   m_shader->constants.push_back(scale.node());
00461 
00462   ShVariable tmp(new ShVariableNode(SH_TEMP, stmt.dest.size(), SH_FLOAT));
00463   
00464   emit(ShStatement(tmp, SH_OP_LOG2, stmt.src[0]));
00465   m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp, scale));
00466 }
00467 
00468 void ArbCode::emit_norm(const ShStatement& stmt)
00469 {
00470   ShVariable tmp(new ShVariableNode(SH_TEMP, 1, SH_FLOAT));
00471   emit(ShStatement(tmp, stmt.src[0], SH_OP_DOT, stmt.src[0]));
00472   m_instructions.push_back(ArbInst(SH_ARB_RSQ, tmp, tmp));
00473   m_instructions.push_back(ArbInst(SH_ARB_MUL, stmt.dest, tmp, stmt.src[0]));
00474 }
00475 
00476 void ArbCode::emit_sgn(const ShStatement& stmt)
00477 {
00478   ShVariable tmp(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00479   m_instructions.push_back(ArbInst(SH_ARB_ABS, tmp, stmt.src[0]));
00480   emit(ShStatement(stmt.dest, stmt.src[0], SH_OP_DIV, tmp));
00481 }
00482 
00483 void ArbCode::emit_tex(const ShStatement& stmt)
00484 {
00485   bool delay = false;
00486   ShVariable tmpdest;
00487   ShVariable tmpsrc;
00488   
00489   if (!stmt.dest.swizzle().identity()) {
00490     tmpdest = ShVariable(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00491     tmpsrc = tmpdest;
00492     delay = true;
00493   }
00494 
00495   ShTextureNodePtr tnode = shref_dynamic_cast<ShTextureNode>(stmt.src[0].node());
00496 
00497   SH_DEBUG_ASSERT(tnode);
00498 
00499   if (tnode->size() == 2) {
00500     // Special case for LUMINANCE_ALPHA
00501     if (!delay) {
00502       tmpdest = ShVariable(new ShVariableNode(SH_TEMP, 4, SH_FLOAT));
00503       tmpsrc = tmpdest;
00504     }
00505     tmpsrc = tmpsrc(0,3);
00506     delay = true;
00507   }
00508 
00509   if (stmt.op == SH_OP_TEXD) {
00510     SH_DEBUG_ASSERT(tnode->dims() == SH_TEXTURE_2D);
00511     m_instructions.push_back(ArbInst(SH_ARB_TXD,
00512                                      (delay ? tmpdest : stmt.dest), stmt.src[1], stmt.src[0],
00513                                      stmt.src[2](0,1), stmt.src[2](2,3)));
00514   } else {
00515     m_instructions.push_back(ArbInst(SH_ARB_TEX,
00516                                      (delay ? tmpdest : stmt.dest), stmt.src[1], stmt.src[0]));
00517   }
00518   if (delay) emit(ShStatement(stmt.dest, SH_OP_ASN, tmpsrc));
00519 }
00520 
00521 void ArbCode::emit_nvcond(const ShStatement& stmt)
00522 {
00523 
00524   ShVariable dummy(new ShVariableNode(SH_TEMP, stmt.src[0].size(), SH_FLOAT));
00525   ArbInst updatecc(SH_ARB_MOV, dummy, stmt.src[0]);
00526   updatecc.update_cc = true;
00527   m_instructions.push_back(updatecc);
00528 
00529   /*
00530   ShSwizzle ccswiz = stmt.src[0].swizzle();
00531   if (ccswiz.size() == 1) {
00532     int indices[4];
00533     for (int i = 0; i < stmt.dest.size(); i++) {
00534       indices[i] = 0;
00535     }
00536     ccswiz *= ShSwizzle(1, stmt.dest.size(), indices);
00537   }
00538   */  
00539   if (stmt.dest != stmt.src[1]) {
00540     ArbInst movt(SH_ARB_MOV, stmt.dest, stmt.src[1]);
00541     movt.ccode = ArbInst::GT;
00542     movt.ccswiz = stmt.src[0].swizzle();
00543     m_instructions.push_back(movt);
00544   }
00545   if (stmt.dest != stmt.src[2]) {
00546     ArbInst movf(SH_ARB_MOV, stmt.dest, stmt.src[2]);
00547     movf.ccode = ArbInst::LE;
00548     movf.ccswiz = stmt.src[0].swizzle();
00549     m_instructions.push_back(movf);
00550   }
00551 }
00552 
00553 void ArbCode::emit_csum(const ShStatement& stmt)
00554 {
00555   // @todo type make this function handle more than floats
00556   ShDataVariant<float, SH_HOST> c1_values(stmt.src[0].size(), 1.0f); 
00557   ShVariable c1(new ShVariableNode(SH_CONST, stmt.src[0].size(), SH_FLOAT));
00558   c1.setVariant(&c1_values);
00559   m_shader->constants.push_back(c1.node());
00560   
00561   emit(ShStatement(stmt.dest, stmt.src[0], SH_OP_DOT, c1));
00562 }
00563 
00564 void ArbCode::emit_cmul(const ShStatement& stmt)
00565 {
00566   // @todo use clone
00567   ShVariable prod(new ShVariableNode(SH_TEMP, 1, stmt.dest.valueType()));
00568 
00569   // TODO: Could use vector mul here.
00570   
00571   m_instructions.push_back(ArbInst(SH_ARB_MOV, prod, stmt.src[0](0)));
00572   for (int i = 1; i < stmt.src[0].size(); i++) {
00573     m_instructions.push_back(ArbInst(SH_ARB_MUL, prod, stmt.src[0](i)));
00574   }
00575   m_instructions.push_back(ArbInst(SH_ARB_MOV, stmt.dest, prod));
00576 }
00577 
00578 void ArbCode::emit_kil(const ShStatement& stmt)
00579 {
00580   m_instructions.push_back(ArbInst(SH_ARB_KIL, -stmt.src[0]));
00581 }
00582 
00583 void ArbCode::emit_pal(const ShStatement& stmt)
00584 {
00585   m_instructions.push_back(ArbInst(SH_ARB_ARL, m_address_register, stmt.src[1]));
00586   m_instructions.push_back(ArbInst(SH_ARB_ARRAYMOV, stmt.dest, stmt.src[0], m_address_register));
00587 }
00588 
00589 }

Generated on Mon Jan 24 18:36:29 2005 for Sh by  doxygen 1.4.1