Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

PBufferStreams.cpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory
00004 // Project administrator: Michael D. McCool
00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa,
00006 //          Michael D. McCool
00007 // 
00008 // This software is provided 'as-is', without any express or implied
00009 // warranty. In no event will the authors be held liable for any damages
00010 // arising from the use of this software.
00011 // 
00012 // Permission is granted to anyone to use this software for any purpose,
00013 // including commercial applications, and to alter it and redistribute it
00014 // freely, subject to the following restrictions:
00015 // 
00016 // 1. The origin of this software must not be misrepresented; you must
00017 // not claim that you wrote the original software. If you use this
00018 // software in a product, an acknowledgment in the product documentation
00019 // would be appreciated but is not required.
00020 // 
00021 // 2. Altered source versions must be plainly marked as such, and must
00022 // not be misrepresented as being the original software.
00023 // 
00024 // 3. This notice may not be removed or altered from any source
00025 // distribution.
00027 #include "PBufferStreams.hpp"
00028 
00030 //#define DO_PBUFFER_TIMING
00031 
00032 // Turn this on to debug the fragment programs.
00033 //#define SH_DEBUG_PBS_PRINTFP
00034 
00035 #include <map>
00036 #include <fstream>
00037 #include <cstdlib>
00038 
00039 #include "sh.hpp"
00040 #include "ShOptimizations.hpp"
00041 #include "ShException.hpp"
00042 #include "ShError.hpp"
00043 #include "ShTypeInfo.hpp"
00044 #include "ShVariant.hpp"
00045 
00046 #ifdef DO_PBUFFER_TIMING
00047 #include <sys/time.h>
00048 #include <time.h>
00049 #endif
00050 
00051 namespace shgl {
00052 
00053 using namespace SH;
00054 
00055 
00056 #ifdef DO_PBUFFER_TIMING
00057 
// Simple wall-clock stopwatch based on gettimeofday(), reporting elapsed
// time in milliseconds. Only compiled when DO_PBUFFER_TIMING is defined.
class Timer {
public:
  // A freshly constructed timer is already running.
  Timer()
  {
    start();
  }

  // (Re)start the stopwatch at the current time of day.
  void start()
  {
    gettimeofday(&startval, 0);
  }

  // Milliseconds elapsed since the last call to start().
  // The microsecond fields are truncated to whole milliseconds before
  // being subtracted.
  long diff()
  {
    timeval now;
    gettimeofday(&now, 0);

    const long from_seconds = (now.tv_sec - startval.tv_sec) * 1000;
    const long from_micros = now.tv_usec / 1000 - startval.tv_usec / 1000;
    return from_seconds + from_micros;
  }

private:
  timeval startval; // time of the most recent start()
};
00074 
00075 #endif
00076 
00077 class PBufferStreamException : public ShException {
00078 public:
00079   PBufferStreamException(const std::string& message)
00080     : ShException("PBuffer Stream Execution: " + message)
00081   {
00082   }
00083 };
00084 
// Maps each stream channel fetched by a stream program to the texture node
// that will supply its data. Entries are first inserted with a null texture
// and filled in once the texture has been allocated.
00085 typedef std::map<ShChannelNodePtr, ShTextureNodePtr> StreamInputMap;
00086 
00087 class StreamInputGatherer {
00088 public:
00089   StreamInputGatherer(StreamInputMap& input_map)
00090     : input_map(input_map)
00091   {
00092   }
00093   
00094   void operator()(const ShCtrlGraphNode* node)
00095   {
00096     if (!node->block) return;
00097     
00098     for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin();
00099          I != node->block->end(); ++I) {
00100       const ShStatement& stmt = *I;
00101       if (stmt.op != SH_OP_FETCH) continue;
00102 
00103       // TODO: ought to complain here
00104       if (stmt.src[0].node()->kind() != SH_STREAM) continue;
00105 
00106       ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00107       input_map.insert(std::make_pair(stream_node, ShTextureNodePtr(0)));
00108     }
00109   }
00110 
00111 private:
00112   StreamInputMap& input_map;
00113 };
00114 
00115 class TexFetcher {
00116 public:
00117   TexFetcher(StreamInputMap& input_map,
00118              ShVariableNodePtr tc_node,
00119              bool indexed,
00120              ShVariableNodePtr width_var,
00121              ShProgramNodePtr program)
00122     : input_map(input_map),
00123       tc_node(tc_node),
00124       indexed(indexed),
00125       width_var(width_var),
00126       program(program)
00127   {
00128   }
00129 
00130   void operator()(ShCtrlGraphNode* node)
00131   {
00132     if (!node->block) return;
00133     for (ShBasicBlock::ShStmtList::iterator I = node->block->begin();
00134          I != node->block->end(); ++I) {
00135       ShStatement& stmt = *I;
00136       if (stmt.op != SH_OP_FETCH && stmt.op != SH_OP_LOOKUP) continue;
00137       
00138       if (!stmt.src[0].node()) {
00139         SH_DEBUG_WARN("FETCH/LOOKUP from null stream");
00140         continue;
00141       }
00142       if (stmt.src[0].node()->kind() != SH_STREAM) {
00143         SH_DEBUG_WARN("FETCH/LOOKUP from non-stream");
00144         continue;
00145       }
00146       
00147       ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00148       StreamInputMap::const_iterator J = input_map.find(stream_node);
00149       if (J == input_map.end()) {
00150         SH_DEBUG_WARN("Stream node not found in input map");
00151         continue;
00152       }
00153 
00154       if (!J->second) {
00155         SH_DEBUG_WARN("No texture allocated for stream node");
00156         continue;
00157       }
00158 
00159       ShVariable texVar(J->second);
00160 
00161       if (stmt.op == SH_OP_FETCH) {
00162         ShVariable coordsVar(tc_node);
00163         if (indexed) {
00164           stmt = ShStatement(stmt.dest, texVar, SH_OP_TEXI, coordsVar);
00165         } else {
00166           stmt = ShStatement(stmt.dest, texVar, SH_OP_TEX, coordsVar);
00167         }
00168       } else {
00169         // Make sure our actualy index is a temporary in the program.
00170         ShContext::current()->enter(program);
00171         ShVariable coordsVar(new ShVariableNode(SH_TEMP, 2, SH_FLOAT));
00172         ShContext::current()->exit();
00173         
00174         ShBasicBlock::ShStmtList new_stmts;
00175         new_stmts.push_back(ShStatement(coordsVar(0), stmt.src[1], SH_OP_MOD, width_var));
00176         new_stmts.push_back(ShStatement(coordsVar(1), stmt.src[1], SH_OP_DIV, width_var));
00177         new_stmts.push_back(ShStatement(stmt.dest, texVar, SH_OP_TEXI, coordsVar));
00178         I = node->block->erase(I);
00179         node->block->splice(I, new_stmts);
00180         I--;
00181       }
00182       // The following is useful for debugging
00183       // stmt = ShStatement(stmt.dest, SH_OP_ASN, coordsVar);
00184     }
00185   }
00186   
00187 private:
00188   StreamInputMap& input_map;
00189   ShVariableNodePtr tc_node;
00190   bool indexed;
00191   ShVariableNodePtr width_var;
00192   ShProgramNodePtr program;
00193 };
00194 
00195 PBufferStreams::PBufferStreams(void) :
00196   m_setup_vp(false)
00197 {
00198 }
00199 
00200 PBufferStreams::~PBufferStreams()
00201 {
00202 }
00203 
00204 #ifdef DO_PBUFFER_TIMING
00205 int indent = 0;
00206 Timer supertimer;
00207 
00208 void fillin()
00209 {
00210   long sd = supertimer.diff();
00211   supertimer.start();
00212   if (indent) for (int j = 0; j < sd; j++) {
00213     for (int i = 0; i < indent; i++) std::cerr << "| ";
00214     std::cerr << std::endl;
00215   }
00216 }
00217 
// DECLARE_TIMER(t): declares a running Timer named pbtime_<t> in the current
// scope, pads the trace via fillin(), prints an indented "^ <t> starts" line
// to std::cerr and increases the trace indentation by one level.
//
// TIMING_RESULT(t): reads the elapsed time of pbtime_<t> (which must be in
// scope), pads the trace via fillin(), decreases the indentation by one
// level, prints "v <t> took <d> ms" and restarts supertimer.
00218 #define DECLARE_TIMER(t) Timer pbtime_ ## t; do { fillin(); for (int i = 0; i < indent; i++) std::cerr << "| "; std::cerr << "^ " << # t << " starts" << std::endl; indent++;} while (0)
00219 #define TIMING_RESULT(t) do {long d = pbtime_ ## t.diff(); fillin(); indent--; for (int i = 0; i < indent; i++) std::cerr << "| "; std::cerr << "v " << # t << " took " << d << " ms" << std::endl; supertimer.start(); } while (0)
00220 #else
// Timing disabled: both macros expand to nothing.
00221 #define DECLARE_TIMER(t)
00222 #define TIMING_RESULT(t) 
00223 #endif
00224 
00225 void PBufferStreams::execute(const ShProgramNodeCPtr& program,
00226                              ShStream& dest)
00227 {
00228   DECLARE_TIMER(overhead);
00229 
00230   // Check program target
00231   if (program->target() != "gpu:stream") {
00232     shError(PBufferStreamException("This backend can only execute ``gpu:stream'' programs."));
00233     return;
00234   }
00235 
00236   // Make sure program has no inputs
00237   if (!program->inputs.empty()) {
00238     shError(PBufferStreamException("Stream program has unbound inputs, and can hence not be executed."));
00239     return;
00240   }
00241 
00242   if (dest.size() == 0) {
00243     SH_DEBUG_WARN("Stream program has no outputs?");
00244     return;
00245   }
00246 
00247   if ((int)program->outputs.size() != dest.size()) {
00248     SH_DEBUG_ERROR("Number of stream program outputs ("
00249                    << program->outputs.size()
00250                    << ") does not match number of destinations ("
00251                    << dest.size()
00252                    << ").");
00253     return;
00254   }
00255   TIMING_RESULT(overhead);
00256   
00257   if (dest.size() > 1) {
00258     DECLARE_TIMER(overall);
00259     // TODO: If any stream channels are used as both inputs and
00260     // outputs we will want to cache their new values elsewhere and
00261     // then overwrite them at the end.
00262     int i = 0;
00263     for (ShStream::NodeList::iterator I = dest.begin(); I != dest.end(); ++I, ++i) {
00264       ShStream s(*I);
00265       DECLARE_TIMER(specialize);
00266       ShProgram p = shSwizzle(i) << shref_const_cast<ShProgramNode>(program);
00267       TIMING_RESULT(specialize);
00268       execute(p.node(), s);
00269     }
00270     TIMING_RESULT(overall);
00271     return;
00272   }
00273 
00274   DECLARE_TIMER(onerun);
00275   // --- Set up the GLX context
00276   
00277   ShChannelNodePtr output = *dest.begin();
00278   int count = output->count();
00279   ShValueType valueType = output->valueType();
00280 
00281   // Pick a size for the texture that just fits the output data.
00282   int tex_size = 1;
00283   
00284   while (tex_size * tex_size < count) {
00285     tex_size <<= 1;
00286   }
00287 
00288   FloatExtension extension = setupContext(tex_size, tex_size);
00289 
00290   if (extension == SH_ARB_NO_FLOAT_EXT) return;
00291 
00292   DECLARE_TIMER(gather);
00293   
00294   StreamInputMap input_map;
00295 
00296   // Do a DFS through the program's control graph.
00297   StreamInputGatherer gatherer(input_map);
00298   program->ctrlGraph->dfs(gatherer);
00299 
00300   TIMING_RESULT(gather);
00301   
00302   if (input_map.empty()) {
00303     shError(PBufferStreamException("Stream program does not use any streams!"));
00304     return;
00305   }
00306 
00307   DECLARE_TIMER(texsetup);
00308   // First, allocate textures for each input stream.
00309   // Need to ensure that input stream sizes are the same.
00310   for (StreamInputMap::iterator I = input_map.begin(); I != input_map.end(); ++I) {
00311     if (I->first->count() != count) {
00312       SH_DEBUG_ERROR("Input lengths of stream program do not match ("
00313                      << I->first->count() << " != " << count << ")");
00314       return;
00315     }
00316     ShTextureNodePtr tex;
00317     ShTextureTraits traits = ShArrayTraits();
00318     traits.clamping(ShTextureTraits::SH_UNCLAMPED);
00319 
00320     // TODO!
00321     // We're copying a far larger amount of memory in here than we
00322     // should. Don't do this.
00323     switch (extension) {
00324     case SH_ARB_NV_FLOAT_BUFFER:
00325       tex = new ShTextureNode(SH_TEXTURE_RECT, I->first->size(),
00326                               I->first->valueType(), traits, tex_size, tex_size, 1, count);
00327       break;
00328     case SH_ARB_ATI_PIXEL_FORMAT_FLOAT:
00329       tex = new ShTextureNode(SH_TEXTURE_2D, I->first->size(),
00330                               I->first->valueType(), traits, tex_size, tex_size, 1, count);
00331       break;
00332     default:
00333       tex = 0;
00334       break;
00335     }
00336 
00337     tex->memory(I->first->memory());
00338     I->second = tex;
00339   }
00340   TIMING_RESULT(texsetup);
00341   
00342   DECLARE_TIMER(fpsetup);
00343   // Add in the texcoord variable
00344   ShProgram fp = ShProgram(shref_const_cast<ShProgramNode>(program))
00345     & lose<ShTexCoord2f>("streamcoord");
00346 
00347   // Make it a fragment program
00348   fp.node()->target() = "gpu:fragment";
00349   
00350   ShVariableNodePtr tc_node = fp.node()->inputs.back(); // there should be only one input anyways
00351 
00352   // Make a guaranteed uniform variable, by "pushing" the global scope
00353   ShContext::current()->enter(0);
00354   ShAttrib1f width = tex_size;
00355   ShContext::current()->exit();
00356   
00357   // replace FETCH with TEX
00358   TexFetcher texFetcher(input_map, tc_node, extension == SH_ARB_NV_FLOAT_BUFFER,
00359                         width.node(), fp.node());
00360   fp.node()->ctrlGraph->dfs(texFetcher);
00361   fp.node()->collectVariables(); // necessary to collect all the new textures
00362 
00363   // optimize
00364   optimize(fp);
00365 
00366   int gl_error;
00367   glEnable(GL_VERTEX_PROGRAM_ARB);
00368   gl_error = glGetError();
00369   if (gl_error != GL_NO_ERROR) {
00370     shError(PBufferStreamException("Could not enable GL_VERTEX_PROGRAM_ARB"));
00371     return;
00372   }
00373   glEnable(GL_FRAGMENT_PROGRAM_ARB);
00374   gl_error = glGetError();
00375   if (gl_error != GL_NO_ERROR) {
00376     shError(PBufferStreamException("Could not enable GL_FRAGMENT_PROGRAM_ARB"));
00377     return;
00378   }
00379 #ifdef SH_DEBUG_PBS_PRINTFP
00380   {
00381   std::ofstream fpgv("pb.dot");
00382   fp.node()->ctrlGraph->graphvizDump(fpgv);
00383   }
00384   system("dot -Tps -o pb.ps pb.dot");
00385 #endif
00386 
00387   // generate code
00388   shCompile(fp);
00389 
00390 #ifdef SH_DEBUG_PBS_PRINTFP
00391  {
00392   std::ofstream fpdbg("pbufferstream.fp");
00393   fp.code()->print(fpdbg);
00394  }
00395 #endif
00396 
00397   TIMING_RESULT(fpsetup);
00398 
00399   DECLARE_TIMER(vpsetup);
00400 
00401   if (!m_setup_vp)
00402     {
00403     // The (trivial) vertex program
00404     m_vp = keep<ShPosition4f>() & keep<ShTexCoord2f>();
00405     m_vp.node()->target() = "gpu:vertex";
00406     shCompile(m_vp);
00407     m_setup_vp = true;
00408     }
00409 
00410   TIMING_RESULT(vpsetup);
00411 
00412   DECLARE_TIMER(binding);
00413   // Then, bind vertex (pass-through) and fragment program
00414   shBind(m_vp);
00415   shBind(fp);
00416   TIMING_RESULT(binding);
00417 
00418   DECLARE_TIMER(clear);
00419   glClear(GL_COLOR_BUFFER_BIT);
00420   TIMING_RESULT(clear);
00421 
00422   DECLARE_TIMER(rendersetup);
00423   glViewport(0, 0, tex_size, tex_size);
00424 
00425   glMatrixMode(GL_PROJECTION);
00426   glLoadIdentity();
00427 
00428   glMatrixMode(GL_MODELVIEW);
00429   glLoadIdentity();
00430 
00431   float tc_right;
00432   float tc_upper;
00433 
00434   if (extension == SH_ARB_NV_FLOAT_BUFFER) {
00435     tc_right = static_cast<float>(tex_size);
00436     tc_upper = static_cast<float>(tex_size);
00437   } else {
00438     tc_right = 1.0;
00439     tc_upper = 1.0;
00440   }
00441   TIMING_RESULT(rendersetup);
00442 
00443   DECLARE_TIMER(render);
00444 
00445   // Generate quad geometry
00446   glBegin(GL_QUADS); {
00447     glTexCoord2f(0.0, 0.0);
00448     glVertex3f(-1.0, -1.0, 0.0);
00449     glTexCoord2f(0.0, tc_upper);
00450     glVertex3f(-1.0,  1.0, 0.0);
00451     glTexCoord2f(tc_right, tc_upper);
00452     glVertex3f( 1.0,  1.0, 0.0);
00453     glTexCoord2f(tc_right, 0.0);
00454     glVertex3f( 1.0, -1.0, 0.0);
00455   } glEnd();
00456 
00457   TIMING_RESULT(render);
00458 
00459   DECLARE_TIMER(finish);
00460   glFinish();
00461 
00462   TIMING_RESULT(finish);
00463 
00464   
00465   gl_error = glGetError();
00466   if (gl_error != GL_NO_ERROR) {
00467     shError(PBufferStreamException("Could not render"));
00468     return;
00469   }
00470   
00471   DECLARE_TIMER(findouthost);
00472 
00473   ShHostStoragePtr outhost
00474     = shref_dynamic_cast<ShHostStorage>(output->memory()->findStorage("host"));
00475   if (!outhost) {
00476     int datasize = shTypeInfo(valueType)->datasize(); 
00477     outhost = new ShHostStorage(output->memory().object(),
00478                                 datasize * output->size() * output->count());
00479   }
00480   TIMING_RESULT(findouthost);
00481 
00482   DECLARE_TIMER(dirtyouthost);
00483   // Read back
00484   outhost->dirty();
00485   TIMING_RESULT(dirtyouthost);
00486   
00487 
00488   GLenum format;
00489   switch (output->size()) {
00490   case 1:
00491     format = GL_RED;
00492     break;
00493   case 2:
00494     SH_DEBUG_ASSERT(0 && "Sorry, 2-component outputs aren't working right now!");
00495     break;
00496   case 3:
00497     format = GL_RGB;
00498     break;
00499   case 4:
00500     format = GL_RGBA;
00501     break;
00502   default:
00503     SH_DEBUG_ASSERT(false);
00504     break;
00505   }
00506 
00507   DECLARE_TIMER(readback);
00508 
00509   // @todo half-float
00510   ShVariantPtr  resultBuffer; 
00511   int resultDatasize = output->size() * count;
00512   GLenum readpixelType;
00513   ShValueType convertedType; 
00514   readpixelType = shGlType(valueType, convertedType);
00515   if(convertedType != SH_VALUETYPE_END) {
00516       SH_DEBUG_WARN("ARB backend does not handle stream output type " << shValueTypeName(valueType) << " natively."
00517           << "  Using " << shValueTypeName(convertedType) << " temporary buffer.");
00518       resultBuffer = shVariantFactory(convertedType, SH_MEM)->generate(resultDatasize);
00519   } else {
00520       resultBuffer = shVariantFactory(valueType, SH_MEM)->generate(
00521           outhost->data(), resultDatasize, false);
00522   }
00523 
00524   glReadPixels(0, 0, tex_size, count / tex_size, format,
00525                readpixelType, resultBuffer->array());
00526   gl_error = glGetError();
00527   if (gl_error != GL_NO_ERROR) {
00528     shError(PBufferStreamException("Could not do glReadPixels()"));
00529     return;
00530   }
00531   if (count % tex_size) {
00532     glReadPixels(0, count / tex_size, count % tex_size, 1, format, readpixelType,
00533                  (char*)(resultBuffer->array()) + (count - (count % tex_size)) * output->size() * resultBuffer->datasize());
00534     gl_error = glGetError();
00535     if (gl_error != GL_NO_ERROR) {
00536       shError(PBufferStreamException("Could not do rest of glReadPixels()"));
00537       return;
00538     }
00539   }
00540 
00541   if(convertedType != SH_VALUETYPE_END) { // need to copy to outhoust->data()
00542     ShVariantPtr outhostVariant = shVariantFactory(valueType, SH_MEM)->generate(
00543           outhost->data(), resultDatasize, false);
00544     outhostVariant->set(resultBuffer);
00545   }
00546 
00547   TIMING_RESULT(readback);
00548   
00549   // TODO: I think this is necessary, but it doesn't seem to be. I assume
00550   // that GLUT (or whatever UI toolkit) is setting up its one context when
00551   // its about to redraw. -Kevin
00552   restoreContext();
00553   
00554   TIMING_RESULT(onerun);
00555 }
00556 
00557 
00558 }

Generated on Mon Jan 24 18:36:29 2005 for Sh by  doxygen 1.4.1