00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00027 #include "PBufferStreams.hpp"
00028
00030
00031
00032
00033
00034
00035 #include <map>
00036 #include <fstream>
00037 #include <cstdlib>
00038
00039 #include "sh.hpp"
00040 #include "ShOptimizations.hpp"
00041 #include "ShException.hpp"
00042 #include "ShError.hpp"
00043 #include "ShTypeInfo.hpp"
00044 #include "ShVariant.hpp"
00045
00046 #ifdef DO_PBUFFER_TIMING
00047 #include <sys/time.h>
00048 #include <time.h>
00049 #endif
00050
00051 namespace shgl {
00052
00053 using namespace SH;
00054
00055
00056 #ifdef DO_PBUFFER_TIMING
00057
// Simple wall-clock stopwatch built on gettimeofday(); reports elapsed
// time in milliseconds.
class Timer {
public:
  // Begins timing as soon as the object is created.
  Timer()
  {
    start();
  }

  // Resets the reference point to the current time of day.
  void start()
  {
    gettimeofday(&startval, 0);
  }

  // Returns the number of milliseconds since the last call to start().
  // Each microsecond field is truncated to whole milliseconds before the
  // two are subtracted.
  long diff()
  {
    timeval now;
    gettimeofday(&now, 0);

    const long seconds_part_ms = (now.tv_sec - startval.tv_sec) * 1000;
    const long micros_part_ms = now.tv_usec / 1000 - startval.tv_usec / 1000;
    return seconds_part_ms + micros_part_ms;
  }

private:
  timeval startval;  // time recorded by the most recent start()
};
00074
00075 #endif
00076
00077 class PBufferStreamException : public ShException {
00078 public:
00079 PBufferStreamException(const std::string& message)
00080 : ShException("PBuffer Stream Execution: " + message)
00081 {
00082 }
00083 };
00084
00085 typedef std::map<ShChannelNodePtr, ShTextureNodePtr> StreamInputMap;
00086
00087 class StreamInputGatherer {
00088 public:
00089 StreamInputGatherer(StreamInputMap& input_map)
00090 : input_map(input_map)
00091 {
00092 }
00093
00094 void operator()(const ShCtrlGraphNode* node)
00095 {
00096 if (!node->block) return;
00097
00098 for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin();
00099 I != node->block->end(); ++I) {
00100 const ShStatement& stmt = *I;
00101 if (stmt.op != SH_OP_FETCH) continue;
00102
00103
00104 if (stmt.src[0].node()->kind() != SH_STREAM) continue;
00105
00106 ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00107 input_map.insert(std::make_pair(stream_node, ShTextureNodePtr(0)));
00108 }
00109 }
00110
00111 private:
00112 StreamInputMap& input_map;
00113 };
00114
00115 class TexFetcher {
00116 public:
00117 TexFetcher(StreamInputMap& input_map,
00118 ShVariableNodePtr tc_node,
00119 bool indexed,
00120 ShVariableNodePtr width_var,
00121 ShProgramNodePtr program)
00122 : input_map(input_map),
00123 tc_node(tc_node),
00124 indexed(indexed),
00125 width_var(width_var),
00126 program(program)
00127 {
00128 }
00129
00130 void operator()(ShCtrlGraphNode* node)
00131 {
00132 if (!node->block) return;
00133 for (ShBasicBlock::ShStmtList::iterator I = node->block->begin();
00134 I != node->block->end(); ++I) {
00135 ShStatement& stmt = *I;
00136 if (stmt.op != SH_OP_FETCH && stmt.op != SH_OP_LOOKUP) continue;
00137
00138 if (!stmt.src[0].node()) {
00139 SH_DEBUG_WARN("FETCH/LOOKUP from null stream");
00140 continue;
00141 }
00142 if (stmt.src[0].node()->kind() != SH_STREAM) {
00143 SH_DEBUG_WARN("FETCH/LOOKUP from non-stream");
00144 continue;
00145 }
00146
00147 ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00148 StreamInputMap::const_iterator J = input_map.find(stream_node);
00149 if (J == input_map.end()) {
00150 SH_DEBUG_WARN("Stream node not found in input map");
00151 continue;
00152 }
00153
00154 if (!J->second) {
00155 SH_DEBUG_WARN("No texture allocated for stream node");
00156 continue;
00157 }
00158
00159 ShVariable texVar(J->second);
00160
00161 if (stmt.op == SH_OP_FETCH) {
00162 ShVariable coordsVar(tc_node);
00163 if (indexed) {
00164 stmt = ShStatement(stmt.dest, texVar, SH_OP_TEXI, coordsVar);
00165 } else {
00166 stmt = ShStatement(stmt.dest, texVar, SH_OP_TEX, coordsVar);
00167 }
00168 } else {
00169
00170 ShContext::current()->enter(program);
00171 ShVariable coordsVar(new ShVariableNode(SH_TEMP, 2, SH_FLOAT));
00172 ShContext::current()->exit();
00173
00174 ShBasicBlock::ShStmtList new_stmts;
00175 new_stmts.push_back(ShStatement(coordsVar(0), stmt.src[1], SH_OP_MOD, width_var));
00176 new_stmts.push_back(ShStatement(coordsVar(1), stmt.src[1], SH_OP_DIV, width_var));
00177 new_stmts.push_back(ShStatement(stmt.dest, texVar, SH_OP_TEXI, coordsVar));
00178 I = node->block->erase(I);
00179 node->block->splice(I, new_stmts);
00180 I--;
00181 }
00182
00183
00184 }
00185 }
00186
00187 private:
00188 StreamInputMap& input_map;
00189 ShVariableNodePtr tc_node;
00190 bool indexed;
00191 ShVariableNodePtr width_var;
00192 ShProgramNodePtr program;
00193 };
00194
00195 PBufferStreams::PBufferStreams(void) :
00196 m_setup_vp(false)
00197 {
00198 }
00199
00200 PBufferStreams::~PBufferStreams()
00201 {
00202 }
00203
#ifdef DO_PBUFFER_TIMING
// Current nesting depth of active timers; one "| " column is printed per level.
int indent = 0;
// Measures the time between consecutive timing events.
Timer supertimer;

// Prints one row of "| " columns to std::cerr for every millisecond that
// has passed since the previous timing event, then restarts supertimer.
// Nothing is printed while no timer is active (indent == 0).
void fillin()
{
  const long elapsed_ms = supertimer.diff();
  supertimer.start();

  if (!indent) return;

  for (int row = 0; row < elapsed_ms; row++) {
    for (int col = 0; col < indent; col++) std::cerr << "| ";
    std::cerr << std::endl;
  }
}

// DECLARE_TIMER(t): declares a Timer named pbtime_<t>, prints a
// "^ <t> starts" line at the current depth and increases the depth.
#define DECLARE_TIMER(t) \
  Timer pbtime_ ## t; \
  do { \
    fillin(); \
    for (int i = 0; i < indent; i++) std::cerr << "| "; \
    std::cerr << "^ " << # t << " starts" << std::endl; \
    indent++; \
  } while (0)

// TIMING_RESULT(t): decreases the depth, prints how long timer <t> has been
// running in milliseconds, and restarts supertimer.
#define TIMING_RESULT(t) \
  do { \
    long d = pbtime_ ## t.diff(); \
    fillin(); \
    indent--; \
    for (int i = 0; i < indent; i++) std::cerr << "| "; \
    std::cerr << "v " << # t << " took " << d << " ms" << std::endl; \
    supertimer.start(); \
  } while (0)
#else
// Timing disabled: both macros expand to nothing.
#define DECLARE_TIMER(t)
#define TIMING_RESULT(t)
#endif
00224
00225 void PBufferStreams::execute(const ShProgramNodeCPtr& program,
00226 ShStream& dest)
00227 {
00228 DECLARE_TIMER(overhead);
00229
00230
00231 if (program->target() != "gpu:stream") {
00232 shError(PBufferStreamException("This backend can only execute ``gpu:stream'' programs."));
00233 return;
00234 }
00235
00236
00237 if (!program->inputs.empty()) {
00238 shError(PBufferStreamException("Stream program has unbound inputs, and can hence not be executed."));
00239 return;
00240 }
00241
00242 if (dest.size() == 0) {
00243 SH_DEBUG_WARN("Stream program has no outputs?");
00244 return;
00245 }
00246
00247 if ((int)program->outputs.size() != dest.size()) {
00248 SH_DEBUG_ERROR("Number of stream program outputs ("
00249 << program->outputs.size()
00250 << ") does not match number of destinations ("
00251 << dest.size()
00252 << ").");
00253 return;
00254 }
00255 TIMING_RESULT(overhead);
00256
00257 if (dest.size() > 1) {
00258 DECLARE_TIMER(overall);
00259
00260
00261
00262 int i = 0;
00263 for (ShStream::NodeList::iterator I = dest.begin(); I != dest.end(); ++I, ++i) {
00264 ShStream s(*I);
00265 DECLARE_TIMER(specialize);
00266 ShProgram p = shSwizzle(i) << shref_const_cast<ShProgramNode>(program);
00267 TIMING_RESULT(specialize);
00268 execute(p.node(), s);
00269 }
00270 TIMING_RESULT(overall);
00271 return;
00272 }
00273
00274 DECLARE_TIMER(onerun);
00275
00276
00277 ShChannelNodePtr output = *dest.begin();
00278 int count = output->count();
00279 ShValueType valueType = output->valueType();
00280
00281
00282 int tex_size = 1;
00283
00284 while (tex_size * tex_size < count) {
00285 tex_size <<= 1;
00286 }
00287
00288 FloatExtension extension = setupContext(tex_size, tex_size);
00289
00290 if (extension == SH_ARB_NO_FLOAT_EXT) return;
00291
00292 DECLARE_TIMER(gather);
00293
00294 StreamInputMap input_map;
00295
00296
00297 StreamInputGatherer gatherer(input_map);
00298 program->ctrlGraph->dfs(gatherer);
00299
00300 TIMING_RESULT(gather);
00301
00302 if (input_map.empty()) {
00303 shError(PBufferStreamException("Stream program does not use any streams!"));
00304 return;
00305 }
00306
00307 DECLARE_TIMER(texsetup);
00308
00309
00310 for (StreamInputMap::iterator I = input_map.begin(); I != input_map.end(); ++I) {
00311 if (I->first->count() != count) {
00312 SH_DEBUG_ERROR("Input lengths of stream program do not match ("
00313 << I->first->count() << " != " << count << ")");
00314 return;
00315 }
00316 ShTextureNodePtr tex;
00317 ShTextureTraits traits = ShArrayTraits();
00318 traits.clamping(ShTextureTraits::SH_UNCLAMPED);
00319
00320
00321
00322
00323 switch (extension) {
00324 case SH_ARB_NV_FLOAT_BUFFER:
00325 tex = new ShTextureNode(SH_TEXTURE_RECT, I->first->size(),
00326 I->first->valueType(), traits, tex_size, tex_size, 1, count);
00327 break;
00328 case SH_ARB_ATI_PIXEL_FORMAT_FLOAT:
00329 tex = new ShTextureNode(SH_TEXTURE_2D, I->first->size(),
00330 I->first->valueType(), traits, tex_size, tex_size, 1, count);
00331 break;
00332 default:
00333 tex = 0;
00334 break;
00335 }
00336
00337 tex->memory(I->first->memory());
00338 I->second = tex;
00339 }
00340 TIMING_RESULT(texsetup);
00341
00342 DECLARE_TIMER(fpsetup);
00343
00344 ShProgram fp = ShProgram(shref_const_cast<ShProgramNode>(program))
00345 & lose<ShTexCoord2f>("streamcoord");
00346
00347
00348 fp.node()->target() = "gpu:fragment";
00349
00350 ShVariableNodePtr tc_node = fp.node()->inputs.back();
00351
00352
00353 ShContext::current()->enter(0);
00354 ShAttrib1f width = tex_size;
00355 ShContext::current()->exit();
00356
00357
00358 TexFetcher texFetcher(input_map, tc_node, extension == SH_ARB_NV_FLOAT_BUFFER,
00359 width.node(), fp.node());
00360 fp.node()->ctrlGraph->dfs(texFetcher);
00361 fp.node()->collectVariables();
00362
00363
00364 optimize(fp);
00365
00366 int gl_error;
00367 glEnable(GL_VERTEX_PROGRAM_ARB);
00368 gl_error = glGetError();
00369 if (gl_error != GL_NO_ERROR) {
00370 shError(PBufferStreamException("Could not enable GL_VERTEX_PROGRAM_ARB"));
00371 return;
00372 }
00373 glEnable(GL_FRAGMENT_PROGRAM_ARB);
00374 gl_error = glGetError();
00375 if (gl_error != GL_NO_ERROR) {
00376 shError(PBufferStreamException("Could not enable GL_FRAGMENT_PROGRAM_ARB"));
00377 return;
00378 }
00379 #ifdef SH_DEBUG_PBS_PRINTFP
00380 {
00381 std::ofstream fpgv("pb.dot");
00382 fp.node()->ctrlGraph->graphvizDump(fpgv);
00383 }
00384 system("dot -Tps -o pb.ps pb.dot");
00385 #endif
00386
00387
00388 shCompile(fp);
00389
00390 #ifdef SH_DEBUG_PBS_PRINTFP
00391 {
00392 std::ofstream fpdbg("pbufferstream.fp");
00393 fp.code()->print(fpdbg);
00394 }
00395 #endif
00396
00397 TIMING_RESULT(fpsetup);
00398
00399 DECLARE_TIMER(vpsetup);
00400
00401 if (!m_setup_vp)
00402 {
00403
00404 m_vp = keep<ShPosition4f>() & keep<ShTexCoord2f>();
00405 m_vp.node()->target() = "gpu:vertex";
00406 shCompile(m_vp);
00407 m_setup_vp = true;
00408 }
00409
00410 TIMING_RESULT(vpsetup);
00411
00412 DECLARE_TIMER(binding);
00413
00414 shBind(m_vp);
00415 shBind(fp);
00416 TIMING_RESULT(binding);
00417
00418 DECLARE_TIMER(clear);
00419 glClear(GL_COLOR_BUFFER_BIT);
00420 TIMING_RESULT(clear);
00421
00422 DECLARE_TIMER(rendersetup);
00423 glViewport(0, 0, tex_size, tex_size);
00424
00425 glMatrixMode(GL_PROJECTION);
00426 glLoadIdentity();
00427
00428 glMatrixMode(GL_MODELVIEW);
00429 glLoadIdentity();
00430
00431 float tc_right;
00432 float tc_upper;
00433
00434 if (extension == SH_ARB_NV_FLOAT_BUFFER) {
00435 tc_right = static_cast<float>(tex_size);
00436 tc_upper = static_cast<float>(tex_size);
00437 } else {
00438 tc_right = 1.0;
00439 tc_upper = 1.0;
00440 }
00441 TIMING_RESULT(rendersetup);
00442
00443 DECLARE_TIMER(render);
00444
00445
00446 glBegin(GL_QUADS); {
00447 glTexCoord2f(0.0, 0.0);
00448 glVertex3f(-1.0, -1.0, 0.0);
00449 glTexCoord2f(0.0, tc_upper);
00450 glVertex3f(-1.0, 1.0, 0.0);
00451 glTexCoord2f(tc_right, tc_upper);
00452 glVertex3f( 1.0, 1.0, 0.0);
00453 glTexCoord2f(tc_right, 0.0);
00454 glVertex3f( 1.0, -1.0, 0.0);
00455 } glEnd();
00456
00457 TIMING_RESULT(render);
00458
00459 DECLARE_TIMER(finish);
00460 glFinish();
00461
00462 TIMING_RESULT(finish);
00463
00464
00465 gl_error = glGetError();
00466 if (gl_error != GL_NO_ERROR) {
00467 shError(PBufferStreamException("Could not render"));
00468 return;
00469 }
00470
00471 DECLARE_TIMER(findouthost);
00472
00473 ShHostStoragePtr outhost
00474 = shref_dynamic_cast<ShHostStorage>(output->memory()->findStorage("host"));
00475 if (!outhost) {
00476 int datasize = shTypeInfo(valueType)->datasize();
00477 outhost = new ShHostStorage(output->memory().object(),
00478 datasize * output->size() * output->count());
00479 }
00480 TIMING_RESULT(findouthost);
00481
00482 DECLARE_TIMER(dirtyouthost);
00483
00484 outhost->dirty();
00485 TIMING_RESULT(dirtyouthost);
00486
00487
00488 GLenum format;
00489 switch (output->size()) {
00490 case 1:
00491 format = GL_RED;
00492 break;
00493 case 2:
00494 SH_DEBUG_ASSERT(0 && "Sorry, 2-component outputs aren't working right now!");
00495 break;
00496 case 3:
00497 format = GL_RGB;
00498 break;
00499 case 4:
00500 format = GL_RGBA;
00501 break;
00502 default:
00503 SH_DEBUG_ASSERT(false);
00504 break;
00505 }
00506
00507 DECLARE_TIMER(readback);
00508
00509
00510 ShVariantPtr resultBuffer;
00511 int resultDatasize = output->size() * count;
00512 GLenum readpixelType;
00513 ShValueType convertedType;
00514 readpixelType = shGlType(valueType, convertedType);
00515 if(convertedType != SH_VALUETYPE_END) {
00516 SH_DEBUG_WARN("ARB backend does not handle stream output type " << shValueTypeName(valueType) << " natively."
00517 << " Using " << shValueTypeName(convertedType) << " temporary buffer.");
00518 resultBuffer = shVariantFactory(convertedType, SH_MEM)->generate(resultDatasize);
00519 } else {
00520 resultBuffer = shVariantFactory(valueType, SH_MEM)->generate(
00521 outhost->data(), resultDatasize, false);
00522 }
00523
00524 glReadPixels(0, 0, tex_size, count / tex_size, format,
00525 readpixelType, resultBuffer->array());
00526 gl_error = glGetError();
00527 if (gl_error != GL_NO_ERROR) {
00528 shError(PBufferStreamException("Could not do glReadPixels()"));
00529 return;
00530 }
00531 if (count % tex_size) {
00532 glReadPixels(0, count / tex_size, count % tex_size, 1, format, readpixelType,
00533 (char*)(resultBuffer->array()) + (count - (count % tex_size)) * output->size() * resultBuffer->datasize());
00534 gl_error = glGetError();
00535 if (gl_error != GL_NO_ERROR) {
00536 shError(PBufferStreamException("Could not do rest of glReadPixels()"));
00537 return;
00538 }
00539 }
00540
00541 if(convertedType != SH_VALUETYPE_END) {
00542 ShVariantPtr outhostVariant = shVariantFactory(valueType, SH_MEM)->generate(
00543 outhost->data(), resultDatasize, false);
00544 outhostVariant->set(resultBuffer);
00545 }
00546
00547 TIMING_RESULT(readback);
00548
00549
00550
00551
00552 restoreContext();
00553
00554 TIMING_RESULT(onerun);
00555 }
00556
00557
00558 }