00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00027
#include "PBufferStreams.hpp"
00028
00030
00031
00032
00033
#define SH_DEBUG_PBS_PRINTFP
00034
00035
#include <map>
00036
#include <fstream>
00037
#include <cstdlib>
00038
00039
00040
00041
#ifndef GLX_ATI_pixel_format_float
00042
#define GLX_ATI_pixel_format_float 1
00043
00044
#define GLX_RGBA_FLOAT_ATI_BIT 0x00000100
00045
00046
#endif // GLX_ATI_pixel_format_float
00047
00048
#ifndef GLX_FLOAT_COMPONENTS_NV
00049
#define GLX_FLOAT_COMPONENTS_NV 0x20B0
00050
#endif // GLX_FLOAT_COMPONENTS_NV
00051
00052
#include "sh.hpp"
00053
#include "ShOptimizations.hpp"
00054
#include "ShException.hpp"
00055
#include "ShError.hpp"
00056
#include "ShContext.hpp"
00057
00058
#ifdef DO_PBUFFER_TIMING
00059
#include <sys/time.h>
00060
#include <time.h>
00061
#endif
00062
00063
namespace shgl {
00064
00065
using namespace SH;
00066
00067
#ifdef DO_PBUFFER_TIMING
00068
00069
/**
 * Millisecond stopwatch based on gettimeofday().
 *
 * The reference time is taken when the object is constructed and can be
 * reset at any point with start().
 */
class Timer {
public:
  Timer()
  {
    start();
  }

  /** Take the current time of day as the new reference point. */
  void start()
  {
    gettimeofday(&startval, 0);
  }

  /**
   * Milliseconds between the reference point and now.
   *
   * The microsecond fields are each reduced to whole milliseconds before
   * they are subtracted.
   */
  long diff()
  {
    timeval now;
    gettimeofday(&now, 0);

    const long from_seconds = (now.tv_sec - startval.tv_sec) * 1000;
    const long from_micros = now.tv_usec / 1000 - startval.tv_usec / 1000;
    return from_seconds + from_micros;
  }

private:
  timeval startval; // reference point set by start()
};
00085
00086
#endif
00087
00088
class PBufferStreamException :
public ShException {
00089
public:
00090 PBufferStreamException(
const std::string& message)
00091 :
ShException("PBuffer Stream Execution: " + message)
00092 {
00093 }
00094 };
00095
00096
// Maps each stream channel node to the texture node used to read it.
typedef std::map<ShChannelNodePtr, ShTextureNodePtr> StreamInputMap;
00097
00098
class StreamInputGatherer {
00099
public:
00100 StreamInputGatherer(StreamInputMap& input_map)
00101 : input_map(input_map)
00102 {
00103 }
00104
00105
void operator()(
const ShCtrlGraphNode* node)
00106 {
00107
if (!node->block)
return;
00108
00109
for (ShBasicBlock::ShStmtList::const_iterator I = node->block->begin();
00110 I != node->block->end(); ++I) {
00111
const ShStatement& stmt = *I;
00112
if (stmt.op !=
SH_OP_FETCH)
continue;
00113
00114
00115
if (stmt.src[0].node()->kind() != SH_STREAM)
continue;
00116
00117 ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00118 input_map.insert(std::make_pair(stream_node, ShTextureNodePtr(0)));
00119 }
00120 }
00121
00122
private:
00123 StreamInputMap& input_map;
00124 };
00125
00126
class TexFetcher {
00127
public:
00128 TexFetcher(StreamInputMap& input_map,
00129 ShVariableNodePtr tc_node,
00130
bool indexed)
00131 : input_map(input_map),
00132 tc_node(tc_node),
00133 indexed(indexed)
00134 {
00135 }
00136
00137
void operator()(ShCtrlGraphNode* node)
00138 {
00139 ShVariable coordsVar(tc_node);
00140
if (!node->block)
return;
00141
for (ShBasicBlock::ShStmtList::iterator I = node->block->begin();
00142 I != node->block->end(); ++I) {
00143 ShStatement& stmt = *I;
00144
if (stmt.op !=
SH_OP_FETCH)
continue;
00145
00146
if (!stmt.src[0].node()) {
00147 SH_DEBUG_WARN(
"FETCH from null stream");
00148
continue;
00149 }
00150
if (stmt.src[0].node()->kind() != SH_STREAM) {
00151 SH_DEBUG_WARN(
"FETCH from non-stream");
00152
continue;
00153 }
00154
00155 ShChannelNodePtr stream_node = shref_dynamic_cast<ShChannelNode>(stmt.src[0].node());
00156 StreamInputMap::const_iterator I = input_map.find(stream_node);
00157
if (I == input_map.end()) {
00158 SH_DEBUG_WARN(
"Stream node not found in input map");
00159
continue;
00160 }
00161
00162
if (!I->second) {
00163 SH_DEBUG_WARN(
"No texture allocated for stream node");
00164
continue;
00165 }
00166
00167 ShVariable texVar(I->second);
00168
00169
if (indexed) {
00170 stmt = ShStatement(stmt.dest, texVar, SH_OP_TEXI, coordsVar);
00171 }
else {
00172 stmt = ShStatement(stmt.dest, texVar, SH_OP_TEX, coordsVar);
00173 }
00174
00175
00176 }
00177 }
00178
00179
private:
00180 StreamInputMap& input_map;
00181 ShVariableNodePtr tc_node;
00182
bool indexed;
00183 };
00184
00185
class GlxState {
00186
public:
00187 GlxState()
00188 : display(0), drawable(0), context(0)
00189 {
00190 display = glXGetCurrentDisplay();
00191
if (display) {
00192 drawable = glXGetCurrentDrawable();
00193 context = glXGetCurrentContext();
00194 }
00195 }
00196
00197 ~GlxState()
00198 {
00199
if (display) {
00200 glXMakeCurrent(display, drawable, context);
00201 }
00202 }
00203
00204
private:
00205 Display* display;
00206 GLXDrawable drawable;
00207 GLXContext context;
00208 };
00209
00210 PBufferStreams::PBufferStreams(
int context)
00211 : m_context(context),
00212 m_setup_vp(-1),
00213 m_display(0)
00214 {
00215 }
00216
00217 PBufferStreams::~PBufferStreams()
00218 {
00219
00220 }
00221
00222 StreamStrategy* PBufferStreams::create(
int context)
00223 {
00224
return new PBufferStreams(context);
00225 }
00226
00227
#ifdef DO_PBUFFER_TIMING
00228
int indent = 0;
00229 Timer supertimer;
00230
00231
void fillin()
00232 {
00233
long sd = supertimer.diff();
00234 supertimer.start();
00235
if (indent)
for (
int j = 0; j < sd; j++) {
00236
for (
int i = 0; i < indent; i++) std::cerr <<
"| ";
00237 std::cerr << std::endl;
00238 }
00239 }
00240
00241
#define DECLARE_TIMER(t) Timer pbtime_ ## t; do { fillin(); for (int i = 0; i < indent; i++) std::cerr << "| "; std::cerr << "^ " << # t << " starts" << std::endl; indent++;} while (0)
00242
#define TIMING_RESULT(t) do {long d = pbtime_ ## t.diff(); fillin(); indent--; for (int i = 0; i < indent; i++) std::cerr << "| "; std::cerr << "v " << # t << " took " << d << " ms" << std::endl; supertimer.start(); } while (0)
00243
#else
00244
#define DECLARE_TIMER(t)
00245
#define TIMING_RESULT(t)
00246
#endif
00247
00248 FloatExtension PBufferStreams::setupContext(
int width,
int height)
00249 {
00250
if (m_info.valid()
00251 && m_info.width == width
00252 && m_info.height == height) {
00253 DECLARE_TIMER(activatecontext);
00254 shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->setContext(m_info.shcontext);
00255 glXMakeCurrent(m_display, m_info.pbuffer, m_info.context);
00256 TIMING_RESULT(activatecontext);
00257
return m_info.extension;
00258 }
00259 DECLARE_TIMER(makecontext);
00260
if (m_info.shcontext >= 0) {
00261 shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->setContext(m_info.shcontext);
00262 shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->destroyContext();
00263 }
00264
00265 m_info.extension = SH_ARB_NO_FLOAT_EXT;
00266 m_info.width = width;
00267 m_info.height = height;
00268 m_info.pbuffer = 0;
00269 m_info.context = 0;
00270
00271 m_info.shcontext
00272 = shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->newContext();
00273
00274
00275
00276
if (!m_display) {
00277 m_display = glXGetCurrentDisplay();
00278
if (!m_display) m_display = XOpenDisplay(0);
00279
if (!m_display) {
00280
shError(PBufferStreamException(
"Could not open X display"));
00281
return m_info.extension;
00282 }
00283 }
00284
00285
int scrnum;
00286 scrnum = DefaultScreen(m_display);
00287
00288 std::vector<int> fb_base_attribs;
00289 fb_base_attribs.push_back(GLX_DOUBLEBUFFER); fb_base_attribs.push_back(False);
00290 fb_base_attribs.push_back(GLX_RED_SIZE); fb_base_attribs.push_back(32);
00291 fb_base_attribs.push_back(GLX_GREEN_SIZE); fb_base_attribs.push_back(32);
00292 fb_base_attribs.push_back(GLX_BLUE_SIZE); fb_base_attribs.push_back(32);
00293 fb_base_attribs.push_back(GLX_DRAWABLE_TYPE); fb_base_attribs.push_back(GLX_PBUFFER_BIT);
00294
00295
int items;
00296
00297 GLXFBConfig* fb_config = 0;
00298
00299
00300
if (!fb_config) {
00301 std::vector<int> fb_attribs(fb_base_attribs);
00302 fb_attribs.push_back(GLX_RENDER_TYPE); fb_attribs.push_back(GLX_RGBA_BIT);
00303 fb_attribs.push_back(GLX_FLOAT_COMPONENTS_NV); fb_attribs.push_back(True);
00304 fb_attribs.push_back(None);
00305
00306 fb_config = glXChooseFBConfig(m_display, scrnum, &fb_attribs.front(), &items);
00307
if (fb_config) {
00308 m_info.extension = SH_ARB_NV_FLOAT_BUFFER;
00309 }
00310 }
00311
00312
if (!fb_config) {
00313 std::vector<int> fb_attribs(fb_base_attribs);
00314 fb_attribs.push_back(GLX_RENDER_TYPE); fb_attribs.push_back(GLX_RGBA_FLOAT_ATI_BIT);
00315 fb_attribs.push_back(None);
00316
00317 fb_config = glXChooseFBConfig(m_display, scrnum, &fb_attribs.front(), &items);
00318
if (fb_config) {
00319 m_info.extension = SH_ARB_ATI_PIXEL_FORMAT_FLOAT;
00320 }
00321 }
00322
00323
if (!fb_config) {
00324
shError(PBufferStreamException(
"Could not get GLX FB Config!\n"
00325
"Your card may not support the appropriate extensions."));
00326
return SH_ARB_NO_FLOAT_EXT;
00327 }
00328
00329
if (m_info.extension == SH_ARB_NO_FLOAT_EXT) {
00330
shError(PBufferStreamException(
"Could not choose a floating-point extension!\n"
00331
"Your card may not support the appropriate extensions."));
00332
return m_info.extension;
00333 }
00334
00335
00336
int pbuffer_attribs[] = {
00337 GLX_PBUFFER_WIDTH, width,
00338 GLX_PBUFFER_HEIGHT, height,
00339 GLX_LARGEST_PBUFFER, False,
00340 None
00341 };
00342
00343 m_info.pbuffer = glXCreatePbuffer(m_display, fb_config[0], pbuffer_attribs);
00344
if (!m_info.pbuffer) {
00345
shError(PBufferStreamException(
"Could not make pbuffer!"));
00346
return SH_ARB_NO_FLOAT_EXT;
00347 }
00348
00349 m_info.context = glXCreateNewContext(m_display, fb_config[0], GLX_RGBA_TYPE, 0, True);
00350
if (!m_info.context) {
00351
shError(PBufferStreamException(
"Could not create PBuffer context"));
00352 XFree(fb_config);
00353
return SH_ARB_NO_FLOAT_EXT;
00354 }
00355 glXMakeCurrent(m_display, m_info.pbuffer, m_info.context);
00356
00357 TIMING_RESULT(makecontext);
00358
return m_info.extension;
00359 }
00360
00361
void PBufferStreams::execute(
const ShProgramNodeCPtr& program,
00362 ShStream& dest)
00363 {
00364 DECLARE_TIMER(overhead);
00365
int prev = shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->context();
00366
00367
00368
if (program->target() !=
"gpu:stream") {
00369
shError(PBufferStreamException(
"This backend can only execute ``gpu:stream'' programs."));
00370
return;
00371 }
00372
00373
00374
if (!program->inputs.empty()) {
00375
shError(PBufferStreamException(
"Stream program has unbound inputs, and can hence not be executed."));
00376
return;
00377 }
00378
00379
if (dest.size() == 0) {
00380 SH_DEBUG_WARN(
"Stream program has no outputs?");
00381
return;
00382 }
00383
00384
if ((
int)program->outputs.size() != dest.size()) {
00385 SH_DEBUG_ERROR(
"Number of stream program outputs ("
00386 << program->outputs.size()
00387 <<
") does not match number of destinations ("
00388 << dest.size()
00389 <<
").");
00390
return;
00391 }
00392 TIMING_RESULT(overhead);
00393
00394
if (dest.size() > 1) {
00395 DECLARE_TIMER(overall);
00396
00397
00398
00399
int i = 0;
00400
for (ShStream::NodeList::iterator I = dest.begin(); I != dest.end(); ++I, ++i) {
00401 ShStream s(*I);
00402 DECLARE_TIMER(specialize);
00403 ShProgram p =
shSwizzle(i) << shref_const_cast<ShProgramNode>(program);
00404 TIMING_RESULT(specialize);
00405 execute(p.node(), s);
00406 }
00407 TIMING_RESULT(overall);
00408
return;
00409 }
00410
00411 DECLARE_TIMER(onerun);
00412
00413
00414 ShChannelNodePtr output = *dest.begin();
00415
int count = output->count();
00416
00417
00418
int tex_size = 1;
00419
00420
while (tex_size * tex_size < count) {
00421 tex_size <<= 1;
00422 }
00423
00424 GlxState prevstate;
00425
00426 FloatExtension extension = setupContext(tex_size, tex_size);
00427
00428
if (extension == SH_ARB_NO_FLOAT_EXT)
return;
00429
00430 DECLARE_TIMER(gather);
00431
00432 StreamInputMap input_map;
00433
00434
00435 StreamInputGatherer gatherer(input_map);
00436 program->ctrlGraph->dfs(gatherer);
00437
00438 TIMING_RESULT(gather);
00439
00440
if (input_map.empty()) {
00441
shError(PBufferStreamException(
"Stream program does not use any streams!"));
00442
return;
00443 }
00444
00445 DECLARE_TIMER(texsetup);
00446
00447
00448
for (StreamInputMap::iterator I = input_map.begin(); I != input_map.end(); ++I) {
00449
if (I->first->count() != count) {
00450 SH_DEBUG_ERROR(
"Input lengths of stream program do not match ("
00451 << I->first->count() <<
" != " << count <<
")");
00452
return;
00453 }
00454 ShTextureNodePtr tex;
00455 ShTextureTraits traits = ShArrayTraits();
00456 traits.clamping(ShTextureTraits::SH_UNCLAMPED);
00457
00458
00459
00460
00461
switch (extension) {
00462
case SH_ARB_NV_FLOAT_BUFFER:
00463 tex =
new ShTextureNode(SH_TEXTURE_RECT, I->first->size(),
00464 traits, tex_size, tex_size, 1);
00465
break;
00466
case SH_ARB_ATI_PIXEL_FORMAT_FLOAT:
00467 tex =
new ShTextureNode(SH_TEXTURE_2D, I->first->size(),
00468 traits, tex_size, tex_size, 1);
00469
break;
00470
default:
00471 tex = 0;
00472
break;
00473 }
00474
00475 tex->memory(I->first->memory());
00476 I->second = tex;
00477 }
00478 TIMING_RESULT(texsetup);
00479
00480
00481 DECLARE_TIMER(fpsetup);
00482
00483 ShProgram fp = ShProgram(shref_const_cast<ShProgramNode>(program))
00484 & lose<ShTexCoord2f>(
"streamcoord");
00485
00486
00487 fp.node()->target() =
"gpu:fragment";
00488
00489 ShVariableNodePtr tc_node = fp.node()->inputs.back();
00490
00491
00492 TexFetcher texFetcher(input_map, tc_node, extension == SH_ARB_NV_FLOAT_BUFFER);
00493 fp.node()->ctrlGraph->dfs(texFetcher);
00494 fp.node()->collectVariables();
00495
00496
00497
00498
optimize(fp);
00499
00500
int gl_error;
00501 glEnable(GL_VERTEX_PROGRAM_ARB);
00502 gl_error = glGetError();
00503
if (gl_error != GL_NO_ERROR) {
00504
shError(PBufferStreamException(
"Could not enable GL_VERTEX_PROGRAM_ARB"));
00505
00506
00507
return;
00508 }
00509 glEnable(GL_FRAGMENT_PROGRAM_ARB);
00510 gl_error = glGetError();
00511
if (gl_error != GL_NO_ERROR) {
00512
shError(PBufferStreamException(
"Could not enable GL_FRAGMENT_PROGRAM_ARB"));
00513
00514
00515
return;
00516 }
00517
#ifdef SH_DEBUG_PBS_PRINTFP
00518
{
00519 std::ofstream fpgv(
"pb.dot");
00520 fp.node()->ctrlGraph->graphvizDump(fpgv);
00521 }
00522 system(
"dot -Tps -o pb.ps pb.dot");
00523
#endif
00524
00525
00526
shCompile(fp);
00527
00528
#ifdef SH_DEBUG_PBS_PRINTFP
00529
{
00530 std::ofstream fpdbg(
"pbufferstream.fp");
00531 fp.code()->print(fpdbg);
00532 }
00533
#endif
00534
00535 TIMING_RESULT(fpsetup);
00536
00537 DECLARE_TIMER(vpsetup);
00538
00539
int curcontext = shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->context();
00540
if (m_setup_vp != curcontext) {
00541
00542
if (m_setup_vp < 0) {
00543 m_vp = keep<ShPosition4f>() & keep<ShTexCoord2f>();
00544 m_vp.node()->target() =
"gpu:vertex";
00545 }
00546
shCompile(m_vp);
00547 m_setup_vp = curcontext;
00548 }
00549
00550 TIMING_RESULT(vpsetup);
00551
00552 DECLARE_TIMER(binding);
00553
00554
shBind(m_vp);
00555
shBind(fp);
00556 TIMING_RESULT(binding);
00557
00558 DECLARE_TIMER(clear);
00559 glClear(GL_COLOR_BUFFER_BIT);
00560 TIMING_RESULT(clear);
00561
00562 DECLARE_TIMER(rendersetup);
00563 glViewport(0, 0, tex_size, tex_size);
00564
00565 glMatrixMode(GL_PROJECTION);
00566 glLoadIdentity();
00567
00568 glMatrixMode(GL_MODELVIEW);
00569 glLoadIdentity();
00570
00571
float tc_right;
00572
float tc_upper;
00573
00574
if (extension == SH_ARB_NV_FLOAT_BUFFER) {
00575 tc_right = static_cast<float>(tex_size);
00576 tc_upper = static_cast<float>(tex_size);
00577 }
else {
00578 tc_right = 1.0;
00579 tc_upper = 1.0;
00580 }
00581 TIMING_RESULT(rendersetup);
00582
00583 DECLARE_TIMER(render);
00584
00585
00586 glBegin(GL_QUADS); {
00587 glTexCoord2f(0.0, 0.0);
00588 glVertex3f(-1.0, -1.0, 0.0);
00589 glTexCoord2f(0.0, tc_upper);
00590 glVertex3f(-1.0, 1.0, 0.0);
00591 glTexCoord2f(tc_right, tc_upper);
00592 glVertex3f( 1.0, 1.0, 0.0);
00593 glTexCoord2f(tc_right, 0.0);
00594 glVertex3f( 1.0, -1.0, 0.0);
00595 } glEnd();
00596
00597 TIMING_RESULT(render);
00598
00599 DECLARE_TIMER(finish);
00600 glFinish();
00601
00602 TIMING_RESULT(finish);
00603
00604
00605 gl_error = glGetError();
00606
if (gl_error != GL_NO_ERROR) {
00607
shError(PBufferStreamException(
"Could not render"));
00608
00609
00610
return;
00611 }
00612
00613 DECLARE_TIMER(findouthost);
00614
00615 ShHostStoragePtr outhost
00616 = shref_dynamic_cast<ShHostStorage>(output->memory()->findStorage(
"host"));
00617
if (!outhost) {
00618 outhost =
new ShHostStorage(output->memory().object(),
00619
sizeof(
float) * output->size() * output->count());
00620 }
00621 TIMING_RESULT(findouthost);
00622
00623 DECLARE_TIMER(dirtyouthost);
00624
00625 outhost->dirty();
00626 TIMING_RESULT(dirtyouthost);
00627
00628
00629 GLenum format;
00630
switch (output->size()) {
00631
case 1:
00632 format = GL_RED;
00633
break;
00634
case 2:
00635 SH_DEBUG_ASSERT(0 &&
"Sorry, 2-component outputs aren't working right now!");
00636
break;
00637
case 3:
00638 format = GL_RGB;
00639
break;
00640
case 4:
00641 format = GL_RGBA;
00642
break;
00643
default:
00644 SH_DEBUG_ASSERT(
false);
00645
break;
00646 }
00647
00648 DECLARE_TIMER(readback);
00649
00650 glReadPixels(0, 0, tex_size, count / tex_size, format,
00651 GL_FLOAT, outhost->data());
00652 gl_error = glGetError();
00653
if (gl_error != GL_NO_ERROR) {
00654
shError(PBufferStreamException(
"Could not do glReadPixels()"));
00655
00656
00657
return;
00658 }
00659
if (count % tex_size) {
00660 glReadPixels(0, count / tex_size, count % tex_size, 1, format, GL_FLOAT,
00661 reinterpret_cast<float*>(outhost->data())
00662 + (count - (count % tex_size)) * output->size());
00663 gl_error = glGetError();
00664
if (gl_error != GL_NO_ERROR) {
00665
shError(PBufferStreamException(
"Could not do rest of glReadPixels()"));
00666
00667
00668
return;
00669 }
00670 }
00671
00672 TIMING_RESULT(readback);
00673
00674
00675 shref_dynamic_cast<GlBackend>(ShEnvironment::backend)->setContext(prev);
00676
00677
00678
00679
00680
00681
00682 TIMING_RESULT(onerun);
00683 }
00684
00685
00686 }