Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ShFuncImpl.hpp

00001 // Sh: A GPU metaprogramming language.
00002 //
00003 // Copyright (c) 2003 University of Waterloo Computer Graphics Laboratory
00004 // Project administrator: Michael D. McCool
00005 // Authors: Zheng Qin, Stefanus Du Toit, Kevin Moule, Tiberiu S. Popa,
00006 //          Bryan Chan, Michael D. McCool
00007 // 
00008 // This software is provided 'as-is', without any express or implied
00009 // warranty. In no event will the authors be held liable for any damages
00010 // arising from the use of this software.
00011 // 
00012 // Permission is granted to anyone to use this software for any purpose,
00013 // including commercial applications, and to alter it and redistribute it
00014 // freely, subject to the following restrictions:
00015 // 
00016 // 1. The origin of this software must not be misrepresented; you must
00017 // not claim that you wrote the original software. If you use this
00018 // software in a product, an acknowledgment in the product documentation
00019 // would be appreciated but is not required.
00020 // 
00021 // 2. Altered source versions must be plainly marked as such, and must
00022 // not be misrepresented as being the original software.
00023 // 
00024 // 3. This notice may not be removed or altered from any source
00025 // distribution.
00027 #ifndef SHUTIL_FUNCIMPL_HPP 
00028 #define SHUTIL_FUNCIMPL_HPP 
00029 
00030 #include <cmath>
00031 #include <numeric>
00032 #include "ShAttrib.hpp"
00033 #include "ShSwizzle.hpp" 
00034 #include "ShVariable.hpp"
00035 #include "ShFunc.hpp"
00036 #include "ShLib.hpp"
00037 
00038 namespace ShUtil {
00039 
00040 using namespace SH;
00041 
00042 template<int N, typename T>
00043 ShGeneric<N, T> smoothstep(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b,
00044     const ShGeneric<N, T> x) {
00045   ShGeneric<N, T> t = (x - a) / (b - a);
00046   // TODO fix this for other types
00047   t = clamp(t, 0.0f, 1.0f); 
00048   return t * t * mad(-2.0f, t, ShConstAttrib1f(3.0f));
00049 }
00050 
00053 template<int N, typename T>
00054 ShGeneric<1, T> distance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00055   ShGeneric<N, T> diff = abs(a - b);
00056   return sqrt(dot(diff, diff));
00057 }
00058 
00062 template<int N, typename T>
00063 ShGeneric<1, T> lOneDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00064   //TODO should use dot product with vector 1
00065   ShGeneric<N, T> diff = abs(a - b);
00066   return dot(fillcast<N>(1.0f), diff); 
00067 }
00068 
00072 template<int N, typename T>
00073 ShGeneric<1, T> lInfDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00074   ShGeneric<N, T> diff = abs(a - b);
00075   ShGeneric<1, T> result = max(diff(0), diff(1));
00076   for(int i = 2; i < N; ++i) result = max(result, diff(i));
00077   return result;
00078 }
00079 
00080 
00081 static const int LCG_REPS = 5; // total instructions for hashlcg will be LCG_REPS * 2 + 2
00088 // TODO: may not work as intended on 24-bit floats
00089 // since there may not be enough precision 
00090 template<int N, typename T>
00091 ShGeneric<N, T> hashlcg(const ShGeneric<N, T>& p) {
00092   ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01);
00093 
00094   // TODO fix this for long tuples
00095   ShGeneric<N, T> a = fillcast<N>(
00096       ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0), 
00097           std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0)));
00098   ShGeneric<N, T> m = fillcast<N>(
00099       ShConstAttrib4f(std::sqrt(2.0), 1.0 / M_PI, std::sqrt(3.0), 
00100           std::exp(-1.0)));
00101 
00102   for(int i = 0; i < LCG_REPS; ++i) result = frac(mad(result, a, m)); 
00103   return result;
00104 }
00105 
00106 
00107 static const int MRG_REPS = 2; // total instructions for hashmrg will be MRG_REPS * N * 2 + 2 
00120 template<int N, typename T>
00121 ShGeneric<N, T> hashmrg(const ShGeneric<N, T>& p) {
00122   ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01);
00123   ShGeneric<N, T> a = fillcast<N>(
00124       ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0), 
00125           std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0)));
00126 
00127   for(int i = 0; i < MRG_REPS; ++i) {
00128     for(int j = 0; j < N; ++j) { 
00129       result(j) = frac(dot(result, a));
00130     }
00131   }
00132   return result;
00133 }
00134 
00135 template<int N, ShBindingType Binding, typename T>
00136 ShAttrib<N, Binding, T> evenOddSort(const ShAttrib<N, Binding, T>& v) {
00137   ShAttrib<N, Binding, T> result = v;
00138   groupEvenOddSort<1>(&result);
00139   return result;
00140 }
00141 
00146 template<int S, int N, ShBindingType Binding, typename T>
00147 void groupEvenOddSort(ShAttrib<N, Binding, T> v[]) {
00148   const int NE = (N + 1) / 2; // number of even elements
00149   const int NO = N / 2; // number of odd elements
00150   const int NU = NO; // number of components to compare for (2i, 2i+1) comparisons
00151   const int ND = NE - 1; // number of componnets to compare for (2i, 2i-1) comparisons
00152 
00153   int i, j;
00154   // hold even/odd temps and condition code for (2i, 2i+1) "up" and (2i, 2i-1) "down" comparisons 
00155   ShAttrib<NU, SH_TEMP, T> eu, ou, ccu; 
00156   ShAttrib<ND, SH_TEMP, T> ed, od, ccd; 
00157 
00158   // even and odd swizzle (elms 0..NE-1 are the "even" subsequence, NE..N-1 "odd")
00159   int eswiz[NE], oswiz[NO]; 
00160   for(i = 0; i < NE; ++i) eswiz[i] = i;
00161   for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00162 
00163   for(i = 0; i < NE; ++i) { 
00164     // TODO note the interesting syntax (does appear to be C++ standard) 
00165     // that's required so that the gcc parser does 
00166     // not crap out on the template swiz code
00167     //
00168     // if this doesn't work on other platforms, we may have to
00169     // rewrite the swiz function
00170 
00171     // compare 2i, 2i+1
00172     eu = v[0].template swiz<NU>(eswiz);
00173     ou = v[0].template swiz<NU>(oswiz);
00174     if (S > 1) ccu = eu < ou; 
00175     v[0].template swiz<NU>(eswiz) = min(eu, ou); 
00176     v[0].template swiz<NU>(oswiz) = max(eu, ou); 
00177 
00178     for(j = 1; j < S; ++j) {
00179       eu = v[j].template swiz<NU>(eswiz);
00180       ou = v[j].template swiz<NU>(oswiz);
00181       v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou); 
00182       v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu); 
00183     }
00184 
00185     // compare 2i, 2i-1
00186     ed = v[0].template swiz<ND>(eswiz + 1);
00187     od = v[0].template swiz<ND>(oswiz);
00188     if (S > 1) ccd = ed > od; 
00189     v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00190     v[0].template swiz<ND>(oswiz) = min(ed, od);
00191 
00192     for(j = 1; j < S; ++j) {
00193       ed = v[j].template swiz<ND>(eswiz + 1);
00194       od = v[j].template swiz<ND>(oswiz);
00195       v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od); 
00196       v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed); 
00197     }
00198   }
00199 
00200   // reswizzle "even" to 0, 2, 4,... "odd" to 1, 3, 5, ..
00201   int resultEswiz[NE], resultOswiz[NO]; 
00202   for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00203   for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1; 
00204   for(i = 0; i < S; ++i) {
00205     ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00206     v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00207     v[i].template swiz<NE>(resultEswiz) = evens;
00208   }
00209 }
00210 
00211 template<int S, int N, ShBindingType Binding, typename T>
00212 void groupBitonicSort(ShAttrib<N, Binding, T> v[]) {
00213   static const int C = N >> 1; // number of comparators
00214 
00215   int i, j;
00216   int log2N, ceilN; 
00217   int strideBit, flipBit, stride, flip;
00218   bool doFlip;
00219   int swiz[2][C];
00220   ShAttrib<C, SH_TEMP, T> temp[2], condition; 
00221   
00222   for(log2N = 0; N > (1 << log2N); log2N++);  // figure out ceil(log_2(N))
00223   ceilN = 1 << log2N;
00224 
00225   std::cout << "N = " << N << ", ceilN = " << ceilN << ", log2N = " << log2N << std::endl;
00226 
00227   for(flipBit = 1; flipBit < log2N; ++flipBit) {
00228     flip = 1 << flipBit; 
00229     for(strideBit = flipBit - 1; strideBit >= 0; --strideBit) {
00230       stride = 1 << strideBit; 
00231       // prepare the swiz for this pass
00232       doFlip = false;
00233       for(i = j = 0; i < N - stride; ++j) {
00234         // handle non-power of 2 N (see http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm)
00235         if(i >= ceilN - N) { 
00236           swiz[0][j] = i + (doFlip ? stride : 0); 
00237           swiz[1][j] = i + (doFlip ? 0 : stride);
00238         }
00239         ++i;
00240         if(i % stride == 0) i += stride;
00241         if(i % flip == 0) doFlip = !doFlip;
00242       }
00243       std::cout << "Comparators:" << std::endl;
00244       for(i = 0; i < C; ++i) std::cout << "    " << swiz[0][i] << " " << swiz[1][i] << std::endl;
00245 
00246       // do the comparison using the first element 
00247       for(i = 0; i < 2; ++i) temp[i] = v[0].template swiz<C>(swiz[i]);
00248       if(S > 1) condition = temp[0] < temp[1]; 
00249       v[0].template swiz<C>(swiz[0]) = min(temp[0], temp[1]);
00250       v[0].template swiz<C>(swiz[1]) = max(temp[0], temp[1]);
00251 
00252       // sort remaining elements based on first element comparison 
00253       for(i = 1; i < S; ++i) {
00254         for(j = 0; j < 2; ++j) temp[j] = v[j].template swiz<C>(swiz[j]);
00255         v[i].template swiz<C>(swiz[0]) = cond(condition, temp[0], temp[1]); 
00256         v[i].template swiz<C>(swiz[1]) = cond(condition, temp[1], temp[0]); 
00257       }
00258     }
00259   }
00260 }
00261 
00265 template<typename T>
00266 ShGeneric<3, T> changeBasis(const ShGeneric<3, T> &b0, 
00267     const ShGeneric<3, T> &b1, const ShGeneric<3, T> &b2, const ShGeneric<3, T> &v) {
00268   ShAttrib<3, SH_TEMP, T> result;
00269   result(0) = b0 | v;
00270   result(1) = b1 | v;
00271   result(2) = b2 | v;
00272   return result;
00273 }
00274 
00275 
00276 }
00277 
00278 #endif // SHUTIL_FUNCIMPL_HPP 

Generated on Mon Jan 24 18:36:31 2005 for Sh by  doxygen 1.4.1