00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00027 #ifndef SHUTIL_FUNCIMPL_HPP
00028 #define SHUTIL_FUNCIMPL_HPP
00029
00030 #include <cmath>
00031 #include <numeric>
00032 #include "ShAttrib.hpp"
00033 #include "ShSwizzle.hpp"
00034 #include "ShVariable.hpp"
00035 #include "ShFunc.hpp"
00036 #include "ShLib.hpp"
00037
00038 namespace ShUtil {
00039
00040 using namespace SH;
00041
00042 template<int N, typename T>
00043 ShGeneric<N, T> smoothstep(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b,
00044 const ShGeneric<N, T> x) {
00045 ShGeneric<N, T> t = (x - a) / (b - a);
00046
00047 t = clamp(t, 0.0f, 1.0f);
00048 return t * t * mad(-2.0f, t, ShConstAttrib1f(3.0f));
00049 }
00050
00053 template<int N, typename T>
00054 ShGeneric<1, T> distance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00055 ShGeneric<N, T> diff = abs(a - b);
00056 return sqrt(dot(diff, diff));
00057 }
00058
00062 template<int N, typename T>
00063 ShGeneric<1, T> lOneDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00064
00065 ShGeneric<N, T> diff = abs(a - b);
00066 return dot(fillcast<N>(1.0f), diff);
00067 }
00068
00072 template<int N, typename T>
00073 ShGeneric<1, T> lInfDistance(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b) {
00074 ShGeneric<N, T> diff = abs(a - b);
00075 ShGeneric<1, T> result = max(diff(0), diff(1));
00076 for(int i = 2; i < N; ++i) result = max(result, diff(i));
00077 return result;
00078 }
00079
00080
00081 static const int LCG_REPS = 5;
00088
00089
00090 template<int N, typename T>
00091 ShGeneric<N, T> hashlcg(const ShGeneric<N, T>& p) {
00092 ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01);
00093
00094
00095 ShGeneric<N, T> a = fillcast<N>(
00096 ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0),
00097 std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0)));
00098 ShGeneric<N, T> m = fillcast<N>(
00099 ShConstAttrib4f(std::sqrt(2.0), 1.0 / M_PI, std::sqrt(3.0),
00100 std::exp(-1.0)));
00101
00102 for(int i = 0; i < LCG_REPS; ++i) result = frac(mad(result, a, m));
00103 return result;
00104 }
00105
00106
00107 static const int MRG_REPS = 2;
00120 template<int N, typename T>
00121 ShGeneric<N, T> hashmrg(const ShGeneric<N, T>& p) {
00122 ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01);
00123 ShGeneric<N, T> a = fillcast<N>(
00124 ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0),
00125 std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0)));
00126
00127 for(int i = 0; i < MRG_REPS; ++i) {
00128 for(int j = 0; j < N; ++j) {
00129 result(j) = frac(dot(result, a));
00130 }
00131 }
00132 return result;
00133 }
00134
00135 template<int N, ShBindingType Binding, typename T>
00136 ShAttrib<N, Binding, T> evenOddSort(const ShAttrib<N, Binding, T>& v) {
00137 ShAttrib<N, Binding, T> result = v;
00138 groupEvenOddSort<1>(&result);
00139 return result;
00140 }
00141
00146 template<int S, int N, ShBindingType Binding, typename T>
00147 void groupEvenOddSort(ShAttrib<N, Binding, T> v[]) {
00148 const int NE = (N + 1) / 2;
00149 const int NO = N / 2;
00150 const int NU = NO;
00151 const int ND = NE - 1;
00152
00153 int i, j;
00154
00155 ShAttrib<NU, SH_TEMP, T> eu, ou, ccu;
00156 ShAttrib<ND, SH_TEMP, T> ed, od, ccd;
00157
00158
00159 int eswiz[NE], oswiz[NO];
00160 for(i = 0; i < NE; ++i) eswiz[i] = i;
00161 for(i = 0; i < NO; ++i) oswiz[i] = NE + i;
00162
00163 for(i = 0; i < NE; ++i) {
00164
00165
00166
00167
00168
00169
00170
00171
00172 eu = v[0].template swiz<NU>(eswiz);
00173 ou = v[0].template swiz<NU>(oswiz);
00174 if (S > 1) ccu = eu < ou;
00175 v[0].template swiz<NU>(eswiz) = min(eu, ou);
00176 v[0].template swiz<NU>(oswiz) = max(eu, ou);
00177
00178 for(j = 1; j < S; ++j) {
00179 eu = v[j].template swiz<NU>(eswiz);
00180 ou = v[j].template swiz<NU>(oswiz);
00181 v[j].template swiz<NU>(eswiz) = cond(ccu, eu, ou);
00182 v[j].template swiz<NU>(oswiz) = cond(ccu, ou, eu);
00183 }
00184
00185
00186 ed = v[0].template swiz<ND>(eswiz + 1);
00187 od = v[0].template swiz<ND>(oswiz);
00188 if (S > 1) ccd = ed > od;
00189 v[0].template swiz<ND>(eswiz + 1) = max(ed, od);
00190 v[0].template swiz<ND>(oswiz) = min(ed, od);
00191
00192 for(j = 1; j < S; ++j) {
00193 ed = v[j].template swiz<ND>(eswiz + 1);
00194 od = v[j].template swiz<ND>(oswiz);
00195 v[j].template swiz<ND>(eswiz + 1) = cond(ccd, ed, od);
00196 v[j].template swiz<ND>(oswiz) = cond(ccd, od, ed);
00197 }
00198 }
00199
00200
00201 int resultEswiz[NE], resultOswiz[NO];
00202 for(i = 0; i < NE; ++i) resultEswiz[i] = i * 2;
00203 for(i = 0; i < NO; ++i) resultOswiz[i] = i * 2 + 1;
00204 for(i = 0; i < S; ++i) {
00205 ShAttrib<NE, SH_TEMP, T> evens = v[i].template swiz<NE>(eswiz);
00206 v[i].template swiz<NO>(resultOswiz) = v[i].template swiz<NO>(oswiz);
00207 v[i].template swiz<NE>(resultEswiz) = evens;
00208 }
00209 }
00210
00211 template<int S, int N, ShBindingType Binding, typename T>
00212 void groupBitonicSort(ShAttrib<N, Binding, T> v[]) {
00213 static const int C = N >> 1;
00214
00215 int i, j;
00216 int log2N, ceilN;
00217 int strideBit, flipBit, stride, flip;
00218 bool doFlip;
00219 int swiz[2][C];
00220 ShAttrib<C, SH_TEMP, T> temp[2], condition;
00221
00222 for(log2N = 0; N > (1 << log2N); log2N++);
00223 ceilN = 1 << log2N;
00224
00225 std::cout << "N = " << N << ", ceilN = " << ceilN << ", log2N = " << log2N << std::endl;
00226
00227 for(flipBit = 1; flipBit < log2N; ++flipBit) {
00228 flip = 1 << flipBit;
00229 for(strideBit = flipBit - 1; strideBit >= 0; --strideBit) {
00230 stride = 1 << strideBit;
00231
00232 doFlip = false;
00233 for(i = j = 0; i < N - stride; ++j) {
00234
00235 if(i >= ceilN - N) {
00236 swiz[0][j] = i + (doFlip ? stride : 0);
00237 swiz[1][j] = i + (doFlip ? 0 : stride);
00238 }
00239 ++i;
00240 if(i % stride == 0) i += stride;
00241 if(i % flip == 0) doFlip = !doFlip;
00242 }
00243 std::cout << "Comparators:" << std::endl;
00244 for(i = 0; i < C; ++i) std::cout << " " << swiz[0][i] << " " << swiz[1][i] << std::endl;
00245
00246
00247 for(i = 0; i < 2; ++i) temp[i] = v[0].template swiz<C>(swiz[i]);
00248 if(S > 1) condition = temp[0] < temp[1];
00249 v[0].template swiz<C>(swiz[0]) = min(temp[0], temp[1]);
00250 v[0].template swiz<C>(swiz[1]) = max(temp[0], temp[1]);
00251
00252
00253 for(i = 1; i < S; ++i) {
00254 for(j = 0; j < 2; ++j) temp[j] = v[j].template swiz<C>(swiz[j]);
00255 v[i].template swiz<C>(swiz[0]) = cond(condition, temp[0], temp[1]);
00256 v[i].template swiz<C>(swiz[1]) = cond(condition, temp[1], temp[0]);
00257 }
00258 }
00259 }
00260 }
00261
00265 template<typename T>
00266 ShGeneric<3, T> changeBasis(const ShGeneric<3, T> &b0,
00267 const ShGeneric<3, T> &b1, const ShGeneric<3, T> &b2, const ShGeneric<3, T> &v) {
00268 ShAttrib<3, SH_TEMP, T> result;
00269 result(0) = b0 | v;
00270 result(1) = b1 | v;
00271 result(2) = b2 | v;
00272 return result;
00273 }
00274
00275
00276 }
00277
00278 #endif // SHUTIL_FUNCIMPL_HPP