LDK: SIMD.h Source File

00001 #ifndef __LDK_SIMD_H__
00002 #define __LDK_SIMD_H__
00003 
#include <assert.h>
#include <math.h>

#ifdef __sse__
#include <xmmintrin.h>
#endif

#include "LDK/TemplateMeta.h"
00011 
00012 namespace LDK
00013 {
00014 
00015 #ifdef __sse__
00016 typedef __m128 v4sf;
00017 
00018 class ALIGNED16 V4SF
00019 {
00020 protected:
00021 v4sf data;
00022 public:
00023     inline V4SF() {}
00024     inline V4SF(const V4SF& val) { data = val.data; } //very fast
00025     inline V4SF(const v4sf& val) { data = val; } //very fast
00026 
00027     inline V4SF(float* v) { data = _mm_loadu_ps(v); } //medium
00028     inline V4SF(float a, float b, float c, float d) { _mm_set_ps(a,b,c,d); } //slow
00029     inline explicit V4SF(float val) { data = _mm_set_ps1(val); } //slow
00030     inline explicit V4SF(double val) { data = _mm_set_ps1((float)val); } //slow
00031 
00032     inline V4SF& operator=(const V4SF& v) { data = v.data; } //very fast
00033     inline V4SF& operator=(const v4sf& v) { data = v; } //very fast
00034     inline V4SF& operator=(float val) { data = _mm_set_ps1(val); return *this; } //slow
00035     inline V4SF& operator=(double val) { data = _mm_set_ps1((float)val); return *this; }  //slow
00036 
00037     inline operator v4sf() const { return data; }
00038 
00039     inline V4SF operator-()
00040     {
00041         V4SF rtn(0.0f);
00042         rtn -= *this;
00043         return rtn;
00044     }
00045 
00046     //Arithmetic operators
00047     //all these are very fast
00048     friend inline V4SF operator+(const V4SF& a, const V4SF& b)  { return V4SF(_mm_add_ps(a,b)); }
00049     friend inline V4SF operator-(const V4SF& a, const V4SF& b)  { return V4SF(_mm_sub_ps(a,b)); }
00050     friend inline V4SF operator*(const V4SF& a, const V4SF& b)  { return V4SF(_mm_mul_ps(a,b)); }
00051     friend inline V4SF operator/(const V4SF& a, const V4SF& b)  { return V4SF(_mm_div_ps(a,b)); }
00052     inline V4SF& operator+=(const V4SF& v) { return *this = _mm_add_ps(data,v.data); }
00053     inline V4SF& operator-=(const V4SF& v) { return *this = _mm_sub_ps(data,v.data); }
00054     inline V4SF& operator*=(const V4SF& v) { return *this = _mm_mul_ps(data,v.data); }
00055     inline V4SF& operator/=(const V4SF& v) { return *this = _mm_div_ps(data,v.data); }
00056     //this is not optimal use of vector types, but can be handy. Medium speed
00057     friend inline V4SF operator+(const V4SF& a, float b)  { return V4SF(_mm_add_ps(a,_mm_set_ps1(b))); }
00058     friend inline V4SF operator-(const V4SF& a, float b)  { return V4SF(_mm_sub_ps(a,_mm_set_ps1(b))); }
00059     friend inline V4SF operator*(const V4SF& a, float b)  { return V4SF(_mm_mul_ps(a,_mm_set_ps1(b))); }
00060     friend inline V4SF operator/(const V4SF& a, float b)  { return V4SF(_mm_div_ps(a,_mm_set_ps1(b))); }
00061     inline V4SF& operator+=(float a) { return *this = V4SF(_mm_add_ps(data,_mm_set_ps1(a))); }
00062     inline V4SF& operator-=(float a) { return *this = V4SF(_mm_sub_ps(data,_mm_set_ps1(a))); }
00063     inline V4SF& operator*=(float a) { return *this = V4SF(_mm_mul_ps(data,_mm_set_ps1(a))); }
00064     inline V4SF& operator/=(float a) { return *this = V4SF(_mm_div_ps(data,_mm_set_ps1(a))); }
00065 
00066     //be carefull to stay in range 0-3!
00067     inline const float& operator[](int idx) const
00068     {
00069         assert((0 <= idx) && (idx <= 3));
00070         return *(((float*)&data)+idx);
00071     }
00072     inline float& operator[](int idx)
00073     {
00074         assert((0 <= idx) && (idx <= 3));
00075         return *(((float*)&data)+idx);
00076     }
00077 
00078     friend inline V4SF recip(const V4SF& v) { return _mm_rcp_ps(v); }
00079     //sse squareroot uses a lookup table, and is inaccurate but around 50x faster than c sqrt()
00080     friend inline V4SF sqrt(const V4SF& v) { return _mm_sqrt_ps(v); }
00081     friend inline V4SF recipsqrt(const V4SF& v) { return _mm_rsqrt_ps(v); }
00082 };
00083 
00084 #else //emulate the VPU with the FPU.
00085 #include <math.h>
00086 struct ALIGNED16 v4sf
00087 {
00088 float f0, f1, f2, f3;
00089     v4sf() {}
00090 };
00091 
00092 class ALIGNED16 V4SF
00093 {
00094 protected:
00095 v4sf data;
00096 public:
00097     V4SF() {}
00098     inline V4SF(float f0, float f1, float f2, float f3) { data.f0=f0; data.f1=f1; data.f2=f2; data.f3=f3; }
00099     inline V4SF(const v4sf& val) { data = val; }
00100     inline V4SF(float* dat) { data = (*(v4sf*)dat); }
00101     inline V4SF(float val) { *this = val; }
00102     inline V4SF& operator=(float val) { data.f0 = data.f1 = data.f2 = data.f3 = val; return *this; }
00103 
00104     inline operator v4sf() const { return data; }
00105     inline V4SF operator-()
00106     {
00107         V4SF rtn(0.0f);
00108         rtn -= *this;
00109         return rtn;
00110     }
00111 
00112     //all these are very slow. Strange. Numerous permutations of the code
00113     //have made no difference. Just a "feature" of the FPU :(
00114     //very slow with MSVC too, so it is not a g++ bug. Slow on both athlon and p4.
00115     friend inline V4SF operator+(const V4SF& a, const V4SF& b)
00116     {
00117         return V4SF(a.data.f0+b.data.f0, a.data.f1+b.data.f1,
00118             a.data.f2+b.data.f2, a.data.f3+b.data.f3);
00119     }
00120     friend inline V4SF operator-(const V4SF& a, const V4SF& b)
00121     {
00122         return V4SF(a.data.f0-b.data.f0, a.data.f1-b.data.f1,
00123             a.data.f2-b.data.f2, a.data.f3-b.data.f3);
00124     }
00125     friend inline V4SF operator*(const V4SF& a, const V4SF& b)
00126     {
00127         return V4SF(a.data.f0*b.data.f0, a.data.f1*b.data.f1,
00128             a.data.f2*b.data.f2, a.data.f3*b.data.f3);
00129     }
00130     friend inline V4SF operator/(const V4SF& a, const V4SF& b)
00131     {
00132         return V4SF(a.data.f0/b.data.f0, a.data.f1/b.data.f1,
00133             a.data.f2/b.data.f2, a.data.f3/b.data.f3);
00134     }
00135 
00136     //these are 3-4 times faster than the ops above
00137     //about 1/3 slower than the SSE ops i.e. very fast for an FPU
00138     inline V4SF& operator+=(const V4SF& val)
00139     {
00140         data.f0 += val.data.f0;
00141         data.f1 += val.data.f1;
00142         data.f2 += val.data.f2;
00143         data.f3 += val.data.f3;
00144         return *this;
00145     }
00146     inline V4SF& operator-=(const V4SF& val)
00147     {
00148         data.f0 -= val.data.f0;
00149         data.f1 -= val.data.f1;
00150         data.f2 -= val.data.f2;
00151         data.f3 -= val.data.f3;
00152         return *this;
00153     }
00154     inline V4SF& operator*=(const V4SF& val)
00155     {
00156         data.f0 *= val.data.f0;
00157         data.f1 *= val.data.f1;
00158         data.f2 *= val.data.f2;
00159         data.f3 *= val.data.f3;
00160         return *this;
00161     }
00162     inline V4SF& operator/=(const V4SF& val)
00163     {
00164         data.f0 /= val.data.f0;
00165         data.f1 /= val.data.f1;
00166         data.f2 /= val.data.f2;
00167         data.f3 /= val.data.f3;
00168         return *this;
00169     }
00170 
00171     //float operators. Slightly faster than vector/vector ops, but these ops are still 3-4 times slower than the compounds
00172     friend inline V4SF operator+(const V4SF& a, float b) { return V4SF(a.data.f0+b, a.data.f1+b, a.data.f2+b, a.data.f3+b); }
00173     friend inline V4SF operator-(const V4SF& a, float b) { return V4SF(a.data.f0-b, a.data.f1-b, a.data.f2-b, a.data.f3-b); }
00174     friend inline V4SF operator*(const V4SF& a, float b) { return V4SF(a.data.f0*b, a.data.f1*b, a.data.f2*b, a.data.f3*b); }
00175     friend inline V4SF operator/(const V4SF& a, float b) { return V4SF(a.data.f0/b, a.data.f1/b, a.data.f2/b, a.data.f3/b); }
00176     //these are the fastest emulated VPU ops
00177     inline V4SF& operator+=(float val)
00178     {
00179         data.f0 += val; data.f1 += val;
00180         data.f2 += val; data.f3 += val;
00181         return *this;
00182     }
00183     inline V4SF& operator-=(float val)
00184     {
00185         data.f0 -= val; data.f1 -= val;
00186         data.f2 -= val; data.f3 -= val;
00187         return *this;
00188     }
00189     inline V4SF& operator*=(float val)
00190     {
00191         data.f0 *= val; data.f1 *= val;
00192         data.f2 *= val; data.f3 *= val;
00193         return *this;
00194     }
00195     inline V4SF& operator/=(float val)
00196     {
00197         data.f0 /= val; data.f1 /= val;
00198         data.f2 /= val; data.f3 /= val;
00199         return *this;
00200     }
00201 
00202     inline const float& operator[](int idx) const { return *(((float*)&data)+idx); }
00203     inline float& operator[](int idx) { return *(((float*)&data)+idx); }
00204 
00205     friend inline V4SF recip(const V4SF& v) { return V4SF(1.0f/v[0], 1.0f/v[1], 1.0f/v[2], 1.0f/v[3]); } //fast
00206     //very, very, very slow. Need to do lookup tables for sqrt
00207     friend inline V4SF sqrt(const V4SF& v) { return V4SF(sqrt(v[0]), sqrt(v[1]), sqrt(v[2]), sqrt(v[3])); }
00208     friend inline V4SF recipsqrt(const V4SF& v) { return V4SF(1.0f/sqrt(v[0]), 1.0f/sqrt(v[1]), 1.0f/sqrt(v[2]), 1.0f/sqrt(v[3])); }
00209 };
00210 
00211 #endif //__sse__
00212 
00213 namespace V4SFV //private namespace for utilities related to fixed size vectors of V4SFs.
00214 {
00215 
00216 // Expr -- a vector expression iterator
00217 template<class A>
00218 class Expr
00219 {
00220 private:
00221     A mIter;
00222 public:
00223     Expr(const A& a)
00224         : mIter(a)
00225     { }
00226 
00227     V4SF operator*() const
00228     { return *mIter; }
00229 
00230     const V4SF& operator[](int i) const
00231     { return mIter[i]; }
00232 
00233     void operator++()
00234     { ++mIter; }
00235 };
00236 
00238 // Applicative templates...
00240 
00241 // +
00242 struct ApAdd
00243 {
00244     static inline V4SF apply(const V4SF& a, const V4SF& b)
00245     { return a + b; }
00246 };
00247 
00248 // /
00249 struct ApDivide
00250 {
00251     static inline V4SF apply(const V4SF& a, const V4SF& b)
00252     { return a / b; }
00253 };
00254 
00255 // -
00256 struct ApSubtract
00257 {
00258     static inline V4SF apply(const V4SF& a, const V4SF& b)
00259     { return a - b; }
00260 };
00261 
00262 // *
00263 struct ApMultiply
00264 {
00265     static inline V4SF apply(const V4SF& a, const V4SF& b)
00266     { return a * b; }
00267 };
00268 
00269 // =
00270 struct ApAssign
00271 {
00272     static inline void apply(V4SF&a, const V4SF&b)
00273     { a = b; }
00274 };
00275 
00276 // +=
00277 struct ApAddAssign
00278 {
00279     static inline void apply(V4SF&a, const V4SF&b)
00280     { a += b; }
00281 };
00282 
00283 // -=
00284 struct ApSubtractAssign
00285 {
00286     static inline void apply(V4SF&a, const V4SF&b)
00287     { a -= b; }
00288 };
00289 
00290 // *=
00291 struct ApMultiplyAssign
00292 {
00293     static inline void apply(V4SF&a, const V4SF&b)
00294     { a *= b; }
00295 };
00296 
00297 // /=
00298 struct ApDivideAssign
00299 {
00300     static inline void apply(V4SF&a, const V4SF&b)
00301     { a /= b; }
00302 };
00303 
00304 // A BinExprOp represents an operation on two dereferenced iterators (A,B).
00305 // It can be thought of as an "applicative iterator", combining the
00306 // notion of an STL applicative template, and an iterator.
00307 template<class A, class B, class Op>
00308 class BinExprOp
00309 {
00310 private:
00311     A mIter1;
00312     B mIter2;
00313 public:
00314     BinExprOp(const A& a, const B& b)
00315         : mIter1(a), mIter2(b)
00316     { }
00317 
00318     inline void operator++()
00319     { ++mIter1; ++mIter2; }
00320 
00321     inline V4SF operator*() const
00322     { return Op::apply(*mIter1,*mIter2); }
00323 
00324     inline V4SF operator[](int i) const
00325     { return Op::apply(mIter1[i], mIter2[i]); }
00326 };
00327 
00328 // A class that holds a float literal as a V4SF of identical values
00329 class ALIGNED16 FExprLiteral
00330 {
00331 private:
00332 V4SF mValue;
00333 public:
00334     FExprLiteral(float val) : mValue(val) {}
00335     inline void operator++() {}
00336     inline const V4SF& operator*() const { return mValue; }
00337     inline const V4SF& operator[](int idx) const { return mValue; }
00338 };
00339 
00340 // Template meta-program loop unrolling assignment policy
00341 template <int I, class A, class Op>
00342 struct AssignUnroll
00343 {
00344     static inline void loop(V4SF* a, V4SFV::Expr<A> expr)
00345     {
00346         Op::apply(*a,*expr);
00347         ++expr;
00348         AssignUnroll<I-1,A,Op>::loop(++a,expr);
00349     }
00350 };
00351 
00352 // meta-loop terminator
00353 template <class A, class Op>
00354 struct AssignUnroll<0, A, Op>
00355 {
00356     static inline void loop(V4SF* a, V4SFV::Expr<A> expr)
00357     { Op::apply(*a,*expr); }
00358 };
00359 
00360 // for loop assignment policy
00361 template <int Size, class A, class Op>
00362 struct AssignLoop
00363 {
00364     static inline void loop(V4SF* a, V4SFV::Expr<A> expr)
00365     {
00366         for(int i=0;i<Size;i++) //likely to be unrolled by the compiler anyway due to compile time Size :)
00367         {
00368             Op::apply(*a,*expr);
00369             ++a;
00370             ++expr;
00371         }
00372     }
00373 };
00374 
00375 }//namespace V4SFV
00376 
00377 // V4SFVector:
00378 template <int SizeInFloats, int Unroll=48>
00379 class V4SFVector
00380 {
00381 public:
00382     static const int SizeInF  = SizeInFloats;
00383     static const int Spare = SizeInF%4;
00384     static const int SizeInV = (SizeInF/4) + (Spare ? 1 : 0);
00385     typedef V4SF* iterator;
00386     typedef const V4SF* const_iterator;
00387     typedef V4SF element;
00388 protected:
00389 
00390     V4SF mData[SizeInV];
00391 public:
00392 
00393     inline V4SFVector()
00394     {}
00395 
00396     inline V4SFVector(const V4SFVector& rhs)
00397     {
00398         *this = rhs;
00399     }
00400 
00401     inline V4SFVector(const float *array)
00402     {
00403         for(int i=0;i<SizeInFloats;i++)
00404         {
00405             ((float*)mData)[i] = array[i];
00406         }
00407     }
00408 
00409     inline ~V4SFVector()
00410     {}
00411 
00412     explicit inline V4SFVector(V4SFVector& b) : mData(b.mData)
00413     {}
00414 
00415     inline explicit V4SFVector(float val)
00416     { *this = val; }
00417 
00418     inline const_iterator begin() const { return &mData[0]; }
00419     inline const_iterator end() const { return &mData[SizeInV-1]; }
00420     inline iterator      begin() { return mData; }
00421     inline iterator      end() { return &mData[SizeInV-1]; }
00422 
00423     //element access
00424     inline float& operator[](int idx)
00425     {
00426         assert((0 <= idx) && (idx<SizeInF+Spare));
00427         return ((float*)mData)[idx];
00428     }
00429     inline const float& operator[](int idx) const
00430     {
00431         assert((0 <= idx) && (idx<SizeInF+Spare));
00432         return ((float*)mData)[idx];
00433     }
00434     inline const V4SF& at(int idx)
00435     {
00436         assert((0 <= idx) && (idx<SizeInV));
00437         return mData[idx];
00438     }
00439     inline void at(int idx, const V4SF& v)
00440     {
00441         assert((0 <= idx) && (idx<SizeInV));
00442         mData[idx] = v;
00443     }
00444     //float element read with linear interpolation
00445     inline float interpolate(float idx)
00446     {
00447         int i = idx;
00448         assert((0 <= i) && (i<SizeInF+Spare));
00449 
00450         float difference = ((float*)mData)[i+1] - ((float*)mData)[i];
00451         difference *= (idx - (float)i);
00452         return (((float*)mData)[i])+difference;
00453     }
00454 
00455     //ASSIGNMENT OPERATORS
00456     template<class A>
00457     inline V4SFVector& operator=(V4SFV::Expr<A> expr)
00458     {
00459         typedef typename Meta::If<(SizeInV>Unroll),V4SFV::AssignUnroll<SizeInV,A,V4SFV::ApAssign>,\
00460             V4SFV::AssignLoop<SizeInV,A,V4SFV::ApAssign> >::result AssignP;
00461         AssignP::loop(begin(),expr);
00462         return *this;
00463     }
00464     inline V4SFVector& operator=(float b)
00465     {
00466         V4SFV::FExprLiteral bLit(b);
00467         *this = V4SFV::Expr<V4SFV::FExprLiteral>(bLit);
00468         return *this;
00469     }
00470 
00471     //COMPOUND OPERATORS
00472     /*not working :(
00473     template<class A>
00474     inline V4SFVector& operator+=(V4SFV::Expr<A> expr)
00475     {
00476         typedef typename Meta::If<(SizeInV>Unroll),V4SFV::AssignUnroll<SizeInV,A,V4SFV::ApAddAssign>,\
00477             V4SFV::AssignLoop<SizeInV,A,V4SFV::ApAddAssign> >::result AssignAddP;
00478         AssignAddP::loop(begin(),expr);
00479         return *this;
00480     }
00481 
00482     template<class A>
00483     inline V4SFVector& operator-=(V4SFV::Expr<A> expr)
00484     {
00485         typedef typename Meta::If<(SizeInV>Unroll),V4SFV::AssignUnroll<SizeInV,A,V4SFV::ApSubtractAssign>,\
00486             V4SFV::AssignLoop<SizeInV,A,V4SFV::ApSubtractAssign> >::result AssignSubtractP;
00487         AssignSubtractP::loop(begin(),expr);
00488         return *this;
00489     }
00490 
00491     template<class A>
00492     inline V4SFVector& operator*=(V4SFV::Expr<A> expr)
00493     {
00494         typedef typename Meta::If<(SizeInV>Unroll),V4SFV::AssignUnroll<SizeInV,A,V4SFV::ApMultiplyAssign>,\
00495             V4SFV::AssignLoop<SizeInV,A,V4SFV::ApMultiplyAssign> >::result AssignMultiplyP;
00496         AssignMultiplyP::loop(begin(),expr);
00497         return *this;
00498     }
00499 
00500     template<class A>
00501     inline V4SFVector& operator/=(V4SFV::Expr<A> expr)
00502     {
00503         typedef typename Meta::If<(SizeInV>Unroll),V4SFV::AssignUnroll<SizeInV,A,V4SFV::ApDivideAssign>,\
00504             V4SFV::AssignLoop<SizeInV,A,V4SFV::ApDivideAssign> >::result AssignDivideP;
00505         AssignDivideP::loop(begin(),expr);
00506         return *this;
00507     }*/
00508 
00509     //ADDITION OPERATORS
00510 
00511     friend inline
00512     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApAdd> >
00513     operator+(const V4SFVector& a, const V4SFVector& b)
00514     {
00515         typedef V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApAdd> ExprT;
00516         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b.begin()));
00517     }
00518 
00519     friend inline
00520     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApAdd> >
00521     operator+(const V4SFVector& a, float b)
00522     {
00523         typedef V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApAdd> ExprT;
00524         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00525     }
00526 
00527     template<class A> friend inline
00528     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApAdd> >
00529     operator+(const V4SFV::Expr<A>& a, const V4SFVector& b)
00530     {
00531         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApAdd> ExprT;
00532         return V4SFV::Expr<ExprT>(ExprT(a,b.begin()));
00533     }
00534 
00535     template<class A> friend inline
00536     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApAdd> >
00537     operator+(const V4SFV::Expr<A>& a, float b)
00538     {
00539         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApAdd> ExprT;
00540         return V4SFV::Expr<ExprT>(ExprT(a,b));
00541     }
00542 
00543     template<class A> friend inline
00544     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApAdd> >
00545     operator+(const V4SFVector& a, const V4SFV::Expr<A>& b)
00546     {
00547         typedef V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApAdd> ExprT;
00548         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00549     }
00550 
00551     template<class A, class B> friend inline
00552     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApAdd> >
00553     operator+(const V4SFV::Expr<A>& a, const V4SFV::Expr<B>& b)
00554     {
00555         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApAdd> ExprT;
00556         return V4SFV::Expr<ExprT>(ExprT(a,b));
00557     }
00558 
00559     //SUBTRACTION OPERATORS
00560 
00561     friend inline
00562     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApSubtract> >
00563     operator-(const V4SFVector& a, const V4SFVector& b)
00564     {
00565         typedef V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApSubtract> ExprT;
00566         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b.begin()));
00567     }
00568 
00569     friend inline
00570     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApSubtract> >
00571     operator-(const V4SFVector& a, float b)
00572     {
00573         typedef V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApSubtract> ExprT;
00574         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00575     }
00576 
00577     template<class A> friend inline
00578     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApSubtract> >
00579     operator-(const V4SFV::Expr<A>& a, const V4SFVector& b)
00580     {
00581         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApSubtract> ExprT;
00582         return V4SFV::Expr<ExprT>(ExprT(a,b.begin()));
00583     }
00584 
00585     template<class A> friend inline
00586     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApSubtract> >
00587     operator-(const V4SFV::Expr<A>& a, float b)
00588     {
00589         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApSubtract> ExprT;
00590         return V4SFV::Expr<ExprT>(ExprT(a,b));
00591     }
00592 
00593     template<class A> friend inline
00594     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApSubtract> >
00595     operator-(const V4SFVector& a, const V4SFV::Expr<A>& b)
00596     {
00597         typedef V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApSubtract> ExprT;
00598         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00599     }
00600 
00601     template<class A, class B> friend inline
00602     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApSubtract> >
00603     operator-(const V4SFV::Expr<A>& a, const V4SFV::Expr<B>& b)
00604     {
00605         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApSubtract> ExprT;
00606         return V4SFV::Expr<ExprT>(ExprT(a,b));
00607     }
00608 
00609     //DIVISION OPERATORS
00610 
00611     friend inline
00612     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApDivide> >
00613     operator/(const V4SFVector& a, const V4SFVector& b)
00614     {
00615         typedef V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApDivide> ExprT;
00616         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b.begin()));
00617     }
00618 
00619     friend inline
00620     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApDivide> >
00621     operator/(const V4SFVector& a, float b)
00622     {
00623         typedef V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApDivide> ExprT;
00624         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00625     }
00626 
00627     template<class A> friend inline
00628     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApDivide> >
00629     operator/(const V4SFV::Expr<A>& a, const V4SFVector& b)
00630     {
00631         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApDivide> ExprT;
00632         return V4SFV::Expr<ExprT>(ExprT(a,b.begin()));
00633     }
00634 
00635     template<class A> friend inline
00636     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApDivide> >
00637     operator/(const V4SFV::Expr<A>& a, float b)
00638     {
00639         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApDivide> ExprT;
00640         return V4SFV::Expr<ExprT>(ExprT(a,b));
00641     }
00642 
00643     template<class A> friend inline
00644     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApDivide> >
00645     operator/(const V4SFVector& a, const V4SFV::Expr<A>& b)
00646     {
00647         typedef V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApDivide> ExprT;
00648         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00649     }
00650 
00651     template<class A, class B> friend inline
00652     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApDivide> >
00653     operator/(const V4SFV::Expr<A>& a, const V4SFV::Expr<B>& b)
00654     {
00655         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApDivide> ExprT;
00656         return V4SFV::Expr<ExprT>(ExprT(a,b));
00657     }
00658 
00659     //MULTIPLICATION OPERATORS
00660 
00661     friend inline
00662     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApMultiply> >
00663     operator*(const V4SFVector& a, const V4SFVector& b)
00664     {
00665         typedef V4SFV::BinExprOp<const_iterator,const_iterator,V4SFV::ApMultiply> ExprT;
00666         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b.begin()));
00667     }
00668 
00669     friend inline
00670     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApMultiply> >
00671     operator*(const V4SFVector& a, float b)
00672     {
00673         typedef V4SFV::BinExprOp<const_iterator,V4SFV::FExprLiteral,V4SFV::ApMultiply> ExprT;
00674         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00675     }
00676 
00677     template<class A> friend inline
00678     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApMultiply> >
00679     operator*(const V4SFV::Expr<A>& a, const V4SFVector& b)
00680     {
00681         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,const_iterator,V4SFV::ApMultiply> ExprT;
00682         return V4SFV::Expr<ExprT>(ExprT(a,b.begin()));
00683     }
00684 
00685     template<class A> friend inline
00686     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApMultiply> >
00687     operator*(const V4SFV::Expr<A>& a, float b)
00688     {
00689         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::FExprLiteral,V4SFV::ApMultiply> ExprT;
00690         return V4SFV::Expr<ExprT>(ExprT(a,b));
00691     }
00692 
00693     template<class A> friend inline
00694     V4SFV::Expr<V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApMultiply> >
00695     operator*(const V4SFVector& a, const V4SFV::Expr<A>& b)
00696     {
00697         typedef V4SFV::BinExprOp<const_iterator,V4SFV::Expr<A>,V4SFV::ApMultiply> ExprT;
00698         return V4SFV::Expr<ExprT>(ExprT(a.begin(),b));
00699     }
00700 
00701     template<class A, class B> friend inline
00702     V4SFV::Expr<V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApMultiply> >
00703     operator*(const V4SFV::Expr<A>& a, const V4SFV::Expr<B>& b)
00704     {
00705         typedef V4SFV::BinExprOp<V4SFV::Expr<A>,V4SFV::Expr<B>,V4SFV::ApMultiply> ExprT;
00706         return V4SFV::Expr<ExprT>(ExprT(a,b));
00707     }
00708 };
00709 
00710 }//namespace LDK
00711 
00712 #endif //__LDK_SIMD_H__