vector_x86.h

Go to the documentation of this file.
00001 /*                    V E C T O R _ X 8 6 . H
00002  * BRL-CAD
00003  *
00004  * Copyright (c) 2008-2012 United States Government as represented by
00005  * the U.S. Army Research Laboratory.
00006  *
00007  * This library is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License
00009  * version 2.1 as published by the Free Software Foundation.
00010  *
00011  * This library is distributed in the hope that it will be useful, but
00012  * WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this file; see the file named COPYING for more
00018  * information.
00019  */
00020 /** @file vector_x86.h
00021  *
00022  *
00023  */
00024 
00025 
00026 #ifndef __VECTOR_X86
00027 #define __VECTOR_X86
00028 
00029 #include "common.h"
00030 
00031 #ifdef HAVE_EMMINTRIN_H
00032 #  include <emmintrin.h>
00033 #endif
00034 
00035 //#define ALIGN16(_m) (double*)((((long)(_m)) + 0x10L) & ~0xFL);
/* Mark a declaration as 16-byte aligned so that it can be handed to the
 * aligned SSE2 load/store intrinsics used below.  Any definition of
 * VEC_ALIGN seen earlier is discarded first. */
#undef VEC_ALIGN
#define VEC_ALIGN __attribute__((__aligned__(16)))

/* A 16-byte vector of double (two lanes), declared with the compiler's
 * vector_size attribute. */
typedef double v2df __attribute__((__vector_size__(16)));
00040 
00041 template<int LEN>
00042 struct vec_internal {
00043     v2df v[LEN/2];
00044 };
00045 
00046 // inline dvec4::dvec4(double a, double b, double c, double d)
00047 //     : dvec<4>(
00048 // {
00049 //     double t[4] VEC_ALIGN = {a, b, c, d};
00050 
00051 // }
00052 
00053 template<int LEN>
00054 inline dvec<LEN>::dvec(double s)
00055 {
00056     double t[LEN] VEC_ALIGN;
00057     for (int i = 0; i < LEN/2; i++) {
00058         t[i*2]   = s;
00059         t[i*2+1] = s;
00060         data.v[i] = _mm_load_pd(&t[i*2]);
00061     }
00062 }
00063 
00064 template<int LEN>
00065 inline dvec<LEN>::dvec(const double* vals, bool aligned)
00066 {
00067     if (aligned) {
00068         for (int i = 0; i < LEN/2; i++) {
00069             data.v[i] = _mm_load_pd(&vals[i*2]);
00070         }
00071     } else {
00072         for (int i = 0; i < LEN/2; i++) {
00073             data.v[i] = _mm_loadu_pd(&vals[i*2]);
00074         }
00075     }
00076 }
00077 
00078 template<int LEN>
00079 inline dvec<LEN>::dvec(const dvec<LEN>& p)
00080 {
00081     for (int i = 0; i < LEN/2; i++) {
00082         data.v[i] = p.data.v[i];
00083     }
00084 }
00085 
00086 template<int LEN>
00087 inline dvec<LEN>::dvec(const vec_internal<LEN>& d)
00088 {
00089     for (int i = 0; i < LEN/2; i++) data.v[i] = d.v[i];
00090 }
00091 
00092 template<int LEN>
00093 inline dvec<LEN>&
00094 dvec<LEN>::operator=(const dvec<LEN>& p)
00095 {
00096     for (int i = 0; i < LEN/2; i++) {
00097         data.v[i] = p.data.v[i];
00098     }
00099     return *this;
00100 }
00101 
00102 template<int LEN>
00103 inline double
00104 dvec<LEN>::operator[](const int index) const
00105 {
00106     double t[2] __attribute__((aligned(16)));
00107     _mm_store_pd(t, data.v[index/2]);
00108     return t[index%2];
00109 }
00110 
00111 template<int LEN>
00112 inline void
00113 dvec<LEN>::u_store(double* arr) const
00114 {
00115     for (int i = 0; i < LEN/2; i++) {
00116         _mm_storeu_pd(&arr[i*2], data.v[i]);
00117     }
00118 }
00119 
00120 template<int LEN>
00121 inline void
00122 dvec<LEN>::a_store(double* arr) const
00123 {
00124     for (int i = 0; i < LEN/2; i++) {
00125         _mm_store_pd(&arr[i*2], data.v[i]);
00126     }
00127 }
00128 
00129 template<int LEN>
00130 inline bool
00131 dvec<LEN>::operator==(const dvec<LEN>& b) const
00132 {
00133     double ta[LEN] VEC_ALIGN;
00134     double tb[LEN] VEC_ALIGN;
00135     a_store(ta);
00136     b.a_store(tb);
00137     for (int i = 0; i < LEN; i++)
00138         if (fabs(ta[i]-tb[i]) > VEQUALITY) return false;
00139     return true;
00140 }
00141 
// Shared body for the element-wise binary operators of dvec<LEN>.
// Expands to a complete brace-enclosed function body, so the expansion
// site must have LEN, the member `data` and a dvec<LEN> parameter named
// `b` in scope.  PAIR_OP is a two-operand packed-double intrinsic such as
// _mm_add_pd.  (The parameter used to be spelled __op__; identifiers that
// contain a double underscore are reserved for the implementation.)
#define OP_IMPL(PAIR_OP) {                             \
    vec_internal<LEN> result;                          \
    for (int i = 0; i < LEN/2; i++) {                  \
        result.v[i] = PAIR_OP(data.v[i], b.data.v[i]); \
    }                                                  \
    return dvec<LEN>(result);                          \
}
00149 
00150 template<int LEN>
00151 inline dvec<LEN>
00152 dvec<LEN>::operator+(const dvec<LEN>& b)
00153 {
00154     OP_IMPL(_mm_add_pd);
00155 }
00156 
00157 template<int LEN>
00158 inline dvec<LEN>
00159 dvec<LEN>::operator-(const dvec<LEN>& b)
00160 {
00161     OP_IMPL(_mm_sub_pd);
00162 }
00163 
00164 template<int LEN>
00165 inline dvec<LEN>
00166 dvec<LEN>::operator*(const dvec<LEN>& b)
00167 {
00168     OP_IMPL(_mm_mul_pd);
00169 }
00170 
00171 template<int LEN>
00172 inline dvec<LEN>
00173 dvec<LEN>::operator/(const dvec<LEN>& b)
00174 {
00175     OP_IMPL(_mm_div_pd);
00176 }
00177 
00178 template<int LEN>
00179 inline dvec<LEN>
00180 dvec<LEN>::madd(const dvec<LEN>& s, const dvec<LEN>& b)
00181 {
00182     vec_internal<LEN> r;
00183     for (int i = 0; i < LEN/2; i++) {
00184         r.v[i] = _mm_mul_pd(data.v[i], s.data.v[i]);
00185     }
00186     for (int i = 0; i < LEN/2; i++) {
00187         r.v[i] = _mm_add_pd(r.v[i], b.data.v[i]);
00188     }
00189     return dvec<LEN>(r);
00190 }
00191 
00192 template<int LEN>
00193 inline dvec<LEN>
00194 dvec<LEN>::madd(const double s, const dvec<LEN>& b)
00195 {
00196     double _t[LEN] VEC_ALIGN;
00197     for (int i = 0; i < LEN; i++) _t[i] = s;
00198     dvec<LEN> t(_t, true);
00199     return madd(t, b);
00200 }
00201 
00202 template<int LEN>
00203 inline double
00204 dvec<LEN>::foldr(double identity, const dvec_op& op, int limit)
00205 {
00206     double _t[LEN] VEC_ALIGN;
00207     a_store(_t);
00208     double val = identity;
00209     for (int i = limit-1; i >= 0; i--) {
00210         val = op(_t[i], val);
00211     }
00212     return val;
00213 }
00214 
00215 template<int LEN>
00216 inline double
00217 dvec<LEN>::foldl(double identity, const dvec_op& op, int limit)
00218 {
00219     double _t[LEN] VEC_ALIGN;
00220     a_store(_t);
00221     double val = identity;
00222     for (int i = 0; i < limit; i++) {
00223         val = op(val, _t[i]);
00224     }
00225     return val;
00226 }
00227 
00228 
00229 template<int LEN>
00230 inline dvec<LEN>
00231 dvec<LEN>::map(const dvec_unop& op, int limit)
00232 {
00233     double _t[LEN] VEC_ALIGN;
00234     a_store(_t);
00235     for (int i = 0; i < limit; i++) {
00236         _t[i] = op(_t[i]);
00237     }
00238     return dvec<LEN>(_t);
00239 }
00240 
00241 
00242 template <int LEN>
00243 inline std::ostream&
00244 operator<<(std::ostream& out, const dvec<LEN>& v)
00245 {
00246     double _t[LEN] VEC_ALIGN;
00247     v.a_store(_t);
00248     out << "<";
00249     for (int i = 0; i < LEN; i++) {
00250         out << _t[i];
00251         if (i != LEN-1)
00252             out << ",";
00253     }
00254     out << ">";
00255     return out;
00256 }
00257 
00258 class vec2d {
00259 public:
00260 
00261     vec2d() {
00262         _init(0, 0);
00263     }
00264 
00265     vec2d(double x_, double y_) {
00266         _init(x_, y_);
00267     }
00268 
00269     vec2d(const vec2d& proto) {
00270         _vec = proto._vec;
00271     }
00272 
00273     vec2d& operator=(const vec2d& b) {
00274         _vec = b._vec;
00275         return *this;
00276     }
00277 
00278     double operator[](int index) const {
00279         double  v[2] __attribute__((aligned(16)));
00280         _mm_store_pd(v, _vec);
00281         return v[index];
00282     }
00283 
00284     void ustore(double* arr) const {
00285         // assume nothing about the alignment of arr
00286         double  v[2] __attribute__((aligned(16)));
00287         _mm_store_pd(v, _vec);
00288         arr[0] = v[0];
00289         arr[1] = v[1];
00290     }
00291 
00292     double x() const { return (*this)[0]; }
00293     double y() const { return (*this)[1]; }
00294 
00295     vec2d operator+(const vec2d& b) const {
00296         return vec2d(_mm_add_pd(_vec, b._vec));
00297     }
00298 
00299     vec2d operator-(const vec2d& b) const {
00300         return vec2d(_mm_sub_pd(vec(), b.vec()));
00301     }
00302 
00303     vec2d operator*(const vec2d& b) const {
00304         return vec2d(_mm_mul_pd(vec(), b.vec()));
00305     }
00306 
00307     vec2d operator/(const vec2d& b) const {
00308         return vec2d(_mm_div_pd(vec(), b.vec()));
00309     }
00310 
00311     vec2d madd(const double& scalar, const vec2d& b) const {
00312         return madd(vec2d(scalar, scalar), b);
00313     }
00314 
00315     vec2d madd(const vec2d& s, const vec2d& b) const {
00316         return vec2d(_mm_add_pd(_mm_mul_pd(vec(), s.vec()), b.vec()));
00317     }
00318 
00319 private:
00320     //double  v[2] __attribute__((aligned(16)));
00321     v2df _vec;
00322 
00323     vec2d(const v2df& result) {
00324         _vec = result;
00325     }
00326 
00327     v2df vec() const { return _vec; }
00328 
00329     void _init(double x_, double y_) {
00330         double  v[2] __attribute__((aligned(16)));
00331         v[0] = x_;
00332         v[1] = y_;
00333         _vec = _mm_load_pd(v);
00334     }
00335 
00336 };
00337 
00338 inline std::ostream&
00339 operator<<(std::ostream& out, const vec2d& v)
00340 {
00341     out << "<" << v.x() << "," << v.y() << ">";
00342     return out;
00343 }
00344 
00345 #endif
00346 
00347 /*
00348  * Local Variables:
00349  * mode: C++
00350  * tab-width: 8
00351  * indent-tabs-mode: t
00352  * c-file-style: "stroustrup"
00353  * End:
00354  * ex: shiftwidth=4 tabstop=8
00355  */
Generated on Tue Dec 11 13:14:27 2012 for LIBBN by  doxygen 1.6.3