/*
 * fptype_r2: store the difference of two vectors in dx and return its
 * squared length.
 *
 * x1, x2: input vectors (read only by this function).
 * dx:     output; receives the component-wise difference.
 * return: sum of the squared components of the difference.
 *
 * The scalar fallback at the bottom shows the contract directly:
 * dx[i] = x1[i] - x2[i] for i = 0..2, result = dx[0]^2 + dx[1]^2 + dx[2]^2.
 *
 * The SIMD branches use aligned load/store intrinsics on full 4-element
 * vectors, so when VECTORIZE is defined x1, x2 and dx must each provide
 * four elements and meet the alignment of the intrinsic used (16 bytes for
 * the _mm_* calls, 32 bytes for the _mm256_* calls), and dx[3] is written.
 *
 * NOTE(review): this listing is missing lines (union declarations, the
 * subtraction that produces c, most return statements, #else/#endif and
 * the closing brace). Comments below describe only the visible statements;
 * anything about the missing lines is marked as an assumption.
 */
87 TF_ALWAYS_INLINE FPTYPE
fptype_r2 ( FPTYPE *x1 , FPTYPE *x2 , FPTYPE *dx ) {
/* Single precision, SSE4.1: use the dot-product instruction. */
89 #if defined(VECTORIZE) && defined(FPTYPE_SINGLE) && defined(__SSE4_1__)
91 simd_vector(4,
float) v;
/* Aligned loads of four floats from each input. */
96 a.v = _mm_load_ps( x1 );
97 b.v = _mm_load_ps( x2 );
/* c is computed on a line not shown here -- presumably a.v - b.v; TODO confirm. */
101 _mm_store_ps( dx , c.v );
/* Mask 0x71: multiply elements 0..2 only (high nibble 0x7) and place the
   sum in element 0 (low nibble 0x1), so the 4th element is ignored. */
104 d.v = _mm_dp_ps( c.v , c.v , 0x71 );
/* Single precision, SSE3: no dot-product instruction, reduce with horizontal adds. */
108 #elif defined(VECTORIZE) && defined(FPTYPE_SINGLE) && defined(__SSE3__)
110 simd_vector(4,
float) v;
115 a.v = _mm_load_ps( x1 );
116 b.v = _mm_load_ps( x2 );
/* c is computed on a line not shown here -- presumably a.v - b.v; TODO confirm. */
120 _mm_store_ps( dx , c.v );
/* d is initialised on lines not shown -- presumably the element-wise square
   of c with the 4th element cleared; TODO confirm. Two horizontal adds leave
   the sum of all four elements of d in every element. */
127 d.v = _mm_hadd_ps( d.v , d.v );
128 d.v = _mm_hadd_ps( d.v , d.v );
/* Double precision, AVX: one 256-bit register holds all four doubles. */
132 #elif defined(VECTORIZE) && defined(FPTYPE_DOUBLE) && defined(__AVX__)
/* Aligned loads of four doubles from each input. */
139 a.v = _mm256_load_pd( x1 );
140 b.v = _mm256_load_pd( x2 );
/* c is computed on a line not shown here -- presumably a.v - b.v; TODO confirm. */
144 _mm256_store_pd( dx , c.v );
/* _mm256_hadd_pd adds adjacent pairs within each 128-bit lane, so the two
   lane sums end up in f[0] and f[2]; adding them gives the full sum of d.
   d is initialised on lines not shown -- presumably the squares of c with
   the 4th element cleared; TODO confirm. */
151 d.v = _mm256_hadd_pd( d.v , d.v );
154 return d.f[0] + d.f[2];
/* Double precision, SSE4.1: two 128-bit halves per vector. */
155 #elif defined(VECTORIZE) && defined(FPTYPE_DOUBLE) && defined(__SSE4_1__)
157 simd_vector(2,
double) v;
159 } a1, a2, b1, b2, c1, c2, d1;
/* a1/b1 hold elements 0..1, a2/b2 hold elements 2..3. */
162 a1.v = _mm_load_pd( x1 );
163 b1.v = _mm_load_pd( x2 );
164 a2.v = _mm_load_pd( &x1[2] );
165 b2.v = _mm_load_pd( &x2[2] );
/* c1/c2 are computed on lines not shown -- presumably a1 - b1 and a2 - b2; TODO confirm. */
170 _mm_store_pd( dx , c1.v );
171 _mm_store_pd( &dx[2] , c2.v );
/* Mask 0x31: dot product of both elements of c1, written to element 0 only.
   Adding c2*c2 element-wise makes element 0 = c1[0]^2 + c1[1]^2 + c2[0]^2;
   element 1 holds only c2[1]^2 and is not part of the 3-component result. */
174 d1.v = _mm_dp_pd( c1.v , c1.v , 0x31 ) + c2.v * c2.v;
/* Double precision, SSE3: same two-half layout, reduced with horizontal adds. */
178 #elif defined(VECTORIZE) && defined(FPTYPE_DOUBLE) && defined(__SSE3__)
180 simd_vector(2,
double) v;
182 } a1, a2, b1, b2, c1, c2, d1, d2;
185 a1.v = _mm_load_pd( x1 );
186 b1.v = _mm_load_pd( x2 );
187 a2.v = _mm_load_pd( &x1[2] );
188 b2.v = _mm_load_pd( &x2[2] );
/* c1/c2 are computed on lines not shown -- presumably a1 - b1 and a2 - b2; TODO confirm. */
193 _mm_store_pd( dx , c1.v );
194 _mm_store_pd( &dx[2] , c2.v );
/* d1/d2 are initialised on lines not shown -- presumably the squares of
   c1/c2 with the 4th element cleared; TODO confirm. The first hadd yields
   { d1[0]+d1[1], d2[0]+d2[1] }; the second sums those two into both elements. */
202 d1.v = _mm_hadd_pd( d1.v , d2.v );
203 d1.v = _mm_hadd_pd( d1.v , d1.v );
/* Scalar fallback (the preceding #else is not shown in this listing):
   plain 3-component difference and sum of squares. */
208 dx[0] = x1[0] - x2[0];
209 dx[1] = x1[1] - x2[1];
210 dx[2] = x1[2] - x2[2];
211 return dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2];
TF_ALWAYS_INLINE FPTYPE fptype_r2(FPTYPE *x1, FPTYPE *x2, FPTYPE *dx)
Inlined function to compute the squared distance between two vectors.
Definition tf_fptype.h:87