/*
 * Excerpt from the interior of a vectorized kernel that accumulates three
 * dot products against a common vector z (x.z, y.z and w.z). The enclosing
 * function header, its braces and several intermediate statements are not
 * visible in this excerpt, so the notes below describe only the lines shown.
 *
 * Vector accumulators for the three running sums. Each v2df_t is accessed
 * below both through .v (an __m128d) and through .d[0] / .d[1] (doubles).
 */
62 v2df_t rho1v, rho2v, rho3v;
/*
 * Alignment guard: when z is NOT on a 16-byte boundary, abort if any of
 * x, y or w IS on one.
 * NOTE(review): presumably all four vectors must share the same misalignment
 * so that skipping n_pre leading elements aligns every pointer for the
 * _mm_load_pd calls below -- the code that sets n_pre and handles those
 * elements is not visible here; confirm against the full file.
 */
72 if ( (
unsigned long ) z % 16 != 0 )
74 if ( (
unsigned long ) x % 16 == 0 ||
75 (
unsigned long ) y % 16 == 0 ||
76 (
unsigned long ) w % 16 == 0 )
bl1_abort();
/*
 * Partition the n - n_pre elements: n_run whole groups of four for the
 * vector loop, n_left leftover elements (their handling is not shown).
 */
81 n_run = ( n -
n_pre ) / 4;
82 n_left = ( n -
n_pre ) % 4;
/* Clear both lanes of every vector accumulator before the main loop. */
110 rho1v.
v = _mm_setzero_pd();
111 rho2v.
v = _mm_setzero_pd();
112 rho3v.
v = _mm_setzero_pd();
/*
 * Main loop: n_run iterations, each consuming four doubles from every
 * vector as two 2-wide loads. The loop braces and the statements that
 * advance x1/y1/w1/z1 are not visible in this excerpt.
 */
114 for ( i = 0; i <
n_run; ++
i )
/* First pair: elements [0..1] of x1, y1, w1 and z1 (aligned loads). */
116 x1v.
v = _mm_load_pd( (
double* )x1 );
117 y1v.
v = _mm_load_pd( (
double* )y1 );
118 w1v.
v = _mm_load_pd( (
double* )w1 );
119 z1v.
v = _mm_load_pd( (
double* )z1 );
/*
 * Lane-wise multiply-accumulate against z. The + and * operators are
 * applied directly to __m128d values, which relies on the compiler's
 * vector-type operator support.
 */
121 rho1v.
v += x1v.
v * z1v.
v;
122 rho2v.
v += y1v.
v * z1v.
v;
123 rho3v.
v += w1v.
v * z1v.
v;
/* Second pair: elements [2..3], loaded from each base pointer plus 2. */
125 x2v.
v = _mm_load_pd( (
double* )(x1 + 2) );
126 y2v.
v = _mm_load_pd( (
double* )(y1 + 2) );
127 w2v.
v = _mm_load_pd( (
double* )(w1 + 2) );
128 z2v.
v = _mm_load_pd( (
double* )(z1 + 2) );
/* Accumulate the second pair into the same three vector accumulators. */
130 rho1v.
v += x2v.
v * z2v.
v;
131 rho2v.
v += y2v.
v * z2v.
v;
132 rho3v.
v += w2v.
v * z2v.
v;
/*
 * Horizontal reduction: add the two lanes of each vector accumulator into
 * the corresponding scalar total (rho1, rho2, rho3).
 */
140 rho1 += rho1v.
d[0] + rho1v.
d[1];
141 rho2 += rho2v.
d[0] + rho2v.
d[1];
142 rho3 += rho3v.
d[0] + rho3v.
d[1];
double w1c
Definition: bl1_dotsv3.c:175
double *restrict w1
Definition: bl1_dotsv3.c:172
x1
Definition: bl1_dotsv3.c:452
double d[2]
Definition: blis_type_defs.h:119
int n_pre
Definition: bl1_dotsv3.c:179
double y1c
Definition: bl1_dotsv3.c:175
double rho1
Definition: bl1_dotsv3.c:174
double rho2
Definition: bl1_dotsv3.c:174
double *restrict z1
Definition: bl1_dotsv3.c:173
int n_left
Definition: bl1_dotsv3.c:181
int i
Definition: bl1_dotsv3.c:177
__m128d v
Definition: blis_type_defs.h:118
* rho_yz
Definition: bl1_dotsv3.c:269
* rho_xz
Definition: bl1_dotsv3.c:268
v2df_t
Definition: blis_type_defs.h:116
double z1c
Definition: bl1_dotsv3.c:175
double x1c
Definition: bl1_dotsv3.c:175
double *restrict y1
Definition: bl1_dotsv3.c:169
* rho_wz
Definition: bl1_dotsv3.c:270
void bl1_abort(void)
Definition: bl1_abort.c:13
int n_run
Definition: bl1_dotsv3.c:180
double rho3
Definition: bl1_dotsv3.c:174