@@ -179,59 +179,52 @@ void PresetOutputs::Render(const BeatDetect &music, const PipelineContext &conte
179
179
// N.B. The more optimization that can be done on this method, the better! This is called a lot and can probably be improved.
180
180
void PresetOutputs::PerPixelMath_c (const PipelineContext &context)
181
181
{
182
- for (int x = 0 ; x < gx; x++)
183
- {
184
- for (int y = 0 ; y < gy; y++)
185
- {
186
- const float fZoom2 = std::pow (this ->zoom_mesh [x][y], std::pow (this ->zoomexp_mesh [x][y],
187
- rad_mesh[x][y] * 2 .0f - 1 .0f ));
188
- const float fZoom2Inv = 1 .0f / fZoom2 ;
189
- this ->x_mesh [x][y] = this ->orig_x [x][y] * 0 .5f * fZoom2Inv + 0 .5f ;
190
- this ->x_mesh [x][y] = (this ->x_mesh [x][y] - this ->cx_mesh [x][y]) / this ->sx_mesh [x][y] + this ->cx_mesh [x][y];
191
- this ->y_mesh [x][y] = this ->orig_y [x][y] * 0 .5f * fZoom2Inv + 0 .5f ;
192
- this ->y_mesh [x][y] = (this ->y_mesh [x][y] - this ->cy_mesh [x][y]) / this ->sy_mesh [x][y] + this ->cy_mesh [x][y];
193
- }
194
- }
195
-
196
- const float fWarpTime = context.time * this ->fWarpAnimSpeed ;
197
- const float fWarpScaleInv = 1 .0f / this ->fWarpScale ;
198
- float f[4 ];
199
- f[0 ] = 11 .68f + 4 .0f * cosf (fWarpTime * 1 .413f + 10 );
200
- f[1 ] = 8 .77f + 3 .0f * cosf (fWarpTime * 1 .113f + 7 );
201
- f[2 ] = 10 .54f + 3 .0f * cosf (fWarpTime * 1 .233f + 3 );
202
- f[3 ] = 11 .49f + 4 .0f * cosf (fWarpTime * 0 .933f + 5 );
203
-
204
- for (int x = 0 ; x < gx; x++)
182
+ const float fWarpTime = context.time * this ->fWarpAnimSpeed ;
183
+ const float fWarpScaleInv = 1 .0f / this ->fWarpScale ;
184
+ float f[4 ];
185
+ f[0 ] = 11 .68f + 4 .0f * cosf (fWarpTime * 1 .413f + 10 );
186
+ f[1 ] = 8 .77f + 3 .0f * cosf (fWarpTime * 1 .113f + 7 );
187
+ f[2 ] = 10 .54f + 3 .0f * cosf (fWarpTime * 1 .233f + 3 );
188
+ f[3 ] = 11 .49f + 4 .0f * cosf (fWarpTime * 0 .933f + 5 );
189
+
190
+ for (int x = 0 ; x < gx; x++)
205
191
{
206
192
for (int y = 0 ; y < gy; y++)
207
193
{
208
194
const float orig_x2 = this ->orig_x [x][y];
209
195
const float orig_y2 = this ->orig_y [x][y];
210
- const float warp_mesh2 = this ->warp_mesh [x][y] * 0 .0035f ;
211
-
212
- this ->x_mesh [x][y] +=
213
- (warp_mesh2 * sinf (fWarpTime * 0 .333f + fWarpScaleInv * (orig_x2 * f[0 ] - orig_y2 * f[3 ]))) +
214
- (warp_mesh2 * cosf (fWarpTime * 0 .753f - fWarpScaleInv * (orig_x2 * f[1 ] - orig_y2 * f[2 ])));
215
196
216
- this ->y_mesh [x][y] +=
217
- (warp_mesh2 * cosf (fWarpTime * 0 .375f - fWarpScaleInv * (orig_x2 * f[2 ] + orig_y2 * f[1 ]))) +
218
- (warp_mesh2 * sinf (fWarpTime * 0 .825f + fWarpScaleInv * (orig_x2 * f[0 ] + orig_y2 * f[3 ])));
219
- }
220
- }
221
-
222
- for (int x = 0 ; x < gx; x++)
223
- {
224
- for (int y = 0 ; y < gy; y++)
225
- {
226
- const float u2 = this ->x_mesh [x][y] - this ->cx_mesh [x][y];
227
- const float v2 = this ->y_mesh [x][y] - this ->cy_mesh [x][y];
228
-
229
- const float rot2 = this ->rot_mesh [x][y];
230
- const float cos_rot = cosf (rot2);
231
- const float sin_rot = sinf (rot2);
232
-
233
- this ->x_mesh [x][y] = u2 * cos_rot - v2 * sin_rot + this ->cx_mesh [x][y] - this ->dx_mesh [x][y];
234
- this ->y_mesh [x][y] = u2 * sin_rot + v2 * cos_rot + this ->cy_mesh [x][y] - this ->dy_mesh [x][y];
197
+ // zoom and stretch
198
+ const float fZoom2Inv = this ->zoom_mesh [x][y] == 1.0 ? 1.0 :
199
+ std::pow (this ->zoom_mesh [x][y], -1 *std::pow (this ->zoomexp_mesh [x][y], rad_mesh[x][y] * 2 .0f - 1 .0f ));
200
+ float u = orig_x2 * 0 .5f * fZoom2Inv + 0 .5f ;
201
+ u = (u - this ->cx_mesh [x][y]) / this ->sx_mesh [x][y] + this ->cx_mesh [x][y];
202
+ float v = orig_y2 * 0 .5f * fZoom2Inv + 0 .5f ;
203
+ v = (v - this ->cy_mesh [x][y]) / this ->sy_mesh [x][y] + this ->cy_mesh [x][y];
204
+
205
+ // warp
206
+ if (this ->warp_mesh [x][y] != 0.0 )
207
+ {
208
+ const float warp_mesh2 = this ->warp_mesh [x][y] * 0 .0035f ;
209
+ u += warp_mesh2 * (sinf (fWarpTime * 0 .333f + fWarpScaleInv * (orig_x2 * f[0 ] - orig_y2 * f[3 ])) +
210
+ cosf (fWarpTime * 0 .753f - fWarpScaleInv * (orig_x2 * f[1 ] - orig_y2 * f[2 ])));
211
+
212
+ v += warp_mesh2 * (cosf (fWarpTime * 0 .375f - fWarpScaleInv * (orig_x2 * f[2 ] + orig_y2 * f[1 ])) +
213
+ sinf (fWarpTime * 0 .825f + fWarpScaleInv * (orig_x2 * f[0 ] + orig_y2 * f[3 ])));
214
+ }
215
+
216
+ // rotate and translate
217
+ if (rot != 0.0 )
218
+ {
219
+ const float cos_rot = cosf (this ->rot_mesh [x][y]);
220
+ const float sin_rot = sinf (this ->rot_mesh [x][y]);
221
+ const float u2 = u - this ->cx_mesh [x][y];
222
+ const float v2 = v - this ->cy_mesh [x][y];
223
+ u = u2 * cos_rot - v2 * sin_rot + this ->cx_mesh [x][y];
224
+ v = u2 * sin_rot + v2 * cos_rot + this ->cy_mesh [x][y];
225
+ }
226
+ this ->x_mesh [x][y] = u - this ->dx_mesh [x][y];
227
+ this ->y_mesh [x][y] = v - this ->dy_mesh [x][y];
235
228
}
236
229
}
237
230
}
@@ -291,157 +284,156 @@ inline __m128 _mm_cosf(__m128 x)
291
284
292
285
void PresetOutputs::PerPixelMath_sse (const PipelineContext &context)
293
286
{
287
+ const float fWarpTime = context.time * this ->fWarpAnimSpeed ;
288
+ const float fWarpScaleInv = 1 .0f / this ->fWarpScale ;
289
+ const float f[4 ] =
290
+ {
291
+ 11 .68f + 4 .0f * cosf (fWarpTime * 1 .413f + 10 ),
292
+ 8 .77f + 3 .0f * cosf (fWarpTime * 1 .113f + 7 ),
293
+ 10 .54f + 3 .0f * cosf (fWarpTime * 1 .233f + 3 ),
294
+ 11 .49f + 4 .0f * cosf (fWarpTime * 0 .933f + 5 )
295
+ };
296
+
294
297
for (int x = 0 ; x < gx; x++)
295
298
{
296
299
for (int y = 0 ; y < gy; y += 4 )
297
300
{
298
- // fZoom2 = std::pow(this->zoom_mesh[x][y], std::pow(this->zoomexp_mesh[x][y],
299
- // rad_mesh[x][y] * 2.0f - 1.0f));
300
- __m128 rad_mesh_scaled =
301
- _mm_sub_ps (
302
- _mm_mul_ps (
303
- _mm_load_ps (&this ->rad_mesh [x][y]),
304
- _mm_set_ps1 (2 .0f )),
305
- _mm_set_ps1 (1 .0f ));
306
- __m128 zoom_mesh2 = _mm_load_ps (&this ->zoom_mesh [x][y]);
307
- __m128 zoomexp_mesh2 = _mm_load_ps (&this ->zoomexp_mesh [x][y]);
308
- __m128 fZoom2 = _mm_pow (zoom_mesh2, _mm_pow (zoomexp_mesh2, rad_mesh_scaled));
309
- // fZoom2Inv = 1.0f / fZoom2;
310
- __m128 fZoomInv = _mm_rcp_ps (fZoom2 );
311
-
312
- // this->x_mesh[x][y] = this->orig_x[x][y] * 0.5f * fZoom2Inv + 0.5f;
313
- __m128 x_mesh2 =
301
+ // const float orig_x2 = this->orig_x[x][y];
302
+ // const float orig_y2 = this->orig_y[x][y];
303
+ const __m128 orig_x2 = _mm_load_ps (&this ->orig_x [x][y]);
304
+ const __m128 orig_y2 = _mm_load_ps (&this ->orig_y [x][y]);
305
+
306
+ bool zoomOne = this ->zoom_mesh [x][y] == 1.0 && this ->zoom_mesh [x][y+1 ] == 1.0 && this ->zoom_mesh [x][y+2 ] == 1.0 && this ->zoom_mesh [x][y+3 ] == 1.0 ;
307
+ __m128 fZoom2Inv = _mm_set_ps1 (1 .0f );
308
+ if (!zoomOne)
309
+ {
310
+ // fZoom2 = std::pow(this->zoom_mesh[x][y], std::pow(this->zoomexp_mesh[x][y],
311
+ // rad_mesh[x][y] * 2.0f - 1.0f));
312
+ const __m128 rad_mesh_scaled =
313
+ _mm_sub_ps (
314
+ _mm_mul_ps (
315
+ _mm_load_ps (&this ->rad_mesh [x][y]),
316
+ _mm_set_ps1 (2 .0f )),
317
+ _mm_set_ps1 (1 .0f ));
318
+ const __m128 zoom_mesh2 = _mm_load_ps (&this ->zoom_mesh [x][y]);
319
+ const __m128 zoomexp_mesh2 = _mm_load_ps (&this ->zoomexp_mesh [x][y]);
320
+ const __m128 fZoom2 = _mm_pow (zoom_mesh2, _mm_pow (zoomexp_mesh2, rad_mesh_scaled));
321
+ // fZoom2Inv = 1.0f / fZoom2;
322
+ fZoom2Inv = _mm_rcp_ps (fZoom2 );
323
+ }
324
+
325
+ // float u = orig_x2 * 0.5f * fZoom2Inv + 0.5f;
326
+ __m128 u =
314
327
_mm_add_ps (
315
328
_mm_mul_ps (
316
- _mm_load_ps (& this -> orig_x [x][y]) ,
317
- _mm_mul_ps (fZoomInv ,_mm_set_ps1 (0 .5f ))), // CONSIDER: common sub-expression
329
+ orig_x2 ,
330
+ _mm_mul_ps (fZoom2Inv ,_mm_set_ps1 (0 .5f ))), // CONSIDER: common sub-expression
318
331
_mm_set_ps1 (0 .5f ));
319
- // this->x_mesh[x][y] = (this->x_mesh[x][y] - this->cx_mesh[x][y]) / this->sx_mesh[x][y] + this->cx_mesh[x][y];
320
- __m128 cx_mesh2 = _mm_load_ps (&this ->cx_mesh [x][y]);
321
- __m128 sx_mesh2 = _mm_load_ps (&this ->sx_mesh [x][y]);
322
- _mm_store_ps (&this ->x_mesh [x][y],
323
- _mm_add_ps (
332
+ // u = (u - this->cx_mesh[x][y]) / this->sx_mesh[x][y] + this->cx_mesh[x][y];
333
+ const __m128 cx_mesh2 = _mm_load_ps (&this ->cx_mesh [x][y]);
334
+ const __m128 sx_mesh2 = _mm_load_ps (&this ->sx_mesh [x][y]);
335
+ u = _mm_add_ps (
324
336
_mm_div_ps (
325
- _mm_sub_ps (x_mesh2 ,cx_mesh2),
337
+ _mm_sub_ps (u ,cx_mesh2),
326
338
sx_mesh2),
327
339
cx_mesh2
328
- )) ;
340
+ );
329
341
330
- // this->y_mesh[x][y] = this->orig_y[x][y] * 0.5f * fZoom2Inv + 0.5f;
331
- __m128 y_mesh2 =
342
+ // float v = orig_y2 * 0.5f * fZoom2Inv + 0.5f;
343
+ __m128 v =
332
344
_mm_add_ps (
333
345
_mm_mul_ps (
334
- _mm_load_ps (& this -> orig_y [x][y]) ,
335
- _mm_mul_ps (fZoomInv ,_mm_set_ps1 (0 .5f ))),
346
+ orig_y2 ,
347
+ _mm_mul_ps (fZoom2Inv ,_mm_set_ps1 (0 .5f ))),
336
348
_mm_set_ps1 (0 .5f ));
337
- // this->y_mesh[x][y] = (this->y_mesh[x][y] - this->cy_mesh[x][y]) / this->sy_mesh[x][y] + this->cy_mesh[x][y];
338
- __m128 cy_mesh2 = _mm_load_ps (&this ->cy_mesh [x][y]);
339
- __m128 sy_mesh2 = _mm_load_ps (&this ->sy_mesh [x][y]);
340
- _mm_store_ps (&this ->y_mesh [x][y],
341
- _mm_add_ps (
349
+ // v = (v - this->cy_mesh[x][y]) / this->sy_mesh[x][y] + this->cy_mesh[x][y];
350
+ const __m128 cy_mesh2 = _mm_load_ps (&this ->cy_mesh [x][y]);
351
+ const __m128 sy_mesh2 = _mm_load_ps (&this ->sy_mesh [x][y]);
352
+ v = _mm_add_ps (
342
353
_mm_div_ps (
343
- _mm_sub_ps (y_mesh2 ,cy_mesh2),
354
+ _mm_sub_ps (v ,cy_mesh2),
344
355
sy_mesh2),
345
356
cy_mesh2
346
- ));
347
- }
348
- }
349
-
350
- const float fWarpTime = context.time * this ->fWarpAnimSpeed ;
351
- const float fWarpScaleInv = 1 .0f / this ->fWarpScale ;
352
- const float f[4 ] =
353
- {
354
- 11 .68f + 4 .0f * cosf (fWarpTime * 1 .413f + 10 ),
355
- 8 .77f + 3 .0f * cosf (fWarpTime * 1 .113f + 7 ),
356
- 10 .54f + 3 .0f * cosf (fWarpTime * 1 .233f + 3 ),
357
- 11 .49f + 4 .0f * cosf (fWarpTime * 0 .933f + 5 )
358
- };
359
-
360
- for (int x = 0 ; x < gx; x++)
361
- {
362
- for (int y = 0 ; y < gy; y+=4 )
363
- {
364
- // float orig_x = this->orig_x[x][y];
365
- // float orig_y = this->orig_y[x][y];
366
- // float warp_mesh = this->warp_mesh[x][y] * 0.0035f;
367
- const __m128 orig_x2 = _mm_load_ps (&this ->orig_x [x][y]);
368
- const __m128 orig_y2 = _mm_load_ps (&this ->orig_y [x][y]);
369
- const __m128 warp_mesh2 = _mm_mul_ps (_mm_load_ps (&this ->warp_mesh [x][y]), _mm_set_ps1 (0 .0035f ));
370
-
371
- // this->x_mesh[x][y] +=
372
- // (warp_mesh * sinf(fWarpTime * 0.333f + fWarpScaleInv * (orig_x * f[0] - orig_y * f[3]))) +
373
- // (warp_mesh * cosf(fWarpTime * 0.753f - fWarpScaleInv * (orig_x * f[1] - orig_y * f[2])));
374
- _mm_store_ps (&this ->x_mesh [x][y],
375
- _mm_add_ps (_mm_load_ps (&this ->x_mesh [x][y]),
376
- _mm_add_ps (
377
- _mm_mul_ps (warp_mesh2, _mm_sinf (
378
- _mm_add_ps (
379
- _mm_set_ps1 (fWarpTime *0 .333f ),
380
- _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
381
- _mm_sub_ps (
382
- _mm_mul_ps (orig_x2, _mm_set_ps1 (f[0 ])),
383
- _mm_mul_ps (orig_y2, _mm_set_ps1 (f[3 ]))
384
- ))))),
385
- _mm_mul_ps (warp_mesh2, _mm_cosf (
386
- _mm_sub_ps (
387
- _mm_set_ps1 (fWarpTime *0 .753f ),
388
- _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
389
- _mm_sub_ps (
390
- _mm_mul_ps (orig_x2, _mm_set_ps1 (f[1 ])),
391
- _mm_mul_ps (orig_y2, _mm_set_ps1 (f[2 ]))
392
- ))))))));
393
-
394
- // this->y_mesh[x][y] +=
395
- // (warp_mesh * cosf(fWarpTime * 0.375f - fWarpScaleInv * (orig_x * f[2] + orig_y * f[1]))) +
396
- // (warp_mesh * sinf(fWarpTime * 0.825f + fWarpScaleInv * (orig_x * f[0] + orig_y * f[3])));
397
- _mm_store_ps (&this ->y_mesh [x][y],
398
- _mm_add_ps (_mm_load_ps (&this ->y_mesh [x][y]),
399
- _mm_add_ps (
400
- _mm_mul_ps (warp_mesh2, _mm_cosf (
401
- _mm_sub_ps (
402
- _mm_set_ps1 (fWarpTime *0 .375f ),
403
- _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
404
- _mm_add_ps (
405
- _mm_mul_ps (orig_x2, _mm_set_ps1 (f[2 ])),
406
- _mm_mul_ps (orig_y2, _mm_set_ps1 (f[1 ]))
407
- ))))),
408
- _mm_mul_ps (warp_mesh2, _mm_sinf (
409
- _mm_add_ps (
410
- _mm_set_ps1 (fWarpTime *0 .825f ),
411
- _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
412
- _mm_add_ps (
413
- _mm_mul_ps (orig_x2, _mm_set_ps1 (f[0 ])),
414
- _mm_mul_ps (orig_y2, _mm_set_ps1 (f[3 ]))
415
- ))))))));
416
- }
417
- }
418
- for (int x = 0 ; x < gx; x++)
419
- {
420
- for (int y = 0 ; y < gy; y+=4 )
421
- {
422
- // const float u2 = this->x_mesh[x][y] - this->cx_mesh[x][y];
423
- // const float v2 = this->y_mesh[x][y] - this->cy_mesh[x][y];
424
- const __m128 u2 = _mm_sub_ps (_mm_load_ps (&this ->x_mesh [x][y]),_mm_load_ps (&this ->cx_mesh [x][y]));
425
- const __m128 v2 = _mm_sub_ps (_mm_load_ps (&this ->y_mesh [x][y]),_mm_load_ps (&this ->cy_mesh [x][y]));
426
-
427
- // const float rot = this->rot_mesh[x][y];
428
- // const float cos_rot = cosf(rot);
429
- // const float sin_rot = sinf(rot);
430
- __m128 sin_rot, cos_rot;
431
- _mm_sincosf (_mm_load_ps (&this ->rot_mesh [x][y]), sin_rot, cos_rot);
432
-
433
- // this->x_mesh[x][y] = u2 * cos_rot - v2 * sin_rot + this->cx_mesh[x][y] - this->dx_mesh[x][y];
434
- _mm_store_ps (&this ->x_mesh [x][y],
435
- _mm_add_ps (
436
- _mm_sub_ps (_mm_mul_ps (u2, cos_rot), _mm_mul_ps (v2,sin_rot)),
437
- _mm_sub_ps (_mm_load_ps (&this ->cx_mesh [x][y]), _mm_load_ps (&this ->dx_mesh [x][y]))
438
- ));
439
- // this->y_mesh[x][y] = u2 * sin_rot + v2 * cos_rot + this->cy_mesh[x][y] - this->dy_mesh[x][y];
440
- _mm_store_ps (&this ->y_mesh [x][y],
441
- _mm_add_ps (
442
- _mm_add_ps (_mm_mul_ps (u2, sin_rot), _mm_mul_ps (v2,cos_rot)),
443
- _mm_sub_ps (_mm_load_ps (&this ->cy_mesh [x][y]), _mm_load_ps (&this ->dy_mesh [x][y]))
444
- ));
357
+ );
358
+
359
+ // warp
360
+ bool warpZero = this ->warp_mesh [x][y] == 0.0 && this ->warp_mesh [x][y+1 ] == 0.00 && this ->warp_mesh [x][y+2 ] == 0.00 && this ->warp_mesh [x][y+3 ] == 0.00 ;
361
+ if (!warpZero)
362
+ {
363
+ // const float warp_mesh2 = this->warp_mesh[x][y] * 0.0035f;
364
+ const __m128 warp_mesh2 = _mm_mul_ps (_mm_load_ps (&this ->warp_mesh [x][y]), _mm_set_ps1 (0 .0035f ));
365
+
366
+ // u +=
367
+ // (warp_mesh * sinf(fWarpTime * 0.333f + fWarpScaleInv * (orig_x2 * f[0] - orig_y2 * f[3]))) +
368
+ // (warp_mesh * cosf(fWarpTime * 0.753f - fWarpScaleInv * (orig_x2 * f[1] - orig_y2 * f[2])));
369
+ u = _mm_add_ps (u,
370
+ _mm_add_ps (
371
+ _mm_mul_ps (warp_mesh2, _mm_sinf (
372
+ _mm_add_ps (
373
+ _mm_set_ps1 (fWarpTime *0 .333f ),
374
+ _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
375
+ _mm_sub_ps (
376
+ _mm_mul_ps (orig_x2, _mm_set_ps1 (f[0 ])),
377
+ _mm_mul_ps (orig_y2, _mm_set_ps1 (f[3 ]))
378
+ ))))),
379
+ _mm_mul_ps (warp_mesh2, _mm_cosf (
380
+ _mm_sub_ps (
381
+ _mm_set_ps1 (fWarpTime *0 .753f ),
382
+ _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
383
+ _mm_sub_ps (
384
+ _mm_mul_ps (orig_x2, _mm_set_ps1 (f[1 ])),
385
+ _mm_mul_ps (orig_y2, _mm_set_ps1 (f[2 ]))
386
+ )))))));
387
+
388
+ // v +=
389
+ // (warp_mesh * cosf(fWarpTime * 0.375f - fWarpScaleInv * (orig_x2 * f[2] + orig_y2 * f[1]))) +
390
+ // (warp_mesh * sinf(fWarpTime * 0.825f + fWarpScaleInv * (orig_x2 * f[0] + orig_y2 * f[3])));
391
+ v = _mm_add_ps (v,
392
+ _mm_add_ps (
393
+ _mm_mul_ps (warp_mesh2, _mm_cosf (
394
+ _mm_sub_ps (
395
+ _mm_set_ps1 (fWarpTime *0 .375f ),
396
+ _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
397
+ _mm_add_ps (
398
+ _mm_mul_ps (orig_x2, _mm_set_ps1 (f[2 ])),
399
+ _mm_mul_ps (orig_y2, _mm_set_ps1 (f[1 ]))
400
+ ))))),
401
+ _mm_mul_ps (warp_mesh2, _mm_sinf (
402
+ _mm_add_ps (
403
+ _mm_set_ps1 (fWarpTime *0 .825f ),
404
+ _mm_mul_ps (_mm_set_ps1 (fWarpScaleInv ),
405
+ _mm_add_ps (
406
+ _mm_mul_ps (orig_x2, _mm_set_ps1 (f[0 ])),
407
+ _mm_mul_ps (orig_y2, _mm_set_ps1 (f[3 ]))
408
+ )))))));
409
+ }
410
+
411
+ bool rotZero = this ->rot_mesh [x][y] == 0.0 && this ->rot_mesh [x][y+1 ] == 0.00 && this ->rot_mesh [x][y+2 ] == 0.00 && this ->rot_mesh [x][y+3 ] == 0.00 ;
412
+ if (!rotZero)
413
+ {
414
+ // const float u2 = u - this->cx_mesh[x][y];
415
+ // const float v2 = v - this->cy_mesh[x][y];
416
+ const __m128 u2 = _mm_sub_ps (u,_mm_load_ps (&this ->cx_mesh [x][y]));
417
+ const __m128 v2 = _mm_sub_ps (v,_mm_load_ps (&this ->cy_mesh [x][y]));
418
+
419
+ // const float cos_rot = cosf(this->rot_mesh[x][y]);
420
+ // const float sin_rot = sinf(this->rot_mesh[x][y]);
421
+ __m128 sin_rot, cos_rot;
422
+ _mm_sincosf (_mm_load_ps (&this ->rot_mesh [x][y]), sin_rot, cos_rot);
423
+
424
+ // u = u2 * cos_rot - v2 * sin_rot + this->cx_mesh[x][y];
425
+ u = _mm_add_ps (
426
+ _mm_sub_ps (_mm_mul_ps (u2, cos_rot), _mm_mul_ps (v2,sin_rot)),
427
+ _mm_load_ps (&this ->cx_mesh [x][y]));
428
+ // v = u2 * sin_rot + v2 * cos_rot + this->cy_mesh[x][y];
429
+ v = _mm_add_ps (
430
+ _mm_add_ps (_mm_mul_ps (u2, sin_rot), _mm_mul_ps (v2,cos_rot)),
431
+ _mm_load_ps (&this ->cy_mesh [x][y]));
432
+ }
433
+ // this->x_mesh[x][y] = u - this->dx_mesh[x][y];
434
+ // this->y_mesh[x][y] = v - this->dy_mesh[x][y];
435
+ _mm_store_ps (&this ->x_mesh [x][y], _mm_sub_ps (u, _mm_load_ps (&this ->dx_mesh [x][y])));
436
+ _mm_store_ps (&this ->y_mesh [x][y], _mm_sub_ps (v, _mm_load_ps (&this ->dy_mesh [x][y])));
445
437
}
446
438
}
447
439
}
@@ -494,7 +486,7 @@ void PresetOutputs::Initialize ( int _gx, int _gy )
494
486
float origx = x / (float ) (gx - 1 );
495
487
float origy = -((y / (float ) (gy - 1 )) - 1 );
496
488
497
- rad_mesh[x][y]=hypot ( ( origx-.5 ) *2 , ( origy-.5 ) *2 ) * . 7071067 ;
489
+ rad_mesh[x][y]=hypot ( ( origx-.5 ) *2 , ( origy-.5 ) *2 );
498
490
orig_x[x][y] = (origx - .5 ) * 2 ;
499
491
orig_y[x][y] = (origy - .5 ) * 2 ;
500
492
}
0 commit comments