-
-
Notifications
You must be signed in to change notification settings - Fork 305
/
simd_math.h
1230 lines (974 loc) · 45.2 KB
/
simd_math.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//----------------------------------------------------------------------------//
// //
// ozz-animation is hosted at http://github.com/guillaumeblanc/ozz-animation //
// and distributed under the MIT License (MIT). //
// //
// Copyright (c) 2019 Guillaume Blanc //
// //
// Permission is hereby granted, free of charge, to any person obtaining a //
// copy of this software and associated documentation files (the "Software"), //
// to deal in the Software without restriction, including without limitation //
// the rights to use, copy, modify, merge, publish, distribute, sublicense, //
// and/or sell copies of the Software, and to permit persons to whom the //
// Software is furnished to do so, subject to the following conditions: //
// //
// The above copyright notice and this permission notice shall be included in //
// all copies or substantial portions of the Software. //
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR //
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, //
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL //
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER //
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING //
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER //
// DEALINGS IN THE SOFTWARE. //
// //
//----------------------------------------------------------------------------//
#ifndef OZZ_OZZ_BASE_MATHS_SIMD_MATH_H_
#define OZZ_OZZ_BASE_MATHS_SIMD_MATH_H_
#include "ozz/base/maths/internal/simd_math_config.h"
#include "ozz/base/platform.h"
namespace ozz {
namespace math {
// Returns the SIMD implementation name, as decided at library build time.
const char* SimdImplementationName();
// Factory functions that build SimdFloat4 values: constant vectors, loads from
// scalar arguments, loads from memory and conversion from SimdInt4.
namespace simd_float4 {
// Returns a SimdFloat4 vector with all components set to 0.
OZZ_INLINE SimdFloat4 zero();
// Returns a SimdFloat4 vector with all components set to 1.
OZZ_INLINE SimdFloat4 one();
// Returns a SimdFloat4 vector with the x component set to 1 and all the others
// set to 0.
OZZ_INLINE SimdFloat4 x_axis();
// Returns a SimdFloat4 vector with the y component set to 1 and all the others
// set to 0.
OZZ_INLINE SimdFloat4 y_axis();
// Returns a SimdFloat4 vector with the z component set to 1 and all the others
// set to 0.
OZZ_INLINE SimdFloat4 z_axis();
// Returns a SimdFloat4 vector with the w component set to 1 and all the others
// set to 0.
OZZ_INLINE SimdFloat4 w_axis();
// Loads _x, _y, _z, _w to the returned vector.
// r.x = _x
// r.y = _y
// r.z = _z
// r.w = _w
OZZ_INLINE SimdFloat4 Load(float _x, float _y, float _z, float _w);
// Loads _x to the x component of the returned vector, and sets y, z and w to 0.
// r.x = _x
// r.y = 0
// r.z = 0
// r.w = 0
OZZ_INLINE SimdFloat4 LoadX(float _x);
// Loads _x to all the components of the returned vector.
// r.x = _x
// r.y = _x
// r.z = _x
// r.w = _x
OZZ_INLINE SimdFloat4 Load1(float _x);
// Loads the 4 values of _f to the returned vector.
// _f must be aligned to 16 bytes.
// r.x = _f[0]
// r.y = _f[1]
// r.z = _f[2]
// r.w = _f[3]
OZZ_INLINE SimdFloat4 LoadPtr(const float* _f);
// Loads the 4 values of _f to the returned vector (unaligned variant).
// _f must be aligned to 4 bytes.
// r.x = _f[0]
// r.y = _f[1]
// r.z = _f[2]
// r.w = _f[3]
OZZ_INLINE SimdFloat4 LoadPtrU(const float* _f);
// Loads _f[0] to the x component of the returned vector, and sets y, z and w
// to 0.
// _f must be aligned to 4 bytes.
// r.x = _f[0]
// r.y = 0
// r.z = 0
// r.w = 0
OZZ_INLINE SimdFloat4 LoadXPtrU(const float* _f);
// Loads _f[0] to all the components of the returned vector.
// _f must be aligned to 4 bytes.
// r.x = _f[0]
// r.y = _f[0]
// r.z = _f[0]
// r.w = _f[0]
OZZ_INLINE SimdFloat4 Load1PtrU(const float* _f);
// Loads the first 2 values of _f to the x and y components of the returned
// vector. The remaining components are set to 0.
// _f must be aligned to 4 bytes.
// r.x = _f[0]
// r.y = _f[1]
// r.z = 0
// r.w = 0
OZZ_INLINE SimdFloat4 Load2PtrU(const float* _f);
// Loads the first 3 values of _f to the x, y and z components of the returned
// vector. The remaining component is set to 0.
// _f must be aligned to 4 bytes.
// r.x = _f[0]
// r.y = _f[1]
// r.z = _f[2]
// r.w = 0
OZZ_INLINE SimdFloat4 Load3PtrU(const float* _f);
// Converts _i from integer to float.
OZZ_INLINE SimdFloat4 FromInt(_SimdInt4 _i);
} // namespace simd_float4
// Returns the x component of _v as a float.
OZZ_INLINE float GetX(_SimdFloat4 _v);
// Returns the y component of _v as a float.
OZZ_INLINE float GetY(_SimdFloat4 _v);
// Returns the z component of _v as a float.
OZZ_INLINE float GetZ(_SimdFloat4 _v);
// Returns the w component of _v as a float.
OZZ_INLINE float GetW(_SimdFloat4 _v);
// Returns _v with the x component set to x component of _f.
OZZ_INLINE SimdFloat4 SetX(_SimdFloat4 _v, _SimdFloat4 _f);
// Returns _v with the y component set to x component of _f.
OZZ_INLINE SimdFloat4 SetY(_SimdFloat4 _v, _SimdFloat4 _f);
// Returns _v with the z component set to x component of _f.
OZZ_INLINE SimdFloat4 SetZ(_SimdFloat4 _v, _SimdFloat4 _f);
// Returns _v with the w component set to x component of _f.
OZZ_INLINE SimdFloat4 SetW(_SimdFloat4 _v, _SimdFloat4 _f);
// Returns _v with the _i th component set to _f.
// _i must be in range [0,3]
OZZ_INLINE SimdFloat4 SetI(_SimdFloat4 _v, _SimdFloat4 _f, int _i);
// Stores the 4 components of _v to the four first floats of _f.
// _f must be aligned to 16 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
// _f[2] = _v.z
// _f[3] = _v.w
OZZ_INLINE void StorePtr(_SimdFloat4 _v, float* _f);
// Stores the x component of _v to the first float of _f.
// _f must be aligned to 16 bytes.
// _f[0] = _v.x
OZZ_INLINE void Store1Ptr(_SimdFloat4 _v, float* _f);
// Stores x and y components of _v to the two first floats of _f.
// _f must be aligned to 16 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
OZZ_INLINE void Store2Ptr(_SimdFloat4 _v, float* _f);
// Stores x, y and z components of _v to the three first floats of _f.
// _f must be aligned to 16 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
// _f[2] = _v.z
OZZ_INLINE void Store3Ptr(_SimdFloat4 _v, float* _f);
// Stores the 4 components of _v to the four first floats of _f.
// _f must be aligned to 4 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
// _f[2] = _v.z
// _f[3] = _v.w
OZZ_INLINE void StorePtrU(_SimdFloat4 _v, float* _f);
// Stores the x component of _v to the first float of _f.
// _f must be aligned to 4 bytes.
// _f[0] = _v.x
OZZ_INLINE void Store1PtrU(_SimdFloat4 _v, float* _f);
// Stores x and y components of _v to the two first floats of _f.
// _f must be aligned to 4 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
OZZ_INLINE void Store2PtrU(_SimdFloat4 _v, float* _f);
// Stores x, y and z components of _v to the three first floats of _f.
// _f must be aligned to 4 bytes.
// _f[0] = _v.x
// _f[1] = _v.y
// _f[2] = _v.z
OZZ_INLINE void Store3PtrU(_SimdFloat4 _v, float* _f);
// Replicates x of _v to all the components of the returned vector.
OZZ_INLINE SimdFloat4 SplatX(_SimdFloat4 _v);
// Replicates y of _v to all the components of the returned vector.
OZZ_INLINE SimdFloat4 SplatY(_SimdFloat4 _v);
// Replicates z of _v to all the components of the returned vector.
OZZ_INLINE SimdFloat4 SplatZ(_SimdFloat4 _v);
// Replicates w of _v to all the components of the returned vector.
OZZ_INLINE SimdFloat4 SplatW(_SimdFloat4 _v);
// Swizzle x, y, z and w components based on compile time arguments _X, _Y, _Z
// and _W. Arguments can vary from 0 (x), to 3 (w).
template <size_t _X, size_t _Y, size_t _Z, size_t _W>
OZZ_INLINE SimdFloat4 Swizzle(_SimdFloat4 _v);
// Transposes the x components of the 4 SimdFloat4 of _in into the 1
// SimdFloat4 of _out.
OZZ_INLINE void Transpose4x1(const SimdFloat4 _in[4], SimdFloat4 _out[1]);
// Transposes x, y, z and w components of _in to the x components of _out.
// Remaining y, z and w are set to 0.
OZZ_INLINE void Transpose1x4(const SimdFloat4 _in[1], SimdFloat4 _out[4]);
// Transposes the x and y components of the 4 SimdFloat4 of _in into the 2
// SimdFloat4 of _out.
OZZ_INLINE void Transpose4x2(const SimdFloat4 _in[4], SimdFloat4 _out[2]);
// Transposes the 2 SimdFloat4 of _in into the x and y components of the 4
// SimdFloat4 of _out. Remaining z and w are set to 0.
OZZ_INLINE void Transpose2x4(const SimdFloat4 _in[2], SimdFloat4 _out[4]);
// Transposes the x, y and z components of the 4 SimdFloat4 of _in into the 3
// SimdFloat4 of _out.
OZZ_INLINE void Transpose4x3(const SimdFloat4 _in[4], SimdFloat4 _out[3]);
// Transposes the 3 SimdFloat4 of _in into the x, y and z components of the 4
// SimdFloat4 of _out. Remaining w are set to 0.
OZZ_INLINE void Transpose3x4(const SimdFloat4 _in[3], SimdFloat4 _out[4]);
// Transposes the 4 SimdFloat4 of _in into the 4 SimdFloat4 of _out.
OZZ_INLINE void Transpose4x4(const SimdFloat4 _in[4], SimdFloat4 _out[4]);
// Transposes the 16 SimdFloat4 of _in into the 16 SimdFloat4 of _out.
OZZ_INLINE void Transpose16x16(const SimdFloat4 _in[16], SimdFloat4 _out[16]);
// Multiplies _a and _b, then adds _c.
// v = (_a * _b) + _c
OZZ_INLINE SimdFloat4 MAdd(_SimdFloat4 _a, _SimdFloat4 _b, _SimdFloat4 _c);
// Multiplies _a and _b, then subtracts _c.
// v = (_a * _b) - _c
OZZ_INLINE SimdFloat4 MSub(_SimdFloat4 _a, _SimdFloat4 _b, _SimdFloat4 _c);
// Multiplies _a and _b, negate it, then adds _c.
// v = -(_a * _b) + _c
OZZ_INLINE SimdFloat4 NMAdd(_SimdFloat4 _a, _SimdFloat4 _b, _SimdFloat4 _c);
// Multiplies _a and _b, negates the product, then subtracts _c.
// v = -(_a * _b) - _c
OZZ_INLINE SimdFloat4 NMSub(_SimdFloat4 _a, _SimdFloat4 _b, _SimdFloat4 _c);
// Divides the x component of _a by the x component of _b and stores it in the
// x component of the returned vector. y, z, w of the returned vector are the
// same as the respective components of _a.
// r.x = _a.x / _b.x
// r.y = _a.y
// r.z = _a.z
// r.w = _a.w
OZZ_INLINE SimdFloat4 DivX(_SimdFloat4 _a, _SimdFloat4 _b);
// Computes the (horizontal) addition of x and y components of _v. The result is
// stored in the x component of the returned value. y, z, w of the returned
// vector are the same as their respective components in _v.
// r.x = _v.x + _v.y
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdFloat4 HAdd2(_SimdFloat4 _v);
// Computes the (horizontal) addition of x, y and z components of _v. The result
// is stored in the x component of the returned value. y, z, w of the returned
// vector are the same as their respective components in _v.
// r.x = _v.x + _v.y + _v.z
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdFloat4 HAdd3(_SimdFloat4 _v);
// Computes the (horizontal) addition of x, y, z and w components of _v. The
// result is stored in the x component of the returned value. y, z, w of the
// returned vector are the same as their respective components in _v.
// r.x = _v.x + _v.y + _v.z + _v.w
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdFloat4 HAdd4(_SimdFloat4 _v);
// Computes the dot product of x and y components of _a and _b. The result is
// stored in the x component of the returned value. y, z, w of the returned
// vector are undefined.
// r.x = _a.x * _b.x + _a.y * _b.y
// r.y = ?
// r.z = ?
// r.w = ?
OZZ_INLINE SimdFloat4 Dot2(_SimdFloat4 _a, _SimdFloat4 _b);
// Computes the dot product of x, y and z components of _a and _b. The result is
// stored in the x component of the returned value. y, z, w of the returned
// vector are undefined.
// r.x = _a.x * _b.x + _a.y * _b.y + _a.z * _b.z
// r.y = ?
// r.z = ?
// r.w = ?
OZZ_INLINE SimdFloat4 Dot3(_SimdFloat4 _a, _SimdFloat4 _b);
// Computes the dot product of x, y, z and w components of _a and _b. The result
// is stored in the x component of the returned value. y, z, w of the returned
// vector are undefined.
// r.x = _a.x * _b.x + _a.y * _b.y + _a.z * _b.z + _a.w * _b.w
// r.y = ?
// r.z = ?
// r.w = ?
OZZ_INLINE SimdFloat4 Dot4(_SimdFloat4 _a, _SimdFloat4 _b);
// Computes the cross product of x, y and z components of _a and _b. The result
// is stored in the x, y and z components of the returned value. w of the
// returned vector is undefined.
// r.x = _a.y * _b.z - _a.z * _b.y
// r.y = _a.z * _b.x - _a.x * _b.z
// r.z = _a.x * _b.y - _a.y * _b.x
// r.w = ?
OZZ_INLINE SimdFloat4 Cross3(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns the per component estimated reciprocal of _v.
OZZ_INLINE SimdFloat4 RcpEst(_SimdFloat4 _v);
// Returns the per component estimated reciprocal of _v, where approximation is
// improved with one more new Newton-Raphson step.
OZZ_INLINE SimdFloat4 RcpEstNR(_SimdFloat4 _v);
// Returns the estimated reciprocal of the x component of _v and stores it in
// the x component of the returned vector. y, z, w of the returned vector are
// the same as their respective components in _v.
OZZ_INLINE SimdFloat4 RcpEstX(_SimdFloat4 _v);
// Returns the estimated reciprocal of the x component of _v, where
// approximation is improved with one more new Newton-Raphson step. y, z, w of
// the returned vector are undefined.
OZZ_INLINE SimdFloat4 RcpEstXNR(_SimdFloat4 _v);
// Returns the per component square root of _v.
OZZ_INLINE SimdFloat4 Sqrt(_SimdFloat4 _v);
// Returns the square root of the x component of _v and stores it in the x
// component of the returned vector. y, z, w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 SqrtX(_SimdFloat4 _v);
// Returns the per component estimated reciprocal square root of _v.
OZZ_INLINE SimdFloat4 RSqrtEst(_SimdFloat4 _v);
// Returns the per component estimated reciprocal square root of _v, where
// approximation is improved with one more new Newton-Raphson step.
OZZ_INLINE SimdFloat4 RSqrtEstNR(_SimdFloat4 _v);
// Returns the estimated reciprocal square root of the x component of _v and
// stores it in the x component of the returned vector. y, z, w of the returned
// vector are the same as their respective components in _v.
OZZ_INLINE SimdFloat4 RSqrtEstX(_SimdFloat4 _v);
// Returns the estimated reciprocal square root of the x component of _v, where
// approximation is improved with one more new Newton-Raphson step. y, z, w of
// the returned vector are undefined.
OZZ_INLINE SimdFloat4 RSqrtEstXNR(_SimdFloat4 _v);
// Returns the per element absolute value of _v.
OZZ_INLINE SimdFloat4 Abs(_SimdFloat4 _v);
// Returns the sign bit of _v.
OZZ_INLINE SimdInt4 Sign(_SimdFloat4 _v);
// Returns the per component minimum of _a and _b.
OZZ_INLINE SimdFloat4 Min(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns the per component maximum of _a and _b.
OZZ_INLINE SimdFloat4 Max(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns the per component minimum of _v and 0.
// NOTE(review): the sibling "maximum of _v and 0" function is named Max0, while
// this single-argument overload is named Min rather than Min0 - confirm that
// this name matches the function's definition.
OZZ_INLINE SimdFloat4 Min(_SimdFloat4 _v);
// Returns the per component maximum of _v and 0.
OZZ_INLINE SimdFloat4 Max0(_SimdFloat4 _v);
// Clamps each element of _v between _a and _b.
// Result is unknown if _a is not less than or equal to _b.
OZZ_INLINE SimdFloat4 Clamp(_SimdFloat4 _a, _SimdFloat4 _v, _SimdFloat4 _b);
// Computes the length of the components x and y of _v, and stores it in the x
// component of the returned vector. y, z, w of the returned vector are
// undefined.
OZZ_INLINE SimdFloat4 Length2(_SimdFloat4 _v);
// Computes the length of the components x, y and z of _v, and stores it in the
// x component of the returned vector. y, z, w of the returned vector are
// undefined.
OZZ_INLINE SimdFloat4 Length3(_SimdFloat4 _v);
// Computes the length of _v, and stores it in the x component of the returned
// vector. y, z, w of the returned vector are undefined.
OZZ_INLINE SimdFloat4 Length4(_SimdFloat4 _v);
// Computes the square length of the components x and y of _v, and stores it
// in the x component of the returned vector. y, z, w of the returned vector are
// undefined.
OZZ_INLINE SimdFloat4 Length2Sqr(_SimdFloat4 _v);
// Computes the square length of the components x, y and z of _v, and stores it
// in the x component of the returned vector. y, z, w of the returned vector are
// undefined.
OZZ_INLINE SimdFloat4 Length3Sqr(_SimdFloat4 _v);
// Computes the square length of the components x, y, z and w of _v, and stores
// it in the x component of the returned vector. y, z, w of the returned vector
// are undefined.
OZZ_INLINE SimdFloat4 Length4Sqr(_SimdFloat4 _v);
// Returns the normalized vector of the components x and y of _v, and stores
// it in the x and y components of the returned vector. z and w of the returned
// vector are the same as their respective components in _v.
OZZ_INLINE SimdFloat4 Normalize2(_SimdFloat4 _v);
// Returns the normalized vector of the components x, y and z of _v, and stores
// it in the x, y and z components of the returned vector. w of the returned
// vector is the same as its respective component in _v.
OZZ_INLINE SimdFloat4 Normalize3(_SimdFloat4 _v);
// Returns the normalized vector _v.
OZZ_INLINE SimdFloat4 Normalize4(_SimdFloat4 _v);
// Returns the estimated normalized vector of the components x and y of _v, and
// stores it in the x and y components of the returned vector. z and w of the
// returned vector are the same as their respective components in _v.
OZZ_INLINE SimdFloat4 NormalizeEst2(_SimdFloat4 _v);
// Returns the estimated normalized vector of the components x, y and z of _v,
// and stores it in the x, y and z components of the returned vector. w of the
// returned vector is the same as its respective component in _v.
OZZ_INLINE SimdFloat4 NormalizeEst3(_SimdFloat4 _v);
// Returns the estimated normalized vector _v.
OZZ_INLINE SimdFloat4 NormalizeEst4(_SimdFloat4 _v);
// Tests if the components x and y of _v forms a normalized vector.
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalized2(_SimdFloat4 _v);
// Tests if the components x, y and z of _v forms a normalized vector.
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalized3(_SimdFloat4 _v);
// Tests if the _v is a normalized vector.
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalized4(_SimdFloat4 _v);
// Tests if the components x and y of _v form a normalized vector.
// Uses the estimated normalization coefficient, that matches estimated math
// functions (RcpEst, NormalizeEst...).
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalizedEst2(_SimdFloat4 _v);
// Tests if the components x, y and z of _v form a normalized vector.
// Uses the estimated normalization coefficient, that matches estimated math
// functions (RcpEst, NormalizeEst...).
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalizedEst3(_SimdFloat4 _v);
// Tests if _v is a normalized vector.
// Uses the estimated normalization coefficient, that matches estimated math
// functions (RcpEst, NormalizeEst...).
// Returns the result in the x component of the returned vector. y, z and w are
// set to 0.
OZZ_INLINE SimdInt4 IsNormalizedEst4(_SimdFloat4 _v);
// Returns the normalized vector of the components x and y of _v if it is
// normalizable, otherwise returns _safe. z and w of the returned vector are
// the same as their respective components in _v.
OZZ_INLINE SimdFloat4 NormalizeSafe2(_SimdFloat4 _v, _SimdFloat4 _safe);
// Returns the normalized vector of the components x, y and z of _v if it is
// normalizable, otherwise returns _safe. w of the returned vector is the same
// as its respective component in _v.
OZZ_INLINE SimdFloat4 NormalizeSafe3(_SimdFloat4 _v, _SimdFloat4 _safe);
// Returns the normalized vector _v if it is normalizable, otherwise returns
// _safe.
OZZ_INLINE SimdFloat4 NormalizeSafe4(_SimdFloat4 _v, _SimdFloat4 _safe);
// Returns the estimated normalized vector of the components x and y of _v if it
// is normalizable, otherwise returns _safe. z and w of the returned vector are
// the same as their respective components in _v.
OZZ_INLINE SimdFloat4 NormalizeSafeEst2(_SimdFloat4 _v, _SimdFloat4 _safe);
// Returns the estimated normalized vector of the components x, y and z of _v
// if it is normalizable, otherwise returns _safe. w of the returned vector is
// the same as its respective component in _v.
OZZ_INLINE SimdFloat4 NormalizeSafeEst3(_SimdFloat4 _v, _SimdFloat4 _safe);
// Returns the estimated normalized vector _v if it is normalizable, otherwise
// returns _safe.
OZZ_INLINE SimdFloat4 NormalizeSafeEst4(_SimdFloat4 _v, _SimdFloat4 _safe);
// Computes the per element linear interpolation of _a and _b, where _alpha is
// not bound to range [0,1].
OZZ_INLINE SimdFloat4 Lerp(_SimdFloat4 _a, _SimdFloat4 _b, _SimdFloat4 _alpha);
// Computes the per element cosine of _v.
OZZ_INLINE SimdFloat4 Cos(_SimdFloat4 _v);
// Computes the cosine of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 CosX(_SimdFloat4 _v);
// Computes the per element arccosine of _v.
OZZ_INLINE SimdFloat4 ACos(_SimdFloat4 _v);
// Computes the arccosine of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 ACosX(_SimdFloat4 _v);
// Computes the per element sines of _v.
OZZ_INLINE SimdFloat4 Sin(_SimdFloat4 _v);
// Computes the sines of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 SinX(_SimdFloat4 _v);
// Computes the per element arcsine of _v.
OZZ_INLINE SimdFloat4 ASin(_SimdFloat4 _v);
// Computes the arcsine of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 ASinX(_SimdFloat4 _v);
// Computes the per element tangent of _v.
OZZ_INLINE SimdFloat4 Tan(_SimdFloat4 _v);
// Computes the tangent of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 TanX(_SimdFloat4 _v);
// Computes the per element arctangent of _v.
OZZ_INLINE SimdFloat4 ATan(_SimdFloat4 _v);
// Computes the arctangent of the x component of _v and stores it in the x
// component of the returned vector. y, z and w of the returned vector are the
// same as their respective components in _v.
OZZ_INLINE SimdFloat4 ATanX(_SimdFloat4 _v);
// Returns boolean selection of vectors _true and _false according to condition
// _b. All bits of each component of _b must have the same value (0 or
// 0xffffffff) to ensure portability.
OZZ_INLINE SimdFloat4 Select(_SimdInt4 _b, _SimdFloat4 _true,
_SimdFloat4 _false);
// Per element "equal" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpEq(_SimdFloat4 _a, _SimdFloat4 _b);
// Per element "not equal" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpNe(_SimdFloat4 _a, _SimdFloat4 _b);
// Per element "less than" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpLt(_SimdFloat4 _a, _SimdFloat4 _b);
// Per element "less than or equal" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpLe(_SimdFloat4 _a, _SimdFloat4 _b);
// Per element "greater than" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpGt(_SimdFloat4 _a, _SimdFloat4 _b);
// Per element "greater than or equal" comparison of _a and _b.
OZZ_INLINE SimdInt4 CmpGe(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns per element binary and operation of _a and _b.
// _v[0...127] = _a[0...127] & _b[0...127]
OZZ_INLINE SimdFloat4 And(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns per element binary or operation of _a and _b.
// _v[0...127] = _a[0...127] | _b[0...127]
OZZ_INLINE SimdFloat4 Or(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns per element binary logical xor operation of _a and _b.
// _v[0...127] = _a[0...127] ^ _b[0...127]
OZZ_INLINE SimdFloat4 Xor(_SimdFloat4 _a, _SimdFloat4 _b);
// Returns per element binary and operation of _a and _b.
// _v[0...127] = _a[0...127] & _b[0...127]
OZZ_INLINE SimdFloat4 And(_SimdFloat4 _a, _SimdInt4 _b);
// Returns per element binary and operation of _a and ~_b.
// _v[0...127] = _a[0...127] & ~_b[0...127]
OZZ_INLINE SimdFloat4 AndNot(_SimdFloat4 _a, _SimdInt4 _b);
// Returns per element binary or operation of _a and _b.
// _v[0...127] = _a[0...127] | _b[0...127]
OZZ_INLINE SimdFloat4 Or(_SimdFloat4 _a, _SimdInt4 _b);
// Returns per element binary logical xor operation of _a and _b.
// _v[0...127] = _a[0...127] ^ _b[0...127]
OZZ_INLINE SimdFloat4 Xor(_SimdFloat4 _a, _SimdInt4 _b);
namespace simd_int4 {
// Returns a SimdInt4 vector with all components set to 0.
OZZ_INLINE SimdInt4 zero();
// Returns a SimdInt4 vector with all components set to 1.
OZZ_INLINE SimdInt4 one();
// Returns a SimdInt4 vector with the x component set to 1 and all the others
// to 0.
OZZ_INLINE SimdInt4 x_axis();
// Returns a SimdInt4 vector with the y component set to 1 and all the others
// to 0.
OZZ_INLINE SimdInt4 y_axis();
// Returns a SimdInt4 vector with the z component set to 1 and all the others
// to 0.
OZZ_INLINE SimdInt4 z_axis();
// Returns a SimdInt4 vector with the w component set to 1 and all the others
// to 0.
OZZ_INLINE SimdInt4 w_axis();
// Returns a SimdInt4 vector with all components set to true (0xffffffff).
OZZ_INLINE SimdInt4 all_true();
// Returns a SimdInt4 vector with all components set to false (0).
OZZ_INLINE SimdInt4 all_false();
// Returns a SimdInt4 vector with sign bits set to 1.
OZZ_INLINE SimdInt4 mask_sign();
// Returns a SimdInt4 vector with all bits set to 1 except sign.
OZZ_INLINE SimdInt4 mask_not_sign();
// Returns a SimdInt4 vector with sign bits of x, y and z components set to 1.
OZZ_INLINE SimdInt4 mask_sign_xyz();
// Returns a SimdInt4 vector with sign bits of w component set to 1.
OZZ_INLINE SimdInt4 mask_sign_w();
// Returns a SimdInt4 vector with all bits set to 1.
OZZ_INLINE SimdInt4 mask_ffff();
// Returns a SimdInt4 vector with all bits set to 0.
OZZ_INLINE SimdInt4 mask_0000();
// Returns a SimdInt4 vector with all the bits of the x, y, z components set to
// 1, while w is set to 0.
OZZ_INLINE SimdInt4 mask_fff0();
// Returns a SimdInt4 vector with all the bits of the x component set to 1,
// while the others are set to 0.
OZZ_INLINE SimdInt4 mask_f000();
// Returns a SimdInt4 vector with all the bits of the y component set to 1,
// while the others are set to 0.
OZZ_INLINE SimdInt4 mask_0f00();
// Returns a SimdInt4 vector with all the bits of the z component set to 1,
// while the others are set to 0.
OZZ_INLINE SimdInt4 mask_00f0();
// Returns a SimdInt4 vector with all the bits of the w component set to 1,
// while the others are set to 0.
OZZ_INLINE SimdInt4 mask_000f();
// Loads _x, _y, _z, _w to the returned vector.
// r.x = _x
// r.y = _y
// r.z = _z
// r.w = _w
OZZ_INLINE SimdInt4 Load(int _x, int _y, int _z, int _w);
// Loads _x, _y, _z, _w to the returned vector using the following conversion
// rule.
// r.x = _x ? 0xffffffff:0
// r.y = _y ? 0xffffffff:0
// r.z = _z ? 0xffffffff:0
// r.w = _w ? 0xffffffff:0
OZZ_INLINE SimdInt4 Load(bool _x, bool _y, bool _z, bool _w);
// Loads _x to the x component of the returned vector using the following
// conversion rule, and sets y, z and w to 0.
// r.x = _x ? 0xffffffff:0
// r.y = 0
// r.z = 0
// r.w = 0
OZZ_INLINE SimdInt4 LoadX(bool _x);
// Loads _x to all the components of the returned vector using the following
// conversion rule.
// r.x = _x ? 0xffffffff:0
// r.y = _x ? 0xffffffff:0
// r.z = _x ? 0xffffffff:0
// r.w = _x ? 0xffffffff:0
OZZ_INLINE SimdInt4 Load1(bool _x);
// Loads the 4 values of _i to the returned vector.
// _i must be aligned to 16 bytes.
// r.x = _i[0]
// r.y = _i[1]
// r.z = _i[2]
// r.w = _i[3]
OZZ_INLINE SimdInt4 LoadPtr(const int* _i);
// Loads _i[0] to the x component of the returned vector, and sets y, z and w
// to 0.
// _i must be aligned to 16 bytes.
// r.x = _i[0]
// r.y = 0
// r.z = 0
// r.w = 0
OZZ_INLINE SimdInt4 LoadXPtr(const int* _i);
// Loads _i[0] to all the components of the returned vector.
// _i must be aligned to 16 bytes.
// r.x = _i[0]
// r.y = _i[0]
// r.z = _i[0]
// r.w = _i[0]
OZZ_INLINE SimdInt4 Load1Ptr(const int* _i);
// Loads the first 2 values of _i to the x and y components of the returned
// vector. The remaining components are set to 0.
// _i must be aligned to 4 bytes.
// NOTE(review): LoadPtr, LoadXPtr, Load1Ptr and Load3Ptr document a 16 bytes
// alignment requirement; confirm whether 4 bytes is intended here.
// r.x = _i[0]
// r.y = _i[1]
// r.z = 0
// r.w = 0
OZZ_INLINE SimdInt4 Load2Ptr(const int* _i);
// Loads the first 3 values of _i to the x, y and z components of the returned
// vector. The remaining component is set to 0.
// _i must be aligned to 16 bytes.
// r.x = _i[0]
// r.y = _i[1]
// r.z = _i[2]
// r.w = 0
OZZ_INLINE SimdInt4 Load3Ptr(const int* _i);
// Loads the 4 values of _i to the returned vector.
// _i must be aligned to 16 bytes.
// NOTE(review): the other *PtrU functions document a 4 bytes alignment
// requirement; confirm whether 16 bytes is intended here.
// r.x = _i[0]
// r.y = _i[1]
// r.z = _i[2]
// r.w = _i[3]
OZZ_INLINE SimdInt4 LoadPtrU(const int* _i);
// Loads _i[0] to the x component of the returned vector, and sets y, z and w
// to 0.
// _i must be aligned to 4 bytes.
// r.x = _i[0]
// r.y = 0
// r.z = 0
// r.w = 0
OZZ_INLINE SimdInt4 LoadXPtrU(const int* _i);
// Loads _i[0] to all the components of the returned vector.
// _i must be aligned to 4 bytes.
// r.x = _i[0]
// r.y = _i[0]
// r.z = _i[0]
// r.w = _i[0]
OZZ_INLINE SimdInt4 Load1PtrU(const int* _i);
// Loads the first 2 values of _i to the x and y components of the returned
// vector. The remaining components are set to 0.
// _i must be aligned to 4 bytes.
// r.x = _i[0]
// r.y = _i[1]
// r.z = 0
// r.w = 0
OZZ_INLINE SimdInt4 Load2PtrU(const int* _i);
// Loads the first 3 values of _i to the x, y and z components of the returned
// vector. The remaining component is set to 0.
// _i must be aligned to 4 bytes.
// r.x = _i[0]
// r.y = _i[1]
// r.z = _i[2]
// r.w = 0
OZZ_INLINE SimdInt4 Load3PtrU(const int* _i);
// Converts from float to integer by rounding to the nearest value.
OZZ_INLINE SimdInt4 FromFloatRound(_SimdFloat4 _f);
// Convert from float to integer by truncating.
OZZ_INLINE SimdInt4 FromFloatTrunc(_SimdFloat4 _f);
} // namespace simd_int4
// Returns the x component of _v as an integer.
OZZ_INLINE int GetX(_SimdInt4 _v);
// Returns the y component of _v as an integer.
OZZ_INLINE int GetY(_SimdInt4 _v);
// Returns the z component of _v as an integer.
OZZ_INLINE int GetZ(_SimdInt4 _v);
// Returns the w component of _v as an integer.
OZZ_INLINE int GetW(_SimdInt4 _v);
// Returns _v with the x component set to x component of _i.
OZZ_INLINE SimdInt4 SetX(_SimdInt4 _v, _SimdInt4 _i);
// Returns _v with the y component set to x component of _i.
OZZ_INLINE SimdInt4 SetY(_SimdInt4 _v, _SimdInt4 _i);
// Returns _v with the z component set to x component of _i.
OZZ_INLINE SimdInt4 SetZ(_SimdInt4 _v, _SimdInt4 _i);
// Returns _v with the w component set to x component of _i.
OZZ_INLINE SimdInt4 SetW(_SimdInt4 _v, _SimdInt4 _i);
// Returns _v with the _ith component set to _i.
// _ith must be in range [0,3]
OZZ_INLINE SimdInt4 SetI(_SimdInt4 _v, _SimdInt4 _i, int _ith);
// Stores the 4 components of _v to the four first integers of _i.
// _i must be aligned to 16 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
// _i[2] = _v.z
// _i[3] = _v.w
OZZ_INLINE void StorePtr(_SimdInt4 _v, int* _i);
// Stores the x component of _v to the first integer of _i.
// _i must be aligned to 16 bytes.
// _i[0] = _v.x
OZZ_INLINE void Store1Ptr(_SimdInt4 _v, int* _i);
// Stores x and y components of _v to the two first integers of _i.
// _i must be aligned to 16 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
OZZ_INLINE void Store2Ptr(_SimdInt4 _v, int* _i);
// Stores x, y and z components of _v to the three first integers of _i.
// _i must be aligned to 16 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
// _i[2] = _v.z
OZZ_INLINE void Store3Ptr(_SimdInt4 _v, int* _i);
// Stores the 4 components of _v to the four first integers of _i.
// _i must be aligned to 4 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
// _i[2] = _v.z
// _i[3] = _v.w
OZZ_INLINE void StorePtrU(_SimdInt4 _v, int* _i);
// Stores the x component of _v to the first integer of _i.
// _i must be aligned to 4 bytes.
// _i[0] = _v.x
OZZ_INLINE void Store1PtrU(_SimdInt4 _v, int* _i);
// Stores x and y components of _v to the two first integers of _i.
// _i must be aligned to 4 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
OZZ_INLINE void Store2PtrU(_SimdInt4 _v, int* _i);
// Stores x, y and z components of _v to the three first integers of _i.
// _i must be aligned to 4 bytes.
// _i[0] = _v.x
// _i[1] = _v.y
// _i[2] = _v.z
OZZ_INLINE void Store3PtrU(_SimdInt4 _v, int* _i);
// Replicates x of _v to all the components of the returned vector.
OZZ_INLINE SimdInt4 SplatX(_SimdInt4 _v);
// Replicates y of _v to all the components of the returned vector.
OZZ_INLINE SimdInt4 SplatY(_SimdInt4 _v);
// Replicates z of _v to all the components of the returned vector.
OZZ_INLINE SimdInt4 SplatZ(_SimdInt4 _v);
// Replicates w of _v to all the components of the returned vector.
OZZ_INLINE SimdInt4 SplatW(_SimdInt4 _v);
// Swizzle x, y, z and w components based on compile time arguments _X, _Y, _Z
// and _W. Arguments can vary from 0 (x), to 3 (w).
template <size_t _X, size_t _Y, size_t _Z, size_t _W>
OZZ_INLINE SimdInt4 Swizzle(_SimdInt4 _v);
// Creates a 4-bit mask from the most significant bits of each component of _v.
// i := sign(a3)<<3 | sign(a2)<<2 | sign(a1)<<1 | sign(a0)
OZZ_INLINE int MoveMask(_SimdInt4 _v);
// Returns true if all the components of _v are not 0.
OZZ_INLINE bool AreAllTrue(_SimdInt4 _v);
// Returns true if x, y and z components of _v are not 0.
OZZ_INLINE bool AreAllTrue3(_SimdInt4 _v);
// Returns true if x and y components of _v are not 0.
OZZ_INLINE bool AreAllTrue2(_SimdInt4 _v);
// Returns true if x component of _v is not 0.
OZZ_INLINE bool AreAllTrue1(_SimdInt4 _v);
// Returns true if all the components of _v are 0.
OZZ_INLINE bool AreAllFalse(_SimdInt4 _v);
// Returns true if x, y and z components of _v are 0.
OZZ_INLINE bool AreAllFalse3(_SimdInt4 _v);
// Returns true if x and y components of _v are 0.
OZZ_INLINE bool AreAllFalse2(_SimdInt4 _v);
// Returns true if x component of _v is 0.
OZZ_INLINE bool AreAllFalse1(_SimdInt4 _v);
// Computes the (horizontal) addition of x and y components of _v. The result is
// stored in the x component of the returned value. y, z, w of the returned
// vector are the same as their respective components in _v.
// r.x = _v.x + _v.y
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdInt4 HAdd2(_SimdInt4 _v);
// Computes the (horizontal) addition of x, y and z components of _v. The result
// is stored in the x component of the returned value. y, z, w of the returned
// vector are the same as their respective components in _v.
// r.x = _v.x + _v.y + _v.z
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdInt4 HAdd3(_SimdInt4 _v);
// Computes the (horizontal) addition of x, y, z and w components of _v. The
// result is stored in the x component of the returned value. y, z, w of the
// returned vector are the same as their respective components in _v.
// r.x = _v.x + _v.y + _v.z + _v.w
// r.y = _v.y
// r.z = _v.z
// r.w = _v.w
OZZ_INLINE SimdInt4 HAdd4(_SimdInt4 _v);
// Returns the per element absolute value of _v.
OZZ_INLINE SimdInt4 Abs(_SimdInt4 _v);
// Returns the sign bit of _v.
OZZ_INLINE SimdInt4 Sign(_SimdInt4 _v);
// Returns the per component minimum of _a and _b.
OZZ_INLINE SimdInt4 Min(_SimdInt4 _a, _SimdInt4 _b);
// Returns the per component maximum of _a and _b.
OZZ_INLINE SimdInt4 Max(_SimdInt4 _a, _SimdInt4 _b);
// Returns the per component minimum of _v and 0.
OZZ_INLINE SimdInt4 Min0(_SimdInt4 _v);