After checking the generated assembly, I noticed that the code is built for AVX2 but does not use the YMM registers at all. Do you know why?
I compiled it with -O2.
; AddVec4sProper_avx2 -- debugger disassembly listing (Intel operand order:
; destination first). Each line is "address  instruction".
;
; What the listing does: for each of `count` 16-byte elements, load the
; element from the source, add a scalar constant to its LOWEST 32-bit float
; lane only (vaddss), and store all 16 bytes to the destination.
;
; Registers as used below:
;   rcx = destination base address (only stored through)
;   rdx = source base address (only loaded through)
;   r8d = element count, tested as a signed 32-bit value on entry
;   NOTE(review): rcx/rdx/r8 matches the Microsoft x64 argument order --
;   confirm against the C/C++ prototype, which is not visible here.
; Scratch: rax, r8d, r9d, r10, xmm0, xmm1, flags.
;
; Every vector instruction here is VEX-encoded (v-prefix) but has 128-bit
; XMM operands; no YMM register appears anywhere in this listing. The only
; arithmetic is the scalar vaddss, which modifies lane 0 and copies lanes
; 1..3 through from its first source operand.
;
; vmovaps requires its memory operand to be 16-byte aligned, so both
; rcx and rdx must be 16-byte aligned for this code not to fault.
AddVec4sProper_avx2:
; --- Entry guard ---
00007FF6F9E167E0 test r8d,r8d
00007FF6F9E167E3 jle AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  ; count <= 0 (signed) -> straight to ret
; --- Split count into a 4x-unrolled part and a 0..3 element remainder ---
00007FF6F9E167E9 lea eax,[r8-1]  ; eax = count - 1
00007FF6F9E167ED mov r9d,r8d
00007FF6F9E167F0 and r9d,3  ; r9d = count & 3 = remainder element count
00007FF6F9E167F4 cmp eax,3
00007FF6F9E167F7 jae AddVec4sProper_avx2+26h (07FF6F9E16806h)  ; count - 1 >= 3 (unsigned), i.e. count >= 4 -> unrolled loop setup
; --- count is 1..3 here: skip the unrolled loop ---
00007FF6F9E167F9 xor r10d,r10d  ; element index = 0 (also zeroes the upper half of r10)
00007FF6F9E167FC test r9d,r9d
00007FF6F9E167FF jne AddVec4sProper_avx2+90h (07FF6F9E16870h)  ; remainder != 0 -> remainder loop setup
00007FF6F9E16801 jmp AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  ; only taken if remainder == 0; with 1 <= count <= 3 on this path, count & 3 is never 0
; --- Unrolled loop setup (count >= 4) ---
00007FF6F9E16806 sub r8d,r9d  ; r8d = count rounded down to a multiple of 4
00007FF6F9E16809 mov eax,30h  ; rax = byte offset biased by +30h so the four accesses below use displacements -30h, -20h, -10h, 0
00007FF6F9E1680E xor r10d,r10d  ; r10 = elements processed so far = 0
00007FF6F9E16811 vmovss xmm0,dword ptr [__real@3f800000 (07FF6FA0EAF20h)]  ; xmm0 lane 0 = the addend; 0x3F800000 is 1.0f in IEEE-754 single precision
00007FF6F9E16819 nop dword ptr [rax]  ; padding: the loop head that follows sits at a 16-byte-aligned address (...6820h)
; --- Unrolled loop body: 4 elements (4 x 16 bytes) per iteration ---
00007FF6F9E16820 vmovaps xmm1,xmmword ptr [rdx+rax-30h]  ; element 0 of the group: aligned 16-byte load from source
00007FF6F9E16826 vaddss xmm1,xmm1,xmm0  ; lane 0 += addend; lanes 1..3 unchanged
00007FF6F9E1682A vmovaps xmmword ptr [rcx+rax-30h],xmm1  ; aligned 16-byte store to destination at the same offset
00007FF6F9E16830 vmovaps xmm1,xmmword ptr [rdx+rax-20h]  ; element 1 of the group
00007FF6F9E16836 vaddss xmm1,xmm1,xmm0
00007FF6F9E1683A vmovaps xmmword ptr [rcx+rax-20h],xmm1
00007FF6F9E16840 vmovaps xmm1,xmmword ptr [rdx+rax-10h]  ; element 2 of the group
00007FF6F9E16846 vaddss xmm1,xmm1,xmm0
00007FF6F9E1684A vmovaps xmmword ptr [rcx+rax-10h],xmm1
00007FF6F9E16850 vmovaps xmm1,xmmword ptr [rdx+rax]  ; element 3 of the group
00007FF6F9E16855 vaddss xmm1,xmm1,xmm0
00007FF6F9E16859 vmovaps xmmword ptr [rcx+rax],xmm1
00007FF6F9E1685E add r10,4  ; 4 more elements done
00007FF6F9E16862 add rax,40h  ; advance byte offset by 4 * 16 bytes
00007FF6F9E16866 cmp r8d,r10d
00007FF6F9E16869 jne AddVec4sProper_avx2+40h (07FF6F9E16820h)  ; loop until r10d reaches the rounded-down count
; --- Between loops: finish if there is no remainder ---
00007FF6F9E1686B test r9d,r9d
00007FF6F9E1686E je AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  ; remainder == 0 -> ret
; --- Remainder loop setup (1..3 elements left) ---
00007FF6F9E16870 shl r10,4  ; element index -> byte offset (16 bytes per element)
00007FF6F9E16874 neg r9d  ; r9d = -remainder; counted up to zero below
00007FF6F9E16877 vmovss xmm0,dword ptr [__real@3f800000 (07FF6FA0EAF20h)]  ; reload the addend: this block is also entered from the count < 4 path, where xmm0 was never loaded
00007FF6F9E1687F nop  ; padding: the loop head that follows sits at a 16-byte-aligned address (...6880h)
; --- Remainder loop body: one element per iteration ---
00007FF6F9E16880 vmovaps xmm1,xmmword ptr [rdx+r10]  ; aligned 16-byte load from source
00007FF6F9E16886 vaddss xmm1,xmm1,xmm0  ; lane 0 += addend; lanes 1..3 unchanged
00007FF6F9E1688A vmovaps xmmword ptr [rcx+r10],xmm1  ; aligned 16-byte store to destination
00007FF6F9E16890 add r10,10h  ; next element (+16 bytes)
00007FF6F9E16894 inc r9d
00007FF6F9E16897 jne AddVec4sProper_avx2+0A0h (07FF6F9E16880h)  ; repeat until the negative counter reaches 0
; --- Common exit ---
00007FF6F9E16899 ret
00007FF6F9E1689A nop word ptr [rax+rax]  ; padding after ret; no branch in this listing targets it