Algorithm:
1. populate value in XMM registers. Since maximum value of this function is 10 we need three registers.
movd %eax, %xmm0 // xmm0 = packed_dword(0, 0, 0, x) pshufd $0, %xmm0, %xmm0 \n" // xmm0 = packed_dword(x, x, x, x) movapd %xmm0, %xmm1 movapd %xmm0, %xmm22. compare these numbers with sequence of powers of 10.
// powers_a = packed_dword(10^1 - 1, 10^2 - 1, 10^3 - 1, 10^4 - 1) // powers_c = packed_dword(10^5 - 1, 10^6 - 1, 10^7 - 1, 10^8 - 1) // powers_c = packed_dword(10^9 - 1, 0, 0, 0) pcmpgtd powers_a, %xmm0 pcmpgtd powers_b, %xmm1 pcmpgtd powers_c, %xmm2result of comparisons are: 0 (false) or -1 (true), for example:
xmm0 = packed_dword(-1, -1, -1, -1) xmm1 = packed_dword( 0, 0, -1, -1) xmm2 = packed_dword( 0, 0, 0, 0)3. calculate sum of all dwords
psrld $31, %xmm0 // xmm0 = packed_dword( 1, 1, 1, 1) - convert -1 to 1 psubd %xmm1, %xmm0 // xmm0 = packed_dword( 1, 1, 2, 2) psubd %xmm2, %xmm0 // xmm0 = packed_dword( 1, 1, 2, 2) // convert packed_dword to packed_word pxor %xmm1, %xmm1 packssdw %xmm1, %xmm0 // xmm0 = packed_word(0, 0, 0, 0, 1, 1, 2, 2) // max value of word in xmm0 is 3, so higher // bytes are always zero psadbw %xmm1, %xmm0 // xmm0 = packded_qword(0, 6)4. save result, i.e. the lowest dword
movd %xmm0, %eax // eax = 6
Sample program is available.
Brak komentarzy:
Prześlij komentarz