本文主要是介绍汇编——SSE对齐(一. 未对齐情况),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
SIMD是单指令多数据(Single Instruction, Multiple Data)的缩写,MMX是最早实现SIMD的指令集;SSE(Streaming SIMD Extensions,流式SIMD扩展)取代了MMX;后来AVX(Advanced Vector Extensions,高级向量扩展)又对SSE进行了扩展。如下代码展示了SSE处理未对齐内存的情况:
; sse_unaligned.asm
extern printf
section .data
        ; Single-precision input vectors: 4 x 32-bit floats (16 bytes) each.
        ; No `align` directive is given, so 16-byte alignment is not
        ; requested; main reads them with the unaligned move MOVUPS.
spvector1       dd      1.1, 2.2, 3.3, 4.4
spvector2       dd      1.1, 2.2, 2.2, 3.3

        ; Double-precision input vectors: 2 x 64-bit doubles (16 bytes) each,
        ; read by main with the unaligned move MOVUPD.
dpvector1       dq      1.1, 2.2
dpvector2       dq      3.3, 4.4

        ; printf format strings (10 = newline, 0 = C string terminator).
fmt1            db      "Single Precision Vector 1: %f, %f, %f, %f", 10, 0
fmt2            db      "Single Precision Vector 2: %f, %f, %f, %f", 10, 0
fmt3            db      "Sum of Single Precision Vector 1 and Vector 2: %f, %f, %f, %f", 10, 0
fmt4            db      "Double Precision Vector 1: %f, %f", 10, 0
fmt5            db      "Double Precision Vector 2: %f, %f", 10, 0
fmt6            db      "Sum of Double Precision Vector 1 and Vector 2: %f, %f", 10, 0

section .bss
        ; Result buffers written by main with MOVUPS / MOVUPD.
spvector_res    resd    4               ; 4 floats  = 16 bytes
dpvector_res    resq    4               ; 32 bytes reserved; main stores only the first 16 (2 doubles)
section .text
global main

;-----------------------------------------------------------------------
; int main(void)
; ABI:   System V AMD64
; Prints the two single-precision vectors, adds them with ADDPS and
; prints the sum; then repeats the same steps for the double-precision
; vectors with ADDPD. Every packed load/store uses the unaligned form
; (MOVUPS / MOVUPD), so the data needs no 16-byte alignment.
; Out:   eax = 0 (process exit status)
; Clobb: rax, rsi, rdi, xmm0, xmm1, plus anything printf clobbers
; Stack: `push rbp` leaves rsp 16-byte aligned at every call below.
;-----------------------------------------------------------------------
main:
        push    rbp
        mov     rbp, rsp

        ; --- single precision -------------------------------------------
        lea     rsi, [rel spvector1]
        lea     rdi, [rel fmt1]
        call    printspfp

        lea     rsi, [rel spvector2]
        lea     rdi, [rel fmt2]
        call    printspfp

        movups  xmm0, [rel spvector1]   ; xmm0 = 4 packed floats (unaligned load)
        movups  xmm1, [rel spvector2]
        addps   xmm0, xmm1              ; lane-wise sum of the 4 floats
        movups  [rel spvector_res], xmm0

        lea     rsi, [rel spvector_res]
        lea     rdi, [rel fmt3]
        call    printspfp

        ; --- double precision -------------------------------------------
        lea     rsi, [rel dpvector1]
        lea     rdi, [rel fmt4]
        call    printdpfp

        lea     rsi, [rel dpvector2]
        lea     rdi, [rel fmt5]
        call    printdpfp

        movupd  xmm0, [rel dpvector1]   ; xmm0 = 2 packed doubles (unaligned load)
        movupd  xmm1, [rel dpvector2]
        addpd   xmm0, xmm1              ; lane-wise sum of the 2 doubles
        movupd  [rel dpvector_res], xmm0

        lea     rsi, [rel dpvector_res]
        lea     rdi, [rel fmt6]
        call    printdpfp

        xor     eax, eax                ; return 0 instead of printf's leftover rax
        leave
        ret

;-----------------------------------------------------------------------
; printspfp - print four single-precision values with printf
; ABI:   System V AMD64
; In:    rdi = printf format string (passed through to printf unchanged)
;        rsi = address of 4 consecutive 32-bit floats
; Clobb: rax, xmm0-xmm3, plus anything printf clobbers
; Each float is widened to double because printf's %f expects a double.
;-----------------------------------------------------------------------
printspfp:
        push    rbp
        mov     rbp, rsp

        movss   xmm0, [rsi]
        cvtss2sd xmm0, xmm0
        movss   xmm1, [rsi + 4]
        cvtss2sd xmm1, xmm1
        movss   xmm2, [rsi + 8]
        cvtss2sd xmm2, xmm2
        movss   xmm3, [rsi + 12]
        cvtss2sd xmm3, xmm3

        mov     eax, 4                  ; al = number of vector registers used (variadic call)
        call    printf wrt ..plt

        leave
        ret

;-----------------------------------------------------------------------
; printdpfp - print two double-precision values with printf
; ABI:   System V AMD64
; In:    rdi = printf format string (passed through to printf unchanged)
;        rsi = address of 2 consecutive 64-bit doubles
; Clobb: rax, xmm0, xmm1, plus anything printf clobbers
;-----------------------------------------------------------------------
printdpfp:
        push    rbp
        mov     rbp, rsp

        movsd   xmm0, [rsi]
        movsd   xmm1, [rsi + 8]

        mov     eax, 2                  ; al = number of vector registers used (variadic call)
        call    printf wrt ..plt

        leave
        ret
需要注意的几个指令如下:
movups: 移动未对齐的打包单精度值;(u:未对齐 unaligned;p:打包的 packed;s:单精度 single)
addps: 打包单精度相加;
movss: 移动标量单精度值;(第一个 s:标量 scalar;第二个 s:单精度 single)
cvtss2sd: 将标量单精度转换为标量双精度;(d:双精度 double)
这篇关于汇编——SSE对齐(一. 未对齐情况)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!