- global main
- extern printf
- extern inicjuj_czas
- extern drukuj_czas
- size equ 16*1048576 ; number of doubles reserved for each of va, vb and vc
- section .data
- zero: dq 0.0 ; scalar 0.0: starting value of the running counter in fill1/fill2
- align 16 ; the packed constants below are read with aligned 16-byte loads
- one: dq 1.0, 1.0 ; fill1/fill2 load only the low lane and use it as the per-element step
- two: dq 2.0, 2.0 ; packed step added once per iteration by fill3
- align 32 ; four and init are read with aligned 32-byte loads by fill4
- four: dq 4.0, 4.0, 4.0, 4.0 ; packed step added once per iteration by fill4
- init: dq 1.0, 2.0, 3.0, 4.0 ; start values: fill3 loads the first two lanes, fill4 all four
- str: db '%lf %lf %lf',10,0 ; printf format: three doubles, newline, NUL terminator
- section .bss
- align 32 ; va must be 32-byte aligned for the movapd/vmovapd accesses in the kernels
- va: resq size ; va, vb, vc are reserved back to back (3*size doubles in total);
- vb: resq size ; the *2/*3/*4 routines treat that whole region as one interleaved
- vc: resq size ; buffer starting at va, while the *1 routines use three separate arrays
- section .text
- ;-----------------------------------------------------------------------
- ; int main(void)                                   ABI: System V AMD64
- ; Fills the interleaved test data with fill3, runs the packed-SSE kernel
- ; vv3 between the external inicjuj_czas / drukuj_czas calls (presumably
- ; "start timer" / "print elapsed time" -- defined outside this file),
- ; then prints every (a, b, c) triple with print_result3.
- ; Out: eax = 0
- ;-----------------------------------------------------------------------
- main:
-     sub     rsp, 8              ; rsp is 8 mod 16 on entry; realign to 16 for the calls
-     call    fill3
-     call    inicjuj_czas
-     call    vv3
-     call    drukuj_czas
-     call    print_result3
-     xor     eax, eax            ; exit status 0 instead of the last callee's leftover eax
-     add     rsp, 8
-     ret
- ;-----------------------------------------------------------------------
- ; vv1: scalar kernel over three separate arrays.   ABI: System V AMD64
- ;   vc[i] = sqrt(va[i]*va[i] + vb[i]*vb[i])   for i in [0, size)
- ; In/Out: none (works directly on the va, vb, vc buffers)
- ; Clobbers: rcx, rdx, rsi, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- vv1:
-     lea     rdi, [rel va]       ; rdi -> current a
-     lea     rsi, [rel vb]       ; rsi -> current b
-     lea     rdx, [rel vc]       ; rdx -> current c
-     mov     ecx, size           ; ecx = elements remaining
-     test    ecx, ecx
-     jz      .done               ; nothing to do for an empty array
- .next:
-     movq    xmm0, [rdi]
-     mulsd   xmm0, xmm0          ; a*a
-     movq    xmm1, [rsi]
-     mulsd   xmm1, xmm1          ; b*b
-     addsd   xmm1, xmm0          ; a*a + b*b
-     sqrtsd  xmm0, xmm1
-     movq    [rdx], xmm0         ; c = sqrt(a*a + b*b)
-     add     rdi, 8
-     add     rsi, 8
-     add     rdx, 8
-     dec     ecx
-     jnz     .next
- .done:
-     ret
- ;-----------------------------------------------------------------------
- ; vv2: scalar kernel over interleaved records.     ABI: System V AMD64
- ; The region starting at va is read as `size` records of three doubles
- ; {a, b, c}; for each record c = sqrt(a*a + b*b). The records span
- ; 3*size doubles, i.e. the va, vb and vc reservations taken together.
- ; In/Out: none
- ; Clobbers: rcx, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- vv2:
-     lea     rdi, [rel va]       ; rdi -> current {a, b, c} record
-     mov     ecx, size           ; ecx = records remaining
-     test    ecx, ecx
-     jz      .done
- .next:
-     movq    xmm0, [rdi]
-     mulsd   xmm0, xmm0          ; a*a
-     movq    xmm1, [rdi + 8]
-     mulsd   xmm1, xmm1          ; b*b
-     addsd   xmm1, xmm0
-     sqrtsd  xmm0, xmm1
-     movq    [rdi + 16], xmm0    ; c
-     add     rdi, 24             ; advance one 24-byte record
-     dec     ecx
-     jnz     .next
- .done:
-     ret
- ;-----------------------------------------------------------------------
- ; vv3: packed SSE2 kernel, two doubles per step.   ABI: System V AMD64
- ; The region starting at va is read as size/2 groups of 48 bytes laid
- ; out as {a0,a1}{b0,b1}{c0,c1}; each group gets c = sqrt(a*a + b*b)
- ; lane-wise. Aligned moves are safe because va is 32-byte aligned and
- ; the 48-byte stride keeps every access on a 16-byte boundary.
- ; In/Out: none
- ; Clobbers: rcx, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- vv3:
-     lea     rdi, [rel va]       ; rdi -> current 48-byte group
-     mov     ecx, size/2         ; ecx = groups remaining
-     test    ecx, ecx
-     jz      .done
- .next:
-     movapd  xmm0, [rdi]
-     mulpd   xmm0, xmm0          ; a*a (both lanes)
-     movapd  xmm1, [rdi + 16]
-     mulpd   xmm1, xmm1          ; b*b (both lanes)
-     addpd   xmm1, xmm0
-     sqrtpd  xmm0, xmm1
-     movapd  [rdi + 32], xmm0    ; c (both lanes)
-     add     rdi, 48
-     dec     ecx
-     jnz     .next
- .done:
-     ret
- ;-----------------------------------------------------------------------
- ; vv4: packed AVX kernel, four doubles per step.   ABI: System V AMD64
- ; The region starting at va is read as size/4 groups of 96 bytes laid
- ; out as {a0..a3}{b0..b3}{c0..c3}; each group gets c = sqrt(a*a + b*b)
- ; lane-wise. Aligned moves are safe because va is 32-byte aligned and
- ; the 96-byte stride keeps every access on a 32-byte boundary.
- ; In/Out: none
- ; Clobbers: rcx, rsi, ymm0, ymm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- vv4:
-     lea     rsi, [rel va]       ; rsi -> current 96-byte group
-     mov     ecx, size/4         ; ecx = groups remaining (non-zero constant)
- .next:
-     vmovapd ymm0, [rsi]
-     vmovapd ymm1, [rsi + 32]
-     vmulpd  ymm0, ymm0, ymm0    ; a*a
-     vmulpd  ymm1, ymm1, ymm1    ; b*b
-     vaddpd  ymm1, ymm1, ymm0
-     vsqrtpd ymm0, ymm1
-     vmovapd [rsi + 64], ymm0    ; c
-     add     rsi, 96
-     dec     ecx
-     jnz     .next
-     vzeroupper                  ; leave no dirty upper ymm state for SSE/C code that runs next
-     ret
- ;-----------------------------------------------------------------------
- ; fill1: initialise the separate-array layout.     ABI: System V AMD64
- ;   va[i] = vb[i] = i + 1.0   for i in [0, size);  vc is left untouched.
- ; In/Out: none
- ; Clobbers: rcx, rsi, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- fill1:
-     lea     rdi, [rel va]
-     lea     rsi, [rel vb]
-     xor     ecx, ecx            ; rcx = element index (upper half zeroed too)
-     movq    xmm0, [rel zero]    ; running value, incremented before each store
-     movq    xmm1, [rel one]     ; step = 1.0
- .next:
-     addsd   xmm0, xmm1
-     movq    [rdi + 8*rcx], xmm0 ; full 64-bit addressing, no 32-bit truncation
-     movq    [rsi + 8*rcx], xmm0
-     inc     ecx
-     cmp     ecx, size
-     jne     .next
-     ret
- ;-----------------------------------------------------------------------
- ; fill2: initialise the interleaved scalar layout. ABI: System V AMD64
- ; Writes `size` records of 24 bytes starting at va: record i gets
- ; a = b = i + 1.0; the third double (c) of each record is left untouched.
- ; In/Out: none
- ; Clobbers: rcx, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- fill2:
-     lea     rdi, [rel va]       ; rdi -> current {a, b, c} record
-     mov     ecx, size           ; ecx = records remaining (non-zero constant)
-     movq    xmm0, [rel zero]    ; running value, incremented before each store
-     movq    xmm1, [rel one]     ; step = 1.0
- .next:
-     addsd   xmm0, xmm1
-     movq    [rdi], xmm0         ; a
-     movq    [rdi + 8], xmm0     ; b
-     add     rdi, 24
-     dec     ecx
-     jnz     .next
-     ret
- ;-----------------------------------------------------------------------
- ; fill3: initialise the 2-wide packed layout.      ABI: System V AMD64
- ; Writes size/2 groups of 48 bytes starting at va. Group k gets
- ; {a0,a1} = {b0,b1} = {2k+1, 2k+2}; the {c0,c1} slot is left untouched.
- ; In/Out: none
- ; Clobbers: rcx, rdi, xmm0, xmm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- fill3:
-     lea     rdi, [rel va]       ; rdi -> current 48-byte group
-     mov     ecx, size/2         ; ecx = groups remaining (non-zero constant)
-     movapd  xmm0, [rel init]    ; {1.0, 2.0}: first two lanes of init
-     movapd  xmm1, [rel two]     ; per-group step {2.0, 2.0}
- .next:
-     movapd  [rdi], xmm0         ; a pair
-     movapd  [rdi + 16], xmm0    ; b pair
-     addpd   xmm0, xmm1
-     add     rdi, 48
-     dec     ecx
-     jnz     .next
-     ret
- ;-----------------------------------------------------------------------
- ; fill4: initialise the 4-wide packed layout.      ABI: System V AMD64
- ; Writes size/4 groups of 96 bytes starting at va. Group k gets
- ; {a0..a3} = {b0..b3} = {4k+1 .. 4k+4}; the {c0..c3} slot is untouched.
- ; In/Out: none
- ; Clobbers: rcx, rdi, ymm0, ymm1, flags (volatile registers only)
- ;-----------------------------------------------------------------------
- fill4:
-     lea     rdi, [rel va]       ; rdi -> current 96-byte group
-     mov     ecx, size/4         ; ecx = groups remaining (non-zero constant)
-     vmovapd ymm0, [rel init]    ; {1.0, 2.0, 3.0, 4.0}
-     vmovapd ymm1, [rel four]    ; per-group step {4.0, 4.0, 4.0, 4.0}
- .next:
-     vmovapd [rdi], ymm0         ; a quad
-     vmovapd [rdi + 32], ymm0    ; b quad
-     vaddpd  ymm0, ymm0, ymm1
-     add     rdi, 96
-     dec     ecx
-     jnz     .next
-     vzeroupper                  ; leave no dirty upper ymm state for SSE/C code that runs next
-     ret
- ;-----------------------------------------------------------------------
- ; print_result1: print the separate-array layout.  ABI: System V AMD64
- ; Calls printf(str, va[i], vb[i], vc[i]) for every i in [0, size).
- ; In/Out: none
- ; Preserves r15 (used as the loop index across the printf calls);
- ; everything printf may clobber is clobbered.
- ;-----------------------------------------------------------------------
- print_result1:
-     push    r15                 ; callee-saved; the push also realigns rsp to 16
-     xor     r15d, r15d          ; r15 = element index (upper half zeroed too)
- .row:
-     lea     rdi, [rel str]
-     lea     rax, [rel va]
-     movq    xmm0, [rax + 8*r15] ; full 64-bit addressing, no 32-bit truncation
-     lea     rax, [rel vb]
-     movq    xmm1, [rax + 8*r15]
-     lea     rax, [rel vc]
-     movq    xmm2, [rax + 8*r15]
-     mov     eax, 3              ; variadic call: al = number of vector registers used
-     call    printf
-     inc     r15d
-     cmp     r15d, size
-     jne     .row
-     pop     r15
-     ret
- ;-----------------------------------------------------------------------
- ; print_result2: print the interleaved scalar layout. ABI: System V AMD64
- ; Walks `size` records of 24 bytes starting at va and calls
- ; printf(str, a, b, c) for each one.
- ; In/Out: none
- ; Preserves r13 and r15 (live across the printf calls); everything
- ; printf may clobber is clobbered.
- ;-----------------------------------------------------------------------
- print_result2:
-     push    r13
-     push    r15
-     sub     rsp, 8              ; return address + 2 pushes + 8 = 32: rsp 16-aligned at the call
-     mov     r15d, size          ; r15d = records remaining (non-zero constant)
-     lea     r13, [rel va]       ; r13 -> current {a, b, c} record
- .row:
-     lea     rdi, [rel str]
-     movq    xmm0, [r13]
-     movq    xmm1, [r13 + 8]
-     movq    xmm2, [r13 + 16]
-     mov     eax, 3              ; variadic call: al = number of vector registers used
-     call    printf
-     add     r13, 24
-     dec     r15d
-     jnz     .row
-     add     rsp, 8
-     pop     r15
-     pop     r13
-     ret
- ;-----------------------------------------------------------------------
- ; print_result3: print the 2-wide packed layout.   ABI: System V AMD64
- ; Walks size/2 groups of 48 bytes ({a0,a1}{b0,b1}{c0,c1}) starting at va
- ; and, for each of the two lanes, calls printf(str, a, b, c).
- ; In/Out: none
- ; Preserves r13, r14, r15 (live across the printf calls); everything
- ; printf may clobber is clobbered.
- ;-----------------------------------------------------------------------
- print_result3:
-     push    r13
-     push    r14
-     push    r15                 ; return address + 3 pushes = 32: rsp 16-aligned at the call
-     mov     r15d, size/2        ; r15d = groups remaining (non-zero constant)
-     lea     r13, [rel va]       ; r13 -> current 48-byte group
- .group:
-     xor     r14d, r14d          ; r14 = byte offset of the lane inside each pair (0, 8)
- .lane:
-     lea     rdi, [rel str]
-     movq    xmm0, [r13 + r14]       ; a lane
-     movq    xmm1, [r13 + r14 + 16]  ; b lane
-     movq    xmm2, [r13 + r14 + 32]  ; c lane
-     mov     eax, 3              ; variadic call: al = number of vector registers used
-     call    printf
-     add     r14d, 8
-     cmp     r14d, 16
-     jne     .lane
-     add     r13, 48
-     dec     r15d
-     jnz     .group
-     pop     r15
-     pop     r14
-     pop     r13
-     ret
- ;-----------------------------------------------------------------------
- ; print_result4: print the 4-wide packed layout.   ABI: System V AMD64
- ; Walks size/4 groups of 96 bytes ({a0..a3}{b0..b3}{c0..c3}) starting at
- ; va and, for each of the four lanes, calls printf(str, a, b, c).
- ; In/Out: none
- ; Preserves r13, r14, r15 (live across the printf calls); everything
- ; printf may clobber is clobbered.
- ;-----------------------------------------------------------------------
- print_result4:
-     push    r13
-     push    r14
-     push    r15                 ; return address + 3 pushes = 32: rsp 16-aligned at the call
-     mov     r15d, size/4        ; r15d = groups remaining (non-zero constant)
-     lea     r13, [rel va]       ; r13 -> current 96-byte group
- .group:
-     xor     r14d, r14d          ; r14 = byte offset of the lane inside each quad (0..24)
- .lane:
-     lea     rdi, [rel str]
-     movq    xmm0, [r13 + r14]       ; a lane
-     movq    xmm1, [r13 + r14 + 32]  ; b lane
-     movq    xmm2, [r13 + r14 + 64]  ; c lane
-     mov     eax, 3              ; variadic call: al = number of vector registers used
-     call    printf
-     add     r14d, 8
-     cmp     r14d, 32
-     jne     .lane
-     add     r13, 96
-     dec     r15d
-     jnz     .group
-     pop     r15
-     pop     r14
-     pop     r13
-     ret