提问人:francesco 提问时间:10/25/2019 最后编辑:francesco 更新时间:10/29/2019 访问量:430
-O1 改变浮点数学
-O1 alters floating point math
问:
以下 C++ 程序在使用 -O0 或 -O1 编译时给出了数值上不同的结果。
#include <iostream>
#include <array>
#include <cmath>
#include <iomanip>
// Minimal reproduction case: applies the 2-D rotation
//   (v0, v1) -> (c*v0 - s*v1, s*v0 + c*v1),  c = cos(theta), s = sin(theta)
// to the vector v = (v0, 0) and prints both components in hexadecimal
// floating-point notation so that every bit of each double is visible.
// Returns 0.
int main()
{
std::array<double, 2> v;
// Zero both components; only component 0 is overwritten below, so v[1] == 0.
v.fill(0);
// Inputs are given as hex-float literals so they are exact doubles.
std::get<0>(v) = 0x1.5b4d3afe1f7d2p-1;
double theta = 0x1.1aef12f82caf9p+2;
// These two calls are the only transcendental functions in the program.
// In the -O0 listing below they appear as calls to cos@PLT / sin@PLT;
// in the -O1 listing no such calls remain.
double c = std::cos(theta);
double s = std::sin(theta);
// Both new components are computed from the OLD v before either is stored.
double vi = c * std::get<0>(v) - s * std::get<1>(v);
double vj = s * std::get<0>(v) + c * std::get<1>(v);
std::get<0>(v) = vi;
std::get<1>(v) = vj;
// hexfloat output avoids any decimal rounding when printing.
std::cout << std::hexfloat;
for (const auto& x : v)
std::cout << x << " ";
std::cout << std::endl;
return 0;
}
我正在使用 gcc 9.2.0。我使用 std::hexfloat 来确保 double 变量以完全精确的方式打印出来。
编译方式
g++ -std=c++17 -Wall -pedantic -O0 -o test test.cpp
给出以下结果(原文以粗体标出差异,即第一个数尾数的最后一位十六进制数字)
-0x1.8f4e436eb5371p-3 -0x1.4ca54aa5d4e1ep-1
而使用
g++ -std=c++17 -Wall -pedantic -O1 -o test test.cpp
输出是
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1
-O0 与 -O1 所启用的优化标志之间的区别(我可以用 -Q --help=optimizers 提取到的部分)如下:
$ g++ -O0 -Q --help=optimizers >optO0.txt
$ g++ -O1 -Q --help=optimizers >optO1.txt
$ diff optO0.txt optO1.txt|grep ^'>'
> -fbranch-count-reg [enabled]
> -fcombine-stack-adjustments [enabled]
> -fcompare-elim [enabled]
> -fcprop-registers [enabled]
> -fdefer-pop [enabled]
> -fforward-propagate [enabled]
> -fguess-branch-probability [enabled]
> -fif-conversion [enabled]
> -fif-conversion2 [enabled]
> -finline-functions-called-once [enabled]
> -fipa-profile [enabled]
> -fipa-pure-const [enabled]
> -fipa-reference [enabled]
> -fipa-reference-addressable [enabled]
> -fmove-loop-invariants [enabled]
> -fomit-frame-pointer [enabled]
> -freorder-blocks [enabled]
> -fshrink-wrap [enabled]
> -fsplit-wide-types [enabled]
> -fssa-phiopt [enabled]
> -ftree-bit-ccp [enabled]
> -ftree-builtin-call-dce [enabled]
> -ftree-ccp [enabled]
> -ftree-ch [enabled]
> -ftree-coalesce-vars [enabled]
> -ftree-copy-prop [enabled]
> -ftree-dce [enabled]
> -ftree-dominator-opts [enabled]
> -ftree-dse [enabled]
> -ftree-fre [enabled]
> -ftree-pta [enabled]
> -ftree-sink [enabled]
> -ftree-slsr [enabled]
> -ftree-sra [enabled]
> -ftree-ter [enabled]
根据此列表以及 gcc 的手册页,-O1 不会启用任何改变浮点数学的优化标志(例如 -ffast-math),那么为什么输出会有所不同呢?
编辑:
使用此问题答案中建议的 -ffloat-store 标志不会改变结果。
C 库版本是
$ ldd --version
ldd (GNU libc) 2.30
该库是从 Arch Linux 的 glibc 2.30-1 软件包安装的。体系结构是 x86_64。
这是 -O0 情况下的汇编程序转储(使用
g++ -std=c++17 -Wall -pedantic -O0 -S -o test test.cpp
生成)
.file "test.cpp"
.text
.section .text._ZStanSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStanSt13_Ios_FmtflagsS_,comdat
.weak _ZStanSt13_Ios_FmtflagsS_
.type _ZStanSt13_Ios_FmtflagsS_, @function
_ZStanSt13_Ios_FmtflagsS_:
.LFB1415:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
andl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1415:
.size _ZStanSt13_Ios_FmtflagsS_, .-_ZStanSt13_Ios_FmtflagsS_
.section .text._ZStorSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStorSt13_Ios_FmtflagsS_,comdat
.weak _ZStorSt13_Ios_FmtflagsS_
.type _ZStorSt13_Ios_FmtflagsS_, @function
_ZStorSt13_Ios_FmtflagsS_:
.LFB1416:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
orl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1416:
.size _ZStorSt13_Ios_FmtflagsS_, .-_ZStorSt13_Ios_FmtflagsS_
.section .text._ZStcoSt13_Ios_Fmtflags,"axG",@progbits,_ZStcoSt13_Ios_Fmtflags,comdat
.weak _ZStcoSt13_Ios_Fmtflags
.type _ZStcoSt13_Ios_Fmtflags, @function
_ZStcoSt13_Ios_Fmtflags:
.LFB1418:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
notl %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1418:
.size _ZStcoSt13_Ios_Fmtflags, .-_ZStcoSt13_Ios_Fmtflags
.section .text._ZStoRRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStoRRSt13_Ios_FmtflagsS_,comdat
.weak _ZStoRRSt13_Ios_FmtflagsS_
.type _ZStoRRSt13_Ios_FmtflagsS_, @function
_ZStoRRSt13_Ios_FmtflagsS_:
.LFB1419:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStorSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1419:
.size _ZStoRRSt13_Ios_FmtflagsS_, .-_ZStoRRSt13_Ios_FmtflagsS_
.section .text._ZStaNRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStaNRSt13_Ios_FmtflagsS_,comdat
.weak _ZStaNRSt13_Ios_FmtflagsS_
.type _ZStaNRSt13_Ios_FmtflagsS_, @function
_ZStaNRSt13_Ios_FmtflagsS_:
.LFB1420:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1420:
.size _ZStaNRSt13_Ios_FmtflagsS_, .-_ZStaNRSt13_Ios_FmtflagsS_
.section .text._ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,"axG",@progbits,_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,comdat
.align 2
.weak _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
.type _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, @function
_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_:
.LFB1449:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -24(%rbp)
movl %esi, -28(%rbp)
movl %edx, -32(%rbp)
movq -24(%rbp), %rax
movl 24(%rax), %eax
movl %eax, -4(%rbp)
movl -32(%rbp), %eax
movl %eax, %edi
call _ZStcoSt13_Ios_Fmtflags
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStaNRSt13_Ios_FmtflagsS_
movl -32(%rbp), %edx
movl -28(%rbp), %eax
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStoRRSt13_Ios_FmtflagsS_
movl -4(%rbp), %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1449:
.size _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, .-_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
# std::hexfloat(std::ios_base&) -- out-of-line copy of the stream manipulator
# emitted at -O0.  In: %rdi = reference to the ios_base.  Out: %rax = the same
# reference.  It calls operator|(4, 256) and passes the result, together with
# the mask 260, to ios_base::setf(flags, mask).
# NOTE(review): 4 and 256 are presumably libstdc++'s ios_base::fixed and
# ios_base::scientific bits (260 = floatfield) -- confirm against <bits/ios_base.h>.
.section .text._ZSt8hexfloatRSt8ios_base,"axG",@progbits,_ZSt8hexfloatRSt8ios_base,comdat
.weak _ZSt8hexfloatRSt8ios_base
.type _ZSt8hexfloatRSt8ios_base, @function
_ZSt8hexfloatRSt8ios_base:
.LFB1481:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
# Spill the ios_base reference to the stack (unoptimized code).
movq %rdi, -8(%rbp)
# eax = operator|(_Ios_Fmtflags(4), _Ios_Fmtflags(256))
movl $256, %esi
movl $4, %edi
call _ZStorSt13_Ios_FmtflagsS_
movl %eax, %ecx
# base.setf(flags = eax, mask = 260)
movq -8(%rbp), %rax
movl $260, %edx
movl %ecx, %esi
movq %rax, %rdi
call _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
# Return the ios_base reference.
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1481:
.size _ZSt8hexfloatRSt8ios_base, .-_ZSt8hexfloatRSt8ios_base
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata
.LC3:
.string " "
.text
# main, compiled at -O0: a statement-by-statement translation of the C++ source.
# Key point for the question: cos and sin are evaluated at RUN TIME by calls
# into the math library (cos@PLT / sin@PLT), and the rotation is done with
# separate scalar SSE2 multiply / subtract / add instructions.
# Stack slots used below (offsets from %rbp):
#   -32  v (two doubles)        -96  theta
#   -88  c = cos(theta)         -80  s = sin(theta)
#   -72  vi                     -64  vj
#   -120 scratch double kept across the std::get calls
#   -112 temporary 0.0 passed by reference to fill
#   -104 / -48  range-for begin / end pointers, -56 &v, -40 current element
#   -8   stack-protector canary
.globl main
.type main, @function
main:
.LFB2816:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve 128 bytes of locals.
addq $-128, %rsp
# Save the canary from %fs:40; it is re-checked before returning.
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
# v.fill(0): materialize 0.0 in a temporary and pass (&v, &tmp).
pxor %xmm0, %xmm0
movsd %xmm0, -112(%rbp)
leaq -112(%rbp), %rdx
leaq -32(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4fillERKd
# std::get<0>(v) = constant .LC1
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd .LC1(%rip), %xmm0
movsd %xmm0, (%rax)
# theta = constant .LC2
movsd .LC2(%rip), %xmm0
movsd %xmm0, -96(%rbp)
# c = cos(theta) -- run-time library call
movq -96(%rbp), %rax
movq %rax, %xmm0
call cos@PLT
movq %xmm0, %rax
movq %rax, -88(%rbp)
# s = sin(theta) -- run-time library call
movq -96(%rbp), %rax
movq %rax, %xmm0
call sin@PLT
movq %xmm0, %rax
movq %rax, -80(%rbp)
# vi = c * get<0>(v) - s * get<1>(v)
# scratch = get<0>(v) * c
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
# xmm0 = get<1>(v) * s
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
# xmm1 = scratch - xmm0; each product was rounded before the subtraction.
movsd -120(%rbp), %xmm1
subsd %xmm0, %xmm1
movapd %xmm1, %xmm0
movsd %xmm0, -72(%rbp)
# vj = s * get<0>(v) + c * get<1>(v)
# scratch = get<0>(v) * s
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
# xmm0 = get<1>(v) * c, then add scratch
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
addsd -120(%rbp), %xmm0
movsd %xmm0, -64(%rbp)
# std::get<0>(v) = vi (value parked in the scratch slot across the call)
movsd -72(%rbp), %xmm3
movsd %xmm3, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm3
movsd %xmm3, (%rax)
# std::get<1>(v) = vj
movsd -64(%rbp), %xmm4
movsd %xmm4, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm4
movsd %xmm4, (%rax)
# std::cout << std::hexfloat  (ostream::operator<< taking an ios_base manipulator)
leaq _ZSt8hexfloatRSt8ios_base(%rip), %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSt8ios_baseS0_E@PLT
# Range-for setup: remember &v, then fetch v.begin() and v.end().
leaq -32(%rbp), %rax
movq %rax, -56(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, -104(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE3endEv
movq %rax, -48(%rbp)
# Loop head: exit when the iterator equals end.
.L17:
movq -104(%rbp), %rax
cmpq -48(%rbp), %rax
je .L16
# x = *it; std::cout << x  (ostream::operator<<(double))
movq -104(%rbp), %rax
movq %rax, -40(%rbp)
movq -40(%rbp), %rax
movq (%rax), %rax
movq %rax, %xmm0
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEd@PLT
# ... << " "  (string literal .LC3), using the stream returned in %rax
leaq .LC3(%rip), %rsi
movq %rax, %rdi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
# ++it: advance by one double (8 bytes)
addq $8, -104(%rbp)
jmp .L17
# After the loop: std::cout << std::endl
.L16:
movq _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@GOTPCREL(%rip), %rax
movq %rax, %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSoS_E@PLT
# return 0, after verifying the canary is unchanged
movl $0, %eax
movq -8(%rbp), %rcx
xorq %fs:40, %rcx
je .L19
call __stack_chk_fail@PLT
.L19:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2816:
.size main, .-main
.section .text._ZNSt5arrayIdLm2EE4fillERKd,"axG",@progbits,_ZNSt5arrayIdLm2EE4fillERKd,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4fillERKd
.type _ZNSt5arrayIdLm2EE4fillERKd, @function
_ZNSt5arrayIdLm2EE4fillERKd:
.LFB3128:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
subq $24, %rsp
.cfi_offset 3, -24
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNKSt5arrayIdLm2EE4sizeEv
movq %rax, %rbx
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, %rcx
movq -32(%rbp), %rax
movq %rax, %rdx
movq %rbx, %rsi
movq %rcx, %rdi
call _ZSt6fill_nIPdmdET_S1_T0_RKT1_
nop
addq $24, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3128:
.size _ZNSt5arrayIdLm2EE4fillERKd, .-_ZNSt5arrayIdLm2EE4fillERKd
.section .text._ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3129:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $0, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3129:
.size _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3130:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $1, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3130:
.size _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZNSt5arrayIdLm2EE5beginEv,"axG",@progbits,_ZNSt5arrayIdLm2EE5beginEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE5beginEv
.type _ZNSt5arrayIdLm2EE5beginEv, @function
_ZNSt5arrayIdLm2EE5beginEv:
.LFB3132:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3132:
.size _ZNSt5arrayIdLm2EE5beginEv, .-_ZNSt5arrayIdLm2EE5beginEv
.section .text._ZNSt5arrayIdLm2EE3endEv,"axG",@progbits,_ZNSt5arrayIdLm2EE3endEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE3endEv
.type _ZNSt5arrayIdLm2EE3endEv, @function
_ZNSt5arrayIdLm2EE3endEv:
.LFB3133:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
addq $16, %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3133:
.size _ZNSt5arrayIdLm2EE3endEv, .-_ZNSt5arrayIdLm2EE3endEv
.section .text._ZNKSt5arrayIdLm2EE4sizeEv,"axG",@progbits,_ZNKSt5arrayIdLm2EE4sizeEv,comdat
.align 2
.weak _ZNKSt5arrayIdLm2EE4sizeEv
.type _ZNKSt5arrayIdLm2EE4sizeEv, @function
_ZNKSt5arrayIdLm2EE4sizeEv:
.LFB3247:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movl $2, %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3247:
.size _ZNKSt5arrayIdLm2EE4sizeEv, .-_ZNKSt5arrayIdLm2EE4sizeEv
.section .text._ZSt6fill_nIPdmdET_S1_T0_RKT1_,"axG",@progbits,_ZSt6fill_nIPdmdET_S1_T0_RKT1_,comdat
.weak _ZSt6fill_nIPdmdET_S1_T0_RKT1_
.type _ZSt6fill_nIPdmdET_S1_T0_RKT1_, @function
_ZSt6fill_nIPdmdET_S1_T0_RKT1_:
.LFB3248:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq %rdx, -24(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZSt12__niter_baseIPdET_S1_
movq %rax, %rcx
movq -24(%rbp), %rdx
movq -16(%rbp), %rax
movq %rax, %rsi
movq %rcx, %rdi
call _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
movq %rax, %rdx
leaq -8(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZSt12__niter_wrapIPdET_RKS1_S1_
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3248:
.size _ZSt6fill_nIPdmdET_S1_T0_RKT1_, .-_ZSt6fill_nIPdmdET_S1_T0_RKT1_
# std::__array_traits<double, 2>::_S_ref(const double (&)[2], unsigned long)
# In: %rdi = address of the underlying array, %rsi = element index.
# Out: %rax = %rdi + 8 * %rsi, i.e. the address of the indexed double.
# No bounds check is performed here.
.section .text._ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.type _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, @function
_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm:
.LFB3249:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Spill both arguments (unoptimized code).
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
# rdx = index * 8 (byte offset of the element)
movq -16(%rbp), %rax
leaq 0(,%rax,8), %rdx
# rax = base + byte offset
movq -8(%rbp), %rax
addq %rdx, %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3249:
.size _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, .-_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.section .text._ZNSt5arrayIdLm2EE4dataEv,"axG",@progbits,_ZNSt5arrayIdLm2EE4dataEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4dataEv
.type _ZNSt5arrayIdLm2EE4dataEv, @function
_ZNSt5arrayIdLm2EE4dataEv:
.LFB3250:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3250:
.size _ZNSt5arrayIdLm2EE4dataEv, .-_ZNSt5arrayIdLm2EE4dataEv
.section .text._ZSt12__niter_baseIPdET_S1_,"axG",@progbits,_ZSt12__niter_baseIPdET_S1_,comdat
.weak _ZSt12__niter_baseIPdET_S1_
.type _ZSt12__niter_baseIPdET_S1_, @function
_ZSt12__niter_baseIPdET_S1_:
.LFB3318:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3318:
.size _ZSt12__niter_baseIPdET_S1_, .-_ZSt12__niter_baseIPdET_S1_
# std::__fill_n_a<double*, unsigned long, double>(first, n, value) -- the
# scalar overload that does the actual work of array::fill at -O0.
# In: %rdi = first (double*), %rsi = n, %rdx = pointer to the fill value.
# Out: %rax = first advanced past the n elements written.
# Stack slots: -24 running pointer, -32 n, -40 &value,
#              -8 local copy of the value, -16 remaining count.
.section .text._ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,"axG",@progbits,_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,comdat
.weak _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.type _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, @function
_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_:
.LFB3319:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq %rdx, -40(%rbp)
# Copy *value into a local once, before the loop.
movq -40(%rbp), %rax
movsd (%rax), %xmm0
movsd %xmm0, -8(%rbp)
# remaining = n
movq -32(%rbp), %rax
movq %rax, -16(%rbp)
# Loop: while (remaining != 0) { *first = value; --remaining; ++first; }
.L41:
cmpq $0, -16(%rbp)
je .L40
movq -24(%rbp), %rax
movsd -8(%rbp), %xmm0
movsd %xmm0, (%rax)
subq $1, -16(%rbp)
addq $8, -24(%rbp)
jmp .L41
# Return the advanced pointer.
.L40:
movq -24(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3319:
.size _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, .-_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.section .text._ZSt12__niter_wrapIPdET_RKS1_S1_,"axG",@progbits,_ZSt12__niter_wrapIPdET_RKS1_S1_,comdat
.weak _ZSt12__niter_wrapIPdET_RKS1_S1_
.type _ZSt12__niter_wrapIPdET_RKS1_S1_, @function
_ZSt12__niter_wrapIPdET_RKS1_S1_:
.LFB3320:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -16(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3320:
.size _ZSt12__niter_wrapIPdET_RKS1_S1_, .-_ZSt12__niter_wrapIPdET_RKS1_S1_
.section .text._ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.type _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, @function
_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd:
.LFB3321:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3321:
.size _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, .-_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.text
.type _Z41__static_initialization_and_destruction_0ii, @function
_Z41__static_initialization_and_destruction_0ii:
.LFB3455:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
cmpl $1, -4(%rbp)
jne .L49
cmpl $65535, -8(%rbp)
jne .L49
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rax
movq %rax, %rdi
call __cxa_atexit@PLT
.L49:
nop
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3455:
.size _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3456:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl $65535, %esi
movl $1, %edi
call _Z41__static_initialization_and_destruction_0ii
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3456:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
# Double constants used by the -O0 main.  Each double is emitted as two 32-bit
# words, low word first (little-endian).  At -O0 only the two INPUT literals
# are stored; the results are computed at run time.
.section .rodata
.align 8
# .LC1 = 0x3FE5B4D3AFE1F7D2 = 0x1.5b4d3afe1f7d2p-1  (initial std::get<0>(v))
.LC1:
.long 2950821842
.long 1072018643
.align 8
# .LC2 = 0x4011AEF12F82CAF9 = 0x1.1aef12f82caf9p+2  (theta)
.LC2:
.long 797100793
.long 1074900721
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits
-O1 情况下的汇编程序转储(使用
g++ -std=c++17 -Wall -pedantic -O1 -S -o test test.cpp
生成)如下:
.file "test.cpp"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC1:
.string " "
.text
# main, compiled at -O1.  There is no call to cos or sin and no floating-point
# arithmetic in this function: the two values that get printed are loaded
# directly from the read-only constants .LC0 and .LC2, i.e. the whole
# computation was evaluated by the compiler.  Only the output calls remain.
.globl main
.type main, @function
main:
.LFB2853:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
# rbx = &std::cout (kept in a callee-saved register across the calls)
leaq _ZSt4cout(%rip), %rbx
# Inlined std::cout << std::hexfloat: OR the bits 260 into the 32-bit field at
# offset 24 of a subobject of cout -- the same constant and the same offset the
# -O0 hexfloat/setf code uses.
# NOTE(review): the subobject is located by adding the value stored 24 bytes
# before the address held in cout's first word; presumably this is the
# vtable's virtual-base offset to ios_base -- confirm against the C++ ABI.
movq _ZSt4cout(%rip), %rax
movq %rbx, %rcx
addq -24(%rax), %rcx
orl $260, 24(%rcx)
# std::cout << v[0]: precomputed constant, via ostream::_M_insert<double>
movsd .LC0(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
# ... << " ": __ostream_insert(stream, " ", 1)
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
# std::cout << v[1]: precomputed constant
movsd .LC2(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
# ... << " "
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
# std::endl(std::cout)
movq %rbx, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT
# return 0
movl $0, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE2853:
.size main, .-main
# Static initializer for this translation unit (registered in .init_array
# below).  Constructs the std::ios_base::Init object _ZStL8__ioinit and
# registers its destructor with __cxa_atexit.
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3477:
.cfi_startproc
# Adjust the stack by 8 bytes (presumably to keep calls 16-byte aligned).
subq $8, %rsp
.cfi_def_cfa_offset 16
# std::ios_base::Init::Init(&__ioinit)
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
# __cxa_atexit(&std::ios_base::Init::~Init, &__ioinit, &__dso_handle)
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
call __cxa_atexit@PLT
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE3477:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
# Double constants used by the -O1 main.  Each double is emitted as two 32-bit
# words, low word first (little-endian); negative .long values are the signed
# spelling of words with the top (sign) bit set.  These are the two RESULTS
# that main prints -- the input literals do not appear in this listing at all.
.section .rodata.cst8,"aM",@progbits,8
.align 8
# .LC0 = 0xBFC8F4E436EB5372 = -0x1.8f4e436eb5372p-3  (first printed value)
.LC0:
.long 921391986
.long -1077349148
.align 8
# .LC2 = 0xBFE4CA54AA5D4E1E = -0x1.4ca54aa5d4e1ep-1  (second printed value)
.LC2:
.long 2858241566
.long -1075525036
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits
编辑二
根据要求,对使用 -O0 编译的可执行文件运行 objdump -d 的输出可以在这里找到(由于长度限制,我不能在这里发布)。ldd 的输出表明该可执行文件链接的是 /usr/lib/libm.so.6。此处还提供了 objdump -d /bin/libm.so.6 的部分输出(.init、.plt、sin 和 cos 部分)。
答:
使用 -O1 时,浮点计算在编译时使用 GNU MPFR 库进行。MPFR 应给出正确舍入的结果,即使对于 sin 和 cos 等函数也是如此。对于这些函数,您的数学库可能具有不同的精度目标,这就是为什么运行时计算(在 -O0 优化级别)有时会给出不同的结果。例如,GNU C 库的一般精度目标为几个 ulp。
据报道,IEEE 754 仅对数学库函数的一个子集(显然包括 sqrt)有精度要求,这使得数学库能够在超越函数的速度和精度之间选择不同的权衡。(我无法查阅 IEEE 754 标准,因为不幸的是,IEEE 反对公开传播知识。)
评论
theta
v
gcc
虽然另一个答案已经解释了为什么您会看到 -O0(在运行时求值,结果略有误差,舍入方式未指定)和 -O1(在编译时求值,结果和舍入均正确)之间存在不同的行为,但我想补充解释一下,为什么我很难用 -O0 重现您的结果。我总是观察到输出
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1
在我自己的机器和编译器资源管理器上。
原因是您使用的很可能是带 -mfma 标志编译的 glibc,即使用了 FMA3 指令。我测试了 gcc 的各个 -march 开关,并将原因缩小到了这一点。
在我的机器上,使用 Kaby Lake 处理器、gcc 9.2 和 glibc 2.29,用 -O2 -march=native 编译 glibc,并用 -O0 编译可执行文件,我得到输出
-0x1.8f4e436eb5371p-3 -0x1.4ca54aa5d4e1ep-1
用 -O2 -march=native -mno-fma 编译 glibc,并用 -O0 编译可执行文件,我得到
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1
无论哪种情况,用 -O1 编译可执行文件都会得到:
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1
查看启用 FMA3 时 sin 和 cos 的反汇编,很明显使用了这些指令。融合乘加(FMA)对中间结果少做一次舍入,因此可能稍微改变 cos 和/或 sin 的输出。我怀疑这就是所讨论的代码根据 glibc 的优化标志产生略有不同的输出的原因。然而,正如另一个答案所解释的,这种差异确实在这些函数文档所记载的误差范围之内。
至于为什么允许编译器使用 FMA3 指令,尽管它改变了浮点运算的结果,请参阅此问题。
glibc 中还有一个称为 multi-arch 的功能:如果启用它,glibc 会在运行时根据运行程序的 CPU 架构选用不同优化版本的数学函数。如果启用了此功能并且您的 CPU 支持 FMA3(例如 Haswell 及更高版本),那么您也会看到与您相同的结果。
评论
-O0
-O1