@@ -116,14 +116,20 @@ impl_host_float!(f128, u128);
116116/// Assembly implementations on x86 which respect rounding mode.
117117#[ cfg( x86_sse2) ]
118118#[ cfg( target_has_reliable_f16) ]
119- mod x86 {
119+ pub mod x86 {
120+ use std:: mem:: offset_of;
121+
122+ use rustc_apfloat:: { Float , ieee:: X87DoubleExtended } ;
123+
120124 use super :: * ;
121125
122- /// Given a rounding mode, assembly operation, and assembly configuration,
126+ /// Given a rounding mode, assembly operations, and assembly configuration, invoke the
127+ /// operations within a context that has the correct rounding mode set for SSE. Returns
128+ /// the status registers.
123129 macro_rules! with_fp_env {
124130 (
125131 $rm: ident,
126- $op: literal,
132+ $( $ op: literal, ) +
127133 $( $name: ident = $dir: ident( $kind: ident) $val: expr $( => $dst: ident) ?, ) +
128134 ) => { {
129135 let mut csr_stash = 0u32 ;
@@ -134,7 +140,8 @@ mod x86 {
134140 "stmxcsr [{csr_stash}]" ,
135141 // set the control state we want, clears flags
136142 "ldmxcsr [{csr}]" ,
137- $op,
143+ // Per-invocation instructions
144+ $( $op, ) +
138145 // get the new control state
139146 "stmxcsr [{csr}]" ,
140147 // restore the original control state
@@ -146,7 +153,49 @@ mod x86 {
146153 ) ;
147154
148155 check_exceptions( csr)
149- } }
156+ } } ;
157+ }
158+
/// Same as `with_fp_env` but for the x87 environment: run the given instructions with the
/// requested rounding mode and extended precision control, then return the exception
/// status they produced.
///
/// The expansion contains a `?` on the result of `make_x87_cw`, so it must be used inside
/// a function returning `Option`; an unsupported rounding mode propagates as `None`. It
/// also expands to `core::arch::asm!`, so the invocation must sit in an `unsafe` context.
///
/// Operand names passed by the caller must not collide with the names used internally
/// (`orig_env`, `env`, `cw`, `STATUS_OFFSET`, `STATUS_MASK`).
macro_rules! with_x87_env {
    (
        $rm:ident,
        $( $op:literal, )+
        $( $name:ident = $dir:ident($kind:ident) $val:expr $(=> $dst:ident)?, )+
    ) => {{
        let mut orig_env = X87FpEnv::default();
        let mut env = X87FpEnv::default();
        let cw = make_x87_cw($rm, X87Precision::Extended)?;

        core::arch::asm!(
            // There is no instruction that loads only the status word, so the whole
            // environment is stored twice: one copy to restore at the end, one to edit.
            "fnstenv [{orig_env}]",
            "fnstenv [{env}]",
            // Set the control word we want then clear any existing exceptions
            "mov word ptr [{env}], {cw:x}",
            "and word ptr [{env} + {STATUS_OFFSET}], {STATUS_MASK}",
            // Load the edited environment
            "fldenv [{env}]",
            // Per-invocation instructions
            $( $op, )+
            // Capture the status word raised by the instructions above. Without this
            // store `env.status` would still hold the value that was cleared before
            // they ran, and no exception would ever be reported.
            "fnstsw word ptr [{env} + {STATUS_OFFSET}]",
            // Restore the original environment
            "fldenv [{orig_env}]",
            orig_env = in(reg) &mut orig_env,
            env = in(reg) &mut env,
            cw = in(reg) cw,
            STATUS_OFFSET = const offset_of!(X87FpEnv, status),
            STATUS_MASK = const !X87_STATUS_EXCEPTIONS,
            $( $name = $dir($kind) $val $(=> $dst)?, )+
            out("st(0)") _, out("st(1)") _,
            out("st(2)") _, out("st(3)") _,
            out("st(4)") _, out("st(5)") _,
            out("st(6)") _, out("st(7)") _,
            options(nostack),
        );

        check_exceptions(env.status.into())
    }};
}
151200
152201 impl HostFloat for f16 {
@@ -604,11 +653,290 @@ mod x86 {
604653 }
605654 }
606655
607- /// Make a control word or return `None` if the rounding mode is not supported.
/// Raw bits of an x87 80-bit extended-precision value, carried in a `u128`.
///
/// The assembly in this module accesses the value with `tbyte ptr` loads and stores, i.e.
/// only the first ten bytes of the wrapper are read or written by the FPU.
// Lowercase name, presumably to sit alongside the primitive float types.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct x87_f80(u128);
660+
661+ impl fmt:: Display for x87_f80 {
662+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
663+ // Display via apfloat.
664+ X87DoubleExtended :: from_bits ( self . 0 ) . fmt ( f)
665+ }
666+ }
667+
668+ impl fmt:: Debug for x87_f80 {
669+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
670+ write ! ( f, "{:#022x}" , self . 0 )
671+ }
672+ }
673+
674+ impl HostFloat for x87_f80 {
675+ type UInt = u128 ;
676+
677+ fn from_bits ( bits : Self :: UInt ) -> Self {
678+ Self ( bits)
679+ }
680+
681+ fn to_bits ( self ) -> Self :: UInt {
682+ self . 0
683+ }
684+
685+ fn neg ( mut self ) -> Self {
686+ unsafe {
687+ core:: arch:: asm!(
688+ // Load the operand, flip the sign, and store it back.
689+ "fld tbyte ptr [{x}]" ,
690+ "fchs" ,
691+ "fstp tbyte ptr [{x}]" ,
692+ x = in( reg) & mut self ,
693+ out( "st(0)" ) _, out( "st(1)" ) _,
694+ out( "st(2)" ) _, out( "st(3)" ) _,
695+ out( "st(4)" ) _, out( "st(5)" ) _,
696+ out( "st(6)" ) _, out( "st(7)" ) _,
697+ options( nostack) ,
698+ ) ;
699+ }
700+ self
701+ }
702+
703+ fn add_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
704+ unsafe {
705+ let status = with_x87_env ! (
706+ rm,
707+ "fld tbyte ptr [{x}]" ,
708+ "fld tbyte ptr [{y}]" ,
709+ "faddp st(1), st" ,
710+ "fstp tbyte ptr [{x}]" ,
711+ x = in( reg) & mut self ,
712+ y = in( reg) & other,
713+ ) ;
714+ Some ( status. and ( self ) )
715+ }
716+ }
717+
718+ fn sub_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
719+ unsafe {
720+ let status = with_x87_env ! (
721+ rm,
722+ "fld tbyte ptr [{x}]" ,
723+ "fld tbyte ptr [{y}]" ,
724+ "fsubp st(1), st" ,
725+ "fstp tbyte ptr [{x}]" ,
726+ x = in( reg) & mut self ,
727+ y = in( reg) & other,
728+ ) ;
729+ Some ( status. and ( self ) )
730+ }
731+ }
732+
733+ fn mul_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
734+ unsafe {
735+ let status = with_x87_env ! (
736+ rm,
737+ "fld tbyte ptr [{x}]" ,
738+ "fld tbyte ptr [{y}]" ,
739+ "fmulp st(1), st" ,
740+ "fstp tbyte ptr [{x}]" ,
741+ x = in( reg) & mut self ,
742+ y = in( reg) & other,
743+ ) ;
744+ Some ( status. and ( self ) )
745+ }
746+ }
747+
748+ fn div_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
749+ unsafe {
750+ let status = with_x87_env ! (
751+ rm,
752+ "fld tbyte ptr [{x}]" ,
753+ "fld tbyte ptr [{y}]" ,
754+ "fdivp st(1), st" ,
755+ "fstp tbyte ptr [{x}]" ,
756+ x = in( reg) & mut self ,
757+ y = in( reg) & other,
758+ ) ;
759+ Some ( status. and ( self ) )
760+ }
761+ }
762+
763+ fn rem ( mut self , other : Self ) -> Self {
764+ unsafe {
765+ core:: arch:: asm!(
766+ "fld tbyte ptr [{y}]" ,
767+ "fld tbyte ptr [{x}]" ,
768+ // fprem must be repeated until C2 is cleared. Based on assembly from
769+ // musl fmodl.
770+ "2:" ,
771+ "fprem" ,
772+ "fnstsw ax" ,
773+ // Check if 0x0400 is set
774+ "test ah, 4" ,
775+ "jne 2b" ,
776+ "fstp st(1)" ,
777+ "fstp tbyte ptr [{x}]" ,
778+ x = in( reg) & mut self ,
779+ y = in( reg) & other,
780+ out( "ax" ) _,
781+ out( "st(0)" ) _, out( "st(1)" ) _,
782+ out( "st(2)" ) _, out( "st(3)" ) _,
783+ out( "st(4)" ) _, out( "st(5)" ) _,
784+ out( "st(6)" ) _, out( "st(7)" ) _,
785+ options( nostack) ,
786+ ) ;
787+ }
788+ self
789+ }
790+
791+ fn mul_add_r ( self , _mul : Self , _add : Self , _rm : Round ) -> Option < StatusAnd < Self > > {
792+ None
793+ }
794+
795+ /* We could call `__fixsfti` and similar, but that may not be available or may not
796+ * respect rounding modes. */
797+
798+ fn to_i128_r ( self , _rm : Round ) -> Option < StatusAnd < i128 > > {
799+ None
800+ }
801+
802+ fn to_u128_r ( self , _rm : Round ) -> Option < StatusAnd < u128 > > {
803+ None
804+ }
805+
806+ fn from_i128_r ( _x : i128 , _rm : Round ) -> Option < StatusAnd < Self > > {
807+ None
808+ }
809+
810+ fn from_u128_r ( _x : u128 , _rm : Round ) -> Option < StatusAnd < Self > > {
811+ None
812+ }
813+
814+ /* A load + store with the correct access sizes handles width conversions */
815+
816+ fn to_double_r ( self , rm : Round ) -> Option < StatusAnd < f64 > > {
817+ unsafe {
818+ let mut dst: f64 = 0.0 ;
819+ let status = with_x87_env ! (
820+ rm,
821+ "fld tbyte ptr [{x}]" ,
822+ "fstp qword ptr [{y}]" ,
823+ x = in( reg) & self ,
824+ y = in( reg) & mut dst,
825+ ) ;
826+ Some ( status. and ( dst) )
827+ }
828+ }
829+
830+ fn from_double_r ( x : f64 , rm : Round ) -> Option < StatusAnd < Self > > {
831+ unsafe {
832+ let mut dst = x87_f80 ( 0 ) ;
833+ let status = with_x87_env ! (
834+ rm,
835+ "fld qword ptr [{x}]" ,
836+ "fstp tbyte ptr [{y}]" ,
837+ x = in( reg) & x,
838+ y = in( reg) & mut dst,
839+ ) ;
840+ Some ( status. and ( dst) )
841+ }
842+ }
843+
844+ fn to_single_r ( self , rm : Round ) -> Option < StatusAnd < f32 > > {
845+ unsafe {
846+ let mut dst: f32 = 0.0 ;
847+ let status = with_x87_env ! (
848+ rm,
849+ "fld tbyte ptr [{x}]" ,
850+ "fstp dword ptr [{y}]" ,
851+ x = in( reg) & self ,
852+ y = in( reg) & mut dst,
853+ ) ;
854+ Some ( status. and ( dst) )
855+ }
856+ }
857+
858+ fn from_single_r ( x : f32 , rm : Round ) -> Option < StatusAnd < Self > > {
859+ unsafe {
860+ let mut dst = x87_f80 ( 0 ) ;
861+ let status = with_x87_env ! (
862+ rm,
863+ "fld dword ptr [{x}]" ,
864+ "fstp tbyte ptr [{y}]" ,
865+ x = in( reg) & x,
866+ y = in( reg) & mut dst,
867+ ) ;
868+ Some ( status. and ( dst) )
869+ }
870+ }
871+ }
872+
/// x87 FPU environment image: the memory written by `fnstenv` and read by `fldenv`
/// (32-bit protected-mode format, 28 bytes).
///
/// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
/// Basic Architecture, section 8.1.10 Saving the x87 FPU State, figure 8-9.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
struct X87FpEnv {
    /// Control word, at byte offset 0.
    control: u16,
    reserved0: u16,
    /// Status word (exception flags in the low six bits), at byte offset 4.
    status: u16,
    reserved1: u16,
    /// Tag word, at byte offset 8.
    tag: u16,
    reserved2: u16,
    /// Last instruction pointer offset.
    iptr_offset: u32,
    /// Last instruction pointer selector.
    iptr_sel: u16,
    /// Last opcode; shares a 32-bit slot with `iptr_sel`.
    opcode: u16,
    /// Last data pointer offset.
    dptr_offset: u32,
    /// Last data pointer selector: 16 bits, followed by 16 reserved bits.
    dptr_sel: u16,
    reserved3: u16,
}

// The instructions transfer exactly 28 bytes; keep the Rust type in lock-step with that.
const _: () = assert!(core::mem::size_of::<X87FpEnv>() == 28);
893+
/// Low six bits of the x87 status word. AND the status word with the complement of this
/// mask to clear the exception flags.
const X87_STATUS_EXCEPTIONS: u16 = 0x3f;
896+
/// Precision-control setting encoded into the x87 control word by `make_x87_cw`.
#[derive(Clone, Copy, Debug, PartialEq)]
#[expect(unused)] // only extended is used for now
enum X87Precision {
    /// Precision-control field `0b00`.
    Single,
    /// Precision-control field `0b10`.
    Double,
    /// Precision-control field `0b11`.
    Extended,
}
904+
905+ /// Make an x87 control word or return `None` if the rounding mode is not supported.
906+ ///
907+ /// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
908+ /// Basic Architecture, section 8.1.5 x87 FPU Control Word.
909+ const fn make_x87_cw ( round : Round , prec : X87Precision ) -> Option < u16 > {
910+ let mut csr = 0u16 ;
911+ // Set all exception masks so fp status doesn't turn into SIGFPE
912+ csr |= 0b00111111 ;
913+ // Reserved field is usually set by default
914+ csr |= 0b01000000 ;
915+
916+ let pc = match prec {
917+ X87Precision :: Single => 0b00 ,
918+ X87Precision :: Double => 0b10 ,
919+ X87Precision :: Extended => 0b11 ,
920+ } ;
921+ csr |= pc << 8 ;
922+
923+ let rc = match round {
924+ Round :: NearestTiesToEven => 0b00 ,
925+ Round :: TowardNegative => 0b01 ,
926+ Round :: TowardPositive => 0b10 ,
927+ Round :: TowardZero => 0b11 ,
928+ Round :: NearestTiesToAway => return None ,
929+ } ;
930+ csr |= rc << 10 ;
931+
932+ Some ( csr)
933+ }
934+
935+ /// Make a SSE control word or return `None` if the rounding mode is not supported.
608936 fn make_mxcsr_cw ( round : Round ) -> Option < u32 > {
609937 // Default: Clear exception flags, no DAZ, no FTZ
610938 let mut csr = 0u32 ;
611- // Set all masks so fp status doesn't turn into SIGFPE
939+ // Set all exception masks so fp status doesn't turn into SIGFPE
612940 csr |= 0b111111 << 7 ;
613941
614942 let rc = match round {
@@ -623,6 +951,7 @@ mod x86 {
623951 Some ( csr)
624952 }
625953
954+ /// Turn status flags from a register into a `Status`. Works for both x87 and SSE.
626955 fn check_exceptions ( csr : u32 ) -> Status {
627956 let mut status = Status :: OK ;
628957
0 commit comments