@@ -116,14 +116,20 @@ impl_host_float!(f128, u128);
116116/// Assembly implementations on x86 which respect rounding mode.
117117#[ cfg( x86_sse2) ]
118118#[ cfg( target_has_reliable_f16) ]
119- mod x86 {
119+ pub mod x86 {
120+ use std:: mem:: offset_of;
121+
122+ use rustc_apfloat:: { Float , ieee:: X87DoubleExtended } ;
123+
120124 use super :: * ;
121125
122- /// Given a rounding mode, assembly operation, and assembly configuration,
126+ /// Given a rounding mode, assembly operations, and assembly configuration, invoke the
127+ /// operations within a context that has the correct rounding mode set for SSE. Returns
128+ /// the status registers.
123129 macro_rules! with_fp_env {
124130 (
125131 $rm: ident,
126- $op: literal,
132+ $( $ op: literal, ) +
127133 $( $name: ident = $dir: ident( $kind: ident) $val: expr $( => $dst: ident) ?, ) +
128134 ) => { {
129135 let mut csr_stash = 0u32 ;
@@ -134,7 +140,8 @@ mod x86 {
134140 "stmxcsr [{csr_stash}]" ,
135141 // set the control state we want, clears flags
136142 "ldmxcsr [{csr}]" ,
137- $op,
143+ // Per-invocation instructions
144+ $( $op, ) +
138145 // get the new control state
139146 "stmxcsr [{csr}]" ,
140147 // restore the original control state
@@ -146,7 +153,49 @@ mod x86 {
146153 ) ;
147154
148155 check_exceptions( csr)
149- } }
156+ } } ;
157+ }
158+
/// Same as `with_fp_env` but for the x87 environment.
///
/// Given a rounding mode, one or more assembly instruction literals, and the asm operands
/// they reference, run the instructions with an x87 control word built by `make_x87_cw`
/// (extended precision, the requested rounding mode) and with the exception flags cleared
/// beforehand. Evaluates to the result of `check_exceptions` applied to the status word
/// captured right after the instructions ran.
///
/// Notes for callers:
///
/// * The expansion uses `?` on the result of `make_x87_cw`, so it must be invoked inside a
///   function returning `Option`; an unsupported rounding mode returns `None` early.
/// * The expansion contains `core::arch::asm!`, so it must be invoked in an `unsafe` context.
/// * The operand names `orig_env`, `env`, `cw`, `STATUS_OFFSET` and `STATUS_MASK` are used
///   by the macro itself and must not be reused by the caller's operands.
macro_rules! with_x87_env {
    (
        $rm:ident,
        $($op:literal,)+
        $($name:ident = $dir:ident($kind:ident) $val:expr $(=> $dst:ident)?,)+
    ) => {{
        let mut orig_env = X87FpEnv::default();
        let mut env = X87FpEnv::default();
        let cw = make_x87_cw($rm, X87Precision::Extended)?;

        core::arch::asm!(
            // We need to use fnstenv since there is no way to load only the status word. We
            // take two copies and modify one.
            "fnstenv [{orig_env}]",
            "fnstenv [{env}]",
            // Set the control word we want then clear any existing exceptions
            "mov word ptr [{env}], {cw:x}",
            "and word ptr [{env} + {STATUS_OFFSET}], {STATUS_MASK}",
            // Store the modified env back
            "fldenv [{env}]",
            // Per-invocation instructions
            $($op,)+
            // Get the new status word. Without this, `env.status` would still hold the
            // flags that were cleared above and no exception would ever be reported.
            "fnstsw word ptr [{env} + {STATUS_OFFSET}]",
            // Restore the original environment
            "fldenv [{orig_env}]",
            orig_env = in(reg) &mut orig_env,
            env = in(reg) &mut env,
            cw = in(reg) cw,
            STATUS_OFFSET = const offset_of!(X87FpEnv, status),
            STATUS_MASK = const !X87_STATUS_EXCEPTIONS,
            $($name = $dir($kind) $val $(=> $dst)?,)+
            out("st(0)") _, out("st(1)") _,
            out("st(2)") _, out("st(3)") _,
            out("st(4)") _, out("st(5)") _,
            out("st(6)") _, out("st(7)") _,
            options(nostack),
        );

        check_exceptions(env.status.into())
    }};
}
151200
152201 impl HostFloat for f16 {
@@ -604,11 +653,291 @@ mod x86 {
604653 }
605654 }
606655
607- /// Make a control word or return `None` if the rounding mode is not supported.
/// Stand-in type for running x87 operations on the host, which has no builtin 80-bit float.
///
/// The value is kept as raw bits inside a `u128`; the x87 instructions in this module access
/// it in memory through `tbyte ptr` loads and stores.
#[derive(Clone, Copy)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct x87_f80(u128);
661+
662+ impl fmt:: Display for x87_f80 {
663+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
664+ // Display via apfloat.
665+ X87DoubleExtended :: from_bits ( self . 0 ) . fmt ( f)
666+ }
667+ }
668+
669+ impl fmt:: Debug for x87_f80 {
670+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
671+ write ! ( f, "{:#022x}" , self . 0 )
672+ }
673+ }
674+
675+ impl HostFloat for x87_f80 {
676+ type UInt = u128 ;
677+
678+ fn from_bits ( bits : Self :: UInt ) -> Self {
679+ Self ( bits)
680+ }
681+
682+ fn to_bits ( self ) -> Self :: UInt {
683+ self . 0
684+ }
685+
686+ fn neg ( mut self ) -> Self {
687+ unsafe {
688+ core:: arch:: asm!(
689+ // Load the operand, flip the sign, and store it back.
690+ "fld tbyte ptr [{x}]" ,
691+ "fchs" ,
692+ "fstp tbyte ptr [{x}]" ,
693+ x = in( reg) & mut self ,
694+ out( "st(0)" ) _, out( "st(1)" ) _,
695+ out( "st(2)" ) _, out( "st(3)" ) _,
696+ out( "st(4)" ) _, out( "st(5)" ) _,
697+ out( "st(6)" ) _, out( "st(7)" ) _,
698+ options( nostack) ,
699+ ) ;
700+ }
701+ self
702+ }
703+
704+ fn add_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
705+ unsafe {
706+ let status = with_x87_env ! (
707+ rm,
708+ "fld tbyte ptr [{x}]" ,
709+ "fld tbyte ptr [{y}]" ,
710+ "faddp st(1), st" ,
711+ "fstp tbyte ptr [{x}]" ,
712+ x = in( reg) & mut self ,
713+ y = in( reg) & other,
714+ ) ;
715+ Some ( status. and ( self ) )
716+ }
717+ }
718+
719+ fn sub_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
720+ unsafe {
721+ let status = with_x87_env ! (
722+ rm,
723+ "fld tbyte ptr [{x}]" ,
724+ "fld tbyte ptr [{y}]" ,
725+ "fsubp st(1), st" ,
726+ "fstp tbyte ptr [{x}]" ,
727+ x = in( reg) & mut self ,
728+ y = in( reg) & other,
729+ ) ;
730+ Some ( status. and ( self ) )
731+ }
732+ }
733+
734+ fn mul_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
735+ unsafe {
736+ let status = with_x87_env ! (
737+ rm,
738+ "fld tbyte ptr [{x}]" ,
739+ "fld tbyte ptr [{y}]" ,
740+ "fmulp st(1), st" ,
741+ "fstp tbyte ptr [{x}]" ,
742+ x = in( reg) & mut self ,
743+ y = in( reg) & other,
744+ ) ;
745+ Some ( status. and ( self ) )
746+ }
747+ }
748+
749+ fn div_r ( mut self , other : Self , rm : Round ) -> Option < StatusAnd < Self > > {
750+ unsafe {
751+ let status = with_x87_env ! (
752+ rm,
753+ "fld tbyte ptr [{x}]" ,
754+ "fld tbyte ptr [{y}]" ,
755+ "fdivp st(1), st" ,
756+ "fstp tbyte ptr [{x}]" ,
757+ x = in( reg) & mut self ,
758+ y = in( reg) & other,
759+ ) ;
760+ Some ( status. and ( self ) )
761+ }
762+ }
763+
764+ fn rem ( mut self , other : Self ) -> Self {
765+ unsafe {
766+ core:: arch:: asm!(
767+ "fld tbyte ptr [{y}]" ,
768+ "fld tbyte ptr [{x}]" ,
769+ // fprem must be repeated until C2 is cleared. Based on assembly from
770+ // musl fmodl.
771+ "2:" ,
772+ "fprem" ,
773+ "fnstsw ax" ,
774+ // Check if 0x0400 is set
775+ "test ah, 4" ,
776+ "jne 2b" ,
777+ "fstp st(1)" ,
778+ "fstp tbyte ptr [{x}]" ,
779+ x = in( reg) & mut self ,
780+ y = in( reg) & other,
781+ out( "ax" ) _,
782+ out( "st(0)" ) _, out( "st(1)" ) _,
783+ out( "st(2)" ) _, out( "st(3)" ) _,
784+ out( "st(4)" ) _, out( "st(5)" ) _,
785+ out( "st(6)" ) _, out( "st(7)" ) _,
786+ options( nostack) ,
787+ ) ;
788+ }
789+ self
790+ }
791+
792+ fn mul_add_r ( self , _mul : Self , _add : Self , _rm : Round ) -> Option < StatusAnd < Self > > {
793+ None
794+ }
795+
796+ /* We could call `__fixsfti` and similar, but that may not be available or may not
797+ * respect rounding modes. */
798+
799+ fn to_i128_r ( self , _rm : Round ) -> Option < StatusAnd < i128 > > {
800+ None
801+ }
802+
803+ fn to_u128_r ( self , _rm : Round ) -> Option < StatusAnd < u128 > > {
804+ None
805+ }
806+
807+ fn from_i128_r ( _x : i128 , _rm : Round ) -> Option < StatusAnd < Self > > {
808+ None
809+ }
810+
811+ fn from_u128_r ( _x : u128 , _rm : Round ) -> Option < StatusAnd < Self > > {
812+ None
813+ }
814+
815+ /* A load + store with the correct access sizes handles width conversions */
816+
817+ fn to_double_r ( self , rm : Round ) -> Option < StatusAnd < f64 > > {
818+ unsafe {
819+ let mut dst: f64 = 0.0 ;
820+ let status = with_x87_env ! (
821+ rm,
822+ "fld tbyte ptr [{x}]" ,
823+ "fstp qword ptr [{y}]" ,
824+ x = in( reg) & self ,
825+ y = in( reg) & mut dst,
826+ ) ;
827+ Some ( status. and ( dst) )
828+ }
829+ }
830+
831+ fn from_double_r ( x : f64 , rm : Round ) -> Option < StatusAnd < Self > > {
832+ unsafe {
833+ let mut dst = x87_f80 ( 0 ) ;
834+ let status = with_x87_env ! (
835+ rm,
836+ "fld qword ptr [{x}]" ,
837+ "fstp tbyte ptr [{y}]" ,
838+ x = in( reg) & x,
839+ y = in( reg) & mut dst,
840+ ) ;
841+ Some ( status. and ( dst) )
842+ }
843+ }
844+
845+ fn to_single_r ( self , rm : Round ) -> Option < StatusAnd < f32 > > {
846+ unsafe {
847+ let mut dst: f32 = 0.0 ;
848+ let status = with_x87_env ! (
849+ rm,
850+ "fld tbyte ptr [{x}]" ,
851+ "fstp dword ptr [{y}]" ,
852+ x = in( reg) & self ,
853+ y = in( reg) & mut dst,
854+ ) ;
855+ Some ( status. and ( dst) )
856+ }
857+ }
858+
859+ fn from_single_r ( x : f32 , rm : Round ) -> Option < StatusAnd < Self > > {
860+ unsafe {
861+ let mut dst = x87_f80 ( 0 ) ;
862+ let status = with_x87_env ! (
863+ rm,
864+ "fld dword ptr [{x}]" ,
865+ "fstp tbyte ptr [{y}]" ,
866+ x = in( reg) & x,
867+ y = in( reg) & mut dst,
868+ ) ;
869+ Some ( status. and ( dst) )
870+ }
871+ }
872+ }
873+
/// In-memory image of the x87 FPU environment: what `fnstenv`/`fstenv` write and what
/// `fldenv` reads back (32-bit protected-mode format, 28 bytes).
///
/// Field widths follow the figure referenced below: every 16-bit register is followed by
/// 16 reserved bits, and the data-pointer selector is 16 bits wide like the
/// instruction-pointer selector.
///
/// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
/// Basic Architecture, section 8.1.10 Saving the x87 FPU State, figure 8-9.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
struct X87FpEnv {
    /// Control word (offset 0).
    control: u16,
    reserved0: u16,
    /// Status word (offset 4).
    status: u16,
    reserved1: u16,
    /// Tag word (offset 8).
    tag: u16,
    reserved2: u16,
    /// Instruction pointer offset (offset 12).
    iptr_offset: u32,
    /// Instruction pointer selector (offset 16).
    iptr_sel: u16,
    /// Opcode bits, sharing a dword with the selector above (offset 18).
    opcode: u16,
    /// Data (operand) pointer offset (offset 20).
    dptr_offset: u32,
    /// Data (operand) pointer selector (offset 24).
    dptr_sel: u16,
    reserved3: u16,
}
894+
/// Low six bits of the x87 status word. AND the status word with the inverse of this mask
/// to clear them (they are treated as the exception flags by the code that uses this mask).
const X87_STATUS_EXCEPTIONS: u16 = (1 << 6) - 1;
897+
/// Selects the precision-control setting that `make_x87_cw` encodes into the control word.
#[expect(unused)] // only extended is used for now
#[derive(Clone, Copy, Debug, PartialEq)]
enum X87Precision {
    /// Encoded as `0b00`.
    Single,
    /// Encoded as `0b10`.
    Double,
    /// Encoded as `0b11`.
    Extended,
}
905+
906+ /// Make an x87 control word or return `None` if the rounding mode is not supported.
907+ ///
908+ /// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
909+ /// Basic Architecture, section 8.1.5 x87 FPU Control Word.
910+ const fn make_x87_cw ( round : Round , prec : X87Precision ) -> Option < u16 > {
911+ let mut csr = 0u16 ;
912+ // Set all exception masks so fp status doesn't turn into SIGFPE
913+ csr |= 0b00111111 ;
914+ // Reserved field is usually set by default
915+ csr |= 0b01000000 ;
916+
917+ let pc = match prec {
918+ X87Precision :: Single => 0b00 ,
919+ X87Precision :: Double => 0b10 ,
920+ X87Precision :: Extended => 0b11 ,
921+ } ;
922+ csr |= pc << 8 ;
923+
924+ let rc = match round {
925+ Round :: NearestTiesToEven => 0b00 ,
926+ Round :: TowardNegative => 0b01 ,
927+ Round :: TowardPositive => 0b10 ,
928+ Round :: TowardZero => 0b11 ,
929+ Round :: NearestTiesToAway => return None ,
930+ } ;
931+ csr |= rc << 10 ;
932+
933+ Some ( csr)
934+ }
935+
936+ /// Make a SSE control word or return `None` if the rounding mode is not supported.
608937 fn make_mxcsr_cw ( round : Round ) -> Option < u32 > {
609938 // Default: Clear exception flags, no DAZ, no FTZ
610939 let mut csr = 0u32 ;
611- // Set all masks so fp status doesn't turn into SIGFPE
940+ // Set all exception masks so fp status doesn't turn into SIGFPE
612941 csr |= 0b111111 << 7 ;
613942
614943 let rc = match round {
@@ -623,6 +952,7 @@ mod x86 {
623952 Some ( csr)
624953 }
625954
955+ /// Turn status flags from a register into a `Status`. Works for both x87 and SSE.
626956 fn check_exceptions ( csr : u32 ) -> Status {
627957 let mut status = Status :: OK ;
628958
0 commit comments