From 081db0baf28055c13c7ec7ee72f2b5f1176b1ccc Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Fri, 3 Jan 2020 17:52:33 -0500
Subject: [PATCH 1/7] [WIP] Fix hardware floating point.

f32.min works.  The tests are extremely slow because they iterate through
all possible pairs of exponents/signs and the top bits of the mantissa.
---
 include/eosio/vm/backend.hpp |   2 +-
 include/eosio/vm/parser.hpp  |   7 ++
 include/eosio/vm/x86_64.hpp  |  64 ++++++++++++------
 tests/CMakeLists.txt         |   1 +
 tests/float_tests.cpp        | 121 +++++++++++++++++++++++++++++++++++
 5 files changed, 173 insertions(+), 22 deletions(-)
 create mode 100644 tests/float_tests.cpp
diff --git a/include/eosio/vm/backend.hpp b/include/eosio/vm/backend.hpp
index 8423e2cf..9328430e 100644
--- a/include/eosio/vm/backend.hpp
+++ b/include/eosio/vm/backend.hpp
@@ -26,7 +26,7 @@ namespace eosio { namespace vm {
       template<typename Host>
       using context = jit_execution_context<Host>;
       template<typename Host, typename Options>
-      using parser = binary_parser<machine_code_writer<jit_execution_context<Host>>, Options>;
+      using parser = binary_parser<machine_code_writer<jit_execution_context<Host>, detail::get_use_softfloat<Options>()>, Options>;
       static constexpr bool is_jit = true;
    };
 
diff --git a/include/eosio/vm/parser.hpp b/include/eosio/vm/parser.hpp
index 2cf42d37..769a31ae 100644
--- a/include/eosio/vm/parser.hpp
+++ b/include/eosio/vm/parser.hpp
@@ -190,6 +190,13 @@ namespace eosio { namespace vm {
 
    PARSER_OPTION(allow_zero_blocktype, false, bool)
 
+   template<typename Options>
+   constexpr bool get_use_softfloat(long) { return true; } // fallback: selected only when the (int) overload below is not viable; defaults to softfloat
+   template<typename Options>
+   constexpr auto get_use_softfloat(int) -> decltype(Options::use_softfloat) { return Options::use_softfloat; } // viable only if Options::use_softfloat is a valid expression
+   template<typename Options>
+   constexpr bool get_use_softfloat() { return get_use_softfloat<Options>(0); } // the literal 0 is an int, so the (int) overload is preferred over (long) whenever both are viable
+
 #undef MAX_ELEMENTS
 #undef PARSER_OPTION
 
diff --git a/include/eosio/vm/x86_64.hpp b/include/eosio/vm/x86_64.hpp
index d6921f42..6b87c30c 100644
--- a/include/eosio/vm/x86_64.hpp
+++ b/include/eosio/vm/x86_64.hpp
@@ -28,9 +28,10 @@ namespace eosio { namespace vm {
    //   the stack.
    //
    // - The base of memory is stored in rsi
+   // - Remaining stack depth is in rbx
    //
    // - FIXME: Factor the machine instructions into a separate assembler class.
-   template<typename Context>
+   template<typename Context, bool use_softfloat = true>
    class machine_code_writer {
     public:
       machine_code_writer(growable_allocator& alloc, std::size_t source_bytes, module& mod) :
@@ -1441,37 +1442,58 @@ namespace eosio { namespace vm {
          emit_f32_binop(0x5e, CHOOSE_FN(_eosio_f32_div));
       }
       void emit_f32_min() {
-         auto icount = softfloat_instr(47, 44);
+         auto icount = softfloat_instr(63, 44); // NOTE(review): 63 equals the number of bytes emitted by the hardware-float path below
         if constexpr(use_softfloat) {
            emit_f32_binop_softfloat(CHOOSE_FN(_eosio_f32_min));
            return;
         }
-        // mov (%rsp), %eax
-        emit_bytes(0x8b, 0x04, 0x24);
-        // test %eax, %eax
-        emit_bytes(0x85, 0xc0);
-        // je ZERO
-        emit_bytes(0x0f, 0x84);
-        void* zero = emit_branch_target32();
+
         // movss 8(%rsp), %xmm0
         emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
-        // minss (%rsp), %xmm0
-        emit_bytes(0xf3, 0x0f, 0x5d, 0x04, 0x24);
-        // jmp DONE
-        emit_bytes(0xe9);
-        void* done = emit_branch_target32();
-        // ZERO:
-        fix_branch(zero, code);
+        // ucomiss %xmm0, %xmm0
+        emit_bytes(0x0f, 0x2e, 0xc0);
+        // jp DONE  (PF set => the value at 8(%rsp) is NaN; it stays in place as the result)
+        emit_bytes(0x7a, 0x30);
         // movss (%rsp), %xmm0
         emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
-        // minss 8(%rsp), %xmm0
-        emit_bytes(0xf3, 0x0f, 0x5d, 0x44, 0x24, 0x08);
+        // ucomiss %xmm0, %xmm0
+        emit_bytes(0x0f, 0x2e, 0xc0);
+
+        // mov (%rsp), %eax
+        emit_bytes(0x8b, 0x04, 0x24);
+        // jp SECOND  (flags are still those of the ucomiss above; the value at (%rsp) is NaN, so store it as the result)
+        emit_bytes(0x7a, 0x1f);
+        // mov 8(%rsp), %r8d
+        emit_bytes(0x44, 0x8b, 0x44, 0x24, 0x08);
+
+        // mov %eax, %ecx
+        emit_bytes(0x89, 0xc1);
+        // sar $31, %ecx
+        emit_bytes(0xc1, 0xf9, 0x1f);
+        // shr $1, %ecx
+        emit_bytes(0xd1, 0xe9);
+        // xor %eax, %ecx
+        emit_bytes(0x31, 0xc1);
+        // mov %r8d, %edx
+        emit_bytes(0x44, 0x89, 0xc2);
+        // sar $31, %edx
+        emit_bytes(0xc1, 0xfa, 0x1f);
+        // shr $1, %edx
+        emit_bytes(0xd1, 0xea);
+        // xor %r8d, %edx
+        emit_bytes(0x44, 0x31, 0xc2);
+        // cmp %ecx, %edx
+        emit_bytes(0x39, 0xca);
+        // cmovl %r8d, %eax  (take the 8(%rsp) operand when its remapped bits compare lower as signed integers)
+        emit_bytes(0x41, 0x0f, 0x4c, 0xc0);
+
+        // SECOND:
+        // mov %eax, 8(%rsp)
+        emit_bytes(0x89, 0x44, 0x24, 0x08);
+
         // DONE:
-        fix_branch(done, code);
         // add $8, %rsp
         emit_bytes(0x48, 0x83, 0xc4, 0x08);
-        // movss %xmm0, (%rsp)
-        emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
       void emit_f32_max() {
          auto icount = softfloat_instr(47, 44);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c85376b3..95925664 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -80,6 +80,7 @@ target_include_directories( eos_vm_spec_tests PUBLIC ${CMAKE_CURRENT_BINARY_DIR}
 
 add_executable(unit_tests main.cpp
                           allocator_tests.cpp
+                          float_tests.cpp
                           eosio_max_nested_structures_tests.cpp
                           guarded_ptr_tests.cpp
                           varint_tests.cpp
diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
new file mode 100644
index 00000000..7879de45
--- /dev/null
+++ b/tests/float_tests.cpp
@@ -0,0 +1,121 @@
+#include <eosio/vm/backend.hpp>
+#include <random>
+
+#include <catch2/catch.hpp>
+#include "utils.hpp"
+
+using namespace eosio::vm;
+
+extern wasm_allocator wa;
+
+struct softfloat_config {
+   static constexpr bool use_softfloat = true;
+};
+
+struct hardfloat_config {
+   static constexpr bool use_softfloat = false;
+};
+
+/*
+ * (module
+ *  (func (export "fn") (param f64) (result i64)
+ *   (local.get 0)
+ *   (f32.demote_f64)
+ *   (i32.reinterpret_f32)
+ *   (i64.extend_i32_u)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f32_demote_f64_wasm = {
+  0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+  0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+  0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0xb6,
+  0xbc, 0xad, 0x0b
+};
+
+struct multi_backend {
+   explicit multi_backend(std::vector<uint8_t>& code) :
+     soft_interpreter_backend(code),
+     soft_jit_backend(code),
+     hard_interpreter_backend(code),
+     hard_jit_backend(code)
+   {
+      soft_interpreter_backend.set_wasm_allocator(&wa);
+      hard_interpreter_backend.set_wasm_allocator(&wa);
+      soft_jit_backend.set_wasm_allocator(&wa);
+      hard_jit_backend.set_wasm_allocator(&wa);
+   }
+   backend<nullptr_t, interpreter, softfloat_config> soft_interpreter_backend;
+   backend<nullptr_t, jit, softfloat_config> soft_jit_backend;
+   backend<nullptr_t, interpreter, hardfloat_config> hard_interpreter_backend;
+   backend<nullptr_t, jit, hardfloat_config> hard_jit_backend;
+   template<typename... A>
+   std::tuple<uint64_t, uint64_t, uint64_t, uint64_t> call_with_return(A... a) {
+      auto x0 = soft_interpreter_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
+      auto x1 = soft_jit_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
+      auto x2 = hard_interpreter_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
+      auto x3 = hard_jit_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
+      return {x0, x1, x2, x3};
+   }
+};
+
+TEST_CASE("test f32.demote_f64", "[float_tests]") {
+   backend<nullptr_t, interpreter, softfloat_config> soft_interpreter_backend(f32_demote_f64_wasm);
+   backend<nullptr_t, jit, softfloat_config> soft_jit_backend(f32_demote_f64_wasm);
+   backend<nullptr_t, interpreter, hardfloat_config> hard_interpreter_backend(f32_demote_f64_wasm);
+   backend<nullptr_t, jit, hardfloat_config> hard_jit_backend(f32_demote_f64_wasm);
+   soft_interpreter_backend.set_wasm_allocator(&wa);
+   hard_interpreter_backend.set_wasm_allocator(&wa);
+   soft_jit_backend.set_wasm_allocator(&wa);
+   hard_jit_backend.set_wasm_allocator(&wa);
+
+   for(int i = 0; i < (1 << 16); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         uint64_t argn = (static_cast<uint64_t>(i) << 48) + static_cast<uint64_t>(j);
+         double arg = bit_cast<double>(argn);
+         auto x0 = soft_interpreter_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
+         auto x1 = soft_jit_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
+         auto x2 = hard_interpreter_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
+         auto x3 = hard_jit_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
+         CHECK(x0 == x1);
+         CHECK(x1 == x2);
+         CHECK(x2 == x3);
+      }
+   }
+}
+
+/*
+ * (module
+ *  (func (export "fn") (param f32 f32) (result i64)
+ *   (local.get 0)
+ *   (local.get 1)
+ *   (f32.min)
+ *   (i32.reinterpret_f32)
+ *   (i64.extend_i32_u)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f32_min_wasm = {
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+   0x02, 0x7d, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0b, 0x01, 0x09, 0x00, 0x20, 0x00,
+   0x20, 0x01, 0x96, 0xbc, 0xad, 0x0b
+};
+
+TEST_CASE("test f32.min", "[float_tests]") {
+   multi_backend bkend{f32_min_wasm};
+   for(int i = 0; i < (1 << 11); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         for(int k = 0; k < (1 << 11); ++k) {
+            for(int l = -1; l <= 1; ++l) {
+               float arg1 = bit_cast<float>((static_cast<uint32_t>(i) << 21) + static_cast<uint32_t>(j));
+               float arg2 = bit_cast<float>((static_cast<uint32_t>(k) << 21) + static_cast<uint32_t>(l));
+               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
+               CHECK(x0 == x1);
+               CHECK(x1 == x2);
+               CHECK(x2 == x3);
+            }
+         }
+      }
+   }
+}

From 96cddf4c17a66559c1e13134a907c9860ff1a823 Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Mon, 6 Jan 2020 15:57:51 -0500
Subject: [PATCH 2/7] Fix hardware float min/max.

---
 include/eosio/vm/backend.hpp           |  21 +--
 include/eosio/vm/interpret_visitor.hpp |  41 ++++-
 include/eosio/vm/x86_64.hpp            | 203 ++++++++++++++++---------
 tests/float_tests.cpp                  | 106 +++++++++++++
 4 files changed, 283 insertions(+), 88 deletions(-)

diff --git a/include/eosio/vm/backend.hpp b/include/eosio/vm/backend.hpp
index 9328430e..71d8ab18 100644
--- a/include/eosio/vm/backend.hpp
+++ b/include/eosio/vm/backend.hpp
@@ -87,7 +87,7 @@ namespace eosio { namespace vm {
          else
             _walloc->reset();
          _ctx.reset();
-         _ctx.execute_start(host, interpret_visitor(_ctx));
+         _ctx.execute_start(host, visitor_type(_ctx));
          return *this;
       }
 
@@ -96,9 +96,9 @@ namespace eosio { namespace vm {
          try {
             if constexpr (eos_vm_debug) {
                //_ctx.execute_func_table(host, debug_visitor(_ctx), func_index, args...);
-               _ctx.execute_func_table(host, interpret_visitor(_ctx), func_index, args...);
+               _ctx.execute_func_table(host, visitor_type(_ctx), func_index, args...);
             } else {
-               _ctx.execute_func_table(host, interpret_visitor(_ctx), func_index, args...);
+               _ctx.execute_func_table(host, visitor_type(_ctx), func_index, args...);
             }
             return true;
          } catch (...) {
@@ -112,9 +112,9 @@ namespace eosio { namespace vm {
          try {
             if constexpr (eos_vm_debug) {
                //_ctx.execute(host, debug_visitor(_ctx), func_index, args...);
-               _ctx.execute(host, interpret_visitor(_ctx), func_index, args...);
+               _ctx.execute(host, visitor_type(_ctx), func_index, args...);
             } else {
-               _ctx.execute(host, interpret_visitor(_ctx), func_index, args...);
+               _ctx.execute(host, visitor_type(_ctx), func_index, args...);
             }
             return true;
          } catch (...) {
@@ -128,9 +128,9 @@ namespace eosio { namespace vm {
          try {
             if constexpr (eos_vm_debug) {
                //_ctx.execute(host, debug_visitor(_ctx), func, args...);
-               _ctx.execute(host, interpret_visitor(_ctx), func, args...);
+               _ctx.execute(host, visitor_type(_ctx), func, args...);
             } else {
-               _ctx.execute(host, interpret_visitor(_ctx), func, args...);
+               _ctx.execute(host, visitor_type(_ctx), func, args...);
             }
             return true;
          } catch (...) {
@@ -145,9 +145,9 @@ namespace eosio { namespace vm {
          try {
             if constexpr (eos_vm_debug) {
                //return _ctx.execute(host, debug_visitor(_ctx), func, args...);
-               return _ctx.execute(host, interpret_visitor(_ctx), func, args...);
+               return _ctx.execute(host, visitor_type(_ctx), func, args...);
             } else {
-               return _ctx.execute(host, interpret_visitor(_ctx), func, args...);
+               return _ctx.execute(host, visitor_type(_ctx), func, args...);
             }
          } catch (...) {
             initialize(host);
@@ -202,7 +202,7 @@ namespace eosio { namespace vm {
 	          if constexpr (eos_vm_debug) {
                      print_result(_ctx.execute(host, debug_visitor(_ctx), s));
 	          } else {
-	             _ctx.execute(host, interpret_visitor(_ctx), s);
+	             _ctx.execute(host, visitor_type(_ctx), s);
 	          }
                }
             }
@@ -236,6 +236,7 @@ namespace eosio { namespace vm {
       }
 
     private:
+      using visitor_type = interpret_visitor<typename Impl::template context<Host>, detail::get_use_softfloat<Options>()>;
       wasm_allocator*         _walloc = nullptr; // non owning pointer
       module                  _mod;
       typename Impl::template context<Host> _ctx;
diff --git a/include/eosio/vm/interpret_visitor.hpp b/include/eosio/vm/interpret_visitor.hpp
index accbbae0..775b7ee5 100644
--- a/include/eosio/vm/interpret_visitor.hpp
+++ b/include/eosio/vm/interpret_visitor.hpp
@@ -12,11 +12,12 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
+#include <cmath>
 #include <limits>
 
 namespace eosio { namespace vm {
 
-   template <typename ExecutionContext>
+   template <typename ExecutionContext, bool use_softfloat = true>
    struct interpret_visitor : base_visitor {
       using base_visitor::operator();
       interpret_visitor(ExecutionContext& ec) : context(ec) {}
@@ -869,8 +870,14 @@ namespace eosio { namespace vm {
          auto&       lhs = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             lhs = _eosio_f32_min(lhs, rhs.to_f32());
-         else
-            lhs = __builtin_fminf(lhs, rhs.to_f32());
+         else {
+            if(std::isnan(lhs)) {}
+            else if(std::isnan(rhs.to_f32())) lhs = rhs.to_f32();
+            else if(lhs == 0.0f && rhs.to_f32() == 0.0f && std::signbit(lhs) != std::signbit(rhs.to_f32()))
+               lhs = -0.0f;
+            else
+               lhs = __builtin_fminf(lhs, rhs.to_f32());
+         }
       }
       [[gnu::always_inline]] inline void operator()(const f32_max_t& op) {
          context.inc_pc();
@@ -878,8 +885,14 @@ namespace eosio { namespace vm {
          auto&       lhs = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             lhs = _eosio_f32_max(lhs, rhs.to_f32());
-         else
+         else {
+            if(std::isnan(lhs)) {}
+            else if(std::isnan(rhs.to_f32())) lhs = rhs.to_f32();
+            else if(lhs == 0.0f && rhs.to_f32() == 0.0f && std::signbit(lhs) != std::signbit(rhs.to_f32()))
+               lhs = 0.0f;
+            else
             lhs = __builtin_fmaxf(lhs, rhs.to_f32());
+         }
       }
       [[gnu::always_inline]] inline void operator()(const f32_copysign_t& op) {
          context.inc_pc();
@@ -989,8 +1002,14 @@ namespace eosio { namespace vm {
          auto&       lhs = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             lhs = _eosio_f64_min(lhs, rhs.to_f64());
-         else
-            lhs = __builtin_fmin(lhs, rhs.to_f64());
+         else {
+            if(std::isnan(lhs)) {}
+            else if(std::isnan(rhs.to_f64())) lhs = rhs.to_f64();
+            else if(lhs == 0.0 && rhs.to_f64() == 0.0 && std::signbit(lhs) != std::signbit(rhs.to_f64()))
+               lhs = -0.0;
+            else
+               lhs = __builtin_fmin(lhs, rhs.to_f64());
+         }
       }
       [[gnu::always_inline]] inline void operator()(const f64_max_t& op) {
          context.inc_pc();
@@ -998,8 +1017,14 @@ namespace eosio { namespace vm {
          auto&       lhs = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             lhs = _eosio_f64_max(lhs, rhs.to_f64());
-         else
-            lhs = __builtin_fmax(lhs, rhs.to_f64());
+         else {
+            if(std::isnan(lhs)) {}
+            else if(std::isnan(rhs.to_f64())) lhs = rhs.to_f64();
+            else if(lhs == 0.0 && rhs.to_f64() == 0.0 && std::signbit(lhs) != std::signbit(rhs.to_f64()))
+               lhs = 0.0;
+            else
+               lhs = __builtin_fmax(lhs, rhs.to_f64());
+         }
       }
       [[gnu::always_inline]] inline void operator()(const f64_copysign_t& op) {
          context.inc_pc();
diff --git a/include/eosio/vm/x86_64.hpp b/include/eosio/vm/x86_64.hpp
index 6b87c30c..6cd30f35 100644
--- a/include/eosio/vm/x86_64.hpp
+++ b/include/eosio/vm/x86_64.hpp
@@ -1468,18 +1468,18 @@ namespace eosio { namespace vm {
 
         // mov %eax, %ecx
         emit_bytes(0x89, 0xc1);
-        // sar $31, %ecx
-        emit_bytes(0xc1, 0xf9, 0x1f);
-        // shr $1, %ecx
-        emit_bytes(0xd1, 0xe9);
-        // xor %eax, %ecx
-        emit_bytes(0x31, 0xc1);
         // mov %r8d, %edx
         emit_bytes(0x44, 0x89, 0xc2);
+        // sar $31, %ecx
+        emit_bytes(0xc1, 0xf9, 0x1f);
         // sar $31, %edx
         emit_bytes(0xc1, 0xfa, 0x1f);
+        // shr $1, %ecx
+        emit_bytes(0xd1, 0xe9);
         // shr $1, %edx
         emit_bytes(0xd1, 0xea);
+        // xor %eax, %ecx
+        emit_bytes(0x31, 0xc1);
         // xor %r8d, %edx
         emit_bytes(0x44, 0x31, 0xc2);
         // cmp %ecx, %edx
@@ -1501,32 +1501,53 @@ namespace eosio { namespace vm {
            emit_f32_binop_softfloat(CHOOSE_FN(_eosio_f32_max));
            return;
         }
-        // mov (%rsp), %eax
-        emit_bytes(0x8b, 0x04, 0x24);
-        // test %eax, %eax
-        emit_bytes(0x85, 0xc0);
-        // je ZERO
-        emit_bytes(0x0f, 0x84);
-        void* zero = emit_branch_target32();
-        // movss (%rsp), %xmm0
-        emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
-        // maxss 8(%rsp), %xmm0
-        emit_bytes(0xf3, 0x0f, 0x5f, 0x44, 0x24, 0x08);
-        // jmp DONE
-        emit_bytes(0xe9);
-        void* done = emit_branch_target32();
-        // ZERO:
-        fix_branch(zero, code);
+
         // movss 8(%rsp), %xmm0
         emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
-        // maxss (%rsp), %xmm0
-        emit_bytes(0xf3, 0x0f, 0x5f, 0x04, 0x24);
+        // ucomiss %xmm0, %xmm0
+        emit_bytes(0x0f, 0x2e, 0xc0);
+        // jp DONE
+        emit_bytes(0x7a, 0x30);
+        // movss (%rsp), %xmm0
+        emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
+        // ucomiss %xmm0, %xmm0
+        emit_bytes(0x0f, 0x2e, 0xc0);
+
+        // mov (%rsp), %eax
+        emit_bytes(0x8b, 0x04, 0x24);
+        // jp SECOND
+        emit_bytes(0x7a, 0x1f);
+        // mov 8(%rsp), %r8d
+        emit_bytes(0x44, 0x8b, 0x44, 0x24, 0x08);
+
+        // mov %eax, %ecx
+        emit_bytes(0x89, 0xc1);
+        // mov %r8d, %edx
+        emit_bytes(0x44, 0x89, 0xc2);
+        // sar $31, %ecx
+        emit_bytes(0xc1, 0xf9, 0x1f);
+        // sar $31, %edx
+        emit_bytes(0xc1, 0xfa, 0x1f);
+        // shr $1, %ecx
+        emit_bytes(0xd1, 0xe9);
+        // shr $1, %edx
+        emit_bytes(0xd1, 0xea);
+        // xor %eax, %ecx
+        emit_bytes(0x31, 0xc1);
+        // xor %r8d, %edx
+        emit_bytes(0x44, 0x31, 0xc2);
+        // cmp %ecx, %edx
+        emit_bytes(0x39, 0xca);
+        // cmovg %r8d, %eax
+        emit_bytes(0x41, 0x0f, 0x4f, 0xc0);
+
+        // SECOND:
+        // mov %eax, 8(%rsp)
+        emit_bytes(0x89, 0x44, 0x24, 0x08);
+
         // DONE:
-        fix_branch(done, code);
         // add $8, %rsp
         emit_bytes(0x48, 0x83, 0xc4, 0x08);
-        // movss %xmm0, (%rsp)
-        emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
 
       void emit_f32_copysign() {
@@ -1649,70 +1670,112 @@ namespace eosio { namespace vm {
          emit_f64_binop(0x5e, CHOOSE_FN(_eosio_f64_div));
       }
       void emit_f64_min() {
-         auto icount = softfloat_instr(49, 47);
+         auto icount = softfloat_instr(73, 47); // NOTE(review): the hardware-float path below emits 74 bytes by count, not 73 - confirm what softfloat_instr expects
          if(use_softfloat) {
             emit_f64_binop_softfloat(CHOOSE_FN(_eosio_f64_min));
             return;
          }
-         // mov (%rsp), %rax
-         emit_bytes(0x48, 0x8b, 0x04, 0x24);
-         // test %rax, %rax
-         emit_bytes(0x48, 0x85, 0xc0);
-         // je ZERO
-         emit_bytes(0x0f, 0x84);
-         void* zero = emit_branch_target32();
+
          // movsd 8(%rsp), %xmm0
          emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
-         // minsd (%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x5d, 0x04, 0x24);
-         // jmp DONE
-         emit_bytes(0xe9);
-         void* done = emit_branch_target32();
-         // ZERO:
-         fix_branch(zero, code);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE  (PF set => the value at 8(%rsp) is NaN; it stays in place as the result)
+         emit_bytes(0x7a, 0x3a);
          // movsd (%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
-         // minsd 8(%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x5d, 0x44, 0x24, 0x08);
+         emit_bytes(0xf2, 0x0f, 0x10, 0x0c, 0x24); // ModRM 0x0c: the destination here is %xmm1, not %xmm0 as the comment above says
+         // ucomisd %xmm1, %xmm1
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc9);
+
+         // mov (%rsp), %rax
+         emit_bytes(0x48, 0x8b, 0x04, 0x24);
+         // jp SECOND  (flags are still those of the ucomisd above; the value at (%rsp) is NaN, so store it as the result)
+         emit_bytes(0x7a, 0x26);
+         // mov 8(%rsp), %r8
+         emit_bytes(0x4c, 0x8b, 0x44, 0x24, 0x08);
+
+         // mov %rax, %rcx
+         emit_bytes(0x48, 0x89, 0xc1);
+         // mov %rax, %rdx  (NOTE(review): REX 0x48 makes %rax the source; mov %r8, %rdx would be 0x4c, 0x89, 0xc2. The selection appears unaffected because the sar/shr mask never touches bit 63 - confirm)
+         emit_bytes(0x48, 0x89, 0xc2);
+         // sar $63, %rcx
+         emit_bytes(0x48, 0xc1, 0xf9, 0x3f);
+         // sar $63, %rdx
+         emit_bytes(0x48, 0xc1, 0xfa, 0x3f);
+         // shr $1, %rcx
+         emit_bytes(0x48, 0xd1, 0xe9);
+         // shr $1, %rdx
+         emit_bytes(0x48, 0xd1, 0xea);
+         // xor %rax, %rcx
+         emit_bytes(0x48, 0x31, 0xc1);
+         // xor %r8, %rdx
+         emit_bytes(0x4c, 0x31, 0xc2);
+         // cmp %rcx, %rdx
+         emit_bytes(0x48, 0x39, 0xca);
+         // cmovl %r8, %rax  (take the 8(%rsp) operand when %rdx compares lower than %rcx as signed integers)
+         emit_bytes(0x49, 0x0f, 0x4c, 0xc0);
+
+         // SECOND:
+         // mov %rax, 8(%rsp)
+         emit_bytes(0x48, 0x89, 0x44, 0x24, 0x08);
+
          // DONE:
-         fix_branch(done, code);
          // add $8, %rsp
          emit_bytes(0x48, 0x83, 0xc4, 0x08);
-         // movsd %xmm0, (%rsp)
-         emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
       void emit_f64_max() {
-         auto icount = softfloat_instr(49, 47);
+         auto icount = softfloat_instr(73, 47); // NOTE(review): the hardware-float path below emits 74 bytes by count, not 73 - confirm what softfloat_instr expects
          if(use_softfloat) {
             emit_f64_binop_softfloat(CHOOSE_FN(_eosio_f64_max));
             return;
          }
-         // mov (%rsp), %rax
-         emit_bytes(0x48, 0x8b, 0x04, 0x24);
-         // test %rax, %rax
-         emit_bytes(0x48, 0x85, 0xc0);
-         // je ZERO
-         emit_bytes(0x0f, 0x84);
-         void* zero = emit_branch_target32();
-         // maxsd (%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
-         // maxsd 8(%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x5f, 0x44, 0x24, 0x08);
-         // jmp DONE
-         emit_bytes(0xe9);
-         void* done = emit_branch_target32();
-         // ZERO:
-         fix_branch(zero, code);
+
          // movsd 8(%rsp), %xmm0
          emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
-         // maxsd (%rsp), %xmm0
-         emit_bytes(0xf2, 0x0f, 0x5f, 0x04, 0x24);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE  (PF set => the value at 8(%rsp) is NaN; it stays in place as the result)
+         emit_bytes(0x7a, 0x3a);
+         // movsd (%rsp), %xmm1
+         emit_bytes(0xf2, 0x0f, 0x10, 0x0c, 0x24);
+         // ucomisd %xmm1, %xmm1
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc9);
+
+         // mov (%rsp), %rax
+         emit_bytes(0x48, 0x8b, 0x04, 0x24);
+         // jp SECOND  (flags are still those of the ucomisd above; the value at (%rsp) is NaN, so store it as the result)
+         emit_bytes(0x7a, 0x26);
+         // mov 8(%rsp), %r8
+         emit_bytes(0x4c, 0x8b, 0x44, 0x24, 0x08);
+
+         // mov %rax, %rcx
+         emit_bytes(0x48, 0x89, 0xc1);
+         // mov %rax, %rdx  (NOTE(review): REX 0x48 makes %rax the source; mov %r8, %rdx would be 0x4c, 0x89, 0xc2. The selection appears unaffected because the sar/shr mask never touches bit 63 - confirm)
+         emit_bytes(0x48, 0x89, 0xc2);
+         // sar $63, %rcx
+         emit_bytes(0x48, 0xc1, 0xf9, 0x3f);
+         // sar $63, %rdx
+         emit_bytes(0x48, 0xc1, 0xfa, 0x3f);
+         // shr $1, %rcx
+         emit_bytes(0x48, 0xd1, 0xe9);
+         // shr $1, %rdx
+         emit_bytes(0x48, 0xd1, 0xea);
+         // xor %rax, %rcx
+         emit_bytes(0x48, 0x31, 0xc1);
+         // xor %r8, %rdx
+         emit_bytes(0x4c, 0x31, 0xc2);
+         // cmp %rcx, %rdx
+         emit_bytes(0x48, 0x39, 0xca);
+         // cmovg %r8, %rax  (take the 8(%rsp) operand when %rdx compares greater than %rcx as signed integers)
+         emit_bytes(0x49, 0x0f, 0x4f, 0xc0);
+
+         // SECOND:
+         // mov %rax, 8(%rsp)
+         emit_bytes(0x48, 0x89, 0x44, 0x24, 0x08);
+
          // DONE:
-         fix_branch(done, code);
          // add $8, %rsp
          emit_bytes(0x48, 0x83, 0xc4, 0x08);
-         // movsd %xmm0, (%rsp)
-         emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
 
       void emit_f64_copysign() {
diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
index 7879de45..4ea9340e 100644
--- a/tests/float_tests.cpp
+++ b/tests/float_tests.cpp
@@ -119,3 +119,109 @@ TEST_CASE("test f32.min", "[float_tests]") {
       }
    }
 }
+
+/*
+ * (module
+ *  (func (export "fn") (param f32 f32) (result i64)
+ *   (local.get 0)
+ *   (local.get 1)
+ *   (f32.max)
+ *   (i32.reinterpret_f32)
+ *   (i64.extend_i32_u)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f32_max_wasm = {
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+   0x02, 0x7d, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0b, 0x01, 0x09, 0x00, 0x20, 0x00,
+   0x20, 0x01, 0x97, 0xbc, 0xad, 0x0b
+};
+
+TEST_CASE("test f32.max", "[float_tests]") {
+   multi_backend bkend{f32_max_wasm};
+   for(int i = 0; i < (1 << 11); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         for(int k = 0; k < (1 << 11); ++k) {
+            for(int l = -1; l <= 1; ++l) {
+               float arg1 = bit_cast<float>((static_cast<uint32_t>(i) << 21) + static_cast<uint32_t>(j));
+               float arg2 = bit_cast<float>((static_cast<uint32_t>(k) << 21) + static_cast<uint32_t>(l));
+               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
+               CHECK(x0 == x1);
+               CHECK(x1 == x2);
+               CHECK(x2 == x3);
+            }
+         }
+      }
+   }
+}
+
+/*
+ * (module
+ *  (func (export "fn") (param f64 f64) (result i64)
+ *   (local.get 0)
+ *   (local.get 1)
+ *   (f64.min)
+ *   (i64.reinterpret_f64)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f64_min_wasm = {
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+   0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
+   0x20, 0x01, 0xa4, 0xbd, 0x0b
+};
+
+TEST_CASE("test f64.min", "[float_tests]") {
+   multi_backend bkend{f64_min_wasm};
+   for(int i = 0; i < (1 << 14); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         for(int k = 0; k < (1 << 14); ++k) {
+            for(int l = -1; l <= 1; ++l) {
+               double arg1 = bit_cast<double>((static_cast<uint64_t>(i) << 50) + static_cast<uint64_t>(j));
+               double arg2 = bit_cast<double>((static_cast<uint64_t>(k) << 50) + static_cast<uint64_t>(l));
+               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
+               CHECK(x0 == x1);
+               CHECK(x1 == x2);
+               CHECK(x2 == x3);
+            }
+         }
+      }
+   }
+}
+
+/*
+ * (module
+ *  (func (export "fn") (param f64 f64) (result i64)
+ *   (local.get 0)
+ *   (local.get 1)
+ *   (f64.max)
+ *   (i64.reinterpret_f64)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f64_max_wasm = {
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+   0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
+   0x20, 0x01, 0xa5, 0xbd, 0x0b
+};
+
+TEST_CASE("test f64.max", "[float_tests]") {
+   multi_backend bkend{f64_max_wasm};
+   for(int i = 0; i < (1 << 14); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         for(int k = 0; k < (1 << 14); ++k) {
+            for(int l = -1; l <= 1; ++l) {
+               double arg1 = bit_cast<double>((static_cast<uint64_t>(i) << 50) + static_cast<uint64_t>(j));
+               double arg2 = bit_cast<double>((static_cast<uint64_t>(k) << 50) + static_cast<uint64_t>(l));
+               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
+               CHECK(x0 == x1);
+               CHECK(x1 == x2);
+               CHECK(x2 == x3);
+            }
+         }
+      }
+   }
+}

From 34cbfd1802a0cbd2a49ee95eb240da5933bc5588 Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Tue, 7 Jan 2020 12:14:15 -0500
Subject: [PATCH 3/7] Add test contract generator for NaN propagation. Disable
 slow float tests by default.

---
 include/eosio/vm/leb128.hpp |   6 +-
 tests/CMakeLists.txt        |   4 +
 tests/float_tests.cpp       |  57 +++++------
 tests/gen_float_test.cpp    | 196 ++++++++++++++++++++++++++++++++++++
 4 files changed, 228 insertions(+), 35 deletions(-)
 create mode 100644 tests/gen_float_test.cpp

diff --git a/include/eosio/vm/leb128.hpp b/include/eosio/vm/leb128.hpp
index 0de94d51..53da3182 100644
--- a/include/eosio/vm/leb128.hpp
+++ b/include/eosio/vm/leb128.hpp
@@ -91,7 +91,11 @@ namespace eosio { namespace vm {
             }
             std::cout << std::endl;
          }
-      
+
+         void to(std::streambuf* out) { // copies the first bytes_used bytes of storage into out
+            out->sputn(reinterpret_cast<char*>(&storage[0]), bytes_used); // sputn's return value (the count actually written) is ignored
+         }
+
       private:
          std::array<uint8_t, bytes_needed<N>()> storage;
          uint8_t bytes_used = bytes_needed<N>(); 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 95925664..bfe18642 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -121,5 +121,9 @@ target_compile_definitions(unit_tests PUBLIC -DCATCH_CONFIG_NO_POSIX_SIGNALS)
 
 add_executable( spec_test_generator ${CMAKE_CURRENT_SOURCE_DIR}/spec_test_generator/spec_test_generator.cpp )
 
+add_executable( gen_float_test gen_float_test.cpp )
+target_include_directories( gen_float_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include )
+target_link_libraries(gen_float_test eos-vm)
+
 catch_discover_tests( eos_vm_spec_tests )
 catch_discover_tests( unit_tests )
diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
index 4ea9340e..62b08ea4 100644
--- a/tests/float_tests.cpp
+++ b/tests/float_tests.cpp
@@ -16,23 +16,6 @@ struct hardfloat_config {
    static constexpr bool use_softfloat = false;
 };
 
-/*
- * (module
- *  (func (export "fn") (param f64) (result i64)
- *   (local.get 0)
- *   (f32.demote_f64)
- *   (i32.reinterpret_f32)
- *   (i64.extend_i32_u)
- *  )
- * )
- */
-std::vector<uint8_t> f32_demote_f64_wasm = {
-  0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
-  0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
-  0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0xb6,
-  0xbc, 0xad, 0x0b
-};
-
 struct multi_backend {
    explicit multi_backend(std::vector<uint8_t>& code) :
      soft_interpreter_backend(code),
@@ -59,24 +42,30 @@ struct multi_backend {
    }
 };
 
-TEST_CASE("test f32.demote_f64", "[float_tests]") {
-   backend<nullptr_t, interpreter, softfloat_config> soft_interpreter_backend(f32_demote_f64_wasm);
-   backend<nullptr_t, jit, softfloat_config> soft_jit_backend(f32_demote_f64_wasm);
-   backend<nullptr_t, interpreter, hardfloat_config> hard_interpreter_backend(f32_demote_f64_wasm);
-   backend<nullptr_t, jit, hardfloat_config> hard_jit_backend(f32_demote_f64_wasm);
-   soft_interpreter_backend.set_wasm_allocator(&wa);
-   hard_interpreter_backend.set_wasm_allocator(&wa);
-   soft_jit_backend.set_wasm_allocator(&wa);
-   hard_jit_backend.set_wasm_allocator(&wa);
+/*
+ * (module
+ *  (func (export "fn") (param f64) (result i64)
+ *   (local.get 0)
+ *   (f32.demote_f64)
+ *   (i32.reinterpret_f32)
+ *   (i64.extend_i32_u)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f32_demote_f64_wasm = {
+  0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+  0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+  0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0xb6,
+  0xbc, 0xad, 0x0b
+};
 
+TEST_CASE("test f32.demote_f64", "[.float_tests]") {
+   multi_backend bkend{f32_demote_f64_wasm};
    for(int i = 0; i < (1 << 16); ++i) {
       for(int j = -1; j <= 1; ++j) {
          uint64_t argn = (static_cast<uint64_t>(i) << 48) + static_cast<uint64_t>(j);
          double arg = bit_cast<double>(argn);
-         auto x0 = soft_interpreter_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
-         auto x1 = soft_jit_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
-         auto x2 = hard_interpreter_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
-         auto x3 = hard_jit_backend.call_with_return(nullptr, "env", "fn", arg)->to_ui64();
+         auto [x0, x1, x2, x3] = bkend.call_with_return(arg);
          CHECK(x0 == x1);
          CHECK(x1 == x2);
          CHECK(x2 == x3);
@@ -102,7 +91,7 @@ std::vector<uint8_t> f32_min_wasm = {
    0x20, 0x01, 0x96, 0xbc, 0xad, 0x0b
 };
 
-TEST_CASE("test f32.min", "[float_tests]") {
+TEST_CASE("test f32.min", "[.float_tests]") {
    multi_backend bkend{f32_min_wasm};
    for(int i = 0; i < (1 << 11); ++i) {
       for(int j = -1; j <= 1; ++j) {
@@ -138,7 +127,7 @@ std::vector<uint8_t> f32_max_wasm = {
    0x20, 0x01, 0x97, 0xbc, 0xad, 0x0b
 };
 
-TEST_CASE("test f32.max", "[float_tests]") {
+TEST_CASE("test f32.max", "[.float_tests]") {
    multi_backend bkend{f32_max_wasm};
    for(int i = 0; i < (1 << 11); ++i) {
       for(int j = -1; j <= 1; ++j) {
@@ -173,7 +162,7 @@ std::vector<uint8_t> f64_min_wasm = {
    0x20, 0x01, 0xa4, 0xbd, 0x0b
 };
 
-TEST_CASE("test f64.min", "[float_tests]") {
+TEST_CASE("test f64.min", "[.float_tests]") {
    multi_backend bkend{f64_min_wasm};
    for(int i = 0; i < (1 << 14); ++i) {
       for(int j = -1; j <= 1; ++j) {
@@ -208,7 +197,7 @@ std::vector<uint8_t> f64_max_wasm = {
    0x20, 0x01, 0xa5, 0xbd, 0x0b
 };
 
-TEST_CASE("test f64.max", "[float_tests]") {
+TEST_CASE("test f64.max", "[.float_tests]") {
    multi_backend bkend{f64_max_wasm};
    for(int i = 0; i < (1 << 14); ++i) {
       for(int j = -1; j <= 1; ++j) {
diff --git a/tests/gen_float_test.cpp b/tests/gen_float_test.cpp
new file mode 100644
index 00000000..8ef945e3
--- /dev/null
+++ b/tests/gen_float_test.cpp
@@ -0,0 +1,196 @@
+/*
+ * This program generates contracts that test NaN propagation in floating point instructions.
+ *
+ * It uses the eos-vm interpreter to generate the reference values.
+ *
+ * On failure, the contract prints the arguments that caused the failure
+ * as a hex blob.
+ */
+
+#include <iostream>
+#include <vector>
+#include <eosio/vm/backend.hpp>
+#include "utils.hpp"
+
+using namespace eosio::vm;
+
+std::vector<uint64_t> f64_values = {
+   0x0000'0000'0000'0000, // 0
+   0x7FF0'0000'0000'0000, // inf
+   0x7FF0'0000'0000'0001, // SNaN
+   0x7FF8'0000'0000'0000, // QNaN
+   0x7FF4'0000'0000'0000, // SNaN
+   0x7FFC'0000'0000'0000, // QNaN
+   0x0000'0000'0000'0001, // denorm min
+   0x000F'FFFF'FFFF'FFFF, // largest denormal
+   0x7FEF'FFFF'FFFF'FFFF, // max
+};
+
+uint64_t calc_f64_unop_result(uint64_t x, unsigned char op) { // returns the bit pattern produced by instruction op for the double whose bits are x
+   std::vector<uint8_t> f64_unop_wasm = { // (func (export "fn") (param f64) (result i64)): local.get 0, op, i64.reinterpret_f64
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+      0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+      0x66, 0x6e, 0x00, 0x00, 0x0a, 0x08, 0x01, 0x06, 0x00, 0x20, 0x00, op,
+      0xbd, 0x0b
+   };
+   backend<nullptr_t> bkend{f64_unop_wasm}; // a new backend is constructed on every call
+   return bkend.call_with_return(nullptr, "env", "fn", bit_cast<double>(x))->to_ui64();
+}
+
+std::vector<unsigned char> f64_unop_prefix1{ // wasm header plus type, import, function, memory and export sections, ending with the code section id
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x14, 0x04, 0x60,
+   0x02, 0x7f, 0x7f, 0x00, 0x60, 0x00, 0x00, 0x60, 0x02, 0x7c, 0x7c, 0x00, // type 2 = (f64, f64) -> (): signature of the checker
+   0x60, 0x03, 0x7e, 0x7e, 0x7e, 0x00, 0x02, 0x1c, 0x02, 0x03, 0x65, 0x6e, // type 3 = (i64, i64, i64) -> (): signature of apply
+   0x76, 0x08, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x68, 0x65, 0x78, 0x00, 0x00, // import "env"."printhex" with type 0 -> function index 0
+   0x03, 0x65, 0x6e, 0x76, 0x05, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x00, 0x01, // import "env"."abort" with type 1 -> function index 1
+   0x03, 0x03, 0x02, 0x02, 0x03, 0x05, 0x03, 0x01, 0x00, 0x01, 0x07, 0x09, // two defined functions (types 2 and 3); one memory, minimum 1 page
+   0x01, 0x05, 0x61, 0x70, 0x70, 0x6c, 0x79, 0x00, 0x03, 0x0a // export function 3 as "apply"; 0x0a = code section id
+};
+std::vector<unsigned char> f64_unop_prefix2{ // emitted right after the computed code section size
+   /*0x42,*/ 0x02, // 0x42 is presumably the section size of the one-test template; the real size is written by write_f64_unop_prefix. 0x02 = number of bodies
+   0x29, 0x00, 0x20, 0x00 // checker body: 0x29 (41) bytes, no locals, local.get 0; the opcode under test follows
+};
+std::vector<unsigned char> f64_unop_prefix3{ // rest of the checker, emitted right after the opcode under test
+   /*0x9f*/ 0xbd, 0x20, 0x01, 0xbd, 0x52, 0x04, 0x40, // (0x9f marks where the opcode goes) i64.reinterpret_f64, local.get 1, i64.reinterpret_f64, i64.ne, if
+   0x41, 0x00, 0x20, 0x00, 0x39, 0x03, 0x00, 0x41, 0x00, 0x41, 0x08, 0x10, // store param 0 at address 0, then call printhex(0, 8) ...
+   0x00, 0x41, 0x00, 0x20, 0x01, 0x39, 0x03, 0x00, 0x41, 0x00, 0x41, 0x08, // ... store param 1 (the expected result) at address 0 ...
+   0x10, 0x00, 0x10, 0x01, 0x0b, 0x0b // ... call printhex(0, 8), call abort, end if, end function
+};
+
+// Appends one test to the body of apply: f64.const arg0, f64.const result, call 2. Always 20 bytes (1 + 8 + 1 + 8 + 2).
+void write_one_f64_unop_test(std::streambuf* out, uint64_t arg0, uint64_t result) {
+   out->sputc(0x44); // f64.const
+   out->sputn(reinterpret_cast<char*>(&arg0), sizeof(arg0)); // raw object bytes; wasm immediates are little-endian, so this assumes a little-endian host
+   out->sputc(0x44); // f64.const
+   out->sputn(reinterpret_cast<char*>(&result), sizeof(result)); // expected bit pattern, compared by the checker
+   out->sputc(0x10); // call ...
+   out->sputc(0x02); // ... function index 2, the checker defined by the prefix
+}
+
+void write_f64_unop_prefix(std::streambuf* out, std::size_t tests_size, unsigned char op) { // writes everything that precedes the tests; tests_size = total bytes of the tests that follow
+   varuint<32> fn_size(static_cast<uint32_t>(tests_size + 2)); // body of apply = locals-count byte + tests + end opcode
+   varuint<32> code_size(static_cast<uint32_t>(tests_size + fn_size.size() + 0x42 - 20 - 1)); // 0x42 - 20 - 1 = 45 = body count (1) + checker size byte (1) + checker body (41) + the 2 extra bytes of apply
+   out->sputn((char*)f64_unop_prefix1.data(), f64_unop_prefix1.size());
+   code_size.to(out);
+   out->sputn((char*)f64_unop_prefix2.data(), f64_unop_prefix2.size());
+   out->sputc((char)op); // the instruction under test, placed inside the checker
+   out->sputn((char*)f64_unop_prefix3.data(), f64_unop_prefix3.size());
+   fn_size.to(out);
+   out->sputc((char)0); // apply declares no locals; its closing end opcode is written by the caller
+}
+
+void make_f64_unop_tests(std::streambuf* out, unsigned char op) { // writes a complete test module for unary instruction op to out
+   write_f64_unop_prefix(out, 20*2*f64_values.size(), op); // 20 bytes per test, one test per value and sign
+   for(uint64_t signbitx : {0ull, (1ull << 63)}) {
+      for(uint64_t x : f64_values) {
+         x |= signbitx; // x is a per-iteration copy, so f64_values itself is not modified
+         uint64_t r = calc_f64_unop_result(x, op); // reference result
+         write_one_f64_unop_test(out, x, r);
+      }
+   }
+   out->sputc((char)0x0b); // end opcode closing the body of apply
+}
+
+uint64_t calc_f64_binop_result(uint64_t x, uint64_t y, unsigned char op) { // returns the bit pattern produced by instruction op for the doubles whose bits are x and y
+   std::vector<uint8_t> f64_binop_wasm = { // (func (export "fn") (param f64 f64) (result i64)): local.get 0, local.get 1, op, i64.reinterpret_f64
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+      0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+      0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
+      0x20, 0x01, op, 0xbd, 0x0b
+   };
+   backend<nullptr_t> bkend{f64_binop_wasm}; // a new backend is constructed on every call
+   return bkend.call_with_return(nullptr, "env", "fn", bit_cast<double>(x), bit_cast<double>(y))->to_ui64();
+}
+
+std::vector<unsigned char> f64_binop_prefix1{ // wasm header plus type, import, function, memory and export sections, ending with the code section id
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x15, 0x04, 0x60,
+   0x02, 0x7f, 0x7f, 0x00, 0x60, 0x00, 0x00, 0x60, 0x03, 0x7c, 0x7c, 0x7c, // type 2 = (f64, f64, f64) -> (): signature of the checker
+   0x00, 0x60, 0x03, 0x7e, 0x7e, 0x7e, 0x00, 0x02, 0x1c, 0x02, 0x03, 0x65, // type 3 = (i64, i64, i64) -> (): signature of apply
+   0x6e, 0x76, 0x08, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x68, 0x65, 0x78, 0x00, // import "env"."printhex" (function index 0)
+   0x00, 0x03, 0x65, 0x6e, 0x76, 0x05, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x00, // import "env"."abort" (function index 1)
+   0x01, 0x03, 0x03, 0x02, 0x02, 0x03, 0x05, 0x03, 0x01, 0x00, 0x01, 0x07, // two defined functions (types 2 and 3); one memory, minimum 1 page
+   0x09, 0x01, 0x05, 0x61, 0x70, 0x70, 0x6c, 0x79, 0x00, 0x03, 0x0a // export function 3 as "apply"; 0x0a = code section id
+};
+
+std::vector<unsigned char> f64_binop_prefix2{ // emitted right after the computed code section size
+  0x02, 0x2b, 0x00, 0x20, 0x00, 0x20, 0x01 // 2 bodies; checker body is 0x2b (43) bytes: no locals, local.get 0, local.get 1; the opcode under test follows
+};
+
+std::vector<unsigned char> f64_binop_prefix3{ // rest of the checker, emitted right after the opcode under test
+  0xbd, 0x20, 0x02, 0xbd, // i64.reinterpret_f64, local.get 2 (expected result), i64.reinterpret_f64
+  0x52, 0x04, 0x40, 0x41, 0x00, 0x20, 0x00, 0x39, 0x03, 0x00, 0x41, 0x00, // i64.ne, if: store param 0 at address 0 ...
+  0x41, 0x08, 0x10, 0x00, 0x41, 0x00, 0x20, 0x01, 0x39, 0x03, 0x00, 0x41, // ... call printhex(0, 8), store param 1 at address 0 ...
+  0x00, 0x41, 0x08, 0x10, 0x00, 0x10, 0x01, 0x0b, 0x0b // ... call printhex(0, 8), call abort, end if, end function
+};
+
+// Appends one test to the body of apply: three f64.const (arg0, arg1, result) and call 2. Always 29 bytes (3 * 9 + 2).
+void write_one_f64_binop_test(std::streambuf* out, uint64_t arg0, uint64_t arg1, uint64_t result) {
+   out->sputc(0x44); // f64.const
+   out->sputn(reinterpret_cast<char*>(&arg0), sizeof(arg0)); // raw object bytes; wasm immediates are little-endian, so this assumes a little-endian host
+   out->sputc(0x44); // f64.const
+   out->sputn(reinterpret_cast<char*>(&arg1), sizeof(arg1));
+   out->sputc(0x44); // f64.const
+   out->sputn(reinterpret_cast<char*>(&result), sizeof(result)); // expected bit pattern, compared by the checker
+   out->sputc(0x10); // call ...
+   out->sputc(0x02); // ... function index 2, the checker defined by the prefix
+}
+
+void write_f64_binop_prefix(std::streambuf* out, std::size_t tests_size, unsigned char op) { // writes everything that precedes the tests; tests_size = total bytes of the tests that follow
+   varuint<32> fn_size(static_cast<uint32_t>(tests_size + 2)); // body of apply = locals-count byte + tests + end opcode
+   varuint<32> code_size(static_cast<uint32_t>(tests_size + fn_size.size() + 0x4d - 29 - 1)); // 0x4d - 29 - 1 = 47 = body count (1) + checker size byte (1) + checker body (43) + the 2 extra bytes of apply
+   out->sputn((char*)f64_binop_prefix1.data(), f64_binop_prefix1.size());
+   code_size.to(out);
+   out->sputn((char*)f64_binop_prefix2.data(), f64_binop_prefix2.size());
+   out->sputc((char)op); // the instruction under test, placed inside the checker
+   out->sputn((char*)f64_binop_prefix3.data(), f64_binop_prefix3.size());
+   fn_size.to(out);
+   out->sputc((char)0); // apply declares no locals; its closing end opcode is written by the caller
+}
+
+void make_f64_binop_tests(std::streambuf* out, unsigned char op) { // writes a complete test module for binary instruction op to out
+   write_f64_binop_prefix(out, 29*4*f64_values.size()*f64_values.size(), op); // 29 bytes per test; 2 signs x 2 signs x values^2 tests
+   for(uint64_t signbitx : {0ull, (1ull << 63)}) {
+      for(uint64_t x : f64_values) {
+         x |= signbitx; // invariant for the two inner loops, so the sign is applied once per x
+         for(uint64_t signbity : {0ull, (1ull << 63)}) {
+            for(uint64_t y : f64_values) {
+               y |= signbity; // y is a per-iteration copy, so f64_values itself is not modified
+               uint64_t r = calc_f64_binop_result(x, y, op); // reference result
+               write_one_f64_binop_test(out, x, y, r);
+            }
+         }
+      }
+   }
+   out->sputc((char)0x0b); // end opcode closing the body of apply
+}
+
+// Usage: gen_float_test <instr name>
+// Writes the generated test contract for the named f64 instruction to stdout.
+// Returns 0 on success, 2 on a usage error or an unrecognized instruction name.
+int main(int argc, const char** argv) {
+   if(argc != 2) {
+      std::cerr << "Usage: gen_float_test <instr name>\n";
+      return 2;
+   }
+   // Supported instructions with their wasm opcodes; binary ops take two f64 operands.
+   struct instr_entry { const char* name; unsigned char op; bool is_binop; };
+   static const instr_entry instrs[] = {
+      {"f64.ceil",  0x9b, false}, {"f64.floor",   0x9c, false},
+      {"f64.trunc", 0x9d, false}, {"f64.nearest", 0x9e, false},
+      {"f64.sqrt",  0x9f, false},
+      {"f64.add",   0xa0, true},  {"f64.sub",     0xa1, true},
+      {"f64.mul",   0xa2, true},  {"f64.div",     0xa3, true},
+      {"f64.min",   0xa4, true},  {"f64.max",     0xa5, true},
+   };
+   const std::string instr{argv[1]};
+   for(const auto& entry : instrs) {
+      if(instr != entry.name) continue;
+      if(entry.is_binop) make_f64_binop_tests(std::cout.rdbuf(), entry.op);
+      else               make_f64_unop_tests(std::cout.rdbuf(), entry.op);
+      return 0;
+   }
+   // Unknown name: report it and fail rather than writing nothing to stdout
+   // and exiting with a success status.
+   std::cerr << "gen_float_test: unknown instruction '" << instr << "'\n";
+   return 2;
+}

From 3a1f98c48307e896eeb42479c4f5b7f956b28ce5 Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Tue, 7 Jan 2020 15:05:28 -0500
Subject: [PATCH 4/7] Match eosio in rounding instructions.

---
 include/eosio/vm/interpret_visitor.hpp |  16 ++--
 include/eosio/vm/x86_64.hpp            |  94 +++++++++++++++------
 tests/float_tests.cpp                  | 108 ++++++++++++++++++++++++-
 3 files changed, 183 insertions(+), 35 deletions(-)

diff --git a/include/eosio/vm/interpret_visitor.hpp b/include/eosio/vm/interpret_visitor.hpp
index 775b7ee5..0dc144ae 100644
--- a/include/eosio/vm/interpret_visitor.hpp
+++ b/include/eosio/vm/interpret_visitor.hpp
@@ -793,7 +793,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             oper = _eosio_f32_ceil(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_ceilf(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f32_floor_t& op) {
@@ -801,7 +801,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             oper = _eosio_f32_floor(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_floorf(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f32_trunc_t& op) {
@@ -809,7 +809,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             oper = _eosio_f32_trunc(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_trunc(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f32_nearest_t& op) {
@@ -817,7 +817,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f32();
          if constexpr (use_softfloat)
             oper = _eosio_f32_nearest(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_nearbyintf(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f32_sqrt_t& op) {
@@ -925,7 +925,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             oper = _eosio_f64_ceil(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_ceil(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f64_floor_t& op) {
@@ -933,7 +933,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             oper = _eosio_f64_floor(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_floor(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f64_trunc_t& op) {
@@ -941,7 +941,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             oper = _eosio_f64_trunc(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_trunc(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f64_nearest_t& op) {
@@ -949,7 +949,7 @@ namespace eosio { namespace vm {
          auto& oper = context.peek_operand().to_f64();
          if constexpr (use_softfloat)
             oper = _eosio_f64_nearest(oper);
-         else
+         else if(!std::isnan(oper))
             oper = __builtin_nearbyint(oper);
       }
       [[gnu::always_inline]] inline void operator()(const f64_sqrt_t& op) {
diff --git a/include/eosio/vm/x86_64.hpp b/include/eosio/vm/x86_64.hpp
index 6cd30f35..3732ea13 100644
--- a/include/eosio/vm/x86_64.hpp
+++ b/include/eosio/vm/x86_64.hpp
@@ -1373,8 +1373,14 @@ namespace eosio { namespace vm {
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f32_ceil));
          }
-         // roundss 0b1010, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x0a);
+         // movss (%rsp), %xmm0
+         emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
+         // ucomiss %xmm0, %xmm0
+         emit_bytes(0x0f, 0x2e, 0xc0);
+         // jp LDONE
+         emit_bytes(0x7a, 0x0b);
+         // roundss 0b1010, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0xc0, 0x0a);
          // movss %xmm0, (%rsp)
          emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
@@ -1384,8 +1390,14 @@ namespace eosio { namespace vm {
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f32_floor));
          }
-         // roundss 0b1001, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x09);
+         // movss (%rsp), %xmm0
+         emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
+         // ucomiss %xmm0, %xmm0
+         emit_bytes(0x0f, 0x2e, 0xc0);
+         // jp LDONE
+         emit_bytes(0x7a, 0x0b);
+         // roundss 0b1001, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0xc0, 0x09);
          // movss %xmm0, (%rsp)
          emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
@@ -1395,8 +1407,14 @@ namespace eosio { namespace vm {
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f32_trunc));
          }
-         // roundss 0b1011, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x0b);
+         // movss (%rsp), %xmm0
+         emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
+         // ucomiss %xmm0, %xmm0
+         emit_bytes(0x0f, 0x2e, 0xc0);
+         // jp LDONE
+         emit_bytes(0x7a, 0x0b);
+         // roundss 0b1011, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0xc0, 0x0b);
          // movss %xmm0, (%rsp)
          emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
@@ -1406,8 +1424,14 @@ namespace eosio { namespace vm {
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f32_nearest));
          }
-         // roundss 0b1000, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x08);
+         // movss (%rsp), %xmm0
+         emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
+         // ucomiss %xmm0, %xmm0
+         emit_bytes(0x0f, 0x2e, 0xc0);
+         // jp LDONE
+         emit_bytes(0x7a, 0x0b);
+         // roundss 0b1000, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0xc0, 0x08);
          // movss %xmm0, (%rsp)
          emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
       }
@@ -1597,46 +1621,70 @@ namespace eosio { namespace vm {
       }
 
       void emit_f64_ceil() {
-         auto icount = softfloat_instr(12, 38);
+         auto icount = softfloat_instr(22, 38);
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f64_ceil));
          }
-         // roundsd 0b1010, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x0a);
+         // movsd (%rsp), %xmm0
+         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE
+         emit_bytes(0x7a, 0x0b);
+         // roundsd 0b1010, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0xc0, 0x0a);
          // movsd %xmm0, (%rsp)
          emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
 
       void emit_f64_floor() {
-         auto icount = softfloat_instr(12, 38);
+         auto icount = softfloat_instr(22, 38);
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f64_floor));
          }
-         // roundsd 0b1001, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x09);
-         // movss %xmm0, (%rsp)
+         // movsd (%rsp), %xmm0
+         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE
+         emit_bytes(0x7a, 0x0b);
+         // roundsd 0b1001, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0xc0, 0x09);
+         // movsd %xmm0, (%rsp)
          emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
 
       void emit_f64_trunc() {
-         auto icount = softfloat_instr(12, 38);
+         auto icount = softfloat_instr(22, 38);
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f64_trunc));
          }
-         // roundsd 0b1011, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x0b);
-         // movss %xmm0, (%rsp)
+         // movsd (%rsp), %xmm0
+         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE
+         emit_bytes(0x7a, 0x0b);
+         // roundsd 0b1011, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0xc0, 0x0b);
+         // movsd %xmm0, (%rsp)
          emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
 
       void emit_f64_nearest() {
-         auto icount = softfloat_instr(12, 38);
+         auto icount = softfloat_instr(22, 38);
          if constexpr (use_softfloat) {
             return emit_softfloat_unop(CHOOSE_FN(_eosio_f64_nearest));
          }
-         // roundsd 0b1000, (%rsp), %xmm0
-         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x08);
-         // movss %xmm0, (%rsp)
+         // movsd (%rsp), %xmm0
+         emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
+         // ucomisd %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x2e, 0xc0);
+         // jp DONE
+         emit_bytes(0x7a, 0x0b);
+         // roundsd 0b1000, %xmm0, %xmm0
+         emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0xc0, 0x08);
+         // movsd %xmm0, (%rsp)
          emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
       }
 
diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
index 62b08ea4..ceecd9b0 100644
--- a/tests/float_tests.cpp
+++ b/tests/float_tests.cpp
@@ -53,10 +53,10 @@ struct multi_backend {
  * )
  */
 std::vector<uint8_t> f32_demote_f64_wasm = {
-  0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
-  0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
-  0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0xb6,
-  0xbc, 0xad, 0x0b
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+   0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+   0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0xb6,
+   0xbc, 0xad, 0x0b
 };
 
 TEST_CASE("test f32.demote_f64", "[.float_tests]") {
@@ -73,6 +73,106 @@ TEST_CASE("test f32.demote_f64", "[.float_tests]") {
    }
 }
 
+void test_f64_unop(uint8_t op) { // op: wasm opcode of the f64 -> f64 instruction under test
+   /*
+    * (module
+    *  (func (export "fn") (param f64) (result i64)
+    *   (local.get 0)
+    *   op
+    *   (i64.reinterpret_f64)
+    *  )
+    * )
+    */
+   std::vector<uint8_t> f64_unop_wasm = {
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+      0x01, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+      0x66, 0x6e, 0x00, 0x00, 0x0a, 0x08, 0x01, 0x06, 0x00, 0x20, 0x00, op,
+      0xbd, 0x0b
+   };
+
+   multi_backend bkend{f64_unop_wasm};
+   for(int i = 0; i < (1 << 16); ++i) { // every value of the top 16 bits: sign, 11 exponent bits, top 4 mantissa bits
+      for(int j = -1; j <= 1; ++j) { // j = -1 wraps to (i << 48) - 1, so each boundary is probed just below, at and just above
+         uint64_t argn = (static_cast<uint64_t>(i) << 48) + static_cast<uint64_t>(j);
+         double arg = bit_cast<double>(argn);
+         auto [x0, x1, x2, x3] = bkend.call_with_return(arg); // four results from multi_backend; all must be identical
+         CHECK(x0 == x1);
+         CHECK(x1 == x2);
+         CHECK(x2 == x3);
+      }
+   }
+}
+
+TEST_CASE("test f64.ceil", "[.float_tests]") {
+   test_f64_unop(0x9b);
+}
+
+TEST_CASE("test f64.floor", "[.float_tests]") {
+   test_f64_unop(0x9c);
+}
+
+TEST_CASE("test f64.trunc", "[.float_tests]") {
+   test_f64_unop(0x9d);
+}
+
+TEST_CASE("test f64.nearest", "[.float_tests]") {
+   test_f64_unop(0x9e);
+}
+
+TEST_CASE("test f64.sqrt", "[.float_tests]") {
+   test_f64_unop(0x9f);
+}
+
+void test_f32_unop(uint8_t op) { // op: wasm opcode of the f32 -> f32 instruction under test
+   /*
+    * (module
+    *  (func (export "fn") (param f32) (result i64)
+    *   (local.get 0)
+    *   op
+    *   (i32.reinterpret_f32) (i64.extend_i32_u)
+    *  )
+    * )
+    */
+   std::vector<uint8_t> f32_unop_wasm = {
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+      0x01, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+      0x66, 0x6e, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, op,
+      0xbc, 0xad, 0x0b
+   };
+
+   multi_backend bkend{f32_unop_wasm};
+   for(int i = 0; i < (1 << 16); ++i) { // every value of the top 16 bits: sign, 8 exponent bits, top 7 mantissa bits
+      for(int j = -1; j <= 1; ++j) { // j = -1 wraps to (i << 16) - 1, so each boundary is probed just below, at and just above
+         uint32_t argn = (static_cast<uint32_t>(i) << 16) + static_cast<uint32_t>(j);
+         float arg = bit_cast<float>(argn);
+         auto [x0, x1, x2, x3] = bkend.call_with_return(arg); // four results from multi_backend; all must be identical
+         CHECK(x0 == x1);
+         CHECK(x1 == x2);
+         CHECK(x2 == x3);
+      }
+   }
+}
+
+TEST_CASE("test f32.ceil", "[.float_tests]") {
+   test_f32_unop(0x8d);
+}
+
+TEST_CASE("test f32.floor", "[.float_tests]") {
+   test_f32_unop(0x8e);
+}
+
+TEST_CASE("test f32.trunc", "[.float_tests]") {
+   test_f32_unop(0x8f);
+}
+
+TEST_CASE("test f32.nearest", "[.float_tests]") {
+   test_f32_unop(0x90);
+}
+
+TEST_CASE("test f32.sqrt", "[.float_tests]") {
+   test_f32_unop(0x91);
+}
+
 /*
  * (module
  *  (func (export "fn") (param f32 f32) (result i64)

From f8a4ba3d10b0a8eca46080bdb348678af481947b Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Tue, 7 Jan 2020 16:40:29 -0500
Subject: [PATCH 5/7] Add more tests for floating point ops.

---
 tests/float_tests.cpp | 212 ++++++++++++++++++++++--------------------
 1 file changed, 112 insertions(+), 100 deletions(-)

diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
index ceecd9b0..8b66e108 100644
--- a/tests/float_tests.cpp
+++ b/tests/float_tests.cpp
@@ -1,3 +1,8 @@
+/*
+ * Tests that all floating point variations produce identical behavior.
+ * These tests are very slow and are disabled by default.
+ */
+
 #include <eosio/vm/backend.hpp>
 #include <random>
 
@@ -73,6 +78,36 @@ TEST_CASE("test f32.demote_f64", "[.float_tests]") {
    }
 }
 
+/*
+ * (module
+ *  (func (export "fn") (param f32) (result i64)
+ *   (local.get 0)
+ *   (f64.promote_f32)
+ *   (i64.reinterpret_f64)
+ *  )
+ * )
+ */
+std::vector<uint8_t> f64_promote_f32_wasm = {
+   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60,
+   0x01, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01, 0x02,
+   0x66, 0x6e, 0x00, 0x00, 0x0a, 0x08, 0x01, 0x06, 0x00, 0x20, 0x00, 0xbb,
+   0xbd, 0x0b
+};
+
+TEST_CASE("test f64.promote_f32", "[.float_tests]") {
+   multi_backend bkend{f64_promote_f32_wasm};
+   for(int i = 0; i < (1 << 16); ++i) {
+      for(int j = -1; j <= 1; ++j) {
+         uint32_t argn = (static_cast<uint32_t>(i) << 16) + static_cast<uint32_t>(j);
+         float arg = bit_cast<float>(argn);
+         auto [x0, x1, x2, x3] = bkend.call_with_return(arg);
+         CHECK(x0 == x1);
+         CHECK(x1 == x2);
+         CHECK(x2 == x3);
+      }
+   }
+}
+
 void test_f64_unop(uint8_t op) {
    /*
     * (module
@@ -173,26 +208,26 @@ TEST_CASE("test f32.sqrt", "[.float_tests]") {
    test_f32_unop(0x91);
 }
 
-/*
- * (module
- *  (func (export "fn") (param f32 f32) (result i64)
- *   (local.get 0)
- *   (local.get 1)
- *   (f32.min)
- *   (i32.reinterpret_f32)
- *   (i64.extend_i32_u)
- *  )
- * )
- */
-std::vector<uint8_t> f32_min_wasm = {
-   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
-   0x02, 0x7d, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
-   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0b, 0x01, 0x09, 0x00, 0x20, 0x00,
-   0x20, 0x01, 0x96, 0xbc, 0xad, 0x0b
-};
+void test_f32_binop(uint8_t op) {
+   /*
+    * (module
+    *  (func (export "fn") (param f32 f32) (result i64)
+    *   (local.get 0)
+    *   (local.get 1)
+    *   (op)
+    *   (i32.reinterpret_f32)
+    *   (i64.extend_i32_u)
+    *  )
+    * )
+    */
+   std::vector<uint8_t> f32_binop_wasm = {
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+      0x02, 0x7d, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+      0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0b, 0x01, 0x09, 0x00, 0x20, 0x00,
+      0x20, 0x01, op, 0xbc, 0xad, 0x0b
+   };
 
-TEST_CASE("test f32.min", "[.float_tests]") {
-   multi_backend bkend{f32_min_wasm};
+   multi_backend bkend{f32_binop_wasm};
    for(int i = 0; i < (1 << 11); ++i) {
       for(int j = -1; j <= 1; ++j) {
          for(int k = 0; k < (1 << 11); ++k) {
@@ -209,61 +244,49 @@ TEST_CASE("test f32.min", "[.float_tests]") {
    }
 }
 
-/*
- * (module
- *  (func (export "fn") (param f32 f32) (result i64)
- *   (local.get 0)
- *   (local.get 1)
- *   (f32.max)
- *   (i32.reinterpret_f32)
- *   (i64.extend_i32_u)
- *  )
- * )
- */
-std::vector<uint8_t> f32_max_wasm = {
-   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
-   0x02, 0x7d, 0x7d, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
-   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0b, 0x01, 0x09, 0x00, 0x20, 0x00,
-   0x20, 0x01, 0x97, 0xbc, 0xad, 0x0b
-};
+TEST_CASE("test f32.add", "[.float_tests]") {
+   test_f32_binop(0x92);
+}
+
+TEST_CASE("test f32.sub", "[.float_tests]") {
+   test_f32_binop(0x93);
+}
+
+TEST_CASE("test f32.mul", "[.float_tests]") {
+   test_f32_binop(0x94);
+}
+
+TEST_CASE("test f32.div", "[.float_tests]") {
+   test_f32_binop(0x95);
+}
+
+TEST_CASE("test f32.min", "[.float_tests]") {
+   test_f32_binop(0x96);
+}
 
 TEST_CASE("test f32.max", "[.float_tests]") {
-   multi_backend bkend{f32_max_wasm};
-   for(int i = 0; i < (1 << 11); ++i) {
-      for(int j = -1; j <= 1; ++j) {
-         for(int k = 0; k < (1 << 11); ++k) {
-            for(int l = -1; l <= 1; ++l) {
-               float arg1 = bit_cast<float>((static_cast<uint32_t>(i) << 21) + static_cast<uint32_t>(j));
-               float arg2 = bit_cast<float>((static_cast<uint32_t>(k) << 21) + static_cast<uint32_t>(l));
-               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
-               CHECK(x0 == x1);
-               CHECK(x1 == x2);
-               CHECK(x2 == x3);
-            }
-         }
-      }
-   }
+   test_f32_binop(0x97);
 }
 
-/*
- * (module
- *  (func (export "fn") (param f64 f64) (result i64)
- *   (local.get 0)
- *   (local.get 1)
- *   (f64.min)
- *   (i64.reinterpret_f64)
- *  )
- * )
- */
-std::vector<uint8_t> f64_min_wasm = {
-   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
-   0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
-   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
-   0x20, 0x01, 0xa4, 0xbd, 0x0b
-};
+void test_f64_binop(uint8_t op) { // op: wasm opcode spliced into the module bytes below as the f64 binary instruction
+   /*
+    * (module
+    *  (func (export "fn") (param f64 f64) (result i64)
+    *   (local.get 0)
+    *   (local.get 1)
+    *   (op)
+    *   (i64.reinterpret_f64)
+    *  )
+    * )
+    */
+   std::vector<uint8_t> f64_binop_wasm = {
+      0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
+      0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
+      0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
+      0x20, 0x01, op, 0xbd, 0x0b // local.get 1, <op>, i64.reinterpret_f64, end
+   };
 
-TEST_CASE("test f64.min", "[.float_tests]") {
-   multi_backend bkend{f64_min_wasm};
+   multi_backend bkend{f64_binop_wasm};
    for(int i = 0; i < (1 << 14); ++i) {
       for(int j = -1; j <= 1; ++j) {
          for(int k = 0; k < (1 << 14); ++k) {
@@ -280,37 +303,26 @@ TEST_CASE("test f64.min", "[.float_tests]") {
    }
 }
 
-/*
- * (module
- *  (func (export "fn") (param f64 f64) (result i64)
- *   (local.get 0)
- *   (local.get 1)
- *   (f64.max)
- *   (i64.reinterpret_f64)
- *  )
- * )
- */
-std::vector<uint8_t> f64_max_wasm = {
-   0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60,
-   0x02, 0x7c, 0x7c, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x01,
-   0x02, 0x66, 0x6e, 0x00, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00, 0x20, 0x00,
-   0x20, 0x01, 0xa5, 0xbd, 0x0b
-};
+TEST_CASE("test f64.add", "[.float_tests]") {
+   test_f64_binop(0xa0); // 0xa0 is the wasm opcode for f64.add
+}
+
+TEST_CASE("test f64.sub", "[.float_tests]") {
+   test_f64_binop(0xa1); // 0xa1 is the wasm opcode for f64.sub
+}
+
+TEST_CASE("test f64.mul", "[.float_tests]") {
+   test_f64_binop(0xa2); // 0xa2 is the wasm opcode for f64.mul
+}
+
+TEST_CASE("test f64.div", "[.float_tests]") {
+   test_f64_binop(0xa3); // 0xa3 is the wasm opcode for f64.div
+}
+
+TEST_CASE("test f64.min", "[.float_tests]") {
+   test_f64_binop(0xa4); // 0xa4 is the wasm opcode for f64.min
+}
 
 TEST_CASE("test f64.max", "[.float_tests]") {
-   multi_backend bkend{f64_max_wasm};
-   for(int i = 0; i < (1 << 14); ++i) {
-      for(int j = -1; j <= 1; ++j) {
-         for(int k = 0; k < (1 << 14); ++k) {
-            for(int l = -1; l <= 1; ++l) {
-               double arg1 = bit_cast<double>((static_cast<uint64_t>(i) << 50) + static_cast<uint64_t>(j));
-               double arg2 = bit_cast<double>((static_cast<uint64_t>(k) << 50) + static_cast<uint64_t>(l));
-               auto [x0, x1, x2, x3] = bkend.call_with_return(arg1, arg2);
-               CHECK(x0 == x1);
-               CHECK(x1 == x2);
-               CHECK(x2 == x3);
-            }
-         }
-      }
-   }
+   test_f64_binop(0xa5); // 0xa5 is the wasm opcode for f64.max
 }

From 62dc551febbadf1c022decdf7817bb2a4829cae4 Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Fri, 6 Mar 2020 09:57:17 -0500
Subject: [PATCH 6/7] Add warning for interpreter+hardware float.

---
 include/eosio/vm/options.hpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/eosio/vm/options.hpp b/include/eosio/vm/options.hpp
index 5230ad96..4f58106d 100644
--- a/include/eosio/vm/options.hpp
+++ b/include/eosio/vm/options.hpp
@@ -58,6 +58,14 @@ struct options {
    bool allow_zero_blocktype = false;
    // Determines which components are counted towards max_function_local_bytes
    max_func_local_bytes_flags_t max_func_local_bytes_flags = max_func_local_bytes_flags_t::locals | max_func_local_bytes_flags_t::stack;
+
+   // Whether to use softfloat (software floating point) instead of
+   // hardware floating point.
+   //
+   // @warning Hardware floating point is only strictly deterministic with
+   // jit.  With the interpreter, hardware floating point behavior depends
+   // on the C++ implementation and differs from the behavior with softfloat.
+   static constexpr bool use_softfloat = true;
 };
 
 struct default_options {

From c30ca6296154b668dfec31d33f09f0b6efef436f Mon Sep 17 00:00:00 2001
From: Steven Watanabe <steven.watanabe@block.one>
Date: Fri, 6 Mar 2020 10:07:44 -0500
Subject: [PATCH 7/7] Disable hardware float interpreter determinism tests,
 since we don't expect them to pass.

---
 tests/float_tests.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/float_tests.cpp b/tests/float_tests.cpp
index 8b66e108..66032977 100644
--- a/tests/float_tests.cpp
+++ b/tests/float_tests.cpp
@@ -25,25 +25,25 @@ struct multi_backend {
    explicit multi_backend(std::vector<uint8_t>& code) :
      soft_interpreter_backend(code),
      soft_jit_backend(code),
-     hard_interpreter_backend(code),
+     //hard_interpreter_backend(code), // disabled: interpreter + hardware float is not expected to pass the determinism checks
      hard_jit_backend(code)
    {
       soft_interpreter_backend.set_wasm_allocator(&wa);
-      hard_interpreter_backend.set_wasm_allocator(&wa);
+      //hard_interpreter_backend.set_wasm_allocator(&wa);
       soft_jit_backend.set_wasm_allocator(&wa);
       hard_jit_backend.set_wasm_allocator(&wa);
    }
    backend<nullptr_t, interpreter, softfloat_config> soft_interpreter_backend;
    backend<nullptr_t, jit, softfloat_config> soft_jit_backend;
-   backend<nullptr_t, interpreter, hardfloat_config> hard_interpreter_backend;
+   //backend<nullptr_t, interpreter, hardfloat_config> hard_interpreter_backend;
    backend<nullptr_t, jit, hardfloat_config> hard_jit_backend;
    template<typename... A>
    std::tuple<uint64_t, uint64_t, uint64_t, uint64_t> call_with_return(A... a) {
       auto x0 = soft_interpreter_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
       auto x1 = soft_jit_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
-      auto x2 = hard_interpreter_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
+      // auto x2 = hard_interpreter_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
       auto x3 = hard_jit_backend.call_with_return(nullptr, "env", "fn", a...)->to_ui64();
-      return {x0, x1, x2, x3};
+      return {x0, x1, /*x2,*/x3, x3}; // x3 fills the x2 slot while the hard interpreter backend is disabled, keeping the 4-tuple shape
    }
 };