Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions dataconv/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"math"
"math/big"
"reflect"
"strings"
"time"
Expand Down Expand Up @@ -76,10 +78,19 @@ func MarshalStarlarkJSON(data starlark.Value, indent int) (string, error) {
// In comparison with DecodeStarlarkJSON, it gives you more control over type conversion but may be less efficient due to intermediate steps.
func UnmarshalStarlarkJSON(data []byte) (starlark.Value, error) {
var m interface{}
err := json.Unmarshal(data, &m)
if err != nil {
// decode with UseNumber so integers keep their exact value: a plain
// json.Unmarshal turns every number into a float64, which silently
// saturates/rounds integers beyond 2^53 before TypeConvert ever runs.
dec := json.NewDecoder(bytes.NewReader(data))
dec.UseNumber()
if err := dec.Decode(&m); err != nil {
return starlark.None, err
}
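// json.Unmarshal rejected trailing content; the decoder does not, so
// re-impose single-document strictness (trailing whitespace still passes).
// NOTE(review): dec.More() reports false when the next non-space byte is
// ']' or '}', so a stray closing bracket (e.g. `{"a":1}}`) is not caught
// by this check; reading one more token and expecting io.EOF would be
// stricter.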
if dec.More() {
return starlark.None, fmt.Errorf("unexpected trailing data after JSON value")
}

// fix all values to their appropriate types
f := TypeConvert(m)
Expand Down Expand Up @@ -179,6 +190,25 @@ func TypeConvert(data interface{}) interface{} {
// If not a time or number, return the original string
return v

case json.Number:
// UnmarshalStarlarkJSON now decodes with UseNumber, so numbers arrive
// here exact. Map by literal form, the same rule Marshal/json.decode
// use: an integer literal becomes an int (arbitrary precision, no
// float64 saturation), anything with a decimal point or exponent
// becomes a float.
if !strings.ContainsAny(v.String(), ".eE") {
if i, err := v.Int64(); err == nil {
return i
}
if bi, ok := new(big.Int).SetString(v.String(), 10); ok {
return bi
}
}
if f, err := v.Float64(); err == nil {
return f
}
return v

case float64:
// Check for exact int match
if math.Floor(v) == v {
Expand Down
60 changes: 60 additions & 0 deletions dataconv/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dataconv
import (
"context"
"fmt"
"math/big"
"reflect"
"testing"
"time"
Expand Down Expand Up @@ -338,6 +339,11 @@ func TestUnmarshalStarlarkJSON(t *testing.T) {
input: []byte(`{"foo":42}`),
want: d42,
},
{
name: "trailing data rejected",
input: []byte(`{"foo":42} {"bar":1}`),
wantErr: true,
},
{
name: "list",
input: []byte(`[43,"foo"]`),
Expand Down Expand Up @@ -374,6 +380,60 @@ func TestUnmarshalStarlarkJSON(t *testing.T) {
}
}

// TestUnmarshalStarlarkJSONNumberFidelity pins the number handling of
// UnmarshalStarlarkJSON: integer literals must come back with their exact
// value even when they are too large for an int64, and int vs float is
// decided by the literal's form (a decimal point or exponent means float).
// Values are compared with starlark.Equal rather than reflect.DeepEqual
// because big-int-bearing starlark.Int does not reflect.DeepEqual reliably.
func TestUnmarshalStarlarkJSONNumberFidelity(t *testing.T) {
	// 12345678901234567890 is larger than math.MaxInt64, so it cannot be
	// represented as an int64 and must survive as a big integer.
	bigID, parsed := new(big.Int).SetString("12345678901234567890", 10)
	if !parsed {
		t.Fatal("bad big int fixture")
	}

	// equalsWant fails the test unless got and want are equal per
	// starlark.Equal; ctx labels the failure message.
	equalsWant := func(t *testing.T, got, want starlark.Value, ctx string) {
		t.Helper()
		eq, err := starlark.Equal(got, want)
		if err != nil {
			t.Fatalf("%s: Equal: %v", ctx, err)
		}
		if !eq {
			t.Fatalf("%s = %s (%s), want %s", ctx, got, got.Type(), want)
		}
	}

	for _, c := range []struct {
		name, input string
		want        starlark.Value
	}{
		{"small int", `6`, starlark.MakeInt(6)},
		{"big int exact", `12345678901234567890`, starlark.MakeBigInt(bigID)},
		{"fractional is float", `6.5`, starlark.Float(6.5)},
		{"exponent is float", `1e3`, starlark.Float(1000)},
	} {
		t.Run(c.name, func(t *testing.T) {
			got, err := UnmarshalStarlarkJSON([]byte(c.input))
			if err != nil {
				t.Fatalf("UnmarshalStarlarkJSON(%q): %v", c.input, err)
			}
			equalsWant(t, got, c.want, c.name)
		})
	}

	// the same exactness must hold for a number nested in a dict — this is
	// the lib/http request-body path.
	got, err := UnmarshalStarlarkJSON([]byte(`{"id":12345678901234567890}`))
	if err != nil {
		t.Fatal(err)
	}
	d, ok := got.(*starlark.Dict)
	if !ok {
		t.Fatalf("want dict, got %s", got.Type())
	}
	// Check the lookup error instead of discarding it, so a failed lookup is
	// not misreported as a missing key.
	v, found, err := d.Get(starlark.String("id"))
	if err != nil {
		t.Fatalf("dict lookup of id: %v", err)
	}
	if !found {
		t.Fatal("missing id key")
	}
	equalsWant(t, v, starlark.MakeBigInt(bigID), "dict id")
}

// TestEncodeStarlarkJSON tests the EncodeStarlarkJSON function
func TestEncodeStarlarkJSON(t *testing.T) {
now := time.Now()
Expand Down
4 changes: 3 additions & 1 deletion dataconv/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
// - Starlark value -> JSON text and back, via the Go shapes above:
// MarshalStarlarkJSON / UnmarshalStarlarkJSON. The decode direction
// applies TypeConvert heuristics (RFC3339-looking strings become time
// values, whole floats become ints).
// values) and maps numbers by literal form — an integer literal becomes
// an int (exact, arbitrary precision), a number with a decimal point or
// exponent becomes a float.
// - Starlark value -> JSON text and back, staying inside Starlark types:
// EncodeStarlarkJSON / DecodeStarlarkJSON (the interpreter's own json
// encoder: big ints work, bytes/time are errors, no heuristics).
Expand Down
38 changes: 38 additions & 0 deletions dataconv/marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package dataconv
// Based on https://github.com/qri-io/starlib/tree/master/util with some modifications and additions

import (
"encoding/json"
"errors"
"fmt"
"math/big"
"strings"
"time"

"github.com/1set/starlight/convert"
Expand All @@ -24,6 +26,20 @@ func Marshal(data interface{}) (v starlark.Value, err error) {
v = starlark.Bool(x)
case string:
v = starlark.String(x)
case json.Number:
// a JSON number stays a number: Int for an integer literal (exact,
// arbitrary precision) and Float otherwise — matching json.decode and
// serial. A caller that decodes raw JSON into map[string]interface{}
// with dec.UseNumber() reaches this; it is the int-vs-float-preserving
// path that a plain json.Unmarshal (which collapses every number to
// float64, losing the int/float distinction) cannot offer.
v, err = marshalJSONNumber(x)
case *big.Int:
// the inverse of Unmarshal, which returns *big.Int for integers
// beyond uint64, so a marshal/unmarshal round-trip stays exact.
v = starlark.MakeBigInt(x)
case big.Int:
v = starlark.MakeBigInt(&x)
case int:
v = starlark.MakeInt(x)
case int8:
Expand Down Expand Up @@ -115,6 +131,28 @@ func Marshal(data interface{}) (v starlark.Value, err error) {
return
}

// marshalJSONNumber converts a json.Number into a Starlark numeric value,
// choosing the type from the literal's spelling rather than its magnitude.
//
// A literal containing none of '.', 'e' or 'E' is treated as an integer: it
// becomes a Starlark Int, built from an int64 when it fits and from a big.Int
// otherwise, so large integers are not rounded through float64. Every other
// literal — and an integer-looking one that neither integer parser accepts —
// is parsed as a float64 and returned as a Starlark Float.
//
// When the literal cannot be parsed as a float64 either, it returns a nil
// value and an "invalid number" error wrapping the parse failure.
func marshalJSONNumber(n json.Number) (starlark.Value, error) {
	literal := n.String()

	if looksIntegral := !strings.ContainsAny(literal, ".eE"); looksIntegral {
		// Cheap path first: try to fit the literal into an int64.
		small, intErr := n.Int64()
		if intErr == nil {
			return starlark.MakeInt64(small), nil
		}
		// Did not parse as int64: retry at arbitrary precision.
		wide, parsed := new(big.Int).SetString(literal, 10)
		if parsed {
			return starlark.MakeBigInt(wide), nil
		}
		// Neither integer parser accepted it; fall through to the float
		// path, which also yields the error for unparsable input.
	}

	value, floatErr := n.Float64()
	if floatErr == nil {
		return starlark.Float(value), nil
	}
	return nil, fmt.Errorf("invalid number %q: %w", literal, floatErr)
}

// Unmarshal converts a starlark.Value into its Golang counterpart, like FromValue() of package starlight does.
//
// The contract:
Expand Down
21 changes: 21 additions & 0 deletions dataconv/marshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ import (
"go.starlark.net/syntax"
)

// mustBigInt parses s as a base-10 integer literal and returns it as a
// *big.Int. It panics with a message naming the literal when s is not a valid
// base-10 integer, so it is only suitable for building test fixtures.
func mustBigInt(s string) *big.Int {
	parsed, valid := big.NewInt(0).SetString(s, 10)
	if valid {
		return parsed
	}
	panic("bad big int literal: " + s)
}

func TestMarshal(t *testing.T) {
expectedStringDict := starlark.NewDict(1)
if err := expectedStringDict.SetKey(starlark.String("foo"), starlark.MakeInt(42)); err != nil {
Expand Down Expand Up @@ -83,6 +91,19 @@ func TestMarshal(t *testing.T) {
{uint64(1 << 42), starlark.MakeUint64(1 << 42), ""},
{float32(42), starlark.Float(42), ""},
{42., starlark.Float(42), ""},
// json.Number maps by literal form: an integer literal -> Int (exact,
// arbitrary precision), a fractional/exponent literal -> Float. It is
// NOT mapped to a string (that would break value == 6 in scripts).
{json.Number("42"), starlark.MakeInt(42), ""},
{json.Number("-7"), starlark.MakeInt(-7), ""},
{json.Number("6.5"), starlark.Float(6.5), ""},
{json.Number("1e3"), starlark.Float(1000), ""},
{json.Number("12345678901234567890"), starlark.MakeBigInt(mustBigInt("12345678901234567890")), ""},
{json.Number("not-a-number"), nil, `invalid number "not-a-number"`},
// *big.Int is the inverse of Unmarshal (which returns *big.Int beyond
// uint64), so a marshal/unmarshal round-trip stays exact.
{mustBigInt("12345678901234567890"), starlark.MakeBigInt(mustBigInt("12345678901234567890")), ""},
{*mustBigInt("99"), starlark.MakeInt(99), ""},
{time.Unix(1588540633, 0), startime.Time(time.Unix(1588540633, 0)), ""},
{now, startime.Time(now), ""},
{[]byte("Aloha"), starlark.Bytes("Aloha"), ""},
Expand Down
Loading