Commit 010fbd2e authored by Kirill Smelkov's avatar Kirill Smelkov Committed by Kamil Kisiel

Add AsInt64 helper to help programs handle unpickled data be it int or long

On Python two different objects with different types can represent
essentially the same entity. For example 1 (int) and 1L (long) represent
integer number one via two different types and are decoded by ogórek into Go
types int64 and big.Int correspondingly. However on the Python side those
two representations are often used interchangeably and programs are usually
expected to handle both with the same effect.

For example I hit this situation for real with FileStorage index, ZEO
protocol, BTrees and Wendelin.core where data with integer fields, for
the same field, are sometimes represented as int and sometimes as long.
As a result ZODB/go and Wendelin.core had a private XInt64 helper used
as follows:

https://lab.nexedi.com/nexedi/wendelin.core/-/blob/07087ec8/wcfs/internal/pycompat/pycompat.go
https://lab.nexedi.com/nexedi/wendelin.core/-/blob/07087ec8/wcfs/internal/zdata/zblk.go#L385
https://lab.nexedi.com/search?utf8=%E2%9C%93&search=XInt64&group_id=&project_id=73&search_code=true&repository_ref=f7776fc1&nav_source=navbar

Besides int/long we are also hitting a similar situation with
bytes / str(py2) / str(py3) which will also need to use corresponding
helpers to be able to load pickled data from existing databases.

In the next patch we will add StrictUnicode mode to handle those strings,
and it will naturally come with AsBytes and AsString helpers. Thus it
also feels appropriate to keep the other useful helpers to handle
unpickled data inside ogórek itself.

This patch adds AsInt64 helper with top-level description and tests.
parent 17fed0ed
...@@ -99,6 +99,20 @@ ...@@ -99,6 +99,20 @@
// Please see DecoderConfig.PersistentLoad and EncoderConfig.PersistentRef for details. // Please see DecoderConfig.PersistentLoad and EncoderConfig.PersistentRef for details.
// //
// //
// Handling unpickled values
//
// On Python two different objects with different types can represent
// essentially the same entity. For example 1 (int) and 1L (long) represent
// integer number one via two different types and are decoded by ogórek into Go
// types int64 and big.Int correspondingly. However on the Python side those
// two representations are often used interchangeably and programs are usually
// expected to handle both with the same effect. To help handling decoded
// values with such differences ogórek provides utilities that bring objects
// to a common type regardless of which type variant was used in the pickle
// stream. For example AsInt64 tries to represent unpickled value as int64 if
// possible and errors if not.
//
//
// -------- // --------
// //
// (*) ogórek is Polish for "pickle". // (*) ogórek is Polish for "pickle".
......
package ogórek
// conversion in between Go types to match Python.
import (
"fmt"
"math/big"
)
// AsInt64 tries to represent an unpickled value as int64.
//
// A Python int is decoded as int64 and a Python long is decoded as *big.Int.
// AsInt64 accepts both, so Go code can handle normal-range integers without
// caring which Python representation was present in the pickle stream.
//
// It returns an error, together with 0, when x is a *big.Int whose value does
// not fit into int64, or when x is of any other type.
func AsInt64(x interface{}) (int64, error) {
	// Python int: already in the target representation.
	if v, ok := x.(int64); ok {
		return v, nil
	}

	// Anything that is neither int64 nor *big.Int is rejected by type.
	b, ok := x.(*big.Int)
	if !ok {
		return 0, fmt.Errorf("expect int64|long; got %T", x)
	}

	// Python long: usable only when it fits into the int64 range.
	if b.IsInt64() {
		return b.Int64(), nil
	}
	return 0, fmt.Errorf("long outside of int64 range")
}
package ogórek
import (
"fmt"
"reflect"
"testing"
)
// TestAsInt64 checks AsInt64 against a table of inputs.
//
// Each entry pairs an input with the expected outcome: an int64 for a
// successful conversion, or an error value for a failing one. On failure the
// test additionally requires the returned integer to be 0.
func TestAsInt64(t *testing.T) {
	// errRange is the outcome expected for a long that does not fit into int64.
	errRange := fmt.Errorf("long outside of int64 range")
	// errType builds the outcome expected for an input of unsupported type.
	errType := func(typename string) error {
		return fmt.Errorf("expect int64|long; got %s", typename)
	}

	// Boundaries of the int64 range.
	const (
		maxInt64 = int64(0x7fffffffffffffff)
		minInt64 = int64(-0x8000000000000000)
	)

	cases := []struct {
		in   interface{}
		want interface{} // int64 on success, error on failure
	}{
		// int64 inputs pass through unchanged
		{int64(0), int64(0)},
		{int64(1), int64(1)},
		{int64(2), int64(2)},
		{int64(123), int64(123)},
		{maxInt64, maxInt64},
		{minInt64, minInt64},

		// big.Int inputs convert while they stay within the int64 range
		{bigInt("0"), int64(0)},
		{bigInt("1"), int64(1)},
		{bigInt("2"), int64(2)},
		{bigInt("123"), int64(123)},
		{bigInt("9223372036854775807"), maxInt64},
		{bigInt("9223372036854775808"), errRange},
		{bigInt("-9223372036854775808"), minInt64},
		{bigInt("-9223372036854775809"), errRange},

		// any other type is rejected
		{1.0, errType("float64")},
		{"a", errType("string")},
	}

	for _, tc := range cases {
		n, err := AsInt64(tc.in)

		// got is either the converted integer or the returned error, so it
		// can be compared directly against tc.want.
		var got interface{}
		if err == nil {
			got = n
		} else {
			got = err
			if n != 0 {
				t.Errorf("%T %#v -> err, but ret int64 = %d ; want 0",
					tc.in, tc.in, n)
			}
		}

		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Errorf("%T %#v -> %T %#v ; want %T %#v",
			tc.in, tc.in, got, got, tc.want, tc.want)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment