Commit adffe247 authored by Levin Zimmermann's avatar Levin Zimmermann Committed by Kirill Smelkov

BigArray: Fix API deviation with ndarray (shape)

The 'shape' argument of 'numpy.ndarray's initialization method accepts
either an integer or a sequence of integers. But the 'shape' property of
'numpy.ndarray' always returns tuple[int, ...], so numpy internally
casts any legal argument into tuple[int, ...].

In 'BigArray' and 'ZBigArray' this internal casting didn't exist yet.
This patch adds the casting.

Before:

  ZBigArray(shape=[1, 2, 3], dtype=float).shape == [1, 2, 3]

After:

  ZBigArray(shape=[1, 2, 3], dtype=float).shape == (1, 2, 3)

In this way the BigArray and ZBigArray APIs behave closer to numpy.ndarray,
which should help avoid confusion when people are using BigArray /
ZBigArray.

-----

See issue nexedi/wendelin.core#9 and
MR nexedi/wendelin.core!14
for additional context.

/reviewed-by @kirr
/reviewed-on nexedi/wendelin.core!14
parent 61dc1ff2
...@@ -39,12 +39,14 @@ of physical RAM. ...@@ -39,12 +39,14 @@ of physical RAM.
from __future__ import print_function from __future__ import print_function
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from wendelin.lib.xnumpy import _as_strided from wendelin.lib.xnumpy import _as_strided
from wendelin.lib.utils import inttuple
from numpy import ndarray, dtype, sign, newaxis, asarray, argmax, uint8 from numpy import ndarray, dtype, sign, newaxis, asarray, argmax, uint8
import logging import logging
pagesize = 2*1024*1024 # FIXME hardcoded, TODO -> fileh.ram.pagesize pagesize = 2*1024*1024 # FIXME hardcoded, TODO -> fileh.ram.pagesize
class BigArray(object): class BigArray(object):
# numpy.ndarray like # numpy.ndarray like
# XXX can't use slots, because that would create "multiple bases have # XXX can't use slots, because that would create "multiple bases have
...@@ -79,6 +81,7 @@ class BigArray(object): ...@@ -79,6 +81,7 @@ class BigArray(object):
# __init__ part without fileh # __init__ part without fileh
def _init0(self, shape, dtype_, order): def _init0(self, shape, dtype_, order):
shape = inttuple(shape) # mimic numpy
_dtype = dtype(dtype_) _dtype = dtype(dtype_)
if _dtype.hasobject: if _dtype.hasobject:
logging.warn("You tried to use dtype containing object (%r) with out-of-core array ..." % _dtype) logging.warn("You tried to use dtype containing object (%r) with out-of-core array ..." % _dtype)
......
...@@ -113,6 +113,22 @@ def test_bigarray_noobject(testbig): ...@@ -113,6 +113,22 @@ def test_bigarray_noobject(testbig):
BigArray((1,), dtype_, Zh) BigArray((1,), dtype_, Zh)
# BigArray must treat its shape the way ndarray does:
# the constructor takes either a single int or a sequence of ints,
# while the .shape property always reads back as a tuple of ints.
def test_bigarray_shape_initialization(testbig):
    Zh = testbig.fopen()

    # (shape given to the constructor, shape expected from the property)
    expectations = (
        ([1, 4, 3], (1, 4, 3)),
        (42,        (42,)),
        ((4, 4),    (4, 4)),
    )
    for shape_input, shape_property in expectations:
        assert BigArray(shape_input, float, Zh).shape == shape_property
# basic ndarray-compatibility attributes of BigArray # basic ndarray-compatibility attributes of BigArray
def test_bigarray_basic(testbig): def test_bigarray_basic(testbig):
Zh = testbig.fopen() Zh = testbig.fopen()
......
# -*- coding: utf-8 -*-
# Wendelin.core.lib.utils | Tests
# Copyright (C) 2022 Nexedi SA and Contributors.
# Levin Zimmermann <levin.zimmermann@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from fractions import Fraction
import numpy as np
from pytest import raises
import six
from wendelin.lib.utils import inttuple
def test_inttuple():
    # a lone integer is wrapped into a 1-tuple
    assert inttuple(1) == (1,)
    if six.PY2:
        assert inttuple(long(4)) == (4,)

    # sequences of integers (ndarray included) come back as equal tuples
    accepted = (
        ([1, 2, 3],           (1, 2, 3)),
        (np.array([1, 2, 3]), (1, 2, 3)),
        ((3, 2, 4),           (3, 2, 4)),
    )
    for argument, expected in accepted:
        assert inttuple(argument) == expected

    # everything else must be refused with TypeError
    rejected = (
        # non-integer numbers passed as a single value
        3.2,
        Fraction(3, 2),
        # objects that are neither integers nor sequences of integers
        "privjet",
        {1, 2, 3},
        {0: 1, 1: 2},
        # a sequence holding a non-integer item
        (1, 0.5),
    )
    for object_ in rejected:
        with raises(TypeError, match="cannot be interpreted as integer"):
            inttuple(object_)
# -*- coding: utf-8 -*-
# Utility functions for python part of wendelin.core
# Copyright (C) 2022 Nexedi SA and Contributors.
# Levin Zimmermann <levin.zimmermann@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
import numpy as np
import numbers
import six
if six.PY2:
from collections import Sequence
else:
from collections.abc import Sequence
def inttuple(int_or_int_sequence):
    """Convert an int or a sequence of ints into a tuple of builtin ints.

    Imitates the way numpy normalizes the ``shape`` argument of ndarray:
    a single integer becomes a 1-tuple, a sequence (or an ndarray) of
    integers becomes a tuple with the same items.  Every item is returned
    as a builtin integer, so integer-like objects that were passed in
    (e.g. the numpy scalars obtained by iterating an ndarray, or bool)
    do not leak into the result.

    See the following numpy + cpython references:

      https://github.com/numpy/numpy/blob/28e9227565b2/numpy/core/src/multiarray/conversion_utils.c#L133-L144
      https://github.com/python/cpython/blob/9c4ae037b9c3/Objects/abstract.c#L1706-L1713

    Raises TypeError if the argument - or any item of it, when it is a
    sequence - is not an integer.
    """
    def as_builtin_int(object_):
        # NOTE: Use 'numbers.Integral' instead of 'int' to support
        #       python2 long type.
        if not isinstance(object_, numbers.Integral):
            raise TypeError(
                "'%s' cannot be interpreted as integer" % type(object_)
            )
        # Drop the concrete Integral type so that the result holds only
        # builtin integers, like ndarray.shape does.
        return int(object_)

    # We need to explicitly add np.ndarray into checked types,
    # because np.ndarray aren't Sequences, see also:
    #
    #   https://github.com/numpy/numpy/issues/2776
    if isinstance(int_or_int_sequence, (Sequence, np.ndarray)):
        return tuple(as_builtin_int(object_) for object_ in int_or_int_sequence)

    return (as_builtin_int(int_or_int_sequence),)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment