Commit e5b7c31b authored by Kirill Smelkov's avatar Kirill Smelkov

bigarray: Fix __getitem__ for cases where element overlaps with edge between pages

When we serve indexing request, we first compute page range in backing
file, which contains the result based on major index range, then mmap
that file range and pick up result from there.

Page range math was however not correct: e.g. for positive strides, last
element's byte is (byte0_stop-1), NOT (byte0_stop - byte0_stride) which
for cases where byte0_stop is just a bit after page boundary, can make a
difference - page_max will be 1 page less what it should be and then
whole ndarray view creation breaks:

    ...
    Module wendelin.bigarray, line 381, in __getitem__
      view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev)
  ValueError: strides is incompatible with shape of requested array and size of buffer

( because vma0 was created less in size than what is needed to create view0_shape
  shaped array starting from view0_offset in vma0. )

Similar story for the negative-strides math — it was not correct either.

Fix it.

/reported-by @Camata
parent 386ae339
@@ -361,10 +361,16 @@ class BigArray(object):
         byte0_start  = idx0_start  * stride0
         byte0_stop   = idx0_stop   * stride0
         byte0_stride = idx0_stride * stride0
+        #print('byte0:\t[%s:%s:%s]' % (byte0_start, byte0_stop, byte0_stride))
 
         # major slice -> in file pages, always increasing, inclusive
-        page0_min = min(byte0_start, byte0_stop+byte0_stride) // pagesize   # TODO -> fileh.pagesize
-        page0_max = max(byte0_stop-byte0_stride, byte0_start) // pagesize   # TODO -> fileh.pagesize
+        if byte0_stride >= 0:
+            page0_min = byte0_start // pagesize                             # TODO -> fileh.pagesize
+            page0_max = (byte0_stop-1) // pagesize                          # TODO -> fileh.pagesize
+        else:
+            page0_min = (byte0_stop - byte0_stride) // pagesize             # TODO -> fileh.pagesize
+            page0_max = (byte0_start - byte0_stride - 1) // pagesize        # TODO -> fileh.pagesize
+        #print('page0:\t[%s, %s]' % (page0_min, page0_max))
 
         # ~~~ mmap file part corresponding to full major slice into memory
@@ -376,6 +382,7 @@ class BigArray(object):
         view0_offset  = byte0_start - page0_min * pagesize   # TODO -> fileh.pagesize
         view0_stridev = (byte0_stride,) + self._stridev[1:]
         #print('view0_shape:\t', view0_shape, self.shape)
+        #print('view0_stridv:\t', view0_stridev)
         #print('view0_offset:\t', view0_offset)
         #print('len(vma0):\t', len(vma0))
         view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev)
......
@@ -99,6 +99,14 @@ class DoubleGet:
         return self.obj1[key], self.obj2[key]
 
+# DoubleCheck(A1, A2)[key] -> assert array_equal(A1[key], A2[key])
+class DoubleCheck(DoubleGet):
+    def __getitem__(self, key):
+        a1, a2 = DoubleGet.__getitem__(self, key)
+        assert array_equal(a1, a2)
+
 # getitem/setitem (1d case)
 def test_bigarray_indexing_1d():
     Z = BigFile_Zero(PS)
@@ -259,6 +267,45 @@ def test_bigarray_indexing_1d():
     assert raises(ValueError, 'A[:4] = range(5)')
 
+# indexing where accessed element overlaps edge between pages
+def test_bigarray_indexing_pageedge():
+    shape = (10, PS-1)
+    data  = arange(mul(shape), dtype=uint32).view(uint8)    # NOTE 4 times bigger than uint8
+
+    f  = BigFile_Data_RO(data, PS)
+    fh = f.fileh_open()
+
+    A  = BigArray(shape, uint8, fh)             # bigarray with test data and shape
+    A_ = data[:mul(shape)].reshape(shape)       # ndarray               ----//----
+
+    # AA[key] -> assert array_equal(A[key], A_[key])
+    AA = DoubleCheck(A, A_)
+
+    AA[0]
+    AA[1]           # tail of page0 - page1
+    AA[1:2]         # ---- // ----
+    AA[1:2:-1]      # []
+    AA[1:0]         # []
+    AA[1:0:-1]      # tail of page0 - page1
+
+    shape = (10, PS+1)
+    f  = BigFile_Data_RO(data, PS)
+    fh = f.fileh_open()
+
+    A  = BigArray(shape, uint8, fh)
+    A_ = data[:mul(shape)].reshape(shape)
+
+    AA = DoubleCheck(A, A_)
+
+    AA[0]           # page0 - head of page1
+    AA[0:1]         # ---- // ----
+    AA[0:1:-1]      # []
+    AA[1:0]         # []
+    AA[1:0:-1]      # page0 - head of page1
+
 # given dimension length n, yield index variants to test
 def indices_to_test(n):
     # ":"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment