Commit e9d61a89 authored by Kirill Smelkov

bigarray: ArrayRef utility

ArrayRef is a reference to NumPy array.

The reference is represented by a root array object and instructions on how
to recreate the original array as some view of the root.

Such a reference is useful in situations where one needs to pass arrays
between processes: instead of copying array data, one can leverage the fact
that the top-level array, for example ZBigArray, is already persisted
separately, and send only a small amount of information referencing the data
in question.

Use `ArrayRef(array)` to create reference to an ndarray.

Use `.deref()` to convert an ArrayRef back to the array object it points to.

NOTE

Don't send ArrayRef unconditionally - for example, when the array object is a
small regular ndarray whose root is also a regular, but big, ndarray, sending
ArrayRef will send the whole data of the root object, not just the small leaf.

Sending ArrayRef only makes sense when root object is known to be already
persisted by other means, for example something like below in ZODB context:

```python
  aref = ArrayRef(a)
  if isinstance(aref.root, Persistent):
      send aref
  else:
      send a 
```

Please see individual patches for more details.

/reviewed-on nexedi/wendelin.core!6
parents f785ac07 450ad804
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# BigArray submodule for Wendelin # BigArray submodule for Wendelin
# Copyright (C) 2014-2015 Nexedi SA and Contributors. # Copyright (C) 2014-2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -38,7 +38,8 @@ of physical RAM. ...@@ -38,7 +38,8 @@ of physical RAM.
from __future__ import print_function from __future__ import print_function
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, sign, newaxis, asarray, argmax from numpy import ndarray, dtype, sign, newaxis, asarray, argmax, uint8
from numpy.lib.stride_tricks import DummyArray
import logging import logging
...@@ -409,6 +410,10 @@ class BigArray(object): ...@@ -409,6 +410,10 @@ class BigArray(object):
# ~~~ mmap file part corresponding to full major slice into memory # ~~~ mmap file part corresponding to full major slice into memory
vmaM = self._fileh.mmap(pageM_min, pageM_max-pageM_min+1) vmaM = self._fileh.mmap(pageM_min, pageM_max-pageM_min+1)
# remember to which BigArray this vma belongs.
# this is needed for ArrayRef to be able to find root array object.
vmaM.pyuser = self
# first get ndarray view with only major slice specified and rest indices being ":" # first get ndarray view with only major slice specified and rest indices being ":"
viewM_shape = Mreplace(self._shape, nitemsM) viewM_shape = Mreplace(self._shape, nitemsM)
...@@ -473,3 +478,294 @@ class BigArray(object): ...@@ -473,3 +478,294 @@ class BigArray(object):
logging.warn('... address space can not be mapped at once and have to be') logging.warn('... address space can not be mapped at once and have to be')
logging.warn('... processed in chunks.') logging.warn('... processed in chunks.')
raise raise
# ----------------------------------------
# InvalidArrayRef is the exception raised when an ArrayRef was attempted to be
# dereferenced, but was found to be invalid.
class InvalidArrayRef(Exception):
    """Raised on an attempt to dereference an ArrayRef that is found invalid."""
# _flatbytev returns []byte view of array a with index ↑ along with memory
def _flatbytev(a):
b = a[:].view(uint8)
# XXX vvv strictly speaking this might raise internally if numpy sees
# it cannot reshape without copy.
b.shape = -1 # flatten without copy
assert len(b.strides) == 1
if b.strides[0] < 0:
b = b[::-1]
assert b.strides[0] == +1
return b
# ArrayRef is a reference to NumPy array.
#
# The reference is represented by root array object and instructions how to
# create original array as some view of the root.
#
# Such reference could be useful in situations where one needs to pass arrays
# between processes and instead of copying array data, leverage the fact that
# top-level array, for example ZBigArray, is already persisted separately, and
# only send small amount of information referencing data in question.
#
# Use ArrayRef(array) to create reference to an ndarray.
#
# Use .deref() to convert ArrayRef to pointed array object.
#
# NOTE
#
# don't send ArrayRef unconditionally - for example when array object is
# small regular ndarray with also regular, but big, root ndarray, sending
# ArrayRef will send whole data for root object, not for small leaf.
#
# Sending ArrayRef only makes sense when root object is known to be already
# persisted by other means, for example something like below in ZODB context:
#
# aref = ArrayRef(a)
# if isinstance(aref.root, Persistent):
# send aref
# else:
# send a
class ArrayRef(object):
    # .root     top-level array object
    #
    # below broot is []byte view of .root array with index ↑ along with memory
    #
    # .lo, .hi  raw array data is broot[lo:hi]
    # .z0       array[0,0,...,0] is pointing ->z0 in broot[lo:hi]
    #
    # .shape    shape and strides of the array with data taken from
    # .stridev  broot[lo:hi] with .z0 as zero offset.
    #
    # .dtype    dtype of the array
    # .atype    type of the array (e.g. np.ndarray, np.recarray, etc...)

    # _asint returns x as plain python int if x is an exact integer value,
    # and None otherwise (non-numeric, fractional, nan/inf, ...).
    #
    # Working with plain python ints afterwards also guarantees that further
    # offset arithmetic cannot silently wrap the way fixed-width numpy
    # integers can.
    @staticmethod
    def _asint(x):
        try:
            i = int(x)
            if i == x:
                return i
        except (TypeError, ValueError, OverflowError):
            # deliberate: anything that cannot be compared to its int() form
            # is reported as "not an integer" via the None below.
            pass
        return None

    # _byterange returns [boffmin, boffmax) - the byte range, relative to
    # array's zero point, that is covered by an array with given shape,
    # strides and itemsize.
    #
    # shape elements must be already verified to be >= 0.
    @staticmethod
    def _byterange(shape, stridev, itemsize):
        boffmin = 0
        boffmax = 0
        for n, s in zip(shape, stridev):
            if n == 0:
                continue    # [0] dimension - does not affect anything
            if s > 0:
                boffmax += s*(n-1)
            else:
                boffmin += s*(n-1)

        # when element is read - memory is accessed ↑ for itemsize bytes.
        # we need to adjust only boffmax because for boffmin it grows ↑ too.
        boffmax += itemsize
        return boffmin, boffmax

    # deref returns ndarray represented by this reference.
    #
    # if the reference was found invalid - e.g. it had nonsensical data or
    # shape/stride out of range - InvalidArrayRef is raised.
    def deref(self):
        # broot is []byte view of root array with index ↑ along with memory
        broot = _flatbytev(self.root)

        # lo, hi and z0 must be integers; normalize them to python ints so
        # that both the checks and the slicing below work on the same values.
        lo = self._asint(self.lo)
        hi = self._asint(self.hi)
        z0 = self._asint(self.z0)
        if lo is None or hi is None or z0 is None:
            raise InvalidArrayRef("lo/hi/z0 (%r, %r, %r) are not all integers" %
                    (self.lo, self.hi, self.z0))

        # check lo:hi is within correct range.
        if not (0 <= lo <= hi <= len(broot)):
            raise InvalidArrayRef("lo:hi ([%d, %d]) out of correct [0, %d] range" %
                    (lo, hi, len(broot)))

        # bchild is raw []byte underlying data for recreated array
        bchild = broot[lo:hi]

        if not (0 <= z0 < len(bchild)):
            raise InvalidArrayRef("z0 (%d) out of correct [0, %d) range" %
                    (z0, len(bchild)))

        # check .shape and .stridev are within correct range, before
        # applying unsafe stride tricks.
        try:
            shape_in   = tuple(self.shape)
            stridev_in = tuple(self.stridev)
        except TypeError:
            raise InvalidArrayRef("shape/stridev (%r, %r) are not sequences" %
                    (self.shape, self.stridev))

        if len(shape_in) != len(stridev_in):
            raise InvalidArrayRef("shape/stridev len mismatch (#shape: %d; #stridev: %d)" %
                    (len(shape_in), len(stridev_in)))

        shape   = []    # .shape   normalized to python ints
        stridev = []    # .stridev normalized to python ints
        for n, s in zip(shape_in, stridev_in):
            n_ = self._asint(n)
            if n_ is None or n_ < 0:
                raise InvalidArrayRef("shape (%s) has invalid element: %s" % (self.shape, n))
            s_ = self._asint(s)
            if s_ is None:
                raise InvalidArrayRef("stridev %s has invalid element: %s" % (self.stridev, s))
            shape.append(n_)
            stridev.append(s_)
        shape   = tuple(shape)
        stridev = tuple(stridev)

        # [boffmin:boffmax) is recreated array's byte range rooted at its zero point
        boffmin, boffmax = self._byterange(shape, stridev, self.dtype.itemsize)

        # [xlo,xhi) is the bchild's range accessed with .shape and .stridev
        xlo = z0 + boffmin
        xhi = z0 + boffmax
        if not (0 <= xlo <= xhi <= len(bchild)):
            raise InvalidArrayRef(
                    "shape/stride invalid: cover [%d, %d) while raw data range is [%d, %d)" %
                    (lo + xlo, lo + xhi, lo, hi))

        # bchild_z0 points to the same underlying data buffer as bchild, but
        # zero offset corresponds to zero offset of original array.
        #
        # len(bchild_z0) == 1, so that we can be sure bchild_z0 -> bchild
        # memory pinning always works, not only for positive offsets(*).
        #
        # (*) we cannot make len(bchild_z0) == 0 since then, when creating
        #     empty slice, numpy won't adjust the array pointer at all.
        #
        # it could be also possible to adjust 'data' in __array_interface__
        # for += z0, but going explicitly via slicing is more safe.
        bchild_z0 = bchild[z0: z0 + 1]

        # restore original array shape/strides/dtype via unsafe trick.
        #
        # it should be safe to cover memory corresponding to both negative and
        # positive offsets to bchild_z0, because bchild_z0 holds reference to
        # bchild and bchild covers whole raw data range.
        #
        # it is also safe because we checked .shape and .stridev not to escape
        # from bchild data buffer.
        #
        # the code below is very close to
        #
        #   a = stride_tricks.as_strided(bchild_z0, shape=self.shape, strides=self.stridev)
        #
        # but we don't use as_strided() because we also have to change dtype
        # with shape and strides in one go - else changing dtype after either
        # via a.dtype = ..., or via a.view(dtype=...) can raise errors like
        #
        #   "When changing to a larger dtype, its size must be a
        #    divisor of the total size in bytes of the last axis
        #    of the array."
        aiface = dict(bchild_z0.__array_interface__)
        aiface['shape']   = shape
        aiface['strides'] = stridev
        # type: for now we only care that itemsize is the same
        aiface['typestr'] = '|V%d' % self.dtype.itemsize
        aiface['descr']   = [('', aiface['typestr'])]
        a = asarray(DummyArray(aiface, base=bchild_z0))

        # restore full dtype - it should not raise here, since itemsize is the same
        a.dtype = self.dtype

        # restore full array type
        a = a.view(type=self.atype)

        # we are done
        return a


    # ArrayRef(a) creates reference to ndarray a.
    def __init__(aref, a):
        # find root
        root = a        # top-level ndarray
        bigvma = None   # VMA, that is root.base, if there is one
        while 1:
            base = root.base

            # top-level ndarray
            if base is None:
                break

            # it was an ndarray (sub)view.
            if isinstance(base, ndarray):
                root = base
                continue

            # it might be also ndarray proxy - e.g. np.lib.stride_tricks.DummyArray
            # with holding valid .base but not being ndarray.
            basebase = getattr(base, 'base', None)
            if isinstance(basebase, ndarray):
                root = basebase
                continue

            # base is neither ndarray (sub)class nor ndarray proxy.
            #
            # either it is
            #
            # 1) top-level ndarray with base taken from an object
            #    with buffer interface, e.g. as here:
            #
            #       In [1]: s = '123'
            #       In [2]: x = ndarray(shape=(1,), buffer=s, dtype='|S3')
            #       In [3]: x
            #       Out[3]: array(['123'], dtype='|S3')
            #       In [4]: x.base
            #       Out[4]: '123'
            #
            #    and so it should be treated as top-level ndarray,
            #
            # 2) or it is a VMA created from under BigArray which will be
            #    treated as top-level too, and corrected for in the end.
            basetype = type(base)
            if basetype.__module__ + "." + basetype.__name__ == "_bigfile.VMA":
            #if isinstance(base, _bigfile.VMA):  XXX _bigfile does not expose VMA
                bigvma = base

            break

        # broot is []byte view of root with idx ↑ along memory
        broot = _flatbytev(root)

        # [boffmin:boffmax) is a's byte range rooted at its zero point
        assert len(a.shape) == len(a.strides)
        assert all(n >= 0 for n in a.shape)
        boffmin, boffmax = aref._byterange(a.shape, a.strides, a.itemsize)

        # compute bytes δz in between broot's and a's zero points.
        # δz should be >= 0, since broot ↑ along memory.
        adata = a.__array_interface__.get('data')
        rdata = broot.__array_interface__.get('data')
        assert adata is not None,       "TODO __array_interface__.data = None"
        assert rdata is not None,       "TODO __array_interface__.data = None"
        assert isinstance(adata, tuple), "TODO __array_interface__.data is %r" % (adata,)
        assert isinstance(rdata, tuple), "TODO __array_interface__.data is %r" % (rdata,)
        # {a,r}data is (data, readonly)
        zdelta = adata[0] - rdata[0]
        assert zdelta >= 0

        # broot[lo:hi] is raw []byte underlying data for a, with z0 pointing
        # to a's zero point.
        lo = zdelta + boffmin
        hi = zdelta + boffmax
        assert 0 <= lo <= hi <= len(broot)
        z0 = zdelta - lo

        # reference is ready for ndarray root
        aref.root    = root
        aref.lo      = lo
        aref.hi      = hi
        aref.z0      = z0
        aref.shape   = a.shape
        aref.stridev = a.strides
        aref.dtype   = a.dtype
        aref.atype   = type(a)

        # correct it, if the root is actually BigArray
        if bigvma is not None:
            assert bigvma.addr_start <= rdata[0]
            assert rdata[0] + len(broot) <= bigvma.addr_stop
            bigroot = bigvma.pyuser
            assert isinstance(bigroot, BigArray)

            # bigoff is broot position in bbigroot (both raw flat []byte ↑ along memory)
            pgoff, _ = bigvma.filerange()
            bigoff = pgoff * bigvma.pagesize()      # vma start offset
            bigoff += rdata[0] - bigvma.addr_start  # broot offset from vma start

            aref.root = bigroot
            aref.lo  += bigoff
            aref.hi  += bigoff

        # we are done
        return
# -*- coding: utf-8 -*-
# Wendeling.core.bigarray | Basic tests # Wendeling.core.bigarray | Basic tests
# Copyright (C) 2014-2015 Nexedi SA and Contributors. # Copyright (C) 2014-2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -18,12 +19,13 @@ ...@@ -18,12 +19,13 @@
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
from wendelin.bigarray import BigArray from wendelin.bigarray import BigArray, ArrayRef, _flatbytev
from wendelin.bigfile import BigFile from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \ from numpy import ndarray, dtype, int64, int32, uint32, int16, uint8, all, zeros, arange, \
array_equal, asarray array_equal, asarray, newaxis, swapaxes
from numpy.lib.stride_tricks import as_strided
import numpy import numpy
from pytest import raises from pytest import raises
...@@ -588,3 +590,240 @@ def test_bigarray_to_ndarray(): ...@@ -588,3 +590,240 @@ def test_bigarray_to_ndarray():
for i in range(48,65): for i in range(48,65):
C = BigArray(((1<<i)-1,), uint8, Zh) C = BigArray(((1<<i)-1,), uint8, Zh)
raises(MemoryError, 'asarray(C)') raises(MemoryError, 'asarray(C)')
def test_arrayref():
    # test data - all items are unique, so an array can be verified by content
    data = zeros(PS, dtype=uint8)
    data32 = data.view(uint32)
    data32[:] = arange(len(data32), dtype=uint32)
    data[:256] = arange(256, dtype=uint8)   # first starting bytes are all unique

    # refeq verifies that ArrayRef(x) has exactly the expected fields and
    # that it dereferences back to x content.
    def refeq(x, root, lo, hi, z0, shape, stridev, dtype_):
        ref = ArrayRef(x)
        assert ref.root is root
        assert ref.lo == lo
        assert ref.hi == hi
        assert ref.z0 == z0
        assert ref.shape == shape
        assert ref.stridev == stridev
        assert ref.dtype == dtype_
        assert array_equal(ref.deref(), x)

    # regular ndarray without parent at all
    refeq(data, data, 0, len(data), 0, data.shape, data.strides, data.dtype)

    # regular ndarrays with parent
    refeq(data32, data, 0, len(data), 0, data32.shape, data32.strides, data32.dtype)
    refeq(data[100:140], data, 100, 140, 0, (40,), (1,), data.dtype)
    refeq(data[140:100:-1], data, 101, 141, 39, (40,), (-1,), data.dtype)
    refeq(data[100:140:-1], data, 0, 1, 0, (0,), (1,), data.dtype)     # empty

    # rdata is the same as data[::-1] but without base - i.e. it is toplevel
    m = memoryview(data[::-1])
    rdata = asarray(m)
    assert array_equal(rdata[::-1], data)
    assert rdata.strides == (-1,)
    m_ = rdata.base
    assert isinstance(m_, memoryview)
    #assert m_ is m     XXX strangely it is another object, not exactly m
    # XXX however rdata.strides<0 and no rdata.base.base is enough for us here.
    raises(AttributeError, 'm_.base')

    refeq(rdata[100:140], rdata, PS - 140, PS - 100, 39, (40,), (-1,), data.dtype)

    # BigArray with data backend.
    # data_ is the same as data but shifted to exercise vma and vma->broot
    # offsets calculation.
    data_ = zeros(8*PS, dtype=uint8)
    data_[2*PS-1:][:PS] = data
    f = BigFile_Data_RO(data_, PS)
    fh = f.fileh_open()
    A = BigArray(data_.shape, data_.dtype, fh)
    assert array_equal(A[2*PS-1:][:PS], data)

    # refok verifies whether ArrayRef(x) works ok for x derived from root
    def refok(x, root):
        ref = ArrayRef(x)
        assert ref.root is root
        x_ = ref.deref()
        assert array_equal(x_, x)
        assert x_.dtype == x.dtype
        assert type(x_) == type(x)

        # check that deref won't access range outside lo:hi - by copying
        # root, setting bytes in adjusted root outside lo:hi to either 0x00
        # or 0xff and tweaking ref.root = root_.
        root_ = numpy.copy(_flatbytev(root[:]))
        root_[:ref.lo] = 0
        root_[ref.hi:] = 0
        ref.root = root_
        assert array_equal(ref.deref(), x)

        root_[:ref.lo] = 0xff
        root_[ref.hi:] = 0xff
        assert array_equal(ref.deref(), x)

    for root in (data, rdata, A):   # both ndarray and BigArray roots
        # a is view of root that corresponds to `data`
        if isinstance(root, BigArray):
            a = root[2*PS-1:][:PS]  # get to `data` range
        # typeof(root) = ndarray
        elif root is rdata:
            a = root[::-1]          # rdata
        else:
            a = root[:]             # data
        assert array_equal(a, data)

        # subslices that is possible to get by just indexing
        refok(a[:], root)
        refok(a[1:2], root)
        refok(a[1:10], root)
        refok(a[1:10:2], root)
        refok(a[1:10:3], root)
        refok(a[1:10:-1], root)     # empty (.size = 0)
        refok(a[10:1:-1], root)
        refok(a[10:1:-2], root)
        refok(a[10:1:-3], root)

        # long chain root -> a -> a[...] -> a[...] -> leaf
        leaf = a[2:118]
        leaf = leaf.view(uint32)[3:20]
        leaf = leaf[1:9]
        refok(leaf, root)

        # not aligned - it is not possible to get to resulting slice just by indexing A
        unaligned = a.view(uint8)[2:-2].view(uint32)
        refok(unaligned, root)
        refok(unaligned[::-1], root)

        # change of type ↑ in size (int64), then ↓ in size (int16)
        for itype in (int64, int16):
            v = a.view(itype)
            refok(v, root)
            refok(v[::-1], root)

        # change of type to size not multiple of original: 4 -> 5, then 4 -> 3
        for vtype, vsize in (('V5', 5), ('V3', 3)):
            v = a[1:1+vsize*10].view(vtype)
            refok(v, root)
            refok(v[::-1], root)

        # intermediate parent with <0 stride
        r = a[1:1+3*10].view('V3')[::-1]
        refok(r[-2:2:-1], root)

        # 2d array
        ncol = PS//(4*8)
        x = a.view(uint32).reshape((8, -1))
        y = swapaxes(x, 0,1)
        assert x.shape == (8, ncol)
        assert x.strides == (PS//8, 4)
        assert y.shape == (ncol, 8)
        assert y.strides == (4, PS//8)
        refok(x, root)
        refok(y, root)

        # array with both >0 and <0 strides
        x_ = x[:,::-1]
        y_ = y[:,::-1]
        assert x_.shape == x.shape
        assert x_.strides == (PS//8, -4)
        assert y_.shape == y.shape
        assert y_.strides == (4, -PS//8)
        refok(x_, root)
        refok(y_, root)

        # array with [1] dimension
        z1 = x[:, newaxis, :]
        assert z1.shape == (8, 1, ncol)
        assert z1.strides == (PS//8, 0, 4)
        refok(z1, root)

        # array with [0] dimension
        z0 = z1[:, 0:0, :]
        assert z0.shape == (8, 0, ncol)
        assert z0.strides == (PS//8, 0, 4)
        refok(z0, root)

        # tricky array overlapping itself
        t = a.view(uint32)
        assert t.shape == (PS//4,)
        assert t.strides == (4,)
        assert t.itemsize == 4
        t = as_strided(t, strides=(1,))
        assert t.shape == (PS//4,)
        assert t.strides == (1,)
        assert t.itemsize == 4
        refok(t, root)

        # structured dtype
        s = a.view(dtype=[('width', '<i2'), ('length', '<i2')])
        assert s.shape == (PS//4,)
        assert s.strides == (4,)
        assert s.itemsize == 4
        refok(s, root)

        s_ = s['length']
        assert s_.shape == (PS//4,)
        assert s_.strides == (4,)
        assert s_.itemsize == 2
        refok(s_, root)

        # ndarray subclass, e.g. np.recarray
        r = s.view(type=numpy.recarray)
        assert isinstance(r, numpy.recarray)
        assert r.shape == (PS//4,)
        assert r.strides == (4,)
        assert r.itemsize == 4
        assert array_equal(r.length, s['length'])
        refok(r, root)
/* Wendelin.bigfile | Python interface to memory/files /* Wendelin.bigfile | Python interface to memory/files
* Copyright (C) 2014-2015 Nexedi SA and Contributors. * Copyright (C) 2014-2018 Nexedi SA and Contributors.
* Kirill Smelkov <kirr@nexedi.com> * Kirill Smelkov <kirr@nexedi.com>
* *
* This program is free software: you can Use, Study, Modify and Redistribute * This program is free software: you can Use, Study, Modify and Redistribute
...@@ -59,12 +59,36 @@ static PyObject *pybuf_str; ...@@ -59,12 +59,36 @@ static PyObject *pybuf_str;
/* /*
* python representation of VMA - exposes vma memory as python buffer * python representation of VMA - exposes vma memory as python buffer
*
* also exposes:
*
* .filerange() to know which range in mmaped file this vma covers.
* .pagesize() to know page size of underlying RAM.
*
* and:
*
* .addr_start, .addr_stop to know offset of ndarray in VMA.
* .pyuser generic python-level attribute (see below).
*/ */
struct PyVMA { struct PyVMA {
PyObject; PyObject;
PyObject *in_weakreflist; PyObject *in_weakreflist;
VMA; VMA;
/* python-level user of this VMA.
*
* for example for ArrayRef to work, BigArray needs to find out VMA ->
* top-level BigArray object for which this VMA was created.
*
* There is vma -> fileh -> file chain, but e.g. for a given ZBigFile there
* can be several ZBigArrays created on top of it to view its data (e.g. via
* BigArray.view()). So even if it can go from vma to -> zfile it does not
* help to find out the top-level ZBigArray object itself.
*
* This way we allow BigArray python code to set vma.pyuser attribute
* pointing to original BigArray object for which this VMA was created. */
PyObject *pyuser;
}; };
typedef struct PyVMA PyVMA; typedef struct PyVMA PyVMA;
...@@ -140,6 +164,11 @@ void XPyBufferObject_Unpin(PyBufferObject *bufo); ...@@ -140,6 +164,11 @@ void XPyBufferObject_Unpin(PyBufferObject *bufo);
void XPyBuffer_Unpin(Py_buffer *view); void XPyBuffer_Unpin(Py_buffer *view);
/* PyFunc(FUNC, DOC) defines static docstring FUNC_doc initialized with DOC and
 * opens the definition of static function FUNC returning PyObject *.
 *
 * The parameter list and the body of FUNC follow the macro invocation. */
#define PyFunc(FUNC, DOC) \
static const char FUNC ##_doc[] = DOC; \
static PyObject *FUNC
/************ /************
* PyVMA * * PyVMA *
************/ ************/
...@@ -193,6 +222,50 @@ pyvma_len(PyObject *pyvma0) ...@@ -193,6 +222,50 @@ pyvma_len(PyObject *pyvma0)
} }
/* pyvma vs cyclic GC */

/* pyvma_traverse reports to the garbage collector the object references held
 * by pyvma - currently only .pyuser . */
static int
pyvma_traverse(PyObject *pyvma0, visitproc visit, void *arg)
{
    PyVMA *self = upcast(PyVMA *, pyvma0);

    /* NOTE Py_VISIT relies on the parameters being named `visit` and `arg` */
    Py_VISIT(self->pyuser);

    return 0;
}
/* pyvma_clear drops the object references held by pyvma - currently only
 * .pyuser - leaving the field set to NULL. */
static int
pyvma_clear(PyObject *pyvma0)
{
    PyVMA *self = upcast(PyVMA *, pyvma0);

    Py_CLEAR(self->pyuser);

    return 0;
}
/* vma.filerange() returns (pgoffset, pglen) tuple - the range of the mmaped
 * file, in pages, that the vma covers. `args` is not inspected. */
PyFunc(pyvma_filerange, "filerange() -> (pgoffset, pglen) -- file range this vma covers")
    (PyObject *pyvma0, PyObject *args)
{
    PyVMA *vma = upcast(PyVMA *, pyvma0);

    /* XXX Py_ssize_t vs pgoff_t */
    Py_ssize_t pgoffset = vma->f_pgoffset;

    /* NOTE addr_stop and addr_start must be page-aligned for the division
     * below to give the exact number of pages */
    Py_ssize_t pglen = (vma->addr_stop - vma->addr_start) / vma->fileh->ramh->ram->pagesize;

    return Py_BuildValue("(nn)", pgoffset, pglen);
}
/* vma.pagesize() returns the page size of the RAM the vma's file handle is
 * opened on. `args` is not inspected. */
PyFunc(pyvma_pagesize, "pagesize() -> pagesize -- page size of RAM underlying this VMA")
    (PyObject *pyvma0, PyObject *args)
{
    PyVMA      *vma = upcast(PyVMA *, pyvma0);
    Py_ssize_t  pagesize;

    pagesize = vma->fileh->ramh->ram->pagesize;

    return Py_BuildValue("n", pagesize);
}
static void static void
pyvma_dealloc(PyObject *pyvma0) pyvma_dealloc(PyObject *pyvma0)
{ {
...@@ -210,6 +283,7 @@ pyvma_dealloc(PyObject *pyvma0) ...@@ -210,6 +283,7 @@ pyvma_dealloc(PyObject *pyvma0)
Py_DECREF(pyfileh); Py_DECREF(pyfileh);
} }
pyvma_clear(pyvma);
pyvma->ob_type->tp_free(pyvma); pyvma->ob_type->tp_free(pyvma);
} }
...@@ -247,12 +321,35 @@ static /*const*/ PySequenceMethods pyvma_as_seq = { ...@@ -247,12 +321,35 @@ static /*const*/ PySequenceMethods pyvma_as_seq = {
}; };
static /*const*/ PyMethodDef pyvma_methods[] = {
{"filerange", pyvma_filerange, METH_VARARGS, pyvma_filerange_doc},
{"pagesize", pyvma_pagesize, METH_VARARGS, pyvma_pagesize_doc},
{NULL}
};
/* T_UINTPTR is the PyMemberDef type code used to expose uintptr_t fields.
 *
 * It is mapped to T_ULONG, which is correct only when uintptr_t and unsigned
 * long have the same size - this is verified at build time. */
// XXX vvv better switch on various possibilities and find appropriate type
// (e.g. on X32 uintptr_t will be 4 while long will be 8)
const int _ =
    BUILD_ASSERT_OR_ZERO(sizeof(uintptr_t) == sizeof(unsigned long));
#define T_UINTPTR   T_ULONG

/* attributes exposed on _bigfile.VMA objects:
 *
 *   .addr_start, .addr_stop    read-only; address range of the vma.
 *   .pyuser                    read-write; python-level user of the vma.
 *                              T_OBJECT_EX: reading it while it is unset
 *                              raises AttributeError.
 */
static /*const*/ PyMemberDef pyvma_members[] = {
    {"addr_start",  T_UINTPTR,      offsetof(PyVMA, addr_start),    READONLY,   "vma's start addr"},
    {"addr_stop",   T_UINTPTR,      offsetof(PyVMA, addr_stop),     READONLY,   "vma's stop addr"},
    // XXX pyuser: restrict to read-only access?
    {"pyuser",      T_OBJECT_EX,    offsetof(PyVMA, pyuser),        0,          "user of this vma"},
    {NULL}
};
static PyTypeObject PyVMA_Type = { static PyTypeObject PyVMA_Type = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "_bigfile.VMA", .tp_name = "_bigfile.VMA",
.tp_basicsize = sizeof(PyVMA), .tp_basicsize = sizeof(PyVMA),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_NEWBUFFER, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_NEWBUFFER | Py_TPFLAGS_HAVE_GC,
.tp_methods = NULL, // TODO ? .tp_traverse = pyvma_traverse,
.tp_clear = pyvma_clear,
.tp_methods = pyvma_methods,
.tp_members = pyvma_members,
.tp_as_sequence = &pyvma_as_seq, .tp_as_sequence = &pyvma_as_seq,
.tp_as_buffer = &pyvma_as_buffer, .tp_as_buffer = &pyvma_as_buffer,
.tp_dealloc = pyvma_dealloc, .tp_dealloc = pyvma_dealloc,
...@@ -268,10 +365,6 @@ static PyTypeObject PyVMA_Type = { ...@@ -268,10 +365,6 @@ static PyTypeObject PyVMA_Type = {
****************/ ****************/
#define PyFunc(FUNC, DOC) \
static const char FUNC ##_doc[] = DOC; \
static PyObject *FUNC
PyFunc(pyfileh_mmap, "mmap(pgoffset, pglen) - map fileh part into memory") PyFunc(pyfileh_mmap, "mmap(pgoffset, pglen) - map fileh part into memory")
(PyObject *pyfileh0, PyObject *args) (PyObject *pyfileh0, PyObject *args)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment